00001 /* 00002 * Copyright (c) 2003 Hewlett-Packard Development Company, L.P. 00003 * 00004 * Permission is hereby granted, free of charge, to any person obtaining a copy 00005 * of this software and associated documentation files (the "Software"), to deal 00006 * in the Software without restriction, including without limitation the rights 00007 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 00008 * copies of the Software, and to permit persons to whom the Software is 00009 * furnished to do so, subject to the following conditions: 00010 * 00011 * The above copyright notice and this permission notice shall be included in 00012 * all copies or substantial portions of the Software. 00013 * 00014 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 00015 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 00016 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 00017 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 00018 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 00019 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 00020 * SOFTWARE. 00021 */ 00022 00023 #ifndef ATOMIC_OPS_H 00024 00025 #define ATOMIC_OPS_H 00026 00027 #include <assert.h> 00028 #include <stddef.h> 00029 00030 /* We define various atomic operations on memory in a */ 00031 /* machine-specific way. Unfortunately, this is complicated */ 00032 /* by the fact that these may or may not be combined with */ 00033 /* various memory barriers. Thus the actual operations we */ 00034 /* define have the form AO_<atomic-op>_<barrier>, for all */ 00035 /* plausible combinations of <atomic-op> and <barrier>. */ 00036 /* This of course results in a mild combinatorial explosion. */ 00037 /* To deal with it, we try to generate derived */ 00038 /* definitions for as many of the combinations as we can, as */ 00039 /* automatically as possible. */ 00040 /* */ 00041 /* Our assumption throughout is that the programmer will */ 00042 /* specify the least demanding operation and memory barrier */ 00043 /* that will guarantee correctness for the implementation. */ 00044 /* Our job is to find the least expensive way to implement it */ 00045 /* on the applicable hardware. In many cases that will */ 00046 /* involve, for example, a stronger memory barrier, or a */ 00047 /* combination of hardware primitives. */ 00048 /* */ 00049 /* Conventions: */ 00050 /* "plain" atomic operations are not guaranteed to include */ 00051 /* a barrier. The suffix in the name specifies the barrier */ 00052 /* type. Suffixes are: */ 00053 /* _release: Earlier operations may not be delayed past it. */ 00054 /* _acquire: Later operations may not move ahead of it. */ 00055 /* _read: Subsequent reads must follow this operation and */ 00056 /* preceding reads. */ 00057 /* _write: Earlier writes precede both this operation and */ 00058 /* later writes. */ 00059 /* _full: Ordered with respect to both earlier and later memops.*/ 00060 /* _release_write: Ordered with respect to earlier writes. */ 00061 /* _acquire_read: Ordered with respect to later reads. */ 00062 /* */ 00063 /* Currently we try to define the following atomic memory */ 00064 /* operations, in combination with the above barriers: */ 00065 /* AO_nop */ 00066 /* AO_load */ 00067 /* AO_store */ 00068 /* AO_test_and_set (binary) */ 00069 /* AO_fetch_and_add */ 00070 /* AO_fetch_and_add1 */ 00071 /* AO_fetch_and_sub1 */ 00072 /* AO_or */ 00073 /* AO_compare_and_swap */ 00074 /* */ 00075 /* Note that atomicity guarantees are valid only if both */ 00076 /* readers and writers use AO_ operations to access the */ 00077 /* shared value, while ordering constraints are intended to */ 00078 /* apply all memory operations. If a location can potentially */ 00079 /* be accessed simultaneously from multiple threads, and one of */ 00080 /* those accesses may be a write access, then all such */ 00081 /* accesses to that location should be through AO_ primitives. */ 00082 /* However if AO_ operations enforce sufficient ordering to */ 00083 /* ensure that a location x cannot be accessed concurrently, */ 00084 /* or can only be read concurrently, then x can be accessed */ 00085 /* via ordinary references and assignments. */ 00086 /* */ 00087 /* Compare_and_exchange takes an address and an expected old */ 00088 /* value and a new value, and returns an int. Nonzero */ 00089 /* indicates that it succeeded. */ 00090 /* Test_and_set takes an address, atomically replaces it by */ 00091 /* AO_TS_SET, and returns the prior value. */ 00092 /* An AO_TS_t location can be reset with the */ 00093 /* AO_CLEAR macro, which normally uses AO_store_release. */ 00094 /* AO_fetch_and_add takes an address and an AO_t increment */ 00095 /* value. The AO_fetch_and_add1 and AO_fetch_and_sub1 variants */ 00096 /* are provided, since they allow faster implementations on */ 00097 /* some hardware. AO_or atomically ors an AO_t value into a */ 00098 /* memory location, but does not provide access to the original.*/ 00099 /* */ 00100 /* We expect this list to grow slowly over time. */ 00101 /* */ 00102 /* Note that AO_nop_full is a full memory barrier. */ 00103 /* */ 00104 /* Note that if some data is initialized with */ 00105 /* data.x = ...; data.y = ...; ... */ 00106 /* AO_store_release_write(&data_is_initialized, 1) */ 00107 /* then data is guaranteed to be initialized after the test */ 00108 /* if (AO_load_release_read(&data_is_initialized)) ... */ 00109 /* succeeds. Furthermore, this should generate near-optimal */ 00110 /* code on all common platforms. */ 00111 /* */ 00112 /* All operations operate on unsigned AO_t, which */ 00113 /* is the natural word size, and usually unsigned long. */ 00114 /* It is possible to check whether a particular operation op */ 00115 /* is available on a particular platform by checking whether */ 00116 /* AO_HAVE_op is defined. We make heavy use of these macros */ 00117 /* internally. */ 00118 00119 /* The rest of this file basically has three sections: */ 00120 /* */ 00121 /* Some utility and default definitions. */ 00122 /* */ 00123 /* The architecture dependent section: */ 00124 /* This defines atomic operations that have direct hardware */ 00125 /* support on a particular platform, mostly by including the */ 00126 /* appropriate compiler- and hardware-dependent file. */ 00127 /* */ 00128 /* The synthesis section: */ 00129 /* This tries to define other atomic operations in terms of */ 00130 /* those that are explicitly available on the platform. */ 00131 /* This section is hardware independent. */ 00132 /* We make no attempt to synthesize operations in ways that */ 00133 /* effectively introduce locks, except for the debugging/demo */ 00134 /* pthread-based implementation at the beginning. A more */ 00135 /* realistic implementation that falls back to locks could be */ 00136 /* added as a higher layer. But that would sacrifice */ 00137 /* usability from signal handlers. */ 00138 /* The synthesis section is implemented almost entirely in */ 00139 /* atomic_ops_generalize.h. */ 00140 00141 /* Some common defaults. Overridden for some architectures. */ 00142 #define AO_t size_t 00143 00144 /* The test_and_set primitive returns an AO_TS_VAL_t value. */ 00145 /* AO_TS_t is the type of an in-memory test-and-set location. */ 00146 00147 #define AO_TS_INITIALIZER (AO_t)AO_TS_CLEAR 00148 00149 /* Platform-dependent stuff: */ 00150 #if defined(__GNUC__) || defined(_MSC_VER) || defined(__INTEL_COMPILER) \ 00151 || defined(__DMC__) || defined(__WATCOMC__) 00152 # define AO_INLINE static __inline 00153 #elif defined(__sun) 00154 # define AO_INLINE static inline 00155 #else 00156 # define AO_INLINE static 00157 #endif 00158 00159 #if defined(__GNUC__) && !defined(__INTEL_COMPILER) 00160 # define AO_compiler_barrier() __asm__ __volatile__("" : : : "memory") 00161 #elif defined(_MSC_VER) || defined(__DMC__) || defined(__BORLANDC__) \ 00162 || defined(__WATCOMC__) 00163 # if defined(_AMD64_) || defined(_M_X64) || _MSC_VER >= 1400 00164 # if defined(_WIN32_WCE) 00165 /* # include <cmnintrin.h> */ 00166 # elif defined(_MSC_VER) 00167 # include <intrin.h> 00168 # endif 00169 # pragma intrinsic(_ReadWriteBarrier) 00170 # define AO_compiler_barrier() _ReadWriteBarrier() 00171 /* We assume this does not generate a fence instruction. */ 00172 /* The documentation is a bit unclear. */ 00173 # else 00174 # define AO_compiler_barrier() __asm { } 00175 /* The preceding implementation may be preferable here too. */ 00176 /* But the documentation warns about VC++ 2003 and earlier. */ 00177 # endif 00178 #elif defined(__INTEL_COMPILER) 00179 # define AO_compiler_barrier() __memory_barrier() /* Too strong? IA64-only? */ 00180 #elif defined(_HPUX_SOURCE) 00181 # if defined(__ia64) 00182 # include <machine/sys/inline.h> 00183 # define AO_compiler_barrier() _Asm_sched_fence() 00184 # else 00185 /* FIXME - We dont know how to do this. This is a guess. */ 00186 /* And probably a bad one. */ 00187 static volatile int AO_barrier_dummy; 00188 # define AO_compiler_barrier() AO_barrier_dummy = AO_barrier_dummy 00189 # endif 00190 #else 00191 /* We conjecture that the following usually gives us the right */ 00192 /* semantics or an error. */ 00193 # define AO_compiler_barrier() asm("") 00194 #endif 00195 00196 #if defined(AO_USE_PTHREAD_DEFS) 00197 # include "atomic_ops/sysdeps/generic_pthread.h" 00198 #endif /* AO_USE_PTHREAD_DEFS */ 00199 00200 #if defined(__GNUC__) && !defined(AO_USE_PTHREAD_DEFS) \ 00201 && !defined(__INTEL_COMPILER) 00202 # if defined(__i386__) 00203 /* We don't define AO_USE_SYNC_CAS_BUILTIN for x86 here because */ 00204 /* it might require specifying additional options (like -march) */ 00205 /* or additional link libraries (if -march is not specified). */ 00206 # include "./x86.h" 00207 # endif /* __i386__ */ 00208 # if defined(__x86_64__) 00209 # if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) 00210 /* It is safe to use __sync CAS built-in on this architecture. */ 00211 # define AO_USE_SYNC_CAS_BUILTIN 00212 # endif 00213 # include "./x86_64.h" 00214 # endif /* __x86_64__ */ 00215 # if defined(__ia64__) 00216 # include "./ia64.h" 00217 # define AO_GENERALIZE_TWICE 00218 # endif /* __ia64__ */ 00219 # if defined(__hppa__) 00220 # include "atomic_ops/sysdeps/gcc/hppa.h" 00221 # define AO_CAN_EMUL_CAS 00222 # endif /* __hppa__ */ 00223 # if defined(__alpha__) 00224 # include "atomic_ops/sysdeps/gcc/alpha.h" 00225 # define AO_GENERALIZE_TWICE 00226 # endif /* __alpha__ */ 00227 # if defined(__s390__) 00228 # include "atomic_ops/sysdeps/gcc/s390.h" 00229 # endif /* __s390__ */ 00230 # if defined(__sparc__) 00231 # include "./sparc.h" 00232 # define AO_CAN_EMUL_CAS 00233 # endif /* __sparc__ */ 00234 # if defined(__m68k__) 00235 # include "atomic_ops/sysdeps/gcc/m68k.h" 00236 # endif /* __m68k__ */ 00237 # if defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \ 00238 || defined(__powerpc64__) || defined(__ppc64__) 00239 # include "./powerpc.h" 00240 # endif /* __powerpc__ */ 00241 # if defined(__arm__) && !defined(AO_USE_PTHREAD_DEFS) 00242 # include "atomic_ops/sysdeps/gcc/arm.h" 00243 # define AO_CAN_EMUL_CAS 00244 # endif /* __arm__ */ 00245 # if defined(__cris__) || defined(CRIS) 00246 # include "atomic_ops/sysdeps/gcc/cris.h" 00247 # endif 00248 # if defined(__mips__) 00249 # include "atomic_ops/sysdeps/gcc/mips.h" 00250 # endif /* __mips__ */ 00251 # if defined(__sh__) || defined(SH4) 00252 # include "atomic_ops/sysdeps/gcc/sh.h" 00253 # define AO_CAN_EMUL_CAS 00254 # endif /* __sh__ */ 00255 #endif /* __GNUC__ && !AO_USE_PTHREAD_DEFS */ 00256 00257 #if defined(__INTEL_COMPILER) && !defined(AO_USE_PTHREAD_DEFS) 00258 # if defined(__ia64__) 00259 # include "./ia64.h" 00260 # define AO_GENERALIZE_TWICE 00261 # endif 00262 # if defined(__GNUC__) 00263 /* Intel Compiler in GCC compatible mode */ 00264 # if defined(__i386__) 00265 # include "./x86.h" 00266 # endif /* __i386__ */ 00267 # if defined(__x86_64__) 00268 # if __INTEL_COMPILER > 1110 00269 # define AO_USE_SYNC_CAS_BUILTIN 00270 # endif 00271 # include "./x86_64.h" 00272 # endif /* __x86_64__ */ 00273 # endif 00274 #endif 00275 00276 #if defined(_HPUX_SOURCE) && !defined(__GNUC__) && !defined(AO_USE_PTHREAD_DEFS) 00277 # if defined(__ia64) 00278 # include "atomic_ops/sysdeps/hpc/ia64.h" 00279 # define AO_GENERALIZE_TWICE 00280 # else 00281 # include "atomic_ops/sysdeps/hpc/hppa.h" 00282 # define AO_CAN_EMUL_CAS 00283 # endif 00284 #endif 00285 00286 #if defined(__sun) && !defined(__GNUC__) && !defined(AO_USE_PTHREAD_DEFS) 00287 /* Note: use -DAO_USE_PTHREAD_DEFS if Sun CC does not handle inline asm. */ 00288 # if defined(__i386) 00289 # include "atomic_ops/sysdeps/sunc/x86.h" 00290 # endif /* __i386 */ 00291 # if defined(__x86_64) || defined(__amd64) 00292 # include "atomic_ops/sysdeps/sunc/x86_64.h" 00293 # endif /* __x86_64 */ 00294 #endif 00295 00296 #if !defined(__GNUC__) && (defined(sparc) || defined(__sparc)) \ 00297 && !defined(AO_USE_PTHREAD_DEFS) 00298 # include "atomic_ops/sysdeps/sunc/sparc.h" 00299 # define AO_CAN_EMUL_CAS 00300 #endif 00301 00302 #if defined(_MSC_VER) || defined(__DMC__) || defined(__BORLANDC__) \ 00303 || (defined(__WATCOMC__) && defined(__NT__)) 00304 # if defined(_AMD64_) || defined(_M_X64) 00305 # include "atomic_ops/sysdeps/msftc/x86_64.h" 00306 # elif defined(_M_IX86) || defined(x86) 00307 # include "atomic_ops/sysdeps/msftc/x86.h" 00308 # elif defined(_M_ARM) || defined(ARM) || defined(_ARM_) 00309 # include "atomic_ops/sysdeps/msftc/arm.h" 00310 # endif 00311 #endif 00312 00313 #if defined(AO_REQUIRE_CAS) && !defined(AO_HAVE_compare_and_swap) \ 00314 && !defined(AO_HAVE_compare_and_swap_full) \ 00315 && !defined(AO_HAVE_compare_and_swap_acquire) 00316 # if defined(AO_CAN_EMUL_CAS) 00317 # include "atomic_ops/sysdeps/emul_cas.h" 00318 # else 00319 # error Cannot implement AO_compare_and_swap_full on this architecture. 00320 # endif 00321 #endif /* AO_REQUIRE_CAS && !AO_HAVE_compare_and_swap ... */ 00322 00323 /* The most common way to clear a test-and-set location */ 00324 /* at the end of a critical section. */ 00325 #if AO_AO_TS_T && !defined(AO_CLEAR) 00326 # define AO_CLEAR(addr) AO_store_release((AO_TS_t *)(addr), AO_TS_CLEAR) 00327 #endif 00328 #if AO_CHAR_TS_T && !defined(AO_CLEAR) 00329 # define AO_CLEAR(addr) AO_char_store_release((AO_TS_t *)(addr), AO_TS_CLEAR) 00330 #endif 00331 00332 /* 00333 * The generalization section. 00334 * Theoretically this should repeatedly include atomic_ops_generalize.h. 00335 * In fact, we observe that this converges after a small fixed number 00336 * of iterations, usually one. 00337 */ 00338 #include "./generalize.h" 00339 #ifdef AO_GENERALIZE_TWICE 00340 # include "./generalize.h" 00341 #endif 00342 00343 /* For compatibility with version 0.4 and earlier */ 00344 #define AO_TS_T AO_TS_t 00345 #define AO_T AO_t 00346 #define AO_TS_VAL AO_TS_VAL_t 00347 00348 #endif /* ATOMIC_OPS_H */