usermode/library/atomic_ops/atomic_ops.h

00001 /*
00002  * Copyright (c) 2003 Hewlett-Packard Development Company, L.P.
00003  *
00004  * Permission is hereby granted, free of charge, to any person obtaining a copy
00005  * of this software and associated documentation files (the "Software"), to deal
00006  * in the Software without restriction, including without limitation the rights
00007  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
00008  * copies of the Software, and to permit persons to whom the Software is
00009  * furnished to do so, subject to the following conditions:
00010  *
00011  * The above copyright notice and this permission notice shall be included in
00012  * all copies or substantial portions of the Software.
00013  *
00014  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00015  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00016  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
00017  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00018  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
00019  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
00020  * SOFTWARE.
00021  */
00022 
00023 #ifndef ATOMIC_OPS_H
00024 
00025 #define ATOMIC_OPS_H
00026 
00027 #include <assert.h>
00028 #include <stddef.h>
00029 
00030 /* We define various atomic operations on memory in a           */
00031 /* machine-specific way.  Unfortunately, this is complicated    */
00032 /* by the fact that these may or may not be combined with       */
00033 /* various memory barriers.  Thus the actual operations we      */
00034 /* define have the form AO_<atomic-op>_<barrier>, for all       */
00035 /* plausible combinations of <atomic-op> and <barrier>.         */
00036 /* This of course results in a mild combinatorial explosion.    */
00037 /* To deal with it, we try to generate derived                  */
00038 /* definitions for as many of the combinations as we can, as    */
00039 /* automatically as possible.                                   */
00040 /*                                                              */
00041 /* Our assumption throughout is that the programmer will        */
00042 /* specify the least demanding operation and memory barrier     */
00043 /* that will guarantee correctness for the implementation.      */
00044 /* Our job is to find the least expensive way to implement it   */
00045 /* on the applicable hardware.  In many cases that will         */
00046 /* involve, for example, a stronger memory barrier, or a        */
00047 /* combination of hardware primitives.                          */
00048 /*                                                              */
00049 /* Conventions:                                                 */
00050 /* "plain" atomic operations are not guaranteed to include      */
00051 /* a barrier.  The suffix in the name specifies the barrier     */
00052 /* type.  Suffixes are:                                         */
00053 /* _release: Earlier operations may not be delayed past it.     */
00054 /* _acquire: Later operations may not move ahead of it.         */
00055 /* _read: Subsequent reads must follow this operation and       */
00056 /*        preceding reads.                                      */
00057 /* _write: Earlier writes precede both this operation and       */
00058 /*        later writes.                                         */
00059 /* _full: Ordered with respect to both earlier and later memops.*/
00060 /* _release_write: Ordered with respect to earlier writes.      */
00061 /* _acquire_read: Ordered with respect to later reads.          */
00062 /*                                                              */
00063 /* Currently we try to define the following atomic memory       */
00064 /* operations, in combination with the above barriers:          */
00065 /* AO_nop                                                       */
00066 /* AO_load                                                      */
00067 /* AO_store                                                     */
00068 /* AO_test_and_set (binary)                                     */
00069 /* AO_fetch_and_add                                             */
00070 /* AO_fetch_and_add1                                            */
00071 /* AO_fetch_and_sub1                                            */
00072 /* AO_or                                                        */
00073 /* AO_compare_and_swap                                          */
00074 /*                                                              */
00075 /* Note that atomicity guarantees are valid only if both        */
00076 /* readers and writers use AO_ operations to access the         */
00077 /* shared value, while ordering constraints are intended to     */
00078 /* apply to all memory operations.  If a location can potentially */
00079 /* be accessed simultaneously from multiple threads, and one of */
00080 /* those accesses may be a write access, then all such          */
00081 /* accesses to that location should be through AO_ primitives.  */
00082 /* However if AO_ operations enforce sufficient ordering to     */
00083 /* ensure that a location x cannot be accessed concurrently,    */
00084 /* or can only be read concurrently, then x can be accessed     */
00085 /* via ordinary references and assignments.                     */
00086 /*                                                              */
00087 /* Compare_and_swap takes an address and an expected old        */
00088 /* value and a new value, and returns an int.  Nonzero          */
00089 /* indicates that it succeeded.                                 */
00090 /* Test_and_set takes an address, atomically replaces it by     */
00091 /* AO_TS_SET, and returns the prior value.                      */
00092 /* An AO_TS_t location can be reset with the                    */
00093 /* AO_CLEAR macro, which normally uses AO_store_release.        */
00094 /* AO_fetch_and_add takes an address and an AO_t increment      */
00095 /* value.  The AO_fetch_and_add1 and AO_fetch_and_sub1 variants */
00096 /* are provided, since they allow faster implementations on     */
00097 /* some hardware. AO_or atomically ors an AO_t value into a     */
00098 /* memory location, but does not provide access to the original.*/
00099 /*                                                              */
00100 /* We expect this list to grow slowly over time.                */
00101 /*                                                              */
00102 /* Note that AO_nop_full is a full memory barrier.              */
00103 /*                                                              */
00104 /* Note that if some data is initialized with                   */
00105 /*      data.x = ...; data.y = ...; ...                         */
00106 /*      AO_store_release_write(&data_is_initialized, 1)         */
00107 /* then data is guaranteed to be initialized after the test     */
00108 /*      if (AO_load_acquire_read(&data_is_initialized)) ...     */
00109 /* succeeds.  Furthermore, this should generate near-optimal    */
00110 /* code on all common platforms.                                */
00111 /*                                                              */
00112 /* All operations operate on unsigned AO_t, which               */
00113 /* is the natural word size, and usually unsigned long.         */
00114 /* It is possible to check whether a particular operation op    */
00115 /* is available on a particular platform by checking whether    */
00116 /* AO_HAVE_op is defined.  We make heavy use of these macros    */
00117 /* internally.                                                  */
00118 
00119 /* The rest of this file basically has three sections:          */
00120 /*                                                              */
00121 /* Some utility and default definitions.                        */
00122 /*                                                              */
00123 /* The architecture dependent section:                          */
00124 /* This defines atomic operations that have direct hardware     */
00125 /* support on a particular platform, mostly by including the    */
00126 /* appropriate compiler- and hardware-dependent file.           */
00127 /*                                                              */
00128 /* The synthesis section:                                       */
00129 /* This tries to define other atomic operations in terms of     */
00130 /* those that are explicitly available on the platform.         */
00131 /* This section is hardware independent.                        */
00132 /* We make no attempt to synthesize operations in ways that     */
00133 /* effectively introduce locks, except for the debugging/demo   */
00134 /* pthread-based implementation at the beginning.  A more       */
00135 /* realistic implementation that falls back to locks could be   */
00136 /* added as a higher layer.  But that would sacrifice           */
00137 /* usability from signal handlers.                              */
00138 /* The synthesis section is implemented almost entirely in      */
00139 /* atomic_ops_generalize.h.                                     */
00140 
00141 /* Some common defaults.  Overridden for some architectures.    */
      /* AO_t is the operand type of the AO_ operations; the default  */
      /* chosen here is size_t, which comes from <stddef.h>.          */
00142 #define AO_t size_t
00143 
00144 /* The test_and_set primitive returns an AO_TS_VAL_t value.     */
00145 /* AO_TS_t is the type of an in-memory test-and-set location.   */
      /* AO_TS_INITIALIZER expands to AO_TS_CLEAR cast to AO_t; by    */
      /* its name it is meant to initialize such a location.          */
      /* AO_TS_CLEAR, AO_TS_t and AO_TS_VAL_t are not defined in this */
      /* file; presumably the platform headers included below         */
      /* provide them (to be confirmed).  Note that the expansion is  */
      /* not wrapped in outer parentheses.                            */
00146 
00147 #define AO_TS_INITIALIZER (AO_t)AO_TS_CLEAR
00148 
00149 /* Platform-dependent stuff:                                    */
      /* AO_INLINE is the declaration prefix selected per compiler:   */
      /* "static __inline" when any of __GNUC__, _MSC_VER,            */
      /* __INTEL_COMPILER, __DMC__ or __WATCOMC__ is defined,         */
      /* "static inline" when only __sun is defined, and plain        */
      /* "static" in every other case.                                */
00150 #if defined(__GNUC__) || defined(_MSC_VER) || defined(__INTEL_COMPILER) \
00151         || defined(__DMC__) || defined(__WATCOMC__)
00152 # define AO_INLINE static __inline
00153 #elif defined(__sun)
00154 # define AO_INLINE static inline
00155 #else
00156 # define AO_INLINE static
00157 #endif
00158 
00159 #if defined(__GNUC__) && !defined(__INTEL_COMPILER)
        /* AO_compiler_barrier() is chosen per compiler in this chain.  */
        /* GCC (not Intel): an empty asm statement with a "memory"      */
        /* clobber, so no instruction text is emitted.                  */
00160 # define AO_compiler_barrier() __asm__ __volatile__("" : : : "memory")
00161 #elif defined(_MSC_VER) || defined(__DMC__) || defined(__BORLANDC__) \
00162         || defined(__WATCOMC__)
        /* An undefined _MSC_VER evaluates to 0 in the test below, so   */
        /* the non-Microsoft compilers of this branch take the "else"   */
        /* arm unless _AMD64_ or _M_X64 is defined.                     */
00163 # if defined(_AMD64_) || defined(_M_X64) || _MSC_VER >= 1400
00164 #   if defined(_WIN32_WCE)
00165 /* #     include <cmnintrin.h> */
00166 #   elif defined(_MSC_VER)
00167 #     include <intrin.h>
00168 #   endif
00169 #   pragma intrinsic(_ReadWriteBarrier)
00170 #   define AO_compiler_barrier() _ReadWriteBarrier()
00171         /* We assume this does not generate a fence instruction.        */
00172         /* The documentation is a bit unclear.                          */
00173 # else
00174 #   define AO_compiler_barrier() __asm { }
00175         /* The preceding implementation may be preferable here too.     */
00176         /* But the documentation warns about VC++ 2003 and earlier.     */
00177 # endif
00178 #elif defined(__INTEL_COMPILER)
00179 # define AO_compiler_barrier() __memory_barrier() /* Too strong? IA64-only? */
00180 #elif defined(_HPUX_SOURCE)
00181 # if defined(__ia64)
00182 #   include <machine/sys/inline.h>
00183 #   define AO_compiler_barrier() _Asm_sched_fence()
00184 # else
00185     /* FIXME - We don't know how to do this.  This is a guess.  */
00186     /* And probably a bad one.                                  */
        /* The guess is a self-assignment of a file-local volatile int. */
00187     static volatile int AO_barrier_dummy;
00188 #   define AO_compiler_barrier() AO_barrier_dummy = AO_barrier_dummy
00189 # endif
00190 #else
00191   /* We conjecture that the following usually gives us the right        */
00192   /* semantics or an error.                                             */
00193 # define AO_compiler_barrier() asm("")
00194 #endif
00195 
00196 #if defined(AO_USE_PTHREAD_DEFS)
        /* Opt-in pthread-based definitions.  The GCC, Intel, HP-UX and */
        /* Sun sections that follow are skipped when this macro is      */
        /* defined.                                                     */
00197 # include "atomic_ops/sysdeps/generic_pthread.h"
00198 #endif /* AO_USE_PTHREAD_DEFS */
00199 
00200 #if defined(__GNUC__) && !defined(AO_USE_PTHREAD_DEFS) \
00201     && !defined(__INTEL_COMPILER)
        /* GCC proper: include one header per target-architecture macro */
        /* that is defined.  AO_GENERALIZE_TWICE and AO_CAN_EMUL_CAS,   */
        /* set in some branches, are tested further down in this file.  */
        /* NOTE(review): headers are named in two ways here ("./x86.h"  */
        /* versus "atomic_ops/sysdeps/gcc/..."); confirm that both      */
        /* forms resolve with the include paths the build uses.         */
00202 # if defined(__i386__)
00203     /* We don't define AO_USE_SYNC_CAS_BUILTIN for x86 here because     */
00204     /* it might require specifying additional options (like -march)     */
00205     /* or additional link libraries (if -march is not specified).       */
00206 #   include "./x86.h"
00207 # endif /* __i386__ */
00208 # if defined(__x86_64__)
00209 #   if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2)
00210       /* It is safe to use __sync CAS built-in on this architecture.    */
00211 #     define AO_USE_SYNC_CAS_BUILTIN
00212 #   endif
00213 #   include "./x86_64.h"
00214 # endif /* __x86_64__ */
00215 # if defined(__ia64__)
00216 #   include "./ia64.h"
00217 #   define AO_GENERALIZE_TWICE
00218 # endif /* __ia64__ */
00219 # if defined(__hppa__)
00220 #   include "atomic_ops/sysdeps/gcc/hppa.h"
00221 #   define AO_CAN_EMUL_CAS
00222 # endif /* __hppa__ */
00223 # if defined(__alpha__)
00224 #   include "atomic_ops/sysdeps/gcc/alpha.h"
00225 #   define AO_GENERALIZE_TWICE
00226 # endif /* __alpha__ */
00227 # if defined(__s390__)
00228 #   include "atomic_ops/sysdeps/gcc/s390.h"
00229 # endif /* __s390__ */
00230 # if defined(__sparc__)
00231 #   include "./sparc.h"
00232 #   define AO_CAN_EMUL_CAS
00233 # endif /* __sparc__ */
00234 # if defined(__m68k__)
00235 #   include "atomic_ops/sysdeps/gcc/m68k.h"
00236 # endif /* __m68k__ */
00237 # if defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \
00238      || defined(__powerpc64__) || defined(__ppc64__)
00239 #   include "./powerpc.h"
00240 # endif /* __powerpc__ */
        /* The AO_USE_PTHREAD_DEFS test on the next line repeats the    */
        /* condition of the enclosing #if and is therefore always true  */
        /* here.                                                        */
00241 # if defined(__arm__) && !defined(AO_USE_PTHREAD_DEFS)
00242 #   include "atomic_ops/sysdeps/gcc/arm.h"
00243 #   define AO_CAN_EMUL_CAS
00244 # endif /* __arm__ */
00245 # if defined(__cris__) || defined(CRIS)
00246 #   include "atomic_ops/sysdeps/gcc/cris.h"
00247 # endif
00248 # if defined(__mips__)
00249 #   include "atomic_ops/sysdeps/gcc/mips.h"
00250 # endif /* __mips__ */
00251 # if defined(__sh__) || defined(SH4)
00252 #   include "atomic_ops/sysdeps/gcc/sh.h"
00253 #   define AO_CAN_EMUL_CAS
00254 # endif /* __sh__ */
00255 #endif /* __GNUC__ && !AO_USE_PTHREAD_DEFS */
00256 
00257 #if defined(__INTEL_COMPILER) && !defined(AO_USE_PTHREAD_DEFS)
        /* Intel compiler: the ia64 header, and, when __GNUC__ is also  */
        /* defined, the same "./x86.h" / "./x86_64.h" headers that the  */
        /* GCC section names.                                           */
00258 # if defined(__ia64__)
00259 #   include "./ia64.h"
00260 #   define AO_GENERALIZE_TWICE
00261 # endif
00262 # if defined(__GNUC__)
00263     /* Intel Compiler in GCC compatible mode */
00264 #   if defined(__i386__)
00265 #     include "./x86.h"
00266 #   endif /* __i386__ */
00267 #   if defined(__x86_64__)
          /* The __sync CAS built-in is requested only for compiler     */
          /* versions above 1110.                                       */
00268 #     if __INTEL_COMPILER > 1110
00269 #       define AO_USE_SYNC_CAS_BUILTIN
00270 #     endif
00271 #     include "./x86_64.h"
00272 #   endif /* __x86_64__ */
00273 # endif
00274 #endif
00275 
00276 #if defined(_HPUX_SOURCE) && !defined(__GNUC__) && !defined(AO_USE_PTHREAD_DEFS)
        /* HP-UX with a non-GCC compiler: the ia64 header when __ia64   */
        /* is defined; every other target falls into the hppa branch.   */
00277 # if defined(__ia64)
00278 #   include "atomic_ops/sysdeps/hpc/ia64.h"
00279 #   define AO_GENERALIZE_TWICE
00280 # else
00281 #   include "atomic_ops/sysdeps/hpc/hppa.h"
00282 #   define AO_CAN_EMUL_CAS
00283 # endif
00284 #endif
00285 
00286 #if defined(__sun) && !defined(__GNUC__) && !defined(AO_USE_PTHREAD_DEFS)
00287   /* Note: use -DAO_USE_PTHREAD_DEFS if Sun CC does not handle inline asm. */
00288 # if defined(__i386)
00289 #   include "atomic_ops/sysdeps/sunc/x86.h"
00290 # endif /* __i386 */
00291 # if defined(__x86_64) || defined(__amd64)
00292 #   include "atomic_ops/sysdeps/sunc/x86_64.h"
00293 # endif /* __x86_64 */
00294 #endif
00295 
00296 #if !defined(__GNUC__) && (defined(sparc) || defined(__sparc)) \
00297     && !defined(AO_USE_PTHREAD_DEFS)
        /* Any non-GCC compiler that defines sparc or __sparc; this     */
        /* test does not require __sun.                                 */
00298 #   include "atomic_ops/sysdeps/sunc/sparc.h"
00299 #   define AO_CAN_EMUL_CAS
00300 #endif
00301 
00302 #if defined(_MSC_VER) || defined(__DMC__) || defined(__BORLANDC__) \
00303         || (defined(__WATCOMC__) && defined(__NT__))
        /* Microsoft-style compilers: exactly one of the x86_64, x86 or */
        /* arm headers is included, chosen by the first matching target */
        /* macro; nothing is included if none matches.                  */
        /* NOTE(review): unlike the sections above, this one does not   */
        /* test AO_USE_PTHREAD_DEFS - confirm that this is intended.    */
00304 # if defined(_AMD64_) || defined(_M_X64)
00305 #   include "atomic_ops/sysdeps/msftc/x86_64.h"
00306 # elif defined(_M_IX86) || defined(x86)
00307 #   include "atomic_ops/sysdeps/msftc/x86.h"
00308 # elif defined(_M_ARM) || defined(ARM) || defined(_ARM_)
00309 #   include "atomic_ops/sysdeps/msftc/arm.h"
00310 # endif
00311 #endif
00312 
00313 #if defined(AO_REQUIRE_CAS) && !defined(AO_HAVE_compare_and_swap) \
00314     && !defined(AO_HAVE_compare_and_swap_full) \
00315     && !defined(AO_HAVE_compare_and_swap_acquire)
        /* Reached only when AO_REQUIRE_CAS is defined and none of the  */
        /* plain, _full or _acquire compare_and_swap variants has been  */
        /* provided so far.  The emulation header is used if a platform */
        /* section above defined AO_CAN_EMUL_CAS; otherwise compilation */
        /* stops with the error below.                                  */
00316 # if defined(AO_CAN_EMUL_CAS)
00317 #   include "atomic_ops/sysdeps/emul_cas.h"
00318 # else
00319 #  error Cannot implement AO_compare_and_swap_full on this architecture.
00320 # endif
00321 #endif  /* AO_REQUIRE_CAS && !AO_HAVE_compare_and_swap ... */
00322 
00323 /* The most common way to clear a test-and-set location         */
00324 /* at the end of a critical section.                            */
      /* AO_CLEAR(addr) stores AO_TS_CLEAR through addr, calling      */
      /* AO_store_release when AO_AO_TS_T is nonzero and              */
      /* AO_char_store_release when AO_CHAR_TS_T is nonzero.  An      */
      /* AO_CLEAR that is already defined is left untouched, so the   */
      /* first matching definition wins.  An undefined AO_AO_TS_T or  */
      /* AO_CHAR_TS_T evaluates to 0 in these #if tests.              */
00325 #if AO_AO_TS_T && !defined(AO_CLEAR)
00326 # define AO_CLEAR(addr) AO_store_release((AO_TS_t *)(addr), AO_TS_CLEAR)
00327 #endif
00328 #if AO_CHAR_TS_T && !defined(AO_CLEAR)
00329 # define AO_CLEAR(addr) AO_char_store_release((AO_TS_t *)(addr), AO_TS_CLEAR)
00330 #endif
00331 
00332 /*
00333  * The generalization section.
00334  * Theoretically this should repeatedly include atomic_ops_generalize.h.
00335  * In fact, we observe that this converges after a small fixed number
00336  * of iterations, usually one.
       * The header is included once unconditionally and a second time only
       * when a platform section above defined AO_GENERALIZE_TWICE.
       * NOTE(review): the directives below name "./generalize.h" rather
       * than atomic_ops_generalize.h; presumably the same file - confirm.
00337  */
00338 #include "./generalize.h"
00339 #ifdef AO_GENERALIZE_TWICE
00340 # include "./generalize.h"
00341 #endif
00342 
00343 /* For compatibility with version 0.4 and earlier       */
      /* The older upper-case spellings are mapped onto the   */
      /* current names AO_TS_t, AO_t and AO_TS_VAL_t.         */
00344 #define AO_TS_T AO_TS_t
00345 #define AO_T AO_t
00346 #define AO_TS_VAL AO_TS_VAL_t
00347 
00348 #endif /* ATOMIC_OPS_H */

Generated on Sat Apr 23 11:43:34 2011 for Mnemosyne by  doxygen 1.4.7