usermode/library/atomic_ops/powerpc.h

00001 /*
00002  * Copyright (c) 1991-1994 by Xerox Corporation.  All rights reserved.
00003  * Copyright (c) 1996-1999 by Silicon Graphics.  All rights reserved.
00004  * Copyright (c) 1999-2004 Hewlett-Packard Development Company, L.P.
00005  *
00006  *
00007  * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
00008  * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
00009  *
00010  * Permission is hereby granted to use or copy this program
00011  * for any purpose,  provided the above notices are retained on all copies.
00012  * Permission to modify the code and to distribute modified code is granted,
00013  * provided the above notices are retained, and a notice that the code was
00014  * modified is included with the above copyright notice.
00015  *
00016  */
00017 
00018 /* Memory model documented at http://www-106.ibm.com/developerworks/    */
00019 /* eserver/articles/archguide.html and (clearer)                        */
00020 /* http://www-106.ibm.com/developerworks/eserver/articles/powerpc.html. */
00021 /* There appears to be no implicit ordering between any kind of         */
00022 /* independent memory references.                                       */
00023 /* Architecture enforces some ordering based on control dependence.     */
00024 /* I don't know if that could help.                                     */
00025 /* Data-dependent loads are always ordered.                             */
00026 /* Based on the above references, eieio is intended for use on          */
00027 /* uncached memory, which we don't support.  It does not order loads    */
00028 /* from cached memory.                                                  */
00029 /* Thanks to Maged Michael, Doug Lea, and Roger Hoover for helping to   */
00030 /* track some of this down and correcting my misunderstandings. -HB     */
00031 /* Earl Chew subsequently contributed further fixes & additions.        */
00032 
00033 #include "./aligned_atomic_load_store.h"
00034 
00035 #include "./test_and_set_t_is_ao_t.h"
00036         /* There seems to be no byte equivalent of lwarx, so this       */
00037         /* may really be what we want, at least in the 32-bit case.     */
00038 
/* Full memory barrier.  "sync" orders every prior load and store       */
/* ahead of every subsequent one, covering the StoreLoad case that      */
/* lwsync (below) does not handle.                                      */
AO_INLINE void
AO_nop_full(void)
{
  __asm__ __volatile__("sync" : : : "memory");
}

#define AO_HAVE_nop_full
00046 
/* lwsync apparently works for everything but a StoreLoad barrier.      */
AO_INLINE void
AO_lwsync(void)
{
#ifdef __NO_LWSYNC__
  /* Target lacks lwsync; fall back to the heavier full "sync".         */
  __asm__ __volatile__("sync" : : : "memory");
#else
  __asm__ __volatile__("lwsync" : : : "memory");
#endif
}

/* lwsync suffices for write (StoreStore) and read (LoadLoad) fences.   */
#define AO_nop_write() AO_lwsync()
#define AO_HAVE_nop_write

#define AO_nop_read() AO_lwsync()
#define AO_HAVE_nop_read
00063 
00064 /* We explicitly specify load_acquire, since it is important, and can   */
00065 /* be implemented relatively cheaply.  It could be implemented          */
00066 /* with an ordinary load followed by a lwsync.  But the general wisdom  */
00067 /* seems to be that a data dependent branch followed by an isync is     */
00068 /* cheaper.  And the documentation is fairly explicit that this also    */
00069 /* has acquire semantics.                                               */
00070 /* ppc64 uses ld not lwz */
#if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
/* 64-bit: "ld" loads the full AO_t.  The always-true compare and the   */
/* never-taken branch to the isync form the data-dependent-branch +     */
/* isync sequence described above, giving the load acquire semantics.   */
AO_INLINE AO_t
AO_load_acquire(const volatile AO_t *addr)
{
  AO_t result;

   __asm__ __volatile__ (
    "ld%U1%X1 %0,%1\n"   /* load value (%U1/%X1 pick update/indexed form) */
    "cmpw %0,%0\n"       /* compare result with itself; sets cr0          */
    "bne- 1f\n"          /* never taken, but creates the dependency       */
    "1: isync\n"         /* discard any speculatively-issued insns        */
    : "=r" (result)
    : "m"(*addr) : "memory", "cr0");
  return result;
}
#else
/* 32-bit: same pattern with "lwz" for a word-sized AO_t.               */
AO_INLINE AO_t
AO_load_acquire(const volatile AO_t *addr)
{
  AO_t result;

  /* FIXME: We should get gcc to allocate one of the condition  */
  /* registers.  I always got "impossible constraint" when I    */
  /* tried the "y" constraint.                                  */
  __asm__ __volatile__ (
    "lwz%U1%X1 %0,%1\n"  /* load value                                   */
    "cmpw %0,%0\n"       /* compare result with itself                   */
    "bne- 1f\n"          /* never taken; creates the dependency          */
    "1: isync\n"         /* completes the acquire sequence               */
    : "=r" (result)
    : "m"(*addr) : "memory", "cc");
  return result;
}
#endif
#define AO_HAVE_load_acquire
00106 
00107 /* We explicitly specify store_release, since it relies         */
00108 /* on the fact that lwsync is also a LoadStore barrier.         */
00109 AO_INLINE void
00110 AO_store_release(volatile AO_t *addr, AO_t value)
00111 {
00112   AO_lwsync();
00113   *addr = value;
00114 }
00115 
00116 #define AO_HAVE_load_acquire
00117 
00118 /* This is similar to the code in the garbage collector.  Deleting      */
00119 /* this and having it synthesized from compare_and_swap would probably  */
00120 /* only cost us a load immediate instruction.                           */
#if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
/* Completely untested.  And we should be using smaller objects anyway. */
/* 64-bit variant: doubleword reservation via ldarx/stdcx.              */
AO_INLINE AO_TS_VAL_t
AO_test_and_set(volatile AO_TS_t *addr) {
  unsigned long oldval;
  unsigned long temp = 1; /* locked value */

  __asm__ __volatile__(
               "1:ldarx %0,0,%1\n"   /* load and reserve               */
               "cmpdi %0, 0\n"       /* if load is                     */
               "bne 2f\n"            /*   non-zero, return already set */
               "stdcx. %2,0,%1\n"    /* else store conditional         */
               "bne- 1b\n"           /* retry if lost reservation      */
               "2:\n"                /* oldval is zero if we set       */
              : "=&r"(oldval)
              : "r"(addr), "r"(temp)
              : "memory", "cr0");

  return (AO_TS_VAL_t)oldval;
}

#else

/* 32-bit variant: word reservation via lwarx/stwcx.                    */
AO_INLINE AO_TS_VAL_t
AO_test_and_set(volatile AO_TS_t *addr) {
  int oldval;
  int temp = 1; /* locked value */

  __asm__ __volatile__(
               "1:lwarx %0,0,%1\n"   /* load and reserve               */
               "cmpwi %0, 0\n"       /* if load is                     */
               "bne 2f\n"            /*   non-zero, return already set */
               "stwcx. %2,0,%1\n"    /* else store conditional         */
               "bne- 1b\n"           /* retry if lost reservation      */
               "2:\n"                /* oldval is zero if we set       */
              : "=&r"(oldval)
              : "r"(addr), "r"(temp)
              : "memory", "cr0");

  return (AO_TS_VAL_t)oldval;
}

#endif

/* Plain test_and_set carries no ordering guarantee; use the            */
/* _acquire/_release/_full wrappers below when ordering matters.        */
#define AO_HAVE_test_and_set
00166 
00167 AO_INLINE AO_TS_VAL_t
00168 AO_test_and_set_acquire(volatile AO_TS_t *addr) {
00169   AO_TS_VAL_t result = AO_test_and_set(addr);
00170   AO_lwsync();
00171   return result;
00172 }
00173 
00174 #define AO_HAVE_test_and_set_acquire
00175 
00176 AO_INLINE AO_TS_VAL_t
00177 AO_test_and_set_release(volatile AO_TS_t *addr) {
00178   AO_lwsync();
00179   return AO_test_and_set(addr);
00180 }
00181 
00182 #define AO_HAVE_test_and_set_release
00183 
00184 AO_INLINE AO_TS_VAL_t
00185 AO_test_and_set_full(volatile AO_TS_t *addr) {
00186   AO_TS_VAL_t result;
00187   AO_lwsync();
00188   result = AO_test_and_set(addr);
00189   AO_lwsync();
00190   return result;
00191 }
00192 
00193 #define AO_HAVE_test_and_set_full
00194 
#if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
/* FIXME: Completely untested.  */
/* Compare-and-swap, 64-bit variant (ldarx/stdcx. on a doubleword).     */
/* Returns nonzero iff *addr equalled old and was replaced by new_val;  */
/* the previous value itself is not returned.  No ordering guarantee.   */
AO_INLINE int
AO_compare_and_swap(volatile AO_t *addr, AO_t old, AO_t new_val) {
  AO_t oldval;
  int result = 0;

  __asm__ __volatile__(
               "1:ldarx %0,0,%2\n"   /* load and reserve              */
               "cmpd %0, %4\n"      /* if load is not equal to  */
               "bne 2f\n"            /*   old, fail                     */
               "stdcx. %3,0,%2\n"    /* else store conditional         */
               "bne- 1b\n"           /* retry if lost reservation      */
               "li %1,1\n"           /* result = 1;                     */
               "2:\n"
              : "=&r"(oldval), "=&r"(result)
              : "r"(addr), "r"(new_val), "r"(old), "1"(result)
              : "memory", "cr0");

  return result;
}

#else

/* Compare-and-swap, 32-bit variant (lwarx/stwcx. on a word).           */
/* Returns nonzero on success; see the 64-bit variant above.            */
AO_INLINE int
AO_compare_and_swap(volatile AO_t *addr, AO_t old, AO_t new_val) {
  AO_t oldval;
  int result = 0;

  __asm__ __volatile__(
               "1:lwarx %0,0,%2\n"   /* load and reserve              */
               "cmpw %0, %4\n"      /* if load is not equal to  */
               "bne 2f\n"            /*   old, fail                     */
               "stwcx. %3,0,%2\n"    /* else store conditional         */
               "bne- 1b\n"           /* retry if lost reservation      */
               "li %1,1\n"           /* result = 1;                     */
               "2:\n"
              : "=&r"(oldval), "=&r"(result)
              : "r"(addr), "r"(new_val), "r"(old), "1"(result)
              : "memory", "cr0");

  return result;
}
#endif

#define AO_HAVE_compare_and_swap
00241 
00242 AO_INLINE int
00243 AO_compare_and_swap_acquire(volatile AO_t *addr, AO_t old, AO_t new_val) {
00244   int result = AO_compare_and_swap(addr, old, new_val);
00245   AO_lwsync();
00246   return result;
00247 }
00248 
00249 #define AO_HAVE_compare_and_swap_acquire
00250 
00251 AO_INLINE int
00252 AO_compare_and_swap_release(volatile AO_t *addr, AO_t old, AO_t new_val) {
00253   AO_lwsync();
00254   return AO_compare_and_swap(addr, old, new_val);
00255 }
00256 
00257 #define AO_HAVE_compare_and_swap_release
00258 
00259 AO_INLINE int
00260 AO_compare_and_swap_full(volatile AO_t *addr, AO_t old, AO_t new_val) {
00261   AO_t result;
00262   AO_lwsync();
00263   result = AO_compare_and_swap(addr, old, new_val);
00264   AO_lwsync();
00265   return result;
00266 }
00267 
00268 #define AO_HAVE_compare_and_swap_full
00269 
#if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
/* FIXME: Completely untested.                                          */

/* Atomically add incr to *addr; returns the value before the add.      */
/* 64-bit variant (ldarx/stdcx.).  No ordering guarantee by itself.     */
AO_INLINE AO_t
AO_fetch_and_add(volatile AO_t *addr, AO_t incr) {
  AO_t oldval;
  AO_t newval;

  __asm__ __volatile__(
               "1:ldarx %0,0,%2\n"   /* load and reserve                */
               "add %1,%0,%3\n"      /* increment                       */
               "stdcx. %1,0,%2\n"    /* store conditional               */
               "bne- 1b\n"           /* retry if lost reservation       */
              : "=&r"(oldval), "=&r"(newval)
               : "r"(addr), "r"(incr)
              : "memory", "cr0");

  return oldval;
}

#define AO_HAVE_fetch_and_add

#else

/* 32-bit variant (lwarx/stwcx.); same contract as above.               */
AO_INLINE AO_t
AO_fetch_and_add(volatile AO_t *addr, AO_t incr) {
  AO_t oldval;
  AO_t newval;

  __asm__ __volatile__(
               "1:lwarx %0,0,%2\n"   /* load and reserve                */
               "add %1,%0,%3\n"      /* increment                       */
               "stwcx. %1,0,%2\n"    /* store conditional               */
               "bne- 1b\n"           /* retry if lost reservation       */
              : "=&r"(oldval), "=&r"(newval)
               : "r"(addr), "r"(incr)
              : "memory", "cr0");

  return oldval;
}

#define AO_HAVE_fetch_and_add

#endif
00314 
00315 AO_INLINE AO_t
00316 AO_fetch_and_add_acquire(volatile AO_t *addr, AO_t incr) {
00317   AO_t result = AO_fetch_and_add(addr, incr);
00318   AO_lwsync();
00319   return result;
00320 }
00321 
00322 #define AO_HAVE_fetch_and_add_acquire
00323 
00324 AO_INLINE AO_t
00325 AO_fetch_and_add_release(volatile AO_t *addr, AO_t incr) {
00326   AO_lwsync();
00327   return AO_fetch_and_add(addr, incr);
00328 }
00329 
00330 #define AO_HAVE_fetch_and_add_release
00331 
00332 AO_INLINE AO_t
00333 AO_fetch_and_add_full(volatile AO_t *addr, AO_t incr) {
00334   AO_t result;
00335   AO_lwsync();
00336   result = AO_fetch_and_add(addr, incr);
00337   AO_lwsync();
00338   return result;
00339 }
00340 
00341 #define AO_HAVE_fetch_and_add_full
00342 
#if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
/* 64-bit: AO_t is wider than int, so the int-typed primitives cannot   */
/* be derived from the AO_t ones here (empty branch is intentional).    */
#else
/* 32-bit: presumably AO_t and unsigned int coincide here, so derive    */
/* the int-typed primitives from the AO_t ones — see ao_t_is_int.h.     */
# include "./ao_t_is_int.h"
#endif

Generated on Sat Apr 23 11:43:34 2011 for Mnemosyne by  doxygen 1.4.7