usermode/library/pmalloc/src/pdlmalloc.c

/* 
  A version of malloc/free/realloc written by Doug Lea and released to the 
  public domain. 

  VERSION 2.5

* History:
    Based loosely on libg++-1.2X malloc. (It retains some of the overall 
       structure of old version,  but most details differ.)
    trial version Fri Aug 28 13:14:29 1992  Doug Lea  (dl at g.oswego.edu)
    fixups Sat Aug  7 07:41:59 1993  Doug Lea  (dl at g.oswego.edu)
      * removed potential for odd address access in prev_chunk
      * removed dependency on getpagesize.h
      * misc cosmetics and a bit more internal documentation
      * anticosmetics: mangled names in macros to evade debugger strangeness
      * tested on sparc, hp-700, dec-mips, rs6000 
          with gcc & native cc (hp, dec only) allowing
          Detlefs & Zorn comparison study (to appear, SIGPLAN Notices.)

     Sat Apr  2 06:51:25 1994  Doug Lea  (dl at g)
      * Propagate failure in realloc if malloc returns 0
      * Add stuff to allow compilation on non-ANSI compilers

* Overview

  This malloc, like any other, is a compromise design. 

  Chunks of memory are maintained using a `boundary tag' method as
  described in e.g., Knuth or Standish.  The size of the chunk is
  stored both in the front of the chunk and at the end.  This makes
  consolidating fragmented chunks into bigger chunks very fast.  The
  size field also holds a bit representing whether a chunk is free or
  in use.

  Malloced chunks have space overhead of 8 bytes: The preceding and
  trailing size fields.  When a chunk is freed, 8 additional bytes are
  needed for free list pointers. Thus, the minimum allocatable size is
  16 bytes.  8 byte alignment is currently hardwired into the design.
  This seems to suffice for all current machines and C compilers.
  Calling memalign will return a chunk that is both 8-byte aligned
  and meets the requested (power of two) alignment.

  It is assumed that 32 bits suffice to represent chunk sizes.  The
  maximum size chunk is 2^31 - 8 bytes.  malloc(0) returns a pointer
  to something of the minimum allocatable size.  Requests for negative
  sizes (when size_t is signed) or with the highest bit set (when
  unsigned) will also return a minimum-sized chunk.

  Available chunks are kept in doubly linked lists. The lists are
  maintained in an array of bins using a power-of-two method, except
  that instead of 32 bins (one for each 1 << i), there are 128: each
  power of two is split in quarters.  Chunk sizes up to 128 are
  treated specially; they are categorized on 8-byte boundaries.  This
  both better distributes them and allows for special faster
  processing.

  The use of very fine bin sizes closely approximates the use of one
  bin per actually used size, without necessitating the overhead of
  locating such bins. It is especially desirable in common
  applications where large numbers of identically-sized blocks are
  malloced/freed in some dynamic manner, and then later are all freed.
  The finer bin sizes make finding blocks fast, with little wasted
  overallocation. The consolidation methods ensure that once the
  collection of blocks is no longer useful, fragments are gathered
  into bigger chunks awaiting new roles.

  The bins av[i] serve as heads of the lists. Bins contain a dummy
  header for the chunk lists. Each bin has two lists. The `dirty' list
  holds chunks that have been returned (freed) and not yet either
  re-malloc'ed or consolidated. (A third free-standing list contains
  returned chunks that have not yet been processed at all.) The `clean'
  list holds split-off fragments and consolidated space. All
  procedures maintain the invariant that no clean chunk physically
  borders another clean chunk. Thus, clean chunks never need to be
  scanned during consolidation.

* Algorithms

  Malloc:

    This is a very heavily disguised first-fit algorithm.
    Most of the heuristics are designed to maximize the likelihood
    that a usable chunk will most often be found very quickly,
    while still minimizing fragmentation and overhead.

    The allocation strategy has several phases:

      0. Convert the request size into a usable form. This currently
         means to add 8 bytes overhead plus possibly more to obtain
         8-byte alignment. Call this size `nb'.

      1. Check if the last returned (free()'d) or preallocated chunk
         is of the exact size nb. If so, use it.  `Exact' means no
         more than MINSIZE (currently 16) bytes larger than nb. This
         cannot be reduced, since a chunk with size < MINSIZE cannot
         be created to hold the remainder.

         This check need not fire very often to be effective.  It
         reduces overhead for sequences of requests for the same
         preallocated size to a dead minimum.

      2. Look for a dirty chunk of exact size in the bin associated
         with nb.  `Dirty' chunks are those that have never been
         consolidated.  Besides the fact that they, unlike clean
         chunks, require scanning for consolidation, these chunks are
         of sizes likely to be useful because they have been
         previously requested and then freed by the user program.

         Dirty chunks of bad sizes (even if too big) are never used
         without consolidation. Among other things, this maintains the
         invariant that split chunks (see below) are ALWAYS clean.

         2a If there are dirty chunks, but none of the right size,
             consolidate them all, as well as any returned chunks
             (i.e., the ones from step 3). This is all a heuristic for
             detecting and dealing with excess fragmentation and
             random traversals through memory that degrade
             performance especially when the user program is running
             out of physical memory.

      3. Pull other requests off the returned chunk list, using one if
         it is of exact size, else distributing into the appropriate
         bins.

      4. Try to use the last chunk remaindered during a previous
         malloc. (The ptr to this chunk is kept in var last_remainder,
         to make it easy to find and to avoid useless re-binning
         during repeated splits.  The code surrounding it is fairly
         delicate. This chunk must be pulled out and placed in a bin
         prior to any consolidation, to avoid having other pointers
         point into the middle of it, or try to unlink it.) If
         it is usable, proceed to step 9.

      5. Scan through clean chunks in the bin, choosing any of size >=
         nb, splitting later (step 9) if necessary.  (Unlike in step
         2, it is good to split here, because it creates a chunk of a
         known-to-be-useful size out of a fragment that happened to be
         close in size.)

      6. Scan through the clean lists of all larger bins, selecting
         any chunk at all. (It will surely be big enough since it is
         in a bigger bin.)  The scan goes upward from small bins to
         large.  It would be faster downward, but could lead to excess
         fragmentation. If successful, proceed to step 9.

      7. Consolidate chunks in other dirty bins until a large enough
         chunk is created. Break out to step 9 when one is found.

         Bins are selected for consolidation in a circular fashion
         spanning across malloc calls. This very crudely approximates
         LRU scanning -- it is an effective enough approximation for
         these purposes.

      8. Get space from the system using sbrk.

         Memory is gathered from the system (via sbrk) in a way that
         allows chunks obtained across different sbrk calls to be
         consolidated, but does not require contiguous memory. Thus,
         it should be safe to intersperse mallocs with other sbrk
         calls.

      9. If the selected chunk is too big, then:

         9a If this is the second split request for nb bytes in a row,
             use this chunk to preallocate up to MAX_PREALLOCS
             additional chunks of size nb and place them on the
             returned chunk list.  (Placing them here rather than in
             bins speeds up the most common case where the user
             program requests an uninterrupted series of identically
             sized chunks. If this is not true, the chunks will be
             binned in step 3 next time.)

         9b Split off the remainder and place in last_remainder.
             Because of all the above, the remainder is always a
             `clean' chunk.

     10.  Return the chunk.


  Free: 
    Deallocation (free) consists only of placing the chunk on a list
    of returned chunks. free(0) has no effect.  Because freed chunks
    may be overwritten with link fields, this malloc will often die
    when freed memory is overwritten by user programs.  This can be
    very effective (albeit in an annoying way) in helping users track
    down dangling pointers.

  Realloc:
    Reallocation proceeds in the usual way. If a chunk can be extended,
    it is, else a malloc-copy-free sequence is taken. 

  Memalign, valloc:
    memalign requests more than enough space from malloc, finds a spot
    within that chunk that meets the alignment request, and then
    possibly frees the leading and trailing space. Overreliance on
    memalign is a sure way to fragment space.

* Other implementation notes

  This malloc is NOT designed to work in multiprocessing applications.
  No semaphores or other concurrency control are provided to ensure
  that multiple malloc or free calls don't run at the same time, which
  could be disastrous. A single semaphore could be used across malloc,
  realloc, and free. It would be hard to obtain finer granularity.

  The implementation is in straight, hand-tuned ANSI C.  Among other
  consequences, it uses a lot of macros. These would be nicer as
  inlinable procedures, but using macros allows use with non-inlining
  compilers, and also makes it a bit easier to control when they
  should be expanded out by selectively embedding them in other macros
  and procedures. (According to profile information, it is almost, but
  not quite always best to expand.)

*/


/* TUNABLE PARAMETERS */

/* 
  SBRK_UNIT is a good power of two to call sbrk with. It should
  normally be system page size or a multiple thereof.  If sbrk is very
  slow on a system, it pays to increase this.  Otherwise, it should
  not matter too much.
*/

#define SBRK_UNIT 8192

/* 
  MAX_PREALLOCS is the maximum number of chunks to preallocate.  The
  actual number to prealloc depends on the available space in a
  selected victim, so typical numbers will be less than the maximum.
  Because of this, the exact value seems not to matter too much, at
  least within values from around 1 to 100.  Since preallocation is
  only a heuristic, making it too large doesn't help though.  It may
  blindly create a lot of chunks when it turns out not to need any
  more, and then consolidate them all back again immediately.  While
  this is pretty fast, it is better to avoid it.
*/

#define MAX_PREALLOCS 5

/* preliminaries */

#ifndef __STD_C
#ifdef __STDC__
#define __STD_C         1
#else
#if __cplusplus
#define __STD_C         1
#else
#define __STD_C         0
#endif /*__cplusplus*/
#endif /*__STDC__*/
#endif /*__STD_C*/


#ifndef _BEGIN_EXTERNS_
#ifdef __cplusplus
#define _BEGIN_EXTERNS_ extern "C" {
#define _END_EXTERNS_   }
#else
#define _BEGIN_EXTERNS_
#define _END_EXTERNS_
#endif
#endif /*_BEGIN_EXTERNS_*/

#ifndef _ARG_
#if __STD_C
#define _ARG_(x)        x
#else
#define _ARG_(x)        ()
#endif
#endif /*_ARG_*/

#ifndef Void_t
#if __STD_C
#define Void_t          void
#else
#define Void_t          char
#endif
#endif /*Void_t*/

#ifndef NIL
#define NIL(type)       ((type)0)
#endif /*NIL*/

#if __STD_C
#include <stddef.h>   /* for size_t */
#else
#include <sys/types.h>
#endif
#include <stdio.h>    /* needed for malloc_stats */

#include "genalloc.h"
#include "pdlmalloc.h"

#define DLMALLOC_PHEAP_BASE 0xc00000000
#define DLMALLOC_PHEAP_SIZE (1024*1024*1024) /* Bytes */

#ifdef __cplusplus
extern "C" {
#endif

/* mechanics for getpagesize; adapted from bsd/gnu getpagesize.h */

#if defined(BSD) || defined(DGUX) || defined(sun) || defined(HAVE_GETPAGESIZE)
   extern size_t getpagesize();
#  define malloc_getpagesize getpagesize()
#else
#  include <sys/param.h>
#  ifdef EXEC_PAGESIZE
#    define malloc_getpagesize EXEC_PAGESIZE
#  else
#    ifdef NBPG
#      ifndef CLSIZE
#        define malloc_getpagesize NBPG
#      else
#        define malloc_getpagesize (NBPG * CLSIZE)
#      endif
#    else 
#      ifdef NBPC
#        define malloc_getpagesize NBPC
#      else
#        define malloc_getpagesize SBRK_UNIT    /* just guess */
#      endif
#    endif 
#  endif
#endif 

#ifdef __cplusplus
}  /* end of extern "C" */
#endif

#include <mnemosyne.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <pcm.h>

/*  CHUNKS */


struct malloc_chunk
{
  uint64_t             size; /* Size in bytes, including overhead. Or'ed with INUSE if in use. */
  struct malloc_chunk* fd;   /* double links -- used only if free. */
  struct malloc_chunk* bk;
};

typedef struct malloc_chunk* mchunkptr;
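
/*
   Illustrative sketch (added commentary, not from the original header)
   of how the macros below lay out a chunk, assuming a 64-bit build
   where SIZE_SZ == 8.  An in-use chunk carries only the two boundary
   tags; the fd/bk fields exist only while the chunk is free:

       chunk-> +--------------------------------+
               | size (low bit = INUSE)         |
       mem->   +--------------------------------+  <- chunk2mem(chunk)
               | user data ...                  |
               | (fd/bk live here when free)    |
               +--------------------------------+
               | size (copy, written at the     |
               |       back by set_size)        |
  next chunk-> +--------------------------------+

   prev_chunk() reads the trailing size copy of the physical
   predecessor, which is why consolidation runs in constant time.
*/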

/*  sizes, alignments */

#define SIZE_SZ              (sizeof(size_t))
#define MALLOC_MIN_OVERHEAD  (SIZE_SZ + SIZE_SZ)
#define MALLOC_ALIGN_MASK    (MALLOC_MIN_OVERHEAD - 1)
#define MINSIZE              (sizeof(struct malloc_chunk) + SIZE_SZ)


/* pad request bytes into a usable size */

#define request2size(req) \
  (((long)(req) <= 0) ?  MINSIZE : \
    (((req) + MALLOC_MIN_OVERHEAD + MALLOC_ALIGN_MASK) \
      & ~(MALLOC_ALIGN_MASK)))
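
/*
   Worked examples (added commentary).  On a 64-bit build SIZE_SZ == 8,
   so MALLOC_MIN_OVERHEAD == 16 and MALLOC_ALIGN_MASK == 15:

     request2size(0)  == MINSIZE                  (non-positive request)
     request2size(1)  == (1 + 16 + 15) & ~15 == 32
     request2size(24) == (24 + 16 + 15) & ~15 == 48

   i.e., every usable size is a 16-byte multiple that leaves room for
   the two boundary tags around the user data.
*/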


/* Check if m has acceptable alignment */

#define aligned_OK(m)    (((size_t)((m)) & (MALLOC_ALIGN_MASK)) == 0)


/* Check if a chunk is immediately usable */

#define exact_fit(ptr, req) ((unsigned)((ptr)->size - (req)) < MINSIZE)

/* maintaining INUSE via size field */

#define INUSE  0x1     /* size field is or'd with INUSE when in use */
                       /* INUSE must be exactly 1, so can coexist with size */

#define inuse(p)       ((p)->size & INUSE)
#define set_inuse(p)   ((p)->size |= INUSE)
#define clear_inuse(p) ((p)->size &= ~INUSE)



/* Physical chunk operations  */

/* Ptr to next physical malloc_chunk. */

#define next_chunk(p)\
  ((mchunkptr)( ((char*)(p)) + ((p)->size & ~INUSE) ))

/* Ptr to previous physical malloc_chunk */

#define prev_chunk(p)\
  ((mchunkptr)( ((char*)(p)) - ( *((size_t*)((char*)(p) - SIZE_SZ)) & ~INUSE)))

/* place size at front and back of chunk */

#define set_size(P, Sz) \
{ \
  size_t Sss = (Sz); \
  (P)->size = *((size_t*)((char*)(P) + Sss - SIZE_SZ)) = Sss; \
}


/* conversion from malloc headers to user pointers, and back */

#define chunk2mem(p)   ((Void_t*)((char*)(p) + SIZE_SZ))
#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - SIZE_SZ))



/* BINS */

struct malloc_bin
{
  struct malloc_chunk dhd;   /* dirty list header */
  struct malloc_chunk chd;   /* clean list header */
};

typedef struct malloc_bin* mbinptr;


/* field-extraction macros */

#define clean_head(b)  (&((b)->chd))
#define first_clean(b) ((b)->chd.fd)
#define last_clean(b)  ((b)->chd.bk)

#define dirty_head(b)  (&((b)->dhd))
#define first_dirty(b) ((b)->dhd.fd)
#define last_dirty(b)  ((b)->dhd.bk)




/* The bins, initialized to have null double linked lists */

#define NBINS     128             /* for 32 bit addresses */
#define LASTBIN   (&(av[NBINS-1]))
#define FIRSTBIN  (&(av[2]))      /* 1st 2 bins unused but simplify indexing */

/* Bins < MAX_SMALLBIN_OFFSET are special-cased since they are 8 bytes apart */

#define MAX_SMALLBIN_OFFSET  18
#define MAX_SMALLBIN_SIZE   144  /* Max size for which small bin rules apply */

/* Helper macro to initialize bins */
#define IAV(i)\
  {{ 0, &(av[i].dhd),  &(av[i].dhd) }, { 0, &(av[i].chd),  &(av[i].chd) }}
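
/*
   Note (added commentary): each dummy header is initialized to point
   at itself, so an empty list is detected by comparing the first link
   against the head, e.g. first_dirty(b) == dirty_head(b).  The
   scanning loops below rely on exactly this test.
*/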

__attribute__ ((section("PERSISTENT"))) uint64_t           dlmalloc_pheap = 0x0;
__attribute__ ((section("PERSISTENT"))) size_t             sbrk_limit = 0;
__attribute__ ((section("PERSISTENT"))) int                init_done = 0;
__attribute__ ((section("PERSISTENT"))) struct malloc_bin  *av;

/* The end of memory returned from previous sbrk call */
__attribute__ ((section("PERSISTENT"))) size_t* last_sbrk_end = 0;
__attribute__ ((section("PERSISTENT"))) size_t sbrked_mem = 0; /* Keep track of total mem for malloc_stats */

/* Keep track of the maximum actually used clean bin, to make loops faster */
/* (Not worth it to do the same for dirty ones) */
__attribute__ ((section("PERSISTENT"))) mbinptr maxClean;


TM_CALLABLE static 
void *
persistent_sbrk(size_t size) 
{
        /* Like sbrk, return the start of the newly extended region
         * (the previous break), not the new break: callers treat the
         * result as the base of a fresh region of `size' bytes. */
        size_t old_limit = sbrk_limit;

        sbrk_limit += size;
        return (void *) (dlmalloc_pheap + old_limit);
}

TM_PURE static inline
size_t
init_bin_lists(pcm_storeset_t *set, uintptr_t av_base)
{
        int i;

        av = (struct malloc_bin *) av_base;
        for (i=0; i<NBINS; i++) {
#if 1
                PCM_NT_STORE(set, (volatile pcm_word_t *) &av[i].dhd.size, (pcm_word_t) 0);
                PCM_NT_STORE(set, (volatile pcm_word_t *) &av[i].dhd.fd, (pcm_word_t) &(av[i].dhd));
                PCM_NT_STORE(set, (volatile pcm_word_t *) &av[i].dhd.bk, (pcm_word_t) &(av[i].dhd));
                PCM_NT_STORE(set, (volatile pcm_word_t *) &av[i].chd.size, (pcm_word_t) 0);
                PCM_NT_STORE(set, (volatile pcm_word_t *) &av[i].chd.fd, (pcm_word_t) &(av[i].chd));
                PCM_NT_STORE(set, (volatile pcm_word_t *) &av[i].chd.bk, (pcm_word_t) &(av[i].chd));
#else
                av[i].dhd.size = 0;
                av[i].dhd.fd = &(av[i].dhd);
                av[i].dhd.bk = &(av[i].dhd);
                av[i].chd.size = 0;
                av[i].chd.fd = &(av[i].chd);
                av[i].chd.bk = &(av[i].chd);
#endif
        }
        return NBINS*sizeof(struct malloc_bin);
}

TM_PURE static inline
void
init(void)
{
        size_t         av_size;
        pcm_storeset_t *set;

        if (init_done) {
                return;
        }

        set = pcm_storeset_get ();

        if (dlmalloc_pheap == 0x0) {
                TM_WAIVER {
                        dlmalloc_pheap = (uint64_t) m_pmap((void *) DLMALLOC_PHEAP_BASE, DLMALLOC_PHEAP_SIZE, PROT_READ|PROT_WRITE, 0);
                }
                if ((void *) dlmalloc_pheap == (void *) -1) {
                        TM_WAIVER {
                                abort();
                        }
                }
        }

        av_size = init_bin_lists(set, dlmalloc_pheap);
        maxClean = FIRSTBIN;

        /* Advance the sbrk_limit by av_size + 2*MINSIZE to ensure no
         * memory is allocated over the av table.
         */
        persistent_sbrk(av_size + 2*MINSIZE);
        last_sbrk_end = (size_t *) (av_size + dlmalloc_pheap + MINSIZE);
        *last_sbrk_end = SIZE_SZ | INUSE;
        PCM_NT_FLUSH(set);
        init_done = 1;
}



/* 
  Auxiliary lists 

  Even though they use bk/fd ptrs, neither of these is doubly linked!
  They are null-terminated since only the first is ever accessed.
  returned_list is just singly linked for speed in free().
  last_remainder currently has length of at most one.
*/

__attribute__ ((section("PERSISTENT"))) mchunkptr returned_list = 0;  /* List of (unbinned) returned chunks */
__attribute__ ((section("PERSISTENT"))) mchunkptr last_remainder = 0; /* last remaindered chunk from malloc */

/* 
  Indexing into bins 
  
  Funny-looking mechanics: 
    For small bins, the index is just size/8.
    For others, first find index corresponding to the power of 2
        closest to size, using a variant of standard base-2 log
        calculation that starts with the first non-small index and
        adjusts the size so that zero corresponds with it. On each
        iteration, the index is incremented across the four quadrants
        per power of two. (This loop runs a max of 27 iterations;
        usually much less.) After the loop, the remainder is quartered
        to find quadrant. The offsets for loop termination and
        quartering allow bins to start, not end at powers.
*/
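
/*
   Worked example (added commentary; assumes the original 32-bit
   parameters, where SIZE_SZ == 4 and thus MINSIZE == 16):

     size 136:  136 < MAX_SMALLBIN_SIZE, so index = 136 >> 3 = 17
     size 144:  144 >> 3 = 18; 18 < MINSIZE*2, so the loop is skipped;
                index = 18 + ((18 - 16) >> 2) = 18 = MAX_SMALLBIN_OFFSET
     size 200:  200 >> 3 = 25; loop skipped;
                index = 18 + ((25 - 16) >> 2) = 20
*/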


#define findbin(Sizefb, Bfb) \
{ \
  size_t Sfb = (Sizefb); \
  if (Sfb < MAX_SMALLBIN_SIZE) \
    (Bfb) = (av + (Sfb >> 3)); \
  else \
  { \
    /* Offset wrt small bins */ \
    size_t Ifb = MAX_SMALLBIN_OFFSET; \
    Sfb >>= 3; \
    /* find power of 2 */ \
    while (Sfb >= (MINSIZE * 2)) { Ifb += 4; Sfb >>= 1; } \
    /* adjust for quadrant */ \
    Ifb += (Sfb - MINSIZE) >> 2; \
    (Bfb) = av + Ifb; \
  } \
}




#define reset_maxClean \
{ \
  while (maxClean > FIRSTBIN && clean_head(maxClean)==last_clean(maxClean)) \
    --maxClean; \
}


/* Macros for linking and unlinking chunks */

/* take a chunk off a list */

#define unlink(Qul) \
{ \
  mchunkptr Bul = (Qul)->bk; \
  mchunkptr Ful = (Qul)->fd; \
  Ful->bk = Bul;  Bul->fd = Ful; \
}


/* place a chunk on the dirty list of its bin */

#define dirtylink(Qdl) \
{ \
  mchunkptr Pdl = (Qdl); \
  mbinptr   Bndl; \
  mchunkptr Hdl, Fdl; \
\
  findbin(Pdl->size, Bndl); \
  Hdl  = dirty_head(Bndl); \
  Fdl  = Hdl->fd; \
\
  Pdl->bk = Hdl;  Pdl->fd = Fdl;  Fdl->bk = Hdl->fd = Pdl; \
}



/* Place a consolidated chunk on a clean list */

#define cleanlink(Qcl) \
{ \
  mchunkptr Pcl = (Qcl); \
  mbinptr Bcl; \
  mchunkptr Hcl, Fcl; \
\
  findbin(Qcl->size, Bcl); \
  Hcl  = clean_head(Bcl); \
  Fcl  = Hcl->fd; \
  if (Hcl == Fcl && Bcl > maxClean) maxClean = Bcl; \
\
  Pcl->bk = Hcl;  Pcl->fd = Fcl;  Fcl->bk = Hcl->fd = Pcl; \
}



/* consolidate one chunk */

#define consolidate(Qc) \
{ \
  for (;;) \
  { \
    mchunkptr Pc = prev_chunk(Qc); \
    if (!inuse(Pc)) \
    { \
      unlink(Pc); \
      set_size(Pc, Pc->size + (Qc)->size); \
      (Qc) = Pc; \
    } \
    else break; \
  } \
  for (;;) \
  { \
    mchunkptr Nc = next_chunk(Qc); \
    if (!inuse(Nc)) \
    { \
      unlink(Nc); \
      set_size((Qc), (Qc)->size + Nc->size); \
    } \
    else break; \
  } \
}
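
/*
   Added commentary: consolidate() merges Qc with any free physical
   neighbors, sweeping backward first, then forward.  For example, if
   three adjacent chunks A|B|C are free with A and C sitting in bins,
   consolidate(B) unlinks A and C and leaves one chunk spanning all
   three, with the combined size written at both ends by set_size.
   Chunks with INUSE set (including everything on returned_list) act
   as walls that stop the sweep.
*/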



/* Place the held remainder in its bin */
/* This MUST be invoked prior to ANY consolidation */

#define clear_last_remainder \
{ \
  if (last_remainder != 0) \
  { \
    cleanlink(last_remainder); \
    last_remainder = 0; \
  } \
}


/* Place a freed chunk on the returned_list */

#define return_chunk(Prc) \
{ \
  (Prc)->fd = returned_list; \
  returned_list = (Prc); \
}
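
/*
   Added commentary: chunks pushed onto returned_list keep their INUSE
   bit set (free() never clears it), so physical neighbors cannot
   consolidate into them while they sit here.  The bit is cleared only
   when a chunk is finally binned, e.g. in free_returned_list() below.
*/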


/* Misc utilities */

/* A helper for realloc */

TM_CALLABLE static void free_returned_list()
{
  clear_last_remainder;
  while (returned_list != 0)
  {
    mchunkptr p = returned_list;
    returned_list = p->fd;
    clear_inuse(p);
    dirtylink(p);
  }
}

/* Utilities needed below for memalign */
/* Standard greatest common divisor algorithm */

TM_PURE static size_t gcd(size_t a, size_t b)
{
  size_t tmp;
  
  if (b > a)
  {
    tmp = a; a = b; b = tmp;
  }
  for(;;)
  {
    if (b == 0)
      return a;
    else if (b == 1)
      return b;
    else
    {
      tmp = b;
      b = a % b;
      a = tmp;
    }
  }
}

TM_PURE static size_t  lcm(size_t x, size_t y)
{
  return x / gcd(x, y) * y;
}
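
/*
   Worked example (added commentary): memalign below calls
   lcm(alignment, MALLOC_MIN_OVERHEAD).  On a 64-bit build
   MALLOC_MIN_OVERHEAD == 16, so for a page-aligned request
   lcm(4096, 16) == 4096 / gcd(4096, 16) * 16 == 4096 / 16 * 16
   == 4096: any power-of-two alignment >= 16 is returned unchanged.
*/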




/* Dealing with sbrk */
/* This is one step of malloc; broken out for simplicity */

TM_CALLABLE static mchunkptr malloc_from_sys(size_t nb)
{
  mchunkptr p;            /* Will hold a usable chunk */
  size_t*   ip;           /* to traverse sbrk ptr in size_t units */
  char*     cp;           /* result of sbrk call */
  
  /* Find a good size to ask sbrk for.  */
  /* Minimally, we need to pad with enough space */
  /* to place dummy size/use fields to ends if needed */

  size_t sbrk_size = ((nb + SBRK_UNIT - 1 + SIZE_SZ + SIZE_SZ) 
                       / SBRK_UNIT) * SBRK_UNIT;

  cp = (char*)(persistent_sbrk(sbrk_size));
  if (cp == (char*)(-1)) /* sbrk returns -1 on failure */
    return 0;

  ip = (size_t*)cp;

  sbrked_mem += sbrk_size;

  if (last_sbrk_end != &ip[-1]) /* Is this chunk contiguous with last? */
  {                             
    /* It's either first time through or someone else called sbrk. */
    /* Arrange end-markers at front & back */

    /* Shouldn't be necessary, but better to be safe */
    while (!aligned_OK(ip)) { ++ip; sbrk_size -= SIZE_SZ; }

    /* Mark the front as in use to prevent merging. (End done below.) */
    /* Note we can get away with only 1 word, not MINSIZE overhead here */

    *ip++ = SIZE_SZ | INUSE;
    
    p = (mchunkptr)ip;
    set_size(p,sbrk_size - (SIZE_SZ + SIZE_SZ)); 
    
  }
  else 
  {
    mchunkptr l;  

    /* We can safely make the header start at end of prev sbrked chunk. */
    /* We will still have space left at the end from a previous call */
    /* to place the end marker, below */

    p = (mchunkptr)(last_sbrk_end);
    set_size(p, sbrk_size);

    /* Even better, maybe we can merge with last fragment: */

    l = prev_chunk(p);
    if (!inuse(l))  
    {
      unlink(l);
      set_size(l, p->size + l->size);
      p = l;
    }

  }

  /* mark the end of sbrked space as in use to prevent merging */

  last_sbrk_end = (size_t*)((char*)p + p->size);
  *last_sbrk_end = SIZE_SZ | INUSE;

  return p;
}


/* Consolidate dirty chunks until one big enough for the current */
/* request is created.  Chain to malloc_from_sys if none can be made. */
/* This is just one phase of malloc, but broken out for sanity */

TM_CALLABLE static mchunkptr malloc_find_space(size_t nb)
{
  /* Circularly traverse bins so as not to pick on any one too much */
  static int rover_init = 0;
  static mbinptr rover;        /* Circular roving ptr */

  if (rover_init == 0) {
    rover = LASTBIN;
    rover_init = 1;
  }
  mbinptr origin = rover;
  mbinptr b      = rover;

  /* Preliminaries.  */
  clear_last_remainder;
  reset_maxClean;

  do
  {
    mchunkptr p;

    while ( (p = last_dirty(b)) != dirty_head(b))
    {
      unlink(p);
      consolidate(p);

      if (p->size >= nb)
      {
        rover = b;
        return p;
      }
      else
        cleanlink(p);
    }

    b = (b == FIRSTBIN)? LASTBIN : b - 1;      /* circularly sweep down */

  } while (b != origin);

  /* If no return above, chain to the next step of malloc */
  return  malloc_from_sys(nb);
}


/* Clear out dirty chunks from a bin, along with the free list. */
/* Invoked from malloc when things look too fragmented */

TM_CALLABLE static void malloc_clean_bin(mbinptr bin)
{
  mchunkptr p;

  clear_last_remainder;
  
  while ( (p = last_dirty(bin)) != dirty_head(bin))
  {
    unlink(p);
    consolidate(p);
    cleanlink(p);
  }

  while (returned_list != 0)
  {
    p = returned_list;
    returned_list = p->fd;
    clear_inuse(p);
    consolidate(p);
    cleanlink(p);
  }
}


/*   Finally, the user-level functions  */

TM_CALLABLE Void_t* PDL_MALLOC(size_t bytes)
{
  static size_t previous_request = 0;  /* To control preallocation */

  size_t    nb = request2size(bytes);  /* padded request size */
  mbinptr   bin;                       /* corresponding bin */
  mchunkptr victim;                    /* will hold selected chunk */

  init();

  /* ----------- Peek at returned_list; hope for luck */

  if ((victim = returned_list) != 0 && 
      exact_fit(victim, nb)) /* size check works even though INUSE set */
  {
    returned_list = victim->fd;
    return chunk2mem(victim);
  }
  
  findbin(nb, bin);  /*  Need to know bin for other traversals */

  /* ---------- Scan dirty list of own bin */

     /* Code for small bins special-cased out since */
     /* no size check or traversal needed and */
     /* clean bins are exact matches so might as well test now */

  if (nb < MAX_SMALLBIN_SIZE)
  {
    if ( ((victim = first_dirty(bin)) != dirty_head(bin)) ||
         ((victim = last_clean(bin))  != clean_head(bin)))
    {
      unlink(victim);
      set_inuse(victim);
      return chunk2mem(victim);
    }
  }
  else
  {
    if ( (victim = first_dirty(bin)) != dirty_head(bin))
    {
      do
      {
        if (exact_fit(victim, nb))
        {
          unlink(victim);
          set_inuse(victim);
          return chunk2mem(victim);
        }
        else victim = victim->fd;
      } while (victim != dirty_head(bin));
      
      /* If there were chunks in there but none matched, then */
      /* consolidate all chunks in this bin plus those on free list */
      /* to prevent further traversals and fragmentation. */
      
      malloc_clean_bin(bin);
    }
  }
    
  /* ------------ Search free list */

  if ( (victim = returned_list) != 0)
  {
    do
    {
      mchunkptr next = victim->fd;
      if (exact_fit(victim, nb))
      {
        returned_list = next;
        return chunk2mem(victim);
      }
      else    /* Place unusable chunks in their bins  */
      {
        clear_inuse(victim);
        dirtylink(victim);
        victim = next;
      }
    } while (victim != 0);
    returned_list = 0;
  }

  /* -------------- Try the remainder from last successful split */

  if ( (victim = last_remainder) != 0 && victim->size >= nb)
  {
    last_remainder = 0; /* reset for next time */
    goto split;
  }

  /* -------------- Scan through own clean bin */

      /* (Traversing back-to-front tends to choose `old' */
      /*  chunks that could not be further consolidated.) */

  for (victim = last_clean(bin); victim != clean_head(bin); victim=victim->bk)
  {
    if (victim->size >= nb)
    {
      unlink(victim); 
      goto split;
    }
  }

  /* -------------- Try all bigger clean bins */

      /* (Scanning upwards is slower but prevents fragmenting big */
      /* chunks that we might need later. If there aren't any smaller */
      /* ones, most likely we got a big one from last_remainder anyway.) */

  {
    mbinptr b;

    for (b = bin + 1; b <= maxClean; ++b)
    {
      if ( (victim = last_clean(b)) != clean_head(b) ) 
      {
        unlink(victim);
        goto split;
      }
    }
  }

  /* -------------  Consolidate or sbrk */

  victim =  malloc_find_space(nb);

  if (victim == 0)  /* propagate failure */
    return 0; 

  /* -------------- Possibly split victim chunk */

 split:  
  {
    size_t room = victim->size - nb;
    if (room >= MINSIZE)     
    {
      mchunkptr v = victim;  /* Hold so can break up in prealloc */
      
      set_size(victim, nb);  /* Adjust size of chunk to be returned */
      
      /* ---------- Preallocate */
      
          /* Try to preallocate some more of this size if */
          /* last (split) req was of same size */
      
      if (previous_request == nb)
      {
        int i;
        
        for (i = 0; i < MAX_PREALLOCS && room >= nb + MINSIZE; ++i)
        {
          room -= nb;
           
          v = (mchunkptr)((char*)(v) + nb); 
          set_size(v, nb);
          set_inuse(v);     /* free-list chunks must have inuse set */
          return_chunk(v);  /* add to free list */
        } 
      }

      previous_request = nb;  /* record for next time */

      /* ---------- Create remainder chunk  */
      
      /* Get rid of the old one first */
      if (last_remainder != 0) cleanlink(last_remainder);
      
      last_remainder = (mchunkptr)((char*)(v) + nb);
      set_size(last_remainder, room);
    }

    set_inuse(victim);
    return chunk2mem(victim);
  }
}


TM_CALLABLE void PDL_FREE(Void_t* mem)
{
  if (mem != 0)
  {
    mchunkptr p = mem2chunk(mem);
    return_chunk(p);
  }
}
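
/*
   Added commentary: free is O(1) by design -- it only pushes the chunk
   onto returned_list, leaving the INUSE bit set.  All binning and
   consolidation work is deferred to later malloc/realloc calls, as
   described in the overview above.
*/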

 
TM_CALLABLE Void_t* PDL_REALLOC(Void_t* mem, size_t bytes)
{
  if (mem == 0) {
    return PDL_MALLOC(bytes);
  }
  else
  {
    size_t       nb      = request2size(bytes);
    mchunkptr    p       = mem2chunk(mem);
    size_t       oldsize;
    long         room;
    mchunkptr    nxt;

    if (p == returned_list) /* support realloc-last-freed-chunk idiocy */
       returned_list = returned_list->fd;

    clear_inuse(p);
    oldsize = p->size;

    /* try to expand (even if already big enough), to clean up chunk */

    free_returned_list(); /* make freed chunks available to consolidate */

    while (!inuse(nxt = next_chunk(p))) /* Expand the chunk forward */
    {
      unlink(nxt);
      set_size(p, p->size + nxt->size);
    }

    room = p->size - nb;
    if (room >= 0)          /* Successful expansion */
    {
      if (room >= MINSIZE)  /* give some back if possible */
      {
        mchunkptr remainder = (mchunkptr)((char*)(p) + nb);
        set_size(remainder, room);
        cleanlink(remainder);
        set_size(p, nb);
      }
      set_inuse(p);
      return chunk2mem(p);
    }
    else /* Could not expand. Get another chunk and copy. */
    {
      Void_t* newmem;
      size_t count;
      size_t* src;
      size_t* dst;

      set_inuse(p);    /* don't let malloc consolidate us yet! */
      newmem = PDL_MALLOC(nb);

      if (newmem != 0) {
        /* Copy -- we know that alignment is at least `size_t' */
        src = (size_t*) mem;
        dst = (size_t*) newmem;
        count = (oldsize - SIZE_SZ) / sizeof(size_t);
        while (count-- > 0) *dst++ = *src++;
      }

      PDL_FREE(mem);
      return newmem;
    }
  }
}


/* Return a pointer to space with at least the alignment requested */
/* Alignment argument should be a power of two */

TM_CALLABLE Void_t* PDL_MEMALIGN(size_t alignment, size_t bytes)
{
  mchunkptr p;
  size_t    nb = request2size(bytes);
  size_t    room;

  /* find an alignment that both we and the user can live with: */
  /* least common multiple guarantees mutual happiness */
  size_t    align = lcm(alignment, MALLOC_MIN_OVERHEAD);

  /* call malloc with worst case padding to hit alignment; */
  /* we will give back extra */

  size_t req = nb + align + MINSIZE;
  Void_t*  m = PDL_MALLOC(req);

  if (m == 0) return 0; /* propagate failure */

  p = mem2chunk(m);
  clear_inuse(p);


  if (((size_t)(m) % align) != 0) /* misaligned */
  {

    /* find an aligned spot inside chunk */

    mchunkptr ap = (mchunkptr)((((size_t)(m) + align-1) & -align) - SIZE_SZ);

    size_t gap = (size_t)(ap) - (size_t)(p);

    /* we need to give back leading space in a chunk of at least MINSIZE */

    if (gap < MINSIZE)
    {
      /* This works since align >= MINSIZE */
      /* and we've malloc'd enough total room */

      ap = (mchunkptr)( (size_t)(ap) + align );
      gap += align;    
    }

    room = p->size - gap;

    /* give back leader */
    set_size(p, gap);
    dirtylink(p); /* Don't really know if clean or dirty; be safe */

    /* use the rest */
    p = ap;
    set_size(p, room);
  }

  /* also give back spare room at the end */

  room = p->size - nb;
  if (room >= MINSIZE)
  {
    mchunkptr remainder = (mchunkptr)((char*)(p) + nb);
    set_size(remainder, room);
    dirtylink(remainder); /* Don't really know; be safe */
    set_size(p, nb);
  }

  set_inuse(p);
  return chunk2mem(p);

}

size_t PDL_OBJSIZE(void *mem)
{
  mchunkptr p = mem2chunk(mem);
  return p->size & ~INUSE;  /* full chunk size including overhead; mask the in-use bit */
}



/* Derivatives */

TM_CALLABLE Void_t* PDL_VALLOC(size_t bytes)
{
  /* Cache result of getpagesize */
  static size_t malloc_pagesize = 0;

  if (malloc_pagesize == 0) malloc_pagesize = malloc_getpagesize;
  return PDL_MEMALIGN (malloc_pagesize, bytes);
}


TM_CALLABLE Void_t* PDL_CALLOC(size_t n, size_t elem_size)
{
  size_t sz = n * elem_size;  /* note: no overflow check, as in the original */
  Void_t* p = PDL_MALLOC(sz);
  char* q = (char*) p;

  if (p != 0)  /* don't zero-fill through a null pointer */
    while (sz-- > 0) *q++ = 0;
  return p;
}

TM_CALLABLE void PDL_CFREE(Void_t *mem)
{
  PDL_FREE(mem);
}

TM_CALLABLE size_t PDL_GET_USABLE_SIZE(Void_t* mem)
{
  if (mem == 0)
    return 0;
  else
  {
    mchunkptr p = (mchunkptr)((char*)(mem) - SIZE_SZ); 
    size_t sz = p->size & ~(INUSE);
    /* report zero if not in use or detectably corrupt */
    if (p->size == sz || sz != *((size_t*)((char*)(p) + sz - SIZE_SZ)))
      return 0;
    else
      return sz - MALLOC_MIN_OVERHEAD;
  }
}

TM_CALLABLE void PDL_MALLOC_STATS()
{

  /* Traverse through and count all sizes of all chunks */

  size_t avail = 0;
  size_t malloced_mem;

  mbinptr b;

  free_returned_list();

  for (b = FIRSTBIN; b <= LASTBIN; ++b)
  {
    mchunkptr p;

    for (p = first_dirty(b); p != dirty_head(b); p = p->fd)
      avail += p->size;

    for (p = first_clean(b); p != clean_head(b); p = p->fd)
      avail += p->size;
  }

  malloced_mem = sbrked_mem - avail;

  TM_WAIVER {
    fprintf(stderr, "total mem = %10u\n", (unsigned int) sbrked_mem);
    fprintf(stderr, "in use    = %10u\n", (unsigned int) malloced_mem);
  }

}
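
#if 0
/* Illustrative usage sketch (added; not part of the allocator).  A
   minimal smoke test of the entry points above, assuming the
   persistent heap can be mapped at DLMALLOC_PHEAP_BASE. */
static void pdl_example(void)
{
  void *a = PDL_MALLOC(100);    /* padded to a usable chunk size */
  void *b = PDL_CALLOC(10, 10); /* zero-filled */

  a = PDL_REALLOC(a, 200);      /* expands in place if a free neighbor
                                   exists, else malloc-copy-free */
  PDL_FREE(b);                  /* O(1): b goes onto returned_list */
  PDL_FREE(a);
  PDL_MALLOC_STATS();           /* prints total vs in-use bytes */
}
#endif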
