usermode/library/malloc-original/benchmarks/larson/driver_mt.cpp

00001 #ifdef WIN32
00002 #define __WIN32__
00003 #endif
00004 
00005 #ifdef __WIN32__
00006 #include  <windows.h>
00007 #include  <conio.h>
00008 #include  <process.h>
00009 #else
00010 #include <unistd.h>
00011 #include <sys/resource.h>
00012 #include <sys/time.h>
00013 extern "C" int pthread_setconcurrency (int);
/*
 * Stand-ins for the Win32 type names used by this driver when it is built
 * on a non-Windows platform.
 */
typedef void *LPVOID;
typedef long long LONGLONG;
typedef long DWORD;
typedef long LONG;
typedef unsigned long ULONG;

/*
 * Replacement for the Win32 LARGE_INTEGER union: a LONGLONG (QuadPart)
 * overlaid with a LowPart/HighPart pair.
 * NOTE(review): LowPart/HighPart are declared as `long` here, so on
 * platforms where long is 64 bits wide the pair does not split QuadPart
 * in two halves. The non-Windows code paths in this file use plain `long`
 * counters instead of this union.
 */
typedef union _LARGE_INTEGER {
  struct {
    DWORD LowPart;
    LONG  HighPart;
  } foo;
  LONGLONG QuadPart;    /* In Visual C++ this member is a typedef to __int64. */
} LARGE_INTEGER;

typedef long long _int64;
00027 #ifndef TRUE
00028 enum { TRUE = 1, FALSE = 0 };
00029 #endif
00030 #include <assert.h>
00031 #define _ASSERTE(x) assert(x)
00032 #define _inline inline
/*
 * Emulation of the Win32 Sleep() call on top of POSIX sleep().
 *
 * x is a duration in milliseconds. It is converted to whole seconds by
 * integer division, so any sub-second remainder is discarded (a request
 * of less than 1000 ms does not sleep at all).
 */
void Sleep (long x)
{
  const long whole_seconds = x / 1000;

  sleep(whole_seconds);
}
00038 
/*
 * Emulation of the Win32 QueryPerformanceCounter() call on top of
 * gettimeofday().
 *
 * Stores the current time of day, expressed in microseconds, into *x.
 * If gettimeofday() fails, *x is set to 0 instead of being computed from
 * an uninitialized timeval.
 *
 * The timezone argument is passed as NULL: POSIX leaves the behaviour
 * unspecified when it is non-null, and its contents were never used here.
 */
void QueryPerformanceCounter (long * x)
{
  struct timeval tv;

  if (gettimeofday (&tv, NULL) != 0) {
    *x = 0;
    return;
  }
  *x = (long) tv.tv_sec * 1000000L + (long) tv.tv_usec;
}
00046 
/*
 * Emulation of the Win32 QueryPerformanceFrequency() call.
 *
 * Stores the number of counter ticks per second into *x. The value is
 * one million, i.e. one tick per microsecond.
 */
void QueryPerformanceFrequency(long * x)
{
  const long ticks_per_second = 1000L * 1000L;

  *x = ticks_per_second;
}
00051 
00052 
00053 #define _REENTRANT 1
00054 #include <pthread.h>
00055 #ifdef __sun
00056 #include <thread.h>
00057 #endif
/* Signature of a pthread start routine. */
typedef void * VoidFunction (void *);

/*
 * Emulation of the Win32 _beginthread() call on top of pthreads.
 *
 *   x  - start routine for the new thread
 *   (unnamed int) - stack size in the Win32 API; ignored here
 *   z  - argument passed to the start routine
 *
 * The thread handle is not returned, so the thread can never be joined.
 * It is therefore created detached, so that its resources are released
 * when it exits instead of accumulating for the life of the process.
 * The attribute object is destroyed once the thread has been created.
 * A creation failure trips an assertion (a no-op when NDEBUG is defined).
 */
void _beginthread (VoidFunction x, int, void * z)
{
  pthread_t pt;
  pthread_attr_t pa;
  int rc;

  pthread_attr_init (&pa);
  pthread_attr_setdetachstate (&pa, PTHREAD_CREATE_DETACHED);

#if 0 // __SVR4
  pthread_attr_setscope (&pa, PTHREAD_SCOPE_SYSTEM); /* bound behavior */
#endif

  rc = pthread_create(&pt, &pa, x, z);
  pthread_attr_destroy (&pa);

  assert(rc == 0);
  (void) rc;
}
00072 #endif
00073 
00074 
00075 #include  <stdio.h>
00076 #include  <stdlib.h>
00077 #include  <stddef.h>
00078 #include  <string.h>
00079 #include  <ctype.h>
00080 #include  <time.h>
00081 #include  <assert.h>
00082 
00083 #define CPP
00084 //#include "arch-specific.h"
00085 
00086 #if USE_ROCKALL
00087 //#include "FastHeap.hpp"
00088 //FAST_HEAP theFastHeap (1024 * 1024, true, true, true);
00089 
00090 typedef int SBIT32;
00091 
00092 #include "SmpHeap.hpp"
00093 SMP_HEAP theFastHeap (1024 * 1024, true, true, true);
00094 
00095 void * operator new( unsigned int cb )
00096 {
00097   void *pRet = theFastHeap.New ((size_t)cb) ;
00098   return pRet;
00099 }
00100 
00101 void operator delete(void *pUserData )
00102 {
00103   theFastHeap.Delete (pUserData) ;
00104 }
00105 #endif
00106 
00107 #if 0
extern "C" void * hdmalloc (size_t sz) ;
extern "C" void hdfree (void * ptr) ;
extern "C" void hdmalloc_stats (void) ;

/*
 * Replacement global operator new: forwards the request to hdmalloc().
 * The parameter type is size_t, as the C++ standard requires for a
 * replacement allocation function.
 */
void * operator new( size_t cb )
{
  return hdmalloc(cb) ;
}

/* Replacement global operator delete: releases the block with hdfree(). */
void operator delete(void *pUserData )
{
  hdfree(pUserData) ;
}
00121 #endif
00122 
00123 
00124 
00125 /* Test driver for memory allocators           */
00126 /* Author: Paul Larson, palarson@microsoft.com */
00127 #define MAX_THREADS     100
00128 #define MAX_BLOCKS  1000000
00129 
00130 int volatile  stopflag=FALSE ;       
00131 
00132 struct lran2_st {
00133   long x, y, v[97];
00134 };
00135 
00136 int     TotalAllocs=0 ;
00137 
00138 typedef struct thr_data {
00139 
00140   int    threadno ;
00141   int    NumBlocks ;
00142   int    seed ;
00143 
00144   int    min_size ;
00145   int    max_size ;
00146 
00147   LPVOID *array ;
00148   int    *blksize ;
00149   int     asize ;
00150 
00151   int    cAllocs ;
00152   int    cFrees ;
00153   int    cThreads ;
00154   int    cBytesAlloced ;
00155 
00156   volatile int finished ;
00157   struct lran2_st rgen ;
00158 
00159 } thread_data;
00160 
00161 void runthreads(long sleep_cnt, int min_threads, int max_threads, 
00162                 int chperthread, int num_rounds) ;
00163 void runloops(long sleep_cnt, int num_chunks ) ;
00164 static void warmup(LPVOID *blkp, int num_chunks );
00165 static void * exercise_heap( void *pinput) ;
00166 static void lran2_init(struct lran2_st* d, long seed) ;
00167 static long lran2(struct lran2_st* d) ;
00168 ULONG CountReservedSpace() ;
00169  
00170 LPVOID          blkp[MAX_BLOCKS] ;
00171 int             blksize[MAX_BLOCKS] ;
00172 long            seqlock=0 ;
00173 struct lran2_st rgen ;
00174 int             min_size=10, max_size=500 ;
00175 int             num_threads ;
00176 ULONG           init_space ;
00177 
00178 extern  int   cLockSleeps ;
00179 extern  int   cAllocedChunks ;
00180 extern  int   cAllocedSpace ;
00181 extern  int   cUsedSpace ;
00182 extern  int   cFreeChunks ;
00183 extern  int   cFreeSpace ;
00184 
00185 int cChecked=0 ;
00186 
00187 
00188 
00189 int main()
00190 {
00191      //ReferenceLibHoard();
00192 #ifdef _MT
00193   int          min_threads, max_threads ;
00194   int          num_rounds ;
00195   int          chperthread ;
00196 #endif
00197   unsigned     seed=12345 ;
00198   int          num_chunks=10000;
00199   long sleep_cnt;
00200 
00201 #ifdef _MT
00202   printf( "\nMulti-threaded test driver \n") ;
00203 #else
00204   printf( "\nSingle-threaded test driver \n") ;
00205 #endif
00206 #ifdef CPP
00207   printf("C++ version (new and delete)\n") ;
00208 #else
00209   printf("C version (malloc and free)\n") ;
00210 #endif
00211   printf("runtime (sec): ") ;
00212   scanf ("%d", &sleep_cnt);
00213   printf("chunk size (min,max): ") ;
00214   scanf("%d %d", &min_size, &max_size ) ;
00215 #ifdef _MT
00216   printf("threads (min, max):   ") ; 
00217   scanf("%d %d", &min_threads, &max_threads) ;
00218 #ifndef __WIN32__
00219 #ifdef __SVR4
00220   pthread_setconcurrency (max_threads);
00221 #endif
00222 #endif
00223   printf("chunks/thread:  ") ; scanf("%d", &chperthread ) ;
00224   printf("no of rounds:   ") ; scanf("%d", &num_rounds ) ;
00225   num_chunks = max_threads*chperthread ;
00226 #else 
00227   printf("no of chunks:  ") ; scanf("%d", &num_chunks ) ;
00228 #endif
00229   printf("random seed:    ") ; scanf("%d", &seed) ;
00230 
00231   if( num_chunks > MAX_BLOCKS ){
00232     printf("Max %d chunks - exiting\n", MAX_BLOCKS ) ;
00233     return(1) ;
00234   }
00235 
00236   lran2_init(&rgen, seed) ;
00237   // init_space = CountReservedSpace() ;
00238 
00239 #ifdef _MT
00240   runthreads(sleep_cnt, min_threads, max_threads, chperthread, num_rounds) ;
00241 #else
00242   runloops(sleep_cnt, num_chunks ) ;
00243 #endif
00244 
00245 #ifdef _DEBUG
00246   _cputs("Hit any key to exit...") ;    (void)_getch() ;
00247 #endif
00248 
00249   return(0) ;
00250 
00251 } /* main */
00252 
00253 void runloops(long sleep_cnt, int num_chunks )
00254 {
00255   int     cblks ;
00256   int     victim ;
00257   int     blk_size ;
00258 #ifdef __WIN32__
00259         _LARGE_INTEGER ticks_per_sec, start_cnt, end_cnt;
00260 #else
00261   long ticks_per_sec ;
00262   long start_cnt, end_cnt ;
00263 #endif
00264   _int64        ticks ;
00265   double        duration ;
00266   double        reqd_space ;
00267   ULONG         used_space ;
00268   int           sum_allocs=0 ;
00269 
00270   QueryPerformanceFrequency( &ticks_per_sec ) ;
00271   QueryPerformanceCounter( &start_cnt) ;
00272 
00273   for( cblks=0; cblks<num_chunks; cblks++){
00274     blk_size = min_size+lran2(&rgen)%(max_size - min_size) ;
00275 #ifdef CPP
00276     blkp[cblks] = new char[blk_size] ;
00277 #else
00278     blkp[cblks] = malloc(blk_size) ;
00279 #endif
00280     blksize[cblks] = blk_size ;
00281     assert(blkp[cblks] != NULL) ;
00282   }
00283 
00284   while(TRUE){
00285     for( cblks=0; cblks<num_chunks; cblks++){
00286       victim = lran2(&rgen)%num_chunks ;
00287 #ifdef CPP
00288       delete blkp[victim] ;
00289 #else
00290       free(blkp[victim]) ;
00291 #endif
00292 
00293       blk_size = min_size+lran2(&rgen)%(max_size - min_size) ;
00294 #ifdef CPP
00295       blkp[victim] = new char[blk_size] ;
00296 #else
00297       blkp[victim] = malloc(blk_size) ;
00298 #endif
00299       blksize[victim] = blk_size ;
00300       assert(blkp[victim] != NULL) ;
00301     }
00302     sum_allocs += num_chunks ;
00303 
00304     QueryPerformanceCounter( &end_cnt) ;
00305 #ifdef __WIN32__
00306                 ticks = end_cnt.QuadPart - start_cnt.QuadPart ;
00307                 duration = (double)ticks/ticks_per_sec.QuadPart ;
00308 #else
00309     ticks = end_cnt - start_cnt ;
00310     duration = (double)ticks/ticks_per_sec ;
00311 #endif
00312 
00313     if( duration >= sleep_cnt) break ;
00314   }
00315   reqd_space = (0.5*(min_size+max_size)*num_chunks) ;
00316   // used_space = CountReservedSpace() - init_space;
00317 
00318   printf("%6.3f", duration  ) ;
00319   printf("%8.0f", sum_allocs/duration ) ;
00320   printf(" %6.3f %.3f", (double)used_space/(1024*1024), used_space/reqd_space) ;
00321   printf("\n") ;
00322 
00323 }
00324 
00325 
00326 #ifdef _MT
00327 void runthreads(long sleep_cnt, int min_threads, int max_threads, int chperthread, int num_rounds)
00328 {
00329   thread_data  de_area[MAX_THREADS] ;
00330   thread_data *pdea;
00331   int           nperthread ;
00332   int           sum_threads ;
00333   int           sum_allocs ;
00334   int           sum_frees ;
00335   double        duration ;
00336 #ifdef __WIN32__
00337         _LARGE_INTEGER ticks_per_sec, start_cnt, end_cnt;
00338 #else
00339         long ticks_per_sec ;
00340   long start_cnt, end_cnt ;
00341 #endif
00342         _int64        ticks ;
00343   double        rate_1=0, rate_n ;
00344   double        reqd_space ;
00345   ULONG         used_space ;
00346   int           prevthreads ;
00347   int           i ;
00348 
00349   QueryPerformanceFrequency( &ticks_per_sec ) ;
00350 
00351   pdea = &de_area[0] ;
00352   memset(&de_area[0], 0, sizeof(thread_data)) ;
00353 
00354   prevthreads = 0 ;
00355   for(num_threads=min_threads; num_threads <= max_threads; num_threads++ )
00356     {
00357 
00358       warmup(&blkp[prevthreads*chperthread], (num_threads-prevthreads)*chperthread );
00359 
00360       nperthread = chperthread ;
00361       stopflag   = FALSE ;
00362                 
00363       for(i=0; i< num_threads; i++){
00364         de_area[i].threadno    = i+1 ;
00365         de_area[i].NumBlocks   = num_rounds*nperthread;
00366         de_area[i].array       = &blkp[i*nperthread] ;
00367         de_area[i].blksize     = &blksize[i*nperthread] ;
00368         de_area[i].asize       = nperthread ;
00369         de_area[i].min_size    = min_size ;
00370         de_area[i].max_size    = max_size ;
00371         de_area[i].seed        = lran2(&rgen) ; ;
00372         de_area[i].finished    = 0 ;
00373         de_area[i].cAllocs     = 0 ;
00374         de_area[i].cFrees      = 0 ;
00375         de_area[i].cThreads    = 0 ;
00376         de_area[i].finished    = FALSE ;
00377         lran2_init(&de_area[i].rgen, de_area[i].seed) ;
00378 
00379 #ifdef __WIN32__
00380         _beginthread((void (__cdecl*)(void *)) exercise_heap, 0, &de_area[i]) ;  
00381 #else
00382         _beginthread(exercise_heap, 0, &de_area[i]) ;  
00383 #endif
00384 
00385         }
00386 
00387       QueryPerformanceCounter( &start_cnt) ;
00388 
00389       printf ("Sleeping for %d seconds.\n", sleep_cnt);
00390       Sleep(sleep_cnt * 1000L) ;
00391       // printf ("Time to die!\n");
00392 
00393       stopflag = TRUE ;
00394 
00395       for(i=0; i<num_threads; i++){
00396         while( !de_area[i].finished ){
00397 #ifdef __WIN32__
00398                 Sleep(1);
00399 #else
00400                 sched_yield();
00401 #endif
00402         }
00403       }
00404 
00405 
00406       QueryPerformanceCounter( &end_cnt) ;
00407 
00408       sum_frees = sum_allocs =0  ;
00409       sum_threads = 0 ;
00410       for(i=0;i< num_threads; i++){
00411         sum_allocs    += de_area[i].cAllocs ;
00412         sum_frees     += de_area[i].cFrees ;
00413         sum_threads   += de_area[i].cThreads ;
00414         de_area[i].cAllocs = de_area[i].cFrees = 0;
00415       }
00416 
00417  
00418 #ifdef __WIN32__
00419       ticks = end_cnt.QuadPart - start_cnt.QuadPart ;
00420      duration = (double)ticks/ticks_per_sec.QuadPart ;
00421 #else
00422       ticks = end_cnt - start_cnt ;
00423      duration = (double)ticks/ticks_per_sec ;
00424 #endif
00425 
00426       for( i=0; i<num_threads; i++){
00427         if( !de_area[i].finished )
00428           printf("Thread at %d not finished\n", i) ;
00429       }
00430 
00431 
00432       rate_n = sum_allocs/duration ;
00433       if( rate_1 == 0){
00434         rate_1 = rate_n ;
00435       }
00436                 
00437       reqd_space = (0.5*(min_size+max_size)*num_threads*chperthread) ;
00438       // used_space = CountReservedSpace() - init_space;
00439 
00440       printf("%2d ", num_threads ) ;
00441       printf("%6.3f", duration  ) ;
00442       printf("%6.3f", rate_n/rate_1 ) ;
00443       printf("%8.0f", sum_allocs/duration ) ;
00444       printf(" %6.3f %.3f", (double)used_space/(1024*1024), used_space/reqd_space) ;
00445       printf("\n") ;
00446 
00447       Sleep(5000L) ; // wait 5 sec for old threads to die
00448 
00449       prevthreads = num_threads ;
00450     }
00451 }
00452 
00453 
00454 static void * exercise_heap( void *pinput)
00455 {
00456   thread_data  *pdea;
00457   int           cblks=0 ;
00458   int           victim ;
00459   long          blk_size ;
00460   int           range ;
00461 
00462   if( stopflag ) return 0;
00463 
00464   pdea = (thread_data *)pinput ;
00465   pdea->finished = FALSE ;
00466   pdea->cThreads++ ;
00467   range = pdea->max_size - pdea->min_size ;
00468 
00469   /* allocate NumBlocks chunks of random size */
00470   for( cblks=0; cblks<pdea->NumBlocks; cblks++){
00471     victim = lran2(&pdea->rgen)%pdea->asize ;
00472 #ifdef CPP
00473     delete pdea->array[victim] ;
00474 #else
00475     free(pdea->array[victim]) ;
00476 #endif
00477     pdea->cFrees++ ;
00478 
00479     blk_size = pdea->min_size+lran2(&pdea->rgen)%range ;
00480 #ifdef CPP
00481     pdea->array[victim] = new char[blk_size] ;
00482 #else
00483     pdea->array[victim] = malloc(blk_size) ;
00484 #endif
00485 
00486     pdea->blksize[victim] = blk_size ;
00487     assert(pdea->array[victim] != NULL) ;
00488 
00489     pdea->cAllocs++ ;
00490 
00491                 /* Write something! */
00492 
00493                 volatile char * chptr = ((char *) pdea->array[victim]);
00494                 *chptr++ = 'a';
00495                 volatile char ch = *((char *) pdea->array[victim]);
00496                 *chptr = 'b';
00497 
00498     
00499                 if( stopflag ) break ;
00500   }
00501 
00502   //    printf("Thread %u terminating: %d allocs, %d frees\n",
00503   //                  pdea->threadno, pdea->cAllocs, pdea->cFrees) ;
00504   pdea->finished = TRUE ;
00505 
00506   if( !stopflag ){
00507 #ifdef __WIN32__
00508         _beginthread((void (__cdecl*)(void *)) exercise_heap, 0, pdea) ;  
00509 #else
00510     _beginthread(exercise_heap, 0, pdea) ;
00511 #endif
00512   }
00513   return 0;
00514 }
00515 
00516 static void warmup(LPVOID *blkp, int num_chunks )
00517 {
00518   int     cblks ;
00519   int     victim ;
00520   int     blk_size ;
00521   LPVOID  tmp ;
00522 
00523 
00524   for( cblks=0; cblks<num_chunks; cblks++){
00525     blk_size = min_size+lran2(&rgen)%(max_size-min_size) ;
00526 #ifdef CPP
00527     blkp[cblks] = new char[blk_size] ;
00528 #else
00529     blkp[cblks] = malloc(blk_size) ;
00530 #endif
00531                 /* Touch! */
00532                 volatile char * p = (char *) blkp[cblks];
00533                 *p = 'a';
00534 
00535     blksize[cblks] = blk_size ;
00536     assert(blkp[cblks] != NULL) ;
00537   }
00538 
00539   /* generate a random permutation of the chunks */
00540   for( cblks=num_chunks; cblks > 0 ; cblks--){
00541     victim = lran2(&rgen)%cblks ;
00542     tmp = blkp[victim] ;
00543     blkp[victim]  = blkp[cblks-1] ;
00544     blkp[cblks-1] = tmp ;
00545   }
00546 
00547   for( cblks=0; cblks<4*num_chunks; cblks++){
00548     victim = lran2(&rgen)%num_chunks ;
00549 #ifdef CPP
00550     delete blkp[victim] ;
00551 #else
00552     free(blkp[victim]) ;
00553 #endif
00554 
00555     blk_size = min_size+lran2(&rgen)%(max_size - min_size) ;
00556 #ifdef CPP
00557     blkp[victim] = new char[blk_size] ;
00558 #else
00559     blkp[victim] = malloc(blk_size) ;
00560 #endif
00561                 /* Touch! */
00562                 volatile char * p = (char *) blkp[victim];
00563                 *p = 'a';
00564                 *p = 'b';
00565     blksize[victim] = blk_size ;
00566     assert(blkp[victim] != NULL) ;
00567   }
00568 }
00569 #endif // _MT
00570 
00571 #ifdef __WIN32__
00572 ULONG CountReservedSpace()
00573 {
00574   MEMORY_BASIC_INFORMATION info;
00575   char                     *addr=NULL ;
00576   ULONG                     size=0 ;
00577 
00578   while( true){
00579     VirtualQuery(addr, &info, sizeof(info));
00580     switch( info.State){
00581     case MEM_FREE:
00582     case MEM_RESERVE:
00583       break ;
00584     case MEM_COMMIT:
00585       size += info.RegionSize ;
00586       break ;
00587     }
00588     addr += info.RegionSize ;
00589     if( addr >= (char *)0x80000000UL ) break ;
00590   }
00591 
00592   return size ;
00593 
00594 }
00595 #endif
00596 
00597 // =======================================================
00598 
00599 /* lran2.h
00600  * by Wolfram Gloger 1996.
00601  *
00602  * A small, portable pseudo-random number generator.
00603  */
00604 
00605 #ifndef _LRAN2_H
00606 #define _LRAN2_H
00607 
00608 #define LRAN2_MAX 714025l /* constants for portable */
00609 #define IA        1366l   /* random number generator */
00610 #define IC        150889l /* (see e.g. `Numerical Recipes') */
00611 
00612 //struct lran2_st {
00613 //    long x, y, v[97];
00614 //};
00615 
00616 static void
00617 lran2_init(struct lran2_st* d, long seed)
00618 {
00619   long x;
00620   int j;
00621 
00622   x = (IC - seed) % LRAN2_MAX;
00623   if(x < 0) x = -x;
00624   for(j=0; j<97; j++) {
00625     x = (IA*x + IC) % LRAN2_MAX;
00626     d->v[j] = x;
00627   }
00628   d->x = (IA*x + IC) % LRAN2_MAX;
00629   d->y = d->x;
00630 }
00631 
00632 static 
00633 long lran2(struct lran2_st* d)
00634 {
00635   int j = (d->y % 97);
00636 
00637   d->y = d->v[j];
00638   d->x = (IA*d->x + IC) % LRAN2_MAX;
00639   d->v[j] = d->x;
00640   return d->y;
00641 }
00642 
00643 #undef IA
00644 #undef IC
00645 
00646 #endif
00647 
00648 

Generated on Sat Apr 23 11:43:35 2011 for Mnemosyne by  doxygen 1.4.7