xCalls: src/libc/string.c Source File

00001 
00009 /* 
00010  * Some of the code below has been extracted from uClibc.
00011  * The library's license follows.
00012  */
00013 
00014 /* uClibc
00015  *
00016  * Copyright (C) 2002 by Erik Andersen <andersen@uclibc.org>
00017  *
00018  * This program is free software; you can redistribute it and/or modify it
00019  * under the terms of the GNU Library General Public License as published by
00020  * the Free Software Foundation; either version 2 of the License, or (at your
00021  * option) any later version.
00022  *
00023  * This program is distributed in the hope that it will be useful, but WITHOUT
00024  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00025  * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License
00026  * for more details.
00027  *
00028  * You should have received a copy of the GNU Library General Public License
00029  * along with this program; if not, write to the Free Software Foundation,
00030  * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
00031  *
00032  */
00033 
00034 #include <string.h>
00035 #include <sys/types.h>
00036 #include <sys/stat.h>
00037 #include <unistd.h>
00038 #include <ctype.h>
00039 #include "memcopy.h"
00040 #include "pagecopy.h"
00041 #include "libc_internal.h"
00042 #include "tm_macros.h"
00043 
00044 
00045 TM_WAIVER int tolower(int c);
00046 TM_WAIVER extern __const __int32_t **__ctype_tolower_loc (void)
00047      __attribute__ ((__const));
00048 
00049 
00050 char *
00051 txc_libc_strcpy (char *dest, const char *src)
00052 {
00053    const char *p;
00054    char *q; 
00055 
00056    TM_BEGIN
00057    for(p = src, q = dest; *p != '\0'; p++, q++)
00058       *q = *p;
00059                           
00060    *q = '\0';
00061    TM_END
00062 
00063    return dest;
00064 }
00065 
00066 
00067 char *
00068 txc_libc_strncpy (char *s1, const char *s2, size_t n)
00069 {
00070   reg_char c;
00071   char *s = s1;
00072 
00073   TM_BEGIN
00074   --s1;
00075 
00076   if (n >= 4)
00077     {
00078       size_t n4 = n >> 2;
00079 
00080       for (;;)
00081         {
00082           c = *s2++;
00083           *++s1 = c;
00084           if (c == '\0')
00085             break;
00086           c = *s2++;
00087           *++s1 = c;
00088           if (c == '\0')
00089             break;
00090           c = *s2++;
00091           *++s1 = c;
00092           if (c == '\0')
00093             break;
00094           c = *s2++;
00095           *++s1 = c;
00096           if (c == '\0')
00097             break;
00098           if (--n4 == 0)
00099             goto last_chars;
00100         }
00101       n = n - (s1 - s) - 1;
00102       if (n == 0)
00103         return s;
00104       goto zero_fill;
00105     }
00106 
00107  last_chars:
00108   n &= 3;
00109   if (n == 0)
00110     return s;
00111 
00112   do
00113     {
00114       c = *s2++;
00115       *++s1 = c;
00116       if (--n == 0)
00117         return s;
00118     }
00119   while (c != '\0');
00120 
00121  zero_fill:
00122   do
00123     *++s1 = '\0';
00124   while (--n > 0);
00125 
00126   return s;
00127   TM_END
00128 }
00129 
00130 
/* Append the NUL-terminated string SRC, terminator included, to the
   end of the NUL-terminated string DEST.  Returns DEST.

   DEST must have room for the combined string; no bounds are checked.

   The end of DEST is located by walking forward to its terminator, so
   no pointer before the start of DEST is formed even when DEST is the
   empty string.

   NOTE(review): unlike the other functions in this file, this one is
   not wrapped in TM_BEGIN/TM_END.  Confirm whether that is
   intentional.  */
char *
txc_libc_strcat (char *dest, const char *src)
{
  char *tail = dest;

  /* Find the terminator of DEST.  */
  while (*tail != '\0')
    tail++;

  /* Copy SRC over it, stopping once the NUL itself has been stored.  */
  for (;;)
    {
      char c = *src++;

      *tail++ = c;
      if (c == '\0')
        break;
    }

  return dest;
}
00156 
00157 
00158 char *
00159 txc_libc_strncat (char *s1, const char *s2, size_t n)
00160 {
00161   reg_char c;
00162   char *s = s1;
00163 
00164   TM_BEGIN 
00165   /* Find the end of S1.  */
00166   do
00167     c = *s1++;
00168   while (c != '\0');
00169 
00170   /* Make S1 point before next character, so we can increment
00171      it while memory is read (wins on pipelined cpus).  */
00172   s1 -= 2;
00173 
00174   if (n >= 4)
00175     {
00176       size_t n4 = n >> 2;
00177       do
00178         {
00179           c = *s2++;
00180           *++s1 = c;
00181           if (c == '\0')
00182             return s;
00183           c = *s2++;
00184           *++s1 = c;
00185           if (c == '\0')
00186             return s;
00187           c = *s2++;
00188           *++s1 = c;
00189           if (c == '\0')
00190             return s;
00191           c = *s2++;
00192           *++s1 = c;
00193           if (c == '\0')
00194             return s;
00195         } while (--n4 > 0);
00196       n &= 3;
00197     }
00198 
00199   while (n > 0)
00200     {
00201       c = *s2++;
00202       *++s1 = c;
00203       if (c == '\0')
00204         return s;
00205       n--;
00206     }
00207 
00208   if (c != '\0')
00209     *++s1 = '\0';
00210 
00211   return s;
00212   TM_END
00213 }
00214 
00215 
00216 int 
00217 txc_libc_strcmp (const char * a, const char * b)
00218 {
00219         char c1;
00220         char c2;
00221 
00222         TM_BEGIN
00223         do 
00224         {
00225                 int delta = (c1=*a++)-(c2=*b++);
00226                 if (delta)
00227                         return delta;
00228         } while (c1 && c2);
00229 
00230         return 0;
00231         TM_END
00232 }
00233 
00234 /* Compare no more than N characters of S1 and S2,
00235    returning less than, equal to or greater than zero
00236    if S1 is lexicographically less than, equal to or
00237    greater than S2.  */
00238 int
00239 txc_libc_strncmp (const char *s1, const char *s2, size_t n)
00240 {
00241   unsigned reg_char c1 = '\0';
00242   unsigned reg_char c2 = '\0';
00243 
00244   TM_BEGIN
00245   if (n >= 4)
00246     {
00247       size_t n4 = n >> 2;
00248       do
00249         {
00250     c1 = (unsigned char) *s1++;
00251     c2 = (unsigned char) *s2++;
00252     if (c1 == '\0' || c1 != c2)
00253       return c1 - c2;
00254     c1 = (unsigned char) *s1++;
00255     c2 = (unsigned char) *s2++;
00256     if (c1 == '\0' || c1 != c2)
00257       return c1 - c2;
00258     c1 = (unsigned char) *s1++;
00259     c2 = (unsigned char) *s2++;
00260     if (c1 == '\0' || c1 != c2)
00261       return c1 - c2;
00262     c1 = (unsigned char) *s1++;
00263     c2 = (unsigned char) *s2++;
00264     if (c1 == '\0' || c1 != c2)
00265       return c1 - c2;
00266   } while (--n4 > 0);
00267       n &= 3;
00268     }
00269 
00270   while (n > 0)
00271     {
00272       c1 = (unsigned char) *s1++;
00273       c2 = (unsigned char) *s2++;
00274       if (c1 == '\0' || c1 != c2)
00275   return c1 - c2;
00276       n--;
00277     }
00278 
00279   return c1 - c2;
00280   TM_END
00281 }
00282 
00283 
00284 int 
00285 txc_libc_strcasecmp(const char *s1, const char *s2) 
00286 {
00287   TM_BEGIN
00288   while (*s1 != '\0' && tolower(*s1) == tolower(*s2))
00289     {
00290       s1++;
00291       s2++;
00292     }
00293 
00294   return tolower(*(unsigned char *) s1) - tolower(*(unsigned char *) s2);
00295   TM_END
00296 }
00297 
00298 
00299 size_t 
00300 txc_libc_strlen(char *str)
00301 {
00302         char *s;
00303 
00304         TM_BEGIN
00305         if (str == NULL) {
00306                 return -1; /* invalid string */
00307         }
00308                                  
00309         for (s=str; *s; s++);
00310         return s-str;
00311         TM_END
00312 }
00313 
00314 
00315 
00316 /* Find the first occurrence of C in S.  */
00317 char *
00318 txc_libc_strchr (const char *s, int c_in)
00319 {
00320   const unsigned char *char_ptr;
00321   const unsigned long int *longword_ptr;
00322   unsigned long int longword, magic_bits, charmask;
00323   unsigned reg_char c;
00324 
00325   c = (unsigned char) c_in;
00326 
00327   TM_BEGIN
00328 
00329   /* Handle the first few characters by reading one character at a time.
00330      Do this until CHAR_PTR is aligned on a longword boundary.  */
00331   for (char_ptr = (const unsigned char *) s;
00332        ((unsigned long int) char_ptr & (sizeof (longword) - 1)) != 0;
00333        ++char_ptr)
00334     if (*char_ptr == c)
00335       return (void *) char_ptr;
00336     else if (*char_ptr == '\0')
00337       return NULL;
00338 
00339   /* All these elucidatory comments refer to 4-byte longwords,
00340      but the theory applies equally well to 8-byte longwords.  */
00341 
00342   longword_ptr = (unsigned long int *) char_ptr;
00343 
00344   /* Bits 31, 24, 16, and 8 of this number are zero.  Call these bits
00345      the "holes."  Note that there is a hole just to the left of
00346      each byte, with an extra at the end:
00347 
00348      bits:  01111110 11111110 11111110 11111111
00349      bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD
00350 
00351      The 1-bits make sure that carries propagate to the next 0-bit.
00352      The 0-bits provide holes for carries to fall into.  */
00353   switch (sizeof (longword))
00354     {
00355     case 4: magic_bits = 0x7efefeffL; break;
00356     case 8: magic_bits = ((0x7efefefeL << 16) << 16) | 0xfefefeffL; break;
00357     default:
00358       abort ();
00359     }
00360 
00361   /* Set up a longword, each of whose bytes is C.  */
00362   charmask = c | (c << 8);
00363   charmask |= charmask << 16;
00364   if (sizeof (longword) > 4)
00365     /* Do the shift in two steps to avoid a warning if long has 32 bits.  */
00366     charmask |= (charmask << 16) << 16;
00367   if (sizeof (longword) > 8)
00368     abort ();
00369 
00370   /* Instead of the traditional loop which tests each character,
00371      we will test a longword at a time.  The tricky part is testing
00372      if *any of the four* bytes in the longword in question are zero.  */
00373   for (;;)
00374     {
00375       /* We tentatively exit the loop if adding MAGIC_BITS to
00376          LONGWORD fails to change any of the hole bits of LONGWORD.
00377 
00378          1) Is this safe?  Will it catch all the zero bytes?
00379          Suppose there is a byte with all zeros.  Any carry bits
00380          propagating from its left will fall into the hole at its
00381          least significant bit and stop.  Since there will be no
00382          carry from its most significant bit, the LSB of the
00383          byte to the left will be unchanged, and the zero will be
00384          detected.
00385 
00386          2) Is this worthwhile?  Will it ignore everything except
00387          zero bytes?  Suppose every byte of LONGWORD has a bit set
00388          somewhere.  There will be a carry into bit 8.  If bit 8
00389          is set, this will carry into bit 16.  If bit 8 is clear,
00390          one of bits 9-15 must be set, so there will be a carry
00391          into bit 16.  Similarly, there will be a carry into bit
00392          24.  If one of bits 24-30 is set, there will be a carry
00393          into bit 31, so all of the hole bits will be changed.
00394 
00395          The one misfire occurs when bits 24-30 are clear and bit
00396          31 is set; in this case, the hole at bit 31 is not
00397          changed.  If we had access to the processor carry flag,
00398          we could close this loophole by putting the fourth hole
00399          at bit 32!
00400 
00401          So it ignores everything except 128's, when they're aligned
00402          properly.
00403 
00404          3) But wait!  Aren't we looking for C as well as zero?
00405          Good point.  So what we do is XOR LONGWORD with a longword,
00406          each of whose bytes is C.  This turns each byte that is C
00407          into a zero.  */
00408 
00409       longword = *longword_ptr++;
00410 
00411       /* Add MAGIC_BITS to LONGWORD.  */
00412       if ((((longword + magic_bits)
00413 
00414             /* Set those bits that were unchanged by the addition.  */
00415             ^ ~longword)
00416 
00417            /* Look at only the hole bits.  If any of the hole bits
00418               are unchanged, most likely one of the bytes was a
00419               zero.  */
00420            & ~magic_bits) != 0 ||
00421 
00422           /* That caught zeroes.  Now test for C.  */
00423           ((((longword ^ charmask) + magic_bits) ^ ~(longword ^ charmask))
00424            & ~magic_bits) != 0)
00425         {
00426           /* Which of the bytes was C or zero?
00427              If none of them were, it was a misfire; continue the search.  */
00428 
00429           const unsigned char *cp = (const unsigned char *) (longword_ptr - 1);
00430 
00431           if (*cp == c)
00432             return (char *) cp;
00433           else if (*cp == '\0')
00434             return NULL;
00435           if (*++cp == c)
00436             return (char *) cp;
00437           else if (*cp == '\0')
00438             return NULL;
00439           if (*++cp == c)
00440             return (char *) cp;
00441           else if (*cp == '\0')
00442             return NULL;
00443           if (*++cp == c)
00444             return (char *) cp;
00445           else if (*cp == '\0')
00446             return NULL;
00447           if (sizeof (longword) > 4)
00448             {
00449               if (*++cp == c)
00450                 return (char *) cp;
00451               else if (*cp == '\0')
00452                 return NULL;
00453               if (*++cp == c)
00454                 return (char *) cp;
00455               else if (*cp == '\0')
00456                 return NULL;
00457               if (*++cp == c)
00458                 return (char *) cp;
00459               else if (*cp == '\0')
00460                 return NULL;
00461               if (*++cp == c)
00462                 return (char *) cp;
00463               else if (*cp == '\0')
00464                 return NULL;
00465             }
00466         }
00467     }
00468 
00469   return NULL;
00470   TM_END
00471 }
00472 
00473 
00474 char *
00475 txc_libc_strrchr (const char *s, int c)
00476 {
00477   register const char *found, *p;
00478 
00479   c = (unsigned char) c;
00480 
00481   /* Since strchr is fast, we use it rather than the obvious loop.  */
00482 
00483   TM_BEGIN
00484   if (c == '\0')
00485     return strchr (s, '\0');
00486 
00487   found = NULL;
00488   while ((p = strchr (s, c)) != NULL)
00489     {
00490       found = p;
00491       s = p + 1;
00492     }
00493 
00494   return (char *) found;
00495   TM_END
00496 }
00497 
00498 
00499 int 
00500 txc_libc_strncasecmp (const char *s1, const char *s2,  size_t n)
00501 {
00502   if (n == 0)
00503     return 0;
00504 
00505   TM_BEGIN
00506   while (n-- != 0 && tolower(*s1) == tolower(*s2))
00507     {
00508       if (n == 0 || *s1 == '\0' || *s2 == '\0')
00509   break;
00510       s1++;
00511       s2++;
00512     }
00513 
00514   return tolower(*(unsigned char *) s1) - tolower(*(unsigned char *) s2);
00515   TM_END
00516 }