kernel_optimize_test/lib/vsprintf.c

1167 lines
28 KiB
C
Raw Normal View History

/*
* linux/lib/vsprintf.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
*/
/* vsprintf.c -- Lars Wirzenius & Linus Torvalds. */
/*
* Wirzenius wrote this portably, Torvalds fucked it up :-)
*/
/*
* Fri Jul 13 2001 Crutcher Dunnavant <crutcher+kernel@datastacks.com>
* - changed to provide snprintf and vsnprintf functions
* So Feb 1 16:51:32 CET 2004 Juergen Quade <quade@hsnr.de>
* - scnprintf and vscnprintf
*/
#include <stdarg.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/ctype.h>
#include <linux/kernel.h>
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
#include <asm/page.h> /* for PAGE_SIZE */
#include <asm/div64.h>
/* Works only for digits and letters, but small and fast */
#define TOLOWER(x) ((x) | 0x20)

/**
 * simple_strtoul - convert a string to an unsigned long
 * @cp: The start of the string
 * @endp: If non-NULL, receives a pointer to the first unparsed character
 * @base: The number base to use; 0 selects it from the prefix
 *        ("0x" followed by a hex digit -> 16, leading "0" -> 8, else 10)
 *
 * Parsing stops at the first character that is not a digit of @base.
 * No sign, whitespace or overflow handling is done here.
 */
unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base)
{
	unsigned long acc = 0;

	if (base == 0) {
		if (*cp != '0') {
			base = 10;
		} else {
			/* consume the leading '0', then look for an "x<hexdigit>" */
			cp++;
			if (TOLOWER(*cp) == 'x' && isxdigit(cp[1])) {
				cp++;
				base = 16;
			} else {
				base = 8;
			}
		}
	} else if (base == 16 && cp[0] == '0' && TOLOWER(cp[1]) == 'x') {
		/* an explicit base 16 may still carry an optional "0x" */
		cp += 2;
	}

	for (; isxdigit(*cp); cp++) {
		unsigned long digit;

		if (isdigit(*cp))
			digit = *cp - '0';
		else
			digit = TOLOWER(*cp) - 'a' + 10;
		if (digit >= base)
			break;
		acc = acc * base + digit;
	}

	if (endp)
		*endp = (char *)cp;
	return acc;
}
EXPORT_SYMBOL(simple_strtoul);
/**
 * simple_strtol - convert a string to a signed long
 * @cp: The start of the string
 * @endp: If non-NULL, receives a pointer to the first unparsed character
 * @base: The number base to use
 *
 * A single leading '-' negates the value; everything else is delegated
 * to simple_strtoul().
 */
long simple_strtol(const char *cp, char **endp, unsigned int base)
{
	int negative = (*cp == '-');
	unsigned long magnitude;

	magnitude = simple_strtoul(negative ? cp + 1 : cp, endp, base);
	return negative ? -magnitude : magnitude;
}
EXPORT_SYMBOL(simple_strtol);
/**
 * simple_strtoull - convert a string to an unsigned long long
 * @cp: The start of the string
 * @endp: If non-NULL, receives a pointer to the first unparsed character
 * @base: The number base to use; 0 selects it from the prefix
 *        ("0x" followed by a hex digit -> 16, leading "0" -> 8, else 10)
 *
 * Parsing stops at the first character that is not a digit of @base.
 * No sign, whitespace or overflow handling is done here.
 */
unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base)
{
	unsigned long long total = 0;
	unsigned long long digit;

	switch (base) {
	case 0:
		base = 10;
		if (*cp == '0') {
			base = 8;
			cp++;
			if ((TOLOWER(*cp) == 'x') && isxdigit(cp[1])) {
				cp++;
				base = 16;
			}
		}
		break;
	case 16:
		/* optional "0x" in front of an explicitly hexadecimal number */
		if (cp[0] == '0' && TOLOWER(cp[1]) == 'x')
			cp += 2;
		break;
	default:
		break;
	}

	while (isxdigit(*cp)) {
		digit = isdigit(*cp) ? *cp - '0' : TOLOWER(*cp) - 'a' + 10;
		if (digit >= base)
			break;
		total = total * base + digit;
		cp++;
	}

	if (endp)
		*endp = (char *)cp;
	return total;
}
EXPORT_SYMBOL(simple_strtoull);
/**
 * simple_strtoll - convert a string to a signed long long
 * @cp: The start of the string
 * @endp: If non-NULL, receives a pointer to the first unparsed character
 * @base: The number base to use
 *
 * A single leading '-' negates the value; everything else is delegated
 * to simple_strtoull().
 */
long long simple_strtoll(const char *cp, char **endp, unsigned int base)
{
	unsigned long long magnitude;

	if (*cp != '-')
		return simple_strtoull(cp, endp, base);

	magnitude = simple_strtoull(cp + 1, endp, base);
	return -magnitude;
}
Add new string functions strict_strto* and convert kernel params to use them Currently, for every sysfs node, the callers will be responsible for implementing store operation, so many many callers are doing duplicate things to validate input, they have the same mistakes because they are calling simple_strtol/ul/ll/uul, especially for module params, they are just numeric, but you can echo such values as 0x1234xxx, 07777888 and 1234aaa, for these cases, module params store operation just ignores succesive invalid char and converts prefix part to a numeric although input is acctually invalid. This patch tries to fix the aforementioned issues and implements strict_strtox serial functions, kernel/params.c uses them to strictly validate input, so module params will reject such values as 0x1234xxxx and returns an error: write error: Invalid argument Any modules which export numeric sysfs node can use strict_strtox instead of simple_strtox to reject any invalid input. Here are some test results: Before applying this patch: [root@yangyi-dev /]# cat /sys/module/e1000/parameters/copybreak 4096 [root@yangyi-dev /]# echo 0x1000 > /sys/module/e1000/parameters/copybreak [root@yangyi-dev /]# cat /sys/module/e1000/parameters/copybreak 4096 [root@yangyi-dev /]# echo 0x1000g > /sys/module/e1000/parameters/copybreak [root@yangyi-dev /]# cat /sys/module/e1000/parameters/copybreak 4096 [root@yangyi-dev /]# echo 0x1000gggggggg > /sys/module/e1000/parameters/copybreak [root@yangyi-dev /]# cat /sys/module/e1000/parameters/copybreak 4096 [root@yangyi-dev /]# echo 010000 > /sys/module/e1000/parameters/copybreak [root@yangyi-dev /]# cat /sys/module/e1000/parameters/copybreak 4096 [root@yangyi-dev /]# echo 0100008 > /sys/module/e1000/parameters/copybreak [root@yangyi-dev /]# cat /sys/module/e1000/parameters/copybreak 4096 [root@yangyi-dev /]# echo 010000aaaaa > /sys/module/e1000/parameters/copybreak [root@yangyi-dev /]# cat /sys/module/e1000/parameters/copybreak 4096 [root@yangyi-dev /]# After 
applying this patch: [root@yangyi-dev /]# cat /sys/module/e1000/parameters/copybreak 4096 [root@yangyi-dev /]# echo 0x1000 > /sys/module/e1000/parameters/copybreak [root@yangyi-dev /]# cat /sys/module/e1000/parameters/copybreak 4096 [root@yangyi-dev /]# echo 0x1000g > /sys/module/e1000/parameters/copybreak -bash: echo: write error: Invalid argument [root@yangyi-dev /]# cat /sys/module/e1000/parameters/copybreak 4096 [root@yangyi-dev /]# echo 0x1000gggggggg > /sys/module/e1000/parameters/copybreak -bash: echo: write error: Invalid argument [root@yangyi-dev /]# echo 010000 > /sys/module/e1000/parameters/copybreak [root@yangyi-dev /]# echo 0100008 > /sys/module/e1000/parameters/copybreak -bash: echo: write error: Invalid argument [root@yangyi-dev /]# echo 010000aaaaa > /sys/module/e1000/parameters/copybreak -bash: echo: write error: Invalid argument [root@yangyi-dev /]# cat /sys/module/e1000/parameters/copybreak 4096 [root@yangyi-dev /]# echo -n 4096 > /sys/module/e1000/parameters/copybreak [root@yangyi-dev /]# cat /sys/module/e1000/parameters/copybreak 4096 [root@yangyi-dev /]# [akpm@linux-foundation.org: fix compiler warnings] [akpm@linux-foundation.org: fix off-by-one found by tiwai@suse.de] Signed-off-by: Yi Yang <yi.y.yang@intel.com> Cc: Greg KH <greg@kroah.com> Cc: "Randy.Dunlap" <rdunlap@xenotime.net> Cc: Takashi Iwai <tiwai@suse.de> Cc: Hugh Dickins <hugh@veritas.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-08 20:21:57 +08:00
/**
 * strict_strtoul - convert a string to an unsigned long strictly
 * @cp: The string to be converted
 * @base: The number base to use
 * @res: The converted result value
 *
 * strict_strtoul converts a string to an unsigned long only if the
 * whole string really is an unsigned long number. Any string with an
 * invalid character at the tail is rejected and -EINVAL is returned.
 * Only a single newline at the tail is acceptable, because people generally
 * change a module parameter in the following way:
 *
 *	echo 1024 > /sys/module/e1000/parameters/copybreak
 *
 * and echo appends a newline to the tail.
 *
 * It returns 0 if conversion is successful and *res is set to the converted
 * value, otherwise it returns -EINVAL and *res is set to 0.
 *
 * simple_strtoul, by contrast, just ignores trailing invalid characters
 * and returns the converted value of the prefix part of the string.
 */
int strict_strtoul(const char *cp, unsigned int base, unsigned long *res);
/**
 * strict_strtol - convert a string to a long strictly
 * @cp: The string to be converted
 * @base: The number base to use
 * @res: The converted result value
 *
 * strict_strtol is similar to strict_strtoul, but it allows the first
 * character of the string to be '-'.
 *
 * It returns 0 if conversion is successful and *res is set to the converted
 * value, otherwise it returns -EINVAL and *res is set to 0.
 */
int strict_strtol(const char *cp, unsigned int base, long *res);
/**
 * strict_strtoull - convert a string to an unsigned long long strictly
 * @cp: The string to be converted
 * @base: The number base to use
 * @res: The converted result value
 *
 * strict_strtoull converts a string to an unsigned long long only if the
 * whole string really is an unsigned long long number. Any string with an
 * invalid character at the tail is rejected and -EINVAL is returned.
 * Only a single newline at the tail is acceptable, because people generally
 * change a module parameter in the following way:
 *
 *	echo 1024 > /sys/module/e1000/parameters/copybreak
 *
 * and echo appends a newline to the tail of the string.
 *
 * It returns 0 if conversion is successful and *res is set to the converted
 * value, otherwise it returns -EINVAL and *res is set to 0.
 *
 * simple_strtoull, by contrast, just ignores trailing invalid characters
 * and returns the converted value of the prefix part of the string.
 */
int strict_strtoull(const char *cp, unsigned int base, unsigned long long *res);
/**
 * strict_strtoll - convert a string to a long long strictly
 * @cp: The string to be converted
 * @base: The number base to use
 * @res: The converted result value
 *
 * strict_strtoll is similar to strict_strtoull, but it allows the first
 * character of the string to be '-'.
 *
 * It returns 0 if conversion is successful and *res is set to the converted
 * value, otherwise it returns -EINVAL and *res is set to 0.
 */
int strict_strtoll(const char *cp, unsigned int base, long long *res);
/*
 * define_strict_strtoux - generate strict_strtou<type>()
 * @type: suffix of the function to generate ("l" or "ll")
 * @valtype: the unsigned result type matching @type
 *
 * The generated function parses @cp with simple_strtou<type>(), so the
 * "ll" variant keeps the full unsigned long long range instead of being
 * squeezed through unsigned long.  The input is accepted only when
 *   - at least one character was consumed by the parser, and
 *   - parsing stopped at the terminating NUL, or at a single '\n' that is
 *     the very last character of the string.
 * On success *res holds the value and 0 is returned; otherwise *res is 0
 * and -EINVAL is returned.
 *
 * The last line of the macro deliberately carries no line continuation,
 * so whatever follows the definition is not spliced into the macro body.
 */
#define define_strict_strtoux(type, valtype) \
int strict_strtou##type(const char *cp, unsigned int base, valtype *res) \
{ \
	char *tail; \
	valtype val; \
	size_t len; \
	\
	*res = 0; \
	len = strlen(cp); \
	if (len == 0) \
		return -EINVAL; \
	\
	val = simple_strtou##type(cp, &tail, base); \
	if (tail == cp) \
		return -EINVAL; /* nothing parsed, e.g. a lone "\n" */ \
	if ((*tail == '\0') || \
	    ((len == (size_t)(tail - cp) + 1) && (*tail == '\n'))) { \
		*res = val; \
		return 0; \
	} \
	\
	return -EINVAL; \
}
/*
 * define_strict_strtox - generate the signed strict_strto<type>()
 * @type: suffix of the function to generate ("l" or "ll")
 * @valtype: the signed result type, spelled so that "unsigned @valtype"
 *           is also a valid type ("long" or "long long")
 *
 * The generated function accepts an optional leading '-' and hands the
 * rest of the string to strict_strtou<type>().  The magnitude is parsed
 * into an unsigned temporary and negated in unsigned arithmetic, which
 * avoids passing a signed pointer where an unsigned one is expected and
 * avoids signed overflow when negating the most negative value.
 * On failure the unsigned helper leaves the temporary at 0, so *res is 0
 * and the helper's error code is returned unchanged.
 *
 * The last line of the macro deliberately carries no line continuation,
 * so whatever follows the definition is not spliced into the macro body.
 */
#define define_strict_strtox(type, valtype) \
int strict_strto##type(const char *cp, unsigned int base, valtype *res) \
{ \
	unsigned valtype magnitude; \
	int negative = (*cp == '-'); \
	int ret; \
	\
	ret = strict_strtou##type(negative ? cp + 1 : cp, base, &magnitude); \
	if (negative) \
		magnitude = 0 - magnitude; \
	*res = (valtype)magnitude; \
	\
	return ret; \
}
/*
 * Instantiate the strict converters for long and long long and export them.
 * Each unsigned variant is generated before the signed variant, because the
 * signed function calls its unsigned counterpart.
 */
define_strict_strtoux(l, unsigned long)
define_strict_strtox(l, long)
define_strict_strtoux(ll, unsigned long long)
define_strict_strtox(ll, long long)
EXPORT_SYMBOL(strict_strtoul);
EXPORT_SYMBOL(strict_strtol);
EXPORT_SYMBOL(strict_strtoll);
EXPORT_SYMBOL(strict_strtoull);
/*
 * Parse the run of decimal digits at *s and return its value.
 * *s is advanced past the digits; if there are none, 0 is returned and
 * *s is left where it was.
 */
static int skip_atoi(const char **s)
{
	int value = 0;

	for (; isdigit(**s); (*s)++)
		value = value*10 + **s - '0';

	return value;
}
vsprintf.c: optimizing, part 2: base 10 conversion speedup, v2 Optimize integer-to-string conversion in vsprintf.c for base 10. This is by far the most used conversion, and in some use cases it impacts performance. For example, top reads /proc/$PID/stat for every process, and with 4000 processes decimal conversion alone takes noticeable time. Using code from http://www.cs.uiowa.edu/~jones/bcd/decimal.html (with permission from the author, Douglas W. Jones) binary-to-decimal-string conversion is done in groups of five digits at once, using only additions/subtractions/shifts (with -O2; -Os throws in some multiply instructions). On i386 arch gcc 4.1.2 -O2 generates ~500 bytes of code. This patch is run tested. Userspace benchmark/test is also attached. I tested it on PIII and AMD64 and new code is generally ~2.5 times faster. On AMD64: # ./vsprintf_verify-O2 Original decimal conv: .......... 151 ns per iteration Patched decimal conv: .......... 62 ns per iteration Testing correctness 12895992590592 ok... [Ctrl-C] # ./vsprintf_verify-O2 Original decimal conv: .......... 151 ns per iteration Patched decimal conv: .......... 62 ns per iteration Testing correctness 26025406464 ok... 
[Ctrl-C] More realistic test: top from busybox project was modified to report how many us it took to scan /proc (this does not account any processing done after that, like sorting process list), and then I test it with 4000 processes: #!/bin/sh i=4000 while test $i != 0; do sleep 30 & let i-- done busybox top -b -n3 >/dev/null on unpatched kernel: top: 4120 processes took 102864 microseconds to scan top: 4120 processes took 91757 microseconds to scan top: 4120 processes took 92517 microseconds to scan top: 4120 processes took 92581 microseconds to scan on patched kernel: top: 4120 processes took 75460 microseconds to scan top: 4120 processes took 66451 microseconds to scan top: 4120 processes took 67267 microseconds to scan top: 4120 processes took 67618 microseconds to scan The speedup comes from much faster generation of /proc/PID/stat by sprintf() calls inside the kernel. Signed-off-by: Douglas W Jones <jones@cs.uiowa.edu> Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-07-16 14:41:56 +08:00
/* Decimal conversion is by far the most typical, and is used
* for /proc and /sys data. This directly impacts e.g. top performance
* with many processes running. We optimize it for speed
* using code from
* http://www.cs.uiowa.edu/~jones/bcd/decimal.html
* (with permission from the author, Douglas W. Jones). */
/* Formats correctly any integer in [0,99999].
 * Emits one to five digits, least significant first, with no leading
 * zeroes, and returns a pointer just past the last digit written.
 * The value is split into 4-bit groups which are recombined into decimal
 * digits; each "divide by 10" is a multiply and shift. */
static char* put_dec_trunc(char *buf, unsigned q)
{
	unsigned nib0 = q & 0xf;
	unsigned nib1 = (q >> 4) & 0xf;
	unsigned nib2 = (q >> 8) & 0xf;
	unsigned nib3 = q >> 12;
	unsigned acc, carry;

	/* least significant digit */
	acc = 6*(nib3 + nib2 + nib1) + nib0;
	carry = (acc * 0xcd) >> 11;
	*buf++ = (acc - 10*carry) + '0';

	/* tens */
	acc = carry + 9*nib3 + 5*nib2 + nib1;
	if (acc == 0)
		return buf;
	carry = (acc * 0xcd) >> 11;
	*buf++ = (acc - 10*carry) + '0';

	/* hundreds */
	acc = carry + 2*nib2;
	if (acc == 0 && nib3 == 0)
		return buf;
	carry = (acc * 0xd) >> 7;
	*buf++ = (acc - 10*carry) + '0';

	/* thousands */
	acc = carry + 4*nib3;
	if (acc == 0)
		return buf;
	carry = (acc * 0xcd) >> 11;
	*buf++ = (acc - 10*carry) + '0';

	/* most significant digit, only when present */
	if (carry != 0)
		*buf++ = carry + '0';

	return buf;
}
/* Branch-free variant of the five-digit conversion: always emits exactly
 * five digits, least significant first (so small values get trailing '0'
 * characters in the buffer), and returns buf + 5.
 *
 * Approximate division by 10 is done by multiply-and-shift.  Candidates:
 *   (x * 0xcd) >> 11 - shorter code than * 0x67 (on i386)
 *   (x * 0x67) >> 10
 *   (x * 0x34) >> 9
 *   (x * 0x1a) >> 8
 *   (x * 0x0d) >> 7  - shortest code (on i386)
 * gcc -O2 turns the multiplies into shifts and adds.  Full-sized ints are
 * used throughout because gcc produces better code for them. */
static char* put_dec_full(char *buf, unsigned q)
{
	unsigned nib0 = q & 0xf;
	unsigned nib1 = (q >> 4) & 0xf;
	unsigned nib2 = (q >> 8) & 0xf;
	unsigned nib3 = q >> 12;
	unsigned acc, carry;

	acc = 6*(nib3 + nib2 + nib1) + nib0;
	carry = (acc * 0xcd) >> 11;
	buf[0] = (acc - 10*carry) + '0';

	acc = carry + 9*nib3 + 5*nib2 + nib1;
	carry = (acc * 0xcd) >> 11;
	buf[1] = (acc - 10*carry) + '0';

	acc = carry + 2*nib2;
	carry = (acc * 0xd) >> 7;
	buf[2] = (acc - 10*carry) + '0';

	acc = carry + 4*nib3;
	carry = (acc * 0xcd) >> 11;	/* (acc * 0x67) >> 10 would also work */
	buf[3] = (acc - 10*carry) + '0';
	buf[4] = carry + '0';

	return buf + 5;
}
/* Write num in decimal, least significant digit first, and return a
 * pointer just past the last digit.  The number is peeled off in groups
 * of five digits: every full group goes through put_dec_full(), the final
 * (most significant) group through put_dec_trunc() so that no leading
 * zeroes are produced.
 * Not inlined: that helps gcc to use registers better. */
static noinline char* put_dec(char *buf, unsigned long long num)
{
	while (num >= 100000) {
		/* do_div() leaves the quotient in num and yields the remainder */
		unsigned low5 = do_div(num, 100000);

		buf = put_dec_full(buf, low5);
	}
	return put_dec_trunc(buf, num);
}
/*
 * Formatting flags passed to number() (and LEFT also to string()).
 *
 * SMALL is ORed directly into the generated digit characters to turn
 * 'A'..'F' and 'X' into lowercase, which is why it has to be exactly 0x20.
 * number() drops ZEROPAD when LEFT is set.
 */
#define ZEROPAD 1 /* pad with zero */
#define SIGN 2 /* unsigned/signed long */
#define PLUS 4 /* show plus */
#define SPACE 8 /* space if plus */
#define LEFT 16 /* left justified */
#define SMALL 32 /* Must be 32 == 0x20 */
#define SPECIAL 64 /* 0x */
static char *number(char *buf, char *end, unsigned long long num, int base, int size, int precision, int type)
{
/* we are called with base 8, 10 or 16, only, thus don't need "G..." */
static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
char tmp[66];
char sign;
char locase;
int need_pfx = ((type & SPECIAL) && base != 10);
int i;
/* locase = 0 or 0x20. ORing digits or letters with 'locase'
* produces same digits or (maybe lowercased) letters */
locase = (type & SMALL);
if (type & LEFT)
type &= ~ZEROPAD;
sign = 0;
if (type & SIGN) {
if ((signed long long) num < 0) {
sign = '-';
num = - (signed long long) num;
size--;
} else if (type & PLUS) {
sign = '+';
size--;
} else if (type & SPACE) {
sign = ' ';
size--;
}
}
if (need_pfx) {
size--;
if (base == 16)
size--;
}
/* generate full string in tmp[], in reverse order */
i = 0;
if (num == 0)
tmp[i++] = '0';
vsprintf.c: optimizing, part 2: base 10 conversion speedup, v2 Optimize integer-to-string conversion in vsprintf.c for base 10. This is by far the most used conversion, and in some use cases it impacts performance. For example, top reads /proc/$PID/stat for every process, and with 4000 processes decimal conversion alone takes noticeable time. Using code from http://www.cs.uiowa.edu/~jones/bcd/decimal.html (with permission from the author, Douglas W. Jones) binary-to-decimal-string conversion is done in groups of five digits at once, using only additions/subtractions/shifts (with -O2; -Os throws in some multiply instructions). On i386 arch gcc 4.1.2 -O2 generates ~500 bytes of code. This patch is run tested. Userspace benchmark/test is also attached. I tested it on PIII and AMD64 and new code is generally ~2.5 times faster. On AMD64: # ./vsprintf_verify-O2 Original decimal conv: .......... 151 ns per iteration Patched decimal conv: .......... 62 ns per iteration Testing correctness 12895992590592 ok... [Ctrl-C] # ./vsprintf_verify-O2 Original decimal conv: .......... 151 ns per iteration Patched decimal conv: .......... 62 ns per iteration Testing correctness 26025406464 ok... 
[Ctrl-C] More realistic test: top from busybox project was modified to report how many us it took to scan /proc (this does not account any processing done after that, like sorting process list), and then I test it with 4000 processes: #!/bin/sh i=4000 while test $i != 0; do sleep 30 & let i-- done busybox top -b -n3 >/dev/null on unpatched kernel: top: 4120 processes took 102864 microseconds to scan top: 4120 processes took 91757 microseconds to scan top: 4120 processes took 92517 microseconds to scan top: 4120 processes took 92581 microseconds to scan on patched kernel: top: 4120 processes took 75460 microseconds to scan top: 4120 processes took 66451 microseconds to scan top: 4120 processes took 67267 microseconds to scan top: 4120 processes took 67618 microseconds to scan The speedup comes from much faster generation of /proc/PID/stat by sprintf() calls inside the kernel. Signed-off-by: Douglas W Jones <jones@cs.uiowa.edu> Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-07-16 14:41:56 +08:00
/* Generic code, for any base:
else do {
tmp[i++] = (digits[do_div(num,base)] | locase);
vsprintf.c: optimizing, part 2: base 10 conversion speedup, v2 Optimize integer-to-string conversion in vsprintf.c for base 10. This is by far the most used conversion, and in some use cases it impacts performance. For example, top reads /proc/$PID/stat for every process, and with 4000 processes decimal conversion alone takes noticeable time. Using code from http://www.cs.uiowa.edu/~jones/bcd/decimal.html (with permission from the author, Douglas W. Jones) binary-to-decimal-string conversion is done in groups of five digits at once, using only additions/subtractions/shifts (with -O2; -Os throws in some multiply instructions). On i386 arch gcc 4.1.2 -O2 generates ~500 bytes of code. This patch is run tested. Userspace benchmark/test is also attached. I tested it on PIII and AMD64 and new code is generally ~2.5 times faster. On AMD64: # ./vsprintf_verify-O2 Original decimal conv: .......... 151 ns per iteration Patched decimal conv: .......... 62 ns per iteration Testing correctness 12895992590592 ok... [Ctrl-C] # ./vsprintf_verify-O2 Original decimal conv: .......... 151 ns per iteration Patched decimal conv: .......... 62 ns per iteration Testing correctness 26025406464 ok... 
[Ctrl-C] More realistic test: top from busybox project was modified to report how many us it took to scan /proc (this does not account any processing done after that, like sorting process list), and then I test it with 4000 processes: #!/bin/sh i=4000 while test $i != 0; do sleep 30 & let i-- done busybox top -b -n3 >/dev/null on unpatched kernel: top: 4120 processes took 102864 microseconds to scan top: 4120 processes took 91757 microseconds to scan top: 4120 processes took 92517 microseconds to scan top: 4120 processes took 92581 microseconds to scan on patched kernel: top: 4120 processes took 75460 microseconds to scan top: 4120 processes took 66451 microseconds to scan top: 4120 processes took 67267 microseconds to scan top: 4120 processes took 67618 microseconds to scan The speedup comes from much faster generation of /proc/PID/stat by sprintf() calls inside the kernel. Signed-off-by: Douglas W Jones <jones@cs.uiowa.edu> Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-07-16 14:41:56 +08:00
} while (num != 0);
*/
else if (base != 10) { /* 8 or 16 */
int mask = base - 1;
int shift = 3;
if (base == 16) shift = 4;
do {
tmp[i++] = (digits[((unsigned char)num) & mask] | locase);
num >>= shift;
} while (num);
vsprintf.c: optimizing, part 2: base 10 conversion speedup, v2 Optimize integer-to-string conversion in vsprintf.c for base 10. This is by far the most used conversion, and in some use cases it impacts performance. For example, top reads /proc/$PID/stat for every process, and with 4000 processes decimal conversion alone takes noticeable time. Using code from http://www.cs.uiowa.edu/~jones/bcd/decimal.html (with permission from the author, Douglas W. Jones) binary-to-decimal-string conversion is done in groups of five digits at once, using only additions/subtractions/shifts (with -O2; -Os throws in some multiply instructions). On i386 arch gcc 4.1.2 -O2 generates ~500 bytes of code. This patch is run tested. Userspace benchmark/test is also attached. I tested it on PIII and AMD64 and new code is generally ~2.5 times faster. On AMD64: # ./vsprintf_verify-O2 Original decimal conv: .......... 151 ns per iteration Patched decimal conv: .......... 62 ns per iteration Testing correctness 12895992590592 ok... [Ctrl-C] # ./vsprintf_verify-O2 Original decimal conv: .......... 151 ns per iteration Patched decimal conv: .......... 62 ns per iteration Testing correctness 26025406464 ok... 
[Ctrl-C] More realistic test: top from busybox project was modified to report how many us it took to scan /proc (this does not account any processing done after that, like sorting process list), and then I test it with 4000 processes: #!/bin/sh i=4000 while test $i != 0; do sleep 30 & let i-- done busybox top -b -n3 >/dev/null on unpatched kernel: top: 4120 processes took 102864 microseconds to scan top: 4120 processes took 91757 microseconds to scan top: 4120 processes took 92517 microseconds to scan top: 4120 processes took 92581 microseconds to scan on patched kernel: top: 4120 processes took 75460 microseconds to scan top: 4120 processes took 66451 microseconds to scan top: 4120 processes took 67267 microseconds to scan top: 4120 processes took 67618 microseconds to scan The speedup comes from much faster generation of /proc/PID/stat by sprintf() calls inside the kernel. Signed-off-by: Douglas W Jones <jones@cs.uiowa.edu> Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-07-16 14:41:56 +08:00
} else { /* base 10 */
i = put_dec(tmp, num) - tmp;
}
/* printing 100 using %2d gives "100", not "00" */
if (i > precision)
precision = i;
/* leading space padding */
size -= precision;
if (!(type & (ZEROPAD+LEFT))) {
while(--size >= 0) {
if (buf < end)
*buf = ' ';
++buf;
}
}
/* sign */
if (sign) {
if (buf < end)
*buf = sign;
++buf;
}
/* "0x" / "0" prefix */
if (need_pfx) {
if (buf < end)
*buf = '0';
++buf;
if (base == 16) {
if (buf < end)
*buf = ('X' | locase);
++buf;
}
}
/* zero or space padding */
if (!(type & LEFT)) {
char c = (type & ZEROPAD) ? '0' : ' ';
while (--size >= 0) {
if (buf < end)
*buf = c;
++buf;
}
}
/* hmm even more zero padding? */
while (i <= --precision) {
if (buf < end)
*buf = '0';
++buf;
}
/* actual digits of result */
while (--i >= 0) {
if (buf < end)
*buf = tmp[i];
++buf;
}
/* trailing space padding */
while (--size >= 0) {
if (buf < end)
*buf = ' ';
++buf;
}
return buf;
}
/*
 * Copy at most @precision characters of @s into the bounded buffer, padded
 * with spaces to @field_width (on the left unless LEFT is set in @flags).
 * Pointers below PAGE_SIZE are printed as "<NULL>" instead of being
 * dereferenced.  Nothing is stored at or past @end, but the returned
 * position still advances as if it had been.
 */
static char *string(char *buf, char *end, char *s, int field_width, int precision, int flags)
{
	int len, pad, left;

	if ((unsigned long)s < PAGE_SIZE)
		s = "<NULL>";

	len = strnlen(s, precision);
	pad = field_width - len;

	/* right-justified: the padding comes first and is used up here */
	if (!(flags & LEFT)) {
		for (; pad > 0; pad--) {
			if (buf < end)
				*buf = ' ';
			++buf;
		}
	}

	for (left = len; left > 0; left--) {
		if (buf < end)
			*buf = *s;
		++buf;
		++s;
	}

	/* left-justified: whatever padding remains goes after the text */
	for (; pad > 0; pad--) {
		if (buf < end)
			*buf = ' ';
		++buf;
	}

	return buf;
}
/*
 * On ia64 and ppc64 try to read the pointer stored at @ptr and return that
 * instead; if the read fails, or on any other configuration, @ptr is
 * returned unchanged.
 */
static inline void *dereference_function_descriptor(void *ptr)
{
#if defined(CONFIG_IA64) || defined(CONFIG_PPC64)
	void *target;

	if (probe_kernel_address(ptr, target) == 0)
		ptr = target;
#endif
	return ptr;
}
/*
 * Print @ptr symbolically.  With CONFIG_KALLSYMS the text produced by
 * sprint_symbol() is emitted through string(); without it the address is
 * printed as zero-padded lowercase hex with a "0x" prefix, two digits per
 * byte of a pointer.
 */
static char *symbol_string(char *buf, char *end, void *ptr, int field_width, int precision, int flags)
{
	unsigned long addr = (unsigned long) ptr;
#ifdef CONFIG_KALLSYMS
	char name[KSYM_SYMBOL_LEN];

	sprint_symbol(name, addr);
	return string(buf, end, name, field_width, precision, flags);
#else
	return number(buf, end, addr, 16, (int)(2*sizeof(void *)), precision,
		      flags | SPECIAL | SMALL | ZEROPAD);
#endif
}
/*
 * Show a '%p' thing.  As a kernel extension, '%p' may be followed by
 * extra alphanumeric characters acting as extended format specifiers;
 * @fmt points at the first character after the 'p'.
 *
 * Handled so far:
 *   'F' - symbolic Function descriptor pointer
 *   'S' - Symbolic direct pointer
 * More can be added later (network address types etc).
 *
 * 'F' differs from 'S' in that on ia64 and ppc64 function pointers are
 * really function descriptors, which contain a pointer to the real
 * address, so the descriptor is dereferenced first.
 *
 * Anything else prints the raw address in lowercase hex; when no field
 * width was given (-1) it is zero-padded to two digits per pointer byte.
 */
static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field_width, int precision, int flags)
{
	char spec = *fmt;

	if (spec == 'F')
		ptr = dereference_function_descriptor(ptr);
	if (spec == 'F' || spec == 'S')
		return symbol_string(buf, end, ptr, field_width, precision, flags);

	flags |= SMALL;
	if (field_width == -1) {
		field_width = 2*sizeof(void *);
		flags |= ZEROPAD;
	}
	return number(buf, end, (unsigned long) ptr, 16, field_width, precision, flags);
}
/**
 * vsnprintf - Format a string and place it in a buffer
 * @buf: The buffer to place the result into
 * @size: The size of the buffer, including the trailing null space
 * @fmt: The format string to use
 * @args: Arguments for the format string
 *
 * The return value is the number of characters which would
 * be generated for the given input, excluding the trailing
 * '\0', as per ISO C99. If you want to have the exact
 * number of characters written into @buf as return value
 * (not including the trailing '\0'), use vscnprintf(). If the
 * return is greater than or equal to @size, the resulting
 * string is truncated.
 *
 * A @size that is negative when viewed as an int is rejected: a warning
 * is raised the first time this happens and 0 is returned without
 * writing to @buf.
 *
 * Call this function if you are already dealing with a va_list.
 * You probably want snprintf() instead.
 */
int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
{
	unsigned long long num;
	int base;
	char *str, *end, c;

	int flags;		/* flags to number() */

	int field_width;	/* width of output field */
	int precision;		/* min. # of digits for integers; max
				   number of chars taken from a string */
	int qualifier;		/* 'h', 'l', or 'L' for integer fields */
				/* 'z' support added 23/7/1999 S.H.    */
				/* 'z' changed to 'Z' --davidm 1/25/99 */
				/* 't' added for ptrdiff_t */

	/* Reject out-of-range values early.  Large positive sizes are
	   used for unknown buffer sizes. */
	if (unlikely((int) size < 0)) {
		/* There can be only one.. */
		static char warn = 1;
		WARN_ON(warn);
		warn = 0;
		return 0;
	}

	str = buf;
	end = buf + size;

	/* Make sure end is always >= buf */
	if (end < buf) {
		end = ((void *)-1);
		size = end - buf;
	}

	/*
	 * str keeps advancing even once it has passed end, so that the
	 * final str - buf is the length the full output would have had;
	 * every store is guarded by a str < end check.
	 */
	for (; *fmt ; ++fmt) {
		if (*fmt != '%') {
			if (str < end)
				*str = *fmt;
			++str;
			continue;
		}

		/* process flags */
		flags = 0;
repeat:
		++fmt;		/* this also skips first '%' */
		switch (*fmt) {
		case '-': flags |= LEFT; goto repeat;
		case '+': flags |= PLUS; goto repeat;
		case ' ': flags |= SPACE; goto repeat;
		case '#': flags |= SPECIAL; goto repeat;
		case '0': flags |= ZEROPAD; goto repeat;
		}

		/* get field width */
		field_width = -1;
		if (isdigit(*fmt))
			field_width = skip_atoi(&fmt);
		else if (*fmt == '*') {
			++fmt;
			/* it's the next argument */
			field_width = va_arg(args, int);
			if (field_width < 0) {
				field_width = -field_width;
				flags |= LEFT;
			}
		}

		/* get the precision */
		precision = -1;
		if (*fmt == '.') {
			++fmt;
			if (isdigit(*fmt))
				precision = skip_atoi(&fmt);
			else if (*fmt == '*') {
				++fmt;
				/* it's the next argument */
				precision = va_arg(args, int);
			}
			if (precision < 0)
				precision = 0;
		}

		/* get the conversion qualifier */
		qualifier = -1;
		if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' ||
		    *fmt =='Z' || *fmt == 'z' || *fmt == 't') {
			qualifier = *fmt;
			++fmt;
			/* "ll" is folded into the 'L' qualifier */
			if (qualifier == 'l' && *fmt == 'l') {
				qualifier = 'L';
				++fmt;
			}
		}

		/* default base */
		base = 10;

		switch (*fmt) {
		case 'c':
			if (!(flags & LEFT)) {
				while (--field_width > 0) {
					if (str < end)
						*str = ' ';
					++str;
				}
			}
			c = (unsigned char) va_arg(args, int);
			if (str < end)
				*str = c;
			++str;
			while (--field_width > 0) {
				if (str < end)
					*str = ' ';
				++str;
			}
			continue;

		case 's':
			str = string(str, end, va_arg(args, char *), field_width, precision, flags);
			continue;

		case 'p':
			str = pointer(fmt+1, str, end,
					va_arg(args, void *),
					field_width, precision, flags);
			/* Skip all alphanumeric pointer suffixes */
			while (isalnum(fmt[1]))
				fmt++;
			continue;

		case 'n':
			/* FIXME:
			 * What does C99 say about the overflow case here? */
			if (qualifier == 'l') {
				long * ip = va_arg(args, long *);
				*ip = (str - buf);
			} else if (qualifier == 'Z' || qualifier == 'z') {
				size_t * ip = va_arg(args, size_t *);
				*ip = (str - buf);
			} else {
				int * ip = va_arg(args, int *);
				*ip = (str - buf);
			}
			continue;

		case '%':
			if (str < end)
				*str = '%';
			++str;
			continue;

		/* integer number formats - set up the flags and "break" */
		case 'o':
			base = 8;
			break;

		case 'x':
			flags |= SMALL;
			/* fallthrough */
		case 'X':
			base = 16;
			break;

		case 'd':
		case 'i':
			flags |= SIGN;
			/* fallthrough */
		case 'u':
			break;

		default:
			/* Unknown conversion: emit it verbatim. */
			if (str < end)
				*str = '%';
			++str;
			if (*fmt) {
				if (str < end)
					*str = *fmt;
				++str;
			} else {
				/* Step back so the loop's ++fmt lands on the NUL. */
				--fmt;
			}
			continue;
		}

		/* Fetch the integer argument, widened to unsigned long long. */
		if (qualifier == 'L')
			num = va_arg(args, long long);
		else if (qualifier == 'l') {
			num = va_arg(args, unsigned long);
			if (flags & SIGN)
				num = (signed long) num;
		} else if (qualifier == 'Z' || qualifier == 'z') {
			/*
			 * A signed conversion takes the signed counterpart
			 * of size_t, so that the sign survives the widening
			 * to unsigned long long exactly as it does for the
			 * 'l', 'h' and default cases.
			 */
			if (flags & SIGN)
				num = va_arg(args, ssize_t);
			else
				num = va_arg(args, size_t);
		} else if (qualifier == 't') {
			num = va_arg(args, ptrdiff_t);
		} else if (qualifier == 'h') {
			num = (unsigned short) va_arg(args, int);
			if (flags & SIGN)
				num = (signed short) num;
		} else {
			num = va_arg(args, unsigned int);
			if (flags & SIGN)
				num = (signed int) num;
		}
		str = number(str, end, num, base,
				field_width, precision, flags);
	}

	/* Terminate inside the buffer, overwriting the last byte on truncation. */
	if (size > 0) {
		if (str < end)
			*str = '\0';
		else
			end[-1] = '\0';
	}

	/* the trailing null byte doesn't count towards the total */
	return str-buf;
}
EXPORT_SYMBOL(vsnprintf);
/**
 * vscnprintf - Format a string and place it in a buffer
 * @buf: The buffer to place the result into
 * @size: The size of the buffer, including the trailing null space
 * @fmt: The format string to use
 * @args: Arguments for the format string
 *
 * The return value is the number of characters which have been written into
 * the @buf not including the trailing '\0'. If @size is 0 the function
 * returns 0.
 *
 * Call this function if you are already dealing with a va_list.
 * You probably want scnprintf() instead.
 */
int vscnprintf(char *buf, size_t size, const char *fmt, va_list args)
{
	int i;

	i = vsnprintf(buf, size, fmt, args);

	/* Everything fitted: the would-be length is the written length. */
	if (i < size)
		return i;
	/* Truncated: size - 1 characters plus the terminator were stored. */
	if (size != 0)
		return size - 1;
	/* size == 0: size - 1 would wrap around, and nothing was stored. */
	return 0;
}
EXPORT_SYMBOL(vscnprintf);
/**
 * snprintf - Format a string and place it in a buffer
 * @buf: The buffer to place the result into
 * @size: The size of the buffer, including the trailing null space
 * @fmt: The format string to use
 * @...: Arguments for the format string
 *
 * Variadic front end to vsnprintf().  Returns the number of characters
 * the complete output would need, not counting the trailing null, as
 * per ISO C99.  A return value greater than or equal to @size means
 * the string stored in @buf was truncated.
 */
int snprintf(char * buf, size_t size, const char *fmt, ...)
{
	va_list ap;
	int len;

	va_start(ap, fmt);
	len = vsnprintf(buf, size, fmt, ap);
	va_end(ap);

	return len;
}
EXPORT_SYMBOL(snprintf);
/**
 * scnprintf - Format a string and place it in a buffer
 * @buf: The buffer to place the result into
 * @size: The size of the buffer, including the trailing null space
 * @fmt: The format string to use
 * @...: Arguments for the format string
 *
 * The return value is the number of characters written into @buf not including
 * the trailing '\0'. If @size is 0 the function returns 0.
 */
int scnprintf(char * buf, size_t size, const char *fmt, ...)
{
	va_list args;
	int i;

	va_start(args, fmt);
	i = vsnprintf(buf, size, fmt, args);
	va_end(args);

	/* Everything fitted: the would-be length is the written length. */
	if (i < size)
		return i;
	/* Truncated: size - 1 characters plus the terminator were stored. */
	if (size != 0)
		return size - 1;
	/* size == 0: size - 1 would wrap around, and nothing was stored. */
	return 0;
}
EXPORT_SYMBOL(scnprintf);
/**
 * vsprintf - Format a string and place it in a buffer
 * @buf: The buffer to place the result into
 * @fmt: The format string to use
 * @args: Arguments for the format string
 *
 * Returns the number of characters written into @buf.  The output is
 * not bounded by the real size of @buf; use vsnprintf() or vscnprintf()
 * in order to avoid buffer overflows.
 *
 * Call this function if you are already dealing with a va_list.
 * You probably want sprintf() instead.
 */
int vsprintf(char *buf, const char *fmt, va_list args)
{
	/* No real bound is known here; INT_MAX is the largest size vsnprintf() accepts. */
	const size_t unbounded = INT_MAX;

	return vsnprintf(buf, unbounded, fmt, args);
}
EXPORT_SYMBOL(vsprintf);
/**
 * sprintf - Format a string and place it in a buffer
 * @buf: The buffer to place the result into
 * @fmt: The format string to use
 * @...: Arguments for the format string
 *
 * Returns the number of characters written into @buf.  The output is
 * not bounded by the real size of @buf; use snprintf() or scnprintf()
 * in order to avoid buffer overflows.
 */
int sprintf(char * buf, const char *fmt, ...)
{
	va_list ap;
	int len;

	va_start(ap, fmt);
	/* INT_MAX stands in for "no limit". */
	len = vsnprintf(buf, INT_MAX, fmt, ap);
	va_end(ap);

	return len;
}
EXPORT_SYMBOL(sprintf);
/**
* vsscanf - Unformat a buffer into a list of arguments
* @buf: input buffer
* @fmt: format of buffer
* @args: arguments
*
* Returns the number of conversions that were stored: %c, %s and each
* integer conversion count once, while %n and skipped %* fields do not.
* Parsing stops at the first literal mismatch, at an unsupported
* conversion, at input that is not a valid number for an integer
* conversion, or when either @buf or @fmt is exhausted.
*
* A field width is honoured for %c and %s only; for the integer
* conversions it is parsed but not applied.
*/
int vsscanf(const char * buf, const char * fmt, va_list args)
{
/* current read position in the input */
const char *str = buf;
/* set by simple_strto*() to the first character after the parsed number */
char *next;
/* first significant character of a candidate number */
char digit;
/* conversions stored so far; this is the return value */
int num = 0;
int qualifier;
int base;
int field_width;
int is_sign = 0;
while(*fmt && *str) {
/* skip any white space in format */
/* white space in format matches any amount of
* white space, including none, in the input.
*/
if (isspace(*fmt)) {
while (isspace(*fmt))
++fmt;
while (isspace(*str))
++str;
}
/* anything that is not a conversion must match exactly */
if (*fmt != '%' && *fmt) {
if (*fmt++ != *str++)
break;
continue;
}
/* the white-space skip above may have consumed the rest of the format */
if (!*fmt)
break;
/* step over the '%' */
++fmt;
/* skip this conversion.
* advance both strings to next white space
*/
if (*fmt == '*') {
while (!isspace(*fmt) && *fmt)
fmt++;
while (!isspace(*str) && *str)
str++;
continue;
}
/* get field width */
field_width = -1;
if (isdigit(*fmt))
field_width = skip_atoi(&fmt);
/* get conversion qualifier */
qualifier = -1;
if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' ||
*fmt == 'Z' || *fmt == 'z') {
qualifier = *fmt++;
/* a doubled qualifier: "hh" becomes 'H', "ll" becomes 'L' */
if (unlikely(qualifier == *fmt)) {
if (qualifier == 'h') {
qualifier = 'H';
fmt++;
} else if (qualifier == 'l') {
qualifier = 'L';
fmt++;
}
}
}
base = 10;
is_sign = 0;
/* nothing left to convert, or nothing left to read */
if (!*fmt || !*str)
break;
/*
* %c, %s, %n and %% are handled completely inside this switch and
* "continue" the outer loop; the integer conversions only set base
* and is_sign, then "break" to the shared number parsing below.
*/
switch(*fmt++) {
case 'c':
{
char *s = (char *) va_arg(args,char*);
/* without a width, %c reads exactly one character */
if (field_width == -1)
field_width = 1;
/* copies raw characters; no terminating '\0' is stored */
do {
*s++ = *str++;
} while (--field_width > 0 && *str);
num++;
}
continue;
case 's':
{
char *s = (char *) va_arg(args, char *);
/* without a width the copy is limited only by the input */
if(field_width == -1)
field_width = INT_MAX;
/* first, skip leading white space in buffer */
while (isspace(*str))
str++;
/* now copy until next white space */
while (*str && !isspace(*str) && field_width--) {
*s++ = *str++;
}
*s = '\0';
num++;
}
continue;
case 'n':
/* return number of characters read so far */
{
int *i = (int *)va_arg(args,int*);
*i = str - buf;
}
continue;
case 'o':
base = 8;
break;
case 'x':
case 'X':
base = 16;
break;
case 'i':
/* base 0 lets simple_strto*() pick the base from the prefix */
base = 0;
/* fallthrough */
case 'd':
is_sign = 1;
/* fallthrough */
case 'u':
break;
case '%':
/* looking for '%' in str */
if (*str++ != '%')
return num;
continue;
default:
/* invalid format; stop here */
return num;
}
/* have some sort of integer conversion.
* first, skip white space in buffer.
*/
while (isspace(*str))
str++;
/*
* Look at the first character, after an optional '-' for signed
* conversions, and stop without storing anything unless it is a
* digit that is valid for the requested base.
*/
digit = *str;
if (is_sign && digit == '-')
digit = *(str + 1);
if (!digit
|| (base == 16 && !isxdigit(digit))
|| (base == 10 && !isdigit(digit))
|| (base == 8 && (!isdigit(digit) || digit > '7'))
|| (base == 0 && !isdigit(digit)))
break;
/* parse the number and store it through a pointer of the qualified type */
switch(qualifier) {
case 'H': /* that's 'hh' in format */
if (is_sign) {
signed char *s = (signed char *) va_arg(args,signed char *);
*s = (signed char) simple_strtol(str,&next,base);
} else {
unsigned char *s = (unsigned char *) va_arg(args, unsigned char *);
*s = (unsigned char) simple_strtoul(str, &next, base);
}
break;
case 'h':
if (is_sign) {
short *s = (short *) va_arg(args,short *);
*s = (short) simple_strtol(str,&next,base);
} else {
unsigned short *s = (unsigned short *) va_arg(args, unsigned short *);
*s = (unsigned short) simple_strtoul(str, &next, base);
}
break;
case 'l':
if (is_sign) {
long *l = (long *) va_arg(args,long *);
*l = simple_strtol(str,&next,base);
} else {
unsigned long *l = (unsigned long*) va_arg(args,unsigned long*);
*l = simple_strtoul(str,&next,base);
}
break;
case 'L':
if (is_sign) {
long long *l = (long long*) va_arg(args,long long *);
*l = simple_strtoll(str,&next,base);
} else {
unsigned long long *l = (unsigned long long*) va_arg(args,unsigned long long*);
*l = simple_strtoull(str,&next,base);
}
break;
case 'Z':
case 'z':
{
/* always parsed as unsigned, whatever is_sign says */
size_t *s = (size_t*) va_arg(args,size_t*);
*s = (size_t) simple_strtoul(str,&next,base);
}
break;
default:
if (is_sign) {
int *i = (int *) va_arg(args, int*);
*i = (int) simple_strtol(str,&next,base);
} else {
unsigned int *i = (unsigned int*) va_arg(args, unsigned int*);
*i = (unsigned int) simple_strtoul(str,&next,base);
}
break;
}
num++;
/* NOTE(review): every branch above passes &next, so this looks purely defensive - confirm */
if (!next)
break;
/* resume scanning right after the number */
str = next;
}
/*
* Now we've come all the way through so either the input string or the
* format ended. In the former case, there can be a %n at the current
* position in the format that needs to be filled.
*/
if (*fmt == '%' && *(fmt + 1) == 'n') {
int *p = (int *)va_arg(args, int *);
*p = str - buf;
}
return num;
}
EXPORT_SYMBOL(vsscanf);
/**
 * sscanf - Unformat a buffer into a list of arguments
 * @buf: input buffer
 * @fmt: formatting of buffer
 * @...: resulting arguments
 *
 * Variadic front end to vsscanf(); returns whatever vsscanf() returns
 * for the same @buf and @fmt.
 */
int sscanf(const char * buf, const char * fmt, ...)
{
	va_list ap;
	int converted;

	va_start(ap, fmt);
	converted = vsscanf(buf, fmt, ap);
	va_end(ap);

	return converted;
}
EXPORT_SYMBOL(sscanf);