kernel_optimize_test/lib/sort.c

143 lines
2.9 KiB
C
Raw Normal View History

/*
* A fast, small, non-recursive O(nlog n) sort for the Linux kernel
*
* Jan 23 2005 Matt Mackall <mpm@selenic.com>
*/
#include <linux/types.h>
#include <linux/export.h>
#include <linux/sort.h>
lib/sort: Add 64 bit swap function In case the call side is not providing a swap function, we either use a 32 bit or a generic swap function. When swapping around pointers on 64 bit architectures falling back to use the generic swap function seems like an unnecessary waste. There at least 9 users ('sort' is of difficult to grep for) of sort() and all of them use the sort function without a customized swap function. Furthermore, they are all using pointers to swap around: arch/x86/kernel/e820.c:sanitize_e820_map() arch/x86/mm/extable.c:sort_extable() drivers/acpi/fan.c:acpi_fan_get_fps() fs/btrfs/super.c:btrfs_descending_sort_devices() fs/xfs/libxfs/xfs_dir2_block.c:xfs_dir2_sf_to_block() kernel/range.c:clean_sort_range() mm/memcontrol.c:__mem_cgroup_usage_register_event() sound/pci/hda/hda_auto_parser.c:snd_hda_parse_pin_defcfg() sound/pci/hda/hda_auto_parser.c:sort_pins_by_sequence() Obviously, we could improve the swap for other sizes as well but this is overkill at this point. A simple test shows sorting a 400 element array (try to stay in one page) with either with u32_swap() or u64_swap() show that the theory actually works. This test was done on a x86_64 (Intel Xeon E5-4610) machine. - swap_32: NumSamples = 100; Min = 48.00; Max = 49.00 Mean = 48.320000; Variance = 0.217600; SD = 0.466476; Median 48.000000 each * represents a count of 1 48.0000 - 48.1000 [ 68]: ******************************************************************** 48.1000 - 48.2000 [ 0]: 48.2000 - 48.3000 [ 0]: 48.3000 - 48.4000 [ 0]: 48.4000 - 48.5000 [ 0]: 48.5000 - 48.6000 [ 0]: 48.6000 - 48.7000 [ 0]: 48.7000 - 48.8000 [ 0]: 48.8000 - 48.9000 [ 0]: 48.9000 - 49.0000 [ 32]: ******************************** - swap_64: NumSamples = 100; Min = 44.00; Max = 63.00 Mean = 48.250000; Variance = 18.687500; SD = 4.322904; Median 47.000000 each * represents a count of 1 44.0000 - 45.9000 [ 15]: *************** 45.9000 - 47.8000 [ 37]: ************************************* 47.8000 - 49.7000 [ 39]: *************************************** 49.7000 - 51.6000 [ 0]: 51.6000 - 53.5000 [ 0]: 53.5000 - 55.4000 [ 0]: 55.4000 - 57.3000 [ 0]: 57.3000 - 59.2000 [ 1]: * 59.2000 - 61.1000 [ 3]: *** 61.1000 - 63.0000 [ 5]: ***** - swap_72: NumSamples = 100; Min = 53.00; Max = 71.00 Mean = 55.070000; Variance = 21.565100; SD = 4.643824; Median 53.000000 each * represents a count of 1 53.0000 - 54.8000 [ 73]: ************************************************************************* 54.8000 - 56.6000 [ 9]: ********* 56.6000 - 58.4000 [ 9]: ********* 58.4000 - 60.2000 [ 0]: 60.2000 - 62.0000 [ 0]: 62.0000 - 63.8000 [ 0]: 63.8000 - 65.6000 [ 0]: 65.6000 - 67.4000 [ 1]: * 67.4000 - 69.2000 [ 4]: **** 69.2000 - 71.0000 [ 4]: **** - test program: static int cmp_32(const void *a, const void *b) { u32 l = *(u32 *)a; u32 r = *(u32 *)b; if (l < r) return -1; if (l > r) return 1; return 0; } static int cmp_64(const void *a, const void *b) { u64 l = *(u64 *)a; u64 r = *(u64 *)b; if (l < r) return -1; if (l > r) return 1; return 0; } static int cmp_72(const void *a, const void *b) { u32 l = get_unaligned((u32 *) a); u32 r = get_unaligned((u32 *) b); if (l < r) return -1; if (l > r) return 1; return 0; } static void init_array32(void *array) { u32 *a = array; int i; a[0] = 3821; for (i = 1; i < ARRAY_ELEMENTS; i++) a[i] = next_pseudo_random32(a[i-1]); } static void init_array64(void *array) { u64 *a = array; int i; a[0] = 3821; for (i = 1; i < ARRAY_ELEMENTS; i++) a[i] = next_pseudo_random32(a[i-1]); } static void init_array72(void *array) { char *p; u32 v; int i; v = 3821; for (i = 0; i < ARRAY_ELEMENTS; i++) { p = (char *)array + (i * 9); put_unaligned(v, (u32*) p); v = next_pseudo_random32(v); } } static void sort_test(void (*init)(void *array), int (*cmp) (const void *, const void *), void *array, size_t size) { ktime_t start, stop; int i; for (i = 0; i < 10000; i++) { init(array); local_irq_disable(); start = ktime_get(); sort(array, ARRAY_ELEMENTS, size, cmp, NULL); stop = ktime_get(); local_irq_enable(); if (i > 10000 - 101) pr_info("%lld\n", ktime_to_us(ktime_sub(stop, start))); } } static void *create_array(size_t size) { void *array; array = kmalloc(ARRAY_ELEMENTS * size, GFP_KERNEL); if (!array) return NULL; return array; } static int perform_test(size_t size) { void *array; array = create_array(size); if (!array) return -ENOMEM; pr_info("test element size %d bytes\n", (int)size); switch (size) { case 4: sort_test(init_array32, cmp_32, array, size); break; case 8: sort_test(init_array64, cmp_64, array, size); break; case 9: sort_test(init_array72, cmp_72, array, size); break; } kfree(array); return 0; } static int __init sort_tests_init(void) { int err; err = perform_test(sizeof(u32)); if (err) return err; err = perform_test(sizeof(u64)); if (err) return err; err = perform_test(sizeof(u64)+1); if (err) return err; return 0; } static void __exit sort_tests_exit(void) { } module_init(sort_tests_init); module_exit(sort_tests_exit); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Daniel Wagner"); MODULE_DESCRIPTION("sort perfomance tests"); Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de> Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-06-26 06:02:14 +08:00
static int alignment_ok(const void *base, int align)
{
return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
((unsigned long)base & (align - 1)) == 0;
}
static void u32_swap(void *a, void *b, int size)
{
u32 t = *(u32 *)a;
*(u32 *)a = *(u32 *)b;
*(u32 *)b = t;
}
lib/sort: Add 64 bit swap function In case the call side is not providing a swap function, we either use a 32 bit or a generic swap function. When swapping around pointers on 64 bit architectures falling back to use the generic swap function seems like an unnecessary waste. There at least 9 users ('sort' is of difficult to grep for) of sort() and all of them use the sort function without a customized swap function. Furthermore, they are all using pointers to swap around: arch/x86/kernel/e820.c:sanitize_e820_map() arch/x86/mm/extable.c:sort_extable() drivers/acpi/fan.c:acpi_fan_get_fps() fs/btrfs/super.c:btrfs_descending_sort_devices() fs/xfs/libxfs/xfs_dir2_block.c:xfs_dir2_sf_to_block() kernel/range.c:clean_sort_range() mm/memcontrol.c:__mem_cgroup_usage_register_event() sound/pci/hda/hda_auto_parser.c:snd_hda_parse_pin_defcfg() sound/pci/hda/hda_auto_parser.c:sort_pins_by_sequence() Obviously, we could improve the swap for other sizes as well but this is overkill at this point. A simple test shows sorting a 400 element array (try to stay in one page) with either with u32_swap() or u64_swap() show that the theory actually works. This test was done on a x86_64 (Intel Xeon E5-4610) machine. - swap_32: NumSamples = 100; Min = 48.00; Max = 49.00 Mean = 48.320000; Variance = 0.217600; SD = 0.466476; Median 48.000000 each * represents a count of 1 48.0000 - 48.1000 [ 68]: ******************************************************************** 48.1000 - 48.2000 [ 0]: 48.2000 - 48.3000 [ 0]: 48.3000 - 48.4000 [ 0]: 48.4000 - 48.5000 [ 0]: 48.5000 - 48.6000 [ 0]: 48.6000 - 48.7000 [ 0]: 48.7000 - 48.8000 [ 0]: 48.8000 - 48.9000 [ 0]: 48.9000 - 49.0000 [ 32]: ******************************** - swap_64: NumSamples = 100; Min = 44.00; Max = 63.00 Mean = 48.250000; Variance = 18.687500; SD = 4.322904; Median 47.000000 each * represents a count of 1 44.0000 - 45.9000 [ 15]: *************** 45.9000 - 47.8000 [ 37]: ************************************* 47.8000 - 49.7000 [ 39]: *************************************** 49.7000 - 51.6000 [ 0]: 51.6000 - 53.5000 [ 0]: 53.5000 - 55.4000 [ 0]: 55.4000 - 57.3000 [ 0]: 57.3000 - 59.2000 [ 1]: * 59.2000 - 61.1000 [ 3]: *** 61.1000 - 63.0000 [ 5]: ***** - swap_72: NumSamples = 100; Min = 53.00; Max = 71.00 Mean = 55.070000; Variance = 21.565100; SD = 4.643824; Median 53.000000 each * represents a count of 1 53.0000 - 54.8000 [ 73]: ************************************************************************* 54.8000 - 56.6000 [ 9]: ********* 56.6000 - 58.4000 [ 9]: ********* 58.4000 - 60.2000 [ 0]: 60.2000 - 62.0000 [ 0]: 62.0000 - 63.8000 [ 0]: 63.8000 - 65.6000 [ 0]: 65.6000 - 67.4000 [ 1]: * 67.4000 - 69.2000 [ 4]: **** 69.2000 - 71.0000 [ 4]: **** - test program: static int cmp_32(const void *a, const void *b) { u32 l = *(u32 *)a; u32 r = *(u32 *)b; if (l < r) return -1; if (l > r) return 1; return 0; } static int cmp_64(const void *a, const void *b) { u64 l = *(u64 *)a; u64 r = *(u64 *)b; if (l < r) return -1; if (l > r) return 1; return 0; } static int cmp_72(const void *a, const void *b) { u32 l = get_unaligned((u32 *) a); u32 r = get_unaligned((u32 *) b); if (l < r) return -1; if (l > r) return 1; return 0; } static void init_array32(void *array) { u32 *a = array; int i; a[0] = 3821; for (i = 1; i < ARRAY_ELEMENTS; i++) a[i] = next_pseudo_random32(a[i-1]); } static void init_array64(void *array) { u64 *a = array; int i; a[0] = 3821; for (i = 1; i < ARRAY_ELEMENTS; i++) a[i] = next_pseudo_random32(a[i-1]); } static void init_array72(void *array) { char *p; u32 v; int i; v = 3821; for (i = 0; i < ARRAY_ELEMENTS; i++) { p = (char *)array + (i * 9); put_unaligned(v, (u32*) p); v = next_pseudo_random32(v); } } static void sort_test(void (*init)(void *array), int (*cmp) (const void *, const void *), void *array, size_t size) { ktime_t start, stop; int i; for (i = 0; i < 10000; i++) { init(array); local_irq_disable(); start = ktime_get(); sort(array, ARRAY_ELEMENTS, size, cmp, NULL); stop = ktime_get(); local_irq_enable(); if (i > 10000 - 101) pr_info("%lld\n", ktime_to_us(ktime_sub(stop, start))); } } static void *create_array(size_t size) { void *array; array = kmalloc(ARRAY_ELEMENTS * size, GFP_KERNEL); if (!array) return NULL; return array; } static int perform_test(size_t size) { void *array; array = create_array(size); if (!array) return -ENOMEM; pr_info("test element size %d bytes\n", (int)size); switch (size) { case 4: sort_test(init_array32, cmp_32, array, size); break; case 8: sort_test(init_array64, cmp_64, array, size); break; case 9: sort_test(init_array72, cmp_72, array, size); break; } kfree(array); return 0; } static int __init sort_tests_init(void) { int err; err = perform_test(sizeof(u32)); if (err) return err; err = perform_test(sizeof(u64)); if (err) return err; err = perform_test(sizeof(u64)+1); if (err) return err; return 0; } static void __exit sort_tests_exit(void) { } module_init(sort_tests_init); module_exit(sort_tests_exit); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Daniel Wagner"); MODULE_DESCRIPTION("sort perfomance tests"); Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de> Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-06-26 06:02:14 +08:00
static void u64_swap(void *a, void *b, int size)
{
u64 t = *(u64 *)a;
*(u64 *)a = *(u64 *)b;
*(u64 *)b = t;
}
static void generic_swap(void *a, void *b, int size)
{
char t;
do {
t = *(char *)a;
*(char *)a++ = *(char *)b;
*(char *)b++ = t;
} while (--size > 0);
}
/**
* sort - sort an array of elements
* @base: pointer to data to sort
* @num: number of elements
* @size: size of each element
* @cmp_func: pointer to comparison function
* @swap_func: pointer to swap function or NULL
*
* This function does a heapsort on the given array. You may provide a
* swap_func function optimized to your element type.
*
* Sorting time is O(n log n) both on average and worst-case. While
* qsort is about 20% faster on average, it suffers from exploitable
* O(n*n) worst-case behavior and extra memory requirements that make
* it less suitable for kernel use.
*/
void sort(void *base, size_t num, size_t size,
int (*cmp_func)(const void *, const void *),
void (*swap_func)(void *, void *, int size))
{
/* pre-scale counters for performance */
int i = (num/2 - 1) * size, n = num * size, c, r;
lib/sort: Add 64 bit swap function In case the call side is not providing a swap function, we either use a 32 bit or a generic swap function. When swapping around pointers on 64 bit architectures falling back to use the generic swap function seems like an unnecessary waste. There at least 9 users ('sort' is of difficult to grep for) of sort() and all of them use the sort function without a customized swap function. Furthermore, they are all using pointers to swap around: arch/x86/kernel/e820.c:sanitize_e820_map() arch/x86/mm/extable.c:sort_extable() drivers/acpi/fan.c:acpi_fan_get_fps() fs/btrfs/super.c:btrfs_descending_sort_devices() fs/xfs/libxfs/xfs_dir2_block.c:xfs_dir2_sf_to_block() kernel/range.c:clean_sort_range() mm/memcontrol.c:__mem_cgroup_usage_register_event() sound/pci/hda/hda_auto_parser.c:snd_hda_parse_pin_defcfg() sound/pci/hda/hda_auto_parser.c:sort_pins_by_sequence() Obviously, we could improve the swap for other sizes as well but this is overkill at this point. A simple test shows sorting a 400 element array (try to stay in one page) with either with u32_swap() or u64_swap() show that the theory actually works. This test was done on a x86_64 (Intel Xeon E5-4610) machine. - swap_32: NumSamples = 100; Min = 48.00; Max = 49.00 Mean = 48.320000; Variance = 0.217600; SD = 0.466476; Median 48.000000 each * represents a count of 1 48.0000 - 48.1000 [ 68]: ******************************************************************** 48.1000 - 48.2000 [ 0]: 48.2000 - 48.3000 [ 0]: 48.3000 - 48.4000 [ 0]: 48.4000 - 48.5000 [ 0]: 48.5000 - 48.6000 [ 0]: 48.6000 - 48.7000 [ 0]: 48.7000 - 48.8000 [ 0]: 48.8000 - 48.9000 [ 0]: 48.9000 - 49.0000 [ 32]: ******************************** - swap_64: NumSamples = 100; Min = 44.00; Max = 63.00 Mean = 48.250000; Variance = 18.687500; SD = 4.322904; Median 47.000000 each * represents a count of 1 44.0000 - 45.9000 [ 15]: *************** 45.9000 - 47.8000 [ 37]: ************************************* 47.8000 - 49.7000 [ 39]: *************************************** 49.7000 - 51.6000 [ 0]: 51.6000 - 53.5000 [ 0]: 53.5000 - 55.4000 [ 0]: 55.4000 - 57.3000 [ 0]: 57.3000 - 59.2000 [ 1]: * 59.2000 - 61.1000 [ 3]: *** 61.1000 - 63.0000 [ 5]: ***** - swap_72: NumSamples = 100; Min = 53.00; Max = 71.00 Mean = 55.070000; Variance = 21.565100; SD = 4.643824; Median 53.000000 each * represents a count of 1 53.0000 - 54.8000 [ 73]: ************************************************************************* 54.8000 - 56.6000 [ 9]: ********* 56.6000 - 58.4000 [ 9]: ********* 58.4000 - 60.2000 [ 0]: 60.2000 - 62.0000 [ 0]: 62.0000 - 63.8000 [ 0]: 63.8000 - 65.6000 [ 0]: 65.6000 - 67.4000 [ 1]: * 67.4000 - 69.2000 [ 4]: **** 69.2000 - 71.0000 [ 4]: **** - test program: static int cmp_32(const void *a, const void *b) { u32 l = *(u32 *)a; u32 r = *(u32 *)b; if (l < r) return -1; if (l > r) return 1; return 0; } static int cmp_64(const void *a, const void *b) { u64 l = *(u64 *)a; u64 r = *(u64 *)b; if (l < r) return -1; if (l > r) return 1; return 0; } static int cmp_72(const void *a, const void *b) { u32 l = get_unaligned((u32 *) a); u32 r = get_unaligned((u32 *) b); if (l < r) return -1; if (l > r) return 1; return 0; } static void init_array32(void *array) { u32 *a = array; int i; a[0] = 3821; for (i = 1; i < ARRAY_ELEMENTS; i++) a[i] = next_pseudo_random32(a[i-1]); } static void init_array64(void *array) { u64 *a = array; int i; a[0] = 3821; for (i = 1; i < ARRAY_ELEMENTS; i++) a[i] = next_pseudo_random32(a[i-1]); } static void init_array72(void *array) { char *p; u32 v; int i; v = 3821; for (i = 0; i < ARRAY_ELEMENTS; i++) { p = (char *)array + (i * 9); put_unaligned(v, (u32*) p); v = next_pseudo_random32(v); } } static void sort_test(void (*init)(void *array), int (*cmp) (const void *, const void *), void *array, size_t size) { ktime_t start, stop; int i; for (i = 0; i < 10000; i++) { init(array); local_irq_disable(); start = ktime_get(); sort(array, ARRAY_ELEMENTS, size, cmp, NULL); stop = ktime_get(); local_irq_enable(); if (i > 10000 - 101) pr_info("%lld\n", ktime_to_us(ktime_sub(stop, start))); } } static void *create_array(size_t size) { void *array; array = kmalloc(ARRAY_ELEMENTS * size, GFP_KERNEL); if (!array) return NULL; return array; } static int perform_test(size_t size) { void *array; array = create_array(size); if (!array) return -ENOMEM; pr_info("test element size %d bytes\n", (int)size); switch (size) { case 4: sort_test(init_array32, cmp_32, array, size); break; case 8: sort_test(init_array64, cmp_64, array, size); break; case 9: sort_test(init_array72, cmp_72, array, size); break; } kfree(array); return 0; } static int __init sort_tests_init(void) { int err; err = perform_test(sizeof(u32)); if (err) return err; err = perform_test(sizeof(u64)); if (err) return err; err = perform_test(sizeof(u64)+1); if (err) return err; return 0; } static void __exit sort_tests_exit(void) { } module_init(sort_tests_init); module_exit(sort_tests_exit); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Daniel Wagner"); MODULE_DESCRIPTION("sort perfomance tests"); Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de> Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-06-26 06:02:14 +08:00
if (!swap_func) {
if (size == 4 && alignment_ok(base, 4))
swap_func = u32_swap;
else if (size == 8 && alignment_ok(base, 8))
swap_func = u64_swap;
else
swap_func = generic_swap;
}
/* heapify */
for ( ; i >= 0; i -= size) {
for (r = i; r * 2 + size < n; r = c) {
c = r * 2 + size;
if (c < n - size &&
cmp_func(base + c, base + c + size) < 0)
c += size;
if (cmp_func(base + r, base + c) >= 0)
break;
swap_func(base + r, base + c, size);
}
}
/* sort */
for (i = n - size; i > 0; i -= size) {
swap_func(base, base + i, size);
for (r = 0; r * 2 + size < i; r = c) {
c = r * 2 + size;
if (c < i - size &&
cmp_func(base + c, base + c + size) < 0)
c += size;
if (cmp_func(base + r, base + c) >= 0)
break;
swap_func(base + r, base + c, size);
}
}
}
EXPORT_SYMBOL(sort);
#if 0
#include <linux/slab.h>
/* a simple boot-time regression test */
int cmpint(const void *a, const void *b)
{
return *(int *)a - *(int *)b;
}
static int sort_test(void)
{
int *a, i, r = 1;
a = kmalloc(1000 * sizeof(int), GFP_KERNEL);
BUG_ON(!a);
printk("testing sort()\n");
for (i = 0; i < 1000; i++) {
r = (r * 725861) % 6599;
a[i] = r;
}
sort(a, 1000, sizeof(int), cmpint, NULL);
for (i = 0; i < 999; i++)
if (a[i] > a[i+1]) {
printk("sort() failed!\n");
break;
}
kfree(a);
return 0;
}
module_init(sort_test);
#endif