|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include <config.h>

#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <uchar.h>

#include "bench.h"
#include "bench-multibyte.h"
#include "mbiter.h"
#include "mbiterf.h"
#include "mbuiter.h"
#include "mbuiterf.h"
#include "mcel.h"
|
|
|
|
|
/* Signature shared by every benchmark body below: scan the bytes in
   [TEXT, TEXT_END) REPEAT times and return an accumulated checksum.  */
typedef unsigned long long (*test_function) (char const *text,
                                             char const *text_end,
                                             int repeat);
|
|
|
|
|
/* Baseline benchmark: visit every byte position in [TEXT, TEXT_END)
   REPEAT times without reading the bytes, adding each position's
   address (as a uintptr_t) to the returned sum.  A REPEAT of zero or
   less performs no passes and yields 0.  */
static unsigned long long
noop_test (char const *text, char const *text_end, int repeat)
{
  unsigned long long total = 0;

  for (int pass = repeat; pass > 0; pass--)
    {
      char const *p = text;
      while (p < text_end)
        {
          total += (uintptr_t) p;
          p++;
        }
    }

  return total;
}
|
|
|
|
|
/* Single-byte benchmark: read every byte in [TEXT, TEXT_END) as an
   unsigned char, REPEAT times, and return the sum of all values read.
   A REPEAT of zero or less performs no passes and yields 0.  */
static unsigned long long
single_test (char const *text, char const *text_end, int repeat)
{
  unsigned long long total = 0;

  for (int pass = repeat; pass > 0; pass--)
    {
      char const *p = text;
      while (p < text_end)
        {
          total += (unsigned char) *p;
          p++;
        }
    }

  return total;
}
|
|
|
|
|
static unsigned long long |
|
|
mbiter_test (char const *text, char const *text_end, int repeat) |
|
|
{ |
|
|
unsigned long long sum = 0; |
|
|
|
|
|
size_t text_len = text_end - text; |
|
|
for (int count = 0; count < repeat; count++) |
|
|
{ |
|
|
mbi_iterator_t iter; |
|
|
for (mbi_init (iter, text, text_len); mbi_avail (iter); ) |
|
|
{ |
|
|
mbchar_t cur = mbi_cur (iter); |
|
|
mbi_advance (iter); |
|
|
sum += cur.wc_valid ? cur.wc : (unsigned char) *mb_ptr (cur) << 16; |
|
|
} |
|
|
} |
|
|
|
|
|
return sum; |
|
|
} |
|
|
|
|
|
static unsigned long long |
|
|
mbiterf_test (char const *text, char const *text_end, int repeat) |
|
|
{ |
|
|
unsigned long long sum = 0; |
|
|
|
|
|
for (int count = 0; count < repeat; count++) |
|
|
{ |
|
|
mbif_state_t state; |
|
|
char const *iter; |
|
|
for (mbif_init (state), iter = text; mbif_avail (state, iter, text_end); ) |
|
|
{ |
|
|
mbchar_t cur = mbif_next (state, iter, text_end); |
|
|
iter += mb_len (cur); |
|
|
sum += cur.wc_valid ? cur.wc : (unsigned char) *mb_ptr (cur) << 16; |
|
|
} |
|
|
} |
|
|
|
|
|
return sum; |
|
|
} |
|
|
|
|
|
static unsigned long long |
|
|
mbuiter_test (char const *text, char const *text_end, int repeat) |
|
|
{ |
|
|
unsigned long long sum = 0; |
|
|
|
|
|
for (int count = 0; count < repeat; count++) |
|
|
for (char const *t = text; t < text_end; t++) |
|
|
{ |
|
|
mbui_iterator_t iter; |
|
|
for (mbui_init (iter, t); mbui_avail (iter); ) |
|
|
{ |
|
|
mbchar_t cur = mbui_cur (iter); |
|
|
mbui_advance (iter); |
|
|
sum += cur.wc_valid ? cur.wc : (unsigned char) *mb_ptr (cur) << 16; |
|
|
} |
|
|
t = mbui_cur_ptr (iter); |
|
|
} |
|
|
|
|
|
return sum; |
|
|
} |
|
|
|
|
|
static unsigned long long |
|
|
mbuiterf_test (char const *text, _GL_UNUSED char const *text_end, int repeat) |
|
|
{ |
|
|
unsigned long long sum = 0; |
|
|
|
|
|
for (int count = 0; count < repeat; count++) |
|
|
for (char const *t = text; t < text_end; t++) |
|
|
{ |
|
|
mbuif_state_t state; |
|
|
char const *iter; |
|
|
for (mbuif_init (state), iter = t; mbuif_avail (state, iter); ) |
|
|
{ |
|
|
mbchar_t cur = mbuif_next (state, iter); |
|
|
iter += mb_len (cur); |
|
|
sum += cur.wc_valid ? cur.wc : (unsigned char) *mb_ptr (cur) << 16; |
|
|
} |
|
|
t = iter; |
|
|
} |
|
|
|
|
|
return sum; |
|
|
} |
|
|
|
|
|
static unsigned long long |
|
|
mcel_test (char const *text, char const *text_end, int repeat) |
|
|
{ |
|
|
unsigned long long sum = 0; |
|
|
|
|
|
for (int count = 0; count < repeat; count++) |
|
|
for (char const *iter = text; iter < text_end; ) |
|
|
{ |
|
|
mcel_t g = mcel_scan (iter, text_end); |
|
|
iter += g.len; |
|
|
sum += g.ch | (g.err << 16); |
|
|
} |
|
|
|
|
|
return sum; |
|
|
} |
|
|
|
|
|
static unsigned long long |
|
|
mcuel_test (char const *text, char const *text_end, int repeat) |
|
|
{ |
|
|
unsigned long long sum = 0; |
|
|
|
|
|
for (int count = 0; count < repeat; count++) |
|
|
for (char const *t = text; t < text_end; t++) |
|
|
{ |
|
|
char const *iter = t; |
|
|
while (*iter) |
|
|
{ |
|
|
mcel_t g = mcel_scanz (iter); |
|
|
iter += g.len; |
|
|
sum += g.ch | (g.err << 16); |
|
|
} |
|
|
t = iter; |
|
|
} |
|
|
|
|
|
return sum; |
|
|
} |
|
|
|
|
|
static unsigned long long |
|
|
do_1_test (test_function test, char const *text, |
|
|
char const *text_end, int repeat, struct timings_state *ts) |
|
|
{ |
|
|
timing_start (ts); |
|
|
unsigned long long sum = test (text, text_end, repeat); |
|
|
timing_end (ts); |
|
|
return sum; |
|
|
} |
|
|
|
|
|
static void |
|
|
do_test (char test, int repeat, char const *locale_name, |
|
|
char const *text, size_t text_len) |
|
|
{ |
|
|
if (setlocale (LC_ALL, locale_name) != NULL) |
|
|
{ |
|
|
char const *text_end = text + text_len; |
|
|
|
|
|
static struct |
|
|
{ |
|
|
char const *name; |
|
|
test_function fn; |
|
|
struct timings_state ts; |
|
|
unsigned long long volatile sum; |
|
|
} testdesc[] = { |
|
|
{ "noop", noop_test }, |
|
|
{ "single", single_test }, |
|
|
{ "mbiter", mbiter_test }, |
|
|
{ "mbiterf", mbiterf_test }, |
|
|
{ "mbuiter", mbuiter_test }, |
|
|
{ "mbuiterf", mbuiterf_test }, |
|
|
{ "mcel", mcel_test }, |
|
|
{ "mcuel", mcuel_test }, |
|
|
}; |
|
|
int ntestdesc = sizeof testdesc / sizeof *testdesc; |
|
|
for (int i = 0; i < ntestdesc; i++) |
|
|
testdesc[i].sum = |
|
|
do_1_test (testdesc[i].fn, text, text_end, repeat, &testdesc[i].ts); |
|
|
|
|
|
setlocale (LC_ALL, "C"); |
|
|
|
|
|
static bool header_printed; |
|
|
if (!header_printed) |
|
|
{ |
|
|
printf (" "); |
|
|
for (int i = 0; i < ntestdesc; i++) |
|
|
printf (" %8s", testdesc[i].name); |
|
|
printf ("\n"); |
|
|
header_printed = true; |
|
|
} |
|
|
|
|
|
printf ("%c", test); |
|
|
for (int i = 0; i < ntestdesc; i++) |
|
|
{ |
|
|
double user_usec = testdesc[i].ts.user_usec; |
|
|
double sys_usec = testdesc[i].ts.sys_usec; |
|
|
printf (" %8.3f", (user_usec + sys_usec) / 1e6); |
|
|
} |
|
|
printf ("\n"); |
|
|
} |
|
|
else |
|
|
{ |
|
|
printf ("Skipping test: locale %s not installed.\n", locale_name); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int |
|
|
main (int argc, char *argv[]) |
|
|
{ |
|
|
if (argc != 3) |
|
|
{ |
|
|
fprintf (stderr, "Usage: %s TESTS REPETITIONS\n", argv[0]); |
|
|
|
|
|
fprintf (stderr, "Example: %s ABCDEFGHIJKLabehi 100000\n", argv[0]); |
|
|
exit (1); |
|
|
} |
|
|
|
|
|
char const *tests = argv[1]; |
|
|
int repeat = atoi (argv[2]); |
|
|
|
|
|
text_init (); |
|
|
|
|
|
|
|
|
size_t i; |
|
|
for (i = 0; i < strlen (tests); i++) |
|
|
{ |
|
|
char test = tests[i]; |
|
|
|
|
|
switch (test) |
|
|
{ |
|
|
case 'A': |
|
|
do_test (test, repeat, "C", text_latin_ascii, |
|
|
strlen (text_latin_ascii)); |
|
|
break; |
|
|
case 'a': |
|
|
do_test (test, repeat, "C", TEXT_LATIN_ASCII_LINE1, |
|
|
strlen (TEXT_LATIN_ASCII_LINE1)); |
|
|
break; |
|
|
case 'B': |
|
|
do_test (test, repeat, "en_US.UTF-8", text_latin_ascii, |
|
|
strlen (text_latin_ascii)); |
|
|
break; |
|
|
case 'b': |
|
|
do_test (test, repeat, "en_US.UTF-8", TEXT_LATIN_ASCII_LINE1, |
|
|
strlen (TEXT_LATIN_ASCII_LINE1)); |
|
|
break; |
|
|
case 'C': |
|
|
do_test (test, repeat, "C", text_french_iso8859, |
|
|
strlen (text_french_iso8859)); |
|
|
break; |
|
|
case 'D': |
|
|
do_test (test, repeat, "fr_FR.ISO-8859-1", text_french_iso8859, |
|
|
strlen (text_french_iso8859)); |
|
|
break; |
|
|
case 'E': |
|
|
do_test (test, repeat, "en_US.UTF-8", text_french_utf8, |
|
|
strlen (text_french_utf8)); |
|
|
break; |
|
|
case 'e': |
|
|
do_test (test, repeat, "en_US.UTF-8", TEXT_FRENCH_UTF8_LINE1, |
|
|
strlen (TEXT_FRENCH_UTF8_LINE1)); |
|
|
break; |
|
|
case 'F': |
|
|
do_test (test, repeat, "C", text_greek_iso8859, |
|
|
strlen (text_greek_iso8859)); |
|
|
break; |
|
|
case 'G': |
|
|
do_test (test, repeat, "el_GR.ISO-8859-7", text_greek_iso8859, |
|
|
strlen (text_greek_iso8859)); |
|
|
break; |
|
|
case 'H': |
|
|
do_test (test, repeat, "en_US.UTF-8", text_greek_utf8, |
|
|
strlen (text_greek_utf8)); |
|
|
break; |
|
|
case 'h': |
|
|
do_test (test, repeat, "en_US.UTF-8", TEXT_GREEK_UTF8_LINE1, |
|
|
strlen (TEXT_GREEK_UTF8_LINE1)); |
|
|
break; |
|
|
case 'I': |
|
|
do_test (test, repeat, "en_US.UTF-8", text_chinese_utf8, |
|
|
strlen (text_chinese_utf8)); |
|
|
break; |
|
|
case 'i': |
|
|
do_test (test, repeat, "en_US.UTF-8", TEXT_CHINESE_UTF8_LINE1, |
|
|
strlen (TEXT_CHINESE_UTF8_LINE1)); |
|
|
break; |
|
|
case 'J': |
|
|
do_test (test, repeat, "zh_CN.GB18030", text_chinese_gb18030, |
|
|
strlen (text_chinese_gb18030)); |
|
|
break; |
|
|
case 'K': |
|
|
do_test (test, repeat, "C", text_random_bytes, |
|
|
sizeof text_random_bytes - 1); |
|
|
break; |
|
|
case 'L': |
|
|
do_test (test, repeat, "en_US.UTF-8", text_random_bytes, |
|
|
sizeof text_random_bytes - 1); |
|
|
break; |
|
|
default: |
|
|
|
|
|
; |
|
|
} |
|
|
} |
|
|
|
|
|
return 0; |
|
|
} |
|
|
|