| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
|
|
| |
| |
| |
| |
| |
| |
| |
|
|
|
|
|
|
| #include <ctype.h> |
| #include <stdio.h> |
| #include <string.h> |
| #include <locale.h> |
|
|
| |
| #define PCRE2_DFTABLES |
| |
| #define PRIV(name) name |
|
|
| #define PCRE2_CODE_UNIT_WIDTH 0 |
| #include "pcre2_internal.h" |
|
|
| #include "pcre2_maketables.c" |
| #include "pcre2_tables.c" |
|
|
|
|
/* Names of the character classes that have their own bit map. main() walks
this list in order, attaching one name to each map it writes out in the
generated source file. */

static const char *classlist[] =
  {
  "space",
  "xdigit",
  "digit",
  "upper",
  "lower",
  "word",
  "graph",
  "print",
  "punct",
  "cntrl"
  };
|
|
/* Pass-through character mapping. main() installs this as the default for
both conversion hooks it hands to maketables(), so that characters are left
untouched unless another mapping is selected.

Argument:   c   the character value
Returns:    c, unchanged
*/

static int
identity(int c)
{
return c;
}
|
|
| #ifdef EBCDIC |
| static int ebcdic_to_unicode(int c) |
| { |
| if (c < 0 || c > 255) abort(); |
|
|
| return ebcdic_1047_to_ascii[c]; |
| } |
|
|
| static int unicode_to_ebcdic(int c) |
| { |
| if (c < 0 || c > 255) abort(); |
|
|
| return ascii_to_ebcdic_1047[c]; |
| } |
| #endif |
|
|
|
|
| |
| |
| |
|
|
/* Print the command-line summary to stderr. The -E line is included only
when the program is compiled with EBCDIC defined. */

static void
usage(void)
{
static const char help_text[] =
  "Usage: pcre2_dftables [options] <output file>\n"
  " -b Write output in binary (default is source code)\n"
  " -L Use locale from LC_ALL (default is \"C\" locale)\n"
#ifdef EBCDIC
  " -E Use EBCDIC 1047 via locale C.UTF-8\n"
#endif
  ;

/* The text holds no conversion specifications, so it can be written as-is. */

(void)fputs(help_text, stderr);
}
|
|
|
|
|
|
| |
| |
| |
|
|
| int main(int argc, char **argv) |
| { |
| FILE *f; |
| int i; |
| int nclass = 0; |
| BOOL binary = FALSE; |
| char *env = (char *)"C"; |
| const uint8_t *tables; |
| const uint8_t *base_of_tables; |
| int (*charfn_to)(int) = identity; |
| int (*charfn_from)(int) = identity; |
|
|
| |
|
|
| for (i = 1; i < argc; i++) |
| { |
| char *arg = argv[i]; |
| if (*arg != '-') break; |
|
|
| if (strcmp(arg, "-help") == 0 || strcmp(arg, "--help") == 0) |
| { |
| usage(); |
| return 0; |
| } |
|
|
| else if (strcmp(arg, "-L") == 0) |
| { |
| if (setlocale(LC_ALL, "") == NULL) |
| { |
| (void)fprintf(stderr, "pcre2_dftables: setlocale() failed\n"); |
| return 1; |
| } |
| env = getenv("LC_ALL"); |
| } |
|
|
| #ifdef EBCDIC |
| else if (strcmp(arg, "-E") == 0) |
| { |
| if (setlocale(LC_ALL, "C.UTF-8") == NULL) |
| { |
| (void)fprintf(stderr, "pcre2_dftables: setlocale() failed\n"); |
| return 1; |
| } |
| #ifdef EBCDIC_NL25 |
| env = "EBCDIC 1047 (NL 0x25)"; |
| #else |
| env = "EBCDIC 1047 (NL 0x15)"; |
| #endif |
| charfn_to = ebcdic_to_unicode; |
| charfn_from = unicode_to_ebcdic; |
| } |
| #endif |
|
|
| else if (strcmp(arg, "-b") == 0) |
| binary = TRUE; |
|
|
| else |
| { |
| (void)fprintf(stderr, "pcre2_dftables: unrecognized option %s\n", arg); |
| return 1; |
| } |
| } |
|
|
| if (i != argc - 1) |
| { |
| (void)fprintf(stderr, "pcre2_dftables: one filename argument is required\n"); |
| return 1; |
| } |
|
|
| |
|
|
| tables = maketables(charfn_to, charfn_from); |
| base_of_tables = tables; |
|
|
| f = fopen(argv[i], "wb"); |
| if (f == NULL) |
| { |
| fprintf(stderr, "pcre2_dftables: failed to open %s for writing\n", argv[1]); |
| return 1; |
| } |
|
|
| |
|
|
| if (binary) |
| { |
| int yield = 0; |
| size_t len = fwrite(tables, 1, TABLES_LENGTH, f); |
| if (len != TABLES_LENGTH) |
| { |
| (void)fprintf(stderr, "pcre2_dftables: fwrite() returned wrong length %d " |
| "instead of %d\n", (int)len, TABLES_LENGTH); |
| yield = 1; |
| } |
| fclose(f); |
| free((void *)base_of_tables); |
| return yield; |
| } |
|
|
| |
| |
| |
|
|
| (void)fprintf(f, |
| "/*************************************************\n" |
| "* Perl-Compatible Regular Expressions *\n" |
| "*************************************************/\n\n" |
| "/* This file was automatically written by the pcre2_dftables auxiliary\n" |
| "program. It contains character tables that are used when no external\n" |
| "tables are passed to PCRE2 by the application that calls it. The tables\n" |
| "are used only for characters whose code values are less than 256, and\n" |
| "only relevant if not in UCP mode. */\n\n"); |
|
|
| (void)fprintf(f, |
| "/* This set of tables was written in the %s locale. */\n\n", env); |
|
|
| (void)fprintf(f, |
| "/* The pcre2_ftables program (which is distributed with PCRE2) can be used\n" |
| "to build alternative versions of this file. This is necessary if you are\n" |
| "running in an EBCDIC environment, or if you want to default to a different\n" |
| "encoding, for example ISO-8859-1. When pcre2_dftables is run, it creates\n" |
| "these tables in the \"C\" locale by default. This happens automatically if\n" |
| "PCRE2 is configured with --enable-rebuild-chartables. However, you can run\n" |
| "pcre2_dftables manually with the -L option to build tables using the LC_ALL\n" |
| "locale. */\n\n"); |
|
|
| (void)fprintf(f, |
| "#include \"pcre2_internal.h\"\n\n"); |
|
|
| (void)fprintf(f, |
| "const uint8_t PRIV(default_tables)[] = {\n\n" |
| "/* This table is a lower casing table. */\n\n"); |
|
|
| (void)fprintf(f, " "); |
| for (i = 0; i < 256; i++) |
| { |
| if ((i & 7) == 0 && i != 0) fprintf(f, "\n "); |
| fprintf(f, "%3d", *tables++); |
| if (i != 255) fprintf(f, ","); |
| } |
| (void)fprintf(f, ",\n\n"); |
|
|
| (void)fprintf(f, "/* This table is a case flipping table. */\n\n"); |
|
|
| (void)fprintf(f, " "); |
| for (i = 0; i < 256; i++) |
| { |
| if ((i & 7) == 0 && i != 0) fprintf(f, "\n "); |
| fprintf(f, "%3d", *tables++); |
| if (i != 255) fprintf(f, ","); |
| } |
| (void)fprintf(f, ",\n\n"); |
|
|
| (void)fprintf(f, |
| "/* This table contains bit maps for various character classes. Each map is 32\n" |
| "bytes long and the bits run from the least significant end of each byte. The\n" |
| "classes that have their own maps are: space, xdigit, digit, upper, lower, word,\n" |
| "graph, print, punct, and cntrl. Other classes are built from combinations. */\n\n"); |
|
|
| (void)fprintf(f, " "); |
| for (i = 0; i < cbit_length; i++) |
| { |
| if ((i & 7) == 0 && i != 0) |
| { |
| if ((i & 31) == 0) (void)fprintf(f, "\n"); |
| if ((i & 24) == 8) (void)fprintf(f, " /* %s */", classlist[nclass++]); |
| (void)fprintf(f, "\n "); |
| } |
| (void)fprintf(f, "0x%02x", *tables++); |
| if (i != cbit_length - 1) (void)fprintf(f, ","); |
| } |
| (void)fprintf(f, ",\n\n"); |
|
|
| (void)fprintf(f, |
| "/* This table identifies various classes of character by individual bits:\n" |
| " 0x%02x white space character\n" |
| " 0x%02x letter\n" |
| " 0x%02x lower case letter\n" |
| " 0x%02x decimal digit\n" |
| " 0x%02x word (alphanumeric or '_')\n*/\n\n", |
| ctype_space, ctype_letter, ctype_lcletter, ctype_digit, ctype_word); |
|
|
| (void)fprintf(f, " "); |
| for (i = 0; i < 256; i++) |
| { |
| if ((i & 7) == 0 && i != 0) |
| { |
| (void)fprintf(f, " /* "); |
| if (isprint(i-8)) (void)fprintf(f, " %c -", i-8); |
| else (void)fprintf(f, "%3d-", i-8); |
| if (isprint(i-1)) (void)fprintf(f, " %c ", i-1); |
| else (void)fprintf(f, "%3d", i-1); |
| (void)fprintf(f, " */\n "); |
| } |
| (void)fprintf(f, "0x%02x", *tables++); |
| if (i != 255) (void)fprintf(f, ","); |
| } |
|
|
| (void)fprintf(f, "};/* "); |
| if (isprint(i-8)) (void)fprintf(f, " %c -", i-8); |
| else (void)fprintf(f, "%3d-", i-8); |
| if (isprint(i-1)) (void)fprintf(f, " %c ", i-1); |
| else (void)fprintf(f, "%3d", i-1); |
| (void)fprintf(f, " */\n\n/* End of pcre2_chartables.c */\n"); |
|
|
| fclose(f); |
| free((void *)base_of_tables); |
| return 0; |
| } |
|
|
| |
|
|