| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| #include <config.h> |
|
|
| |
| #include "unilbrk.h" |
|
|
| #include <stdlib.h> |
| #include <string.h> |
|
|
| #include "c-ctype.h" |
| #include "uniconv.h" |
| #include "unilbrk/internal.h" |
| #include "unilbrk/lbrktables.h" |
| #include "unilbrk/ulc-common.h" |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| static int |
| ulc_width_linebreaks_internal (const char *s, size_t n, |
| int width, int start_column, int at_end_columns, |
| const char *o, const char *encoding, int cr, |
| char *p) |
| { |
| if (n > 0) |
| { |
| if (is_utf8_encoding (encoding)) |
| return u8_width_linebreaks_internal ((const uint8_t *) s, n, width, start_column, at_end_columns, o, encoding, cr, p); |
| else |
| { |
| |
| |
| size_t *offsets = (size_t *) malloc (n * sizeof (size_t)); |
|
|
| if (offsets != NULL) |
| { |
| uint8_t *t; |
| size_t m; |
|
|
| t = u8_conv_from_encoding (encoding, iconveh_question_mark, |
| s, n, offsets, NULL, &m); |
| if (t != NULL) |
| { |
| char *memory = |
| (char *) (m > 0 ? malloc (m + (o != NULL ? m : 0)) : NULL); |
|
|
| if (m == 0 || memory != NULL) |
| { |
| char *q = (char *) memory; |
| char *o8 = (o != NULL ? (char *) (q + m) : NULL); |
| int res_column; |
| size_t i; |
|
|
| |
| if (o != NULL) |
| { |
| memset (o8, UC_BREAK_UNDEFINED, m); |
| for (i = 0; i < n; i++) |
| if (offsets[i] != (size_t)(-1)) |
| o8[offsets[i]] = o[i]; |
| } |
|
|
| |
| res_column = |
| u8_width_linebreaks_internal (t, m, width, start_column, at_end_columns, o8, encoding, cr, q); |
|
|
| |
| memset (p, UC_BREAK_PROHIBITED, n); |
| for (i = 0; i < n; i++) |
| if (offsets[i] != (size_t)(-1)) |
| p[i] = q[offsets[i]]; |
|
|
| free (memory); |
| free (t); |
| free (offsets); |
| return res_column; |
| } |
| free (t); |
| } |
| free (offsets); |
| } |
| |
| #if C_CTYPE_ASCII |
| if (is_all_ascii (s, n)) |
| { |
| |
| return u8_width_linebreaks_internal ((const uint8_t *) s, n, width, start_column, at_end_columns, o, encoding, cr, p); |
| } |
| #endif |
| |
| |
| |
| |
| { |
| const char *s_end = s + n; |
| while (s < s_end) |
| { |
| *p = ((o != NULL && *o == UC_BREAK_MANDATORY) |
| || *s == '\n' |
| ? UC_BREAK_MANDATORY |
| : ((o != NULL && *o == UC_BREAK_CR_BEFORE_LF) |
| || (cr >= 0 |
| && *s == '\r' |
| && s + 1 < s_end |
| && *(s + 1) == '\n') |
| ? UC_BREAK_CR_BEFORE_LF |
| : UC_BREAK_PROHIBITED)); |
| s++; |
| p++; |
| if (o != NULL) |
| o++; |
| } |
| |
| } |
| } |
| } |
| return start_column; |
| } |
|
|
| #if defined IN_LIBUNISTRING |
| |
|
|
| # undef ulc_width_linebreaks |
|
|
/* Entry point kept under its original name.  Forwards all arguments to
   ulc_width_linebreaks_internal with cr = -1; in that function's fallback
   path a negative cr disables the CR-before-LF detection.  */
int
ulc_width_linebreaks (const char *s, size_t n,
                      int width, int start_column, int at_end_columns,
                      const char *o, const char *encoding,
                      char *p)
{
  int result;

  result = ulc_width_linebreaks_internal (s, n,
                                          width, start_column, at_end_columns,
                                          o, encoding, -1, p);
  return result;
}
|
|
| #endif |
|
|
| int |
| ulc_width_linebreaks_v2 (const char *s, size_t n, |
| int width, int start_column, int at_end_columns, |
| const char *o, const char *encoding, |
| char *p) |
| { |
| return ulc_width_linebreaks_internal (s, n, |
| width, start_column, at_end_columns, |
| o, encoding, LBP_CR, p); |
| } |
|
|
|
|
| #ifdef TEST |
|
|
| #include <stdio.h> |
| #include <locale.h> |
|
|
| |
| |
/* Reads the remaining contents of STREAM into a freshly allocated buffer
   and returns it, terminated by a NUL byte.  The caller owns the result
   and releases it with free().
   On a read error or when memory runs out, prints a message to stderr and
   terminates the process with exit status 1.  */
char *
read_file (FILE *stream)
{
#define BUFSIZE 4096
  char *buf = NULL;
  size_t alloc = 0;   /* bytes currently allocated for buf */
  size_t size = 0;    /* bytes of buf filled so far */

  for (;;)
    {
      size_t count;

      /* Make sure there is room for one more full chunk.  */
      if (alloc - size < BUFSIZE)
        {
          size_t new_alloc;
          char *new_buf;

          /* Keep size + BUFSIZE + 1 representable in a size_t.  */
          if (size > (size_t)(-1) - BUFSIZE - 1)
            {
              fprintf (stderr, "out of memory\n");
              exit (1);
            }
          /* Grow by 50%, but at least enough for the next chunk.  If the
             multiplication wraps, the result is small and the minimum
             below takes over.  */
          new_alloc = alloc + alloc / 2;
          if (new_alloc < size + BUFSIZE)
            new_alloc = size + BUFSIZE;
          new_buf = realloc (buf, new_alloc);
          if (new_buf == NULL)
            {
              fprintf (stderr, "out of memory\n");
              exit (1);
            }
          buf = new_buf;
          alloc = new_alloc;
        }

      count = fread (buf + size, 1, BUFSIZE, stream);
      size += count;
      if (count < BUFSIZE)
        {
          /* A short read means either an error or end of file.  */
          if (ferror (stream))
            {
              perror ("fread");
              exit (1);
            }
          break;
        }
    }

  /* Shrink to the exact size, plus one byte for the terminator.  */
  {
    char *result = realloc (buf, size + 1);

    if (result == NULL)
      {
        fprintf (stderr, "out of memory\n");
        exit (1);
      }
    result[size] = '\0';
    return result;
  }
#undef BUFSIZE
}
|
|
| int |
| main (int argc, char * argv[]) |
| { |
| setlocale (LC_CTYPE, ""); |
| if (argc == 2) |
| { |
| |
| int width = atoi (argv[1]); |
| char *input = read_file (stdin); |
| int length = strlen (input); |
| char *breaks = malloc (length); |
| int i; |
|
|
| ulc_width_linebreaks_v2 (input, length, width, 0, 0, NULL, locale_charset (), breaks); |
|
|
| for (i = 0; i < length; i++) |
| { |
| switch (breaks[i]) |
| { |
| case UC_BREAK_POSSIBLE: |
| putc ('\n', stdout); |
| break; |
| case UC_BREAK_MANDATORY: |
| break; |
| case UC_BREAK_CR_BEFORE_LF: |
| break; |
| case UC_BREAK_PROHIBITED: |
| break; |
| default: |
| abort (); |
| } |
| putc (input[i], stdout); |
| } |
|
|
| free (breaks); |
|
|
| return 0; |
| } |
| else |
| return 1; |
| } |
|
|
| #endif |
|
|