/* coreutils/tests/fmt/tests_for_get_line.c
 *
 * Hosting-page residue kept for reference (not part of the program):
 *   AryaWu's picture
 *   Upload folder using huggingface_hub
 *   78d2150 verified
 */
#include "../../unity/unity.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdbool.h>
/* Build a temporary stream that holds REMAINDER -- the bytes of the simulated
   input that follow its first character, which the tests pass to get_line()
   separately -- and rewind it so the next read yields REMAINDER[0].
   A NULL or empty REMAINDER produces an empty stream.  The callers in this
   file close the returned stream with fclose(). */
static FILE *make_stream_from(const char *remainder)
{
    FILE *stream = tmpfile();
    TEST_ASSERT_NOT_NULL(stream);

    /* Measure once; the same length is both written and verified. */
    const size_t len = (remainder != NULL) ? strlen(remainder) : 0;
    if (len > 0)
    {
        size_t written = fwrite(remainder, 1, len, stream);
        TEST_ASSERT_EQUAL_UINT(len, written);
    }

    rewind(stream);
    return stream;
}
/* Return a freshly allocated, NUL-terminated copy of the W->length bytes
   starting at W->text, so the word can be compared as a C string.
   The callers in this file release the result with free(). */
static char* copy_word_text(WORD *w)
{
    const size_t len = (size_t)w->length;
    char *copy = malloc(len + 1);
    TEST_ASSERT_NOT_NULL(copy);

    memcpy(copy, w->text, len);
    copy[len] = '\0';
    return copy;
}
/* Unity per-test hook: put the globals these tests read or modify back into
   a known starting state before each test. */
void setUp(void)
{
    /* Input column preceding the first character handed to get_line(). */
    in_column = 0;

    /* No prefix configured. */
    prefix_lead_space = 0;
    prefix_length = 0;
    prefix_full_length = 0;
    prefix = "";

    /* Spacing-related flags start cleared. */
    uniform = false;
    tabs = false;

    /* Empty paragraph: no words recorded, text buffer back at its start. */
    word_limit = word;
    wptr = parabuf;
}
/* Unity per-test hook; no per-test cleanup is performed here. */
void tearDown(void)
{
}
/* Basic tokenization: one line of two words separated by a single space and
   ended by a newline.  Checks both WORD records, the character get_line()
   returns (the first non-blank character of the following line) and the
   indentation left in in_column for that following line. */
static void test_get_line_basic_two_words(void)
{
    /* Simulated input: "Hello world\n", then two spaces, then "Next".
       'H' is passed as c and the stream supplies everything after it.
       The two indent spaces are spelled \040 (ASCII space) so that
       whitespace normalization of this file cannot change their number;
       the in_column == 2 check below depends on there being exactly two. */
    FILE *f = make_stream_from("ello world\n\040\040Next");
    int c = 'H';
    int ret = get_line(f, c);

    /* After parsing there should be 2 words: "Hello" and "world". */
    TEST_ASSERT_EQUAL_INT(2, (int)(word_limit - word));
    {
        WORD *w0 = &word[0];
        char *s0 = copy_word_text(w0);
        TEST_ASSERT_EQUAL_STRING("Hello", s0);
        TEST_ASSERT_EQUAL_INT(5, w0->length);
        TEST_ASSERT_FALSE(w0->period);
        TEST_ASSERT_FALSE(w0->punct);
        TEST_ASSERT_FALSE(w0->final);
        TEST_ASSERT_FALSE(w0->paren);
        /* One literal space separates the two words. */
        TEST_ASSERT_EQUAL_INT(1, w0->space);
        free(s0);
    }
    {
        WORD *w1 = &word[1];
        char *s1 = copy_word_text(w1);
        TEST_ASSERT_EQUAL_STRING("world", s1);
        TEST_ASSERT_EQUAL_INT(5, w1->length);
        TEST_ASSERT_FALSE(w1->period);
        TEST_ASSERT_FALSE(w1->punct);
        TEST_ASSERT_FALSE(w1->final);
        TEST_ASSERT_FALSE(w1->paren);
        /* End of line: a non-final word is expected to carry space == 1. */
        TEST_ASSERT_EQUAL_INT(1, w1->space);
        free(s1);
    }

    /* The next line is indented by two spaces and starts with 'N':
       get_line() returns 'N' and in_column holds the indent width. */
    TEST_ASSERT_EQUAL_INT('N', ret);
    TEST_ASSERT_EQUAL_INT(2, in_column);
    fclose(f);
}
/* Sentence-end detection: a word ending in '.' followed by a closing ')' and
   then TWO spaces must be marked period, punct and final, and must keep its
   measured inter-word space of 2. */
static void test_get_line_sentence_end_with_closer(void)
{
    /* Simulated input: "Hello.)", two spaces, "Next\nX".  'H' is passed as c.
       The two separating spaces are spelled \040 (ASCII space) so that
       whitespace normalization of this file cannot reduce them to one; with
       a single space the final/space == 2 expectations below would not
       describe the input any more. */
    FILE *f = make_stream_from("ello.)\040\040Next\nX");
    int c = 'H';
    int ret = get_line(f, c);

    TEST_ASSERT_EQUAL_INT(2, (int)(word_limit - word));
    {
        WORD *w0 = &word[0];
        char *s0 = copy_word_text(w0);
        TEST_ASSERT_EQUAL_STRING("Hello.)", s0);
        TEST_ASSERT_EQUAL_INT(7, w0->length);
        /* The word ends in ')', which is punctuation; the period is the
           '.' found once the trailing ')' is disregarded. */
        TEST_ASSERT_TRUE(w0->punct);
        TEST_ASSERT_TRUE(w0->period);
        /* Two spaces after a sentence-ending word: final is set and, with
           uniform == false (see setUp), the measured space of 2 is kept. */
        TEST_ASSERT_TRUE(w0->final);
        TEST_ASSERT_EQUAL_INT(2, w0->space);
        TEST_ASSERT_FALSE(w0->paren);
        free(s0);
    }
    {
        WORD *w1 = &word[1];
        char *s1 = copy_word_text(w1);
        TEST_ASSERT_EQUAL_STRING("Next", s1);
        TEST_ASSERT_EQUAL_INT(4, w1->length);
        TEST_ASSERT_FALSE(w1->period);
        TEST_ASSERT_FALSE(w1->final);
        /* Last word on the line, not a sentence end: space == 1. */
        TEST_ASSERT_EQUAL_INT(1, w1->space);
        free(s1);
    }

    /* The following line starts with 'X' in column 0. */
    TEST_ASSERT_EQUAL_INT('X', ret);
    TEST_ASSERT_EQUAL_INT(0, in_column);
    fclose(f);
}
/* Tab handling: a tab between two words is measured as the distance to the
   next tab stop, and seeing it sets the global tabs flag. */
static void test_get_line_tab_expansion_and_tabs_flag(void)
{
    /* Simulated input "abc\tdef\nZ": 'a' is passed as c, the rest is read
       from the stream. */
    FILE *stream = make_stream_from("bc\tdef\nZ");
    int result = get_line(stream, 'a');

    TEST_ASSERT_EQUAL_INT(2, (int)(word_limit - word));

    WORD *first = &word[0];
    char *first_text = copy_word_text(first);
    TEST_ASSERT_EQUAL_STRING("abc", first_text);
    TEST_ASSERT_EQUAL_INT(3, first->length);
    /* The word ends at column 3 and the tab reaches column 8: 5 columns. */
    TEST_ASSERT_EQUAL_INT(5, first->space);
    TEST_ASSERT_TRUE(tabs);
    free(first_text);

    WORD *second = &word[1];
    char *second_text = copy_word_text(second);
    TEST_ASSERT_EQUAL_STRING("def", second_text);
    TEST_ASSERT_EQUAL_INT(3, second->length);
    /* Last word on the line, not a sentence end: space == 1. */
    TEST_ASSERT_EQUAL_INT(1, second->space);
    free(second_text);

    /* The following line starts with 'Z' in column 0. */
    TEST_ASSERT_EQUAL_INT('Z', result);
    TEST_ASSERT_EQUAL_INT(0, in_column);
    fclose(stream);
}
/* Uniform spacing: a run of several spaces between two non-final words keeps
   its measured width when uniform == false and is reduced to 1 when
   uniform == true.  The same input is parsed once in each mode. */
static void test_get_line_uniform_spacing_normalizes_interword_space(void)
{
    /* Simulated input: "a", three spaces, "b\nX"; 'a' is passed as c.
       The three spaces are spelled \040 (ASCII space) so that whitespace
       normalization of this file cannot collapse them: with a single space
       the uniform == false expectation of 3 would be wrong and the
       uniform == true half would not exercise any normalization. */
    const char *remainder = "\040\040\040b\nX";
    int c = 'a';

    /* First with uniform == false: the measured width (3) is kept. */
    {
        FILE *f = make_stream_from(remainder);
        uniform = false;
        /* Start from an empty paragraph at column 0. */
        wptr = parabuf;
        word_limit = word;
        in_column = 0;
        int ret = get_line(f, c);
        TEST_ASSERT_EQUAL_INT(2, (int)(word_limit - word));
        TEST_ASSERT_EQUAL_INT(3, word[0].space);
        TEST_ASSERT_EQUAL_INT('X', ret);
        fclose(f);
    }

    /* Then with uniform == true: "a" is not sentence-final, so its space
       is normalized to 1. */
    {
        FILE *f = make_stream_from(remainder);
        uniform = true;
        /* Discard the words left over from the first pass. */
        wptr = parabuf;
        word_limit = word;
        in_column = 0;
        int ret = get_line(f, c);
        TEST_ASSERT_EQUAL_INT(2, (int)(word_limit - word));
        TEST_ASSERT_EQUAL_INT(1, word[0].space);
        TEST_ASSERT_EQUAL_INT('X', ret);
        fclose(f);
    }
}
/* Parenthesis handling: a word that opens with '(' gets paren == true, and a
   trailing ')' makes it punct == true without making it a sentence end. */
static void test_get_line_paren_and_punct_flags(void)
{
    /* Simulated input "(test) ok\nY": '(' is passed as c, the rest is read
       from the stream. */
    FILE *stream = make_stream_from("test) ok\nY");
    int result = get_line(stream, '(');

    TEST_ASSERT_EQUAL_INT(2, (int)(word_limit - word));

    WORD *first = &word[0];
    char *first_text = copy_word_text(first);
    TEST_ASSERT_EQUAL_STRING("(test)", first_text);
    TEST_ASSERT_TRUE(first->paren);
    /* The word ends in ')'. */
    TEST_ASSERT_TRUE(first->punct);
    /* Disregarding the trailing ')', the last character is 't', which is
       not a period, so the word is neither period nor final. */
    TEST_ASSERT_FALSE(first->period);
    TEST_ASSERT_FALSE(first->final);
    TEST_ASSERT_EQUAL_INT(1, first->space);
    free(first_text);

    WORD *second = &word[1];
    char *second_text = copy_word_text(second);
    TEST_ASSERT_EQUAL_STRING("ok", second_text);
    TEST_ASSERT_FALSE(second->paren);
    TEST_ASSERT_FALSE(second->period);
    TEST_ASSERT_FALSE(second->final);
    TEST_ASSERT_EQUAL_INT(1, second->space);
    free(second_text);

    /* The following line starts with 'Y' in column 0. */
    TEST_ASSERT_EQUAL_INT('Y', result);
    TEST_ASSERT_EQUAL_INT(0, in_column);
    fclose(stream);
}
/* End of input: a line that stops at EOF instead of a newline must yield a
   final word, and get_line() must return EOF. */
static void test_get_line_eof_terminated_line(void)
{
    /* Simulated input "last" with no newline: 'l' is passed as c and the
       stream holds "ast". */
    FILE *stream = make_stream_from("ast");
    int result = get_line(stream, 'l');

    TEST_ASSERT_EQUAL_INT(1, (int)(word_limit - word));

    WORD *only = &word[0];
    char *only_text = copy_word_text(only);
    TEST_ASSERT_EQUAL_STRING("last", only_text);
    /* Hitting EOF marks the word final, and a final word carries space 2. */
    TEST_ASSERT_TRUE(only->final);
    TEST_ASSERT_EQUAL_INT(2, only->space);
    free(only_text);

    TEST_ASSERT_EQUAL_INT(EOF, result);
    /* There is no following line, so in_column is expected to be 0. */
    TEST_ASSERT_EQUAL_INT(0, in_column);
    fclose(stream);
}
/* Test-runner entry point: runs each get_line test once, in the order listed,
   between UNITY_BEGIN() and UNITY_END(), and returns the value of
   UNITY_END() as the process exit status. */
int main(void)
{
UNITY_BEGIN();
RUN_TEST(test_get_line_basic_two_words);
RUN_TEST(test_get_line_sentence_end_with_closer);
RUN_TEST(test_get_line_tab_expansion_and_tabs_flag);
RUN_TEST(test_get_line_uniform_spacing_normalizes_interword_space);
RUN_TEST(test_get_line_paren_and_punct_flags);
RUN_TEST(test_get_line_eof_terminated_line);
return UNITY_END();
}