Spaces:
Running
Running
| from qalmsw.parse import has_math, has_prose, parse_paragraphs | |
| def test_simple_paragraphs(): | |
| source = ( | |
| "\\begin{document}\n" | |
| "First paragraph here.\n" | |
| "Still first.\n" | |
| "\n" | |
| "Second paragraph.\n" | |
| "\\end{document}\n" | |
| ) | |
| paras = parse_paragraphs(source) | |
| assert len(paras) == 2 | |
| assert paras[0].text == "First paragraph here.\nStill first." | |
| assert paras[0].start_line == 2 | |
| assert paras[0].end_line == 3 | |
| assert paras[1].text == "Second paragraph." | |
| assert paras[1].start_line == 5 | |
| assert paras[1].end_line == 5 | |
| def test_comments_stripped_but_lines_preserved(): | |
| source = "\\begin{document}\nLine one. % a comment\nLine two.\n\\end{document}\n" | |
| paras = parse_paragraphs(source) | |
| assert len(paras) == 1 | |
| assert "comment" not in paras[0].text | |
| assert paras[0].start_line == 2 | |
| def test_escaped_percent_not_a_comment(): | |
| source = "\\begin{document}\n50\\% efficiency.\n\\end{document}\n" | |
| paras = parse_paragraphs(source) | |
| assert paras[0].text == "50\\% efficiency." | |
| def test_no_begin_document(): | |
| source = "First line.\n\nSecond line." | |
| paras = parse_paragraphs(source) | |
| assert len(paras) == 2 | |
| assert paras[0].start_line == 1 | |
| assert paras[1].start_line == 3 | |
| def test_content_after_preamble_tracks_lines(): | |
| source = ( | |
| "\\documentclass{article}\n" | |
| "\\usepackage{amsmath}\n" | |
| "\\begin{document}\n" | |
| "Real content.\n" | |
| "\\end{document}\n" | |
| ) | |
| paras = parse_paragraphs(source) | |
| assert len(paras) == 1 | |
| assert paras[0].start_line == 4 | |
| def test_has_prose_rejects_bare_commands(): | |
| assert not has_prose("\\maketitle") | |
| assert not has_prose("\\section{Introduction}") | |
| assert not has_prose("\\begin{equation}\\end{equation}") | |
| def test_has_prose_accepts_real_sentences(): | |
| assert has_prose("This paper investigates attention mechanisms.") | |
| def test_has_prose_accepts_prose_with_latex_commands(): | |
| assert has_prose("Recent work~\\cite{foo} shows that self-attention scales well.") | |
| def test_has_math_detects_inline_and_display_math(): | |
| assert has_math("Let $x=1$ for the rest of the proof.") | |
| assert has_math("Let $$x=1$$ for the rest of the proof.") | |
| assert has_math("\\begin{equation}x=1\\end{equation}") | |
| assert has_math("We define \\(x\\) as the latent variable.") | |
| def test_has_math_rejects_plain_prose(): | |
| assert not has_math("This paragraph has no formulas.") | |
| def test_inline_thebibliography_is_excluded_from_body(): | |
| source = ( | |
| "\\begin{document}\n" | |
| "Real body content with enough words here.\n" | |
| "\n" | |
| "\\begin{thebibliography}{99}\n" | |
| "\\bibitem{foo} A. Author, Title, Venue, 2020.\n" | |
| "\\end{thebibliography}\n" | |
| "\\end{document}\n" | |
| ) | |
| paras = parse_paragraphs(source) | |
| assert len(paras) == 1 | |
| assert "bibitem" not in paras[0].text | |
| assert "thebibliography" not in paras[0].text | |