CiteGuard-RAG / tests /test_citation_parser.py
4waiz's picture
chore: flatten repo layout
910eb59
from citeguard.preprocess.citation_parser import CitationParser, parse_citations_in_sentence
def test_named_citation():
    """A single bracketed named id is extracted and not flagged uncertain."""
    sentence = "Foo bar [doc1]."
    cited, flagged = parse_citations_in_sentence(sentence)
    expected = ["doc1"]
    assert cited == expected
    assert flagged is False
def test_numeric_citation_normalized_to_doc_prefix():
    """Bare numeric citations ``[1]`` and ``[2]`` come back as ``doc1``/``doc2``."""
    sentence = "Foo bar [1] and [2]."
    # The second element of the result is not checked by this test.
    cited, _ = parse_citations_in_sentence(sentence)
    expected = ["doc1", "doc2"]
    assert cited == expected
def test_multi_citation_inside_brackets():
    """A comma-separated list inside one bracket pair yields each id in order."""
    sentence = "Foo [doc1, doc2] bar."
    # The second element of the result is not checked by this test.
    cited, _ = parse_citations_in_sentence(sentence)
    expected = ["doc1", "doc2"]
    assert cited == expected
def test_no_citation_is_not_uncertain():
    """A sentence without any brackets gives no ids and is not uncertain."""
    sentence = "Plain sentence with no brackets."
    cited, flagged = parse_citations_in_sentence(sentence)
    assert cited == []
    assert flagged is False
def test_unparseable_bracket_marks_uncertain():
    """Bracketed text such as ``[see Fig 2]`` gives no ids but is flagged uncertain."""
    sentence = "Confusing [see Fig 2]."
    cited, flagged = parse_citations_in_sentence(sentence)
    assert cited == []
    assert flagged is True
def test_empty_bracket_marks_uncertain():
    """An empty bracket pair ``[]`` gives no ids but is flagged uncertain."""
    sentence = "Trailing bracket []."
    cited, flagged = parse_citations_in_sentence(sentence)
    assert cited == []
    assert flagged is True
def test_dedup_in_one_sentence():
    """The same id cited twice in one sentence is reported only once."""
    sentence = "Mix [doc1] and [doc1]."
    # The second element of the result is not checked by this test.
    cited, _ = parse_citations_in_sentence(sentence)
    expected = ["doc1"]
    assert cited == expected
def test_custom_numeric_prefix():
    """A parser built with ``numeric_prefix="ref"`` turns ``[3]`` into ``ref3``."""
    parser = CitationParser(numeric_prefix="ref")
    sentence = "Cited as [3]."
    # The second element of the result is not checked by this test.
    cited, _ = parser.parse(sentence)
    expected = ["ref3"]
    assert cited == expected