# Source: crcs-live/tests/test_citations.py
# Last commit: bd7b508 (Nipun)
# Commit message: Fix L3 relevance, evidence scoring honesty, citation finalisation, safety + baseline prompts
import re
import unittest

from pipeline_v10 import _citation_repair_needed, _inject_real_references, _reindex_final_answer_by_body_order
from tests.helpers import make_source
class CitationFinalizerTests(unittest.TestCase):
    """Regression tests for the citation finalisation helpers of ``pipeline_v10``.

    Exercises ``_reindex_final_answer_by_body_order``,
    ``_inject_real_references`` and ``_citation_repair_needed``.

    The expectations encoded below are:

    * inline ``[N]`` markers are renumbered 1..N by order of first appearance
      in the body, and the reference list is reordered to match;
    * grouped (``[3, 1, 9]``) and ranged (``[1-2]``) markers are expanded to
      adjacent single markers, and markers with no backing source vanish;
    * stale reference sections are replaced rather than appended to;
    * markdown-escaped brackets (``\\[1\\]``) are treated like plain ones.

    Expected reference lines follow the shape produced for ``make_source``
    fixtures (the helper lives in ``tests.helpers``; its defaults are not
    visible from this file).
    """

    # Matches a single inline citation marker such as "[12]" and captures the
    # number. Compiled once so every test shares the same definition.
    _CITATION_RE = re.compile(r"\[(\d+)\]")

    @staticmethod
    def _body_of(result):
        """Return the part of *result* that precedes the references heading."""
        return result.split("### References")[0]

    def test_reindex_final_answer_reorders_body_and_references_together(self):
        """Body markers and reference entries must be renumbered in lockstep."""
        text = (
            "Pregnancy warning for isotretinoin [3].\n\n"
            "ACE inhibitors are also avoided [1].\n\n"
            "Warfarin is generally contraindicated [2].\n\n"
            "### References\n\n"
            "[1] ACE inhibitors source\n\n"
            "[2] Warfarin source\n\n"
            "[3] Isotretinoin source"
        )

        result = _reindex_final_answer_by_body_order(text)

        # Body: numbered by first appearance.
        self.assertIn("Pregnancy warning for isotretinoin [1].", result)
        self.assertIn("ACE inhibitors are also avoided [2].", result)
        self.assertIn("Warfarin is generally contraindicated [3].", result)
        # References: each entry follows its marker to the new number.
        self.assertIn("[1] Isotretinoin source", result)
        self.assertIn("[2] ACE inhibitors source", result)
        self.assertIn("[3] Warfarin source", result)

    def test_first_visible_citation_is_renumbered_to_one(self):
        """The first marker a reader sees must be ``[1]``; stale refs are dropped."""
        text = (
            "Medicines such as isotretinoin are not recommended during pregnancy [3].\n\n"
            "Later-pregnancy NSAID exposure is also avoided [1].\n\n"
            "ACE inhibitors are generally not recommended either [2].\n\n"
            "### References\n\n"
            "[1] stale one\n\n"
            "[2] stale two\n\n"
            "[3] stale three"
        )
        sources = [
            make_source("NSAIDs in pregnancy", 2021),
            make_source("ACE inhibitors in pregnancy", 2022),
            make_source("Isotretinoin teratogenicity", 2023),
        ]

        result = _inject_real_references(text, sources)

        self.assertIn("Medicines such as isotretinoin are not recommended during pregnancy [1].", result)
        self.assertIn("Later-pregnancy NSAID exposure is also avoided [2].", result)
        self.assertIn("ACE inhibitors are generally not recommended either [3].", result)
        self.assertIn("[1] Doe et al. *Isotretinoin teratogenicity* (2023). *Journal*. DOI: 10.1000/test", result)
        self.assertIn("[2] Doe et al. *NSAIDs in pregnancy* (2021). *Journal*. DOI: 10.1000/test", result)
        self.assertIn("[3] Doe et al. *ACE inhibitors in pregnancy* (2022). *Journal*. DOI: 10.1000/test", result)
        self.assertNotIn("stale one", result)

    def test_grouped_and_phantom_citations_are_normalized(self):
        """Grouped/ranged markers are split up and the phantom ``[9]`` disappears."""
        text = (
            "First paragraph uses grouped citations [3, 1, 9].\n\n"
            "Second paragraph uses a range [1-2].\n\n"
            "### References\n\n"
            "[9] bogus"
        )
        sources = [
            make_source("Source A", 2021),
            make_source("Source B", 2022),
            make_source("Source C", 2023),
        ]

        result = _inject_real_references(text, sources)

        self.assertIn("First paragraph uses grouped citations [1][2].", result)
        self.assertIn("Second paragraph uses a range [2][3].", result)
        # Only three sources exist, so nothing may still point at number 9.
        self.assertNotIn("[9]", result)
        self.assertIn("[1] Doe et al. *Source C* (2023). *Journal*. DOI: 10.1000/test", result)
        self.assertIn("[2] Doe et al. *Source A* (2021). *Journal*. DOI: 10.1000/test", result)
        self.assertIn("[3] Doe et al. *Source B* (2022). *Journal*. DOI: 10.1000/test", result)

    def test_uncited_substantive_paragraph_is_backfilled_from_neighbor(self):
        """A long paragraph without a marker is expected to gain one.

        The expected marker is ``[2]``, the one carried by the closing
        paragraph, and exactly one references heading must be present.
        """
        text = (
            "Opening paragraph with support [1].\n\n"
            "This middle paragraph has enough words to count as substantive but lacks an inline citation entirely.\n\n"
            "Closing paragraph also carries support from a second source [2]."
        )
        sources = [
            make_source("Source A", 2021),
            make_source("Source B", 2022),
        ]

        result = _inject_real_references(text, sources)

        self.assertIn(
            "This middle paragraph has enough words to count as substantive but lacks an inline citation entirely. [2]",
            result,
        )
        self.assertEqual(result.count("### References"), 1)

    def test_references_are_rebuilt_from_used_sources_only(self):
        """Sources never cited in the body must not be listed as references."""
        text = (
            "Only one source is cited here [2].\n\n"
            "### References\n\n"
            "[1] wrong\n\n"
            "[2] wrong"
        )
        sources = [
            make_source("Source A", 2021),
            make_source("Source B", 2022),
        ]

        result = _inject_real_references(text, sources)

        self.assertIn("Only one source is cited here [1].", result)
        self.assertIn("[1] Doe et al. *Source B* (2022). *Journal*. DOI: 10.1000/test", result)
        self.assertNotIn("*Source A*", result)
        self.assertNotIn("[2] wrong", result)

    def test_plain_references_heading_is_replaced_not_appended(self):
        """A bare ``References`` heading (no ``###``) must still be replaced."""
        text = (
            "A paragraph with grouped citations [3, 1, 9].\n\n"
            "References\n"
            "[1] stale ref one\n\n"
            "[2] stale ref two\n\n"
            "[3] stale ref three"
        )
        sources = [
            make_source("Source A", 2021),
            make_source("Source B", 2022),
            make_source("Source C", 2023),
        ]

        result = _inject_real_references(text, sources)

        self.assertNotIn("stale ref", result)
        self.assertEqual(result.count("### References"), 1)
        # The original un-prefixed heading line must not survive.
        self.assertNotIn("\nReferences\n", result)
        self.assertIn("A paragraph with grouped citations [1][2].", result)

    def test_repair_needed_flags_grouped_citations(self):
        """A grouped marker such as ``[1, 2]`` must trigger repair."""
        text = "Paragraph with grouped citations [1, 2]."
        self.assertTrue(_citation_repair_needed(text, 3))

    def test_repair_needed_flags_uncited_substantive_paragraph(self):
        """A substantive paragraph with no marker must trigger repair."""
        text = (
            "A cited paragraph with enough words to count as substantive [1].\n\n"
            "This second paragraph is also substantive, but it currently has no citation at all."
        )
        self.assertTrue(_citation_repair_needed(text, 2))

    def test_repair_not_needed_for_clean_answer(self):
        """Sequential single markers on every paragraph need no repair."""
        text = (
            "Opening paragraph has enough words to count as substantive and includes a proper citation [1].\n\n"
            "Second substantive paragraph also has enough words and includes support from another source [2].\n\n"
            "### References\n\n"
            "[1] old\n\n"
            "[2] old"
        )
        self.assertFalse(_citation_repair_needed(text, 2))

    def test_escaped_brackets_are_unescaped_and_normalized(self):
        """Markdown-escaped markers must be handled like plain brackets.

        LLMs sometimes emit ``\\[1\\]\\[2\\]``, which markdown renders as
        ``[1][2]``. The output must contain no escaped brackets and no
        markers beyond the four available sources.
        """
        text = (
            "Time crystals break time-translation symmetry \\[1\\]\\[2\\].\n\n"
            "First proposed by Wilczek \\[3\\].\n\n"
            "Discrete time crystals show period doubling \\[4\\]\\[6\\].\n\n"
            "### References\n\n"
            "\\[1\\] stale\n\n"
            "\\[2\\] stale\n\n"
            "\\[3\\] stale\n\n"
            "\\[4\\] stale"
        )
        sources = [
            make_source("Wilczek paper", 2012),
            make_source("Khemani paper", 2016),
            make_source("Else paper", 2016),
            make_source("Zhang observation", 2017),
        ]

        result = _inject_real_references(text, sources)
        body = self._body_of(result)

        # No phantom citations should remain
        self.assertNotIn("[5]", body)
        self.assertNotIn("[6]", body)
        # No escaped bracket may leak into either the body or the references.
        self.assertNotIn("\\[", result)
        self.assertNotIn("\\]", result)

        # Body markers must be exactly the set 1..4. Only the body is checked
        # here; the reference list ordering is covered by other tests.
        body_cites = [int(number) for number in self._CITATION_RE.findall(body)]
        self.assertTrue(body_cites, "expected at least one inline citation in the body")
        self.assertEqual(set(body_cites), {1, 2, 3, 4})

    def test_citations_appear_in_sequential_order(self):
        """First-cited source must be [1], second new source must be [2], etc."""
        text = (
            "Paragraph one cites the third source [3].\n\n"
            "Paragraph two cites the first source [1].\n\n"
            "Paragraph three cites the second source [2]."
        )
        sources = [
            make_source("First in list", 2021),   # body cites this 2nd
            make_source("Second in list", 2022),  # body cites this 3rd
            make_source("Third in list", 2023),   # body cites this 1st
        ]

        result = _inject_real_references(text, sources)
        body = self._body_of(result)

        # First [N] in body should be [1]. Guard the match so a body with no
        # citation at all reports a test failure instead of an AttributeError.
        first_match = self._CITATION_RE.search(body)
        self.assertIsNotNone(first_match, "body contains no [N] citation")
        self.assertEqual(first_match.group(1), "1")

        # Sequential 1, 2, 3 in order of first appearance
        self.assertIn("Paragraph one cites the third source [1].", body)
        self.assertIn("Paragraph two cites the first source [2].", body)
        self.assertIn("Paragraph three cites the second source [3].", body)

        # References [1] = "Third in list" (the first thing cited in body)
        self.assertIn("[1] Doe et al. *Third in list*", result)
        self.assertIn("[2] Doe et al. *First in list*", result)
        self.assertIn("[3] Doe et al. *Second in list*", result)