Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| import sys | |
| import unittest | |
| from pathlib import Path | |
# Locate the "src" directory that sits beside this file's parent directory
# (parents[0] is the folder holding this file, parents[1] is one level up).
SRC = Path(__file__).resolve().parents[1] / "src"
if str(SRC) not in sys.path:
    # Prepend rather than append so this checkout's sources are searched first.
    # NOTE(review): presumably this is what lets the ``legawa`` import below
    # resolve without installing the package — confirm against the repo layout.
    sys.path.insert(0, str(SRC))
| from legawa.tools.citations import ( | |
| CitationCheck, | |
| _topics_overlap, | |
| extract_citations, | |
| format_checks, | |
| verify_citation, | |
| verify_citations, | |
| ) | |
class FakePasalClient:
    """In-memory test double that answers ``search`` and ``get_law`` from fixtures.

    ``responses`` maps a query string (the ``q`` keyword passed to ``search``)
    to the payload to return; ``laws`` maps an FRBR URI to the payload returned
    by ``get_law``. Every call is recorded so tests can assert on what was
    requested.
    """

    def __init__(self, responses: dict[str, dict], laws: dict[str, dict] | None = None):
        # Call logs first, fixtures second; the attributes are independent.
        self.calls: list[tuple[dict, ...]] = []
        self.get_law_calls: list[str] = []
        self.responses = responses
        # A missing (or empty) ``laws`` mapping becomes a fresh empty dict.
        self.laws = laws if laws else {}

    def search(self, **kwargs):
        """Record the keyword arguments and return the fixture for ``kwargs["q"]``.

        Unknown queries yield an empty result set. A call without ``q`` raises
        ``KeyError`` after the call has been recorded.
        """
        recorded = (kwargs,)
        self.calls.append(recorded)
        empty_payload = {"results": []}
        return self.responses.get(kwargs["q"], empty_payload)

    def get_law(self, frbr_uri: str):
        """Record the requested URI and return its fixture.

        Raises:
            KeyError: when no fixture was registered for ``frbr_uri``.
        """
        self.get_law_calls.append(frbr_uri)
        if frbr_uri in self.laws:
            return self.laws[frbr_uri]
        raise KeyError(f"no fixture for {frbr_uri}")
class CitationTests(unittest.TestCase):
    """Tests for citation extraction, topical overlap and citation verification.

    Network access is never used: lookups are served by ``FakePasalClient``
    fixtures or by a local stub whose ``search`` always raises.
    """

    # Fixture values shared by the UU 13/2003 scenarios below.
    _UU13_REF = "UU 13/2003"
    _UU13_URI = "akn/id/act/uu/2003/13"
    _UU13_TITLE = "Undang-Undang Nomor 13 Tahun 2003 tentang Ketenagakerjaan"

    @staticmethod
    def _single_hit(title: str, frbr_uri: str) -> dict:
        # One-result search payload in the shape the fixtures in this file use.
        return {"results": [{"title": title, "frbr_uri": frbr_uri, "status": "berlaku"}]}

    @staticmethod
    def _relationship(kind: str, kind_en: str, related_title: str, related_uri: str) -> dict:
        # One entry of a law's "relationships" list.
        return {
            "type": kind,
            "type_en": kind_en,
            "related_work": {"title": related_title, "frbr_uri": related_uri},
        }

    @staticmethod
    def _law_record(title: str, frbr_uri: str, relationship: dict) -> dict:
        # ``get_law`` payload carrying exactly one relationship entry.
        return {
            "title": title,
            "frbr_uri": frbr_uri,
            "status": "berlaku",
            "relationships": [relationship],
        }

    def _uu13_client(self, law: dict | None = None) -> FakePasalClient:
        # Fake client that knows UU 13/2003 via search and, optionally, via get_law.
        law_fixtures = {} if law is None else {self._UU13_URI: law}
        return FakePasalClient(
            responses={self._UU13_REF: self._single_hit(self._UU13_TITLE, self._UU13_URI)},
            laws=law_fixtures,
        )

    def test_extract_citations_normalizes_common_forms(self) -> None:
        passage = (
            "Mengacu pada UU No. 13 Tahun 2003, Peraturan Presiden Nomor 12 Tahun 2021, "
            "UUD 1945 Pasal 28E, Peraturan Pemerintah Pengganti Undang-Undang Nomor 2 Tahun 2023, "
            "Perda Provinsi Jawa Barat Nomor 1 Tahun 2024, dan akn/id/act/pp/2021/35."
        )
        found = extract_citations(passage)
        self.assertEqual(
            found,
            ["UU 13/2003", "Perpres 12/2021", "UUD 1945", "Perppu 2/2023", "Perda 1/2024", "PP 35/2021"],
        )

    def test_verify_citation_matches_title_and_frbr_uri(self) -> None:
        client = FakePasalClient(
            {"UU 13/2003": self._single_hit("Undang-Undang Nomor 13 Tahun 2003", "akn/id/act/uu/2003/13")}
        )
        result = verify_citation(client, "UU No. 13 Tahun 2003")
        self.assertTrue(result.found)
        self.assertEqual(result.reference, "UU 13/2003")
        self.assertEqual(result.frbr_uri, "akn/id/act/uu/2003/13")
        self.assertEqual(result.status, "berlaku")

    def test_verify_citation_supports_perppu_and_region_perda(self) -> None:
        perppu_hit = self._single_hit(
            "Peraturan Pemerintah Pengganti Undang-Undang Nomor 2 Tahun 2023",
            "akn/id/act/perppu/2023/2",
        )
        perda_hit = self._single_hit(
            "Peraturan Daerah Provinsi Jawa Barat Nomor 1 Tahun 2024",
            "akn/id/act/perda/2024/1",
        )
        client = FakePasalClient({"Perppu 2/2023": perppu_hit, "Perda 1/2024": perda_hit})
        # Both lookups run before any assertion.
        perppu = verify_citation(client, "Perppu Nomor 2 Tahun 2023")
        perda = verify_citation(client, "Perda Provinsi Jawa Barat Nomor 1 Tahun 2024")
        self.assertTrue(perppu.found)
        self.assertEqual(perppu.query, "Perppu 2/2023")
        self.assertEqual(perppu.frbr_uri, "akn/id/act/perppu/2023/2")
        self.assertTrue(perda.found)
        self.assertEqual(perda.query, "Perda 1/2024")
        self.assertEqual(perda.frbr_uri, "akn/id/act/perda/2024/1")

    def test_topics_overlap_rejects_institutional_only_match(self) -> None:
        # False positive observed in a gemma4 memo run: the model said
        # Permen 24/2018 covered an audit mechanism, whereas its subject is
        # Pengelolaan Arsip. The two strings share only ministry-context tokens.
        claimed = "Mekanisme Tindak Lanjut Hasil Audit Inspektorat Jenderal Kementerian Pendidikan dan Kebudayaan"
        actual_title = "Pengelolaan Arsip Terjaga di Lingkungan Kementerian Pendidikan dan Kebudayaan"
        self.assertFalse(_topics_overlap(claimed, actual_title))

    def test_topics_overlap_accepts_real_topical_match(self) -> None:
        claimed = "Pengadaan Barang/Jasa Pemerintah"
        actual_title = (
            "Peraturan Presiden Nomor 12 Tahun 2021 tentang Perubahan Atas Pengadaan Barang/Jasa Pemerintah"
        )
        self.assertTrue(_topics_overlap(claimed, actual_title))

    def test_topics_overlap_acronym_vs_spelled_out_still_works(self) -> None:
        # Guards the acronym path against the institutional filter:
        # UU 17/2014 really is about MPR/DPR/DPD/DPRD.
        claimed = "MPR, DPR, DPD, dan DPRD"
        actual_title = (
            "Majelis Permusyawaratan Rakyat, Dewan Perwakilan Rakyat, "
            "Dewan Perwakilan Daerah, dan Dewan Perwakilan Rakyat Daerah"
        )
        self.assertTrue(_topics_overlap(claimed, actual_title))

    def test_extract_citations_handles_kemenhub_pm_prefix(self) -> None:
        # Kemenhub numbers its regulations internally as "PM <n> Tahun YYYY".
        # Without support for that form, every Permenhub citation in agent
        # output would be missed silently by the verifier.
        passage = (
            "Permenhub No. PM 94 Tahun 2018 tentang Perlintasan Sebidang. "
            "Lihat juga Permenhub PM 36/2011 dan Peraturan Menteri Perhubungan "
            "Nomor PM 73 Tahun 2018."
        )
        extracted = extract_citations(passage)
        for expected in ("Permen 94/2018", "Permen 36/2011", "Permen 73/2018"):
            self.assertIn(expected, extracted)

    def test_topics_overlap_uninformative_title_falls_back(self) -> None:
        # The pasal.id title merely echoes the reference, so it carries no
        # topical signal; the check accepts on existence alone.
        claimed = "Sistem Pendidikan Nasional"
        echoed_title = "Undang-Undang Nomor 20 Tahun 2003 tentang Undang-Undang Nomor 20 Tahun 2003"
        self.assertTrue(_topics_overlap(claimed, echoed_title))

    def test_verify_citation_advises_on_repealed_regulation(self) -> None:
        # A 'Dicabut oleh' relationship pointing at a NEWER regulation is
        # surfaced as an advisory note only. The citation is not rejected,
        # because pasal.id's relationship graph mis-classifies some
        # cross-references, and false positives would block legitimate
        # canonical statutes such as UU 31/1999.
        uri = "akn/id/act/uu/1999/22"
        title = "Undang-Undang Nomor 22 Tahun 1999 tentang Pemerintahan Daerah"
        repealed_by = self._relationship(
            "Dicabut oleh",
            "Repealed by",
            "Undang-Undang Nomor 32 Tahun 2004 tentang Pemerintahan Daerah",
            "/akn/id/act/uu/2004/32",
        )
        client = FakePasalClient(
            # The search hit says "berlaku": pasal.id's status field is unreliable.
            responses={"UU 22/1999": self._single_hit(title, uri)},
            laws={uri: self._law_record(title, uri, repealed_by)},
        )
        result = verify_citation(client, "UU 22/1999", claimed_topic="Pemerintahan Daerah")
        self.assertTrue(result.found)
        note = result.note or ""
        self.assertIn("Dicabut oleh", note)
        self.assertIn("Undang-Undang Nomor 32 Tahun 2004", note)
        self.assertIn("akn/id/act/uu/2004/32", note)
        self.assertIn("verifikasi manual", note)

    def test_verify_citation_ignores_mirrored_predecessor_relationship(self) -> None:
        # pasal.id lists relationships in both directions. UU 13/2003 does
        # revoke UU 25/1997, yet UU 25/1997 also shows up under UU 13/2003's
        # "Dicabut oleh". Entries whose related work is OLDER than the cited
        # regulation are mirrored ones and must be skipped.
        mirrored_predecessor = self._relationship(
            "Dicabut oleh",
            "Repealed by",
            "UU 25/1997",
            "/akn/id/act/uu/1997/25",
        )
        client = self._uu13_client(
            self._law_record(self._UU13_TITLE, self._UU13_URI, mirrored_predecessor)
        )
        result = verify_citation(client, "UU 13/2003", claimed_topic="Ketenagakerjaan")
        self.assertTrue(result.found)
        # The mirrored predecessor is filtered out, so no advisory is expected.
        self.assertIsNone(result.note)

    def test_verify_citation_warns_on_amended_regulation(self) -> None:
        # Real case: UU 13/2003 (Ketenagakerjaan) is 'Diubah oleh' UU 6/2023.
        # The regulation stays operative in amended form, so the citation
        # passes, accompanied by an advisory note.
        amended_by = self._relationship(
            "Diubah oleh",
            "Amended by",
            "UU Nomor 6 Tahun 2023 tentang Cipta Kerja",
            "/akn/id/act/uu/2023/6",
        )
        client = self._uu13_client(self._law_record(self._UU13_TITLE, self._UU13_URI, amended_by))
        result = verify_citation(client, "UU 13/2003", claimed_topic="Ketenagakerjaan")
        self.assertTrue(result.found)
        note = result.note or ""
        self.assertIn("diubah", note.lower())
        self.assertIn("Cipta Kerja", note)

    def test_verify_citation_skips_amendment_check_when_disabled(self) -> None:
        # With check_amendments=False there must be no get_law call at all —
        # the cheap, existence-only verification path.
        client = self._uu13_client()
        result = verify_citation(client, "UU 13/2003", check_amendments=False)
        self.assertTrue(result.found)
        self.assertIsNone(result.note)
        self.assertEqual(client.get_law_calls, [])

    def test_verify_citation_amendment_check_degrades_on_get_law_failure(self) -> None:
        # A failing /laws/{uri} lookup (network error, missing law, ...) must
        # not block a legitimate citation. No law fixtures are registered
        # here, so get_law raises KeyError.
        client = self._uu13_client()
        result = verify_citation(client, "UU 13/2003", claimed_topic="Ketenagakerjaan")
        self.assertTrue(result.found)
        self.assertIsNone(result.note)
        # The lookup was attempted once and its failure was swallowed.
        self.assertEqual(client.get_law_calls, ["akn/id/act/uu/2003/13"])

    def test_verify_citation_falls_back_to_trusted_recent(self) -> None:
        # pasal.id has no hit for Perpres 8/2026 (not ingested yet) but
        # trusted_recent lists it. The verifier should accept, still enforcing
        # topical overlap against the trusted title.
        client = FakePasalClient(responses={})
        from legawa.tools.trusted_recent import TRUSTED_RECENT  # noqa: PLC0415

        # Sanity check that the trusted entry this test relies on exists.
        self.assertIn("Perpres 8/2026", TRUSTED_RECENT)
        result = verify_citation(
            client,
            "Perpres 8/2026",
            claimed_topic="Rencana Aksi Nasional Pencegahan Ekstremisme Berbasis Kekerasan",
            check_amendments=False,
        )
        self.assertTrue(result.found)
        self.assertIn("trusted_recent override", result.note or "")
        self.assertIn("akn/id/act/perpres/2026/8", result.frbr_uri or "")

    def test_verify_citation_trusted_recent_rejects_topical_mismatch(self) -> None:
        # A trusted entry exists for Perpres 8/2026 (RAN PE counter-terrorism)
        # while the claim is about an unrelated subject. The verifier must
        # reject: a (kind, number, year) tuple is never blanket-whitelisted.
        client = FakePasalClient(responses={})
        result = verify_citation(
            client,
            "Perpres 8/2026",
            claimed_topic="Pengadaan Barang Jasa Pemerintah",
            check_amendments=False,
        )
        self.assertFalse(result.found)
        note = result.note or ""
        self.assertIn("judul tidak cocok", note)
        self.assertIn("trusted_recent", note)

    def test_verify_citation_trusted_recent_fallback_on_pasal_unreachable(self) -> None:
        # With pasal.id down (auth failure, network error), trusted_recent
        # should still cover the citations it knows about.
        class FailingClient:
            def search(self, **kwargs):
                raise RuntimeError("Client error '401 Unauthorized'")

        result = verify_citation(
            FailingClient(),
            "Perpres 8/2026",
            claimed_topic="Rencana Aksi Nasional Pencegahan Ekstremisme",
            check_amendments=False,
        )
        self.assertTrue(result.found)
        note = result.note or ""
        self.assertIn("trusted_recent override", note)
        self.assertIn("pasal.id unreachable", note)

    def test_verify_citation_pasal_unreachable_unknown_ref_surfaces_error(self) -> None:
        # pasal.id unreachable AND the citation absent from trusted_recent:
        # the original transport error must reach the user so the underlying
        # problem (refresh token, network) can be fixed.
        class FailingClient:
            def search(self, **kwargs):
                raise RuntimeError("Client error '401 Unauthorized'")

        result = verify_citation(
            FailingClient(),
            "UU 17/2023",
            claimed_topic="Kesehatan",
            check_amendments=False,
        )
        self.assertFalse(result.found)
        note = result.note or ""
        self.assertIn("verifikasi gagal", note)
        self.assertIn("401", note)

    def test_verify_citation_unknown_reference_still_rejects_cleanly(self) -> None:
        # Sanity: a fresh regulation that is neither in trusted_recent nor on
        # pasal.id is still rejected with TIDAK DITEMUKAN — never accepted silently.
        client = FakePasalClient(responses={})
        result = verify_citation(
            client,
            "Perpres 999/2026",
            claimed_topic="Hypothetical regulation that doesn't exist",
            check_amendments=False,
        )
        self.assertFalse(result.found)
        self.assertIn("TIDAK DITEMUKAN", result.note or "")

    def test_verify_citations_formats_mixed_results(self) -> None:
        client = FakePasalClient(
            {"UU 13/2003": self._single_hit("Undang-Undang Nomor 13 Tahun 2003", "akn/id/act/uu/2003/13")}
        )
        results = verify_citations(client, ["UU 13/2003", "Perpres 76/2021"])
        references = [item.reference for item in results]
        self.assertEqual(references, ["UU 13/2003", "Perpres 76/2021"])
        first, second = results[0], results[1]
        self.assertTrue(first.found)
        self.assertFalse(second.found)
        self.assertIn("TIDAK DITEMUKAN", format_checks(results))
# Allow running this file directly as a script: unittest.main() loads and runs
# the TestCase classes defined in this module.
if __name__ == "__main__":
    unittest.main()