"""Regression check for legawa's pasal.id corpus dependencies.
Run periodically (suggested 3-week cadence) to detect drift in:
- Canonical probe URIs in agents/peneliti._CANONICAL_PROBES — pasal.id may add
new "Dicabut oleh" / "Diubah oleh" relationships, change titles, or change
status as the corpus evolves.
- Fixture-cited regulations across tests/fixtures/*.txt — if a citation in a
fixture changes, this catches it.
Operational caveat: this script does NOT re-run the live agents (which depend
on the user's local llama.cpp endpoints and the user's pasal.id token). It
only validates the static corpus dependencies the agents rely on. The user
still needs to manually re-run `legawa surat / research / draft` against the
fixtures to validate end-to-end behaviour after any model swap.
Usage:
python scripts/regression_check.py # compare against baseline, exit 1 on drift
python scripts/regression_check.py --update-baseline # snapshot current state as the new baseline
python scripts/regression_check.py --json # machine-readable output
Environment:
PASAL_API_TOKEN must be set (loaded via legawa.config.load_settings).
Exit codes:
0 — no drift
1 — drift detected (review output, decide if expected; rerun with
--update-baseline if changes are intentional)
2 — runtime error (network, missing token, etc.)
"""
from __future__ import annotations
import json
import sys
from pathlib import Path
from typing import Any
# ROOT is the directory one level above the folder that contains this script
# (``parents[1]`` of the resolved script path); SRC is its ``src`` child.
ROOT = Path(__file__).resolve().parents[1]
SRC = ROOT / "src"
# Prepend SRC to sys.path (once) before the ``legawa`` imports that follow, so
# they can be resolved from that directory.
if str(SRC) not in sys.path:
    sys.path.insert(0, str(SRC))
from legawa.agents.peneliti import _CANONICAL_PROBES # noqa: E402
from legawa.config import load_settings # noqa: E402
from legawa.tools.cache import CachingPasalClient # noqa: E402
from legawa.tools.citations import _amendment_status, extract_citations # noqa: E402
from legawa.tools.pasal import PasalClient # noqa: E402
# The baseline snapshot is stored in the same directory as this script.
BASELINE_PATH = Path(__file__).resolve().with_name("regression_baseline.json")
# Fixture texts scanned for citations live under <ROOT>/tests/fixtures.
FIXTURES_DIR = ROOT.joinpath("tests", "fixtures")
def _format_rel(rel: dict[str, Any] | None) -> dict[str, str] | None:
if not rel:
return None
related = rel.get("related_work") or rel.get("related") or {}
if not isinstance(related, dict):
return None
return {
"title": (related.get("title") or "")[:120],
"frbr_uri": related.get("frbr_uri") or "?",
}
def _probe_uri(pasal: Any, uri: str) -> dict[str, Any]:
try:
law = pasal.get_law(uri)
except Exception as exc: # noqa: BLE001
return {"uri": uri, "error": str(exc)[:200]}
work = law.get("work") if isinstance(law, dict) else {}
title = (law.get("title") if isinstance(law, dict) else None) or (
work.get("title") if isinstance(work, dict) else None
)
status = (law.get("status") if isinstance(law, dict) else None) or (
work.get("status") if isinstance(work, dict) else None
)
repealed, amended = _amendment_status(pasal, uri)
return {
"uri": uri,
"title": (title or "?")[:120],
"status": status or "?",
"repealed_by": _format_rel(repealed),
"amended_by": [_format_rel(r) for r in amended[:5]],
}
def _collect_canonical_uris() -> list[str]:
    """Return the distinct URIs of ``_CANONICAL_PROBES`` in first-seen order.

    Each entry of ``_CANONICAL_PROBES`` is unpacked as a ``(keywords, uri)``
    pair; only the URI is kept.
    """
    # dict keys keep insertion order, so this de-duplicates without reordering.
    return list(dict.fromkeys(uri for _keywords, uri in _CANONICAL_PROBES))
def _collect_fixture_citations() -> dict[str, list[str]]:
    """Map each fixture file name to the citations extracted from its text.

    Scans ``*.txt`` files directly under ``FIXTURES_DIR`` in sorted order,
    reads each as UTF-8 and runs ``extract_citations`` on it. Fixtures that
    yield no citations are left out of the result.
    """
    return {
        path.name: found
        for path in sorted(FIXTURES_DIR.glob("*.txt"))
        if (found := extract_citations(path.read_text(encoding="utf-8")))
    }
def _build_report(pasal: Any) -> dict[str, Any]:
    """Assemble the full snapshot: one probe per canonical URI plus fixture citations.

    Args:
        pasal: Client object passed through to ``_probe_uri``.

    Returns:
        A dict with ``schema_version`` (always ``1``), ``canonical_probes``
        (URI -> probe record) and ``fixture_citations`` (file name -> refs).
    """
    probe_records: dict[str, Any] = {}
    for uri in _collect_canonical_uris():
        probe_records[uri] = _probe_uri(pasal, uri)

    report: dict[str, Any] = {
        "schema_version": 1,
        "canonical_probes": probe_records,
    }
    report["fixture_citations"] = _collect_fixture_citations()
    return report
def _diff(baseline: dict[str, Any], current: dict[str, Any]) -> dict[str, Any]:
drift: dict[str, Any] = {
"probe_changes": [],
"fixture_added": {},
"fixture_removed": {},
}
base_probes = baseline.get("canonical_probes", {})
cur_probes = current.get("canonical_probes", {})
for uri in sorted(set(base_probes) | set(cur_probes)):
before = base_probes.get(uri)
after = cur_probes.get(uri)
if before != after:
drift["probe_changes"].append({"uri": uri, "before": before, "after": after})
base_fix = baseline.get("fixture_citations", {})
cur_fix = current.get("fixture_citations", {})
for name in sorted(set(base_fix) | set(cur_fix)):
before = set(base_fix.get(name, []))
after = set(cur_fix.get(name, []))
added = sorted(after - before)
removed = sorted(before - after)
if added:
drift["fixture_added"][name] = added
if removed:
drift["fixture_removed"][name] = removed
return drift
def _has_drift(d: dict[str, Any]) -> bool:
return bool(d["probe_changes"] or d["fixture_added"] or d["fixture_removed"])
def main(argv: list[str]) -> int:
    """Run the regression check and return the process exit code.

    Args:
        argv: Command-line arguments without the program name. Only the
            ``--update-baseline`` and ``--json`` flags are looked for; any
            other argument is ignored.

    Returns:
        ``0`` when no drift was found or the baseline was rewritten, ``1``
        when drift was detected, and ``2`` on a runtime error: settings could
        not be loaded, building the report raised, or the baseline file is
        missing, unreadable, not valid JSON, or not a JSON object.
    """
    update = "--update-baseline" in argv
    as_json = "--json" in argv

    try:
        settings = load_settings()
    except RuntimeError as exc:
        print(f"FAIL: {exc}", file=sys.stderr)
        return 2

    pasal = CachingPasalClient(PasalClient(settings))
    try:
        report = _build_report(pasal)
    except Exception as exc:  # noqa: BLE001
        print(f"FAIL: probe error: {exc}", file=sys.stderr)
        return 2
    finally:
        # Always release the client, whether or not probing succeeded.
        pasal.close()
    # NOTE(review): _probe_uri catches get_law failures and stores them as
    # "error" records, so such failures show up as drift (exit 1) or get
    # written into the baseline instead of producing exit 2 — confirm intended.

    probe_count = len(report["canonical_probes"])
    fixture_ref_count = sum(len(v) for v in report["fixture_citations"].values())

    if update:
        BASELINE_PATH.write_text(
            json.dumps(report, indent=2, ensure_ascii=False, sort_keys=True) + "\n",
            encoding="utf-8",
        )
        print(f"Baseline updated: {BASELINE_PATH.relative_to(ROOT)}")
        print(
            f" canonical_probes: {probe_count} "
            f"fixture_citations: {fixture_ref_count} refs "
            f"across {len(report['fixture_citations'])} fixtures"
        )
        return 0

    if not BASELINE_PATH.exists():
        print(
            f"No baseline at {BASELINE_PATH.relative_to(ROOT)}. "
            "Run with --update-baseline to create one.",
            file=sys.stderr,
        )
        return 2

    # An uncaught exception would end the process with status 1, which this
    # script reserves for "drift detected"; report a bad baseline as 2 instead.
    # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
    try:
        baseline = json.loads(BASELINE_PATH.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        print(
            f"FAIL: cannot read baseline {BASELINE_PATH.relative_to(ROOT)}: {exc}",
            file=sys.stderr,
        )
        return 2
    if not isinstance(baseline, dict):
        print(
            f"FAIL: baseline {BASELINE_PATH.relative_to(ROOT)} is not a JSON object.",
            file=sys.stderr,
        )
        return 2

    drift = _diff(baseline, report)
    drifted = _has_drift(drift)

    if as_json:
        print(json.dumps(drift, indent=2, ensure_ascii=False))
    elif not drifted:
        print(
            f"OK: no drift across {probe_count} canonical probes "
            f"and {fixture_ref_count} fixture refs."
        )
    else:
        print("DRIFT detected:")
        for change in drift["probe_changes"]:
            print(f" probe: {change['uri']}")
            # Records are cut to 160 characters to keep the summary readable.
            print(f" before: {json.dumps(change['before'], ensure_ascii=False)[:160]}")
            print(f" after : {json.dumps(change['after'], ensure_ascii=False)[:160]}")
        for fixture, refs in drift["fixture_added"].items():
            print(f" + {fixture}: added {refs}")
        for fixture, refs in drift["fixture_removed"].items():
            print(f" - {fixture}: removed {refs}")
        print(
            "\nIf the changes are expected (e.g. you intentionally added a "
            "canonical probe or a fixture citation), regenerate the baseline:\n"
            " python scripts/regression_check.py --update-baseline"
        )

    return 1 if drifted else 0
if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the exit status.
    raise SystemExit(main(sys.argv[1:]))