Buckets:
linvest21/shft-artifacts/code/self_healing_finetuning/tests/test_promotion_blocker_controller.py
| from __future__ import annotations | |
| import json | |
| import tempfile | |
| import unittest | |
| from pathlib import Path | |
| from unittest import mock | |
| from orchestrator import promotion_blocker_controller as controller | |
| from orchestrator.promotion_blocker_controller import build_controller_decision | |
def write_json(path: Path, payload: dict[str, object]) -> None:
    """Write ``payload`` to ``path`` as a single JSON document.

    Args:
        path: Destination file. Missing parent directories are created.
        payload: Mapping serialized with ``json.dumps`` using default options.
    """
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    # Serialize before opening so a serialization error never truncates the file.
    encoded = json.dumps(payload)
    with path.open("w", encoding="utf-8") as handle:
        handle.write(encoded)
def write_jsonl(path: Path, rows: list[dict[str, object]]) -> None:
    """Write ``rows`` to ``path`` as JSON Lines (one JSON object per line).

    Every record is terminated by a newline. An empty ``rows`` list produces
    an empty file rather than a file holding a single blank line, so readers
    that parse the file line by line never see an empty record.

    Args:
        path: Destination file. Missing parent directories are created.
        rows: Records to serialize, each with ``json.dumps`` default options.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    # Terminate each record individually instead of join-then-append, which
    # would emit a stray "\n" when there are no rows.
    content = "".join(json.dumps(row) + "\n" for row in rows)
    path.write_text(content, encoding="utf-8")
class PromotionBlockerControllerTests(unittest.TestCase):
    """Tests for ``build_controller_decision`` run against a temporary workspace."""

    def test_holds_at_preference_cap_and_routes_to_critical_safety_when_cap_allows(self) -> None:
        """The same failing run holds at a cap of 2 and routes to repair at a cap of 3.

        Eval artifacts describing a failed quality gate are written under
        ``<workspace>/runs/<run_id>/eval`` and the controller's
        ``SHFT_WORKSPACE_ROOT`` is patched to that workspace. The decision is
        then built twice, varying only ``max_preference_rounds`` (and, for the
        second call, an explicit ``output_path``).
        """
        # NOTE(review): the run id presumably encodes two completed preference
        # rounds, which would explain why a cap of 2 holds -- confirm in the controller.
        run_id = "run_demo_pref_1_pref_2"

        quality_gate = {
            "ok": False,
            "checks": {
                "critical_pass_not_regressed": {"ok": False, "detail": "-0.1 >= 0"},
                "pairwise_loss_rate": {"ok": False, "detail": "0.2 <= 0.02"},
            },
        }
        paired_eval = {
            "improvement": {
                "aggregate_abs": 0.06,
                "critical_pass_rate_abs": -0.1,
                "pairwise_loss_rate": 0.2,
                "pairwise_win_rate": 0.35,
                "losses": 1,
            }
        }
        spot_check = {"critical_failures": 1}
        regressed_prediction = {
            "id": "eval_revenue_risk_001",
            "task": "finance_qa",
            "prompt": "Revenue grew 4%, but backlog declined 8%. Summarize the revenue risk.",
            "candidate_answer": "The risk is 200%.",
            "baseline_score": {"score": 1.0, "critical_pass": True},
            "candidate_score": {"score": 0.25, "critical_pass": False},
            "delta": -0.75,
        }
        # Arguments common to both controller invocations.
        shared_kwargs = {
            "run_id": run_id,
            "release_id": "release",
            "asset_class": "equity",
            "role": "researcher",
        }

        with tempfile.TemporaryDirectory() as tmp:
            workspace = Path(tmp)
            run_dir = workspace / "runs" / run_id
            eval_dir = run_dir / "eval"

            write_json(eval_dir / "model_quality_gate.json", quality_gate)
            write_json(eval_dir / "paired_eval_report.json", paired_eval)
            write_json(eval_dir / "human_spot_check_report.json", spot_check)
            write_jsonl(eval_dir / "paired_predictions.jsonl", [regressed_prediction])

            with mock.patch.object(controller, "SHFT_WORKSPACE_ROOT", workspace):
                hold = build_controller_decision(
                    max_preference_rounds=2,
                    **shared_kwargs,
                )
                route = build_controller_decision(
                    max_preference_rounds=3,
                    output_path=run_dir / "autopilot" / "cap3.json",
                    **shared_kwargs,
                )

            # Cap of 2: the controller must stop.
            self.assertEqual(hold["next_strategy"], "hold")
            self.assertTrue(hold["should_hold"])

            # Cap of 3: the controller must route to the safety repair strategy.
            self.assertEqual(route["next_strategy"], "critical_safety_repair")
            self.assertFalse(route["should_hold"])
            leading_bucket = route["promotion_impact"]["top_failure_buckets"][0]
            self.assertEqual(leading_bucket["failure_bucket"], "accounting_sec_extraction")
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
Xet Storage Details
- Size: 3.73 kB
- Xet hash: ef48006422ea475a41244015d7506ce34af40d42493231c314c9189db7f6c29b
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.