# Source: linvest21/shft-artifacts
# Path: code/self_healing_finetuning/tests/test_promotion_blocker_controller.py
# File size: 3.73 kB
from __future__ import annotations
import json
import tempfile
import unittest
from pathlib import Path
from unittest import mock
from orchestrator import promotion_blocker_controller as controller
from orchestrator.promotion_blocker_controller import build_controller_decision
def write_json(path: Path, payload: dict[str, object]) -> None:
    """Serialize ``payload`` as JSON into ``path``.

    Missing parent directories are created first. The file is written as
    UTF-8 text and any existing file at ``path`` is overwritten.
    """
    parent_dir = path.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(payload)
    path.write_text(serialized, encoding="utf-8")
def write_jsonl(path: Path, rows: list[dict[str, object]]) -> None:
    """Write ``rows`` to ``path`` in JSON Lines form, one object per line.

    Missing parent directories are created first. The file is written as
    UTF-8 text and any existing file at ``path`` is overwritten.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    encoded_rows = [json.dumps(row) for row in rows]
    body = "\n".join(encoded_rows)
    # The trailing newline is appended unconditionally, so an empty ``rows``
    # produces a file containing a single "\n".
    path.write_text(f"{body}\n", encoding="utf-8")
class PromotionBlockerControllerTests(unittest.TestCase):
    """Exercise ``build_controller_decision`` against a failing evaluation run."""

    def test_holds_at_preference_cap_and_routes_to_critical_safety_when_cap_allows(self) -> None:
        # Same on-disk fixtures, two decisions: with max_preference_rounds=2
        # the controller is expected to hold; with max_preference_rounds=3 it
        # is expected to route to "critical_safety_repair".
        with tempfile.TemporaryDirectory() as scratch:
            workspace = Path(scratch)
            run_id = "run_demo_pref_1_pref_2"
            run_dir = workspace / "runs" / run_id
            eval_dir = run_dir / "eval"

            # Quality gate that fails on both checks.
            quality_gate = {
                "ok": False,
                "checks": {
                    "critical_pass_not_regressed": {"ok": False, "detail": "-0.1 >= 0"},
                    "pairwise_loss_rate": {"ok": False, "detail": "0.2 <= 0.02"},
                },
            }
            paired_report = {
                "improvement": {
                    "aggregate_abs": 0.06,
                    "critical_pass_rate_abs": -0.1,
                    "pairwise_loss_rate": 0.2,
                    "pairwise_win_rate": 0.35,
                    "losses": 1,
                }
            }
            spot_check = {"critical_failures": 1}
            # Single paired prediction where the candidate scores below the
            # baseline and fails the critical check.
            predictions = [
                {
                    "id": "eval_revenue_risk_001",
                    "task": "finance_qa",
                    "prompt": "Revenue grew 4%, but backlog declined 8%. Summarize the revenue risk.",
                    "candidate_answer": "The risk is 200%.",
                    "baseline_score": {"score": 1.0, "critical_pass": True},
                    "candidate_score": {"score": 0.25, "critical_pass": False},
                    "delta": -0.75,
                }
            ]

            write_json(eval_dir / "model_quality_gate.json", quality_gate)
            write_json(eval_dir / "paired_eval_report.json", paired_report)
            write_json(eval_dir / "human_spot_check_report.json", spot_check)
            write_jsonl(eval_dir / "paired_predictions.jsonl", predictions)

            # Arguments common to both decisions; only the preference-round
            # cap (and the optional output path) differ between the calls.
            shared_kwargs = {
                "run_id": run_id,
                "release_id": "release",
                "asset_class": "equity",
                "role": "researcher",
            }

            # Point the controller's workspace root at the temporary directory
            # for the duration of both calls.
            with mock.patch.object(controller, "SHFT_WORKSPACE_ROOT", workspace):
                hold = build_controller_decision(
                    **shared_kwargs,
                    max_preference_rounds=2,
                )
                route = build_controller_decision(
                    **shared_kwargs,
                    max_preference_rounds=3,
                    output_path=run_dir / "autopilot" / "cap3.json",
                )

            self.assertEqual(hold["next_strategy"], "hold")
            self.assertTrue(hold["should_hold"])

            self.assertEqual(route["next_strategy"], "critical_safety_repair")
            self.assertFalse(route["should_hold"])

            top_bucket = route["promotion_impact"]["top_failure_buckets"][0]
            self.assertEqual(top_bucket["failure_bucket"], "accounting_sec_extraction")
# Allow this test module to be executed directly as a script; unittest.main()
# then runs the tests defined in this file.
if __name__ == "__main__":
    unittest.main()

# Xet Storage Details
#
# Size: 3.73 kB
# Xet hash: ef48006422ea475a41244015d7506ce34af40d42493231c314c9189db7f6c29b
#
# Xet efficiently stores files, intelligently splitting them into unique
# chunks and accelerating uploads and downloads.