Buckets:

linvest21
/

shft-artifacts

Files

xet

linvest21/shft-artifacts/code/self_healing_finetuning/tests/test_promotion_blocker_controller.py

linvest21

28 days ago

download

raw

3.73 kB

	from __future__ import annotations

	import json
	import tempfile
	import unittest
	from pathlib import Path
	from unittest import mock

	from orchestrator import promotion_blocker_controller as controller
	from orchestrator.promotion_blocker_controller import build_controller_decision


	def write_json(path: Path, payload: dict[str, object]) -> None:
	    """Write *payload* to *path* as UTF-8 encoded JSON.

	    Missing parent directories of *path* are created first; an existing
	    file at *path* is overwritten.
	    """
	    parent_dir = path.parent
	    parent_dir.mkdir(parents=True, exist_ok=True)
	    serialized = json.dumps(payload)
	    path.write_text(serialized, encoding="utf-8")


	def write_jsonl(path: Path, rows: list[dict[str, object]]) -> None:
	    """Write *rows* to *path* as UTF-8 JSON Lines, one JSON object per line.

	    Missing parent directories of *path* are created first; an existing
	    file at *path* is overwritten. Every record is followed by a newline.
	    An empty *rows* produces an empty file.
	    """
	    path.parent.mkdir(parents=True, exist_ok=True)
	    # Terminate each record on its own instead of joining and appending a
	    # final newline: the output is identical for non-empty input, but an
	    # empty list no longer yields a file holding a single blank line.
	    content = "".join(f"{json.dumps(row)}\n" for row in rows)
	    path.write_text(content, encoding="utf-8")


	class PromotionBlockerControllerTests(unittest.TestCase):
	    """Tests for ``build_controller_decision`` driven by on-disk eval artifacts."""

	    def test_holds_at_preference_cap_and_routes_to_critical_safety_when_cap_allows(self) -> None:
	        """Check the decision for one failing run under two preference-round caps.

	        The same eval artifacts are evaluated twice: with
	        ``max_preference_rounds=2`` the decision is expected to hold, and with
	        ``max_preference_rounds=3`` it is expected to route to
	        ``critical_safety_repair``.
	        """
	        # NOTE(review): the run id presumably encodes two prior preference
	        # rounds, which is why a cap of 2 holds -- confirm against the controller.
	        run_id = "run_demo_pref_1_pref_2"

	        quality_gate = {
	            "ok": False,
	            "checks": {
	                "critical_pass_not_regressed": {"ok": False, "detail": "-0.1 >= 0"},
	                "pairwise_loss_rate": {"ok": False, "detail": "0.2 <= 0.02"},
	            },
	        }
	        paired_eval_report = {
	            "improvement": {
	                "aggregate_abs": 0.06,
	                "critical_pass_rate_abs": -0.1,
	                "pairwise_loss_rate": 0.2,
	                "pairwise_win_rate": 0.35,
	                "losses": 1,
	            }
	        }
	        spot_check_report = {"critical_failures": 1}
	        paired_predictions = [
	            {
	                "id": "eval_revenue_risk_001",
	                "task": "finance_qa",
	                "prompt": "Revenue grew 4%, but backlog declined 8%. Summarize the revenue risk.",
	                "candidate_answer": "The risk is 200%.",
	                "baseline_score": {"score": 1.0, "critical_pass": True},
	                "candidate_score": {"score": 0.25, "critical_pass": False},
	                "delta": -0.75,
	            }
	        ]

	        with tempfile.TemporaryDirectory() as tmp:
	            workspace = Path(tmp)
	            run_dir = workspace / "runs" / run_id
	            eval_dir = run_dir / "eval"

	            # Lay out the eval artifacts under <workspace>/runs/<run_id>/eval.
	            write_json(eval_dir / "model_quality_gate.json", quality_gate)
	            write_json(eval_dir / "paired_eval_report.json", paired_eval_report)
	            write_json(eval_dir / "human_spot_check_report.json", spot_check_report)
	            write_jsonl(eval_dir / "paired_predictions.jsonl", paired_predictions)

	            # Arguments common to both decisions; only the cap (and the
	            # optional output path) differ between the two calls.
	            shared_kwargs = {
	                "run_id": run_id,
	                "release_id": "release",
	                "asset_class": "equity",
	                "role": "researcher",
	            }

	            # Point the controller at the temporary workspace for both calls.
	            with mock.patch.object(controller, "SHFT_WORKSPACE_ROOT", workspace):
	                hold = build_controller_decision(
	                    **shared_kwargs,
	                    max_preference_rounds=2,
	                )
	                route = build_controller_decision(
	                    **shared_kwargs,
	                    max_preference_rounds=3,
	                    output_path=run_dir / "autopilot" / "cap3.json",
	                )

	            # Cap of 2: the controller holds.
	            self.assertEqual(hold["next_strategy"], "hold")
	            self.assertTrue(hold["should_hold"])

	            # Cap of 3: the controller routes to the critical-safety repair.
	            self.assertEqual(route["next_strategy"], "critical_safety_repair")
	            self.assertFalse(route["should_hold"])
	            top_bucket = route["promotion_impact"]["top_failure_buckets"][0]
	            self.assertEqual(top_bucket["failure_bucket"], "accounting_sec_extraction")


	# Run the test suite when this file is executed directly rather than imported.
	if __name__ == "__main__":
	    unittest.main()

Xet Storage Details

Size: 3.73 kB
Xet hash: ef48006422ea475a41244015d7506ce34af40d42493231c314c9189db7f6c29b

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.