Spaces:

Nomearod
/

agentbench

Running

App Files Files Community

agentbench / scripts /_dev /generate_kappa_fixtures.py

Nomearod

test(calibration): sklearn-parity fixtures + cross-check CI test

3a2ed35 10 days ago

raw

history blame contribute delete

2.94 kB

	"""Generate sklearn-parity fixtures for tests/evaluation/test_calibration_metrics.py.

	Run from a venv with sklearn installed (NOT the project venv):

	python -m venv /tmp/sklearn-fixture-venv
	/tmp/sklearn-fixture-venv/bin/pip install scikit-learn==1.5.2
	/tmp/sklearn-fixture-venv/bin/python scripts/_dev/generate_kappa_fixtures.py

	The script:
	1. Defines CASES (input arrays + weight option).
	2. Computes sklearn.metrics.cohen_kappa_score for each case.
	3. Prints copy-pasteable Python constants for the test file.
	4. Writes inputs to tests/evaluation/fixtures/sklearn_kappa_inputs.json
	for the cross-check CI test (forgot-to-regenerate detection).

	DO NOT add scikit-learn to the project's runtime dependencies — these
	constants are the contract; the project hand-rolls the implementation.
	"""

	from __future__ import annotations

	import json
	from pathlib import Path

	# scikit-learn is deliberately absent from the project's own dependencies
	# (see the module docstring), so a failed import is turned into a clean
	# exit that carries the exact commands for building a throwaway venv.
	try:
	    from sklearn.metrics import cohen_kappa_score
	except ImportError as import_error:
	    raise SystemExit(
	        "\n".join(
	            [
	                "scikit-learn not installed. Install in a venv outside this project:",
	                " python -m venv /tmp/sklearn-fixture-venv",
	                " /tmp/sklearn-fixture-venv/bin/pip install scikit-learn==1.5.2",
	                " /tmp/sklearn-fixture-venv/bin/python scripts/_dev/generate_kappa_fixtures.py",
	            ]
	        )
	    ) from import_error

	# Fixture inputs, one dict per case with keys "name", "y1", "y2", "weights".
	# "y1"/"y2" are the two label sequences handed to cohen_kappa_score and
	# "weights" is forwarded as its ``weights=`` argument (None or "linear").
	# Rows are written as compact tuples and expanded into dicts below.
	CASES: list[dict] = [
	    {"name": case_name, "y1": first, "y2": second, "weights": weighting}
	    for case_name, first, second, weighting in (
	        (
	            "imbalanced_binary",
	            [1, 1, 1, 0, 1, 1, 0, 1, 1, 1],
	            [1, 1, 0, 0, 1, 1, 1, 1, 1, 0],
	            None,
	        ),
	        (
	            # The two sequences differ at a single position (index 1).
	            "three_point_one_diagonal_swap",
	            [0, 0, 1, 1, 2, 2, 0, 1, 2, 0],
	            [0, 1, 1, 1, 2, 2, 0, 1, 2, 0],
	            None,
	        ),
	        (
	            # Every disagreement here is off by exactly one category.
	            "weighted_ordinal_drift_linear",
	            [0, 1, 2, 0, 1, 2, 0, 1, 2, 0],
	            [0, 1, 2, 1, 1, 2, 0, 2, 2, 1],
	            "linear",
	        ),
	    )
	]

	# Destination of the inputs JSON. ``parents[2]`` climbs three directory
	# levels from this file's resolved location; for a script stored at
	# scripts/_dev/<this file> that is the directory containing ``scripts``.
	OUT_INPUTS = Path(__file__).resolve().parents[2].joinpath(
	    "tests",
	    "evaluation",
	    "fixtures",
	    "sklearn_kappa_inputs.json",
	)

	# Emit two copy-pasteable Python dict literals on stdout: first the expected
	# kappa value per case, then the inputs those values were computed from.
	print("# --- Paste into test_calibration_metrics.py ---\n")
	print("SKLEARN_KAPPA_FIXTURES: dict[str, float] = {")
	for spec in CASES:
	    kappa = cohen_kappa_score(spec["y1"], spec["y2"], weights=spec["weights"])
	    # Fixed 10-decimal formatting keeps the pasted constant stable.
	    print(f' "{spec["name"]}": {kappa:.10f}, # sklearn 1.5.2')
	print("}")

	print("\nSKLEARN_KAPPA_INPUTS: dict[str, dict] = {")
	for spec in CASES:
	    # One print call per case; sep="\n" puts each fragment on its own line.
	    # ``!r`` renders the weights option as a literal (None or 'linear').
	    print(
	        f' "{spec["name"]}": {{',
	        f' "y1": {spec["y1"]},',
	        f' "y2": {spec["y2"]},',
	        f' "weights": {spec["weights"]!r},',
	        " },",
	        sep="\n",
	    )
	print("}")

	# Persist the raw inputs (not the kappa values) as JSON keyed by case name;
	# per the module docstring, the cross-check CI test consumes this file.
	OUT_INPUTS.parent.mkdir(parents=True, exist_ok=True)
	inputs_by_name = {
	    case["name"]: {
	        "y1": case["y1"],
	        "y2": case["y2"],
	        "weights": case["weights"],
	    }
	    for case in CASES
	}
	# An explicit encoding keeps the written fixture independent of the locale
	# default encoding of whichever machine regenerates it.
	OUT_INPUTS.write_text(json.dumps(inputs_by_name, indent=2), encoding="utf-8")
	print(f"\n# Wrote {OUT_INPUTS}")