File size: 2,943 Bytes
3a2ed35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
"""Generate sklearn-parity fixtures for tests/evaluation/test_calibration_metrics.py.

Run from a venv with sklearn installed (NOT the project venv):

    python -m venv /tmp/sklearn-fixture-venv
    /tmp/sklearn-fixture-venv/bin/pip install scikit-learn==1.5.2
    /tmp/sklearn-fixture-venv/bin/python scripts/_dev/generate_kappa_fixtures.py

The script:
  1. Defines CASES (input arrays + weight option).
  2. Computes sklearn.metrics.cohen_kappa_score for each case.
  3. Prints copy-pasteable Python constants for the test file.
  4. Writes inputs to tests/evaluation/fixtures/sklearn_kappa_inputs.json
     for the cross-check CI test (forgot-to-regenerate detection).

DO NOT add scikit-learn to the project's runtime dependencies — these
constants are the contract; the project hand-rolls the implementation.
"""

from __future__ import annotations

import json
from pathlib import Path

# Import guard: sklearn is intentionally NOT a project dependency (see module
# docstring), so fail fast with actionable setup instructions rather than a
# raw ImportError traceback.
try:
    from sklearn.metrics import cohen_kappa_score
except ImportError as e:
    # SystemExit keeps CLI output clean (message only, no traceback);
    # `from e` still chains the original ImportError for debugging.
    raise SystemExit(
        "scikit-learn not installed. Install in a venv outside this project:\n"
        "  python -m venv /tmp/sklearn-fixture-venv\n"
        "  /tmp/sklearn-fixture-venv/bin/pip install scikit-learn==1.5.2\n"
        "  /tmp/sklearn-fixture-venv/bin/python scripts/_dev/generate_kappa_fixtures.py"
    ) from e

# Test cases for the kappa fixtures, given as (name, y1, y2, weights) tuples
# and expanded into the dict shape the rest of the script (and the JSON
# fixture file) expects. `weights` is passed straight to sklearn
# (None = unweighted, "linear" = linear-weighted kappa).
CASES: list[dict] = [
    {"name": name, "y1": y1, "y2": y2, "weights": weights}
    for name, y1, y2, weights in [
        (
            "imbalanced_binary",
            [1, 1, 1, 0, 1, 1, 0, 1, 1, 1],
            [1, 1, 0, 0, 1, 1, 1, 1, 1, 0],
            None,
        ),
        (
            "three_point_one_diagonal_swap",
            [0, 0, 1, 1, 2, 2, 0, 1, 2, 0],
            [0, 1, 1, 1, 2, 2, 0, 1, 2, 0],
            None,
        ),
        (
            "weighted_ordinal_drift_linear",
            [0, 1, 2, 0, 1, 2, 0, 1, 2, 0],
            [0, 1, 2, 1, 1, 2, 0, 2, 2, 1],
            "linear",
        ),
    ]
]

# Destination for the machine-readable inputs consumed by the CI cross-check
# test. The repo root is two directories above scripts/_dev/, where this
# script lives.
OUT_INPUTS = Path(__file__).resolve().parents[2].joinpath(
    "tests", "evaluation", "fixtures", "sklearn_kappa_inputs.json"
)

# Emit the copy-pasteable constants. Lines are accumulated and printed in one
# shot per constant; the rendered text is identical to printing line-by-line.
print("# --- Paste into test_calibration_metrics.py ---\n")

fixture_lines = ["SKLEARN_KAPPA_FIXTURES: dict[str, float] = {"]
for spec in CASES:
    kappa = cohen_kappa_score(spec["y1"], spec["y2"], weights=spec["weights"])
    fixture_lines.append(f'    "{spec["name"]}": {kappa:.10f},  # sklearn 1.5.2')
fixture_lines.append("}")
print("\n".join(fixture_lines))

input_lines = ["\nSKLEARN_KAPPA_INPUTS: dict[str, dict] = {"]
for spec in CASES:
    input_lines.append(f'    "{spec["name"]}": {{')
    input_lines.append(f'        "y1": {spec["y1"]},')
    input_lines.append(f'        "y2": {spec["y2"]},')
    input_lines.append(f'        "weights": {spec["weights"]!r},')
    input_lines.append("    },")
input_lines.append("}")
print("\n".join(input_lines))

# Persist the raw inputs so the CI cross-check test can recompute kappa with
# the project's hand-rolled implementation and detect stale (forgotten-to-
# regenerate) fixture constants.
OUT_INPUTS.parent.mkdir(parents=True, exist_ok=True)
payload = {
    case["name"]: {
        "y1": case["y1"],
        "y2": case["y2"],
        "weights": case["weights"],
    }
    for case in CASES
}
# Explicit UTF-8: Path.write_text otherwise uses the locale-dependent default
# encoding. Trailing newline keeps the checked-in fixture POSIX-conventional
# and avoids editor/pre-commit churn; json.loads ignores it on read-back.
OUT_INPUTS.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
print(f"\n# Wrote {OUT_INPUTS}")