"""Train a linear calibration model from a CSV file and export it as JSON.

The input CSV must contain every column listed in ``FEATURE_ORDER`` plus the
target column.  The fitted coefficients, intercept, and training metrics are
written to a JSON file.
"""
import argparse
import json
from pathlib import Path
from typing import Optional, Sequence

import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Feature columns, in the order they are fed to the model.  The same order is
# written to the output JSON so the coefficients can be matched to features.
FEATURE_ORDER = ["overall", "p90", "high_ratio", "mid_ratio", "std"]


def clip01(x):
    """Clamp *x* (scalar or array-like) element-wise to the range [0, 1]."""
    return np.clip(x, 0.0, 1.0)


def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the training script."""
    parser = argparse.ArgumentParser(description="Train kn-like calibration model")
    parser.add_argument("--input", required=True, help="CSV with feature columns + target")
    parser.add_argument("--target", default="kn_rate", help="target column name")
    parser.add_argument("--out", default="calibration/model.json", help="output model json")
    return parser


def _load_training_data(csv_path, target):
    """Read the CSV and return ``(X, y)`` as float arrays.

    Raises:
        ValueError: if a required column is missing, the file has no rows,
            a value cannot be converted to float, or any feature/target
            value is NaN or infinite.
    """
    df = pd.read_csv(csv_path)

    missing = [c for c in FEATURE_ORDER + [target] if c not in df.columns]
    if missing:
        raise ValueError(f"Missing columns: {missing}")
    if len(df) == 0:
        raise ValueError(f"No data rows found in {csv_path}")

    X = df[FEATURE_ORDER].astype(float).values
    y = df[target].astype(float).values

    # The regression cannot be fitted on NaN/inf; fail early and point at the
    # offending rows instead of surfacing an error from inside the estimator.
    bad_rows = ~(np.isfinite(X).all(axis=1) & np.isfinite(y))
    if bad_rows.any():
        indices = np.flatnonzero(bad_rows)[:10].tolist()
        raise ValueError(
            f"Non-finite values in {int(bad_rows.sum())} row(s); "
            f"first row indices: {indices}"
        )
    return X, y


def _finite_or_none(value):
    """Return *value* as a float, or ``None`` when it is NaN or infinite.

    ``json.dumps`` would otherwise emit ``NaN``/``Infinity`` tokens, which are
    not valid JSON.
    """
    number = float(value)
    return number if np.isfinite(number) else None


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Fit the linear model and write the model JSON.

    Args:
        argv: Command-line arguments without the program name.  ``None``
            (the default) makes argparse read ``sys.argv``.

    Raises:
        ValueError: if the input data is unusable (see
            ``_load_training_data``) or the fitted parameters are not finite.
    """
    args = _build_parser().parse_args(argv)
    X, y = _load_training_data(args.input, args.target)

    model = LinearRegression()
    model.fit(X, y)

    # Metrics are computed on the training data, using predictions clipped
    # to [0, 1].
    pred = clip01(model.predict(X))
    metrics = {
        "mae": _finite_or_none(mean_absolute_error(y, pred)),
        "rmse": _finite_or_none(np.sqrt(mean_squared_error(y, pred))),
        # r2 is NaN when there are fewer than two samples; stored as null.
        "r2": _finite_or_none(r2_score(y, pred)),
        "n": int(len(y)),
    }

    payload = {
        "model_type": "linear",
        "feature_order": FEATURE_ORDER,
        "coef": [float(v) for v in model.coef_.tolist()],
        "intercept": float(model.intercept_),
        "train_metrics": metrics,
    }

    out = Path(args.out)
    out.parent.mkdir(parents=True, exist_ok=True)
    # allow_nan=False: refuse to write a model file that is not strict JSON.
    out.write_text(
        json.dumps(payload, ensure_ascii=False, indent=2, allow_nan=False),
        encoding="utf-8",
    )

    print("Saved:", out)
    print("Metrics:", metrics)


if __name__ == "__main__":
    main()