Spaces:
Running
Running
| import argparse | |
| import json | |
| from pathlib import Path | |
| import numpy as np | |
| import pandas as pd | |
| from sklearn.linear_model import LinearRegression | |
| from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score | |
# Feature columns read from the input CSV. The order here is the column order
# of the design matrix and is written to the saved model as "feature_order",
# so it must stay aligned with the stored coefficients.
FEATURE_ORDER = [
    "overall",
    "p90",
    "high_ratio",
    "mid_ratio",
    "std",
]
def clip01(x):
    """Clamp *x* element-wise to the closed interval [0.0, 1.0].

    Args:
        x: A scalar or array-like accepted by ``numpy.clip``.

    Returns:
        The result of ``numpy.clip`` with every value below 0.0 raised to 0.0
        and every value above 1.0 lowered to 1.0.
    """
    lower_bound, upper_bound = 0.0, 1.0
    return np.clip(x, lower_bound, upper_bound)
def _finite_or_none(value):
    """Return *value* as a Python float, or None when it is NaN or infinite."""
    number = float(value)
    return number if np.isfinite(number) else None


def main(argv=None):
    """Fit a linear calibration model from a CSV file and save it as JSON.

    Reads the CSV given by ``--input``, fits a ``LinearRegression`` of the
    ``--target`` column on the columns listed in ``FEATURE_ORDER``, and writes
    the coefficients, intercept and in-sample metrics to ``--out``. Missing
    parent directories of the output path are created.

    Args:
        argv: Optional list of command-line arguments. ``None`` (the default)
            lets argparse read ``sys.argv[1:]``, which matches calling
            ``main()`` with no arguments.

    Raises:
        ValueError: If a feature column or the target column is absent from
            the input CSV.
    """
    parser = argparse.ArgumentParser(description="Train kn-like calibration model")
    parser.add_argument("--input", required=True, help="CSV with feature columns + target")
    parser.add_argument("--target", default="kn_rate", help="target column name")
    parser.add_argument("--out", default="calibration/model.json", help="output model json")
    args = parser.parse_args(argv)

    df = pd.read_csv(args.input)

    # Fail early, naming every absent column, before any conversion is attempted.
    required_columns = FEATURE_ORDER + [args.target]
    missing = [c for c in required_columns if c not in df.columns]
    if missing:
        raise ValueError(f"Missing columns: {missing}")

    X = df[FEATURE_ORDER].astype(float).values
    y = df[args.target].astype(float).values

    model = LinearRegression()
    model.fit(X, y)

    # Metrics are in-sample and computed on predictions clipped to [0, 1];
    # the saved coefficients describe the unclipped linear model.
    pred = clip01(model.predict(X))

    # A metric can be undefined (for example R^2 on a single training row is
    # NaN). json.dumps would emit the bare token NaN, which is not valid JSON,
    # so non-finite metrics are stored as null instead.
    metrics = {
        "mae": _finite_or_none(mean_absolute_error(y, pred)),
        "rmse": _finite_or_none(np.sqrt(mean_squared_error(y, pred))),
        "r2": _finite_or_none(r2_score(y, pred)),
        "n": int(len(df)),
    }

    payload = {
        "model_type": "linear",
        "feature_order": FEATURE_ORDER,
        "coef": [float(v) for v in model.coef_.tolist()],
        "intercept": float(model.intercept_),
        "train_metrics": metrics,
    }

    out = Path(args.out)
    out.parent.mkdir(parents=True, exist_ok=True)
    out.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")

    print("Saved:", out)
    print("Metrics:", metrics)
# Run the training CLI only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()