Spaces:
Running
Running
| from pathlib import Path | |
| import pandas as pd | |
| import numpy as np | |
# All paths are anchored at the directory that contains this script.
BASE_DIR = Path(__file__).resolve().parent
OUT_DIR = BASE_DIR.joinpath("outputs")

# Input predictions and the evaluation table written next to them.
PRED_PATH = OUT_DIR.joinpath("model_predictions_label_1to1.csv")
OUT_PATH = OUT_DIR.joinpath("threshold_evaluation_label_1to1.csv")

# Probability cut-offs evaluated for every model.
THRESHOLDS = [
    0.50,
    0.55,
    0.60,
    0.65,
    0.70,
    0.75,
    0.80,
]
# Column order of the evaluation table; also used so that an empty input
# still produces a frame with the expected columns.
_RESULT_COLUMNS = [
    "model",
    "mode",
    "threshold",
    "top_pct",
    "total_trades",
    "kept_trades",
    "coverage_pct",
    "baseline_hit_rate",
    "kept_hit_rate",
    "lift_vs_baseline_pct",
    "baseline_expectancy_R",
    "kept_expectancy_R",
    "wins_kept",
    "losses_kept",
]


def _summary_row(model_name, mode, threshold, top_pct, kept, total_trades,
                 baseline_hit_rate):
    """Build one output row describing the subset ``kept`` of a model's trades."""
    kept_trades = len(kept)
    row = {
        "model": model_name,
        "mode": mode,
        "threshold": threshold,
        "top_pct": top_pct,
        "total_trades": total_trades,
        "kept_trades": kept_trades,
        "coverage_pct": round((kept_trades / total_trades) * 100, 2),
        "baseline_hit_rate": round(baseline_hit_rate, 4),
        "kept_hit_rate": None,
        "lift_vs_baseline_pct": None,
        "baseline_expectancy_R": round(2 * baseline_hit_rate - 1, 4),
        "kept_expectancy_R": None,
        "wins_kept": 0,
        "losses_kept": 0,
    }
    if kept_trades == 0:
        # Nothing survived the filter: rates are undefined, counts stay at 0.
        return row

    kept_hit_rate = kept["y_true"].mean()
    wins = kept["y_true"].sum()
    row["kept_hit_rate"] = round(kept_hit_rate, 4)
    if baseline_hit_rate > 0:
        # Relative improvement over the unfiltered hit rate, in percent.
        lift = ((kept_hit_rate / baseline_hit_rate) - 1) * 100
        row["lift_vs_baseline_pct"] = round(lift, 2)
    row["kept_expectancy_R"] = round(2 * kept_hit_rate - 1, 4)
    row["wins_kept"] = int(wins)
    row["losses_kept"] = int(kept_trades - wins)
    return row


def evaluate_thresholds(
    df: pd.DataFrame,
    thresholds=None,
    top_pcts=(10, 20, 30, 40, 50),
) -> pd.DataFrame:
    """Summarise, per model, how filtering trades by ``y_prob`` changes hit rate.

    For each value of the ``model`` column two families of rows are produced:

    * ``mode == "threshold"``: trades with ``y_prob >= threshold``.
    * ``mode == "top_pct"``: the ``ceil(total * pct / 100)`` trades (at least
      one) with the highest ``y_prob``; ``threshold`` then holds the lowest
      probability inside that bucket.

    Hit rate is the mean of ``y_true``; expectancy is ``2 * hit_rate - 1``,
    which equals the mean payoff when every win is +1 and every loss is -1
    (assumes ``y_true`` is a 0/1 label).

    Args:
        df: Predictions with columns ``model``, ``y_true`` and ``y_prob``.
        thresholds: Probability cut-offs to evaluate. ``None`` (the default)
            uses the module-level ``THRESHOLDS``.
        top_pcts: Percentages of top-ranked trades to evaluate.

    Returns:
        One row per (model, threshold) and per (model, top_pct). An empty
        input yields an empty frame that still carries all result columns.
    """
    if thresholds is None:
        thresholds = THRESHOLDS

    rows = []
    for model_name, group in df.groupby("model"):
        total_trades = len(group)
        baseline_hit_rate = group["y_true"].mean()

        for th in thresholds:
            kept = group[group["y_prob"] >= th]
            rows.append(
                _summary_row(
                    model_name, "threshold", th, None, kept,
                    total_trades, baseline_hit_rate,
                )
            )

        # Rank once per model instead of once per percentile. A stable sort
        # keeps tie-breaking at the bucket boundary reproducible.
        ranked = group.sort_values("y_prob", ascending=False, kind="mergesort")
        for top_pct in top_pcts:
            k = max(int(np.ceil(total_trades * top_pct / 100)), 1)
            kept = ranked.head(k)
            min_prob_in_bucket = round(float(kept["y_prob"].min()), 4)
            rows.append(
                _summary_row(
                    model_name, "top_pct", min_prob_in_bucket, top_pct, kept,
                    total_trades, baseline_hit_rate,
                )
            )

    # Passing the columns explicitly keeps the schema stable when rows is empty.
    return pd.DataFrame(rows, columns=_RESULT_COLUMNS)
def main():
    """Evaluate the predictions CSV, save the result table and print reports.

    Reads ``PRED_PATH``, checks that the ``model``, ``y_true`` and ``y_prob``
    columns are present, writes the evaluation to ``OUT_PATH`` and prints
    three views of it: threshold rows, top-percentile rows, and the best rows
    by ``kept_expectancy_R`` among those with at least 100 kept trades.

    Raises:
        ValueError: If a required column is missing from the predictions file.
    """
    predictions = pd.read_csv(PRED_PATH)

    required_cols = {"model", "y_true", "y_prob"}
    missing = required_cols - set(predictions.columns)
    if missing:
        raise ValueError(f"Missing required columns in predictions file: {missing}")

    results = evaluate_thresholds(predictions)
    results.to_csv(OUT_PATH, index=False)
    print(f"Saved threshold evaluation to: {OUT_PATH}")

    # Each report section: (heading, value of "mode", secondary sort column).
    sections = (
        ("\n=== Threshold rows only ===", "threshold", "threshold"),
        ("\n=== Top-percentile rows only ===", "top_pct", "top_pct"),
    )
    for heading, mode, sort_col in sections:
        print(heading)
        section_rows = results[results["mode"] == mode]
        ordered = section_rows.sort_values(["model", sort_col])
        print(ordered.to_string(index=False))

    print("\n=== Best rows by kept_expectancy_R (minimum 100 kept trades) ===")
    enough_trades = results["kept_trades"] >= 100
    has_expectancy = results["kept_expectancy_R"].notna()
    candidates = results[enough_trades & has_expectancy].copy()
    if candidates.empty:
        print("No qualifying rows")
        return

    ranked = candidates.sort_values(
        ["kept_expectancy_R", "kept_hit_rate", "kept_trades"],
        ascending=[False, False, False]
    )
    print(ranked.head(15).to_string(index=False))


if __name__ == "__main__":
    main()