# nse-bot-backend / evaluate_thresholds.py
# ash001's picture
# Deploy from GitHub Actions to nse-bot-backend
# 789e5eb verified
from pathlib import Path
import pandas as pd
import numpy as np
# All paths are anchored at the directory that contains this script, so the
# script behaves the same regardless of the current working directory.
BASE_DIR = Path(__file__).resolve().parent
OUT_DIR = BASE_DIR.joinpath("outputs")

# Input: per-trade model predictions. Output: the threshold evaluation table.
PRED_PATH = OUT_DIR.joinpath("model_predictions_label_1to1.csv")
OUT_PATH = OUT_DIR.joinpath("threshold_evaluation_label_1to1.csv")

# Probability cut-offs evaluated for every model (a trade is kept when its
# predicted probability is >= the cut-off).
THRESHOLDS = [
    0.50,
    0.55,
    0.60,
    0.65,
    0.70,
    0.75,
    0.80,
]
# Column order of the evaluation table. Passing it to the DataFrame
# constructor guarantees a well-defined schema even when no rows are produced.
_RESULT_COLUMNS = [
    "model",
    "mode",
    "threshold",
    "top_pct",
    "total_trades",
    "kept_trades",
    "coverage_pct",
    "baseline_hit_rate",
    "kept_hit_rate",
    "lift_vs_baseline_pct",
    "baseline_expectancy_R",
    "kept_expectancy_R",
    "wins_kept",
    "losses_kept",
]


def _round_or_none(value, ndigits):
    """Round ``value`` to ``ndigits`` decimals, passing ``None`` through."""
    return None if value is None else round(value, ndigits)


def _summary_row(model_name, mode, threshold, top_pct, kept, total_trades,
                 baseline_hit_rate):
    """Build one result row describing the ``kept`` subset of a model's trades.

    ``kept`` is the filtered slice of the model's rows; ``total_trades`` and
    ``baseline_hit_rate`` describe the unfiltered group. When ``kept`` is
    empty the hit rate, lift and expectancy are reported as ``None``.
    """
    kept_trades = len(kept)
    wins = kept["y_true"].sum()

    if kept_trades:
        kept_hit_rate = kept["y_true"].mean()
        # Lift is undefined when the baseline never wins (division by zero).
        lift = (
            ((kept_hit_rate / baseline_hit_rate) - 1) * 100
            if baseline_hit_rate > 0
            else None
        )
        kept_expectancy_r = 2 * kept_hit_rate - 1
    else:
        kept_hit_rate = lift = kept_expectancy_r = None

    return {
        "model": model_name,
        "mode": mode,
        "threshold": threshold,
        "top_pct": top_pct,
        "total_trades": total_trades,
        "kept_trades": kept_trades,
        "coverage_pct": round((kept_trades / total_trades) * 100, 2),
        "baseline_hit_rate": round(baseline_hit_rate, 4),
        "kept_hit_rate": _round_or_none(kept_hit_rate, 4),
        "lift_vs_baseline_pct": _round_or_none(lift, 2),
        # 2 * p - 1 is the expectancy when a win pays +1 and a loss pays -1.
        "baseline_expectancy_R": round(2 * baseline_hit_rate - 1, 4),
        "kept_expectancy_R": _round_or_none(kept_expectancy_r, 4),
        "wins_kept": int(wins),
        "losses_kept": int(kept_trades - wins),
    }


def evaluate_thresholds(df: pd.DataFrame, thresholds=None,
                        top_pcts=(10, 20, 30, 40, 50)) -> pd.DataFrame:
    """Evaluate probability cut-offs and top-percentile buckets per model.

    For every distinct value in ``df["model"]`` two families of rows are
    produced, in this order:

    * ``mode == "threshold"``: trades with ``y_prob >= threshold``.
    * ``mode == "top_pct"``: the ``ceil(total * top_pct / 100)`` trades with
      the highest ``y_prob`` (at least one); the ``threshold`` column then
      holds the lowest probability inside that bucket.

    Args:
        df: Predictions with columns ``model``, ``y_true`` and ``y_prob``.
            ``y_true`` is averaged and summed as a win indicator
            (assumed to be 0/1 -- confirm against the producer of the file).
        thresholds: Iterable of probability cut-offs. ``None`` (the default)
            uses the module-level ``THRESHOLDS``.
        top_pcts: Iterable of percentages for the top-percentile analysis.

    Returns:
        A DataFrame with one row per (model, cut-off/bucket). The columns are
        always present, even when ``df`` yields no groups, so callers can
        filter on e.g. ``mode`` without special-casing an empty result.
    """
    # Materialise once: the iterables are re-used for every model.
    thresholds = list(THRESHOLDS if thresholds is None else thresholds)
    top_pcts = list(top_pcts)

    rows = []
    for model_name, group in df.groupby("model"):
        total_trades = len(group)
        baseline_hit_rate = group["y_true"].mean()

        # threshold analysis
        for th in thresholds:
            kept = group[group["y_prob"] >= th]
            rows.append(_summary_row(
                model_name, "threshold", th, None,
                kept, total_trades, baseline_hit_rate,
            ))

        # top percentile analysis -- the ranking does not depend on the
        # bucket size, so sort once per model instead of once per bucket.
        ranked = group.sort_values("y_prob", ascending=False)
        for top_pct in top_pcts:
            k = max(int(np.ceil(total_trades * top_pct / 100)), 1)
            # Never report more kept trades than the group actually has.
            k = min(k, total_trades)
            kept = ranked.head(k)
            min_prob_in_bucket = kept["y_prob"].min()
            rows.append(_summary_row(
                model_name, "top_pct", round(float(min_prob_in_bucket), 4),
                top_pct, kept, total_trades, baseline_hit_rate,
            ))

    return pd.DataFrame(rows, columns=_RESULT_COLUMNS)
def main():
    """Run the threshold evaluation end to end.

    Reads the predictions CSV at ``PRED_PATH``, checks that the columns
    ``model``, ``y_true`` and ``y_prob`` are present, writes the evaluation
    table to ``OUT_PATH`` and prints three views of it: the threshold rows,
    the top-percentile rows, and the 15 best rows by ``kept_expectancy_R``
    among those with at least 100 kept trades.

    Raises:
        ValueError: If any required column is missing from the predictions.
    """
    predictions = pd.read_csv(PRED_PATH)

    required_cols = {"model", "y_true", "y_prob"}
    missing = required_cols - set(predictions.columns)
    if missing:
        raise ValueError(f"Missing required columns in predictions file: {missing}")

    report = evaluate_thresholds(predictions)
    report.to_csv(OUT_PATH, index=False)
    print(f"Saved threshold evaluation to: {OUT_PATH}")

    # Each section prints one evaluation mode, ordered by model and then by
    # the column that identifies the row within that mode.
    sections = (
        ("\n=== Threshold rows only ===", "threshold", "threshold"),
        ("\n=== Top-percentile rows only ===", "top_pct", "top_pct"),
    )
    for heading, mode, order_col in sections:
        print(heading)
        subset = report[report["mode"] == mode]
        print(subset.sort_values(["model", order_col]).to_string(index=False))

    print("\n=== Best rows by kept_expectancy_R (minimum 100 kept trades) ===")
    enough_trades = report["kept_trades"] >= 100
    has_expectancy = report["kept_expectancy_R"].notna()
    candidates = report.loc[enough_trades & has_expectancy].copy()
    if candidates.empty:
        print("No qualifying rows")
        return

    # Highest expectancy first; ties broken by hit rate, then by sample size.
    ranked = candidates.sort_values(
        ["kept_expectancy_R", "kept_hit_rate", "kept_trades"],
        ascending=False,
    )
    print(ranked.head(15).to_string(index=False))
# Run the evaluation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()