Spaces:

ash001
/

nse-bot-backend

Running

App Files Files Community

nse-bot-backend / evaluate_thresholds.py

ash001

Deploy from GitHub Actions to nse-bot-backend

789e5eb verified 25 days ago

raw

history blame contribute delete

5.27 kB

	from pathlib import Path
	import pandas as pd
	import numpy as np

	# Anchor all paths to the directory containing this script, so the result
	# does not depend on the current working directory.
	BASE_DIR = Path(__file__).resolve().parents[0]
	OUT_DIR = BASE_DIR.joinpath("outputs")

	# CSV read by main() (needs "model", "y_true", "y_prob" columns) and the CSV
	# that main() writes the evaluation table to.
	PRED_PATH = OUT_DIR.joinpath("model_predictions_label_1to1.csv")
	OUT_PATH = OUT_DIR.joinpath("threshold_evaluation_label_1to1.csv")

	# Probability cut-offs: a row is kept at a cut-off when y_prob >= cut-off.
	THRESHOLDS = [
	    0.50,
	    0.55,
	    0.60,
	    0.65,
	    0.70,
	    0.75,
	    0.80,
	]


	# Column order of the evaluation table. Passing it to the DataFrame
	# constructor guarantees the schema even when no rows were produced.
	_RESULT_COLUMNS = [
	    "model",
	    "mode",
	    "threshold",
	    "top_pct",
	    "total_trades",
	    "kept_trades",
	    "coverage_pct",
	    "baseline_hit_rate",
	    "kept_hit_rate",
	    "lift_vs_baseline_pct",
	    "baseline_expectancy_R",
	    "kept_expectancy_R",
	    "wins_kept",
	    "losses_kept",
	]

	# Default "keep the top N percent by y_prob" buckets.
	_DEFAULT_TOP_PCTS = (10, 20, 30, 40, 50)


	def _summary_row(model_name, mode, threshold, top_pct, kept, total_trades, baseline_hit_rate):
	    """Build one result row describing the ``kept`` subset of a model's rows."""
	    kept_trades = len(kept)
	    baseline_expectancy_r = 2 * baseline_hit_rate - 1

	    if kept_trades:
	        kept_hit_rate = kept["y_true"].mean()
	        # Relative improvement over keeping everything; undefined when the
	        # baseline hit rate is not positive.
	        if baseline_hit_rate > 0:
	            lift = round(((kept_hit_rate / baseline_hit_rate) - 1) * 100, 2)
	        else:
	            lift = None
	        kept_expectancy_r = round(2 * kept_hit_rate - 1, 4)
	        kept_hit_rate = round(kept_hit_rate, 4)
	    else:
	        # Nothing survived the filter: rates are undefined, counts are zero.
	        kept_hit_rate = None
	        lift = None
	        kept_expectancy_r = None

	    wins = kept["y_true"].sum()
	    return {
	        "model": model_name,
	        "mode": mode,
	        "threshold": threshold,
	        "top_pct": top_pct,
	        "total_trades": total_trades,
	        "kept_trades": kept_trades,
	        "coverage_pct": round((kept_trades / total_trades) * 100, 2),
	        "baseline_hit_rate": round(baseline_hit_rate, 4),
	        "kept_hit_rate": kept_hit_rate,
	        "lift_vs_baseline_pct": lift,
	        "baseline_expectancy_R": round(baseline_expectancy_r, 4),
	        "kept_expectancy_R": kept_expectancy_r,
	        "wins_kept": int(wins),
	        "losses_kept": int(kept_trades - wins),
	    }


	def evaluate_thresholds(df: pd.DataFrame, *, thresholds=None, top_pcts=None) -> pd.DataFrame:
	    """Score probability filters per model.

	    For every distinct value of ``df["model"]`` two families of filters are
	    evaluated:

	    * ``mode == "threshold"``: keep rows with ``y_prob >= threshold``.
	    * ``mode == "top_pct"``: keep the ``ceil(n * pct / 100)`` rows (at least
	      one) with the highest ``y_prob``; the ``threshold`` column then holds
	      the lowest ``y_prob`` inside that bucket.

	    Each row reports the hit rate (mean of ``y_true``) of the kept rows, its
	    percentage lift over the unfiltered hit rate, and an expectancy computed
	    as ``2 * hit_rate - 1``. That expectancy equals +1 per win and -1 per
	    loss when ``y_true`` is a 0/1 flag, which this function assumes but does
	    not check.

	    Args:
	        df: Predictions with ``model``, ``y_true`` and ``y_prob`` columns.
	        thresholds: Probability cut-offs to evaluate. Defaults to the
	            module-level ``THRESHOLDS``.
	        top_pcts: Percentages for the top-percent buckets. Defaults to
	            10, 20, 30, 40 and 50.

	    Returns:
	        One row per (model, filter) with a fixed set of columns. If ``df``
	        yields no groups the frame is empty but still has every column, so
	        callers can filter on ``mode`` without a ``KeyError``.
	    """
	    if thresholds is None:
	        thresholds = THRESHOLDS
	    if top_pcts is None:
	        top_pcts = _DEFAULT_TOP_PCTS

	    rows = []

	    for model_name, group in df.groupby("model"):
	        group = group.reset_index(drop=True)

	        total_trades = len(group)
	        baseline_hit_rate = group["y_true"].mean() if total_trades else 0.0

	        # threshold analysis
	        for th in thresholds:
	            kept = group[group["y_prob"] >= th]
	            rows.append(
	                _summary_row(
	                    model_name, "threshold", th, None,
	                    kept, total_trades, baseline_hit_rate,
	                )
	            )

	        # top percentile analysis; the ranking is the same for every bucket,
	        # so sort once instead of once per percentage.
	        ranked = group.sort_values("y_prob", ascending=False)
	        for top_pct in top_pcts:
	            k = max(int(np.ceil(total_trades * top_pct / 100)), 1)
	            kept = ranked.head(k)
	            min_prob_in_bucket = kept["y_prob"].min()
	            rows.append(
	                _summary_row(
	                    model_name, "top_pct", round(float(min_prob_in_bucket), 4), top_pct,
	                    kept, total_trades, baseline_hit_rate,
	                )
	            )

	    return pd.DataFrame(rows, columns=_RESULT_COLUMNS)


	def main():
	    """Evaluate the predictions CSV and print three views of the result.

	    Reads ``PRED_PATH``, checks that the required columns are present, runs
	    ``evaluate_thresholds`` on it, writes the table to ``OUT_PATH`` and then
	    prints the threshold rows, the top-percentile rows, and the 15 best rows
	    by ``kept_expectancy_R`` among those with at least 100 kept trades.

	    Raises:
	        ValueError: If the predictions file lacks ``model``, ``y_true`` or
	            ``y_prob``.
	    """
	    predictions = pd.read_csv(PRED_PATH)

	    expected = {"model", "y_true", "y_prob"}
	    absent = expected - set(predictions.columns)
	    if absent:
	        raise ValueError(f"Missing required columns in predictions file: {absent}")

	    report = evaluate_thresholds(predictions)
	    report.to_csv(OUT_PATH, index=False)

	    print(f"Saved threshold evaluation to: {OUT_PATH}")

	    # Both per-mode views are printed the same way: filter on the mode, then
	    # order by model and the mode's own key column.
	    per_mode_views = (
	        ("\n=== Threshold rows only ===", "threshold", "threshold"),
	        ("\n=== Top-percentile rows only ===", "top_pct", "top_pct"),
	    )
	    for heading, mode, order_by in per_mode_views:
	        print(heading)
	        subset = report[report["mode"] == mode]
	        print(subset.sort_values(["model", order_by]).to_string(index=False))

	    print("\n=== Best rows by kept_expectancy_R (minimum 100 kept trades) ===")
	    enough_trades = report["kept_trades"] >= 100
	    has_expectancy = report["kept_expectancy_R"].notna()
	    candidates = report[enough_trades & has_expectancy].copy()
	    if candidates.empty:
	        print("No qualifying rows")
	        return

	    ranking_keys = ["kept_expectancy_R", "kept_hit_rate", "kept_trades"]
	    candidates = candidates.sort_values(ranking_keys, ascending=[False, False, False])
	    print(candidates.head(15).to_string(index=False))


	# Run the evaluation only when executed as a script, not when imported.
	if __name__ == "__main__":
	    main()