Spaces:

ash001
/

nse-bot-backend

Running

App Files Files Community

nse-bot-backend / forward_test_xgboost.py

ash001

Deploy from GitHub Actions to nse-bot-backend

789e5eb verified about 1 month ago

raw

history blame contribute delete

4.03 kB

	from pathlib import Path
	import json
	import joblib
	import pandas as pd

	# Directory containing this script; all artifacts live under ./outputs next to it.
	BASE_DIR = Path(__file__).resolve().parent
	OUT_DIR = BASE_DIR / "outputs"

	# Date window embedded in the dataset and result file names. Kept in one place
	# so the input and both outputs always refer to the same window.
	FORWARD_WINDOW = "2026-03-02_to_2026-03-06"

	MODEL_PATH = OUT_DIR / "xgboost_label_1to1_pipeline.joblib"
	DATA_PATH = OUT_DIR / f"ml_dataset_exact_all_v2_{FORWARD_WINDOW}.csv"
	OUT_SCORED_PATH = OUT_DIR / f"forward_scored_xgboost_{FORWARD_WINDOW}.csv"
	OUT_SUMMARY_PATH = OUT_DIR / f"forward_summary_xgboost_{FORWARD_WINDOW}.json"

	# Probability cut-offs; one summary is produced per threshold.
	THRESHOLDS = [0.55, 0.60]

	# Columns removed from the model input whenever they are present.
	# NOTE(review): these look like identifiers, labels, backtest outcomes and
	# timestamps that must not reach the model -- confirm against training code.
	DROP_COLS_ALWAYS = [
	    "trade_key",
	    "label_1to1",
	    "label_1to2",
	    "bt_buy_signal_time",
	    "bt_sell_signal_time",
	    "bt_buy_time",
	    "bt_buy_price",
	    "bt_stop_loss",
	    "bt_target_1",
	    "bt_target_2",
	    "bt_qty_per_lot",
	    "bt_capital_per_lot",
	    "bt_stop_loss_amt_per_lot",
	    "signal_time",
	    "confirmation_time",
	    "indication_time",
	    "buy_time",
	]

	# Additional columns that are also removed from the model input when present.
	OPTIONAL_DROP_COLS = [
	    "exit_status",
	    "option_symbol",
	    "trade_side",
	]


	def build_feature_matrix(df: pd.DataFrame) -> pd.DataFrame:
	    """Return the model input frame derived from ``df``.

	    Every column named in ``DROP_COLS_ALWAYS`` or ``OPTIONAL_DROP_COLS`` that
	    exists in ``df`` is removed; names that are absent are ignored. If a
	    ``sector`` column remains, missing values and empty strings in it are
	    replaced with ``"UNKNOWN"``.

	    Args:
	        df: Raw dataset. It is not modified.

	    Returns:
	        A new DataFrame holding the remaining columns.
	    """
	    # errors="ignore" already skips names that are not in df, so no
	    # pre-filtering of the drop lists is needed.
	    X = df.drop(
	        columns=[*DROP_COLS_ALWAYS, *OPTIONAL_DROP_COLS],
	        errors="ignore",
	    ).copy()

	    if "sector" in X.columns:
	        X["sector"] = X["sector"].fillna("UNKNOWN").replace("", "UNKNOWN")

	    return X


	def summarize_for_threshold(df: pd.DataFrame, threshold: float) -> dict:
	    """Summarise the trades whose ``y_prob`` is at least ``threshold``.

	    Args:
	        df: Scored trades. Must contain ``y_prob``, ``label_1to1`` and
	            ``bt_stop_loss_amt_per_lot``. ``trade_date`` is optional and is
	            only used for the per-day breakdown.
	        threshold: Minimum ``y_prob`` for a trade to be kept.

	    Returns:
	        A dict with the keys ``threshold``, ``total_trades``, ``kept_trades``,
	        ``coverage_pct``, ``hit_rate_1to1``, ``gross_pnl_1to1_per_lot_sum``,
	        ``avg_pnl_1to1_per_lot`` and ``daily_breakdown``. When no trade
	        passes the threshold the rate and average are ``None`` and the
	        breakdown is an empty list.
	    """
	    kept = df[df["y_prob"] >= threshold].copy()
	    total = len(df)
	    kept_n = len(kept)

	    if kept_n == 0:
	        # Same keys as the populated result so consumers see one schema.
	        return {
	            "threshold": threshold,
	            "total_trades": total,
	            "kept_trades": 0,
	            "coverage_pct": 0.0,
	            "hit_rate_1to1": None,
	            "gross_pnl_1to1_per_lot_sum": 0.0,
	            "avg_pnl_1to1_per_lot": None,
	            "daily_breakdown": [],
	        }

	    hit_rate_1to1 = kept["label_1to1"].mean()

	    # 1:1 realized PnL approximation: for a 1:1 setup the target profit equals
	    # the stop-loss amount, so a trade with label_1to1 == 0 loses the per-lot
	    # stop-loss amount and any other trade gains that same amount.
	    risk_per_lot = kept["bt_stop_loss_amt_per_lot"]
	    kept["realized_pnl_1to1_per_lot"] = risk_per_lot.where(
	        kept["label_1to1"] != 0, -risk_per_lot
	    )

	    gross_pnl = kept["realized_pnl_1to1_per_lot"].sum()
	    avg_pnl = kept["realized_pnl_1to1_per_lot"].mean()

	    if "trade_date" in kept.columns:
	        by_day = (
	            kept.groupby("trade_date")
	            .agg(
	                trades=("label_1to1", "size"),
	                wins=("label_1to1", "sum"),
	                pnl_1to1_per_lot=("realized_pnl_1to1_per_lot", "sum"),
	            )
	            .reset_index()
	        )
	        # Plain date strings keep the records JSON-serialisable.
	        by_day["trade_date"] = pd.to_datetime(
	            by_day["trade_date"], errors="coerce"
	        ).dt.strftime("%Y-%m-%d")
	        daily_breakdown = by_day.to_dict(orient="records")
	    else:
	        # Without a date column there is nothing to group by.
	        daily_breakdown = []

	    return {
	        "threshold": threshold,
	        "total_trades": total,
	        "kept_trades": kept_n,
	        "coverage_pct": round((kept_n / total) * 100, 2),
	        "hit_rate_1to1": round(float(hit_rate_1to1), 4),
	        "gross_pnl_1to1_per_lot_sum": round(float(gross_pnl), 2),
	        "avg_pnl_1to1_per_lot": round(float(avg_pnl), 2),
	        "daily_breakdown": daily_breakdown,
	    }


	def main() -> None:
	    """Score the forward-test dataset and write the scored CSV and summary JSON.

	    Loads the model from ``MODEL_PATH`` and the dataset from ``DATA_PATH``,
	    stores the second column of ``predict_proba`` as ``y_prob``, writes the
	    scored rows to ``OUT_SCORED_PATH``, then writes one summary per entry of
	    ``THRESHOLDS`` to ``OUT_SUMMARY_PATH`` and prints each summary.
	    """
	    # NOTE(review): joblib artifacts are pickle-based, so loading one runs
	    # arbitrary code; only load model files from a trusted source.
	    model = joblib.load(MODEL_PATH)
	    df = pd.read_csv(DATA_PATH)

	    if "trade_date" in df.columns:
	        df["trade_date"] = pd.to_datetime(df["trade_date"], errors="coerce")

	    # NOTE(review): trade_date is not in either drop list, so it is passed to
	    # the model as a datetime column -- confirm the pipeline expects that.
	    X = build_feature_matrix(df)
	    # Column 1 of predict_proba; presumably the positive (label_1to1 == 1)
	    # class -- TODO confirm against the trained pipeline's classes_.
	    df["y_prob"] = model.predict_proba(X)[:, 1]
	    df.to_csv(OUT_SCORED_PATH, index=False)

	    summaries = [summarize_for_threshold(df, th) for th in THRESHOLDS]

	    # Explicit encoding so the output does not depend on the platform default.
	    with open(OUT_SUMMARY_PATH, "w", encoding="utf-8") as f:
	        json.dump(summaries, f, indent=2)

	    print(f"Saved scored trades to: {OUT_SCORED_PATH}")
	    print(f"Saved summary to: {OUT_SUMMARY_PATH}")

	    for s in summaries:
	        print("\n", s)


	# Run the forward test only when executed as a script, not on import.
	if __name__ == "__main__":
	    main()