Spaces:
Running
Running
| from pathlib import Path | |
| import json | |
| import joblib | |
| import pandas as pd | |
# Every input and output of this script lives in "outputs", next to this file.
BASE_DIR = Path(__file__).resolve().parent
OUT_DIR = BASE_DIR.joinpath("outputs")

# Inputs: the serialized model and the dataset it is applied to.
MODEL_PATH = OUT_DIR.joinpath("xgboost_label_1to1_pipeline.joblib")
DATA_PATH = OUT_DIR.joinpath("ml_dataset_exact_all_v2_2026-03-02_to_2026-03-06.csv")

# Outputs: the scored trades (CSV) and the per-threshold summary (JSON).
OUT_SCORED_PATH = OUT_DIR.joinpath("forward_scored_xgboost_2026-03-02_to_2026-03-06.csv")
OUT_SUMMARY_PATH = OUT_DIR.joinpath("forward_summary_xgboost_2026-03-02_to_2026-03-06.json")

# Probability cut-offs; one summary entry is produced per value.
THRESHOLDS = [0.55, 0.60]

# Columns removed from the data before it is handed to the model.
# Names that are absent from the data are simply skipped.
DROP_COLS_ALWAYS = [
    "trade_key",
    "label_1to1",
    "label_1to2",
    "bt_buy_signal_time",
    "bt_sell_signal_time",
    "bt_buy_time",
    "bt_buy_price",
    "bt_stop_loss",
    "bt_target_1",
    "bt_target_2",
    "bt_qty_per_lot",
    "bt_capital_per_lot",
    "bt_stop_loss_amt_per_lot",
    "signal_time",
    "confirmation_time",
    "indication_time",
    "buy_time",
]

# Further columns removed the same way (dropped whenever they are present).
OPTIONAL_DROP_COLS = [
    "exit_status",
    "option_symbol",
    "trade_side",
]
def build_feature_matrix(df: pd.DataFrame):
    """Return a copy of *df* without the columns listed in the drop lists.

    Every name in ``DROP_COLS_ALWAYS`` and ``OPTIONAL_DROP_COLS`` that exists
    in *df* is removed. If a ``sector`` column remains, its missing values and
    empty strings are replaced with ``"UNKNOWN"``. *df* itself is not modified.
    """
    excluded = [
        name
        for name in DROP_COLS_ALWAYS + OPTIONAL_DROP_COLS
        if name in df.columns
    ]
    features = df.drop(columns=excluded, errors="ignore").copy()

    # Nothing to normalise when the frame has no sector column.
    if "sector" not in features.columns:
        return features

    sector = features["sector"].fillna("UNKNOWN")
    features["sector"] = sector.replace("", "UNKNOWN")
    return features
def _daily_breakdown(kept: pd.DataFrame):
    """Return per-``trade_date`` records (trades, wins, PnL) for *kept*."""
    # The date column is optional in the input data; without it there is
    # nothing to group by, so report an empty breakdown instead of raising.
    if "trade_date" not in kept.columns:
        return []

    by_day = (
        kept.groupby("trade_date")
        .agg(
            trades=("label_1to1", "size"),
            wins=("label_1to1", "sum"),
            pnl_1to1_per_lot=("realized_pnl_1to1_per_lot", "sum"),
        )
        .reset_index()
    )
    # Render dates as plain YYYY-MM-DD strings.
    by_day["trade_date"] = pd.to_datetime(
        by_day["trade_date"], errors="coerce"
    ).dt.strftime("%Y-%m-%d")
    return by_day.to_dict(orient="records")


def summarize_for_threshold(df: pd.DataFrame, threshold: float):
    """Summarize the trades whose ``y_prob`` is at least *threshold*.

    Args:
        df: Scored trades. Must contain ``y_prob``. When at least one trade is
            kept it must also contain ``label_1to1`` and
            ``bt_stop_loss_amt_per_lot``. ``trade_date`` is optional.
        threshold: Inclusive lower bound on ``y_prob`` for a trade to be kept.

    Returns:
        A dict with the keys ``threshold``, ``total_trades``, ``kept_trades``,
        ``coverage_pct``, ``hit_rate_1to1``, ``gross_pnl_1to1_per_lot_sum``,
        ``avg_pnl_1to1_per_lot`` and ``daily_breakdown``. When no trade is
        kept, the hit rate and average PnL are ``None`` and the daily
        breakdown is an empty list.
    """
    kept = df[df["y_prob"] >= threshold].copy()
    total = len(df)
    kept_n = len(kept)

    if kept_n == 0:
        return {
            "threshold": threshold,
            "total_trades": total,
            "kept_trades": 0,
            "coverage_pct": 0.0,
            "hit_rate_1to1": None,
            "gross_pnl_1to1_per_lot_sum": 0.0,
            "avg_pnl_1to1_per_lot": None,
            # Same key set as the non-empty result so consumers can rely on it.
            "daily_breakdown": [],
        }

    hit_rate_1to1 = kept["label_1to1"].mean()

    # 1:1 realized PnL approximation: for a 1:1 setup the target profit equals
    # the stop-loss amount, so a trade contributes +stop-loss amount unless its
    # label is 0, in which case it contributes -stop-loss amount.
    # assumes bt_stop_loss_amt_per_lot is a positive magnitude - TODO confirm
    # NOTE(review): a missing label is not equal to 0 and is therefore counted
    # as a win here, while the hit rate above skips it - confirm intent.
    risk_per_lot = kept["bt_stop_loss_amt_per_lot"]
    kept["realized_pnl_1to1_per_lot"] = risk_per_lot.where(
        kept["label_1to1"] != 0, -risk_per_lot
    )

    gross_pnl = kept["realized_pnl_1to1_per_lot"].sum()
    avg_pnl = kept["realized_pnl_1to1_per_lot"].mean()

    return {
        "threshold": threshold,
        "total_trades": total,
        "kept_trades": kept_n,
        "coverage_pct": round((kept_n / total) * 100, 2),
        "hit_rate_1to1": round(float(hit_rate_1to1), 4),
        "gross_pnl_1to1_per_lot_sum": round(float(gross_pnl), 2),
        "avg_pnl_1to1_per_lot": round(float(avg_pnl), 2),
        "daily_breakdown": _daily_breakdown(kept),
    }
def main():
    """Score the dataset with the saved model and write the result files.

    Loads the model from ``MODEL_PATH`` and the data from ``DATA_PATH``, adds
    a ``y_prob`` column taken from column 1 of ``predict_proba``, writes the
    scored trades to ``OUT_SCORED_PATH``, writes one summary per entry of
    ``THRESHOLDS`` to ``OUT_SUMMARY_PATH`` as JSON, and prints both paths and
    every summary.
    """
    pipeline = joblib.load(MODEL_PATH)
    trades = pd.read_csv(DATA_PATH)

    # Parse dates when the column exists; unparsable values become NaT.
    if "trade_date" in trades.columns:
        trades["trade_date"] = pd.to_datetime(trades["trade_date"], errors="coerce")

    features = build_feature_matrix(trades)
    # Column 1 of predict_proba - presumably the positive class; confirm
    # against the model's class order.
    scores = pipeline.predict_proba(features)[:, 1]
    trades["y_prob"] = scores
    trades.to_csv(OUT_SCORED_PATH, index=False)

    summaries = [summarize_for_threshold(trades, cutoff) for cutoff in THRESHOLDS]

    with open(OUT_SUMMARY_PATH, "w") as handle:
        json.dump(summaries, handle, indent=2)

    print(f"Saved scored trades to: {OUT_SCORED_PATH}")
    print(f"Saved summary to: {OUT_SUMMARY_PATH}")
    for summary in summaries:
        print("\n", summary)


if __name__ == "__main__":
    main()