Spaces:
Running
Running
| from pathlib import Path | |
| import json | |
| import joblib | |
| import pandas as pd | |
| import numpy as np | |
| from tensorflow.keras.models import load_model | |
# Directory containing this script; every artifact path below is resolved
# against it so the script does not depend on the current working directory.
BASE_DIR = Path(__file__).resolve().parent
OUT_DIR = BASE_DIR / "outputs"

# Artifacts loaded by main(): the fitted preprocessor and the Keras model.
PREPROCESSOR_PATH = OUT_DIR / "nn_preprocessor_label_1to1.joblib"
MODEL_PATH = OUT_DIR / "nn_label_1to1.keras"

# Date window embedded in the input and output file names. It is defined once
# so the dataset that is read and the files that are written cannot drift apart.
DATE_RANGE = "2026-03-02_to_2026-03-06"

DATA_PATH = OUT_DIR / f"ml_dataset_exact_all_v2_{DATE_RANGE}.csv"
OUT_SCORED_PATH = OUT_DIR / f"forward_scored_nn_{DATE_RANGE}.csv"
OUT_SUMMARY_PATH = OUT_DIR / f"forward_summary_nn_{DATE_RANGE}.json"

# Probability cut-offs; main() produces one summary per threshold.
THRESHOLDS = [0.50, 0.55]
# Columns removed from the frame before it is handed to the preprocessor.
# NOTE(review): these look like identifiers, outcome labels, backtest fields
# and timestamps that must not reach the model as features; confirm against
# the training script.
DROP_COLS_ALWAYS = [
    "trade_key",
    "label_1to1",
    "label_1to2",
    "bt_buy_signal_time",
    "bt_sell_signal_time",
    "bt_buy_time",
    "bt_buy_price",
    "bt_stop_loss",
    "bt_target_1",
    "bt_target_2",
    "bt_qty_per_lot",
    "bt_capital_per_lot",
    "bt_stop_loss_amt_per_lot",
    "signal_time",
    "confirmation_time",
    "indication_time",
    "buy_time",
]

# Further columns removed by build_feature_matrix() whenever they are present.
OPTIONAL_DROP_COLS = [
    "exit_status",
    "option_symbol",
    "trade_side",
]


def build_feature_matrix(df: pd.DataFrame) -> pd.DataFrame:
    """Return a new frame holding only the columns passed on to the model.

    Every column named in ``DROP_COLS_ALWAYS`` or ``OPTIONAL_DROP_COLS`` that
    exists in *df* is removed; names that are absent are skipped silently.
    When a ``sector`` column remains, missing values and empty strings in it
    are replaced with the literal ``"UNKNOWN"``.

    Args:
        df: Raw trade rows. The frame is not modified.

    Returns:
        A new DataFrame with the excluded columns removed and ``sector``
        normalised.
    """
    # Filter to the names actually present so drop() never raises on a
    # dataset that lacks some of the listed columns.
    present = [
        col
        for col in (*DROP_COLS_ALWAYS, *OPTIONAL_DROP_COLS)
        if col in df.columns
    ]
    # drop() returns a new frame, so the column assignment below cannot leak
    # back into the caller's data.
    features = df.drop(columns=present)

    if "sector" in features.columns:
        features["sector"] = (
            features["sector"].fillna("UNKNOWN").replace("", "UNKNOWN")
        )
    return features
def summarize_for_threshold(df: pd.DataFrame, threshold: float):
    """Summarise 1:1 outcomes for the trades scoring at or above *threshold*.

    A trade is "kept" when its ``y_prob`` is ``>= threshold``. Each kept trade
    is assigned a per-lot PnL of ``+bt_stop_loss_amt_per_lot`` when
    ``label_1to1 == 1`` and ``-bt_stop_loss_amt_per_lot`` otherwise, so any
    label other than 1 (including a missing one) is booked as a loss.

    Args:
        df: Scored trades with the columns ``y_prob``, ``label_1to1``,
            ``bt_stop_loss_amt_per_lot`` and ``trade_date``. Not modified.
        threshold: Minimum ``y_prob`` for a trade to be kept.

    Returns:
        A dict with the keys ``threshold``, ``total_trades``, ``kept_trades``,
        ``coverage_pct``, ``baseline_hit_rate_1to1``, ``kept_hit_rate_1to1``,
        ``gross_pnl_1to1_per_lot_sum``, ``avg_pnl_1to1_per_lot`` and
        ``daily_breakdown`` (one record per ``trade_date``). When no trade is
        kept, the hit rate and average PnL are ``None`` and the breakdown is
        an empty list.

    Raises:
        KeyError: If a required column is missing from *df*.
    """
    total = len(df)
    baseline_hit_rate = float(df["label_1to1"].mean()) if total else 0.0

    # Template shared by both outcomes so the two result shapes cannot drift
    # apart; as written it is exactly the "nothing kept" answer.
    summary = {
        "threshold": threshold,
        "total_trades": total,
        "kept_trades": 0,
        "coverage_pct": 0.0,
        "baseline_hit_rate_1to1": round(baseline_hit_rate, 4),
        "kept_hit_rate_1to1": None,
        "gross_pnl_1to1_per_lot_sum": 0.0,
        "avg_pnl_1to1_per_lot": None,
        "daily_breakdown": [],
    }

    selected = df[df["y_prob"] >= threshold]
    kept_n = len(selected)
    if kept_n == 0:
        return summary

    # For 1:1, profit magnitude equals stop-loss magnitude.
    # NOTE(review): assumes bt_stop_loss_amt_per_lot is stored as a positive
    # amount; confirm against the dataset builder.
    risk = selected["bt_stop_loss_amt_per_lot"]
    # assign() builds a new frame, so neither the caller's data nor the
    # filtered slice is mutated.
    kept = selected.assign(
        realized_pnl_1to1_per_lot=np.where(
            selected["label_1to1"] == 1, risk, -risk
        )
    )

    pnl = kept["realized_pnl_1to1_per_lot"]
    hit_rate_1to1 = float(kept["label_1to1"].mean())
    gross_pnl = float(pnl.sum())
    avg_pnl = float(pnl.mean())

    # groupby() skips rows whose trade_date is missing, so such trades count
    # towards the totals above but do not appear in the daily breakdown.
    by_day = (
        kept.groupby("trade_date")
        .agg(
            trades=("label_1to1", "size"),
            wins=("label_1to1", "sum"),
            pnl_1to1_per_lot=("realized_pnl_1to1_per_lot", "sum"),
        )
        .reset_index()
    )
    # Render dates as plain strings so the records are JSON-serialisable.
    by_day["trade_date"] = pd.to_datetime(
        by_day["trade_date"], errors="coerce"
    ).dt.strftime("%Y-%m-%d")

    return {
        **summary,
        "kept_trades": kept_n,
        "coverage_pct": round((kept_n / total) * 100, 2),
        "kept_hit_rate_1to1": round(hit_rate_1to1, 4),
        "gross_pnl_1to1_per_lot_sum": round(gross_pnl, 2),
        "avg_pnl_1to1_per_lot": round(avg_pnl, 2),
        "daily_breakdown": by_day.to_dict(orient="records"),
    }
def main() -> None:
    """Score the forward dataset with the saved model and write the results.

    Steps, in order:
      1. Load the fitted preprocessor and the Keras model from disk.
      2. Read the dataset CSV and parse ``trade_date``.
      3. Build the feature frame, transform it and predict ``y_prob``.
      4. Write the scored rows to ``OUT_SCORED_PATH`` (CSV).
      5. Write one summary per entry in ``THRESHOLDS`` to
         ``OUT_SUMMARY_PATH`` (JSON) and print the summaries.

    Raises:
        ValueError: If the model does not return exactly one value per row.
    """
    # NOTE: joblib.load unpickles the file, which can execute arbitrary code;
    # only point PREPROCESSOR_PATH at artifacts you produced or trust.
    preprocessor = joblib.load(PREPROCESSOR_PATH)
    model = load_model(MODEL_PATH)

    df = pd.read_csv(DATA_PATH)
    # Unparseable dates become NaT instead of aborting the run.
    df["trade_date"] = pd.to_datetime(df["trade_date"], errors="coerce")

    # NOTE(review): build_feature_matrix() does not drop trade_date, so the
    # parsed datetime column is passed to the preprocessor; confirm this
    # matches how the preprocessor was fitted.
    X_raw = build_feature_matrix(df)
    X = preprocessor.transform(X_raw)
    # Densify when the transformer returns an object exposing toarray()
    # (e.g. a SciPy sparse matrix).
    if hasattr(X, "toarray"):
        X = X.toarray()

    y_prob = model.predict(X).ravel()
    # Fail with a clear message rather than pandas' generic length-mismatch
    # error if the model emits more than one value per row.
    if len(y_prob) != len(df):
        raise ValueError(
            f"Model returned {len(y_prob)} values for {len(df)} rows; "
            "expected exactly one probability per row."
        )
    df["y_prob"] = y_prob
    df.to_csv(OUT_SCORED_PATH, index=False)

    summaries = [summarize_for_threshold(df, th) for th in THRESHOLDS]
    # Explicit encoding keeps the output independent of the platform locale.
    with open(OUT_SUMMARY_PATH, "w", encoding="utf-8") as f:
        json.dump(summaries, f, indent=2)

    print(f"Saved scored trades to: {OUT_SCORED_PATH}")
    print(f"Saved summary to: {OUT_SUMMARY_PATH}")
    for s in summaries:
        print("\n", s)


if __name__ == "__main__":
    main()