# nse-bot-backend / forward_test_nn.py
# Uploaded by ash001: "Deploy from GitHub Actions to nse-bot-backend" (789e5eb, verified)
from pathlib import Path
import json
import joblib
import pandas as pd
import numpy as np
from tensorflow.keras.models import load_model
# All paths are anchored to the directory containing this file, so the script
# does not depend on the current working directory.
BASE_DIR = Path(__file__).resolve().parent
OUT_DIR = BASE_DIR / "outputs"

# Fitted preprocessor and Keras model that main() loads for scoring.
PREPROCESSOR_PATH = OUT_DIR / "nn_preprocessor_label_1to1.joblib"
MODEL_PATH = OUT_DIR / "nn_label_1to1.keras"

# Tag embedded in the input dataset name and in both output file names.
# It is defined once so the three paths below cannot drift apart when the
# window is changed.
FORWARD_WINDOW = "2026-03-02_to_2026-03-06"
DATA_PATH = OUT_DIR / f"ml_dataset_exact_all_v2_{FORWARD_WINDOW}.csv"
OUT_SCORED_PATH = OUT_DIR / f"forward_scored_nn_{FORWARD_WINDOW}.csv"
OUT_SUMMARY_PATH = OUT_DIR / f"forward_summary_nn_{FORWARD_WINDOW}.json"

# Probability cut-offs; main() produces one summary per entry.
THRESHOLDS = [0.50, 0.55]
# Columns that build_feature_matrix() removes from the frame before it is
# handed to the preprocessor. Presumably these are identifiers, labels and
# outcome/timestamp fields that must not reach the model -- confirm against
# the training script.
DROP_COLS_ALWAYS = [
    # identifier and label columns
    "trade_key",
    "label_1to1",
    "label_1to2",
    # bt_* columns
    "bt_buy_signal_time",
    "bt_sell_signal_time",
    "bt_buy_time",
    "bt_buy_price",
    "bt_stop_loss",
    "bt_target_1",
    "bt_target_2",
    "bt_qty_per_lot",
    "bt_capital_per_lot",
    "bt_stop_loss_amt_per_lot",
    # *_time columns
    "signal_time",
    "confirmation_time",
    "indication_time",
    "buy_time",
]

# Also removed by build_feature_matrix() when present. In this file the two
# lists are handled identically: only columns that exist are dropped.
OPTIONAL_DROP_COLS = [
    "exit_status",
    "option_symbol",
    "trade_side",
]
def build_feature_matrix(df: pd.DataFrame):
    """Return a copy of *df* without the configured non-feature columns.

    Every name in ``DROP_COLS_ALWAYS`` and ``OPTIONAL_DROP_COLS`` that exists
    in *df* is removed. If a ``sector`` column remains, its missing values
    and empty strings are replaced with ``"UNKNOWN"``. *df* itself is not
    modified.
    """
    excluded = [
        name
        for name in (*DROP_COLS_ALWAYS, *OPTIONAL_DROP_COLS)
        if name in df.columns
    ]
    features = df.drop(columns=excluded, errors="ignore").copy()

    if "sector" in features.columns:
        sector = features["sector"].fillna("UNKNOWN")
        features["sector"] = sector.replace("", "UNKNOWN")

    return features
def summarize_for_threshold(df: pd.DataFrame, threshold: float):
    """Summarise the trades whose ``y_prob`` is at least *threshold*.

    Reads the columns ``y_prob``, ``label_1to1``, ``bt_stop_loss_amt_per_lot``
    and ``trade_date`` from *df*; *df* itself is left untouched because all
    work happens on a filtered copy.

    Returns a dict with the keys ``threshold``, ``total_trades``,
    ``kept_trades``, ``coverage_pct``, ``baseline_hit_rate_1to1``,
    ``kept_hit_rate_1to1``, ``gross_pnl_1to1_per_lot_sum``,
    ``avg_pnl_1to1_per_lot`` and ``daily_breakdown``. When no trade passes
    the threshold, the hit rate and average PnL are ``None``, the gross PnL
    and coverage are ``0.0`` and the daily breakdown is empty.
    """
    selected = df.loc[df["y_prob"] >= threshold].copy()
    n_all, n_selected = len(df), len(selected)

    # An empty frame has no meaningful mean, so the baseline is 0.0 there.
    base_rate = float(df["label_1to1"].mean()) if n_all else 0.0

    # Start from the "nothing selected" report and fill in the rest below.
    report = {
        "threshold": threshold,
        "total_trades": n_all,
        "kept_trades": n_selected,
        "coverage_pct": 0.0,
        "baseline_hit_rate_1to1": round(base_rate, 4),
        "kept_hit_rate_1to1": None,
        "gross_pnl_1to1_per_lot_sum": 0.0,
        "avg_pnl_1to1_per_lot": None,
        "daily_breakdown": [],
    }
    if not n_selected:
        return report

    # For 1:1, profit magnitude equals stop-loss magnitude. Any label that is
    # not equal to 1 (a missing label included) is booked as a loss.
    is_win = selected["label_1to1"] == 1
    risk_per_lot = selected["bt_stop_loss_amt_per_lot"]
    selected["realized_pnl_1to1_per_lot"] = np.where(
        is_win, risk_per_lot, -risk_per_lot
    )
    pnl = selected["realized_pnl_1to1_per_lot"]

    # One row per trade_date. With groupby's default settings, rows whose
    # trade_date is missing do not appear in this breakdown.
    daily = selected.groupby("trade_date").agg(
        trades=("label_1to1", "size"),
        wins=("label_1to1", "sum"),
        pnl_1to1_per_lot=("realized_pnl_1to1_per_lot", "sum"),
    )
    daily = daily.reset_index()
    day_stamps = pd.to_datetime(daily["trade_date"], errors="coerce")
    daily["trade_date"] = day_stamps.dt.strftime("%Y-%m-%d")

    report.update(
        coverage_pct=round((n_selected / n_all) * 100, 2),
        kept_hit_rate_1to1=round(float(selected["label_1to1"].mean()), 4),
        gross_pnl_1to1_per_lot_sum=round(float(pnl.sum()), 2),
        avg_pnl_1to1_per_lot=round(float(pnl.mean()), 2),
        daily_breakdown=daily.to_dict(orient="records"),
    )
    return report
def main():
    """Score the forward-test dataset and write the scored CSV and summary.

    Loads the preprocessor and model from ``PREPROCESSOR_PATH`` and
    ``MODEL_PATH``, reads ``DATA_PATH``, stores the model output in a
    ``y_prob`` column, writes the scored frame to ``OUT_SCORED_PATH`` and
    dumps one summary per entry of ``THRESHOLDS`` to ``OUT_SUMMARY_PATH``.
    """
    preprocessor = joblib.load(PREPROCESSOR_PATH)
    model = load_model(MODEL_PATH)

    trades = pd.read_csv(DATA_PATH)
    # Unparseable dates become NaT instead of raising.
    trades["trade_date"] = pd.to_datetime(trades["trade_date"], errors="coerce")

    encoded = preprocessor.transform(build_feature_matrix(trades))
    # The transform may return an object exposing toarray() (e.g. a sparse
    # matrix); densify it in that case before prediction.
    dense = encoded.toarray() if hasattr(encoded, "toarray") else encoded

    # NOTE(review): ravel() assumes one output value per row -- confirm
    # against the model's output shape.
    trades["y_prob"] = model.predict(dense).ravel()
    trades.to_csv(OUT_SCORED_PATH, index=False)

    summaries = [summarize_for_threshold(trades, cutoff) for cutoff in THRESHOLDS]
    with OUT_SUMMARY_PATH.open("w") as handle:
        json.dump(summaries, handle, indent=2)

    print(f"Saved scored trades to: {OUT_SCORED_PATH}")
    print(f"Saved summary to: {OUT_SUMMARY_PATH}")
    for summary in summaries:
        print("\n", summary)


if __name__ == "__main__":
    main()