# nse-bot-backend / friday_only_model_compare.py
# Last commit by ash001: "Deploy from GitHub Actions to nse-bot-backend"
# Commit 8c79421 (verified)
from pathlib import Path
import json
import warnings
import joblib
import numpy as np
import pandas as pd
# Suppress every Python warning for the whole run.
warnings.filterwarnings('ignore')

# Folder that contains this script, and the 'outputs' folder next to it where
# the datasets, preprocessors, models and generated reports are located.
BASE_DIR = Path(__file__).resolve().parent
OUT_DIR = BASE_DIR / 'outputs'

# Candidate merged datasets in priority order: the Mar27 merge is preferred and
# the Mar25 merge is the fallback if the user has not updated yet.
PREFERRED_DATA_PATHS = [
    OUT_DIR / dataset_name
    for dataset_name in (
        'ml_dataset_exact_all_v2_2026-01-01_to_2026-03-27_merged.csv',
        'ml_dataset_exact_all_v2_2026-01-01_to_2026-03-25_merged.csv',
    )
]

# Day under evaluation (YYYY-MM-DD), the probability cut-offs that are swept,
# and the name of the label column used as the target.
FRIDAY_DATE = '2026-03-27'
THRESHOLDS = [0.46, 0.48, 0.50, 0.52, 0.55]
TARGET = 'label_1to1'

# One entry per model under comparison: a display name, the joblib file holding
# its preprocessor, and the .keras file holding the network itself.
MODELS = [
    {
        'name': 'old_champion',
        'preprocessor': OUT_DIR / 'nn_preprocessor_label_1to1_jan_to_mar12_v2.joblib',
        'model': OUT_DIR / 'nn_label_1to1_jan_to_mar12_v2.keras',
    },
    {
        'name': 'same_arch_jan_to_mar25_v1',
        'preprocessor': OUT_DIR / 'nn_preprocessor_label_1to1_jan_to_mar25_same_arch_v1.joblib',
        'model': OUT_DIR / 'nn_label_1to1_jan_to_mar25_same_arch_v1.keras',
    },
    {
        'name': 'large_arch_jan_to_mar25_v1',
        'preprocessor': OUT_DIR / 'nn_preprocessor_label_1to1_jan_to_mar25_large_v1.joblib',
        'model': OUT_DIR / 'nn_label_1to1_jan_to_mar25_large_v1.keras',
    },
]
# Columns removed from the model input whenever they are present in the data.
DROP_COLS_ALWAYS = (
    # Row identifier and the two label columns.
    ['trade_key', 'label_1to1', 'label_1to2']
    # `bt_`-prefixed time columns.
    + ['bt_buy_signal_time', 'bt_sell_signal_time', 'bt_buy_time']
    # `bt_`-prefixed price, quantity and amount columns.
    + ['bt_buy_price', 'bt_stop_loss', 'bt_target_1', 'bt_target_2']
    + ['bt_qty_per_lot', 'bt_capital_per_lot', 'bt_stop_loss_amt_per_lot']
    # Remaining time columns.
    + ['signal_time', 'confirmation_time', 'indication_time', 'buy_time']
)

# Further columns that are dropped as well when the dataset happens to have them.
OPTIONAL_DROP_COLS = ['exit_status', 'option_symbol', 'trade_side']
def resolve_data_path() -> Path:
    """Return the first entry of ``PREFERRED_DATA_PATHS`` that exists on disk.

    Returns:
        The highest-priority dataset path that is present.

    Raises:
        FileNotFoundError: If none of the candidates exists. The message lists
            every path that was checked, one per line.
    """
    existing = next(
        (candidate for candidate in PREFERRED_DATA_PATHS if candidate.exists()),
        None,
    )
    if existing is not None:
        return existing

    tried = '\n'.join(str(candidate) for candidate in PREFERRED_DATA_PATHS)
    raise FileNotFoundError('Could not find merged dataset. Expected one of:\n' + tried)
def load_friday_data() -> pd.DataFrame:
    """Load the merged dataset and keep only the rows dated ``FRIDAY_DATE``.

    The ``trade_date`` column is parsed to datetimes (values that cannot be
    parsed become NaT). Matching rows are sorted by whichever of
    ``trade_date``, ``signal_time``, ``confirmation_time``, ``buy_time`` and
    ``trade_key`` are present, and the index is reset to 0..n-1.

    Returns:
        A copy of the matching rows, sorted and re-indexed.

    Raises:
        FileNotFoundError: Propagated from ``resolve_data_path`` when no
            dataset file exists.
        ValueError: If the dataset has no row for ``FRIDAY_DATE``.
    """
    data_path = resolve_data_path()
    frame = pd.read_csv(data_path)
    frame['trade_date'] = pd.to_datetime(frame['trade_date'], errors='coerce')

    # Compare on the formatted day so any time-of-day component is ignored.
    on_friday = frame['trade_date'].dt.strftime('%Y-%m-%d') == FRIDAY_DATE
    friday_df = frame[on_friday].copy()

    sort_candidates = ['trade_date', 'signal_time', 'confirmation_time', 'buy_time', 'trade_key']
    sort_keys = [col for col in sort_candidates if col in friday_df.columns]
    friday_df = friday_df.sort_values(sort_keys).reset_index(drop=True)

    if friday_df.empty:
        raise ValueError(
            f'No rows found for {FRIDAY_DATE} in {data_path.name}. '
            'This usually means your dataset_generator/merge still do not include Friday.'
        )
    return friday_df
def build_feature_matrix(df: pd.DataFrame):
    """Split *df* into the model input frame and the integer target series.

    Every column named in ``DROP_COLS_ALWAYS`` or ``OPTIONAL_DROP_COLS`` that
    is present in *df* is removed from the inputs. If a ``sector`` column
    remains, missing values and empty strings in it are replaced by
    ``'UNKNOWN'``.

    Args:
        df: Dataset rows; must contain the ``TARGET`` column.

    Returns:
        A ``(X, y)`` tuple: ``X`` is a copy of *df* without the dropped
        columns, ``y`` is a copy of the ``TARGET`` column cast to ``int``.
    """
    removable = [
        col
        for col in (*DROP_COLS_ALWAYS, *OPTIONAL_DROP_COLS)
        if col in df.columns
    ]
    features = df.drop(columns=removable, errors='ignore').copy()
    target = df[TARGET].astype(int).copy()

    if 'sector' not in features.columns:
        return features, target

    sector = features['sector'].fillna('UNKNOWN')
    features['sector'] = sector.replace('', 'UNKNOWN')
    return features, target
def estimate_round_trip_charges(buy_price, exit_price, qty):
    """Estimate the total charges for buying and then selling ``qty`` units.

    The estimate is the sum of:
      * a flat brokerage of 40.0,
      * ``stt``: 0.001 of the sell turnover,
      * ``txn``: 0.0003503 of the combined buy + sell turnover,
      * ``sebi``: 0.000001 of the combined turnover,
      * ``stamp``: 0.00003 of the buy turnover,
      * ``gst``: 0.18 of (brokerage + txn + sebi).

    Args:
        buy_price: Entry price per unit (anything ``float()`` accepts).
        exit_price: Exit price per unit (anything ``float()`` accepts).
        qty: Number of units traded (anything ``float()`` accepts).

    Returns:
        The total estimated charges as a float rounded to two decimals.
    """
    entry = float(buy_price)
    units = float(qty)
    buy_leg = entry * units
    sell_leg = float(exit_price) * units
    both_legs = buy_leg + sell_leg

    flat_brokerage = 40.0
    stt_fee = 0.001 * sell_leg
    txn_fee = 0.0003503 * both_legs
    sebi_fee = 0.000001 * both_legs
    stamp_fee = 0.00003 * buy_leg
    gst_fee = 0.18 * (flat_brokerage + txn_fee + sebi_fee)

    # Accumulate strictly left to right so the floating-point result does not
    # depend on how the terms are grouped.
    total = flat_brokerage
    total += stt_fee
    total += txn_fee
    total += sebi_fee
    total += stamp_fee
    total += gst_fee
    return round(total, 2)
def compute_trade_pnl_1to1(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with per-trade P&L columns for the 1:1 label.

    ``bt_buy_price``, ``bt_stop_loss`` and ``bt_target_1`` are coerced to
    numbers (unparseable values become NaN). ``bt_qty_per_lot`` and
    ``label_1to1`` are coerced the same way with NaN replaced by 0, and the
    label is cast to ``int``.

    Added columns:
      * ``exit_price_1to1``: ``bt_target_1`` where the label is 1, otherwise
        ``bt_stop_loss``.
      * ``gross_pnl_1to1``: (exit - buy) * quantity for that exit price.
      * ``est_charges_1to1``: ``estimate_round_trip_charges`` for each row.
      * ``net_pnl_1to1``: gross P&L minus estimated charges.

    Args:
        df: Trades to evaluate; the input frame is not modified.

    Returns:
        The augmented copy.
    """
    out = df.copy()

    for price_col in ('bt_buy_price', 'bt_stop_loss', 'bt_target_1'):
        out[price_col] = pd.to_numeric(out[price_col], errors='coerce')
    out['bt_qty_per_lot'] = pd.to_numeric(out['bt_qty_per_lot'], errors='coerce').fillna(0)
    out['label_1to1'] = pd.to_numeric(out['label_1to1'], errors='coerce').fillna(0).astype(int)

    target_hit = out['label_1to1'].eq(1)
    out['exit_price_1to1'] = np.where(target_hit, out['bt_target_1'], out['bt_stop_loss'])

    profit_if_hit = (out['bt_target_1'] - out['bt_buy_price']) * out['bt_qty_per_lot']
    loss_if_stopped = -(out['bt_buy_price'] - out['bt_stop_loss']) * out['bt_qty_per_lot']
    out['gross_pnl_1to1'] = np.where(target_hit, profit_if_hit, loss_if_stopped)

    # Charges are computed row by row because the helper rounds each trade's
    # total to two decimals.
    out['est_charges_1to1'] = list(
        map(
            estimate_round_trip_charges,
            out['bt_buy_price'],
            out['exit_price_1to1'],
            out['bt_qty_per_lot'],
        )
    )
    out['net_pnl_1to1'] = out['gross_pnl_1to1'] - out['est_charges_1to1']
    return out
def evaluate_model(model_name: str, preprocessor_path: Path, model_path: Path, df: pd.DataFrame, threshold: float):
    """Score *df* with one model and summarise the trades it would keep.

    The preprocessor is loaded with ``joblib`` and the network with
    ``tf.keras.models.load_model(..., compile=False)``. A row is "kept" when
    its predicted probability is ``>= threshold``.

    Args:
        model_name: Label used in the summary and as a prefix for the
            prediction column names.
        preprocessor_path: Path to the joblib-serialized preprocessor.
        model_path: Path to the saved Keras model.
        df: Rows to score; must contain ``trade_date`` and the columns needed
            by ``build_feature_matrix`` and ``compute_trade_pnl_1to1``.
        threshold: Minimum predicted probability for a row to be kept.

    Returns:
        A ``(summary, pred_df)`` tuple. ``summary`` is a dict with the row
        counts, keep rate, hit rate of the kept rows, summed gross/charges/net
        P&L of the kept rows and their mean score (hit rate and mean score are
        ``None`` and the sums are ``0.0`` when nothing is kept). ``pred_df``
        has one row per input row with identifying columns, the predicted
        probability and a 0/1 kept flag.
    """
    # Imported inside the function, so TensorFlow is only loaded when a model
    # is actually evaluated.
    import tensorflow as tf

    X_raw, _ = build_feature_matrix(df)
    preprocessor = joblib.load(preprocessor_path)
    model = tf.keras.models.load_model(model_path, compile=False)

    X = preprocessor.transform(X_raw)
    if hasattr(X, 'toarray'):
        # The transform may return a sparse matrix; convert it to a dense array.
        X = X.toarray()

    y_prob = model.predict(X, verbose=0).ravel()
    kept_mask = y_prob >= threshold

    kept_df = df.loc[kept_mask].copy()
    kept_df = compute_trade_pnl_1to1(kept_df) if not kept_df.empty else kept_df

    total_count = int(len(df))
    kept_count = int(kept_mask.sum())
    keep_rate = float(kept_count / total_count) if total_count else 0.0

    summary = {
        'date': FRIDAY_DATE,
        'model_name': model_name,
        'threshold': threshold,
        'rows_total': total_count,
        'rows_kept': kept_count,
        'keep_rate': keep_rate,
        'kept_hit_rate_1to1': float(kept_df['label_1to1'].mean()) if kept_count > 0 else None,
        'gross_pnl_1to1': float(kept_df['gross_pnl_1to1'].sum()) if kept_count > 0 else 0.0,
        'est_charges_1to1': float(kept_df['est_charges_1to1'].sum()) if kept_count > 0 else 0.0,
        'net_pnl_1to1': float(kept_df['net_pnl_1to1'].sum()) if kept_count > 0 else 0.0,
        'avg_score_kept': float(np.mean(y_prob[kept_mask])) if kept_count > 0 else None,
    }

    # Built outside the f-string: reusing the enclosing quote character inside
    # a replacement field is a SyntaxError on Python versions before 3.12.
    thr_tag = str(threshold).replace('.', '_')

    pred_df = pd.DataFrame({
        # Fall back to positional ids when the dataset has no trade_key column.
        'trade_key': df['trade_key'] if 'trade_key' in df.columns else np.arange(len(df)),
        'trade_date': df['trade_date'],
        'symbol': df['symbol'] if 'symbol' in df.columns else None,
        'direction': df['direction'] if 'direction' in df.columns else None,
        'label_1to1': df['label_1to1'] if 'label_1to1' in df.columns else None,
        f'{model_name}_prob': y_prob,
        f'{model_name}_kept_thr_{thr_tag}': kept_mask.astype(int),
    })
    return summary, pred_df
def baseline_no_gate(df: pd.DataFrame) -> dict:
    """Summarise the result of taking every trade in *df* (no model filter).

    The returned dict has the same keys as the per-model summaries so the two
    can be stacked in one table; ``threshold`` and ``avg_score_kept`` are
    ``None`` because no model is involved.

    Args:
        df: Trades for the evaluation day.

    Returns:
        A summary dict with ``model_name`` set to ``'no_gate'`` and
        ``keep_rate`` fixed at 1.0. For an empty frame the hit rate is
        ``None`` and the P&L sums are ``0.0``.
    """
    work = compute_trade_pnl_1to1(df)
    row_count = len(work)

    def column_total(column):
        # Sum of one P&L column, or 0.0 when there are no rows at all.
        return float(work[column].sum()) if row_count else 0.0

    hit_rate = float(work['label_1to1'].mean()) if row_count else None

    return {
        'date': FRIDAY_DATE,
        'model_name': 'no_gate',
        'threshold': None,
        'rows_total': int(row_count),
        'rows_kept': int(row_count),
        'keep_rate': 1.0,
        'kept_hit_rate_1to1': hit_rate,
        'gross_pnl_1to1': column_total('gross_pnl_1to1'),
        'est_charges_1to1': column_total('est_charges_1to1'),
        'net_pnl_1to1': column_total('net_pnl_1to1'),
        'avg_score_kept': None,
    }
def main():
    """Compare every configured model on the evaluation day and write reports.

    For each threshold in ``THRESHOLDS`` every model in ``MODELS`` is
    evaluated; the per-trade predictions of all models are merged on
    ``trade_key`` and written to one CSV per threshold in ``OUT_DIR``. All
    summaries, starting with the no-gate baseline, are written to
    ``friday_only_model_summary.csv`` and printed ranked by net P&L.
    """
    df = load_friday_data()
    print(f'Friday rows found: {len(df)} for {FRIDAY_DATE}')
    print(f'Dataset date range includes Friday from: {resolve_data_path()}')

    summaries = [baseline_no_gate(df)]

    # Identifying columns repeated in every prediction frame; only the first
    # frame's copy is kept when the frames are merged.
    shared_cols = {'trade_date', 'symbol', 'direction', 'label_1to1'}

    for threshold in THRESHOLDS:
        # Built outside the f-string: reusing the enclosing quote character
        # inside a replacement field is a SyntaxError before Python 3.12.
        thr_tag = str(threshold).replace('.', '_')

        pred_frames = []
        print('\n' + '=' * 80)
        print(f'Friday-only comparison at threshold = {threshold}')
        print('=' * 80)

        for cfg in MODELS:
            print(f"Evaluating {cfg['name']}...")
            summary, preds = evaluate_model(
                model_name=cfg['name'],
                preprocessor_path=cfg['preprocessor'],
                model_path=cfg['model'],
                df=df,
                threshold=threshold,
            )
            summaries.append(summary)
            pred_frames.append(preds)
            print(json.dumps(summary, indent=2))

        if not pred_frames:
            # No models configured: there is nothing to merge or save.
            continue

        merged_preds = pred_frames[0]
        for frame in pred_frames[1:]:
            keep_cols = [c for c in frame.columns if c not in shared_cols]
            merged_preds = merged_preds.merge(frame[keep_cols], on='trade_key', how='left')

        preds_out = OUT_DIR / f'friday_only_model_predictions_thr_{thr_tag}.csv'
        merged_preds.to_csv(preds_out, index=False)
        print(f'Saved per-trade predictions to: {preds_out}')

    summary_df = pd.DataFrame(summaries)
    summary_out = OUT_DIR / 'friday_only_model_summary.csv'
    summary_df.to_csv(summary_out, index=False)
    print('\nSaved Friday-only summary to:', summary_out)

    print('\nRanking by estimated net pnl (best first):')
    show_cols = [
        'model_name', 'threshold', 'rows_kept', 'keep_rate', 'kept_hit_rate_1to1',
        'gross_pnl_1to1', 'est_charges_1to1', 'net_pnl_1to1', 'avg_score_kept'
    ]
    ranked = summary_df[show_cols].sort_values(
        ['net_pnl_1to1', 'kept_hit_rate_1to1'], ascending=[False, False]
    )
    print(ranked.to_string(index=False))


if __name__ == '__main__':
    main()