Spaces:
Running
Running
| from pathlib import Path | |
| import json | |
| import warnings | |
| import joblib | |
| import numpy as np | |
| import pandas as pd | |
# Suppress library warnings (pandas/sklearn chatter) for cleaner script output.
warnings.filterwarnings('ignore')

# All paths are resolved relative to this script so it runs from any CWD.
BASE_DIR = Path(__file__).resolve().parent
OUT_DIR = BASE_DIR / 'outputs'

# Prefer Mar27 merged file, fall back to Mar25 merged file if user hasn't updated yet
PREFERRED_DATA_PATHS = [
    OUT_DIR / 'ml_dataset_exact_all_v2_2026-01-01_to_2026-03-27_merged.csv',
    OUT_DIR / 'ml_dataset_exact_all_v2_2026-01-01_to_2026-03-25_merged.csv',
]

# Single held-out session being evaluated, and the probability cutoffs to sweep.
FRIDAY_DATE = '2026-03-27'
THRESHOLDS = [0.46, 0.48, 0.50, 0.52, 0.55]
# Binary target column name (1:1 risk/reward outcome).
TARGET = 'label_1to1'

# Candidate gating models: each entry pairs a fitted sklearn preprocessor
# (joblib) with a trained Keras model file. All artifacts live in OUT_DIR.
MODELS = [
    {
        'name': 'old_champion',
        'preprocessor': OUT_DIR / 'nn_preprocessor_label_1to1_jan_to_mar12_v2.joblib',
        'model': OUT_DIR / 'nn_label_1to1_jan_to_mar12_v2.keras',
    },
    {
        'name': 'same_arch_jan_to_mar25_v1',
        'preprocessor': OUT_DIR / 'nn_preprocessor_label_1to1_jan_to_mar25_same_arch_v1.joblib',
        'model': OUT_DIR / 'nn_label_1to1_jan_to_mar25_same_arch_v1.keras',
    },
    {
        'name': 'large_arch_jan_to_mar25_v1',
        'preprocessor': OUT_DIR / 'nn_preprocessor_label_1to1_jan_to_mar25_large_v1.joblib',
        'model': OUT_DIR / 'nn_label_1to1_jan_to_mar25_large_v1.keras',
    },
]

# Columns always excluded from the feature matrix: identifiers, timestamps,
# labels, and backtest outcome fields that would leak the target.
DROP_COLS_ALWAYS = [
    'trade_key',
    'label_1to1',
    'label_1to2',
    'bt_buy_signal_time',
    'bt_sell_signal_time',
    'bt_buy_time',
    'bt_buy_price',
    'bt_stop_loss',
    'bt_target_1',
    'bt_target_2',
    'bt_qty_per_lot',
    'bt_capital_per_lot',
    'bt_stop_loss_amt_per_lot',
    'signal_time',
    'confirmation_time',
    'indication_time',
    'buy_time',
]

# Dropped only when present -- the dataset schema varies between versions.
OPTIONAL_DROP_COLS = [
    'exit_status',
    'option_symbol',
    'trade_side',
]
def resolve_data_path() -> Path:
    """Return the first existing merged-dataset file, newest preference first.

    Raises:
        FileNotFoundError: if none of the candidate paths exist on disk.
    """
    found = next((candidate for candidate in PREFERRED_DATA_PATHS if candidate.exists()), None)
    if found is not None:
        return found
    expected = '\n'.join(str(p) for p in PREFERRED_DATA_PATHS)
    raise FileNotFoundError(
        'Could not find merged dataset. Expected one of:\n' + expected
    )
def load_friday_data() -> pd.DataFrame:
    """Load the merged dataset and return only the rows dated FRIDAY_DATE.

    Rows are sorted chronologically by whichever time columns are present.

    Raises:
        ValueError: if the dataset contains no rows for FRIDAY_DATE.
    """
    data_path = resolve_data_path()
    full = pd.read_csv(data_path)
    full['trade_date'] = pd.to_datetime(full['trade_date'], errors='coerce')

    is_friday = full['trade_date'].dt.strftime('%Y-%m-%d') == FRIDAY_DATE
    friday = full[is_friday].copy()

    preferred_order = ('trade_date', 'signal_time', 'confirmation_time', 'buy_time', 'trade_key')
    sort_cols = [col for col in preferred_order if col in friday.columns]
    friday = friday.sort_values(sort_cols).reset_index(drop=True)

    if friday.empty:
        raise ValueError(
            f'No rows found for {FRIDAY_DATE} in {data_path.name}. '
            'This usually means your dataset_generator/merge still do not include Friday.'
        )
    return friday
def build_feature_matrix(df: pd.DataFrame):
    """Split df into features X (leak/identifier columns removed) and target y.

    Returns:
        (X, y) where X is a copy of df minus DROP_COLS_ALWAYS/OPTIONAL_DROP_COLS
        and y is the integer TARGET column.
    """
    to_drop = [col for col in DROP_COLS_ALWAYS + OPTIONAL_DROP_COLS if col in df.columns]
    X = df.drop(columns=to_drop, errors='ignore').copy()
    y = df[TARGET].astype(int).copy()
    # Normalise missing/blank sectors so the categorical encoder sees one token.
    if 'sector' in X.columns:
        X['sector'] = X['sector'].fillna('UNKNOWN').replace('', 'UNKNOWN')
    return X, y
def estimate_round_trip_charges(buy_price, exit_price, qty):
    """Estimate total round-trip transaction charges for one trade, in rupees.

    Components: flat brokerage, STT on the sell leg, exchange transaction
    charge and SEBI fee on combined turnover, stamp duty on the buy leg, and
    18% GST applied to brokerage + exchange + SEBI components.

    Returns:
        Total estimated charges rounded to 2 decimal places.
    """
    buy_value = float(buy_price) * float(qty)
    sell_value = float(exit_price) * float(qty)
    combined_value = buy_value + sell_value

    brokerage = 40.0                         # flat charge per round trip
    stt = 0.001 * sell_value                 # sell side only
    exchange_fee = 0.0003503 * combined_value
    sebi_fee = 0.000001 * combined_value
    stamp_duty = 0.00003 * buy_value         # buy side only
    gst = 0.18 * (brokerage + exchange_fee + sebi_fee)

    total = brokerage + stt + exchange_fee + sebi_fee + stamp_duty + gst
    return round(total, 2)
def compute_trade_pnl_1to1(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of df with 1:1 exit price, gross/net PnL and charge columns.

    Winners (label_1to1 == 1) are assumed to exit at bt_target_1; losers at
    bt_stop_loss. Net PnL subtracts estimated round-trip charges per trade.
    """
    work = df.copy()

    # Coerce the pricing columns to numeric; bad values become NaN.
    for col in ('bt_buy_price', 'bt_stop_loss', 'bt_target_1'):
        work[col] = pd.to_numeric(work[col], errors='coerce')
    work['bt_qty_per_lot'] = pd.to_numeric(work['bt_qty_per_lot'], errors='coerce').fillna(0)
    work['label_1to1'] = pd.to_numeric(work['label_1to1'], errors='coerce').fillna(0).astype(int)

    won = work['label_1to1'].eq(1)
    work['exit_price_1to1'] = np.where(won, work['bt_target_1'], work['bt_stop_loss'])
    work['gross_pnl_1to1'] = np.where(
        won,
        (work['bt_target_1'] - work['bt_buy_price']) * work['bt_qty_per_lot'],
        -(work['bt_buy_price'] - work['bt_stop_loss']) * work['bt_qty_per_lot'],
    )
    work['est_charges_1to1'] = [
        estimate_round_trip_charges(buy, exit_price, qty)
        for buy, exit_price, qty in zip(
            work['bt_buy_price'], work['exit_price_1to1'], work['bt_qty_per_lot']
        )
    ]
    work['net_pnl_1to1'] = work['gross_pnl_1to1'] - work['est_charges_1to1']
    return work
def evaluate_model(model_name: str, preprocessor_path: Path, model_path: Path, df: pd.DataFrame, threshold: float):
    """Score df with one model and summarise trades kept at the given threshold.

    Args:
        model_name: label used in the summary and prediction column names.
        preprocessor_path: joblib file with the fitted feature preprocessor.
        model_path: Keras model file (loaded with compile=False).
        df: Friday trade rows, including label/backtest columns.
        threshold: keep a trade when its predicted probability >= threshold.

    Returns:
        (summary, pred_df): summary dict with keep rate, hit rate and
        estimated gross/net PnL of kept trades; pred_df with one row per
        input trade carrying the model probability and keep flag.
    """
    # Imported lazily so module import stays cheap when TF is not needed.
    import tensorflow as tf

    # Target vector is unused here (labels are read from df below), so
    # discard it rather than binding a dead local.
    X_raw, _ = build_feature_matrix(df)
    preprocessor = joblib.load(preprocessor_path)
    model = tf.keras.models.load_model(model_path, compile=False)
    X = preprocessor.transform(X_raw)
    if hasattr(X, 'toarray'):  # densify sparse preprocessor output for Keras
        X = X.toarray()
    y_prob = model.predict(X, verbose=0).ravel()

    kept_mask = y_prob >= threshold
    kept_df = df.loc[kept_mask].copy()
    kept_df = compute_trade_pnl_1to1(kept_df) if not kept_df.empty else kept_df

    total_count = int(len(df))
    kept_count = int(kept_mask.sum())
    keep_rate = float(kept_count / total_count) if total_count else 0.0
    summary = {
        'date': FRIDAY_DATE,
        'model_name': model_name,
        'threshold': threshold,
        'rows_total': total_count,
        'rows_kept': kept_count,
        'keep_rate': keep_rate,
        'kept_hit_rate_1to1': float(kept_df['label_1to1'].mean()) if kept_count > 0 else None,
        'gross_pnl_1to1': float(kept_df['gross_pnl_1to1'].sum()) if kept_count > 0 else 0.0,
        'est_charges_1to1': float(kept_df['est_charges_1to1'].sum()) if kept_count > 0 else 0.0,
        'net_pnl_1to1': float(kept_df['net_pnl_1to1'].sum()) if kept_count > 0 else 0.0,
        'avg_score_kept': float(np.mean(y_prob[kept_mask])) if kept_count > 0 else None,
    }

    # Fix: nesting same-style quotes inside an f-string is a SyntaxError on
    # Python < 3.12 (PEP 701); build the threshold tag separately instead.
    thr_tag = str(threshold).replace('.', '_')
    pred_df = pd.DataFrame({
        'trade_key': df['trade_key'] if 'trade_key' in df.columns else np.arange(len(df)),
        'trade_date': df['trade_date'],
        'symbol': df['symbol'] if 'symbol' in df.columns else None,
        'direction': df['direction'] if 'direction' in df.columns else None,
        'label_1to1': df['label_1to1'] if 'label_1to1' in df.columns else None,
        f'{model_name}_prob': y_prob,
        f'{model_name}_kept_thr_{thr_tag}': kept_mask.astype(int),
    })
    return summary, pred_df
def baseline_no_gate(df: pd.DataFrame) -> dict:
    """Summary row for trading every signal (no ML gate), for comparison.

    Mirrors the evaluate_model summary schema so all rows concatenate cleanly.
    """
    scored = compute_trade_pnl_1to1(df)
    n_rows = len(scored)
    has_rows = n_rows > 0
    return {
        'date': FRIDAY_DATE,
        'model_name': 'no_gate',
        'threshold': None,
        'rows_total': int(n_rows),
        'rows_kept': int(n_rows),
        'keep_rate': 1.0,
        'kept_hit_rate_1to1': float(scored['label_1to1'].mean()) if has_rows else None,
        'gross_pnl_1to1': float(scored['gross_pnl_1to1'].sum()) if has_rows else 0.0,
        'est_charges_1to1': float(scored['est_charges_1to1'].sum()) if has_rows else 0.0,
        'net_pnl_1to1': float(scored['net_pnl_1to1'].sum()) if has_rows else 0.0,
        'avg_score_kept': None,
    }
def main():
    """Compare gating models on the Friday session and write CSV reports.

    For each threshold, evaluates every configured model, saves per-trade
    predictions, then writes and prints a ranked summary across all runs.
    """
    df = load_friday_data()
    # Hoisted: the original resolved the data path a second time just to
    # print it; resolve once and reuse.
    data_path = resolve_data_path()
    print(f'Friday rows found: {len(df)} for {FRIDAY_DATE}')
    print(f'Dataset date range includes Friday from: {data_path}')

    summaries = [baseline_no_gate(df)]
    for threshold in THRESHOLDS:
        pred_frames = []
        print('\n' + '=' * 80)
        print(f'Friday-only comparison at threshold = {threshold}')
        print('=' * 80)
        for cfg in MODELS:
            print(f"Evaluating {cfg['name']}...")
            summary, preds = evaluate_model(
                model_name=cfg['name'],
                preprocessor_path=cfg['preprocessor'],
                model_path=cfg['model'],
                df=df,
                threshold=threshold,
            )
            summaries.append(summary)
            pred_frames.append(preds)
            print(json.dumps(summary, indent=2))

        # Merge per-model prediction frames on trade_key; the shared
        # descriptor columns are kept from the first frame only.
        shared_cols = {'trade_date', 'symbol', 'direction', 'label_1to1'}
        merged_preds = pred_frames[0]
        for frame in pred_frames[1:]:
            keep_cols = [c for c in frame.columns if c not in shared_cols]
            merged_preds = merged_preds.merge(frame[keep_cols], on='trade_key', how='left')

        # Fix: nesting same-style quotes inside an f-string is a SyntaxError
        # on Python < 3.12 (PEP 701); build the tag separately instead.
        thr_tag = str(threshold).replace('.', '_')
        preds_out = OUT_DIR / f'friday_only_model_predictions_thr_{thr_tag}.csv'
        merged_preds.to_csv(preds_out, index=False)
        print(f'Saved per-trade predictions to: {preds_out}')

    summary_df = pd.DataFrame(summaries)
    summary_out = OUT_DIR / 'friday_only_model_summary.csv'
    summary_df.to_csv(summary_out, index=False)
    print('\nSaved Friday-only summary to:', summary_out)

    print('\nRanking by estimated net pnl (best first):')
    show_cols = [
        'model_name', 'threshold', 'rows_kept', 'keep_rate', 'kept_hit_rate_1to1',
        'gross_pnl_1to1', 'est_charges_1to1', 'net_pnl_1to1', 'avg_score_kept'
    ]
    ranked = summary_df[show_cols].sort_values(
        ['net_pnl_1to1', 'kept_hit_rate_1to1'], ascending=[False, False]
    )
    print(ranked.to_string(index=False))


if __name__ == '__main__':
    main()