Spaces:

ash001
/

nse-bot-backend

Running

App Files Files Community

ash001 committed on Mar 25

Commit

b3405e6

verified ·

1 Parent(s): afe2b96

Deploy from GitHub Actions to nse-bot-backend

Browse files

Files changed (24) hide show

.gitattributes +1 -0
compare_nn_models.py +9 -4
dataset_generator_exact.py +2 -2
live_paper_bot_nn.py +2 -2
merge_datasets.py +2 -1
outputs/mar25_champion_scored.csv +0 -0
outputs/ml_dataset_exact_all_v2_2026-01-01_to_2026-03-25_merged.csv +3 -0
outputs/ml_dataset_exact_all_v2_2026-03-23_to_2026-03-25.csv +0 -0
outputs/ml_dataset_exact_all_v2_2026-03-25_to_2026-03-25.csv +0 -0
outputs/nn_label_1to1_jan_to_mar25_large_v1.keras +3 -0
outputs/nn_label_1to1_jan_to_mar25_same_arch_v1.keras +3 -0
outputs/nn_model_comparison_predictions_tail_15pct_thr_0_46.csv +0 -0
outputs/nn_model_comparison_predictions_tail_15pct_thr_0_48.csv +0 -0
outputs/nn_model_comparison_predictions_tail_15pct_thr_0_5.csv +0 -0
outputs/nn_model_comparison_predictions_tail_15pct_thr_0_52.csv +0 -0
outputs/nn_model_comparison_predictions_tail_15pct_thr_0_55.csv +0 -0
outputs/nn_model_comparison_tail_15pct_multi_threshold.csv +15 -10
outputs/nn_preprocessor_label_1to1_jan_to_mar25_large_v1.joblib +3 -0
outputs/nn_preprocessor_label_1to1_jan_to_mar25_same_arch_v1.joblib +3 -0
outputs/nn_saved_metrics_label_1to1_jan_to_mar25_large_v1.json +22 -0
outputs/nn_saved_metrics_label_1to1_jan_to_mar25_same_arch_v1.json +22 -0
score_mar25_champion.py +81 -0
train_nn_save.py +2 -2
train_nn_save_large.py +261 -0

.gitattributes CHANGED Viewed

@@ -1,3 +1,4 @@
 *.keras filter=lfs diff=lfs merge=lfs -text
 *.joblib filter=lfs diff=lfs merge=lfs -text
 outputs/ml_dataset_exact_all_v2_2026-01-01_to_2026-03-20_merged.csv filter=lfs diff=lfs merge=lfs -text

 *.keras filter=lfs diff=lfs merge=lfs -text
 *.joblib filter=lfs diff=lfs merge=lfs -text
 outputs/ml_dataset_exact_all_v2_2026-01-01_to_2026-03-20_merged.csv filter=lfs diff=lfs merge=lfs -text
+outputs/ml_dataset_exact_all_v2_2026-01-01_to_2026-03-25_merged.csv filter=lfs diff=lfs merge=lfs -text

compare_nn_models.py CHANGED Viewed

@@ -20,7 +20,7 @@ BASE_DIR = Path(__file__).resolve().parent
 OUT_DIR = BASE_DIR / "outputs"
 # Use the merged dataset that contains both old and new comparison dates
-DATA_PATH = OUT_DIR / "ml_dataset_exact_all_v2_2026-01-01_to_2026-03-20_merged.csv"
 HOLDOUT_FRAC = 0.15
@@ -34,9 +34,14 @@ MODELS = [
         "model": OUT_DIR / "nn_label_1to1_jan_to_mar12_v2.keras",
     },
     {
-        "name": "new_challenger_jan_to_mar20_candidate_v1",
-        "preprocessor": OUT_DIR / "nn_preprocessor_label_1to1_jan_to_mar20_candidate_v1.joblib",
-        "model": OUT_DIR / "nn_label_1to1_jan_to_mar20_candidate_v1.keras",
     },
 ]

 OUT_DIR = BASE_DIR / "outputs"
 # Use the merged dataset that contains both old and new comparison dates
+DATA_PATH = OUT_DIR / "ml_dataset_exact_all_v2_2026-01-01_to_2026-03-25_merged.csv"
 HOLDOUT_FRAC = 0.15
         "model": OUT_DIR / "nn_label_1to1_jan_to_mar12_v2.keras",
     },
     {
+        "name": "same_arch_jan_to_mar25_v1",
+        "preprocessor": OUT_DIR / "nn_preprocessor_label_1to1_jan_to_mar25_same_arch_v1.joblib",
+        "model": OUT_DIR / "nn_label_1to1_jan_to_mar25_same_arch_v1.keras",
+    },
+    {
+        "name": "large_arch_jan_to_mar25_v1",
+        "preprocessor": OUT_DIR / "nn_preprocessor_label_1to1_jan_to_mar25_large_v1.joblib",
+        "model": OUT_DIR / "nn_label_1to1_jan_to_mar25_large_v1.keras",
     },
 ]

dataset_generator_exact.py CHANGED Viewed

@@ -19,8 +19,8 @@ OUT_DIR.mkdir(exist_ok=True)
 # -----------------------------
 MODE = "all"          # "priority" or "all"
 VARIANT = "v2"             # "v1" or "v2"
-START_DATE = "2026-03-13"
-END_DATE = "2026-03-20"
 # Keep None to scan everything in chosen mode
 MAX_SYMBOLS = None

 # -----------------------------
 MODE = "all"          # "priority" or "all"
 VARIANT = "v2"             # "v1" or "v2"
+START_DATE = "2026-03-23"
+END_DATE = "2026-03-25"
 # Keep None to scan everything in chosen mode
 MAX_SYMBOLS = None

live_paper_bot_nn.py CHANGED Viewed

@@ -56,11 +56,11 @@ OUT_DIR = BASE_DIR / "outputs"
 NN_PREPROCESSOR_PATH = OUT_DIR / os.getenv(
     "BOT_NN_PREPROCESSOR_FILE",
-    "nn_preprocessor_label_1to1_jan_to_mar12_v2.joblib",
 )
 NN_MODEL_PATH = OUT_DIR / os.getenv(
     "BOT_NN_MODEL_FILE",
-    "nn_label_1to1_jan_to_mar12_v2.keras",
 )
 UNIVERSE_PATH = BASE_DIR / "option_stock_universe.csv"

 NN_PREPROCESSOR_PATH = OUT_DIR / os.getenv(
     "BOT_NN_PREPROCESSOR_FILE",
+    "nn_preprocessor_label_1to1_jan_to_mar25_large_v1.joblib",
 )
 NN_MODEL_PATH = OUT_DIR / os.getenv(
     "BOT_NN_MODEL_FILE",
+    "nn_label_1to1_jan_to_mar25_large_v1.keras",
 )
 UNIVERSE_PATH = BASE_DIR / "option_stock_universe.csv"

merge_datasets.py CHANGED Viewed

@@ -4,7 +4,7 @@ import pandas as pd
 BASE_DIR = Path(__file__).resolve().parent
 OUT_DIR = BASE_DIR / "outputs"
-MERGE_TAG = "2026-01-01_to_2026-03-20"
 FILES = [
     OUT_DIR / "ml_dataset_exact_all_v2_2026-01-01_to_2026-01-31.csv",
@@ -12,6 +12,7 @@ FILES = [
     OUT_DIR / "ml_dataset_exact_all_v2_2026-03-02_to_2026-03-06.csv",
     OUT_DIR / "ml_dataset_exact_all_v2_2026-03-09_to_2026-03-12.csv",
     OUT_DIR / "ml_dataset_exact_all_v2_2026-03-13_to_2026-03-20.csv",
 ]
 OUT_PATH = OUT_DIR / f"ml_dataset_exact_all_v2_{MERGE_TAG}_merged.csv"

 BASE_DIR = Path(__file__).resolve().parent
 OUT_DIR = BASE_DIR / "outputs"
+MERGE_TAG = "2026-01-01_to_2026-03-25"
 FILES = [
     OUT_DIR / "ml_dataset_exact_all_v2_2026-01-01_to_2026-01-31.csv",
     OUT_DIR / "ml_dataset_exact_all_v2_2026-03-02_to_2026-03-06.csv",
     OUT_DIR / "ml_dataset_exact_all_v2_2026-03-09_to_2026-03-12.csv",
     OUT_DIR / "ml_dataset_exact_all_v2_2026-03-13_to_2026-03-20.csv",
+    OUT_DIR / "ml_dataset_exact_all_v2_2026-03-23_to_2026-03-25.csv",
 ]
 OUT_PATH = OUT_DIR / f"ml_dataset_exact_all_v2_{MERGE_TAG}_merged.csv"

outputs/mar25_champion_scored.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

outputs/ml_dataset_exact_all_v2_2026-01-01_to_2026-03-25_merged.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5ca6415099211e285ced10fa15543eb24ff3b4261bbabaa9b75fbb465a37caa9
+size 13828933

outputs/ml_dataset_exact_all_v2_2026-03-23_to_2026-03-25.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

outputs/ml_dataset_exact_all_v2_2026-03-25_to_2026-03-25.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

outputs/nn_label_1to1_jan_to_mar25_large_v1.keras ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bab75e134b09b5484cb3fa921f088e3b9311a8adc746ec5ea4b747ce7f4269c1
+size 1708180

outputs/nn_label_1to1_jan_to_mar25_same_arch_v1.keras ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3da648dd3e355653de873cc95be5915d113405fb172b07625c4042e1c58c2703
+size 717996

outputs/nn_model_comparison_predictions_tail_15pct_thr_0_46.csv CHANGED Viewed

The diff for this file is too large to render. See raw diff

outputs/nn_model_comparison_predictions_tail_15pct_thr_0_48.csv CHANGED Viewed

The diff for this file is too large to render. See raw diff

outputs/nn_model_comparison_predictions_tail_15pct_thr_0_5.csv CHANGED Viewed

The diff for this file is too large to render. See raw diff

outputs/nn_model_comparison_predictions_tail_15pct_thr_0_52.csv CHANGED Viewed

The diff for this file is too large to render. See raw diff

outputs/nn_model_comparison_predictions_tail_15pct_thr_0_55.csv CHANGED Viewed

The diff for this file is too large to render. See raw diff

outputs/nn_model_comparison_tail_15pct_multi_threshold.csv CHANGED Viewed

@@ -1,11 +1,16 @@
 model_name,compare_mode,holdout_frac,threshold,rows_total,rows_kept,keep_rate,avg_score_all,avg_score_kept,kept_hit_rate_1to1,kept_hit_rate_1to2,capital_kept_per_lot_sum,metrics_all_rows
-old_champion,tail_row_holdout,0.15,0.46,2122,605,0.28510838831291235,0.36969414353370667,0.5534360408782959,0.5685950413223141,0.4396694214876033,9156934.95,"{'accuracy': 0.5782280867106503, 'precision': 0.5685950413223141, 'recall': 0.35173824130879344, 'f1': 0.43461781427668983, 'roc_auc': 0.6319652995266494}"
-new_challenger_jan_to_mar20_candidate_v1,tail_row_holdout,0.15,0.46,2122,779,0.36710650329877476,0.3983593285083771,0.5644234418869019,0.5455712451861361,0.4197689345314506,11732736.95,"{'accuracy': 0.5725730442978322, 'precision': 0.5455712451861361, 'recall': 0.434560327198364, 'f1': 0.48377916903813317, 'roc_auc': 0.6246970054485392}"
-old_champion,tail_row_holdout,0.15,0.48,2122,516,0.24316682375117812,0.36969414353370667,0.5678948760032654,0.5658914728682171,0.4476744186046512,7822197.45,"{'accuracy': 0.5711592836946278, 'precision': 0.5658914728682171, 'recall': 0.2985685071574642, 'f1': 0.3908969210174029, 'roc_auc': 0.6319652995266494}"
-new_challenger_jan_to_mar20_candidate_v1,tail_row_holdout,0.15,0.48,2122,676,0.31856738925541944,0.3983593285083771,0.5787112712860107,0.5547337278106509,0.42159763313609466,10197606.45,"{'accuracy': 0.5739868049010367, 'precision': 0.5547337278106509, 'recall': 0.3834355828220859, 'f1': 0.4534461910519952, 'roc_auc': 0.6246970054485392}"
-old_champion,tail_row_holdout,0.15,0.5,2122,430,0.20263901979264845,0.36969414353370667,0.5837038159370422,0.5767441860465117,0.4604651162790698,6500596.95,"{'accuracy': 0.5702167766258247, 'precision': 0.5767441860465117, 'recall': 0.25357873210633947, 'f1': 0.3522727272727273, 'roc_auc': 0.6319652995266494}"
-new_challenger_jan_to_mar20_candidate_v1,tail_row_holdout,0.15,0.5,2122,587,0.2766258246936852,0.3983593285083771,0.5921759009361267,0.5604770017035775,0.4293015332197615,8771410.95,"{'accuracy': 0.5725730442978322, 'precision': 0.5604770017035775, 'recall': 0.33640081799591004, 'f1': 0.4204472843450479, 'roc_auc': 0.6246970054485392}"
-old_champion,tail_row_holdout,0.15,0.52,2122,372,0.17530631479736097,0.36969414353370667,0.5953366160392761,0.5913978494623656,0.4731182795698925,5579611.0,"{'accuracy': 0.5711592836946278, 'precision': 0.5913978494623656, 'recall': 0.2249488752556237, 'f1': 0.32592592592592595, 'roc_auc': 0.6319652995266494}"
-new_challenger_jan_to_mar20_candidate_v1,tail_row_holdout,0.15,0.52,2122,481,0.22667295004712534,0.3983593285083771,0.610203742980957,0.5488565488565489,0.4178794178794179,7085520.2,"{'accuracy': 0.5612629594721961, 'precision': 0.5488565488565489, 'recall': 0.26993865030674846, 'f1': 0.3618917066483893, 'roc_auc': 0.6246970054485392}"
-old_champion,tail_row_holdout,0.15,0.55,2122,282,0.13289349670122527,0.36969414353370667,0.6147698760032654,0.5851063829787234,0.4787234042553192,4201818.5,"{'accuracy': 0.5617342130065975, 'precision': 0.5851063829787234, 'recall': 0.1687116564417178, 'f1': 0.2619047619047619, 'roc_auc': 0.6319652995266494}"
-new_challenger_jan_to_mar20_candidate_v1,tail_row_holdout,0.15,0.55,2122,363,0.17106503298774742,0.3983593285083771,0.6344473958015442,0.559228650137741,0.44077134986225897,5348681.0,"{'accuracy': 0.55937794533459, 'precision': 0.559228650137741, 'recall': 0.20756646216768918, 'f1': 0.30275913497390006, 'roc_auc': 0.6246970054485392}"

 model_name,compare_mode,holdout_frac,threshold,rows_total,rows_kept,keep_rate,avg_score_all,avg_score_kept,kept_hit_rate_1to1,kept_hit_rate_1to2,capital_kept_per_lot_sum,metrics_all_rows
+old_champion,tail_row_holdout,0.15,0.46,2381,602,0.252834943301134,0.3579495847225189,0.5610937476158142,0.5631229235880398,0.4053156146179402,7966851.65,"{'accuracy': 0.5963880722385553, 'precision': 0.5631229235880398, 'recall': 0.32690453230472516, 'f1': 0.41366687004270897, 'roc_auc': 0.6465372009459521}"
+same_arch_jan_to_mar25_v1,tail_row_holdout,0.15,0.46,2381,1131,0.4750104997900042,0.4296109080314636,0.5252474546432495,0.5384615384615384,0.3757736516357206,13787745.75,"{'accuracy': 0.6010079798404032, 'precision': 0.5384615384615384, 'recall': 0.5872709739633558, 'f1': 0.5618081180811808, 'roc_auc': 0.634292343986775}"
+large_arch_jan_to_mar25_v1,tail_row_holdout,0.15,0.46,2381,1059,0.4447711045779084,0.40435346961021423,0.5262973308563232,0.5486307837582625,0.3890462700661001,13143270.8,"{'accuracy': 0.6077278454430911, 'precision': 0.5486307837582625, 'recall': 0.5602700096432015, 'f1': 0.5543893129770993, 'roc_auc': 0.6401787149285945}"
+old_champion,tail_row_holdout,0.15,0.48,2381,519,0.21797564048719026,0.3579495847225189,0.5759100317955017,0.5722543352601156,0.41040462427745666,6910059.4,"{'accuracy': 0.5959680806383872, 'precision': 0.5722543352601156, 'recall': 0.28640308582449375, 'f1': 0.38174807197943444, 'roc_auc': 0.6465372009459521}"
+same_arch_jan_to_mar25_v1,tail_row_holdout,0.15,0.48,2381,974,0.40907181856362873,0.4296109080314636,0.5340793132781982,0.553388090349076,0.3921971252566735,11814094.0,"{'accuracy': 0.6081478370432591, 'precision': 0.553388090349076, 'recall': 0.5197685631629702, 'f1': 0.5360517155643958, 'roc_auc': 0.634292343986775}"
+large_arch_jan_to_mar25_v1,tail_row_holdout,0.15,0.48,2381,837,0.3515329693406132,0.40435346961021423,0.5411040782928467,0.5710872162485066,0.3966547192353644,10431748.95,"{'accuracy': 0.6144477110457791, 'precision': 0.5710872162485066, 'recall': 0.4609450337512054, 'f1': 0.5101387406616862, 'roc_auc': 0.6401787149285945}"
+old_champion,tail_row_holdout,0.15,0.5,2381,450,0.1889962200755985,0.3579495847225189,0.5891824960708618,0.5911111111111111,0.4288888888888889,5967847.9,"{'accuracy': 0.5989080218395632, 'precision': 0.5911111111111111, 'recall': 0.25650916104146576, 'f1': 0.3577673167451244, 'roc_auc': 0.6465372009459521}"
+same_arch_jan_to_mar25_v1,tail_row_holdout,0.15,0.5,2381,788,0.33095338093238136,0.4296109080314636,0.5442647933959961,0.5647208121827412,0.4137055837563452,9421223.1,"{'accuracy': 0.6073078538429232, 'precision': 0.5647208121827412, 'recall': 0.42912246865959497, 'f1': 0.4876712328767123, 'roc_auc': 0.634292343986775}"
+large_arch_jan_to_mar25_v1,tail_row_holdout,0.15,0.5,2381,608,0.25535489290214197,0.40435346961021423,0.5608430504798889,0.5904605263157895,0.42105263157894735,7560974.15,"{'accuracy': 0.6106677866442671, 'precision': 0.5904605263157895, 'recall': 0.3461909353905497, 'f1': 0.43647416413373863, 'roc_auc': 0.6401787149285945}"
+old_champion,tail_row_holdout,0.15,0.52,2381,384,0.16127677446451072,0.3579495847225189,0.6027748584747314,0.6119791666666666,0.453125,5130389.65,"{'accuracy': 0.6005879882402352, 'precision': 0.6119791666666666, 'recall': 0.2266152362584378, 'f1': 0.330752990851513, 'roc_auc': 0.6465372009459521}"
+same_arch_jan_to_mar25_v1,tail_row_holdout,0.15,0.52,2381,587,0.24653506929861402,0.4296109080314636,0.5558530688285828,0.5826235093696763,0.42759795570698467,6909312.35,"{'accuracy': 0.6052078958420831, 'precision': 0.5826235093696763, 'recall': 0.32979749276759884, 'f1': 0.4211822660098522, 'roc_auc': 0.634292343986775}"
+large_arch_jan_to_mar25_v1,tail_row_holdout,0.15,0.52,2381,436,0.18311633767324653,0.40435346961021423,0.581216037273407,0.6169724770642202,0.44724770642201833,5417916.05,"{'accuracy': 0.6073078538429232, 'precision': 0.6169724770642202, 'recall': 0.25940212150433944, 'f1': 0.3652410047522064, 'roc_auc': 0.6401787149285945}"
+old_champion,tail_row_holdout,0.15,0.55,2381,296,0.124317513649727,0.3579495847225189,0.6226372718811035,0.6351351351351351,0.47635135135135137,3913824.6500000004,"{'accuracy': 0.5980680386392272, 'precision': 0.6351351351351351, 'recall': 0.18129218900675023, 'f1': 0.2820705176294073, 'roc_auc': 0.6465372009459521}"
+same_arch_jan_to_mar25_v1,tail_row_holdout,0.15,0.55,2381,266,0.1117177656446871,0.4296109080314636,0.58200603723526,0.6428571428571429,0.5037593984962406,3083757.75,"{'accuracy': 0.5963880722385553, 'precision': 0.6428571428571429, 'recall': 0.16489874638379942, 'f1': 0.2624712202609363, 'roc_auc': 0.634292343986775}"
+large_arch_jan_to_mar25_v1,tail_row_holdout,0.15,0.55,2381,289,0.12137757244855103,0.40435346961021423,0.6054689884185791,0.5986159169550173,0.46366782006920415,3570467.75,"{'accuracy': 0.5884082318353633, 'precision': 0.5986159169550173, 'recall': 0.16682738669238187, 'f1': 0.2609351432880845, 'roc_auc': 0.6401787149285945}"

outputs/nn_preprocessor_label_1to1_jan_to_mar25_large_v1.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:81c39fc022bb9f56e75686548130c1fd8edbd0f9d484c3d8a9038ca8296276a4
+size 20196

outputs/nn_preprocessor_label_1to1_jan_to_mar25_same_arch_v1.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:81c39fc022bb9f56e75686548130c1fd8edbd0f9d484c3d8a9038ca8296276a4
+size 20196

outputs/nn_saved_metrics_label_1to1_jan_to_mar25_large_v1.json ADDED Viewed

	@@ -0,0 +1,22 @@

+{
+  "model_tag": "jan_to_mar25_large_v1",
+  "best_epoch_from_validation": 4,
+  "valid_metrics": {
+    "accuracy": 0.5936974789915966,
+    "precision": 0.5725308641975309,
+    "recall": 0.34967012252591895,
+    "f1": 0.43417203042715036,
+    "roc_auc": 0.6323236336327109
+  },
+  "test_metrics": {
+    "accuracy": 0.6085678286434272,
+    "precision": 0.5966850828729282,
+    "recall": 0.3124397299903568,
+    "f1": 0.41012658227848103,
+    "roc_auc": 0.6582776553244248
+  },
+  "train_rows": 11106,
+  "valid_rows": 2380,
+  "test_rows": 2381,
+  "final_train_rows": 13486
+}

outputs/nn_saved_metrics_label_1to1_jan_to_mar25_same_arch_v1.json ADDED Viewed

	@@ -0,0 +1,22 @@

+{
+  "model_tag": "jan_to_mar25_same_arch_v1",
+  "best_epoch_from_validation": 2,
+  "valid_metrics": {
+    "accuracy": 0.5823529411764706,
+    "precision": 0.5414091470951793,
+    "recall": 0.412818096135721,
+    "f1": 0.46844919786096256,
+    "roc_auc": 0.6146946784435985
+  },
+  "test_metrics": {
+    "accuracy": 0.5837883242335153,
+    "precision": 0.5309973045822103,
+    "recall": 0.3799421407907425,
+    "f1": 0.44294547498594716,
+    "roc_auc": 0.618118456398953
+  },
+  "train_rows": 11106,
+  "valid_rows": 2380,
+  "test_rows": 2381,
+  "final_train_rows": 13486
+}

score_mar25_champion.py ADDED Viewed

	@@ -0,0 +1,81 @@

+from pathlib import Path
+import joblib
+import pandas as pd
+import tensorflow as tf
+BASE_DIR = Path(__file__).resolve().parent
+OUT_DIR = BASE_DIR / "outputs"
+DATA_PATH = OUT_DIR / "ml_dataset_exact_all_v2_2026-03-25_to_2026-03-25.csv"
+PREPROCESSOR_PATH = OUT_DIR / "nn_preprocessor_label_1to1_jan_to_mar12_v2.joblib"
+MODEL_PATH = OUT_DIR / "nn_label_1to1_jan_to_mar12_v2.keras"
+OUT_PATH = OUT_DIR / "mar25_champion_scored.csv"
+THRESHOLD = 0.52
+DROP_COLS_ALWAYS = [
+    "trade_key",
+    "label_1to1",
+    "label_1to2",
+    "bt_buy_signal_time",
+    "bt_sell_signal_time",
+    "bt_buy_time",
+    "bt_buy_price",
+    "bt_stop_loss",
+    "bt_target_1",
+    "bt_target_2",
+    "bt_qty_per_lot",
+    "bt_capital_per_lot",
+    "bt_stop_loss_amt_per_lot",
+    "signal_time",
+    "confirmation_time",
+    "indication_time",
+    "buy_time",
+]
+OPTIONAL_DROP_COLS = [
+    "exit_status",
+    "option_symbol",
+    "trade_side",
+]
+def build_feature_matrix(df: pd.DataFrame):
+    drop_cols = [c for c in DROP_COLS_ALWAYS if c in df.columns]
+    drop_cols += [c for c in OPTIONAL_DROP_COLS if c in df.columns]
+    X = df.drop(columns=drop_cols, errors="ignore").copy()
+    if "sector" in X.columns:
+        X["sector"] = X["sector"].fillna("UNKNOWN").replace("", "UNKNOWN")
+    return X
+def main():
+    df = pd.read_csv(DATA_PATH)
+    preprocessor = joblib.load(PREPROCESSOR_PATH)
+    model = tf.keras.models.load_model(MODEL_PATH, compile=False)
+    X_raw = build_feature_matrix(df)
+    X = preprocessor.transform(X_raw)
+    if hasattr(X, "toarray"):
+        X = X.toarray()
+    probs = model.predict(X, verbose=0).ravel()
+    preds = (probs >= THRESHOLD).astype(int)
+    df["champion_prob"] = probs
+    df["champion_keep_at_052"] = preds
+    df.to_csv(OUT_PATH, index=False)
+    print(f"Saved scored file: {OUT_PATH}")
+    print(f"Rows: {len(df)}")
+    print(f"Kept at {THRESHOLD}: {int(df['champion_keep_at_052'].sum())}")
+    if "label_1to1" in df.columns:
+        kept = df[df["champion_keep_at_052"] == 1].copy()
+        if not kept.empty:
+            print("Kept hit rate 1:1:", kept["label_1to1"].astype(float).mean())
+if __name__ == "__main__":
+    main()

train_nn_save.py CHANGED Viewed

@@ -23,8 +23,8 @@ BASE_DIR = Path(__file__).resolve().parent
 OUT_DIR = BASE_DIR / "outputs"
 OUT_DIR.mkdir(exist_ok=True)
-MODEL_TAG = "jan_to_mar20_candidate_v1"
-MERGED_DATA_FILE = "ml_dataset_exact_all_v2_2026-01-01_to_2026-03-20_merged.csv"
 DATA_PATH = OUT_DIR / MERGED_DATA_FILE
 PREPROCESSOR_PATH = OUT_DIR / f"nn_preprocessor_label_1to1_{MODEL_TAG}.joblib"

 OUT_DIR = BASE_DIR / "outputs"
 OUT_DIR.mkdir(exist_ok=True)
+MODEL_TAG = "jan_to_mar25_same_arch_v1"
+MERGED_DATA_FILE = "ml_dataset_exact_all_v2_2026-01-01_to_2026-03-25_merged.csv"
 DATA_PATH = OUT_DIR / MERGED_DATA_FILE
 PREPROCESSOR_PATH = OUT_DIR / f"nn_preprocessor_label_1to1_{MODEL_TAG}.joblib"

train_nn_save_large.py ADDED Viewed

	@@ -0,0 +1,261 @@

+from pathlib import Path
+import json
+import warnings
+import joblib
+import numpy as np
+import pandas as pd
+from sklearn.compose import ColumnTransformer
+from sklearn.impute import SimpleImputer
+from sklearn.preprocessing import OneHotEncoder, StandardScaler
+from sklearn.metrics import (
+    accuracy_score,
+    precision_score,
+    recall_score,
+    f1_score,
+    roc_auc_score,
+)
+from sklearn.pipeline import Pipeline
+warnings.filterwarnings("ignore")
+BASE_DIR = Path(__file__).resolve().parent
+OUT_DIR = BASE_DIR / "outputs"
+OUT_DIR.mkdir(exist_ok=True)
+MODEL_TAG = "jan_to_mar25_large_v1"
+MERGED_DATA_FILE = "ml_dataset_exact_all_v2_2026-01-01_to_2026-03-25_merged.csv"
+DATA_PATH = OUT_DIR / MERGED_DATA_FILE
+PREPROCESSOR_PATH = OUT_DIR / f"nn_preprocessor_label_1to1_{MODEL_TAG}.joblib"
+MODEL_PATH = OUT_DIR / f"nn_label_1to1_{MODEL_TAG}.keras"
+METRICS_PATH = OUT_DIR / f"nn_saved_metrics_label_1to1_{MODEL_TAG}.json"
+TARGET = "label_1to1"
+DROP_COLS_ALWAYS = [
+    "trade_key",
+    "label_1to1",
+    "label_1to2",
+    "bt_buy_signal_time",
+    "bt_sell_signal_time",
+    "bt_buy_time",
+    "bt_buy_price",
+    "bt_stop_loss",
+    "bt_target_1",
+    "bt_target_2",
+    "bt_qty_per_lot",
+    "bt_capital_per_lot",
+    "bt_stop_loss_amt_per_lot",
+    "signal_time",
+    "confirmation_time",
+    "indication_time",
+    "buy_time",
+]
+OPTIONAL_DROP_COLS = [
+    "exit_status",
+    "option_symbol",
+    "trade_side",
+]
+def load_data():
+    df = pd.read_csv(DATA_PATH)
+    df["trade_date"] = pd.to_datetime(df["trade_date"], errors="coerce")
+    return df
+def time_split_by_row_count(df: pd.DataFrame, train_frac=0.70, valid_frac=0.15):
+    df = df.copy()
+    sort_cols = [c for c in ["trade_date", "signal_time", "confirmation_time", "buy_time", "trade_key"] if c in df.columns]
+    if sort_cols:
+        df = df.sort_values(sort_cols).reset_index(drop=True)
+    else:
+        df = df.sort_values("trade_date").reset_index(drop=True)
+    n = len(df)
+    train_end = max(int(n * train_frac), 1)
+    valid_end = max(int(n * (train_frac + valid_frac)), train_end + 1)
+    if valid_end >= n:
+        valid_end = n - 1
+    train_df = df.iloc[:train_end].copy()
+    valid_df = df.iloc[train_end:valid_end].copy()
+    test_df = df.iloc[valid_end:].copy()
+    return train_df, valid_df, test_df
+def build_feature_matrix(df: pd.DataFrame):
+    drop_cols = [c for c in DROP_COLS_ALWAYS if c in df.columns]
+    drop_cols += [c for c in OPTIONAL_DROP_COLS if c in df.columns]
+    X = df.drop(columns=drop_cols, errors="ignore").copy()
+    y = df[TARGET].astype(int).copy()
+    if "sector" in X.columns:
+        X["sector"] = X["sector"].fillna("UNKNOWN").replace("", "UNKNOWN")
+    return X, y
+def get_preprocessor(X: pd.DataFrame):
+    numeric_cols = X.select_dtypes(include=[np.number]).columns.tolist()
+    categorical_cols = [c for c in X.columns if c not in numeric_cols]
+    numeric_pipe = Pipeline([
+        ("imputer", SimpleImputer(strategy="median")),
+        ("scaler", StandardScaler()),
+    ])
+    categorical_pipe = Pipeline([
+        ("imputer", SimpleImputer(strategy="most_frequent")),
+        ("onehot", OneHotEncoder(handle_unknown="ignore")),
+    ])
+    preprocessor = ColumnTransformer([
+        ("num", numeric_pipe, numeric_cols),
+        ("cat", categorical_pipe, categorical_cols),
+    ])
+    return preprocessor
+def compute_metrics(y_true, y_pred, y_prob):
+    out = {
+        "accuracy": float(accuracy_score(y_true, y_pred)),
+        "precision": float(precision_score(y_true, y_pred, zero_division=0)),
+        "recall": float(recall_score(y_true, y_pred, zero_division=0)),
+        "f1": float(f1_score(y_true, y_pred, zero_division=0)),
+    }
+    try:
+        out["roc_auc"] = float(roc_auc_score(y_true, y_prob))
+    except Exception:
+        out["roc_auc"] = None
+    return out
+def build_model(input_dim):
+    from tensorflow.keras import Sequential
+    from tensorflow.keras.layers import Dense, Dropout, Input
+    model = Sequential([
+        Input(shape=(input_dim,)),
+        Dense(256, activation="relu"),
+        Dropout(0.35),
+        Dense(128, activation="relu"),
+        Dropout(0.25),
+        Dense(64, activation="relu"),
+        Dropout(0.15),
+        Dense(1, activation="sigmoid"),
+    ])
+    model.compile(
+        optimizer="adam",
+        loss="binary_crossentropy",
+        metrics=["accuracy"],
+    )
+    return model
+def main():
+    from tensorflow.keras.callbacks import EarlyStopping
+    df = load_data()
+    train_df, valid_df, test_df = time_split_by_row_count(df)
+    # ----------------------------
+    # Phase 1: evaluation training
+    # ----------------------------
+    X_train_raw, y_train = build_feature_matrix(train_df)
+    X_valid_raw, y_valid = build_feature_matrix(valid_df)
+    X_test_raw, y_test = build_feature_matrix(test_df)
+    preprocessor_eval = get_preprocessor(X_train_raw)
+    X_train = preprocessor_eval.fit_transform(X_train_raw)
+    X_valid = preprocessor_eval.transform(X_valid_raw)
+    X_test = preprocessor_eval.transform(X_test_raw)
+    if hasattr(X_train, "toarray"):
+        X_train = X_train.toarray()
+        X_valid = X_valid.toarray()
+        X_test = X_test.toarray()
+    input_dim = X_train.shape[1]
+    eval_model = build_model(input_dim)
+    early_stop = EarlyStopping(
+        monitor="val_loss",
+        patience=8,
+        restore_best_weights=True,
+    )
+    history = eval_model.fit(
+        X_train,
+        y_train.values,
+        validation_data=(X_valid, y_valid.values),
+        epochs=60,
+        batch_size=64,
+        callbacks=[early_stop],
+        verbose=1,
+    )
+    valid_prob = eval_model.predict(X_valid, verbose=0).ravel()
+    test_prob = eval_model.predict(X_test, verbose=0).ravel()
+    valid_pred = (valid_prob >= 0.5).astype(int)
+    test_pred = (test_prob >= 0.5).astype(int)
+    best_epoch = int(np.argmin(history.history["val_loss"])) + 1
+    # ----------------------------
+    # Phase 2: final deployment fit
+    # train on train + valid
+    # ----------------------------
+    train_valid_df = pd.concat([train_df, valid_df], ignore_index=True)
+    X_train_valid_raw, y_train_valid = build_feature_matrix(train_valid_df)
+    preprocessor_final = get_preprocessor(X_train_valid_raw)
+    X_train_valid = preprocessor_final.fit_transform(X_train_valid_raw)
+    if hasattr(X_train_valid, "toarray"):
+        X_train_valid = X_train_valid.toarray()
+    final_model = build_model(X_train_valid.shape[1])
+    final_model.fit(
+        X_train_valid,
+        y_train_valid.values,
+        epochs=best_epoch,
+        batch_size=64,
+        verbose=1,
+    )
+    metrics = {
+        "model_tag": MODEL_TAG,
+        "best_epoch_from_validation": best_epoch,
+        "valid_metrics": compute_metrics(y_valid, valid_pred, valid_prob),
+        "test_metrics": compute_metrics(y_test, test_pred, test_prob),
+        "train_rows": len(train_df),
+        "valid_rows": len(valid_df),
+        "test_rows": len(test_df),
+        "final_train_rows": len(train_valid_df),
+    }
+    joblib.dump(preprocessor_final, PREPROCESSOR_PATH)
+    final_model.save(MODEL_PATH)
+    with open(METRICS_PATH, "w") as f:
+        json.dump(metrics, f, indent=2)
+    print(f"Saved preprocessor to: {PREPROCESSOR_PATH}")
+    print(f"Saved model to: {MODEL_PATH}")
+    print(f"Saved metrics to: {METRICS_PATH}")
+    print(metrics)
+if __name__ == "__main__":
+    main()