ash001 committed on
Commit
b3405e6
·
verified ·
1 Parent(s): afe2b96

Deploy from GitHub Actions to nse-bot-backend

Browse files
.gitattributes CHANGED
@@ -1,3 +1,4 @@
1
  *.keras filter=lfs diff=lfs merge=lfs -text
2
  *.joblib filter=lfs diff=lfs merge=lfs -text
3
  outputs/ml_dataset_exact_all_v2_2026-01-01_to_2026-03-20_merged.csv filter=lfs diff=lfs merge=lfs -text
 
 
1
  *.keras filter=lfs diff=lfs merge=lfs -text
2
  *.joblib filter=lfs diff=lfs merge=lfs -text
3
  outputs/ml_dataset_exact_all_v2_2026-01-01_to_2026-03-20_merged.csv filter=lfs diff=lfs merge=lfs -text
4
+ outputs/ml_dataset_exact_all_v2_2026-01-01_to_2026-03-25_merged.csv filter=lfs diff=lfs merge=lfs -text
compare_nn_models.py CHANGED
@@ -20,7 +20,7 @@ BASE_DIR = Path(__file__).resolve().parent
20
  OUT_DIR = BASE_DIR / "outputs"
21
 
22
  # Use the merged dataset that contains both old and new comparison dates
23
- DATA_PATH = OUT_DIR / "ml_dataset_exact_all_v2_2026-01-01_to_2026-03-20_merged.csv"
24
 
25
  HOLDOUT_FRAC = 0.15
26
 
@@ -34,9 +34,14 @@ MODELS = [
34
  "model": OUT_DIR / "nn_label_1to1_jan_to_mar12_v2.keras",
35
  },
36
  {
37
- "name": "new_challenger_jan_to_mar20_candidate_v1",
38
- "preprocessor": OUT_DIR / "nn_preprocessor_label_1to1_jan_to_mar20_candidate_v1.joblib",
39
- "model": OUT_DIR / "nn_label_1to1_jan_to_mar20_candidate_v1.keras",
 
 
 
 
 
40
  },
41
  ]
42
 
 
20
  OUT_DIR = BASE_DIR / "outputs"
21
 
22
  # Use the merged dataset that contains both old and new comparison dates
23
+ DATA_PATH = OUT_DIR / "ml_dataset_exact_all_v2_2026-01-01_to_2026-03-25_merged.csv"
24
 
25
  HOLDOUT_FRAC = 0.15
26
 
 
34
  "model": OUT_DIR / "nn_label_1to1_jan_to_mar12_v2.keras",
35
  },
36
  {
37
+ "name": "same_arch_jan_to_mar25_v1",
38
+ "preprocessor": OUT_DIR / "nn_preprocessor_label_1to1_jan_to_mar25_same_arch_v1.joblib",
39
+ "model": OUT_DIR / "nn_label_1to1_jan_to_mar25_same_arch_v1.keras",
40
+ },
41
+ {
42
+ "name": "large_arch_jan_to_mar25_v1",
43
+ "preprocessor": OUT_DIR / "nn_preprocessor_label_1to1_jan_to_mar25_large_v1.joblib",
44
+ "model": OUT_DIR / "nn_label_1to1_jan_to_mar25_large_v1.keras",
45
  },
46
  ]
47
 
dataset_generator_exact.py CHANGED
@@ -19,8 +19,8 @@ OUT_DIR.mkdir(exist_ok=True)
19
  # -----------------------------
20
  MODE = "all" # "priority" or "all"
21
  VARIANT = "v2" # "v1" or "v2"
22
- START_DATE = "2026-03-13"
23
- END_DATE = "2026-03-20"
24
 
25
  # Keep None to scan everything in chosen mode
26
  MAX_SYMBOLS = None
 
19
  # -----------------------------
20
  MODE = "all" # "priority" or "all"
21
  VARIANT = "v2" # "v1" or "v2"
22
+ START_DATE = "2026-03-23"
23
+ END_DATE = "2026-03-25"
24
 
25
  # Keep None to scan everything in chosen mode
26
  MAX_SYMBOLS = None
live_paper_bot_nn.py CHANGED
@@ -56,11 +56,11 @@ OUT_DIR = BASE_DIR / "outputs"
56
 
57
  NN_PREPROCESSOR_PATH = OUT_DIR / os.getenv(
58
  "BOT_NN_PREPROCESSOR_FILE",
59
- "nn_preprocessor_label_1to1_jan_to_mar12_v2.joblib",
60
  )
61
  NN_MODEL_PATH = OUT_DIR / os.getenv(
62
  "BOT_NN_MODEL_FILE",
63
- "nn_label_1to1_jan_to_mar12_v2.keras",
64
  )
65
  UNIVERSE_PATH = BASE_DIR / "option_stock_universe.csv"
66
 
 
56
 
57
  NN_PREPROCESSOR_PATH = OUT_DIR / os.getenv(
58
  "BOT_NN_PREPROCESSOR_FILE",
59
+ "nn_preprocessor_label_1to1_jan_to_mar25_large_v1.joblib",
60
  )
61
  NN_MODEL_PATH = OUT_DIR / os.getenv(
62
  "BOT_NN_MODEL_FILE",
63
+ "nn_label_1to1_jan_to_mar25_large_v1.keras",
64
  )
65
  UNIVERSE_PATH = BASE_DIR / "option_stock_universe.csv"
66
 
merge_datasets.py CHANGED
@@ -4,7 +4,7 @@ import pandas as pd
4
  BASE_DIR = Path(__file__).resolve().parent
5
  OUT_DIR = BASE_DIR / "outputs"
6
 
7
- MERGE_TAG = "2026-01-01_to_2026-03-20"
8
 
9
  FILES = [
10
  OUT_DIR / "ml_dataset_exact_all_v2_2026-01-01_to_2026-01-31.csv",
@@ -12,6 +12,7 @@ FILES = [
12
  OUT_DIR / "ml_dataset_exact_all_v2_2026-03-02_to_2026-03-06.csv",
13
  OUT_DIR / "ml_dataset_exact_all_v2_2026-03-09_to_2026-03-12.csv",
14
  OUT_DIR / "ml_dataset_exact_all_v2_2026-03-13_to_2026-03-20.csv",
 
15
  ]
16
 
17
  OUT_PATH = OUT_DIR / f"ml_dataset_exact_all_v2_{MERGE_TAG}_merged.csv"
 
4
  BASE_DIR = Path(__file__).resolve().parent
5
  OUT_DIR = BASE_DIR / "outputs"
6
 
7
+ MERGE_TAG = "2026-01-01_to_2026-03-25"
8
 
9
  FILES = [
10
  OUT_DIR / "ml_dataset_exact_all_v2_2026-01-01_to_2026-01-31.csv",
 
12
  OUT_DIR / "ml_dataset_exact_all_v2_2026-03-02_to_2026-03-06.csv",
13
  OUT_DIR / "ml_dataset_exact_all_v2_2026-03-09_to_2026-03-12.csv",
14
  OUT_DIR / "ml_dataset_exact_all_v2_2026-03-13_to_2026-03-20.csv",
15
+ OUT_DIR / "ml_dataset_exact_all_v2_2026-03-23_to_2026-03-25.csv",
16
  ]
17
 
18
  OUT_PATH = OUT_DIR / f"ml_dataset_exact_all_v2_{MERGE_TAG}_merged.csv"
outputs/mar25_champion_scored.csv ADDED
The diff for this file is too large to render. See raw diff
 
outputs/ml_dataset_exact_all_v2_2026-01-01_to_2026-03-25_merged.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ca6415099211e285ced10fa15543eb24ff3b4261bbabaa9b75fbb465a37caa9
3
+ size 13828933
outputs/ml_dataset_exact_all_v2_2026-03-23_to_2026-03-25.csv ADDED
The diff for this file is too large to render. See raw diff
 
outputs/ml_dataset_exact_all_v2_2026-03-25_to_2026-03-25.csv ADDED
The diff for this file is too large to render. See raw diff
 
outputs/nn_label_1to1_jan_to_mar25_large_v1.keras ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bab75e134b09b5484cb3fa921f088e3b9311a8adc746ec5ea4b747ce7f4269c1
3
+ size 1708180
outputs/nn_label_1to1_jan_to_mar25_same_arch_v1.keras ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3da648dd3e355653de873cc95be5915d113405fb172b07625c4042e1c58c2703
3
+ size 717996
outputs/nn_model_comparison_predictions_tail_15pct_thr_0_46.csv CHANGED
The diff for this file is too large to render. See raw diff
 
outputs/nn_model_comparison_predictions_tail_15pct_thr_0_48.csv CHANGED
The diff for this file is too large to render. See raw diff
 
outputs/nn_model_comparison_predictions_tail_15pct_thr_0_5.csv CHANGED
The diff for this file is too large to render. See raw diff
 
outputs/nn_model_comparison_predictions_tail_15pct_thr_0_52.csv CHANGED
The diff for this file is too large to render. See raw diff
 
outputs/nn_model_comparison_predictions_tail_15pct_thr_0_55.csv CHANGED
The diff for this file is too large to render. See raw diff
 
outputs/nn_model_comparison_tail_15pct_multi_threshold.csv CHANGED
@@ -1,11 +1,16 @@
1
  model_name,compare_mode,holdout_frac,threshold,rows_total,rows_kept,keep_rate,avg_score_all,avg_score_kept,kept_hit_rate_1to1,kept_hit_rate_1to2,capital_kept_per_lot_sum,metrics_all_rows
2
- old_champion,tail_row_holdout,0.15,0.46,2122,605,0.28510838831291235,0.36969414353370667,0.5534360408782959,0.5685950413223141,0.4396694214876033,9156934.95,"{'accuracy': 0.5782280867106503, 'precision': 0.5685950413223141, 'recall': 0.35173824130879344, 'f1': 0.43461781427668983, 'roc_auc': 0.6319652995266494}"
3
- new_challenger_jan_to_mar20_candidate_v1,tail_row_holdout,0.15,0.46,2122,779,0.36710650329877476,0.3983593285083771,0.5644234418869019,0.5455712451861361,0.4197689345314506,11732736.95,"{'accuracy': 0.5725730442978322, 'precision': 0.5455712451861361, 'recall': 0.434560327198364, 'f1': 0.48377916903813317, 'roc_auc': 0.6246970054485392}"
4
- old_champion,tail_row_holdout,0.15,0.48,2122,516,0.24316682375117812,0.36969414353370667,0.5678948760032654,0.5658914728682171,0.4476744186046512,7822197.45,"{'accuracy': 0.5711592836946278, 'precision': 0.5658914728682171, 'recall': 0.2985685071574642, 'f1': 0.3908969210174029, 'roc_auc': 0.6319652995266494}"
5
- new_challenger_jan_to_mar20_candidate_v1,tail_row_holdout,0.15,0.48,2122,676,0.31856738925541944,0.3983593285083771,0.5787112712860107,0.5547337278106509,0.42159763313609466,10197606.45,"{'accuracy': 0.5739868049010367, 'precision': 0.5547337278106509, 'recall': 0.3834355828220859, 'f1': 0.4534461910519952, 'roc_auc': 0.6246970054485392}"
6
- old_champion,tail_row_holdout,0.15,0.5,2122,430,0.20263901979264845,0.36969414353370667,0.5837038159370422,0.5767441860465117,0.4604651162790698,6500596.95,"{'accuracy': 0.5702167766258247, 'precision': 0.5767441860465117, 'recall': 0.25357873210633947, 'f1': 0.3522727272727273, 'roc_auc': 0.6319652995266494}"
7
- new_challenger_jan_to_mar20_candidate_v1,tail_row_holdout,0.15,0.5,2122,587,0.2766258246936852,0.3983593285083771,0.5921759009361267,0.5604770017035775,0.4293015332197615,8771410.95,"{'accuracy': 0.5725730442978322, 'precision': 0.5604770017035775, 'recall': 0.33640081799591004, 'f1': 0.4204472843450479, 'roc_auc': 0.6246970054485392}"
8
- old_champion,tail_row_holdout,0.15,0.52,2122,372,0.17530631479736097,0.36969414353370667,0.5953366160392761,0.5913978494623656,0.4731182795698925,5579611.0,"{'accuracy': 0.5711592836946278, 'precision': 0.5913978494623656, 'recall': 0.2249488752556237, 'f1': 0.32592592592592595, 'roc_auc': 0.6319652995266494}"
9
- new_challenger_jan_to_mar20_candidate_v1,tail_row_holdout,0.15,0.52,2122,481,0.22667295004712534,0.3983593285083771,0.610203742980957,0.5488565488565489,0.4178794178794179,7085520.2,"{'accuracy': 0.5612629594721961, 'precision': 0.5488565488565489, 'recall': 0.26993865030674846, 'f1': 0.3618917066483893, 'roc_auc': 0.6246970054485392}"
10
- old_champion,tail_row_holdout,0.15,0.55,2122,282,0.13289349670122527,0.36969414353370667,0.6147698760032654,0.5851063829787234,0.4787234042553192,4201818.5,"{'accuracy': 0.5617342130065975, 'precision': 0.5851063829787234, 'recall': 0.1687116564417178, 'f1': 0.2619047619047619, 'roc_auc': 0.6319652995266494}"
11
- new_challenger_jan_to_mar20_candidate_v1,tail_row_holdout,0.15,0.55,2122,363,0.17106503298774742,0.3983593285083771,0.6344473958015442,0.559228650137741,0.44077134986225897,5348681.0,"{'accuracy': 0.55937794533459, 'precision': 0.559228650137741, 'recall': 0.20756646216768918, 'f1': 0.30275913497390006, 'roc_auc': 0.6246970054485392}"
 
 
 
 
 
 
1
  model_name,compare_mode,holdout_frac,threshold,rows_total,rows_kept,keep_rate,avg_score_all,avg_score_kept,kept_hit_rate_1to1,kept_hit_rate_1to2,capital_kept_per_lot_sum,metrics_all_rows
2
+ old_champion,tail_row_holdout,0.15,0.46,2381,602,0.252834943301134,0.3579495847225189,0.5610937476158142,0.5631229235880398,0.4053156146179402,7966851.65,"{'accuracy': 0.5963880722385553, 'precision': 0.5631229235880398, 'recall': 0.32690453230472516, 'f1': 0.41366687004270897, 'roc_auc': 0.6465372009459521}"
3
+ same_arch_jan_to_mar25_v1,tail_row_holdout,0.15,0.46,2381,1131,0.4750104997900042,0.4296109080314636,0.5252474546432495,0.5384615384615384,0.3757736516357206,13787745.75,"{'accuracy': 0.6010079798404032, 'precision': 0.5384615384615384, 'recall': 0.5872709739633558, 'f1': 0.5618081180811808, 'roc_auc': 0.634292343986775}"
4
+ large_arch_jan_to_mar25_v1,tail_row_holdout,0.15,0.46,2381,1059,0.4447711045779084,0.40435346961021423,0.5262973308563232,0.5486307837582625,0.3890462700661001,13143270.8,"{'accuracy': 0.6077278454430911, 'precision': 0.5486307837582625, 'recall': 0.5602700096432015, 'f1': 0.5543893129770993, 'roc_auc': 0.6401787149285945}"
5
+ old_champion,tail_row_holdout,0.15,0.48,2381,519,0.21797564048719026,0.3579495847225189,0.5759100317955017,0.5722543352601156,0.41040462427745666,6910059.4,"{'accuracy': 0.5959680806383872, 'precision': 0.5722543352601156, 'recall': 0.28640308582449375, 'f1': 0.38174807197943444, 'roc_auc': 0.6465372009459521}"
6
+ same_arch_jan_to_mar25_v1,tail_row_holdout,0.15,0.48,2381,974,0.40907181856362873,0.4296109080314636,0.5340793132781982,0.553388090349076,0.3921971252566735,11814094.0,"{'accuracy': 0.6081478370432591, 'precision': 0.553388090349076, 'recall': 0.5197685631629702, 'f1': 0.5360517155643958, 'roc_auc': 0.634292343986775}"
7
+ large_arch_jan_to_mar25_v1,tail_row_holdout,0.15,0.48,2381,837,0.3515329693406132,0.40435346961021423,0.5411040782928467,0.5710872162485066,0.3966547192353644,10431748.95,"{'accuracy': 0.6144477110457791, 'precision': 0.5710872162485066, 'recall': 0.4609450337512054, 'f1': 0.5101387406616862, 'roc_auc': 0.6401787149285945}"
8
+ old_champion,tail_row_holdout,0.15,0.5,2381,450,0.1889962200755985,0.3579495847225189,0.5891824960708618,0.5911111111111111,0.4288888888888889,5967847.9,"{'accuracy': 0.5989080218395632, 'precision': 0.5911111111111111, 'recall': 0.25650916104146576, 'f1': 0.3577673167451244, 'roc_auc': 0.6465372009459521}"
9
+ same_arch_jan_to_mar25_v1,tail_row_holdout,0.15,0.5,2381,788,0.33095338093238136,0.4296109080314636,0.5442647933959961,0.5647208121827412,0.4137055837563452,9421223.1,"{'accuracy': 0.6073078538429232, 'precision': 0.5647208121827412, 'recall': 0.42912246865959497, 'f1': 0.4876712328767123, 'roc_auc': 0.634292343986775}"
10
+ large_arch_jan_to_mar25_v1,tail_row_holdout,0.15,0.5,2381,608,0.25535489290214197,0.40435346961021423,0.5608430504798889,0.5904605263157895,0.42105263157894735,7560974.15,"{'accuracy': 0.6106677866442671, 'precision': 0.5904605263157895, 'recall': 0.3461909353905497, 'f1': 0.43647416413373863, 'roc_auc': 0.6401787149285945}"
11
+ old_champion,tail_row_holdout,0.15,0.52,2381,384,0.16127677446451072,0.3579495847225189,0.6027748584747314,0.6119791666666666,0.453125,5130389.65,"{'accuracy': 0.6005879882402352, 'precision': 0.6119791666666666, 'recall': 0.2266152362584378, 'f1': 0.330752990851513, 'roc_auc': 0.6465372009459521}"
12
+ same_arch_jan_to_mar25_v1,tail_row_holdout,0.15,0.52,2381,587,0.24653506929861402,0.4296109080314636,0.5558530688285828,0.5826235093696763,0.42759795570698467,6909312.35,"{'accuracy': 0.6052078958420831, 'precision': 0.5826235093696763, 'recall': 0.32979749276759884, 'f1': 0.4211822660098522, 'roc_auc': 0.634292343986775}"
13
+ large_arch_jan_to_mar25_v1,tail_row_holdout,0.15,0.52,2381,436,0.18311633767324653,0.40435346961021423,0.581216037273407,0.6169724770642202,0.44724770642201833,5417916.05,"{'accuracy': 0.6073078538429232, 'precision': 0.6169724770642202, 'recall': 0.25940212150433944, 'f1': 0.3652410047522064, 'roc_auc': 0.6401787149285945}"
14
+ old_champion,tail_row_holdout,0.15,0.55,2381,296,0.124317513649727,0.3579495847225189,0.6226372718811035,0.6351351351351351,0.47635135135135137,3913824.6500000004,"{'accuracy': 0.5980680386392272, 'precision': 0.6351351351351351, 'recall': 0.18129218900675023, 'f1': 0.2820705176294073, 'roc_auc': 0.6465372009459521}"
15
+ same_arch_jan_to_mar25_v1,tail_row_holdout,0.15,0.55,2381,266,0.1117177656446871,0.4296109080314636,0.58200603723526,0.6428571428571429,0.5037593984962406,3083757.75,"{'accuracy': 0.5963880722385553, 'precision': 0.6428571428571429, 'recall': 0.16489874638379942, 'f1': 0.2624712202609363, 'roc_auc': 0.634292343986775}"
16
+ large_arch_jan_to_mar25_v1,tail_row_holdout,0.15,0.55,2381,289,0.12137757244855103,0.40435346961021423,0.6054689884185791,0.5986159169550173,0.46366782006920415,3570467.75,"{'accuracy': 0.5884082318353633, 'precision': 0.5986159169550173, 'recall': 0.16682738669238187, 'f1': 0.2609351432880845, 'roc_auc': 0.6401787149285945}"
outputs/nn_preprocessor_label_1to1_jan_to_mar25_large_v1.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81c39fc022bb9f56e75686548130c1fd8edbd0f9d484c3d8a9038ca8296276a4
3
+ size 20196
outputs/nn_preprocessor_label_1to1_jan_to_mar25_same_arch_v1.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81c39fc022bb9f56e75686548130c1fd8edbd0f9d484c3d8a9038ca8296276a4
3
+ size 20196
outputs/nn_saved_metrics_label_1to1_jan_to_mar25_large_v1.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_tag": "jan_to_mar25_large_v1",
3
+ "best_epoch_from_validation": 4,
4
+ "valid_metrics": {
5
+ "accuracy": 0.5936974789915966,
6
+ "precision": 0.5725308641975309,
7
+ "recall": 0.34967012252591895,
8
+ "f1": 0.43417203042715036,
9
+ "roc_auc": 0.6323236336327109
10
+ },
11
+ "test_metrics": {
12
+ "accuracy": 0.6085678286434272,
13
+ "precision": 0.5966850828729282,
14
+ "recall": 0.3124397299903568,
15
+ "f1": 0.41012658227848103,
16
+ "roc_auc": 0.6582776553244248
17
+ },
18
+ "train_rows": 11106,
19
+ "valid_rows": 2380,
20
+ "test_rows": 2381,
21
+ "final_train_rows": 13486
22
+ }
outputs/nn_saved_metrics_label_1to1_jan_to_mar25_same_arch_v1.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_tag": "jan_to_mar25_same_arch_v1",
3
+ "best_epoch_from_validation": 2,
4
+ "valid_metrics": {
5
+ "accuracy": 0.5823529411764706,
6
+ "precision": 0.5414091470951793,
7
+ "recall": 0.412818096135721,
8
+ "f1": 0.46844919786096256,
9
+ "roc_auc": 0.6146946784435985
10
+ },
11
+ "test_metrics": {
12
+ "accuracy": 0.5837883242335153,
13
+ "precision": 0.5309973045822103,
14
+ "recall": 0.3799421407907425,
15
+ "f1": 0.44294547498594716,
16
+ "roc_auc": 0.618118456398953
17
+ },
18
+ "train_rows": 11106,
19
+ "valid_rows": 2380,
20
+ "test_rows": 2381,
21
+ "final_train_rows": 13486
22
+ }
score_mar25_champion.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import joblib
3
+ import pandas as pd
4
+ import tensorflow as tf
5
+
6
# Paths are resolved relative to this script so it can be run from any
# working directory.
BASE_DIR = Path(__file__).resolve().parent
OUT_DIR = BASE_DIR / "outputs"

# Input rows to score, the fitted preprocessor / Keras model used to score
# them, and the CSV the scored rows are written to.
# NOTE(review): the "jan_to_mar12_v2" artifacts are presumably the current
# champion model (the output file is named "champion") - confirm.
DATA_PATH = OUT_DIR / "ml_dataset_exact_all_v2_2026-03-25_to_2026-03-25.csv"
PREPROCESSOR_PATH = OUT_DIR / "nn_preprocessor_label_1to1_jan_to_mar12_v2.joblib"
MODEL_PATH = OUT_DIR / "nn_label_1to1_jan_to_mar12_v2.keras"
OUT_PATH = OUT_DIR / "mar25_champion_scored.csv"

# Rows whose predicted probability is >= THRESHOLD are flagged as "kept".
# The output column written by main() is named "champion_keep_at_052", so
# changing this value makes that column name misleading.
THRESHOLD = 0.52

# Columns removed from the frame before it is passed to the preprocessor.
# This includes both label columns, so labels never reach the model input.
DROP_COLS_ALWAYS = [
    "trade_key",
    "label_1to1",
    "label_1to2",
    "bt_buy_signal_time",
    "bt_sell_signal_time",
    "bt_buy_time",
    "bt_buy_price",
    "bt_stop_loss",
    "bt_target_1",
    "bt_target_2",
    "bt_qty_per_lot",
    "bt_capital_per_lot",
    "bt_stop_loss_amt_per_lot",
    "signal_time",
    "confirmation_time",
    "indication_time",
    "buy_time",
]

# Additional columns that are also removed whenever they are present.
OPTIONAL_DROP_COLS = [
    "exit_status",
    "option_symbol",
    "trade_side",
]
41
+
42
def build_feature_matrix(df: pd.DataFrame):
    """Return a copy of *df* reduced to the columns fed to the preprocessor.

    Every column named in DROP_COLS_ALWAYS or OPTIONAL_DROP_COLS that is
    present in *df* is removed. When a "sector" column remains, missing
    values and empty strings in it are replaced with "UNKNOWN".

    The input frame is not modified.
    """
    removable = [
        name
        for name in (*DROP_COLS_ALWAYS, *OPTIONAL_DROP_COLS)
        if name in df.columns
    ]
    features = df.drop(columns=removable, errors="ignore").copy()

    if "sector" in features.columns:
        sector = features["sector"].fillna("UNKNOWN")
        features["sector"] = sector.replace("", "UNKNOWN")

    return features
52
+
53
def main():
    """Score the rows in DATA_PATH with the saved model and write OUT_PATH.

    Loads the CSV, the fitted preprocessor and the Keras model, computes one
    probability per row, and adds two columns to the frame:
    "champion_prob" (the raw probability) and "champion_keep_at_052"
    (1 when the probability is >= THRESHOLD, else 0). The augmented frame is
    saved as CSV and a short summary is printed. When the input carries a
    "label_1to1" column and at least one row is kept, the mean label of the
    kept rows is printed as well.
    """
    frame = pd.read_csv(DATA_PATH)

    fitted_preprocessor = joblib.load(PREPROCESSOR_PATH)
    # compile=False: the model is only used for inference here.
    champion = tf.keras.models.load_model(MODEL_PATH, compile=False)

    features = fitted_preprocessor.transform(build_feature_matrix(frame))
    if hasattr(features, "toarray"):
        # The preprocessor may return a sparse matrix; densify it for Keras.
        features = features.toarray()

    scores = champion.predict(features, verbose=0).ravel()
    frame["champion_prob"] = scores
    frame["champion_keep_at_052"] = (scores >= THRESHOLD).astype(int)

    frame.to_csv(OUT_PATH, index=False)

    kept_count = int(frame["champion_keep_at_052"].sum())
    print(f"Saved scored file: {OUT_PATH}")
    print(f"Rows: {len(frame)}")
    print(f"Kept at {THRESHOLD}: {kept_count}")

    if "label_1to1" not in frame.columns:
        return

    kept_rows = frame.loc[frame["champion_keep_at_052"] == 1]
    if len(kept_rows) > 0:
        print("Kept hit rate 1:1:", kept_rows["label_1to1"].astype(float).mean())


if __name__ == "__main__":
    main()
train_nn_save.py CHANGED
@@ -23,8 +23,8 @@ BASE_DIR = Path(__file__).resolve().parent
23
  OUT_DIR = BASE_DIR / "outputs"
24
  OUT_DIR.mkdir(exist_ok=True)
25
 
26
- MODEL_TAG = "jan_to_mar20_candidate_v1"
27
- MERGED_DATA_FILE = "ml_dataset_exact_all_v2_2026-01-01_to_2026-03-20_merged.csv"
28
 
29
  DATA_PATH = OUT_DIR / MERGED_DATA_FILE
30
  PREPROCESSOR_PATH = OUT_DIR / f"nn_preprocessor_label_1to1_{MODEL_TAG}.joblib"
 
23
  OUT_DIR = BASE_DIR / "outputs"
24
  OUT_DIR.mkdir(exist_ok=True)
25
 
26
+ MODEL_TAG = "jan_to_mar25_same_arch_v1"
27
+ MERGED_DATA_FILE = "ml_dataset_exact_all_v2_2026-01-01_to_2026-03-25_merged.csv"
28
 
29
  DATA_PATH = OUT_DIR / MERGED_DATA_FILE
30
  PREPROCESSOR_PATH = OUT_DIR / f"nn_preprocessor_label_1to1_{MODEL_TAG}.joblib"
train_nn_save_large.py ADDED
@@ -0,0 +1,261 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import json
3
+ import warnings
4
+ import joblib
5
+ import numpy as np
6
+ import pandas as pd
7
+
8
+ from sklearn.compose import ColumnTransformer
9
+ from sklearn.impute import SimpleImputer
10
+ from sklearn.preprocessing import OneHotEncoder, StandardScaler
11
+ from sklearn.metrics import (
12
+ accuracy_score,
13
+ precision_score,
14
+ recall_score,
15
+ f1_score,
16
+ roc_auc_score,
17
+ )
18
+ from sklearn.pipeline import Pipeline
19
+
20
# Silences every Python warning for the whole process, including any
# emitted by the libraries used below.
warnings.filterwarnings("ignore")

# Paths are resolved relative to this script; the outputs directory is
# created on import if it does not exist yet.
BASE_DIR = Path(__file__).resolve().parent
OUT_DIR = BASE_DIR / "outputs"
OUT_DIR.mkdir(exist_ok=True)

# MODEL_TAG is embedded in the file names of every artifact saved by main().
MODEL_TAG = "jan_to_mar25_large_v1"
MERGED_DATA_FILE = "ml_dataset_exact_all_v2_2026-01-01_to_2026-03-25_merged.csv"

DATA_PATH = OUT_DIR / MERGED_DATA_FILE
PREPROCESSOR_PATH = OUT_DIR / f"nn_preprocessor_label_1to1_{MODEL_TAG}.joblib"
MODEL_PATH = OUT_DIR / f"nn_label_1to1_{MODEL_TAG}.keras"
METRICS_PATH = OUT_DIR / f"nn_saved_metrics_label_1to1_{MODEL_TAG}.json"

# Column used as the binary training label.
TARGET = "label_1to1"

# Columns removed from the feature matrix. Both label columns are listed,
# so the target never appears among the model inputs.
DROP_COLS_ALWAYS = [
    "trade_key",
    "label_1to1",
    "label_1to2",
    "bt_buy_signal_time",
    "bt_sell_signal_time",
    "bt_buy_time",
    "bt_buy_price",
    "bt_stop_loss",
    "bt_target_1",
    "bt_target_2",
    "bt_qty_per_lot",
    "bt_capital_per_lot",
    "bt_stop_loss_amt_per_lot",
    "signal_time",
    "confirmation_time",
    "indication_time",
    "buy_time",
]

# Additional columns that are also removed whenever they are present.
OPTIONAL_DROP_COLS = [
    "exit_status",
    "option_symbol",
    "trade_side",
]
61
+
62
+
63
def load_data():
    """Read the merged dataset from DATA_PATH.

    The "trade_date" column is parsed to datetimes; values that cannot be
    parsed become NaT instead of raising.
    """
    return pd.read_csv(DATA_PATH).assign(
        trade_date=lambda frame: pd.to_datetime(frame["trade_date"], errors="coerce")
    )
67
+
68
+
69
def time_split_by_row_count(df: pd.DataFrame, train_frac=0.70, valid_frac=0.15):
    """Split *df* chronologically into train, validation and test frames.

    Rows are ordered by whichever of "trade_date", "signal_time",
    "confirmation_time", "buy_time" and "trade_key" exist in *df* (in that
    priority), then cut by row count: the first ``train_frac`` of the rows
    form the training set, the next ``valid_frac`` the validation set, and
    the remainder the test set. When none of those columns exist the
    existing row order is used as-is.

    The cut points are clamped so that each of the three splits contains at
    least one row and no row appears in more than one split.

    Args:
        df: Source rows. Not modified.
        train_frac: Fraction of rows assigned to the training split.
        valid_frac: Fraction of rows assigned to the validation split.

    Returns:
        A ``(train_df, valid_df, test_df)`` tuple of independent copies.

    Raises:
        ValueError: If *df* has fewer than three rows, since three non-empty
            splits cannot be formed.
    """
    n = len(df)
    if n < 3:
        raise ValueError(
            f"Need at least 3 rows to build train/valid/test splits, got {n}"
        )

    ordering_candidates = [
        "trade_date",
        "signal_time",
        "confirmation_time",
        "buy_time",
        "trade_key",
    ]
    sort_cols = [c for c in ordering_candidates if c in df.columns]

    if sort_cols:
        ordered = df.sort_values(sort_cols).reset_index(drop=True)
    else:
        # No ordering column is available (this includes "trade_date", so
        # sorting by it would raise KeyError); keep the incoming row order.
        ordered = df.reset_index(drop=True)

    # Clamp so that train leaves room for at least one validation row and
    # one test row, and validation leaves room for at least one test row.
    train_end = min(max(int(n * train_frac), 1), n - 2)
    valid_end = min(max(int(n * (train_frac + valid_frac)), train_end + 1), n - 1)

    train_df = ordered.iloc[:train_end].copy()
    valid_df = ordered.iloc[train_end:valid_end].copy()
    test_df = ordered.iloc[valid_end:].copy()

    return train_df, valid_df, test_df
90
+
91
+
92
def build_feature_matrix(df: pd.DataFrame):
    """Split *df* into a feature frame and an integer label series.

    Features are a copy of *df* without any column listed in
    DROP_COLS_ALWAYS or OPTIONAL_DROP_COLS; a remaining "sector" column has
    missing values and empty strings replaced with "UNKNOWN". Labels are the
    TARGET column cast to int.

    Returns:
        An ``(X, y)`` tuple. The input frame is not modified.
    """
    removable = [
        name
        for name in (*DROP_COLS_ALWAYS, *OPTIONAL_DROP_COLS)
        if name in df.columns
    ]

    features = df.drop(columns=removable, errors="ignore").copy()
    labels = df[TARGET].astype(int).copy()

    if "sector" in features.columns:
        sector = features["sector"].fillna("UNKNOWN")
        features["sector"] = sector.replace("", "UNKNOWN")

    return features, labels
103
+
104
+
105
def get_preprocessor(X: pd.DataFrame):
    """Build an unfitted ColumnTransformer matched to the columns of *X*.

    Numeric columns are median-imputed and standardised. Every other column
    is imputed with its most frequent value and one-hot encoded; categories
    not seen during fitting are ignored at transform time.
    """
    numeric_cols = X.select_dtypes(include=[np.number]).columns.tolist()
    categorical_cols = [col for col in X.columns if col not in numeric_cols]

    return ColumnTransformer(
        [
            (
                "num",
                Pipeline(
                    [
                        ("imputer", SimpleImputer(strategy="median")),
                        ("scaler", StandardScaler()),
                    ]
                ),
                numeric_cols,
            ),
            (
                "cat",
                Pipeline(
                    [
                        ("imputer", SimpleImputer(strategy="most_frequent")),
                        ("onehot", OneHotEncoder(handle_unknown="ignore")),
                    ]
                ),
                categorical_cols,
            ),
        ]
    )
125
+
126
+
127
def compute_metrics(y_true, y_pred, y_prob):
    """Return classification metrics as a dict of plain Python floats.

    Keys are "accuracy", "precision", "recall", "f1" and "roc_auc".
    Precision, recall and F1 are reported as 0 when they are undefined.
    "roc_auc" is None when it cannot be computed from *y_true* / *y_prob*.
    """
    results = {"accuracy": float(accuracy_score(y_true, y_pred))}

    thresholded_scorers = (
        ("precision", precision_score),
        ("recall", recall_score),
        ("f1", f1_score),
    )
    for key, scorer in thresholded_scorers:
        results[key] = float(scorer(y_true, y_pred, zero_division=0))

    # Any failure while computing ROC AUC is deliberately mapped to None so
    # the remaining metrics are still reported.
    try:
        auc = float(roc_auc_score(y_true, y_prob))
    except Exception:
        auc = None
    results["roc_auc"] = auc

    return results
139
+
140
+
141
def build_model(input_dim):
    """Create and compile the feed-forward binary classifier.

    The network is three ReLU Dense layers of 256, 128 and 64 units, each
    followed by Dropout (0.35, 0.25, 0.15), and a single sigmoid output
    unit. It is compiled with the Adam optimizer, binary cross-entropy loss
    and an accuracy metric.

    Args:
        input_dim: Number of input features per row.
    """
    # Imported lazily so TensorFlow is only loaded when a model is built.
    from tensorflow.keras import Sequential
    from tensorflow.keras.layers import Dense, Dropout, Input

    stack = [Input(shape=(input_dim,))]
    for units, drop_rate in ((256, 0.35), (128, 0.25), (64, 0.15)):
        stack.append(Dense(units, activation="relu"))
        stack.append(Dropout(drop_rate))
    stack.append(Dense(1, activation="sigmoid"))

    network = Sequential(stack)
    network.compile(
        optimizer="adam",
        loss="binary_crossentropy",
        metrics=["accuracy"],
    )
    return network
162
+
163
+
164
def main():
    """Train, evaluate and save the large-architecture model.

    Phase 1 fits a preprocessor and model on the training split, using the
    validation split for early stopping, and scores the validation and test
    splits at a 0.5 probability cut-off. Phase 2 fits a fresh preprocessor
    and model on train + validation for the number of epochs at which
    validation loss was lowest in phase 1.

    The phase-2 preprocessor and model are saved to PREPROCESSOR_PATH and
    MODEL_PATH. The metrics written to METRICS_PATH come from the phase-1
    model, not from the saved one.
    """
    from tensorflow.keras.callbacks import EarlyStopping

    def _dense(matrix):
        # The ColumnTransformer may return a sparse matrix; Keras needs dense.
        return matrix.toarray() if hasattr(matrix, "toarray") else matrix

    train_df, valid_df, test_df = time_split_by_row_count(load_data())

    # ----------------------------
    # Phase 1: evaluation training
    # ----------------------------
    X_train_raw, y_train = build_feature_matrix(train_df)
    X_valid_raw, y_valid = build_feature_matrix(valid_df)
    X_test_raw, y_test = build_feature_matrix(test_df)

    # Fit on the training split only so validation/test statistics do not
    # influence imputation, scaling or the one-hot vocabulary.
    eval_preprocessor = get_preprocessor(X_train_raw)
    X_train = _dense(eval_preprocessor.fit_transform(X_train_raw))
    X_valid = _dense(eval_preprocessor.transform(X_valid_raw))
    X_test = _dense(eval_preprocessor.transform(X_test_raw))

    eval_model = build_model(X_train.shape[1])

    stopper = EarlyStopping(
        monitor="val_loss",
        patience=8,
        restore_best_weights=True,
    )

    history = eval_model.fit(
        X_train,
        y_train.values,
        validation_data=(X_valid, y_valid.values),
        epochs=60,
        batch_size=64,
        callbacks=[stopper],
        verbose=1,
    )

    valid_prob = eval_model.predict(X_valid, verbose=0).ravel()
    test_prob = eval_model.predict(X_test, verbose=0).ravel()

    valid_pred = (valid_prob >= 0.5).astype(int)
    test_pred = (test_prob >= 0.5).astype(int)

    # Epochs are 1-based, argmin is 0-based.
    best_epoch = int(np.argmin(history.history["val_loss"])) + 1

    # ----------------------------
    # Phase 2: final deployment fit
    # train on train + valid
    # ----------------------------
    train_valid_df = pd.concat([train_df, valid_df], ignore_index=True)
    X_train_valid_raw, y_train_valid = build_feature_matrix(train_valid_df)

    final_preprocessor = get_preprocessor(X_train_valid_raw)
    X_train_valid = _dense(final_preprocessor.fit_transform(X_train_valid_raw))

    # No validation data is held out here, so the epoch count found in
    # phase 1 replaces early stopping.
    final_model = build_model(X_train_valid.shape[1])
    final_model.fit(
        X_train_valid,
        y_train_valid.values,
        epochs=best_epoch,
        batch_size=64,
        verbose=1,
    )

    metrics = {
        "model_tag": MODEL_TAG,
        "best_epoch_from_validation": best_epoch,
        "valid_metrics": compute_metrics(y_valid, valid_pred, valid_prob),
        "test_metrics": compute_metrics(y_test, test_pred, test_prob),
        "train_rows": len(train_df),
        "valid_rows": len(valid_df),
        "test_rows": len(test_df),
        "final_train_rows": len(train_valid_df),
    }

    joblib.dump(final_preprocessor, PREPROCESSOR_PATH)
    final_model.save(MODEL_PATH)

    with open(METRICS_PATH, "w") as f:
        json.dump(metrics, f, indent=2)

    print(f"Saved preprocessor to: {PREPROCESSOR_PATH}")
    print(f"Saved model to: {MODEL_PATH}")
    print(f"Saved metrics to: {METRICS_PATH}")
    print(metrics)


if __name__ == "__main__":
    main()