PrazNeuro committed on
Commit
a590047
·
verified ·
1 Parent(s): d536f4d

Upload Scenario_heldout_final_PRECISE.py

Browse files
Files changed (1) hide show
  1. Scenario_heldout_final_PRECISE.py +369 -0
Scenario_heldout_final_PRECISE.py ADDED
@@ -0,0 +1,369 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import warnings
3
+ import pandas as pd
4
+ import numpy as np
5
+ import json
6
+ import time
7
+ from tqdm import tqdm
8
+ import os
9
+ from datetime import datetime as _dt, timezone as _tz
10
+
11
+
12
+ from sklearn.exceptions import ConvergenceWarning
13
+ from sklearn.mixture import GaussianMixture
14
+ from sklearn.preprocessing import StandardScaler
15
+ from sklearn.linear_model import LassoCV
16
+ from sklearn.svm import SVC
17
+ from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier, VotingClassifier
18
+ from sklearn.pipeline import Pipeline
19
+ from sklearn.model_selection import StratifiedKFold, RandomizedSearchCV
20
+ from sklearn.metrics import (
21
+ accuracy_score, precision_score, recall_score,
22
+ f1_score, balanced_accuracy_score, matthews_corrcoef
23
+ )
24
+ from joblib import dump
25
+
26
# -------------------------
# Logging & warnings
# -------------------------
logging.basicConfig(
    filename='nested_lodo_groupsv1.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
warnings.filterwarnings('ignore', category=UserWarning)
warnings.filterwarnings('ignore', category=ConvergenceWarning)

# Pre-create per-scenario output directories for saved models.
# NOTE(review): these '*v1' names do not match the 'models_{sig_name}' paths
# used when dumping models later in this script (that code calls os.makedirs
# itself, so nothing breaks) — confirm whether the v1 directories are still needed.
for _model_root in ('models_GBMv1', 'models_LM22v1'):
    for _scenario_num in (1, 2, 3):
        os.makedirs(f'{_model_root}/scenario_{_scenario_num}', exist_ok=True)

# -------------------------
# Caching for pipelines
# -------------------------
# Joblib.Memory cache disabled to avoid creating cache directories and
# PermissionError race conditions on Windows when using parallel workers.
memory = None
logging.info("Joblib Memory disabled; no pipeline caching will be used")
+
53
+ # Helper: convert numpy scalars/arrays and dicts into JSON-serializable Python types
54
+
55
+ def _convert_obj(o):
56
+ """Recursively convert numpy types/arrays to native Python objects for JSON dumping."""
57
+ # numpy arrays -> lists
58
+ if hasattr(o, 'tolist') and not isinstance(o, (dict, list, str, bytes)):
59
+ try:
60
+ return o.tolist()
61
+ except Exception:
62
+ return str(o)
63
+ # dict -> convert values
64
+ if isinstance(o, dict):
65
+ return {k: _convert_obj(v) for k, v in o.items()}
66
+ # list/tuple -> convert items
67
+ if isinstance(o, (list, tuple)):
68
+ return [_convert_obj(v) for v in o]
69
+ # numpy scalar -> python native
70
+ if isinstance(o, (np.integer, np.floating, np.bool_)):
71
+ return o.item()
72
+ # otherwise return as-is
73
+ return o
74
+
75
+ def _cv_results_to_serializable(cv_dict):
76
+ """Convert sklearn cv_results_ dict values (numpy arrays) into lists where needed."""
77
+ out = {}
78
+ for k, v in cv_dict.items():
79
+ if hasattr(v, 'tolist'):
80
+ try:
81
+ out[k] = v.tolist()
82
+ except Exception:
83
+ out[k] = str(v)
84
+ else:
85
+ out[k] = _convert_obj(v)
86
+ return out
87
+
88
# -------------------------
# Utility: two-step Lasso selection
# -------------------------
def select_features(X, y, alphas=(0.1, 0.01), cv=5, max_iter=10000, n_jobs=1, random_state=42):
    """Try each alpha in turn; return indices of non-zero Lasso coefficients.

    Fits a single-alpha LassoCV per candidate and returns the support of the
    first alpha that selects at least one feature. Raises ValueError if every
    alpha yields an empty support.
    """
    for candidate_alpha in alphas:
        model = LassoCV(
            alphas=[candidate_alpha],
            cv=cv,
            max_iter=max_iter,
            n_jobs=n_jobs,
            random_state=random_state,
        )
        # Fit on its own line so static analyzers can see the fitted type.
        model.fit(X, y)
        # flatnonzero gives the selected indices as a 1-D integer array.
        selected = np.flatnonzero(model.coef_ != 0)
        if selected.size > 0:
            return selected
    raise ValueError(f"No features selected at alphas {alphas}")
+
106
# -------------------------
# Define two groups of scenarios with actual paths
# Scenario definitions_LM22
# Each scenario id maps to the four CSVs of one leave-one-dataset-out split:
#   train_radiomics / train_immune    - tables for the training cohorts
#   heldout_radiomics / heldout_immune - tables for the held-out cohort
# Immune tables are CIBERSORTx outputs with the LM22 signature (per folder names).
# Held-out cohorts (per the file names): 1 = IvyGAP, 2 = TCGA, 3 = CPTAC.
# NOTE(review): 'neuro_combat' in the radiomics file names suggests
# ComBat-harmonized features — confirm against the preprocessing pipeline.
scenarios_LM22 = {
    1: {
        'train_radiomics': r"C:/Users/pg22/Downloads/PRECISE-GBM/LOOCV_withoutHarm/Radiomics/neuro_combat_radiomic_CGGA_Rem_CP_TC.csv",
        'train_immune': r"C:/Users/pg22/Downloads/PRECISE-GBM/LOOCV_withoutHarm/Genome/Heldout/heldout_Ivy/Cbx_LOOCV_heldout_Ivy_Lm22/CIBERSORTx_Job49_Results.csv",
        'heldout_radiomics': r"C:/Users/pg22/Downloads/PRECISE-GBM/LOOCV_withoutHarm/Radiomics/Radiomics_LOOCV_test_Ivy.csv",
        'heldout_immune': r"C:/Users/pg22/Downloads/PRECISE-GBM/LOOCV_withoutHarm/Genome/Testing/IvyGAP/Test_Ivy_LM22/CIBERSORTx_Job55_Results.csv"
    },
    2: {
        'train_radiomics': r"C:/Users/pg22/Downloads/PRECISE-GBM/LOOCV_withoutHarm/Radiomics/neuro_combat_radiomic_CGGA_Rem_CP_ivy.csv",
        'train_immune': r"C:/Users/pg22/Downloads/PRECISE-GBM/LOOCV_withoutHarm/Genome/Heldout/heldout_TCGA/Cbx_heldoutTCGA_Lm22/CIBERSORTx_Job47_Results.csv",
        'heldout_radiomics': r"C:/Users/pg22/Downloads/PRECISE-GBM/LOOCV_withoutHarm/Radiomics/Radiomics_LOOCV_test_TCGA.csv",
        'heldout_immune': r"C:/Users/pg22/Downloads/PRECISE-GBM/LOOCV_withoutHarm/Genome/Testing/TCGA/Cbx_TCGA_Test_LM22/CIBERSORTx_Job53_Results.csv"
    },
    3: {
        'train_radiomics': r"C:/Users/pg22/Downloads/PRECISE-GBM/LOOCV_withoutHarm/Radiomics/neuro_combat_radiomic_CGGA_Rem_TC_ivy.csv",
        'train_immune': r"C:/Users/pg22/Downloads/PRECISE-GBM/LOOCV_withoutHarm/Genome/Heldout/heldout_CPTAC/CBx_LOOCV_heldout_CPTAC_LM22/CIBERSORTx_Job51_Results.csv",
        'heldout_radiomics': r"C:/Users/pg22/Downloads/PRECISE-GBM/LOOCV_withoutHarm/Radiomics/Radiomics_LOOCV_test_CPTAC.csv",
        'heldout_immune': r"C:/Users/pg22/Downloads/PRECISE-GBM/LOOCV_withoutHarm/Genome/Testing/CPTAC/Test_CPTAC_LM22/CIBERSORTx_Job57_Results.csv"
    }
}
# Scenario definitions_GBM
# Same three leave-one-dataset-out splits as the LM22 group above, but the
# immune tables come from CIBERSORTx runs with a GBM signature (per the
# *_GBM folder names). Radiomics files are shared between the two groups.
scenarios_GBM = {
    1: {
        'train_radiomics': r"C:/Users/pg22/Downloads/PRECISE-GBM/LOOCV_withoutHarm/Radiomics/neuro_combat_radiomic_CGGA_Rem_CP_TC.csv",
        'train_immune': r"C:/Users/pg22/Downloads/PRECISE-GBM/LOOCV_withoutHarm/Genome/Heldout/heldout_Ivy/Cbx_LOOCV_heldout_Ivy_GBM/CIBERSORTx_Job50_Results.csv",
        'heldout_radiomics': r"C:/Users/pg22/Downloads/PRECISE-GBM/LOOCV_withoutHarm/Radiomics/Radiomics_LOOCV_test_Ivy.csv",
        'heldout_immune': r"C:/Users/pg22/Downloads/PRECISE-GBM/LOOCV_withoutHarm/Genome/Testing/IvyGAP/Test_Ivy_GBM/CIBERSORTx_Job56_Results.csv"
    },
    2: {
        'train_radiomics': r"C:/Users/pg22/Downloads/PRECISE-GBM/LOOCV_withoutHarm/Radiomics/neuro_combat_radiomic_CGGA_Rem_CP_ivy.csv",
        'train_immune': r"C:/Users/pg22/Downloads/PRECISE-GBM/LOOCV_withoutHarm/Genome/Heldout/heldout_TCGA/Cbx_LOOCV_TCGA_heldout_GBM/CIBERSORTx_Job48_Results.csv",
        'heldout_radiomics': r"C:/Users/pg22/Downloads/PRECISE-GBM/LOOCV_withoutHarm/Radiomics/Radiomics_LOOCV_test_TCGA.csv",
        'heldout_immune': r"C:/Users/pg22/Downloads/PRECISE-GBM/LOOCV_withoutHarm/Genome/Testing/TCGA/TCGA_test_GBM/CIBERSORTx_Job54_Results.csv"
    },
    3: {
        'train_radiomics': r"C:/Users/pg22/Downloads/PRECISE-GBM/LOOCV_withoutHarm/Radiomics/neuro_combat_radiomic_CGGA_Rem_TC_ivy.csv",
        'train_immune': r"C:/Users/pg22/Downloads/PRECISE-GBM/LOOCV_withoutHarm/Genome/Heldout/heldout_CPTAC/Cbx_LOOCV_heldout_CPTAC_GBM/CIBERSORTx_Job52_Results.csv",
        'heldout_radiomics': r"C:/Users/pg22/Downloads/PRECISE-GBM/LOOCV_withoutHarm/Radiomics/Radiomics_LOOCV_test_CPTAC.csv",
        'heldout_immune': r"C:/Users/pg22/Downloads/PRECISE-GBM/LOOCV_withoutHarm/Genome/Testing/CPTAC/Test_CPTAC_GBM/CIBERSORTx_Job58_Results.csv"
    }
}
+
151
# Both signature matrices are processed over the same three scenarios by the
# main loop below; the key becomes the 'models_{sig_name}' output prefix.
signature_groups = {
    'LM22': scenarios_LM22,
    'GBM': scenarios_GBM
}
+
156
# -------------------------
# Hyperparameter grids
# -------------------------
# Search space for the standalone SVM pipeline (addresses the 'clf' step).
param_dist_svm = {
    'clf__C': [1, 10],
    'clf__gamma': [0.01, 0.1],
    'clf__kernel': ['rbf']
}
# Search space for the voting ensemble; keys address the nested estimators
# ('ensemble' step -> 'svm' sub-pipeline's 'classifier', plus 'rf' and 'gb').
param_dist_ensemble = {
    'ensemble__svm__classifier__C': [1],
    'ensemble__svm__classifier__kernel': ['rbf'],
    'ensemble__rf__n_estimators': [100, 200],
    'ensemble__rf__max_depth': [None],
    'ensemble__gb__max_iter': [100],
    'ensemble__gb__learning_rate': [0.1]
}
+
173
# -------------------------
# Process each signature group
# -------------------------

def _binary_metrics(y_true, y_pred):
    """Standard binary-classification metrics for held-out predictions.

    Hoisted out of the per-column loop (the original redefined this closure on
    every iteration). zero_division=1 keeps precision/recall/F1 defined when a
    class is never predicted, matching the original behavior.
    """
    return {
        'Accuracy': accuracy_score(y_true, y_pred),
        'Precision': precision_score(y_true, y_pred, zero_division=1),
        'Recall': recall_score(y_true, y_pred, zero_division=1),
        'F1 Score': f1_score(y_true, y_pred, zero_division=1),
        'Balanced Accuracy': balanced_accuracy_score(y_true, y_pred),
        'MCC': matthews_corrcoef(y_true, y_pred)
    }

for sig_name, scenarios in signature_groups.items():
    # Per-group accumulators, written to JSON once the whole group finishes.
    all_results = {}
    all_features = {}
    all_cv = {}

    for scen_id, paths in scenarios.items():
        logging.info(f"[{sig_name}] Starting {scen_id}")
        t0 = time.time()

        # Load & align training data (inner join on the sample index).
        rad_tr = pd.read_csv(paths['train_radiomics'], index_col=0)
        imm_tr = pd.read_csv(paths['train_immune'], index_col=0)
        df_tr = pd.merge(rad_tr, imm_tr, left_index=True, right_index=True, how='inner')

        # Load & align held-out data the same way.
        rad_ho = pd.read_csv(paths['heldout_radiomics'], index_col=0)
        imm_ho = pd.read_csv(paths['heldout_immune'], index_col=0)
        df_ho = pd.merge(rad_ho, imm_ho, left_index=True, right_index=True, how='inner')

        scen_results = {}
        scen_features = {}
        scen_cv = {}
        inner_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

        # Immune feature columns present in BOTH cohorts (may differ by signature).
        immune_cols = imm_tr.columns.intersection(imm_ho.columns)
        if immune_cols.empty:
            raise ValueError(f"{sig_name}:{scen_id} - no matching immune features between train and held-out")
        logging.info(f"{sig_name}:{scen_id} - {len(immune_cols)} immune features: {immune_cols.tolist()}")

        for col in tqdm(immune_cols, desc=f"{sig_name}:{scen_id}"):
            try:
                # --- GMM labeling on train: dichotomize this immune fraction ---
                gmm = GaussianMixture(n_components=2, random_state=42)
                y_tr = gmm.fit_predict(df_tr[[col]].values)
                if len(np.unique(y_tr)) < 2:
                    # Degenerate fit (one component captured everything) — skip column.
                    continue
                y_ho = gmm.predict(df_ho[[col]].values)
                # Ensure label 1 = higher-mean component.
                # BUGFIX: the original flipped when m0 < m1 — i.e. exactly when
                # component 1 ALREADY had the higher mean — which made label 1
                # the low-mean class, contradicting the stated intent. Flip only
                # when component 0 has the higher mean.
                m0, m1 = gmm.means_.flatten()
                if m0 > m1:
                    y_tr = 1 - y_tr
                    y_ho = 1 - y_ho
                # Persist the GMM so the labeling can be reproduced later.
                gmm_model_path = f'models_{sig_name}/scenario_{scen_id}/{sig_name}_scen{scen_id}_{col}_gmm_model.joblib'
                os.makedirs(os.path.dirname(gmm_model_path), exist_ok=True)
                dump(gmm, gmm_model_path)
                logging.info(f"Saved GMM model to {gmm_model_path}")
                logging.info(f"GMM means for {sig_name}:{scen_id}, col {col}: {gmm.means_.flatten().tolist()}")

                # --- Feature selection (two-step Lasso over all other columns) ---
                X_tr = df_tr.drop(columns=[col]).values
                X_ho = df_ho.drop(columns=[col]).values
                sel = select_features(X_tr, y_tr)
                X_tr_sel, X_ho_sel = X_tr[:, sel], X_ho[:, sel]
                feat_names = df_tr.drop(columns=[col]).columns.tolist()
                sel_names = [feat_names[i] for i in sel]

                # Save selected feature names for this model so retraining can reuse them
                sel_feat_path = f'models_{sig_name}/scenario_{scen_id}/{sig_name}_scen{scen_id}_{col}_selected_features.json'
                os.makedirs(os.path.dirname(sel_feat_path), exist_ok=True)
                ts = _dt.now(_tz.utc).strftime('%Y%m%d_%H%M%S')
                meta = {'saved_at': _dt.now(_tz.utc).isoformat(), 'version': ts, 'selected_features': sel_names}
                with open(sel_feat_path, 'w') as _f:
                    json.dump(meta, _f, indent=2)

                # --- SVM nested CV ---
                # Pipeline caching stays off (memory=None): joblib.Memory can hit
                # PermissionError races on Windows when parallel workers share a
                # cache. Final models/params are still saved explicitly below.
                pipe_svm = Pipeline([
                    ('scaler', StandardScaler()),
                    ('clf', SVC(class_weight='balanced', probability=True, random_state=42))
                ], memory=None)
                search_svm = RandomizedSearchCV(
                    pipe_svm, param_dist_svm, n_iter=5,
                    cv=inner_cv, scoring='balanced_accuracy',
                    n_jobs=1, refit=True, error_score='raise'
                )
                search_svm.fit(X_tr_sel, y_tr)
                y_pred_svm = search_svm.predict(X_ho_sel)
                # Consistency: reuse the module helper instead of re-implementing
                # the numpy-to-list conversion inline.
                cv_svm = _cv_results_to_serializable(search_svm.cv_results_)
                # Persist the refit SVM pipeline.
                svm_model_path = f'models_{sig_name}/scenario_{scen_id}/{sig_name}_scen{scen_id}_{col}_svm_model.joblib'
                os.makedirs(os.path.dirname(svm_model_path), exist_ok=True)
                dump(search_svm.best_estimator_, svm_model_path)
                logging.info(f"Saved SVM model to {svm_model_path}")
                logging.info(f"SVM best params for {sig_name}:{scen_id}, col {col}: {search_svm.best_params_}")

                # Save SVM best params and cv results for reproducibility / retraining (with metadata)
                svm_params_path = f'models_{sig_name}/scenario_{scen_id}/{sig_name}_scen{scen_id}_{col}_svm_params.json'
                svm_cv_path = f'models_{sig_name}/scenario_{scen_id}/{sig_name}_scen{scen_id}_{col}_svm_cv.json'
                os.makedirs(os.path.dirname(svm_params_path), exist_ok=True)
                svm_meta = {
                    'saved_at': _dt.now(_tz.utc).isoformat(),
                    'version': _dt.now(_tz.utc).strftime('%Y%m%d_%H%M%S'),
                    'best_params': _convert_obj(search_svm.best_params_)
                }
                with open(svm_params_path, 'w') as _f:
                    json.dump(svm_meta, _f, indent=2)
                svm_cv_meta = {
                    'saved_at': _dt.now(_tz.utc).isoformat(),
                    'version': _dt.now(_tz.utc).strftime('%Y%m%d_%H%M%S'),
                    'cv_results': _cv_results_to_serializable(search_svm.cv_results_)
                }
                with open(svm_cv_path, 'w') as _f:
                    json.dump(svm_cv_meta, _f, indent=2)

                # --- Ensemble nested CV (soft-voting SVM + RF + HistGB) ---
                base_pipe = Pipeline([
                    ('scaler', StandardScaler()),
                    ('classifier', SVC(class_weight='balanced', probability=True, random_state=42))
                ], memory=None)
                ensemble = VotingClassifier([
                    ('svm', base_pipe),
                    ('rf', RandomForestClassifier(class_weight='balanced', random_state=42)),
                    ('gb', HistGradientBoostingClassifier(random_state=42))
                ], voting='soft', weights=[1, 1, 1], n_jobs=1)
                pipe_ens = Pipeline([
                    ('scaler', StandardScaler()),
                    ('ensemble', ensemble)
                ], memory=None)
                search_ens = RandomizedSearchCV(
                    pipe_ens, param_dist_ensemble, n_iter=3,
                    cv=inner_cv, scoring='balanced_accuracy',
                    n_jobs=1, refit=True, error_score='raise'
                )
                search_ens.fit(X_tr_sel, y_tr)
                y_pred_ens = search_ens.predict(X_ho_sel)
                cv_ens = _cv_results_to_serializable(search_ens.cv_results_)
                # Persist the refit ensemble pipeline.
                ens_model_path = f'models_{sig_name}/scenario_{scen_id}/{sig_name}_scen{scen_id}_{col}_ens_model.joblib'
                os.makedirs(os.path.dirname(ens_model_path), exist_ok=True)
                dump(search_ens.best_estimator_, ens_model_path)
                logging.info(f"Saved Ensemble model to {ens_model_path}")
                logging.info(f"Ensemble best params for {sig_name}:{scen_id}, col {col}: {search_ens.best_params_}")

                # Save Ensemble best params and cv results for reproducibility / retraining (with metadata)
                ens_params_path = f'models_{sig_name}/scenario_{scen_id}/{sig_name}_scen{scen_id}_{col}_ens_params.json'
                ens_cv_path = f'models_{sig_name}/scenario_{scen_id}/{sig_name}_scen{scen_id}_{col}_ens_cv.json'
                os.makedirs(os.path.dirname(ens_params_path), exist_ok=True)
                ens_meta = {
                    'saved_at': _dt.now(_tz.utc).isoformat(),
                    'version': _dt.now(_tz.utc).strftime('%Y%m%d_%H%M%S'),
                    'best_params': _convert_obj(search_ens.best_params_)
                }
                with open(ens_params_path, 'w') as _f:
                    json.dump(ens_meta, _f, indent=2)
                ens_cv_meta = {
                    'saved_at': _dt.now(_tz.utc).isoformat(),
                    'version': _dt.now(_tz.utc).strftime('%Y%m%d_%H%M%S'),
                    'cv_results': _cv_results_to_serializable(search_ens.cv_results_)
                }
                with open(ens_cv_path, 'w') as _f:
                    json.dump(ens_cv_meta, _f, indent=2)

                # --- Held-out metrics (hoisted helper defined above) ---
                scen_results[col] = {'SVM': _binary_metrics(y_ho, y_pred_svm), 'Ensemble': _binary_metrics(y_ho, y_pred_ens)}
                scen_features[col] = sel_names
                scen_cv[col] = {'svm_cv': cv_svm, 'ensemble_cv': cv_ens}

            except Exception as e:
                # Log full traceback for easier debugging (written to
                # nested_lodo_groupsv1.log), then continue with the next column.
                logging.exception(f"{sig_name}:{scen_id}, col {col}: unexpected error")
                print(f"[ERROR] {sig_name}:{scen_id}, column {col}: {e}")

        # Save for this scenario
        all_results[scen_id] = scen_results
        all_features[scen_id] = scen_features
        all_cv[scen_id] = scen_cv
        logging.info(f"[{sig_name}] {scen_id} done in {time.time()-t0:.1f}s")

    # Write group-level JSONs
    with open(f'nestedv1_results111_{sig_name}.json', 'w') as f:
        json.dump(all_results, f, indent=2)
    with open(f'nestedv1_features111_{sig_name}.json', 'w') as f:
        json.dump(all_features, f, indent=2)
    with open(f'nestedv1_cv111_{sig_name}.json', 'w') as f:
        json.dump(all_cv, f, indent=2)
    print(f"✅ {sig_name} group complete: scenarios={list(all_results.keys())}")

print("All signature groups processed.")