bong9513 committed on
Commit
3597ca0
Β·
1 Parent(s): 79a10a0

checkpoint_1

Browse files
Analysis_code/5.optima/deepgbm_ctgan10000/deepgbm_ctgan10000_busan.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import optuna
import numpy as np
import random
import pandas as pd
import joblib
import os
import torch
from utils import *

# Fix Python and NumPy seeds for reproducibility
seed = 42
random.seed(seed)
np.random.seed(seed)


# 1. Create the study with direction='maximize'
study = optuna.create_study(
    direction="maximize",  # higher CSI is better
    pruner=optuna.pruners.MedianPruner(n_warmup_steps=10)  # watch the first 10 epochs, then prune
)

# Callback printing a detailed summary after every trial
def print_trial_callback(study, trial):
    """Print detailed info, including the best value, when a trial finishes."""
    print(f"\n{'='*80}")
    print(f"Trial {trial.number} μ™„λ£Œ")
    print(f" Value (CSI): {trial.value:.6f}" if trial.value is not None else f" Value: {trial.value}")
    print(f" Parameters: {trial.params}")
    # BUGFIX: study.best_value / study.best_trial raise ValueError while no
    # trial has COMPLETED yet (e.g. the first trials were all pruned or
    # failed), which crashed the whole optimisation from inside the callback.
    try:
        print(f" Best Value (CSI): {study.best_value:.6f}" if study.best_value is not None else f" Best Value: {study.best_value}")
        print(f" Best Trial: {study.best_trial.number}")
        print(f" Best Parameters: {study.best_params}")
    except ValueError:
        print(" Best Value: N/A (no completed trial yet)")
    print(f"{'='*80}\n")


# 2. Run the optimisation
study.optimize(
    lambda trial: objective(trial, model_choose="deepgbm", region="busan", data_sample='ctgan10000'),
    n_trials=100,
    callbacks=[print_trial_callback]
)

# 3. Inspect and summarise the results
print(f"\nμ΅œμ ν™” μ™„λ£Œ.")
print(f"Best CSI Score: {study.best_value:.4f}")
print(f"Best Hyperparameters: {study.best_params}")

try:
    # Collect the CSI score of every finished trial
    csi_scores = [trial.value for trial in study.trials if trial.value is not None]

    if len(csi_scores) > 0:
        print(f"\nμ΅œμ ν™” κ³Όμ • μš”μ•½:")
        print(f" - 총 μ‹œλ„ 횟수: {len(study.trials)}")
        print(f" - μ„±κ³΅ν•œ μ‹œλ„: {len(csi_scores)}")
        print(f" - 졜초 CSI: {csi_scores[0]:.4f}")
        print(f" - μ΅œμ’… CSI: {csi_scores[-1]:.4f}")
        print(f" - 졜고 CSI: {max(csi_scores):.4f}")
        print(f" - μ΅œμ € CSI: {min(csi_scores):.4f}")
        print(f" - 평균 CSI: {np.mean(csi_scores):.4f}")

    # Persist the study object; paths are anchored on this file's location
    current_file_dir = os.path.dirname(os.path.abspath(__file__))
    base_dir = os.path.dirname(os.path.dirname(current_file_dir))  # the 5.optima directory (per original comment)
    os.makedirs(os.path.join(base_dir, "optimization_history"), exist_ok=True)
    study_path = os.path.join(base_dir, "optimization_history/deepgbm_ctgan10000_busan_trials.pkl")
    joblib.dump(study, study_path)
    print(f"\nμ΅œμ ν™” Study 객체가 {study_path}에 μ €μž₯λ˜μ—ˆμŠ΅λ‹ˆλ‹€.")

    # Train and save the final model with the optimised hyper-parameters
    print("\n" + "="*50)
    print("μ΅œμ ν™”λœ ν•˜μ΄νΌνŒŒλΌλ―Έν„°λ‘œ μ΅œμ’… λͺ¨λΈ ν•™μŠ΅ μ‹œμž‘")
    print("="*50)

    best_params = study.best_params
    model_path = train_final_model(
        best_params=best_params,
        model_choose="deepgbm",
        region="busan",
        data_sample='ctgan10000',
        target='multi',
        n_folds=3,
        random_state=seed
    )

    print(f"\nμ΅œμ’… λͺ¨λΈ ν•™μŠ΅ 및 μ €μž₯ μ™„λ£Œ!")
    print(f"μ €μž₯된 λͺ¨λΈ 경둜: {model_path}")

except Exception as e:
    print(f"\n⚠️ μ΅œμ ν™” κ²°κ³Ό 뢄석 쀑 였λ₯˜ λ°œμƒ: {e}")
    import traceback
    traceback.print_exc()

# Exit cleanly
import sys
sys.exit(0)
97
+
98
+
Analysis_code/5.optima/deepgbm_ctgan10000/utils.py ADDED
@@ -0,0 +1,721 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.optim as optim
4
+ import numpy as np
5
+ import random
6
+ import os
7
+ import copy
8
+ from sklearn.preprocessing import QuantileTransformer, LabelEncoder
9
+ from torch.utils.data import DataLoader, TensorDataset
10
+ from sklearn.metrics import confusion_matrix
11
+ from sklearn.utils.class_weight import compute_class_weight
12
+ import pandas as pd
13
+ import optuna
14
+ from sklearn.metrics import accuracy_score, f1_score
15
+ import joblib
16
+
17
+
18
+ import sys
19
+ # 파일 μœ„μΉ˜ 기반으둜 models 디렉토리 경둜 μ„€μ •
20
+ current_file_dir = os.path.dirname(os.path.abspath(__file__))
21
+ models_path = os.path.abspath(os.path.join(current_file_dir, '../../models'))
22
+ sys.path.insert(0, models_path)
23
+ from ft_transformer import FTTransformer
24
+ from resnet_like import ResNetLike
25
+ from deepgbm import DeepGBM
26
+ import warnings
27
+ warnings.filterwarnings('ignore')
28
+
29
+
30
# Fix Python and NumPy seeds for reproducibility
seed = 42
random.seed(seed)
np.random.seed(seed)

# Fix PyTorch seeds
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)  # apply the same seed in multi-GPU environments

# Deterministic cuDNN behaviour. BUGFIX: the original set benchmark = True
# "for speed" while the surrounding comments promise identical results on
# every run — benchmark mode lets cuDNN auto-tune algorithms
# non-deterministically, defeating deterministic = True. Disable it so the
# stated reproducibility goal actually holds.
torch.backends.cudnn.deterministic = True  # guarantee identical results across runs
torch.backends.cudnn.benchmark = False
43
+
44
+
45
def add_derived_features(df: pd.DataFrame) -> pd.DataFrame:
    """Restore the derived features that were previously removed.

    Args:
        df: dataframe containing 'hour', 'month', 'groundtemp' and 'temp_C'.

    Returns:
        A copy of the dataframe with cyclic time encodings and the
        ground/air temperature difference added.
    """
    out = df.copy()
    two_pi = 2 * np.pi
    # Cyclic (sin/cos) encodings so hour 23 sits next to hour 0, Dec next to Jan
    out['hour_sin'] = np.sin(two_pi * out['hour'] / 24)
    out['hour_cos'] = np.cos(two_pi * out['hour'] / 24)
    out['month_sin'] = np.sin(two_pi * out['month'] / 12)
    out['month_cos'] = np.cos(two_pi * out['month'] / 12)
    # Ground-minus-air temperature difference
    out['ground_temp - temp_C'] = out['groundtemp'] - out['temp_C']
    return out
62
+
63
def preprocessing(df):
    """Preprocess the raw dataframe.

    Args:
        df: raw dataframe.

    Returns:
        Preprocessed dataframe restricted to the modelling columns.
    """
    out = df[df.columns].copy()
    # Force integer dtypes for the calendar fields
    for col in ('year', 'month', 'hour'):
        out[col] = out[col].astype('int')
    out = add_derived_features(out).copy()
    out['multi_class'] = out['multi_class'].astype('int')
    # 'μ •μ˜¨' (calm) has no wind direction; encode it as 0 before the int cast
    out.loc[out['wind_dir'] == 'μ •μ˜¨', 'wind_dir'] = "0"
    out['wind_dir'] = out['wind_dir'].astype('int')
    keep_cols = ['temp_C', 'precip_mm', 'wind_speed', 'wind_dir', 'hm',
                 'vap_pressure', 'dewpoint_C', 'loc_pressure', 'sea_pressure',
                 'solarRad', 'snow_cm', 'cloudcover', 'lm_cloudcover', 'low_cloudbase',
                 'groundtemp', 'O3', 'NO2', 'PM10', 'PM25', 'year',
                 'month', 'hour', 'ground_temp - temp_C', 'hour_sin', 'hour_cos',
                 'month_sin', 'month_cos', 'multi_class']
    return out[keep_cols].copy()
87
+
88
+
89
+ # 데이터셋 μ€€λΉ„ ν•¨μˆ˜
90
# Dataset preparation helper
def prepare_dataset(region, data_sample='pure', target='multi', fold=3):
    """Load, split and transform the data for one region / fold.

    Args:
        region: region name used to locate the CSV files.
        data_sample: 'pure' for the raw training set, or the name of an
            oversampled dataset (e.g. 'ctgan10000').
        target: 'multi' or 'binary' — selects the target column.
        fold: validation fold; the year 2021 - fold is held out.

    Returns:
        X_train, X_val, X_test, y_train, y_val, y_test,
        categorical_cols, numerical_cols
    """
    # Resolve the data directory relative to this file's location
    here = os.path.dirname(os.path.abspath(__file__))
    data_base_dir = os.path.abspath(os.path.join(here, '../../../data'))

    # Input paths
    dat_path = os.path.join(data_base_dir, f"data_for_modeling/{region}_train.csv")
    train_path = dat_path if data_sample == 'pure' else os.path.join(
        data_base_dir, f'data_oversampled/{data_sample}/{data_sample}_{fold}_{region}.csv')
    test_path = os.path.join(data_base_dir, f"data_for_modeling/{region}_test.csv")
    drop_col = ['multi_class', 'year']
    target_col = f'{target}_class'

    # Load; the year 2021 - fold is the validation split
    region_dat = preprocessing(pd.read_csv(dat_path, index_col=0))
    if data_sample == 'pure':
        region_train = region_dat.loc[~region_dat['year'].isin([2021 - fold]), :]
    else:
        region_train = preprocessing(pd.read_csv(train_path))
    region_val = region_dat.loc[region_dat['year'].isin([2021 - fold]), :]
    region_test = preprocessing(pd.read_csv(test_path))

    # Keep column order consistent across the three splits
    common_columns = region_train.columns.to_list()
    train_data = region_train[common_columns]
    val_data = region_val[common_columns]
    test_data = region_test[common_columns]

    # Split features / target
    X_train, y_train = train_data.drop(columns=drop_col), train_data[target_col]
    X_val, y_val = val_data.drop(columns=drop_col), val_data[target_col]
    X_test, y_test = test_data.drop(columns=drop_col), test_data[target_col]

    # Categorical vs numerical feature sets
    categorical_cols = X_train.select_dtypes(include=['object', 'category', 'int64']).columns
    numerical_cols = X_train.select_dtypes(include=['float64']).columns

    # Label-encode categoricals (each encoder fitted on train data only)
    for col in categorical_cols:
        le = LabelEncoder()
        le.fit(X_train[col])
        X_train[col] = le.transform(X_train[col])
        X_val[col] = le.transform(X_val[col])
        X_test[col] = le.transform(X_test[col])

    # Quantile-transform numericals (fitted on train data only)
    scaler = QuantileTransformer(output_distribution='normal')
    scaler.fit(X_train[numerical_cols])
    X_train[numerical_cols] = scaler.transform(X_train[numerical_cols])
    X_val[numerical_cols] = scaler.transform(X_val[numerical_cols])
    X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])

    return X_train, X_val, X_test, y_train, y_val, y_test, categorical_cols, numerical_cols
156
+
157
+
158
+
159
+ # 데이터 λ³€ν™˜ 및 dataloader 생성 ν•¨μˆ˜
160
# Data transformation and DataLoader construction (default batch size 64)
def prepare_dataloader(region, data_sample='pure', target='multi', fold=3, random_state=None):
    """Build train/val/test DataLoaders with the default batch size of 64.

    CONSISTENCY FIX: the original body was a ~100-line verbatim copy of
    prepare_dataloader_with_batchsize with batch_size hard-coded to 64;
    delegating removes the duplication (and the risk of the two copies
    silently diverging) while keeping behaviour and return shape identical.

    Args:
        region: region name used to locate the CSV files.
        data_sample: 'pure' or an oversampled dataset name.
        target: 'multi' or 'binary'.
        fold: validation fold; the year 2021 - fold is held out.
        random_state: if given, seeds the train DataLoader shuffling.

    Returns:
        X_train, categorical_cols, numerical_cols,
        train_loader, val_loader, test_loader
    """
    (X_train, categorical_cols, numerical_cols,
     train_loader, val_loader, test_loader, _y_train, _scaler) = prepare_dataloader_with_batchsize(
        region, data_sample=data_sample, target=target, fold=fold,
        random_state=random_state, batch_size=64,
    )
    return X_train, categorical_cols, numerical_cols, train_loader, val_loader, test_loader
261
+
262
+ # 데이터 λ³€ν™˜ 및 dataloader 생성 ν•¨μˆ˜ (batch_size νŒŒλΌλ―Έν„° μΆ”κ°€ 버전)
263
# Data transformation and DataLoader construction (configurable batch size)
def prepare_dataloader_with_batchsize(region, data_sample='pure', target='multi', fold=3, random_state=None, batch_size=64):
    """Load, transform and wrap the data in DataLoaders.

    Args:
        region: region name used to locate the CSV files.
        data_sample: 'pure' for the raw training set, or the name of an
            oversampled dataset (e.g. 'ctgan10000').
        target: 'multi' or 'binary' — controls the label tensor dtype.
        fold: validation fold; the year 2021 - fold is held out.
        random_state: if given, seeds the train DataLoader shuffling.
        batch_size: batch size used for all three DataLoaders.

    Returns:
        (X_train, categorical_cols, numerical_cols,
         train_loader, val_loader, test_loader, y_train, scaler)

    Raises:
        ValueError: if target is neither 'binary' nor 'multi'.
    """
    # Resolve the data directory relative to this file's location
    current_file_dir = os.path.dirname(os.path.abspath(__file__))
    data_base_dir = os.path.abspath(os.path.join(current_file_dir, '../../../data'))

    # Input paths
    dat_path = os.path.join(data_base_dir, f"data_for_modeling/{region}_train.csv")
    if data_sample == 'pure':
        train_path = dat_path
    else:
        train_path = os.path.join(data_base_dir, f'data_oversampled/{data_sample}/{data_sample}_{fold}_{region}.csv')
    test_path = os.path.join(data_base_dir, f"data_for_modeling/{region}_test.csv")
    drop_col = ['multi_class', 'year']
    target_col = f'{target}_class'

    # Load; the year 2021 - fold is the validation split
    region_dat = preprocessing(pd.read_csv(dat_path, index_col=0))
    if data_sample == 'pure':
        region_train = region_dat.loc[~region_dat['year'].isin([2021 - fold]), :]
    else:
        region_train = preprocessing(pd.read_csv(train_path))
    region_val = region_dat.loc[region_dat['year'].isin([2021 - fold]), :]
    region_test = preprocessing(pd.read_csv(test_path))

    # Keep column order consistent across splits
    common_columns = region_train.columns.to_list()
    train_data = region_train[common_columns]
    val_data = region_val[common_columns]
    test_data = region_test[common_columns]

    # Feature / target split
    X_train = train_data.drop(columns=drop_col)
    y_train = train_data[target_col]
    X_val = val_data.drop(columns=drop_col)
    y_val = val_data[target_col]
    X_test = test_data.drop(columns=drop_col)
    y_test = test_data[target_col]

    # Categorical vs numerical columns
    categorical_cols = X_train.select_dtypes(include=['object', 'category', 'int64']).columns
    numerical_cols = X_train.select_dtypes(include=['float64']).columns

    # Label-encode categoricals (encoders fitted on train data only)
    label_encoders = {}
    for col in categorical_cols:
        le = LabelEncoder()
        le.fit(X_train[col])
        label_encoders[col] = le

    for col in categorical_cols:
        X_train[col] = label_encoders[col].transform(X_train[col])
        X_val[col] = label_encoders[col].transform(X_val[col])
        X_test[col] = label_encoders[col].transform(X_test[col])

    # Quantile-transform numericals (fitted on train data only)
    scaler = QuantileTransformer(output_distribution='normal')
    scaler.fit(X_train[numerical_cols])

    X_train[numerical_cols] = scaler.transform(X_train[numerical_cols])
    X_val[numerical_cols] = scaler.transform(X_val[numerical_cols])
    X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])

    # Tensors: numerical as float32, categorical as long
    X_train_num = torch.tensor(X_train[numerical_cols].values, dtype=torch.float32)
    X_train_cat = torch.tensor(X_train[categorical_cols].values, dtype=torch.long)

    X_val_num = torch.tensor(X_val[numerical_cols].values, dtype=torch.float32)
    X_val_cat = torch.tensor(X_val[categorical_cols].values, dtype=torch.long)

    X_test_num = torch.tensor(X_test[numerical_cols].values, dtype=torch.float32)
    X_test_cat = torch.tensor(X_test[categorical_cols].values, dtype=torch.long)

    # Labels: float32 for binary (BCE loss), long for multi-class (CE loss)
    if target == "binary":
        y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)
        y_val_tensor = torch.tensor(y_val.values, dtype=torch.float32)
        y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)
    elif target == "multi":
        y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
        y_val_tensor = torch.tensor(y_val.values, dtype=torch.long)
        y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)
    else:
        raise ValueError("target must be 'binary' or 'multi'")

    # TensorDatasets
    train_dataset = TensorDataset(X_train_num, X_train_cat, y_train_tensor)
    val_dataset = TensorDataset(X_val_num, X_val_cat, y_val_tensor)
    test_dataset = TensorDataset(X_test_num, X_test_cat, y_test_tensor)

    # DataLoaders. IDIOM FIX: use `is None` (identity) instead of `== None`;
    # generator=None is the DataLoader default, so the branch collapses.
    generator = None if random_state is None else torch.Generator().manual_seed(random_state)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, generator=generator)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return X_train, categorical_cols, numerical_cols, train_loader, val_loader, test_loader, y_train, scaler
363
+
364
+
365
def calculate_csi(y_true, pred):
    """Compute the Critical Success Index (CSI) for the 3-class problem.

    Rows/columns 0 and 1 of the confusion matrix are the event classes and
    class 2 is the non-event class: H (hits) are correct event predictions,
    F (false alarms) are wrong-class predictions into an event column,
    M (misses) are events predicted as class 2. CSI = H / (H + F + M).

    Args:
        y_true: ground-truth labels (iterable of ints in {0, 1, 2}).
        pred: predicted labels (iterable of ints in {0, 1, 2}).

    Returns:
        CSI score in [0, 1].
    """
    # ROBUSTNESS FIX: force a fixed 3x3 matrix so the hard-coded indices
    # below stay valid even when some class is absent from y_true/pred —
    # the original call without `labels` produced a smaller matrix in that
    # case and raised IndexError. Output is unchanged when all 3 classes
    # are present.
    cm = confusion_matrix(y_true, pred, labels=[0, 1, 2])

    H = cm[0, 0] + cm[1, 1]
    F = (cm[1, 0] + cm[2, 0] +
         cm[0, 1] + cm[2, 1])
    M = cm[0, 2] + cm[1, 2]

    # Epsilon guards against division by zero when H = F = M = 0
    return H / (H + F + M + 1e-10)
379
+
380
def sample_weight(y_train):
    """Return a per-sample weight array using sklearn 'balanced' class weights.

    Args:
        y_train: training labels (iterable of hashable class labels).

    Returns:
        np.ndarray of one weight per sample, aligned with y_train.
    """
    classes = np.unique(y_train)
    class_weights = compute_class_weight(
        class_weight='balanced',
        classes=classes,  # unique classes, sorted (compute_class_weight's ordering)
        y=y_train         # training labels
    )
    # LATENT-BUG FIX: the original indexed `class_weights[label]`, which is
    # only correct when labels happen to be exactly 0..n-1; map each class
    # to its weight explicitly. Output is identical for contiguous 0-based
    # labels (the current usage).
    weight_by_class = dict(zip(classes, class_weights))
    return np.array([weight_by_class[label] for label in y_train])
389
+
390
+ # ν•˜μ΄νΌνŒŒλΌλ―Έν„° μ΅œμ ν™” ν•¨μˆ˜ μ •μ˜
391
def objective(trial, model_choose, region, data_sample='pure', target='multi', n_folds=3, random_state=42):
    """Optuna objective: mean best validation CSI over n_folds folds.

    Args:
        trial: optuna Trial used to sample hyper-parameters.
        model_choose: one of 'ft_transformer', 'resnet_like', 'deepgbm'.
        region: region name used to locate the CSV data files.
        data_sample: 'pure' or an oversampled dataset name (e.g. 'ctgan10000').
        target: 'multi' or 'binary'.
        n_folds: number of cross-validation folds.
        random_state: seed forwarded to the train DataLoader shuffling.

    Returns:
        Mean of the best per-fold validation CSI scores.

    Raises:
        optuna.exceptions.TrialPruned: when the pruner cuts the trial short.
        ValueError: for an unknown model_choose.
    """
    # Use the GPU when available
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    val_scores = []

    # --- 1. Hyper-parameter search space ---
    if model_choose == "ft_transformer":
        d_token = trial.suggest_int("d_token", 64, 256, step=32)
        n_blocks = trial.suggest_int("n_blocks", 2, 6)  # kept shallow to limit overfitting
        n_heads = trial.suggest_categorical("n_heads", [4, 8])
        # d_token must be a multiple of n_heads (FT-Transformer structural constraint)
        if d_token % n_heads != 0:
            d_token = (d_token // n_heads) * n_heads

        attention_dropout = trial.suggest_float("attention_dropout", 0.1, 0.4)
        ffn_dropout = trial.suggest_float("ffn_dropout", 0.1, 0.4)
        lr = trial.suggest_float("lr", 1e-5, 1e-2, log=True)
        weight_decay = trial.suggest_float("weight_decay", 1e-4, 1e-1, log=True)
        batch_size = trial.suggest_categorical("batch_size", [32, 64, 128, 256])

    elif model_choose == 'resnet_like':
        d_main = trial.suggest_int("d_main", 64, 256, step=32)
        d_hidden = trial.suggest_int("d_hidden", 64, 512, step=64)
        n_blocks = trial.suggest_int("n_blocks", 2, 5)  # not too deep
        dropout_first = trial.suggest_float("dropout_first", 0.1, 0.4)
        dropout_second = trial.suggest_float("dropout_second", 0.0, 0.2)
        lr = trial.suggest_float("lr", 1e-5, 1e-2, log=True)
        weight_decay = trial.suggest_float("weight_decay", 1e-4, 1e-1, log=True)
        batch_size = trial.suggest_categorical("batch_size", [32, 64, 128, 256])

    elif model_choose == 'deepgbm':
        # For DeepGBM, tune the ResNet-style blocks and embedding dims
        d_main = trial.suggest_int("d_main", 64, 256, step=32)
        d_hidden = trial.suggest_int("d_hidden", 64, 256, step=64)
        n_blocks = trial.suggest_int("n_blocks", 2, 6)
        dropout = trial.suggest_float("dropout", 0.1, 0.4)
        lr = trial.suggest_float("lr", 1e-5, 1e-2, log=True)
        weight_decay = trial.suggest_float("weight_decay", 1e-4, 1e-1, log=True)
        batch_size = trial.suggest_categorical("batch_size", [32, 64, 128, 256])

    else:
        # ROBUSTNESS FIX: the original fell through silently and later crashed
        # with NameError on batch_size; fail fast instead, mirroring
        # train_final_model's error handling.
        raise ValueError(f"Unknown model_choose: {model_choose}")

    # --- 2. Per-fold training (cross-validation) ---
    for fold in range(1, n_folds + 1):
        X_train_df, categorical_cols, numerical_cols, train_loader, val_loader, _, y_train, _ = prepare_dataloader_with_batchsize(
            region, data_sample=data_sample, target=target, fold=fold, random_state=random_state, batch_size=batch_size
        )

        # Model initialisation
        if model_choose == "ft_transformer":
            model = FTTransformer(
                num_features=len(numerical_cols),
                cat_cardinalities=[len(X_train_df[col].unique()) for col in categorical_cols],
                d_token=d_token,
                n_blocks=n_blocks,
                n_heads=n_heads,
                attention_dropout=attention_dropout,
                ffn_dropout=ffn_dropout,
                num_classes=3
            ).to(device)
        elif model_choose == 'resnet_like':
            input_dim = len(numerical_cols) + len(categorical_cols)
            model = ResNetLike(
                input_dim=input_dim,
                d_main=d_main,
                d_hidden=d_hidden,
                n_blocks=n_blocks,
                dropout_first=dropout_first,
                dropout_second=dropout_second,
                num_classes=3
            ).to(device)
        elif model_choose == 'deepgbm':
            model = DeepGBM(
                num_features=len(numerical_cols),
                cat_features=[len(X_train_df[col].unique()) for col in categorical_cols],
                d_main=d_main,
                d_hidden=d_hidden,
                n_blocks=n_blocks,
                dropout=dropout,
                num_classes=3
            ).to(device)

        # Class weights + loss function (label smoothing kept at 0.0 as in original)
        if target == 'multi':
            class_weights = compute_class_weight(
                class_weight='balanced',
                classes=np.unique(y_train),
                y=y_train
            )
            # Log the per-class weights
            unique_classes = np.unique(y_train)
            class_counts = {cls: np.sum(y_train == cls) for cls in unique_classes}
            print(f" Fold {fold} - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {dict(zip(unique_classes, class_weights))} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {class_counts})")
            class_weights_tensor = torch.tensor(class_weights, dtype=torch.float32).to(device)
            criterion = nn.CrossEntropyLoss(weight=class_weights_tensor, label_smoothing=0.0)
        else:
            criterion = nn.BCEWithLogitsLoss()
        optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)

        # LR scheduler: halve the LR when validation CSI plateaus
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=3)

        # Training configuration
        epochs = 200
        patience = 12  # slightly raised to ride out plateaus typical of deep nets
        best_fold_csi = 0
        counter = 0

        for epoch in range(epochs):
            model.train()
            for x_num_batch, x_cat_batch, y_batch in train_loader:
                x_num_batch, x_cat_batch, y_batch = x_num_batch.to(device), x_cat_batch.to(device), y_batch.to(device)

                optimizer.zero_grad()
                y_pred = model(x_num_batch, x_cat_batch)
                loss = criterion(y_pred, y_batch if target == 'multi' else y_batch.float())
                loss.backward()
                optimizer.step()

            # Validation pass
            model.eval()
            y_pred_val, y_true_val = [], []
            with torch.no_grad():
                for x_num_batch, x_cat_batch, y_batch in val_loader:
                    x_num_batch, x_cat_batch, y_batch = x_num_batch.to(device), x_cat_batch.to(device), y_batch.to(device)
                    output = model(x_num_batch, x_cat_batch)
                    pred = output.argmax(dim=1) if target == 'multi' else (torch.sigmoid(output) >= 0.5).long()

                    y_pred_val.extend(pred.cpu().numpy())
                    y_true_val.extend(y_batch.cpu().numpy())

            # CSI + scheduler update
            val_csi = calculate_csi(y_true_val, y_pred_val)
            scheduler.step(val_csi)

            # Optuna pruning (report per-epoch so the pruner can cut early)
            trial.report(val_csi, epoch)
            if trial.should_prune():
                raise optuna.exceptions.TrialPruned()

            # Early stopping
            if val_csi > best_fold_csi:
                best_fold_csi = val_csi
                counter = 0
            else:
                counter += 1

            if counter >= patience:
                break

        val_scores.append(best_fold_csi)

    # Mean performance across all folds
    return np.mean(val_scores)
543
+
544
+
545
+ # μ΅œμ ν™”λœ ν•˜μ΄νΌνŒŒλΌλ―Έν„°λ‘œ μ΅œμ’… λͺ¨λΈ ν•™μŠ΅ 및 μ €μž₯ ν•¨μˆ˜
546
def train_final_model(best_params, model_choose, region, data_sample='pure', target='multi', n_folds=3, random_state=42):
    """Train the final per-fold models with the optimised hyper-parameters and save them.

    Args:
        best_params: dict of optimised hyper-parameters.
        model_choose: model selection ('ft_transformer', 'resnet_like', 'deepgbm').
        region: region name.
        data_sample: data sample type ('pure', 'smote', etc.).
        target: target type ('multi', 'binary').
        n_folds: number of cross-validation folds.
        random_state: random seed.

    Returns:
        Path of the saved model file (one pickle holding all fold models).

    Raises:
        ValueError: for an unknown model_choose.
    """
    # Use the GPU when available
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    models = []
    scalers = []  # one fitted scaler per fold, saved alongside the models

    print("μ΅œμ’… λͺ¨λΈ ν•™μŠ΅ μ‹œμž‘...")

    for fold in range(1, n_folds + 1):
        print(f"Fold {fold} ν•™μŠ΅ 쀑...")

        # Use the optimised batch size (fall back to 64)
        batch_size = best_params.get("batch_size", 64)
        X_train_df, categorical_cols, numerical_cols, train_loader, val_loader, _, y_train, scaler = prepare_dataloader_with_batchsize(
            region, data_sample=data_sample, target=target, fold=fold, random_state=random_state, batch_size=batch_size
        )

        # Model initialisation
        if model_choose == "ft_transformer":
            d_token = best_params["d_token"]
            n_heads = best_params.get("n_heads", 8)
            # d_token must be a multiple of n_heads (FT-Transformer structural constraint)
            if d_token % n_heads != 0:
                d_token = (d_token // n_heads) * n_heads

            model = FTTransformer(
                num_features=len(numerical_cols),
                cat_cardinalities=[len(X_train_df[col].unique()) for col in categorical_cols],
                d_token=d_token,
                n_blocks=best_params["n_blocks"],
                n_heads=n_heads,
                attention_dropout=best_params["attention_dropout"],
                ffn_dropout=best_params["ffn_dropout"],
                num_classes=3
            ).to(device)
        elif model_choose == 'resnet_like':
            input_dim = len(numerical_cols) + len(categorical_cols)
            model = ResNetLike(
                input_dim=input_dim,
                d_main=best_params["d_main"],
                d_hidden=best_params["d_hidden"],
                n_blocks=best_params["n_blocks"],
                dropout_first=best_params["dropout_first"],
                dropout_second=best_params["dropout_second"],
                num_classes=3
            ).to(device)
        elif model_choose == 'deepgbm':
            model = DeepGBM(
                num_features=len(numerical_cols),
                cat_features=[len(X_train_df[col].unique()) for col in categorical_cols],
                d_main=best_params["d_main"],
                d_hidden=best_params["d_hidden"],
                n_blocks=best_params["n_blocks"],
                dropout=best_params["dropout"],
                num_classes=3
            ).to(device)
        else:
            raise ValueError(f"Unknown model_choose: {model_choose}")

        # Class weights + loss function (label smoothing kept at 0.0 as in original)
        if target == 'multi':
            class_weights = compute_class_weight(
                class_weight='balanced',
                classes=np.unique(y_train),
                y=y_train
            )
            class_weights_tensor = torch.tensor(class_weights, dtype=torch.float32).to(device)
            criterion = nn.CrossEntropyLoss(weight=class_weights_tensor, label_smoothing=0.0)
        else:
            criterion = nn.BCEWithLogitsLoss()
        optimizer = optim.AdamW(model.parameters(), lr=best_params["lr"], weight_decay=best_params["weight_decay"])

        # LR scheduler: halve the LR when validation CSI plateaus
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=3)

        # Training configuration
        epochs = 200
        patience = 12
        best_fold_csi = 0
        counter = 0
        best_model = None

        for epoch in range(epochs):
            model.train()
            for x_num_batch, x_cat_batch, y_batch in train_loader:
                x_num_batch, x_cat_batch, y_batch = x_num_batch.to(device), x_cat_batch.to(device), y_batch.to(device)

                optimizer.zero_grad()
                y_pred = model(x_num_batch, x_cat_batch)
                loss = criterion(y_pred, y_batch if target == 'multi' else y_batch.float())
                loss.backward()
                optimizer.step()

            # Validation pass
            model.eval()
            y_pred_val, y_true_val = [], []
            with torch.no_grad():
                for x_num_batch, x_cat_batch, y_batch in val_loader:
                    x_num_batch, x_cat_batch, y_batch = x_num_batch.to(device), x_cat_batch.to(device), y_batch.to(device)
                    output = model(x_num_batch, x_cat_batch)
                    pred = output.argmax(dim=1) if target == 'multi' else (torch.sigmoid(output) >= 0.5).long()

                    y_pred_val.extend(pred.cpu().numpy())
                    y_true_val.extend(y_batch.cpu().numpy())

            # CSI + scheduler update
            val_csi = calculate_csi(y_true_val, y_pred_val)
            scheduler.step(val_csi)

            # Early stopping; snapshot the best model so far
            if val_csi > best_fold_csi:
                best_fold_csi = val_csi
                counter = 0
                best_model = copy.deepcopy(model)
            else:
                counter += 1

            if counter >= patience:
                print(f" Early stopping at epoch {epoch+1}, Best CSI: {best_fold_csi:.4f}")
                break

        # If validation CSI never improved past 0, keep the last model
        if best_model is None:
            best_model = model

        scalers.append(scaler)  # saved in fold order
        models.append(best_model)
        print(f" Fold {fold} ν•™μŠ΅ μ™„λ£Œ (검증 CSI: {best_fold_csi:.4f})")

    # CONSISTENCY FIX: anchor the save path on this file's location rather
    # than the cwd-relative '../save_model' the original used — everything
    # else in this module resolves paths via __file__. NOTE(review): this
    # assumes scripts were previously run from the model directory, so the
    # target stays <...>/5.optima/save_model — confirm against existing
    # artefacts before relying on old outputs.
    current_file_dir = os.path.dirname(os.path.abspath(__file__))
    save_dir = os.path.join(os.path.dirname(current_file_dir), 'save_model', f'{model_choose}_optima')
    os.makedirs(save_dir, exist_ok=True)

    # Model file name
    if data_sample == 'pure':
        model_filename = f'{model_choose}_pure_{region}.pkl'
    else:
        model_filename = f'{model_choose}_{data_sample}_{region}.pkl'

    model_path = f'{save_dir}/{model_filename}'

    # Save every fold model in a single pickle
    joblib.dump(models, model_path)
    print(f"\nλͺ¨λ“  λͺ¨λΈ μ €μž₯ μ™„λ£Œ: {model_path} (총 {len(models)}개 fold)")

    # Save the per-fold scalers separately (same naming pattern as the models)
    scaler_save_dir = os.path.join(save_dir, 'scaler')
    os.makedirs(scaler_save_dir, exist_ok=True)

    if data_sample == 'pure':
        scaler_filename = f'{model_choose}_pure_{region}_scaler.pkl'
    else:
        scaler_filename = f'{model_choose}_{data_sample}_{region}_scaler.pkl'

    scaler_path = f'{scaler_save_dir}/{scaler_filename}'
    joblib.dump(scalers, scaler_path)
    print(f"Scaler μ €μž₯ μ™„λ£Œ: {scaler_path} (총 {len(scalers)}개 fold)")

    return model_path
721
+
Analysis_code/5.optima/ft_transformer_ctgan10000/ft_transformer_ctgan10000_busan.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import optuna
2
+ import numpy as np
3
+ import random
4
+ import pandas as pd
5
+ import joblib
6
+ import os
7
+ import torch
8
+ from utils import *
9
+ # Python 및 Numpy μ‹œλ“œ κ³ μ •
10
+ seed = 42
11
+ random.seed(seed)
12
+ np.random.seed(seed)
13
+
14
+
15
# Callback printing a per-trial summary after every Optuna trial.
def print_trial_callback(study, trial):
    """Print the finished trial's value/params and the study's best so far.

    Robust to pruned/failed trials: ``trial.value`` may be ``None``, and
    ``study.best_value`` never returns ``None`` — it raises ``ValueError``
    while no trial has completed — so the previous ``is not None`` guard
    could not prevent a crash when the first trial was pruned.
    """
    print(f"\n{'='*80}")
    print(f"Trial {trial.number} μ™„λ£Œ")
    if trial.value is not None:
        print(f" Value (CSI): {trial.value:.6f}")
    else:
        print(f" Value: {trial.value}")
    print(f" Parameters: {trial.params}")
    try:
        # best_value / best_trial / best_params all raise ValueError until
        # at least one trial has completed successfully.
        print(f" Best Value (CSI): {study.best_value:.6f}")
        print(f" Best Trial: {study.best_trial.number}")
        print(f" Best Parameters: {study.best_params}")
    except ValueError:
        print(" Best Value: N/A (no completed trials yet)")
    print(f"{'='*80}\n")
26
+
27
+
28
+ # 1. Study 생성 μ‹œ 'maximize'둜 μ„€μ •
29
+ study = optuna.create_study(
30
+ direction="maximize", # CSI μ μˆ˜κ°€ λ†’μ„μˆ˜λ‘ μ’‹μœΌλ―€λ‘œ maximize
31
+ pruner=optuna.pruners.MedianPruner(n_warmup_steps=10) # 초반 10에폭은 μ§€μΌœλ³΄κ³  이후 κ°€μ§€μΉ˜κΈ°
32
+ )
33
+
34
+ # 2. μ΅œμ ν™” μ‹€ν–‰
35
+ study.optimize(
36
+ lambda trial: objective(trial, model_choose="ft_transformer", region="busan", data_sample='ctgan10000'),
37
+ n_trials=100,
38
+ callbacks=[print_trial_callback]
39
+ )
40
+
41
+ # 3. κ²°κ³Ό 확인 및 μš”μ•½
42
+ print(f"\nμ΅œμ ν™” μ™„λ£Œ.")
43
+ print(f"Best CSI Score: {study.best_value:.4f}")
44
+ print(f"Best Hyperparameters: {study.best_params}")
45
+
46
+ try:
47
+ # λͺ¨λ“  trial의 CSI 점수 μΆ”μΆœ
48
+ csi_scores = [trial.value for trial in study.trials if trial.value is not None]
49
+
50
+ if len(csi_scores) > 0:
51
+ print(f"\nμ΅œμ ν™” κ³Όμ • μš”μ•½:")
52
+ print(f" - 총 μ‹œλ„ 횟수: {len(study.trials)}")
53
+ print(f" - μ„±κ³΅ν•œ μ‹œλ„: {len(csi_scores)}")
54
+ print(f" - 졜초 CSI: {csi_scores[0]:.4f}")
55
+ print(f" - μ΅œμ’… CSI: {csi_scores[-1]:.4f}")
56
+ print(f" - 졜고 CSI: {max(csi_scores):.4f}")
57
+ print(f" - μ΅œμ € CSI: {min(csi_scores):.4f}")
58
+ print(f" - 평균 CSI: {np.mean(csi_scores):.4f}")
59
+
60
+ # Study 객체 μ €μž₯
61
+ # 파일 μœ„μΉ˜ 기반으둜 base 디렉토리 경둜 μ„€μ •
62
+ current_file_dir = os.path.dirname(os.path.abspath(__file__))
63
+ base_dir = os.path.dirname(os.path.dirname(current_file_dir)) # 5.optima 디렉토리
64
+ os.makedirs(os.path.join(base_dir, "optimization_history"), exist_ok=True)
65
+ study_path = os.path.join(base_dir, "optimization_history/ft_transformer_ctgan10000_busan_trials.pkl")
66
+ joblib.dump(study, study_path)
67
+ print(f"\nμ΅œμ ν™” Study 객체가 {study_path}에 μ €μž₯λ˜μ—ˆμŠ΅λ‹ˆλ‹€.")
68
+
69
+ # μ΅œμ ν™”λœ ν•˜μ΄νΌνŒŒλΌλ―Έν„°λ‘œ μ΅œμ’… λͺ¨λΈ ν•™μŠ΅ 및 μ €μž₯
70
+ print("\n" + "="*50)
71
+ print("μ΅œμ ν™”λœ ν•˜μ΄νΌνŒŒλΌλ―Έν„°λ‘œ μ΅œμ’… λͺ¨λΈ ν•™μŠ΅ μ‹œμž‘")
72
+ print("="*50)
73
+
74
+ best_params = study.best_params
75
+ model_path = train_final_model(
76
+ best_params=best_params,
77
+ model_choose="ft_transformer",
78
+ region="busan",
79
+ data_sample='ctgan10000',
80
+ target='multi',
81
+ n_folds=3,
82
+ random_state=seed
83
+ )
84
+
85
+ print(f"\nμ΅œμ’… λͺ¨λΈ ν•™μŠ΅ 및 μ €μž₯ μ™„λ£Œ!")
86
+ print(f"μ €μž₯된 λͺ¨λΈ 경둜: {model_path}")
87
+
88
+ except Exception as e:
89
+ print(f"\n⚠️ μ΅œμ ν™” κ²°κ³Ό 뢄석 쀑 였λ₯˜ λ°œμƒ: {e}")
90
+ import traceback
91
+ traceback.print_exc()
92
+
93
+ # 정상 μ’…λ£Œ
94
+ import sys
95
+ sys.exit(0)
96
+
Analysis_code/5.optima/ft_transformer_ctgan10000/utils.py ADDED
@@ -0,0 +1,720 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.optim as optim
4
+ import numpy as np
5
+ import random
6
+ import os
7
+ import copy
8
+ from sklearn.preprocessing import QuantileTransformer, LabelEncoder
9
+ from torch.utils.data import DataLoader, TensorDataset
10
+ from sklearn.metrics import confusion_matrix
11
+ from sklearn.utils.class_weight import compute_class_weight
12
+ import pandas as pd
13
+ import optuna
14
+ from sklearn.metrics import accuracy_score, f1_score
15
+ import joblib
16
+
17
+
18
+ import sys
19
+ # 파일 μœ„μΉ˜ 기반으둜 models 디렉토리 경둜 μ„€μ •
20
+ current_file_dir = os.path.dirname(os.path.abspath(__file__))
21
+ models_path = os.path.abspath(os.path.join(current_file_dir, '../../models'))
22
+ sys.path.insert(0, models_path)
23
+ from ft_transformer import FTTransformer
24
+ from resnet_like import ResNetLike
25
+ from deepgbm import DeepGBM
26
+ import warnings
27
+ warnings.filterwarnings('ignore')
28
+
29
# Fix Python / NumPy seeds for reproducibility.
seed = 42
random.seed(seed)
np.random.seed(seed)

# Fix PyTorch seeds (CPU and every GPU in a multi-GPU setup).
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

# Deterministic cuDNN mode. benchmark must be OFF: when True, cuDNN
# auto-tunes convolution algorithms at runtime and the chosen algorithm
# (and hence the numerical result) can vary between runs, defeating the
# deterministic setting above.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
42
+
43
+
44
def add_derived_features(df: pd.DataFrame) -> pd.DataFrame:
    """Restore the derived features that were previously dropped.

    Adds cyclic (sin/cos) encodings of ``hour`` and ``month`` plus the
    ground-air temperature difference. The input frame is not modified.

    Args:
        df: dataframe containing ``hour``, ``month``, ``groundtemp`` and
            ``temp_C`` columns.

    Returns:
        A copy of ``df`` with the derived columns appended.
    """
    out = df.copy()
    two_pi = 2 * np.pi
    # Cyclic encodings so 23h/0h and Dec/Jan end up adjacent.
    out['hour_sin'] = np.sin(two_pi * out['hour'] / 24)
    out['hour_cos'] = np.cos(two_pi * out['hour'] / 24)
    out['month_sin'] = np.sin(two_pi * out['month'] / 12)
    out['month_cos'] = np.cos(two_pi * out['month'] / 12)
    out['ground_temp - temp_C'] = out['groundtemp'] - out['temp_C']
    return out
61
+
62
def preprocessing(df):
    """Preprocess a raw weather dataframe.

    Casts the time columns to int, restores the derived features, maps the
    calm wind-direction label ('μ •μ˜¨') to "0" before the int cast, and
    selects/reorders the fixed model feature columns.

    Args:
        df: raw dataframe.

    Returns:
        Preprocessed copy with the fixed model column order.
    """
    feature_order = [
        'temp_C', 'precip_mm', 'wind_speed', 'wind_dir', 'hm',
        'vap_pressure', 'dewpoint_C', 'loc_pressure', 'sea_pressure',
        'solarRad', 'snow_cm', 'cloudcover', 'lm_cloudcover', 'low_cloudbase',
        'groundtemp', 'O3', 'NO2', 'PM10', 'PM25', 'year',
        'month', 'hour', 'ground_temp - temp_C', 'hour_sin', 'hour_cos',
        'month_sin', 'month_cos', 'multi_class',
    ]
    out = df[df.columns].copy()
    for col in ('year', 'month', 'hour'):
        out[col] = out[col].astype('int')
    out = add_derived_features(out).copy()
    out['multi_class'] = out['multi_class'].astype('int')
    # 'μ •μ˜¨' marks calm wind; encode it as direction "0" so the cast succeeds.
    out.loc[out['wind_dir'] == 'μ •μ˜¨', 'wind_dir'] = "0"
    out['wind_dir'] = out['wind_dir'].astype('int')
    return out[feature_order].copy()
86
+
87
+
88
# Dataset preparation (dataframe version — no DataLoaders).
def prepare_dataset(region, data_sample='pure', target='multi', fold=3):
    """Load, split and transform one region's data for a single CV fold.

    The validation set is the single year ``2021 - fold`` of the raw
    training file; the training set is either the remaining years
    ('pure') or a pre-built oversampled CSV for that fold.

    Args:
        region: region name used in the CSV file names (e.g. 'busan').
        data_sample: 'pure' for the raw training data, otherwise the name
            of an oversampling scheme (e.g. 'ctgan10000').
        target: 'multi' or 'binary'; selects the ``{target}_class`` column.
        fold: CV fold index (1-based); year ``2021 - fold`` is held out.

    Returns:
        ``(X_train, X_val, X_test, y_train, y_val, y_test,
        categorical_cols, numerical_cols)`` with label-encoded categoricals
        and quantile-normalised numericals.
    """
    # Resolve the data directory relative to this file's location.
    current_file_dir = os.path.dirname(os.path.abspath(__file__))
    data_base_dir = os.path.abspath(os.path.join(current_file_dir, '../../../data'))

    # Data paths.
    dat_path = os.path.join(data_base_dir, f"data_for_modeling/{region}_train.csv")
    if data_sample == 'pure':
        train_path = dat_path
    else:
        train_path = os.path.join(data_base_dir, f'data_oversampled/{data_sample}/{data_sample}_{fold}_{region}.csv')
    test_path = os.path.join(data_base_dir, f"data_for_modeling/{region}_test.csv")
    drop_col = ['multi_class','year']  # target + split key, removed from X
    target_col = f'{target}_class'

    # Load data. Validation is always carved out of the raw training file,
    # even when an oversampled training set is used.
    region_dat = preprocessing(pd.read_csv(dat_path, index_col=0))
    if data_sample == 'pure':
        region_train = region_dat.loc[~region_dat['year'].isin([2021-fold]), :]
    else:
        region_train = preprocessing(pd.read_csv(train_path))
    region_val = region_dat.loc[region_dat['year'].isin([2021-fold]), :]
    region_test = preprocessing(pd.read_csv(test_path))

    # Keep the column order identical across the three splits.
    common_columns = region_train.columns.to_list()
    train_data = region_train[common_columns]
    val_data = region_val[common_columns]
    test_data = region_test[common_columns]

    # Feature / target split.
    X_train = train_data.drop(columns=drop_col)
    y_train = train_data[target_col]
    X_val = val_data.drop(columns=drop_col)
    y_val = val_data[target_col]
    X_test = test_data.drop(columns=drop_col)
    y_test = test_data[target_col]

    # Categorical vs. numerical columns (int64 columns are treated as categorical).
    categorical_cols = X_train.select_dtypes(include=['object', 'category', 'int64']).columns
    numerical_cols = X_train.select_dtypes(include=['float64']).columns

    # Label-encode categoricals, fitted on the training split only.
    # NOTE(review): categories that appear only in val/test would make
    # ``transform`` raise — confirm upstream data guarantees full coverage.
    label_encoders = {}
    for col in categorical_cols:
        le = LabelEncoder()
        le.fit(X_train[col])  # fit on training data only
        label_encoders[col] = le

    # Apply the encoders to all three splits.
    for col in categorical_cols:
        X_train[col] = label_encoders[col].transform(X_train[col])
        X_val[col] = label_encoders[col].transform(X_val[col])
        X_test[col] = label_encoders[col].transform(X_test[col])

    # Quantile-transform the numerical columns, fitted on train only.
    scaler = QuantileTransformer(output_distribution='normal')
    scaler.fit(X_train[numerical_cols])  # fit on training data only

    # Apply the transform to all three splits.
    X_train[numerical_cols] = scaler.transform(X_train[numerical_cols])
    X_val[numerical_cols] = scaler.transform(X_val[numerical_cols])
    X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])

    return X_train, X_val, X_test, y_train, y_val, y_test, categorical_cols, numerical_cols
155
+
156
+
157
+
158
# Data transformation + DataLoader construction (fixed batch size 64).
def prepare_dataloader(region, data_sample='pure', target='multi', fold=3, random_state=None):
    """Load one fold's data and wrap it in PyTorch DataLoaders.

    Same split/encoding pipeline as ``prepare_dataset``, then converts the
    frames to tensors and builds DataLoaders with batch size 64.

    Args:
        region: region name used in the CSV file names.
        data_sample: 'pure' for the raw training data, otherwise the name
            of an oversampling scheme.
        target: 'multi' (long labels) or 'binary' (float labels).
        fold: CV fold index; year ``2021 - fold`` is the validation year.
        random_state: optional seed for the training loader's shuffling.

    Returns:
        ``(X_train, categorical_cols, numerical_cols,
        train_loader, val_loader, test_loader)``

    Raises:
        ValueError: if ``target`` is neither 'binary' nor 'multi'.
    """
    # Resolve the data directory relative to this file's location.
    current_file_dir = os.path.dirname(os.path.abspath(__file__))
    data_base_dir = os.path.abspath(os.path.join(current_file_dir, '../../../data'))

    # Data paths.
    dat_path = os.path.join(data_base_dir, f"data_for_modeling/{region}_train.csv")
    if data_sample == 'pure':
        train_path = dat_path
    else:
        train_path = os.path.join(data_base_dir, f'data_oversampled/{data_sample}/{data_sample}_{fold}_{region}.csv')
    test_path = os.path.join(data_base_dir, f"data_for_modeling/{region}_test.csv")
    drop_col = ['multi_class','year']
    target_col = f'{target}_class'

    # Load data; validation is carved out of the raw training file.
    region_dat = preprocessing(pd.read_csv(dat_path, index_col=0))
    if data_sample == 'pure':
        region_train = region_dat.loc[~region_dat['year'].isin([2021-fold]), :]
    else:
        region_train = preprocessing(pd.read_csv(train_path))
    region_val = region_dat.loc[region_dat['year'].isin([2021-fold]), :]
    region_test = preprocessing(pd.read_csv(test_path))

    # Keep the column order identical across splits.
    common_columns = region_train.columns.to_list()
    train_data = region_train[common_columns]
    val_data = region_val[common_columns]
    test_data = region_test[common_columns]

    # Feature / target split.
    X_train = train_data.drop(columns=drop_col)
    y_train = train_data[target_col]
    X_val = val_data.drop(columns=drop_col)
    y_val = val_data[target_col]
    X_test = test_data.drop(columns=drop_col)
    y_test = test_data[target_col]

    # Categorical vs. numerical columns (int64 treated as categorical).
    categorical_cols = X_train.select_dtypes(include=['object', 'category', 'int64']).columns
    numerical_cols = X_train.select_dtypes(include=['float64']).columns

    # Label-encode categoricals, fitted on the training split only.
    label_encoders = {}
    for col in categorical_cols:
        le = LabelEncoder()
        le.fit(X_train[col])
        label_encoders[col] = le

    for col in categorical_cols:
        X_train[col] = label_encoders[col].transform(X_train[col])
        X_val[col] = label_encoders[col].transform(X_val[col])
        X_test[col] = label_encoders[col].transform(X_test[col])

    # Quantile-transform numericals, fitted on train only.
    scaler = QuantileTransformer(output_distribution='normal')
    scaler.fit(X_train[numerical_cols])

    X_train[numerical_cols] = scaler.transform(X_train[numerical_cols])
    X_val[numerical_cols] = scaler.transform(X_val[numerical_cols])
    X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])

    # Split into numerical (float) and categorical (long) tensors.
    X_train_num = torch.tensor(X_train[numerical_cols].values, dtype=torch.float32)
    X_train_cat = torch.tensor(X_train[categorical_cols].values, dtype=torch.long)

    X_val_num = torch.tensor(X_val[numerical_cols].values, dtype=torch.float32)
    X_val_cat = torch.tensor(X_val[categorical_cols].values, dtype=torch.long)

    X_test_num = torch.tensor(X_test[numerical_cols].values, dtype=torch.float32)
    X_test_cat = torch.tensor(X_test[categorical_cols].values, dtype=torch.long)

    # Label tensors: float for BCE, long for CrossEntropy.
    if target == "binary":
        y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)
        y_val_tensor = torch.tensor(y_val.values, dtype=torch.float32)
        y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)
    elif target == "multi":
        y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
        y_val_tensor = torch.tensor(y_val.values, dtype=torch.long)
        y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)
    else:
        raise ValueError("target must be 'binary' or 'multi'")

    # TensorDatasets.
    train_dataset = TensorDataset(X_train_num, X_train_cat, y_train_tensor)
    val_dataset = TensorDataset(X_val_num, X_val_cat, y_val_tensor)
    test_dataset = TensorDataset(X_test_num, X_test_cat, y_test_tensor)

    # DataLoaders; seed the shuffle generator when a random_state is given.
    # (Fixed: identity comparison with None per PEP 8, was `== None`.)
    if random_state is None:
        train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    else:
        train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, generator=torch.Generator().manual_seed(random_state))
    val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

    return X_train, categorical_cols, numerical_cols, train_loader, val_loader, test_loader
260
+
261
# Data transformation + DataLoader construction (tunable batch size).
def prepare_dataloader_with_batchsize(region, data_sample='pure', target='multi', fold=3, random_state=None, batch_size=64):
    """Like ``prepare_dataloader`` but with a configurable batch size.

    Also returns the raw ``y_train`` series (for class-weight computation)
    and the fitted ``QuantileTransformer`` (for later serialization).

    Args:
        region: region name used in the CSV file names.
        data_sample: 'pure' for the raw training data, otherwise the name
            of an oversampling scheme.
        target: 'multi' (long labels) or 'binary' (float labels).
        fold: CV fold index; year ``2021 - fold`` is the validation year.
        random_state: optional seed for the training loader's shuffling.
        batch_size: batch size for all three loaders.

    Returns:
        ``(X_train, categorical_cols, numerical_cols,
        train_loader, val_loader, test_loader, y_train, scaler)``

    Raises:
        ValueError: if ``target`` is neither 'binary' nor 'multi'.
    """
    # Resolve the data directory relative to this file's location.
    current_file_dir = os.path.dirname(os.path.abspath(__file__))
    data_base_dir = os.path.abspath(os.path.join(current_file_dir, '../../../data'))

    # Data paths.
    dat_path = os.path.join(data_base_dir, f"data_for_modeling/{region}_train.csv")
    if data_sample == 'pure':
        train_path = dat_path
    else:
        train_path = os.path.join(data_base_dir, f'data_oversampled/{data_sample}/{data_sample}_{fold}_{region}.csv')
    test_path = os.path.join(data_base_dir, f"data_for_modeling/{region}_test.csv")
    drop_col = ['multi_class','year']
    target_col = f'{target}_class'

    # Load data; validation is carved out of the raw training file.
    region_dat = preprocessing(pd.read_csv(dat_path, index_col=0))
    if data_sample == 'pure':
        region_train = region_dat.loc[~region_dat['year'].isin([2021-fold]), :]
    else:
        region_train = preprocessing(pd.read_csv(train_path))
    region_val = region_dat.loc[region_dat['year'].isin([2021-fold]), :]
    region_test = preprocessing(pd.read_csv(test_path))

    # Keep the column order identical across splits.
    common_columns = region_train.columns.to_list()
    train_data = region_train[common_columns]
    val_data = region_val[common_columns]
    test_data = region_test[common_columns]

    # Feature / target split.
    X_train = train_data.drop(columns=drop_col)
    y_train = train_data[target_col]
    X_val = val_data.drop(columns=drop_col)
    y_val = val_data[target_col]
    X_test = test_data.drop(columns=drop_col)
    y_test = test_data[target_col]

    # Categorical vs. numerical columns (int64 treated as categorical).
    categorical_cols = X_train.select_dtypes(include=['object', 'category', 'int64']).columns
    numerical_cols = X_train.select_dtypes(include=['float64']).columns

    # Label-encode categoricals, fitted on the training split only.
    label_encoders = {}
    for col in categorical_cols:
        le = LabelEncoder()
        le.fit(X_train[col])
        label_encoders[col] = le

    for col in categorical_cols:
        X_train[col] = label_encoders[col].transform(X_train[col])
        X_val[col] = label_encoders[col].transform(X_val[col])
        X_test[col] = label_encoders[col].transform(X_test[col])

    # Quantile-transform numericals, fitted on train only.
    scaler = QuantileTransformer(output_distribution='normal')
    scaler.fit(X_train[numerical_cols])

    X_train[numerical_cols] = scaler.transform(X_train[numerical_cols])
    X_val[numerical_cols] = scaler.transform(X_val[numerical_cols])
    X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])

    # Split into numerical (float) and categorical (long) tensors.
    X_train_num = torch.tensor(X_train[numerical_cols].values, dtype=torch.float32)
    X_train_cat = torch.tensor(X_train[categorical_cols].values, dtype=torch.long)

    X_val_num = torch.tensor(X_val[numerical_cols].values, dtype=torch.float32)
    X_val_cat = torch.tensor(X_val[categorical_cols].values, dtype=torch.long)

    X_test_num = torch.tensor(X_test[numerical_cols].values, dtype=torch.float32)
    X_test_cat = torch.tensor(X_test[categorical_cols].values, dtype=torch.long)

    # Label tensors: float for BCE, long for CrossEntropy.
    if target == "binary":
        y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)
        y_val_tensor = torch.tensor(y_val.values, dtype=torch.float32)
        y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)
    elif target == "multi":
        y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
        y_val_tensor = torch.tensor(y_val.values, dtype=torch.long)
        y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)
    else:
        raise ValueError("target must be 'binary' or 'multi'")

    # TensorDatasets.
    train_dataset = TensorDataset(X_train_num, X_train_cat, y_train_tensor)
    val_dataset = TensorDataset(X_val_num, X_val_cat, y_val_tensor)
    test_dataset = TensorDataset(X_test_num, X_test_cat, y_test_tensor)

    # DataLoaders; seed the shuffle generator when a random_state is given.
    # (Fixed: identity comparison with None per PEP 8, was `== None`.)
    if random_state is None:
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    else:
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, generator=torch.Generator().manual_seed(random_state))
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return X_train, categorical_cols, numerical_cols, train_loader, val_loader, test_loader, y_train, scaler
362
+
363
+
364
def calculate_csi(y_true, pred):
    """Compute the Critical Success Index (CSI) for the 3-class setting.

    Classes 0 and 1 are treated as "event" classes and class 2 as the
    "no event" class: diagonal hits on classes 0/1 count as H, predicting
    an event class for the wrong row counts as F (false alarm), and
    predicting class 2 for a true event counts as M (miss).

    Fix: the confusion matrix is accumulated on a fixed 3x3 grid, so the
    score stays well defined when a class is absent from ``y_true``/``pred``
    (``sklearn.confusion_matrix`` without ``labels`` returns a smaller
    matrix in that case and the fixed indexing below raised IndexError).

    Args:
        y_true: iterable of true labels in {0, 1, 2}.
        pred: iterable of predicted labels in {0, 1, 2}.

    Returns:
        CSI = H / (H + F + M), with a small epsilon guarding division by 0.
    """
    y_true = np.asarray(y_true, dtype=int)
    pred = np.asarray(pred, dtype=int)

    # Fixed 3x3 confusion matrix: rows = true class, cols = predicted class.
    # Samples with labels outside {0, 1, 2} are ignored, matching
    # confusion_matrix(..., labels=[0, 1, 2]).
    valid = (y_true >= 0) & (y_true <= 2) & (pred >= 0) & (pred <= 2)
    cm = np.zeros((3, 3), dtype=np.int64)
    np.add.at(cm, (y_true[valid], pred[valid]), 1)

    H = cm[0, 0] + cm[1, 1]                        # hits on the event classes
    F = cm[1, 0] + cm[2, 0] + cm[0, 1] + cm[2, 1]  # false alarms
    M = cm[0, 2] + cm[1, 2]                        # missed events

    # CSI with epsilon to avoid 0/0 when no events occur at all.
    CSI = H / (H + F + M + 1e-10)
    return CSI
378
+
379
def sample_weight(y_train):
    """Return one 'balanced' weight per sample of ``y_train``.

    Uses the same formula as sklearn's
    ``compute_class_weight(class_weight='balanced')``:
    ``n_samples / (n_classes * count(class))``.

    Fix: weights are looked up by class *label* instead of by position.
    The previous ``class_weights[label]`` indexing assumed labels are
    exactly ``0..k-1``; a non-contiguous label set (e.g. {0, 2}) raised
    IndexError or silently assigned the wrong weight.

    Args:
        y_train: 1-D array-like of class labels.

    Returns:
        ``np.ndarray`` of per-sample weights aligned with ``y_train``.
    """
    y = np.asarray(y_train)
    classes, counts = np.unique(y, return_counts=True)
    # Balanced weight per class, in the order of ``classes``.
    class_weights = len(y) / (len(classes) * counts)
    weight_by_class = dict(zip(classes, class_weights))
    return np.array([weight_by_class[label] for label in y])
388
+
389
+ # ν•˜μ΄νΌνŒŒλΌλ―Έν„° μ΅œμ ν™” ν•¨μˆ˜ μ •μ˜
390
+ def objective(trial, model_choose, region, data_sample='pure', target='multi', n_folds=3, random_state=42):
391
+ # GPU μ‚¬μš© κ°€λŠ₯ μ—¬λΆ€ 확인 및 device μ„€μ •
392
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
393
+ val_scores = []
394
+
395
+ # --- 1. ν•˜μ΄νΌνŒŒλΌλ―Έν„° 탐색 λ²”μœ„ μ •μ˜ (μˆ˜μ •λ¨) ---
396
+ if model_choose == "ft_transformer":
397
+ d_token = trial.suggest_int("d_token", 64, 256, step=32)
398
+ n_blocks = trial.suggest_int("n_blocks", 2, 6) # 깊이 μΆ•μ†Œλ‘œ 과적합 λ°©μ§€
399
+ n_heads = trial.suggest_categorical("n_heads", [4, 8])
400
+ # d_token은 n_heads의 λ°°μˆ˜μ—¬μ•Ό 함 (FT-Transformer의 ꡬ쑰적 μ œμ•½ λŒ€μ‘)
401
+ if d_token % n_heads != 0:
402
+ d_token = (d_token // n_heads) * n_heads
403
+
404
+ attention_dropout = trial.suggest_float("attention_dropout", 0.1, 0.4)
405
+ ffn_dropout = trial.suggest_float("ffn_dropout", 0.1, 0.4)
406
+ lr = trial.suggest_float("lr", 1e-5, 1e-2, log=True) # λ²”μœ„ ν™•λŒ€
407
+ weight_decay = trial.suggest_float("weight_decay", 1e-4, 1e-1, log=True) # 더 곡격적인 λ²”μœ„λ‘œ ν™•μž₯
408
+ batch_size = trial.suggest_categorical("batch_size", [32, 64, 128, 256]) # Batch Size μΆ”κ°€
409
+
410
+ elif model_choose == 'resnet_like':
411
+ d_main = trial.suggest_int("d_main", 64, 256, step=32)
412
+ d_hidden = trial.suggest_int("d_hidden", 64, 512, step=64)
413
+ n_blocks = trial.suggest_int("n_blocks", 2, 5) # λ„ˆλ¬΄ κΉŠμ§€ μ•Šκ²Œ 쑰절
414
+ dropout_first = trial.suggest_float("dropout_first", 0.1, 0.4)
415
+ dropout_second = trial.suggest_float("dropout_second", 0.0, 0.2)
416
+ lr = trial.suggest_float("lr", 1e-5, 1e-2, log=True)
417
+ weight_decay = trial.suggest_float("weight_decay", 1e-4, 1e-1, log=True) # 더 곡격적인 λ²”μœ„λ‘œ ν™•μž₯
418
+ batch_size = trial.suggest_categorical("batch_size", [32, 64, 128, 256]) # Batch Size μΆ”κ°€
419
+
420
+ elif model_choose == 'deepgbm':
421
+ # DeepGBM의 경우 λͺ¨λΈ νŠΉμ„±μ— 맞좰 ResNet 블둝 및 μž„λ² λ”© 차원 쑰절
422
+ d_main = trial.suggest_int("d_main", 64, 256, step=32)
423
+ d_hidden = trial.suggest_int("d_hidden", 64, 256, step=64)
424
+ n_blocks = trial.suggest_int("n_blocks", 2, 6)
425
+ dropout = trial.suggest_float("dropout", 0.1, 0.4)
426
+ lr = trial.suggest_float("lr", 1e-5, 1e-2, log=True)
427
+ weight_decay = trial.suggest_float("weight_decay", 1e-4, 1e-1, log=True) # 더 곡격적인 λ²”μœ„λ‘œ ν™•μž₯
428
+ batch_size = trial.suggest_categorical("batch_size", [32, 64, 128, 256]) # Batch Size μΆ”κ°€
429
+
430
+ # --- 2. Fold별 ν•™μŠ΅ 및 ꡐ차 검증 ---
431
+ for fold in range(1, n_folds + 1):
432
+ X_train_df, categorical_cols, numerical_cols, train_loader, val_loader, _, y_train, _ = prepare_dataloader_with_batchsize(
433
+ region, data_sample=data_sample, target=target, fold=fold, random_state=random_state, batch_size=batch_size
434
+ )
435
+
436
+ # λͺ¨λΈ μ΄ˆκΈ°ν™”
437
+ if model_choose == "ft_transformer":
438
+ model = FTTransformer(
439
+ num_features=len(numerical_cols),
440
+ cat_cardinalities=[len(X_train_df[col].unique()) for col in categorical_cols],
441
+ d_token=d_token,
442
+ n_blocks=n_blocks,
443
+ n_heads=n_heads,
444
+ attention_dropout=attention_dropout,
445
+ ffn_dropout=ffn_dropout,
446
+ num_classes=3
447
+ ).to(device)
448
+ elif model_choose == 'resnet_like':
449
+ input_dim = len(numerical_cols) + len(categorical_cols)
450
+ model = ResNetLike(
451
+ input_dim=input_dim,
452
+ d_main=d_main,
453
+ d_hidden=d_hidden,
454
+ n_blocks=n_blocks,
455
+ dropout_first=dropout_first,
456
+ dropout_second=dropout_second,
457
+ num_classes=3
458
+ ).to(device)
459
+ elif model_choose == 'deepgbm':
460
+ model = DeepGBM(
461
+ num_features=len(numerical_cols),
462
+ cat_features=[len(X_train_df[col].unique()) for col in categorical_cols],
463
+ d_main=d_main,
464
+ d_hidden=d_hidden,
465
+ n_blocks=n_blocks,
466
+ dropout=dropout,
467
+ num_classes=3
468
+ ).to(device)
469
+
470
+ # 클래슀 κ°€μ€‘μΉ˜ 계산 및 손싀 ν•¨μˆ˜ μ„€μ • (Label Smoothing 적용)
471
+ if target == 'multi':
472
+ class_weights = compute_class_weight(
473
+ class_weight='balanced',
474
+ classes=np.unique(y_train),
475
+ y=y_train
476
+ )
477
+ # ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜ 둜그 좜λ ₯
478
+ unique_classes = np.unique(y_train)
479
+ class_counts = {cls: np.sum(y_train == cls) for cls in unique_classes}
480
+ print(f" Fold {fold} - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {dict(zip(unique_classes, class_weights))} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {class_counts})")
481
+ class_weights_tensor = torch.tensor(class_weights, dtype=torch.float32).to(device)
482
+ criterion = nn.CrossEntropyLoss(weight=class_weights_tensor, label_smoothing=0.0) # Label Smoothing μΆ”κ°€
483
+ else:
484
+ criterion = nn.BCEWithLogitsLoss()
485
+ optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
486
+
487
+ # ν•™μŠ΅λ₯  μŠ€μΌ€μ€„λŸ¬ μΆ”κ°€: μ„±λŠ₯ 정체 μ‹œ LR을 0.5λ°° κ°μ†Œ (검증 CSI κΈ°μ€€)
488
+ scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=3)
489
+
490
+ # ν•™μŠ΅ μ„€μ • (에폭 및 νŽ˜μ΄μ…˜μŠ€ 상ν–₯)
491
+ epochs = 200
492
+ patience = 12 # λ”₯λŸ¬λ‹μ˜ 정체 ꡬ간을 κ³ λ €ν•˜μ—¬ μ†Œν­ 상ν–₯
493
+ best_fold_csi = 0
494
+ counter = 0
495
+
496
+ for epoch in range(epochs):
497
+ model.train()
498
+ for x_num_batch, x_cat_batch, y_batch in train_loader:
499
+ x_num_batch, x_cat_batch, y_batch = x_num_batch.to(device), x_cat_batch.to(device), y_batch.to(device)
500
+
501
+ optimizer.zero_grad()
502
+ y_pred = model(x_num_batch, x_cat_batch)
503
+ loss = criterion(y_pred, y_batch if target == 'multi' else y_batch.float())
504
+ loss.backward()
505
+ optimizer.step()
506
+
507
+ # Validation 평가
508
+ model.eval()
509
+ y_pred_val, y_true_val = [], []
510
+ with torch.no_grad():
511
+ for x_num_batch, x_cat_batch, y_batch in val_loader:
512
+ x_num_batch, x_cat_batch, y_batch = x_num_batch.to(device), x_cat_batch.to(device), y_batch.to(device)
513
+ output = model(x_num_batch, x_cat_batch)
514
+ pred = output.argmax(dim=1) if target == 'multi' else (torch.sigmoid(output) >= 0.5).long()
515
+
516
+ y_pred_val.extend(pred.cpu().numpy())
517
+ y_true_val.extend(y_batch.cpu().numpy())
518
+
519
+ # CSI 계산 및 μŠ€μΌ€μ€„λŸ¬ μ—…λ°μ΄νŠΈ
520
+ val_csi = calculate_csi(y_true_val, y_pred_val)
521
+ scheduler.step(val_csi)
522
+
523
+ # Optuna Pruning 적용 (첫 번째 Foldμ—μ„œ μ‘°κΈ° μ’…λ£Œ νŒλ‹¨ κ°•ν™”)
524
+ trial.report(val_csi, epoch)
525
+ if trial.should_prune():
526
+ raise optuna.exceptions.TrialPruned()
527
+
528
+ # Early Stopping 체크
529
+ if val_csi > best_fold_csi:
530
+ best_fold_csi = val_csi
531
+ counter = 0
532
+ else:
533
+ counter += 1
534
+
535
+ if counter >= patience:
536
+ break
537
+
538
+ val_scores.append(best_fold_csi)
539
+
540
+ # λͺ¨λ“  fold의 평균 μ„±λŠ₯ λ°˜ν™˜
541
+ return np.mean(val_scores)
542
+
543
+
544
+ # μ΅œμ ν™”λœ ν•˜μ΄νΌνŒŒλΌλ―Έν„°λ‘œ μ΅œμ’… λͺ¨λΈ ν•™μŠ΅ 및 μ €μž₯ ν•¨μˆ˜
545
+ def train_final_model(best_params, model_choose, region, data_sample='pure', target='multi', n_folds=3, random_state=42):
546
+ """
547
+ μ΅œμ ν™”λœ ν•˜μ΄νΌνŒŒλΌλ―Έν„°λ‘œ μ΅œμ’… λͺ¨λΈμ„ ν•™μŠ΅ν•˜κ³  μ €μž₯ν•©λ‹ˆλ‹€.
548
+
549
+ Args:
550
+ best_params: μ΅œμ ν™”λœ ν•˜μ΄νΌνŒŒλΌλ―Έν„° λ”•μ…”λ„ˆλ¦¬
551
+ model_choose: λͺ¨λΈ 선택 ('ft_transformer', 'resnet_like', 'deepgbm')
552
+ region: μ§€μ—­λͺ…
553
+ data_sample: 데이터 μƒ˜ν”Œ νƒ€μž… ('pure', 'smote', etc.)
554
+ target: νƒ€κ²Ÿ νƒ€μž… ('multi', 'binary')
555
+ n_folds: ꡐ차 검증 fold 수
556
+ random_state: 랜덀 μ‹œλ“œ
557
+
558
+ Returns:
559
+ μ €μž₯된 λͺ¨λΈ 경둜 리슀트
560
+ """
561
+ # GPU μ‚¬μš© κ°€λŠ₯ μ—¬λΆ€ 확인 및 device μ„€μ •
562
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
563
+
564
+ models = []
565
+ scalers = [] # scaler 리슀트 μΆ”κ°€
566
+
567
+ print("μ΅œμ’… λͺ¨λΈ ν•™μŠ΅ μ‹œμž‘...")
568
+
569
+ for fold in range(1, n_folds + 1):
570
+ print(f"Fold {fold} ν•™μŠ΅ 쀑...")
571
+
572
+ # μ΅œμ ν™”λœ batch_size μ‚¬μš©
573
+ batch_size = best_params.get("batch_size", 64)
574
+ X_train_df, categorical_cols, numerical_cols, train_loader, val_loader, _, y_train, scaler = prepare_dataloader_with_batchsize(
575
+ region, data_sample=data_sample, target=target, fold=fold, random_state=random_state, batch_size=batch_size
576
+ )
577
+
578
+ # λͺ¨λΈ μ΄ˆκΈ°ν™”
579
+ if model_choose == "ft_transformer":
580
+ d_token = best_params["d_token"]
581
+ n_heads = best_params.get("n_heads", 8)
582
+ # d_token은 n_heads의 λ°°μˆ˜μ—¬μ•Ό 함 (FT-Transformer의 ꡬ쑰적 μ œμ•½ λŒ€μ‘)
583
+ if d_token % n_heads != 0:
584
+ d_token = (d_token // n_heads) * n_heads
585
+
586
+ model = FTTransformer(
587
+ num_features=len(numerical_cols),
588
+ cat_cardinalities=[len(X_train_df[col].unique()) for col in categorical_cols],
589
+ d_token=d_token,
590
+ n_blocks=best_params["n_blocks"],
591
+ n_heads=n_heads,
592
+ attention_dropout=best_params["attention_dropout"],
593
+ ffn_dropout=best_params["ffn_dropout"],
594
+ num_classes=3
595
+ ).to(device)
596
+ elif model_choose == 'resnet_like':
597
+ input_dim = len(numerical_cols) + len(categorical_cols)
598
+ model = ResNetLike(
599
+ input_dim=input_dim,
600
+ d_main=best_params["d_main"],
601
+ d_hidden=best_params["d_hidden"],
602
+ n_blocks=best_params["n_blocks"],
603
+ dropout_first=best_params["dropout_first"],
604
+ dropout_second=best_params["dropout_second"],
605
+ num_classes=3
606
+ ).to(device)
607
+ elif model_choose == 'deepgbm':
608
+ model = DeepGBM(
609
+ num_features=len(numerical_cols),
610
+ cat_features=[len(X_train_df[col].unique()) for col in categorical_cols],
611
+ d_main=best_params["d_main"],
612
+ d_hidden=best_params["d_hidden"],
613
+ n_blocks=best_params["n_blocks"],
614
+ dropout=best_params["dropout"],
615
+ num_classes=3
616
+ ).to(device)
617
+ else:
618
+ raise ValueError(f"Unknown model_choose: {model_choose}")
619
+
620
+ # 클래슀 κ°€μ€‘μΉ˜ 계산 및 손싀 ν•¨μˆ˜ μ„€μ • (Label Smoothing 적용)
621
+ if target == 'multi':
622
+ class_weights = compute_class_weight(
623
+ class_weight='balanced',
624
+ classes=np.unique(y_train),
625
+ y=y_train
626
+ )
627
+ class_weights_tensor = torch.tensor(class_weights, dtype=torch.float32).to(device)
628
+ criterion = nn.CrossEntropyLoss(weight=class_weights_tensor, label_smoothing=0.0) # Label Smoothing μΆ”κ°€
629
+ else:
630
+ criterion = nn.BCEWithLogitsLoss()
631
+ optimizer = optim.AdamW(model.parameters(), lr=best_params["lr"], weight_decay=best_params["weight_decay"])
632
+
633
+ # ν•™μŠ΅λ₯  μŠ€μΌ€μ€„λŸ¬
634
+ scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=3)
635
+
636
+ # ν•™μŠ΅ μ„€μ •
637
+ epochs = 200
638
+ patience = 12
639
+ best_fold_csi = 0
640
+ counter = 0
641
+ best_model = None
642
+
643
+ for epoch in range(epochs):
644
+ model.train()
645
+ for x_num_batch, x_cat_batch, y_batch in train_loader:
646
+ x_num_batch, x_cat_batch, y_batch = x_num_batch.to(device), x_cat_batch.to(device), y_batch.to(device)
647
+
648
+ optimizer.zero_grad()
649
+ y_pred = model(x_num_batch, x_cat_batch)
650
+ loss = criterion(y_pred, y_batch if target == 'multi' else y_batch.float())
651
+ loss.backward()
652
+ optimizer.step()
653
+
654
+ # Validation 평가
655
+ model.eval()
656
+ y_pred_val, y_true_val = [], []
657
+ with torch.no_grad():
658
+ for x_num_batch, x_cat_batch, y_batch in val_loader:
659
+ x_num_batch, x_cat_batch, y_batch = x_num_batch.to(device), x_cat_batch.to(device), y_batch.to(device)
660
+ output = model(x_num_batch, x_cat_batch)
661
+ pred = output.argmax(dim=1) if target == 'multi' else (torch.sigmoid(output) >= 0.5).long()
662
+
663
+ y_pred_val.extend(pred.cpu().numpy())
664
+ y_true_val.extend(y_batch.cpu().numpy())
665
+
666
+ # CSI 계산 및 μŠ€μΌ€μ€„λŸ¬ μ—…λ°μ΄νŠΈ
667
+ val_csi = calculate_csi(y_true_val, y_pred_val)
668
+ scheduler.step(val_csi)
669
+
670
+ # Early Stopping 체크
671
+ if val_csi > best_fold_csi:
672
+ best_fold_csi = val_csi
673
+ counter = 0
674
+ best_model = copy.deepcopy(model)
675
+ else:
676
+ counter += 1
677
+
678
+ if counter >= patience:
679
+ print(f" Early stopping at epoch {epoch+1}, Best CSI: {best_fold_csi:.4f}")
680
+ break
681
+
682
+ if best_model is None:
683
+ best_model = model
684
+
685
+ scalers.append(scaler) # scaler μ €μž₯ (fold μˆœμ„œλŒ€λ‘œ)
686
+ models.append(best_model)
687
+ print(f" Fold {fold} ν•™μŠ΅ μ™„λ£Œ (검증 CSI: {best_fold_csi:.4f})")
688
+
689
+ # λͺ¨λΈ μ €μž₯ 경둜 μ„€μ •
690
+ save_dir = f'../save_model/{model_choose}_optima'
691
+ os.makedirs(save_dir, exist_ok=True)
692
+
693
+ # 파일λͺ… 생성
694
+ if data_sample == 'pure':
695
+ model_filename = f'{model_choose}_pure_{region}.pkl'
696
+ else:
697
+ model_filename = f'{model_choose}_{data_sample}_{region}.pkl'
698
+
699
+ model_path = f'{save_dir}/{model_filename}'
700
+
701
+ # λ¦¬μŠ€νŠΈμ— λ‹΄μ•„ ν•œ λ²ˆμ— μ €μž₯
702
+ joblib.dump(models, model_path)
703
+ print(f"\nλͺ¨λ“  λͺ¨λΈ μ €μž₯ μ™„λ£Œ: {model_path} (총 {len(models)}개 fold)")
704
+
705
+ # Scaler 별도 μ €μž₯
706
+ scaler_save_dir = f'../save_model/{model_choose}_optima/scaler'
707
+ os.makedirs(scaler_save_dir, exist_ok=True)
708
+
709
+ # 파일λͺ… 생성 (λͺ¨λΈκ³Ό λ™μΌν•œ νŒ¨ν„΄)
710
+ if data_sample == 'pure':
711
+ scaler_filename = f'{model_choose}_pure_{region}_scaler.pkl'
712
+ else:
713
+ scaler_filename = f'{model_choose}_{data_sample}_{region}_scaler.pkl'
714
+
715
+ scaler_path = f'{scaler_save_dir}/{scaler_filename}'
716
+ joblib.dump(scalers, scaler_path)
717
+ print(f"Scaler μ €μž₯ μ™„λ£Œ: {scaler_path} (총 {len(scalers)}개 fold)")
718
+
719
+ return model_path
720
+
Analysis_code/5.optima/resnet_like_ctgan10000/resnet_like_ctgan10000_busan.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import optuna
import numpy as np
import random
import pandas as pd
import joblib
import os
import torch
from utils import *

# Fix Python and NumPy seeds for reproducibility.
seed = 42
random.seed(seed)
np.random.seed(seed)


# 1. Create the study with direction='maximize'
#    (higher CSI is better, so we maximize it).
study = optuna.create_study(
    direction="maximize",  # higher CSI scores are better, so maximize
    pruner=optuna.pruners.MedianPruner(n_warmup_steps=10)  # watch the first 10 epochs, prune afterwards
)
20
# Callback that prints detailed information after each finished trial.
def print_trial_callback(study, trial):
    """Print per-trial details, including the best value observed so far.

    Fix: ``study.best_value`` / ``study.best_trial`` / ``study.best_params``
    raise ``ValueError`` while no trial has COMPLETED (e.g. when the very
    first trial is pruned or fails). The original code assumed they could be
    compared against ``None``, which would crash the whole ``study.optimize``
    run; the best-trial section is now guarded.

    Args:
        study: the Optuna study being optimized.
        trial: the trial that just finished.
    """
    print(f"\n{'='*80}")
    print(f"Trial {trial.number} μ™„λ£Œ")
    if trial.value is not None:
        print(f"  Value (CSI): {trial.value:.6f}")
    else:
        print(f"  Value: {trial.value}")
    print(f"  Parameters: {trial.params}")
    try:
        # best_* accessors raise ValueError until at least one trial completes.
        print(f"  Best Value (CSI): {study.best_value:.6f}")
        print(f"  Best Trial: {study.best_trial.number}")
        print(f"  Best Parameters: {study.best_params}")
    except ValueError:
        print("  Best Value: (no completed trials yet)")
    print(f"{'='*80}\n")
31
+
32
+
33
+
34
# 2. Run the optimization (100 trials, pruning enabled via the study's pruner).
study.optimize(
    lambda trial: objective(trial, model_choose="resnet_like", region="busan", data_sample='ctgan10000'),
    n_trials=100
    ,
    callbacks=[print_trial_callback]
)

# 3. Inspect and summarize the results.
print(f"\nμ΅œμ ν™” μ™„λ£Œ.")
print(f"Best CSI Score: {study.best_value:.4f}")
print(f"Best Hyperparameters: {study.best_params}")

try:
    # Collect the CSI score of every trial that actually completed.
    csi_scores = [trial.value for trial in study.trials if trial.value is not None]

    if len(csi_scores) > 0:
        print(f"\nμ΅œμ ν™” κ³Όμ • μš”μ•½:")
        print(f"  - 총 μ‹œλ„ 횟수: {len(study.trials)}")
        print(f"  - μ„±κ³΅ν•œ μ‹œλ„: {len(csi_scores)}")
        print(f"  - 졜초 CSI: {csi_scores[0]:.4f}")
        print(f"  - μ΅œμ’… CSI: {csi_scores[-1]:.4f}")
        print(f"  - 졜고 CSI: {max(csi_scores):.4f}")
        print(f"  - μ΅œμ € CSI: {min(csi_scores):.4f}")
        print(f"  - 평균 CSI: {np.mean(csi_scores):.4f}")

    # Persist the Study object for later analysis.
    # Resolve the base directory relative to this file's location.
    current_file_dir = os.path.dirname(os.path.abspath(__file__))
    base_dir = os.path.dirname(os.path.dirname(current_file_dir))  # the 5.optima directory
    os.makedirs(os.path.join(base_dir, "optimization_history"), exist_ok=True)
    study_path = os.path.join(base_dir, "optimization_history/resnet_like_ctgan10000_busan_trials.pkl")
    joblib.dump(study, study_path)
    print(f"\nμ΅œμ ν™” Study 객체가 {study_path}에 μ €μž₯λ˜μ—ˆμŠ΅λ‹ˆλ‹€.")

    # Train and save the final model with the optimized hyperparameters.
    print("\n" + "="*50)
    print("μ΅œμ ν™”λœ ν•˜μ΄νΌνŒŒλΌλ―Έν„°λ‘œ μ΅œμ’… λͺ¨λΈ ν•™μŠ΅ μ‹œμž‘")
    print("="*50)

    best_params = study.best_params
    model_path = train_final_model(
        best_params=best_params,
        model_choose="resnet_like",
        region="busan",
        data_sample='ctgan10000',
        target='multi',
        n_folds=3,
        random_state=seed
    )

    print(f"\nμ΅œμ’… λͺ¨λΈ ν•™μŠ΅ 및 μ €μž₯ μ™„λ£Œ!")
    print(f"μ €μž₯된 λͺ¨λΈ 경둜: {model_path}")

except Exception as e:
    # Broad catch is intentional here: this is the script's top-level boundary,
    # and the failure is logged with a full traceback before exiting.
    print(f"\n⚠️ μ΅œμ ν™” κ²°κ³Ό 뢄석 쀑 였λ₯˜ λ°œμƒ: {e}")
    import traceback
    traceback.print_exc()

# Normal termination.
import sys
sys.exit(0)
97
+
98
+
Analysis_code/5.optima/resnet_like_ctgan10000/utils.py ADDED
@@ -0,0 +1,719 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
import os
import copy
from sklearn.preprocessing import QuantileTransformer, LabelEncoder
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import confusion_matrix
from sklearn.utils.class_weight import compute_class_weight
import pandas as pd
import optuna
from sklearn.metrics import accuracy_score, f1_score
import joblib


import sys
# Resolve the shared models directory relative to this file's location
# so the model classes can be imported regardless of the working directory.
current_file_dir = os.path.dirname(os.path.abspath(__file__))
models_path = os.path.abspath(os.path.join(current_file_dir, '../../models'))
sys.path.insert(0, models_path)
from ft_transformer import FTTransformer
from resnet_like import ResNetLike
from deepgbm import DeepGBM
import warnings
warnings.filterwarnings('ignore')  # NOTE(review): silences ALL warnings globally — may hide real issues


# Fix Python and NumPy seeds for reproducibility.
seed = 42
random.seed(seed)
np.random.seed(seed)

# Fix PyTorch seeds.
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)  # apply the same seed in multi-GPU environments

# Deterministic mode for PyTorch operations.
torch.backends.cudnn.deterministic = True  # intended to guarantee identical results across runs
# NOTE(review): benchmark=True lets cuDNN auto-select algorithms at runtime,
# which can undermine the reproducibility claimed above — confirm this is intended.
torch.backends.cudnn.benchmark = True  # enable performance optimization (fastest available kernels)
43
+
44
+
45
def add_derived_features(df: pd.DataFrame) -> pd.DataFrame:
    """Restore the derived features that were previously removed.

    Adds cyclic sin/cos encodings for 'hour' (24-hour period) and 'month'
    (12-month period), plus the ground-minus-air temperature difference.

    Args:
        df: dataframe containing 'hour', 'month', 'groundtemp' and 'temp_C'.

    Returns:
        A copy of *df* with the derived columns appended; the input is not
        mutated.
    """
    out = df.copy()
    hour_angle = 2 * np.pi * out['hour'] / 24
    month_angle = 2 * np.pi * out['month'] / 12
    out['hour_sin'] = np.sin(hour_angle)
    out['hour_cos'] = np.cos(hour_angle)
    out['month_sin'] = np.sin(month_angle)
    out['month_cos'] = np.cos(month_angle)
    out['ground_temp - temp_C'] = out['groundtemp'] - out['temp_C']
    return out
62
+
63
def preprocessing(df):
    """Data preprocessing function.

    Casts the time columns to int, appends the derived features, normalizes
    the calm wind-direction label and fixes the column set/order expected by
    the downstream encoders (order matters).

    Args:
        df: raw dataframe.

    Returns:
        Preprocessed dataframe with the fixed 28-column layout
        (features + 'multi_class').
    """
    df = df[df.columns].copy()
    df['year'] = df['year'].astype('int')
    df['month'] = df['month'].astype('int')
    df['hour'] = df['hour'].astype('int')
    df = add_derived_features(df).copy()
    df['multi_class'] = df['multi_class'].astype('int')
    # 'μ •μ˜¨' is the Korean label for calm (no wind); map it to "0" so the
    # subsequent int cast of 'wind_dir' cannot fail.
    df.loc[df['wind_dir']=='μ •μ˜¨', 'wind_dir'] = "0"
    df['wind_dir'] = df['wind_dir'].astype('int')
    # Fixed column selection/order — downstream code relies on this layout.
    df = df[['temp_C', 'precip_mm', 'wind_speed', 'wind_dir', 'hm',
             'vap_pressure', 'dewpoint_C', 'loc_pressure', 'sea_pressure',
             'solarRad', 'snow_cm', 'cloudcover', 'lm_cloudcover', 'low_cloudbase',
             'groundtemp', 'O3', 'NO2', 'PM10', 'PM25', 'year',
             'month', 'hour', 'ground_temp - temp_C', 'hour_sin', 'hour_cos',
             'month_sin', 'month_cos','multi_class']].copy()
    return df
87
+
88
+
89
# Dataset preparation function.
def prepare_dataset(region, data_sample='pure', target='multi', fold=3):
    """Load, split, encode and scale the data for one region/fold.

    Args:
        region: region name used in the CSV filenames.
        data_sample: 'pure' to train on the original data, otherwise the
            name of an oversampled dataset (e.g. 'ctgan10000').
        target: 'multi' or 'binary'; selects the '<target>_class' column.
        fold: fold index; the year (2021 - fold) of the original data is
            held out as the validation split.

    Returns:
        (X_train, X_val, X_test, y_train, y_val, y_test,
         categorical_cols, numerical_cols)
    """
    # Resolve the data directory relative to this file's location.
    current_file_dir = os.path.dirname(os.path.abspath(__file__))
    data_base_dir = os.path.abspath(os.path.join(current_file_dir, '../../../data'))

    # Data paths.
    dat_path = os.path.join(data_base_dir, f"data_for_modeling/{region}_train.csv")
    if data_sample == 'pure':
        train_path = dat_path
    else:
        train_path = os.path.join(data_base_dir, f'data_oversampled/{data_sample}/{data_sample}_{fold}_{region}.csv')
    test_path = os.path.join(data_base_dir, f"data_for_modeling/{region}_test.csv")
    drop_col = ['multi_class','year']
    target_col = f'{target}_class'

    # Load data. Validation is ALWAYS taken from the original (non-oversampled)
    # data: the year (2021 - fold).
    region_dat = preprocessing(pd.read_csv(dat_path, index_col=0))
    if data_sample == 'pure':
        region_train = region_dat.loc[~region_dat['year'].isin([2021-fold]), :]
    else:
        # NOTE(review): read without index_col=0, unlike dat_path above —
        # confirm the oversampled CSVs really have no index column.
        region_train = preprocessing(pd.read_csv(train_path))
    region_val = region_dat.loc[region_dat['year'].isin([2021-fold]), :]
    region_test = preprocessing(pd.read_csv(test_path))

    # Align columns across the three splits (keep ordering consistent).
    common_columns = region_train.columns.to_list()
    train_data = region_train[common_columns]
    val_data = region_val[common_columns]
    test_data = region_test[common_columns]

    # Split features & target.
    X_train = train_data.drop(columns=drop_col)
    y_train = train_data[target_col]
    X_val = val_data.drop(columns=drop_col)
    y_val = val_data[target_col]
    X_test = test_data.drop(columns=drop_col)
    y_test = test_data[target_col]

    # Split categorical vs numerical columns: int64 columns (e.g. month, hour,
    # wind_dir) are treated as categorical, float64 as numerical.
    categorical_cols = X_train.select_dtypes(include=['object', 'category', 'int64']).columns
    numerical_cols = X_train.select_dtypes(include=['float64']).columns

    # Label-encode categorical variables (fit on the training data only).
    label_encoders = {}
    for col in categorical_cols:
        le = LabelEncoder()
        le.fit(X_train[col])  # fit on training data only
        label_encoders[col] = le

    # Apply the encoders.
    # NOTE(review): transform raises on categories unseen during fit —
    # confirm val/test cannot contain unseen categories.
    for col in categorical_cols:
        X_train[col] = label_encoders[col].transform(X_train[col])
        X_val[col] = label_encoders[col].transform(X_val[col])
        X_test[col] = label_encoders[col].transform(X_test[col])

    # Quantile-transform numerical variables (fit on the training data only).
    scaler = QuantileTransformer(output_distribution='normal')
    scaler.fit(X_train[numerical_cols])  # fit on training data only

    # Apply the transform.
    X_train[numerical_cols] = scaler.transform(X_train[numerical_cols])
    X_val[numerical_cols] = scaler.transform(X_val[numerical_cols])
    X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])

    return X_train, X_val, X_test, y_train, y_val, y_test, categorical_cols, numerical_cols
155
+
156
+
157
+
158
# Data conversion and DataLoader creation function.
def prepare_dataloader(region, data_sample='pure', target='multi', fold=3, random_state=None):
    """Load, encode and scale the data, then wrap it in PyTorch DataLoaders.

    Args:
        region: region name used in the CSV filenames.
        data_sample: 'pure' to train on the original data, otherwise the
            name of an oversampled dataset (e.g. 'ctgan10000').
        target: 'multi' (CrossEntropy, long labels) or 'binary'
            (BCEWithLogits, float labels).
        fold: fold index; the year (2021 - fold) of the original data is
            held out as the validation split.
        random_state: when given, seeds the training DataLoader's shuffle
            generator for reproducibility.

    Returns:
        (X_train, categorical_cols, numerical_cols,
         train_loader, val_loader, test_loader)

    Raises:
        ValueError: if `target` is neither 'binary' nor 'multi'.

    Fix over the original: the `random_state` check now uses the identity
    comparison `is None` instead of `== None` (PEP 8; `==` can be hijacked
    by operator overloading).
    """
    # Resolve the data directory relative to this file's location.
    current_file_dir = os.path.dirname(os.path.abspath(__file__))
    data_base_dir = os.path.abspath(os.path.join(current_file_dir, '../../../data'))

    # Data paths.
    dat_path = os.path.join(data_base_dir, f"data_for_modeling/{region}_train.csv")
    if data_sample == 'pure':
        train_path = dat_path
    else:
        train_path = os.path.join(data_base_dir, f'data_oversampled/{data_sample}/{data_sample}_{fold}_{region}.csv')
    test_path = os.path.join(data_base_dir, f"data_for_modeling/{region}_test.csv")
    drop_col = ['multi_class','year']
    target_col = f'{target}_class'

    # Load data; validation is always taken from the original data's year (2021 - fold).
    region_dat = preprocessing(pd.read_csv(dat_path, index_col=0))
    if data_sample == 'pure':
        region_train = region_dat.loc[~region_dat['year'].isin([2021-fold]), :]
    else:
        region_train = preprocessing(pd.read_csv(train_path))
    region_val = region_dat.loc[region_dat['year'].isin([2021-fold]), :]
    region_test = preprocessing(pd.read_csv(test_path))

    # Align columns across the three splits.
    common_columns = region_train.columns.to_list()
    train_data = region_train[common_columns]
    val_data = region_val[common_columns]
    test_data = region_test[common_columns]

    # Split features & target.
    X_train = train_data.drop(columns=drop_col)
    y_train = train_data[target_col]
    X_val = val_data.drop(columns=drop_col)
    y_val = val_data[target_col]
    X_test = test_data.drop(columns=drop_col)
    y_test = test_data[target_col]

    # Categorical (object/category/int64) vs numerical (float64) columns.
    categorical_cols = X_train.select_dtypes(include=['object', 'category', 'int64']).columns
    numerical_cols = X_train.select_dtypes(include=['float64']).columns

    # Label-encode categorical variables, fitting on train only.
    label_encoders = {}
    for col in categorical_cols:
        le = LabelEncoder()
        le.fit(X_train[col])  # fit on training data only
        label_encoders[col] = le

    for col in categorical_cols:
        X_train[col] = label_encoders[col].transform(X_train[col])
        X_val[col] = label_encoders[col].transform(X_val[col])
        X_test[col] = label_encoders[col].transform(X_test[col])

    # Quantile-transform numerical variables, fitting on train only.
    scaler = QuantileTransformer(output_distribution='normal')
    scaler.fit(X_train[numerical_cols])

    X_train[numerical_cols] = scaler.transform(X_train[numerical_cols])
    X_val[numerical_cols] = scaler.transform(X_val[numerical_cols])
    X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])

    # Tensors: numerical features as float32, categorical as long.
    X_train_num = torch.tensor(X_train[numerical_cols].values, dtype=torch.float32)
    X_train_cat = torch.tensor(X_train[categorical_cols].values, dtype=torch.long)

    X_val_num = torch.tensor(X_val[numerical_cols].values, dtype=torch.float32)
    X_val_cat = torch.tensor(X_val[categorical_cols].values, dtype=torch.long)

    X_test_num = torch.tensor(X_test[numerical_cols].values, dtype=torch.float32)
    X_test_cat = torch.tensor(X_test[categorical_cols].values, dtype=torch.long)

    # Label tensors: float32 for binary (BCE), long for multi-class (CE).
    if target == "binary":
        y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)
        y_val_tensor = torch.tensor(y_val.values, dtype=torch.float32)
        y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)
    elif target == "multi":
        y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
        y_val_tensor = torch.tensor(y_val.values, dtype=torch.long)
        y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)
    else:
        raise ValueError("target must be 'binary' or 'multi'")

    # TensorDatasets.
    train_dataset = TensorDataset(X_train_num, X_train_cat, y_train_tensor)
    val_dataset = TensorDataset(X_val_num, X_val_cat, y_val_tensor)
    test_dataset = TensorDataset(X_test_num, X_test_cat, y_test_tensor)

    # DataLoaders; only training data is shuffled.
    if random_state is None:
        train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    else:
        train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, generator=torch.Generator().manual_seed(random_state))
    val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

    return X_train, categorical_cols, numerical_cols, train_loader, val_loader, test_loader
259
+
260
# Data conversion and DataLoader creation function (version with a batch_size parameter).
def prepare_dataloader_with_batchsize(region, data_sample='pure', target='multi', fold=3, random_state=None, batch_size=64):
    """Load, encode and scale the data, then wrap it in PyTorch DataLoaders.

    Same pipeline as `prepare_dataloader`, but the batch size is a parameter
    and the fitted scaler plus raw training labels are returned as well
    (needed for class weighting and final-model persistence).

    Args:
        region: region name used in the CSV filenames.
        data_sample: 'pure' for the original training data, otherwise the
            name of an oversampled dataset (e.g. 'ctgan10000').
        target: 'multi' (CrossEntropy, long labels) or 'binary'
            (BCEWithLogits, float labels).
        fold: fold index; the year (2021 - fold) of the original data is
            held out as the validation split.
        random_state: when given, seeds the training DataLoader's shuffle
            generator for reproducibility.
        batch_size: batch size used by all three DataLoaders.

    Returns:
        (X_train, categorical_cols, numerical_cols,
         train_loader, val_loader, test_loader, y_train, scaler)

    Raises:
        ValueError: if `target` is neither 'binary' nor 'multi'.

    Fix over the original: `random_state` is checked with `is None` instead
    of `== None` (PEP 8 identity comparison).
    """
    # Resolve the data directory relative to this file's location.
    current_file_dir = os.path.dirname(os.path.abspath(__file__))
    data_base_dir = os.path.abspath(os.path.join(current_file_dir, '../../../data'))

    # Data paths.
    dat_path = os.path.join(data_base_dir, f"data_for_modeling/{region}_train.csv")
    if data_sample == 'pure':
        train_path = dat_path
    else:
        train_path = os.path.join(data_base_dir, f'data_oversampled/{data_sample}/{data_sample}_{fold}_{region}.csv')
    test_path = os.path.join(data_base_dir, f"data_for_modeling/{region}_test.csv")
    drop_col = ['multi_class','year']
    target_col = f'{target}_class'

    # Load data; validation is always taken from the original data's year (2021 - fold).
    region_dat = preprocessing(pd.read_csv(dat_path, index_col=0))
    if data_sample == 'pure':
        region_train = region_dat.loc[~region_dat['year'].isin([2021-fold]), :]
    else:
        region_train = preprocessing(pd.read_csv(train_path))
    region_val = region_dat.loc[region_dat['year'].isin([2021-fold]), :]
    region_test = preprocessing(pd.read_csv(test_path))

    # Align columns across the three splits.
    common_columns = region_train.columns.to_list()
    train_data = region_train[common_columns]
    val_data = region_val[common_columns]
    test_data = region_test[common_columns]

    # Split features & target.
    X_train = train_data.drop(columns=drop_col)
    y_train = train_data[target_col]
    X_val = val_data.drop(columns=drop_col)
    y_val = val_data[target_col]
    X_test = test_data.drop(columns=drop_col)
    y_test = test_data[target_col]

    # Categorical (object/category/int64) vs numerical (float64) columns.
    categorical_cols = X_train.select_dtypes(include=['object', 'category', 'int64']).columns
    numerical_cols = X_train.select_dtypes(include=['float64']).columns

    # Label-encode categorical variables, fitting on train only.
    label_encoders = {}
    for col in categorical_cols:
        le = LabelEncoder()
        le.fit(X_train[col])  # fit on training data only
        label_encoders[col] = le

    for col in categorical_cols:
        X_train[col] = label_encoders[col].transform(X_train[col])
        X_val[col] = label_encoders[col].transform(X_val[col])
        X_test[col] = label_encoders[col].transform(X_test[col])

    # Quantile-transform numerical variables, fitting on train only.
    scaler = QuantileTransformer(output_distribution='normal')
    scaler.fit(X_train[numerical_cols])

    X_train[numerical_cols] = scaler.transform(X_train[numerical_cols])
    X_val[numerical_cols] = scaler.transform(X_val[numerical_cols])
    X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])

    # Tensors: numerical features as float32, categorical as long.
    X_train_num = torch.tensor(X_train[numerical_cols].values, dtype=torch.float32)
    X_train_cat = torch.tensor(X_train[categorical_cols].values, dtype=torch.long)

    X_val_num = torch.tensor(X_val[numerical_cols].values, dtype=torch.float32)
    X_val_cat = torch.tensor(X_val[categorical_cols].values, dtype=torch.long)

    X_test_num = torch.tensor(X_test[numerical_cols].values, dtype=torch.float32)
    X_test_cat = torch.tensor(X_test[categorical_cols].values, dtype=torch.long)

    # Label tensors: float32 for binary (BCE), long for multi-class (CE).
    if target == "binary":
        y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)
        y_val_tensor = torch.tensor(y_val.values, dtype=torch.float32)
        y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)
    elif target == "multi":
        y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
        y_val_tensor = torch.tensor(y_val.values, dtype=torch.long)
        y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)
    else:
        raise ValueError("target must be 'binary' or 'multi'")

    # TensorDatasets.
    train_dataset = TensorDataset(X_train_num, X_train_cat, y_train_tensor)
    val_dataset = TensorDataset(X_val_num, X_val_cat, y_val_tensor)
    test_dataset = TensorDataset(X_test_num, X_test_cat, y_test_tensor)

    # DataLoaders using the requested batch size; only training data is shuffled.
    if random_state is None:
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    else:
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, generator=torch.Generator().manual_seed(random_state))
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return X_train, categorical_cols, numerical_cols, train_loader, val_loader, test_loader, y_train, scaler
361
+
362
+
363
def calculate_csi(y_true, pred):
    """Compute the Critical Success Index (CSI) for the 3-class problem.

    Classes 0 and 1 are the "event" classes and class 2 is treated as the
    non-event class (its correct predictions, cm[2, 2], are excluded):
      H (hits)         = diagonal cells for classes 0 and 1,
      F (false alarms) = anything wrongly predicted as class 0 or 1,
      M (misses)       = true class 0/1 predicted as class 2.
    CSI = H / (H + F + M); a tiny epsilon prevents division by zero.

    Fix: the confusion matrix is now built over the fixed label set {0,1,2},
    so the hard-coded indexing below cannot raise IndexError when a class is
    absent from `y_true`/`pred` (sklearn's confusion_matrix without labels=
    infers the label set from the data and can return a smaller matrix).

    Args:
        y_true: iterable of true class labels in {0, 1, 2}.
        pred: iterable of predicted class labels in {0, 1, 2}.

    Returns:
        CSI score as a float in [0, 1].
    """
    y_true = np.asarray(y_true)
    pred = np.asarray(pred)
    # Fixed 3x3 confusion matrix: cm[i, j] = count of true class i predicted as j.
    cm = np.zeros((3, 3), dtype=np.int64)
    np.add.at(cm, (y_true, pred), 1)

    # Extract H, F, M from the confusion matrix.
    H = (cm[0, 0] + cm[1, 1])

    F = (cm[1, 0] + cm[2, 0] +
         cm[0, 1] + cm[2, 1])

    M = (cm[0, 2] + cm[1, 2])

    # CSI computation.
    CSI = H / (H + F + M + 1e-10)
    return CSI
377
+
378
def sample_weight(y_train):
    """Return per-sample weights using the 'balanced' class-weight scheme.

    Each class c receives weight n_samples / (n_classes * count(c)) — the
    same formula sklearn's compute_class_weight(class_weight='balanced')
    uses — and every sample gets its class's weight.

    Fix: the previous version indexed the weight array with the raw label
    value (`class_weights[label]`), which is only correct when the labels
    are exactly 0..n_classes-1; non-contiguous or shifted labels silently
    picked the wrong weight (or raised IndexError). The lookup now goes
    through an explicit class -> weight mapping.

    Args:
        y_train: iterable (array/Series) of training labels.

    Returns:
        np.ndarray of per-sample weights aligned with `y_train`.
    """
    y = np.asarray(y_train)
    classes, counts = np.unique(y, return_counts=True)
    # Balanced weights, aligned with `classes` (sorted unique labels).
    class_weights = y.size / (classes.size * counts)
    weight_by_class = dict(zip(classes, class_weights))
    sample_weights = np.array([weight_by_class[label] for label in y])

    return sample_weights
387
+
388
+ # ν•˜μ΄νΌνŒŒλΌλ―Έν„° μ΅œμ ν™” ν•¨μˆ˜ μ •μ˜
389
+ def objective(trial, model_choose, region, data_sample='pure', target='multi', n_folds=3, random_state=42):
390
+ # GPU μ‚¬μš© κ°€λŠ₯ μ—¬λΆ€ 확인 및 device μ„€μ •
391
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
392
+ val_scores = []
393
+
394
+ # --- 1. ν•˜μ΄νΌνŒŒλΌλ―Έν„° 탐색 λ²”μœ„ μ •μ˜ (μˆ˜μ •λ¨) ---
395
+ if model_choose == "ft_transformer":
396
+ d_token = trial.suggest_int("d_token", 64, 256, step=32)
397
+ n_blocks = trial.suggest_int("n_blocks", 2, 6) # 깊이 μΆ•μ†Œλ‘œ 과적합 λ°©μ§€
398
+ n_heads = trial.suggest_categorical("n_heads", [4, 8])
399
+ # d_token은 n_heads의 λ°°μˆ˜μ—¬μ•Ό 함 (FT-Transformer의 ꡬ쑰적 μ œμ•½ λŒ€μ‘)
400
+ if d_token % n_heads != 0:
401
+ d_token = (d_token // n_heads) * n_heads
402
+
403
+ attention_dropout = trial.suggest_float("attention_dropout", 0.1, 0.4)
404
+ ffn_dropout = trial.suggest_float("ffn_dropout", 0.1, 0.4)
405
+ lr = trial.suggest_float("lr", 1e-5, 1e-2, log=True) # λ²”μœ„ ν™•λŒ€
406
+ weight_decay = trial.suggest_float("weight_decay", 1e-4, 1e-1, log=True) # 더 곡격적인 λ²”μœ„λ‘œ ν™•μž₯
407
+ batch_size = trial.suggest_categorical("batch_size", [32, 64, 128, 256]) # Batch Size μΆ”κ°€
408
+
409
+ elif model_choose == 'resnet_like':
410
+ d_main = trial.suggest_int("d_main", 64, 256, step=32)
411
+ d_hidden = trial.suggest_int("d_hidden", 64, 512, step=64)
412
+ n_blocks = trial.suggest_int("n_blocks", 2, 5) # λ„ˆλ¬΄ κΉŠμ§€ μ•Šκ²Œ 쑰절
413
+ dropout_first = trial.suggest_float("dropout_first", 0.1, 0.4)
414
+ dropout_second = trial.suggest_float("dropout_second", 0.0, 0.2)
415
+ lr = trial.suggest_float("lr", 1e-5, 1e-2, log=True)
416
+ weight_decay = trial.suggest_float("weight_decay", 1e-4, 1e-1, log=True) # 더 곡격적인 λ²”μœ„λ‘œ ν™•μž₯
417
+ batch_size = trial.suggest_categorical("batch_size", [32, 64, 128, 256]) # Batch Size μΆ”κ°€
418
+
419
+ elif model_choose == 'deepgbm':
420
+ # DeepGBM의 경우 λͺ¨λΈ νŠΉμ„±μ— 맞좰 ResNet 블둝 및 μž„λ² λ”© 차원 쑰절
421
+ d_main = trial.suggest_int("d_main", 64, 256, step=32)
422
+ d_hidden = trial.suggest_int("d_hidden", 64, 256, step=64)
423
+ n_blocks = trial.suggest_int("n_blocks", 2, 6)
424
+ dropout = trial.suggest_float("dropout", 0.1, 0.4)
425
+ lr = trial.suggest_float("lr", 1e-5, 1e-2, log=True)
426
+ weight_decay = trial.suggest_float("weight_decay", 1e-4, 1e-1, log=True) # 더 곡격적인 λ²”μœ„λ‘œ ν™•μž₯
427
+ batch_size = trial.suggest_categorical("batch_size", [32, 64, 128, 256]) # Batch Size μΆ”κ°€
428
+
429
+ # --- 2. Fold별 ν•™μŠ΅ 및 ꡐ차 검증 ---
430
+ for fold in range(1, n_folds + 1):
431
+ X_train_df, categorical_cols, numerical_cols, train_loader, val_loader, _, y_train, _ = prepare_dataloader_with_batchsize(
432
+ region, data_sample=data_sample, target=target, fold=fold, random_state=random_state, batch_size=batch_size
433
+ )
434
+
435
+ # λͺ¨λΈ μ΄ˆκΈ°ν™”
436
+ if model_choose == "ft_transformer":
437
+ model = FTTransformer(
438
+ num_features=len(numerical_cols),
439
+ cat_cardinalities=[len(X_train_df[col].unique()) for col in categorical_cols],
440
+ d_token=d_token,
441
+ n_blocks=n_blocks,
442
+ n_heads=n_heads,
443
+ attention_dropout=attention_dropout,
444
+ ffn_dropout=ffn_dropout,
445
+ num_classes=3
446
+ ).to(device)
447
+ elif model_choose == 'resnet_like':
448
+ input_dim = len(numerical_cols) + len(categorical_cols)
449
+ model = ResNetLike(
450
+ input_dim=input_dim,
451
+ d_main=d_main,
452
+ d_hidden=d_hidden,
453
+ n_blocks=n_blocks,
454
+ dropout_first=dropout_first,
455
+ dropout_second=dropout_second,
456
+ num_classes=3
457
+ ).to(device)
458
+ elif model_choose == 'deepgbm':
459
+ model = DeepGBM(
460
+ num_features=len(numerical_cols),
461
+ cat_features=[len(X_train_df[col].unique()) for col in categorical_cols],
462
+ d_main=d_main,
463
+ d_hidden=d_hidden,
464
+ n_blocks=n_blocks,
465
+ dropout=dropout,
466
+ num_classes=3
467
+ ).to(device)
468
+
469
+ # 클래슀 κ°€μ€‘μΉ˜ 계산 및 손싀 ν•¨μˆ˜ μ„€μ • (Label Smoothing 적용)
470
+ if target == 'multi':
471
+ class_weights = compute_class_weight(
472
+ class_weight='balanced',
473
+ classes=np.unique(y_train),
474
+ y=y_train
475
+ )
476
+ # ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜ 둜그 좜λ ₯
477
+ unique_classes = np.unique(y_train)
478
+ class_counts = {cls: np.sum(y_train == cls) for cls in unique_classes}
479
+ print(f" Fold {fold} - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {dict(zip(unique_classes, class_weights))} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {class_counts})")
480
+ class_weights_tensor = torch.tensor(class_weights, dtype=torch.float32).to(device)
481
+ criterion = nn.CrossEntropyLoss(weight=class_weights_tensor, label_smoothing=0.0) # Label Smoothing μΆ”κ°€
482
+ else:
483
+ criterion = nn.BCEWithLogitsLoss()
484
+ optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
485
+
486
+ # ν•™μŠ΅λ₯  μŠ€μΌ€μ€„λŸ¬ μΆ”κ°€: μ„±λŠ₯ 정체 μ‹œ LR을 0.5λ°° κ°μ†Œ (검증 CSI κΈ°μ€€)
487
+ scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=3)
488
+
489
+ # ν•™μŠ΅ μ„€μ • (에폭 및 νŽ˜μ΄μ…˜μŠ€ 상ν–₯)
490
+ epochs = 200
491
+ patience = 12 # λ”₯λŸ¬λ‹μ˜ 정체 ꡬ간을 κ³ λ €ν•˜μ—¬ μ†Œν­ 상ν–₯
492
+ best_fold_csi = 0
493
+ counter = 0
494
+
495
+ for epoch in range(epochs):
496
+ model.train()
497
+ for x_num_batch, x_cat_batch, y_batch in train_loader:
498
+ x_num_batch, x_cat_batch, y_batch = x_num_batch.to(device), x_cat_batch.to(device), y_batch.to(device)
499
+
500
+ optimizer.zero_grad()
501
+ y_pred = model(x_num_batch, x_cat_batch)
502
+ loss = criterion(y_pred, y_batch if target == 'multi' else y_batch.float())
503
+ loss.backward()
504
+ optimizer.step()
505
+
506
+ # Validation 평가
507
+ model.eval()
508
+ y_pred_val, y_true_val = [], []
509
+ with torch.no_grad():
510
+ for x_num_batch, x_cat_batch, y_batch in val_loader:
511
+ x_num_batch, x_cat_batch, y_batch = x_num_batch.to(device), x_cat_batch.to(device), y_batch.to(device)
512
+ output = model(x_num_batch, x_cat_batch)
513
+ pred = output.argmax(dim=1) if target == 'multi' else (torch.sigmoid(output) >= 0.5).long()
514
+
515
+ y_pred_val.extend(pred.cpu().numpy())
516
+ y_true_val.extend(y_batch.cpu().numpy())
517
+
518
+ # CSI 계산 및 μŠ€μΌ€μ€„λŸ¬ μ—…λ°μ΄νŠΈ
519
+ val_csi = calculate_csi(y_true_val, y_pred_val)
520
+ scheduler.step(val_csi)
521
+
522
+ # Optuna Pruning 적용 (첫 번째 Foldμ—μ„œ μ‘°κΈ° μ’…λ£Œ νŒλ‹¨ κ°•ν™”)
523
+ trial.report(val_csi, epoch)
524
+ if trial.should_prune():
525
+ raise optuna.exceptions.TrialPruned()
526
+
527
+ # Early Stopping 체크
528
+ if val_csi > best_fold_csi:
529
+ best_fold_csi = val_csi
530
+ counter = 0
531
+ else:
532
+ counter += 1
533
+
534
+ if counter >= patience:
535
+ break
536
+
537
+ val_scores.append(best_fold_csi)
538
+
539
+ # λͺ¨λ“  fold의 평균 μ„±λŠ₯ λ°˜ν™˜
540
+ return np.mean(val_scores)
541
+
542
+
543
+ # μ΅œμ ν™”λœ ν•˜μ΄νΌνŒŒλΌλ―Έν„°λ‘œ μ΅œμ’… λͺ¨λΈ ν•™μŠ΅ 및 μ €μž₯ ν•¨μˆ˜
544
+ def train_final_model(best_params, model_choose, region, data_sample='pure', target='multi', n_folds=3, random_state=42):
545
+ """
546
+ μ΅œμ ν™”λœ ν•˜μ΄νΌνŒŒλΌλ―Έν„°λ‘œ μ΅œμ’… λͺ¨λΈμ„ ν•™μŠ΅ν•˜κ³  μ €μž₯ν•©λ‹ˆλ‹€.
547
+
548
+ Args:
549
+ best_params: μ΅œμ ν™”λœ ν•˜μ΄νΌνŒŒλΌλ―Έν„° λ”•μ…”λ„ˆλ¦¬
550
+ model_choose: λͺ¨λΈ 선택 ('ft_transformer', 'resnet_like', 'deepgbm')
551
+ region: μ§€μ—­λͺ…
552
+ data_sample: 데이터 μƒ˜ν”Œ νƒ€μž… ('pure', 'smote', etc.)
553
+ target: νƒ€κ²Ÿ νƒ€μž… ('multi', 'binary')
554
+ n_folds: ꡐ차 검증 fold 수
555
+ random_state: 랜덀 μ‹œλ“œ
556
+
557
+ Returns:
558
+ μ €μž₯된 λͺ¨λΈ 경둜 리슀트
559
+ """
560
+ # GPU μ‚¬μš© κ°€λŠ₯ μ—¬λΆ€ 확인 및 device μ„€μ •
561
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
562
+
563
+ models = []
564
+ scalers = [] # scaler 리슀트 μΆ”κ°€
565
+
566
+ print("μ΅œμ’… λͺ¨λΈ ν•™μŠ΅ μ‹œμž‘...")
567
+
568
+ for fold in range(1, n_folds + 1):
569
+ print(f"Fold {fold} ν•™μŠ΅ 쀑...")
570
+
571
+ # μ΅œμ ν™”λœ batch_size μ‚¬μš©
572
+ batch_size = best_params.get("batch_size", 64)
573
+ X_train_df, categorical_cols, numerical_cols, train_loader, val_loader, _, y_train, scaler = prepare_dataloader_with_batchsize(
574
+ region, data_sample=data_sample, target=target, fold=fold, random_state=random_state, batch_size=batch_size
575
+ )
576
+
577
+ # λͺ¨λΈ μ΄ˆκΈ°ν™”
578
+ if model_choose == "ft_transformer":
579
+ d_token = best_params["d_token"]
580
+ n_heads = best_params.get("n_heads", 8)
581
+ # d_token은 n_heads의 λ°°μˆ˜μ—¬μ•Ό 함 (FT-Transformer의 ꡬ쑰적 μ œμ•½ λŒ€μ‘)
582
+ if d_token % n_heads != 0:
583
+ d_token = (d_token // n_heads) * n_heads
584
+
585
+ model = FTTransformer(
586
+ num_features=len(numerical_cols),
587
+ cat_cardinalities=[len(X_train_df[col].unique()) for col in categorical_cols],
588
+ d_token=d_token,
589
+ n_blocks=best_params["n_blocks"],
590
+ n_heads=n_heads,
591
+ attention_dropout=best_params["attention_dropout"],
592
+ ffn_dropout=best_params["ffn_dropout"],
593
+ num_classes=3
594
+ ).to(device)
595
+ elif model_choose == 'resnet_like':
596
+ input_dim = len(numerical_cols) + len(categorical_cols)
597
+ model = ResNetLike(
598
+ input_dim=input_dim,
599
+ d_main=best_params["d_main"],
600
+ d_hidden=best_params["d_hidden"],
601
+ n_blocks=best_params["n_blocks"],
602
+ dropout_first=best_params["dropout_first"],
603
+ dropout_second=best_params["dropout_second"],
604
+ num_classes=3
605
+ ).to(device)
606
+ elif model_choose == 'deepgbm':
607
+ model = DeepGBM(
608
+ num_features=len(numerical_cols),
609
+ cat_features=[len(X_train_df[col].unique()) for col in categorical_cols],
610
+ d_main=best_params["d_main"],
611
+ d_hidden=best_params["d_hidden"],
612
+ n_blocks=best_params["n_blocks"],
613
+ dropout=best_params["dropout"],
614
+ num_classes=3
615
+ ).to(device)
616
+ else:
617
+ raise ValueError(f"Unknown model_choose: {model_choose}")
618
+
619
+ # 클래슀 κ°€μ€‘μΉ˜ 계산 및 손싀 ν•¨μˆ˜ μ„€μ • (Label Smoothing 적용)
620
+ if target == 'multi':
621
+ class_weights = compute_class_weight(
622
+ class_weight='balanced',
623
+ classes=np.unique(y_train),
624
+ y=y_train
625
+ )
626
+ class_weights_tensor = torch.tensor(class_weights, dtype=torch.float32).to(device)
627
+ criterion = nn.CrossEntropyLoss(weight=class_weights_tensor, label_smoothing=0.0) # Label Smoothing μΆ”κ°€
628
+ else:
629
+ criterion = nn.BCEWithLogitsLoss()
630
+ optimizer = optim.AdamW(model.parameters(), lr=best_params["lr"], weight_decay=best_params["weight_decay"])
631
+
632
+ # ν•™μŠ΅λ₯  μŠ€μΌ€μ€„λŸ¬
633
+ scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=3)
634
+
635
+ # ν•™μŠ΅ μ„€μ •
636
+ epochs = 200
637
+ patience = 12
638
+ best_fold_csi = 0
639
+ counter = 0
640
+ best_model = None
641
+
642
+ for epoch in range(epochs):
643
+ model.train()
644
+ for x_num_batch, x_cat_batch, y_batch in train_loader:
645
+ x_num_batch, x_cat_batch, y_batch = x_num_batch.to(device), x_cat_batch.to(device), y_batch.to(device)
646
+
647
+ optimizer.zero_grad()
648
+ y_pred = model(x_num_batch, x_cat_batch)
649
+ loss = criterion(y_pred, y_batch if target == 'multi' else y_batch.float())
650
+ loss.backward()
651
+ optimizer.step()
652
+
653
+ # Validation 평가
654
+ model.eval()
655
+ y_pred_val, y_true_val = [], []
656
+ with torch.no_grad():
657
+ for x_num_batch, x_cat_batch, y_batch in val_loader:
658
+ x_num_batch, x_cat_batch, y_batch = x_num_batch.to(device), x_cat_batch.to(device), y_batch.to(device)
659
+ output = model(x_num_batch, x_cat_batch)
660
+ pred = output.argmax(dim=1) if target == 'multi' else (torch.sigmoid(output) >= 0.5).long()
661
+
662
+ y_pred_val.extend(pred.cpu().numpy())
663
+ y_true_val.extend(y_batch.cpu().numpy())
664
+
665
+ # CSI 계산 및 μŠ€μΌ€μ€„λŸ¬ μ—…λ°μ΄νŠΈ
666
+ val_csi = calculate_csi(y_true_val, y_pred_val)
667
+ scheduler.step(val_csi)
668
+
669
+ # Early Stopping 체크
670
+ if val_csi > best_fold_csi:
671
+ best_fold_csi = val_csi
672
+ counter = 0
673
+ best_model = copy.deepcopy(model)
674
+ else:
675
+ counter += 1
676
+
677
+ if counter >= patience:
678
+ print(f" Early stopping at epoch {epoch+1}, Best CSI: {best_fold_csi:.4f}")
679
+ break
680
+
681
+ if best_model is None:
682
+ best_model = model
683
+
684
+ scalers.append(scaler) # scaler μ €μž₯ (fold μˆœμ„œλŒ€λ‘œ)
685
+ models.append(best_model)
686
+ print(f" Fold {fold} ν•™μŠ΅ μ™„λ£Œ (검증 CSI: {best_fold_csi:.4f})")
687
+
688
+ # λͺ¨λΈ μ €μž₯ 경둜 μ„€μ •
689
+ save_dir = f'../save_model/{model_choose}_optima'
690
+ os.makedirs(save_dir, exist_ok=True)
691
+
692
+ # 파일λͺ… 생성
693
+ if data_sample == 'pure':
694
+ model_filename = f'{model_choose}_pure_{region}.pkl'
695
+ else:
696
+ model_filename = f'{model_choose}_{data_sample}_{region}.pkl'
697
+
698
+ model_path = f'{save_dir}/{model_filename}'
699
+
700
+ # λ¦¬μŠ€νŠΈμ— λ‹΄μ•„ ν•œ λ²ˆμ— μ €μž₯
701
+ joblib.dump(models, model_path)
702
+ print(f"\nλͺ¨λ“  λͺ¨λΈ μ €μž₯ μ™„λ£Œ: {model_path} (총 {len(models)}개 fold)")
703
+
704
+ # Scaler 별도 μ €μž₯
705
+ scaler_save_dir = f'../save_model/{model_choose}_optima/scaler'
706
+ os.makedirs(scaler_save_dir, exist_ok=True)
707
+
708
+ # 파일λͺ… 생성 (λͺ¨λΈκ³Ό λ™μΌν•œ νŒ¨ν„΄)
709
+ if data_sample == 'pure':
710
+ scaler_filename = f'{model_choose}_pure_{region}_scaler.pkl'
711
+ else:
712
+ scaler_filename = f'{model_choose}_{data_sample}_{region}_scaler.pkl'
713
+
714
+ scaler_path = f'{scaler_save_dir}/{scaler_filename}'
715
+ joblib.dump(scalers, scaler_path)
716
+ print(f"Scaler μ €μž₯ μ™„λ£Œ: {scaler_path} (총 {len(scalers)}개 fold)")
717
+
718
+ return model_path
719
+
Analysis_code/5.optima/run_bash/deepgbm/deepgbm_pure.log ADDED
The diff for this file is too large to render. See raw diff
 
Analysis_code/5.optima/run_bash/deepgbm/deepgbm_smote.log ADDED
The diff for this file is too large to render. See raw diff
 
Analysis_code/5.optima/run_bash/deepgbm/deepgbm_smotenc_ctgan20000.log ADDED
The diff for this file is too large to render. See raw diff
 
Analysis_code/5.optima/run_bash/deepgbm/run_deepgbm_pure.sh CHANGED
@@ -45,7 +45,7 @@ for file in "${FILES[@]}"; do
45
  FILE_START=$(date +%s)
46
 
47
  # Python 슀크립트 μ‹€ν–‰ (GPU 1번 μ„€μ •)
48
- if CUDA_VISIBLE_DEVICES=1 python3 -u "$filepath"; then
49
  FILE_END=$(date +%s)
50
  FILE_DURATION=$((FILE_END - FILE_START))
51
  echo ""
 
45
  FILE_START=$(date +%s)
46
 
47
  # Python 슀크립트 μ‹€ν–‰ (GPU 1번 μ„€μ •)
48
+ if CUDA_VISIBLE_DEVICES=0 python3 -u "$filepath"; then
49
  FILE_END=$(date +%s)
50
  FILE_DURATION=$((FILE_END - FILE_START))
51
  echo ""
Analysis_code/5.optima/run_bash/deepgbm/run_deepgbm_smote.sh CHANGED
@@ -45,7 +45,7 @@ for file in "${FILES[@]}"; do
45
  FILE_START=$(date +%s)
46
 
47
  # Python 슀크립트 μ‹€ν–‰ (GPU 1번 μ„€μ •)
48
- if CUDA_VISIBLE_DEVICES=1 python3 -u "$filepath"; then
49
  FILE_END=$(date +%s)
50
  FILE_DURATION=$((FILE_END - FILE_START))
51
  echo ""
 
45
  FILE_START=$(date +%s)
46
 
47
  # Python 슀크립트 μ‹€ν–‰ (GPU 1번 μ„€μ •)
48
+ if CUDA_VISIBLE_DEVICES=0 python3 -u "$filepath"; then
49
  FILE_END=$(date +%s)
50
  FILE_DURATION=$((FILE_END - FILE_START))
51
  echo ""
Analysis_code/5.optima/run_bash/deepgbm/run_deepgbm_smotenc_ctgan20000.sh CHANGED
@@ -45,7 +45,7 @@ for file in "${FILES[@]}"; do
45
  FILE_START=$(date +%s)
46
 
47
  # Python 슀크립트 μ‹€ν–‰ (GPU 1번 μ„€μ •)
48
- if CUDA_VISIBLE_DEVICES=1 python3 -u "$filepath"; then
49
  FILE_END=$(date +%s)
50
  FILE_DURATION=$((FILE_END - FILE_START))
51
  echo ""
 
45
  FILE_START=$(date +%s)
46
 
47
  # Python 슀크립트 μ‹€ν–‰ (GPU 1번 μ„€μ •)
48
+ if CUDA_VISIBLE_DEVICES=0 python3 -u "$filepath"; then
49
  FILE_END=$(date +%s)
50
  FILE_DURATION=$((FILE_END - FILE_START))
51
  echo ""
Analysis_code/5.optima/run_bash/ft_transformer/ft_transformer_pure.log CHANGED
The diff for this file is too large to render. See raw diff
 
Analysis_code/5.optima/run_bash/ft_transformer/ft_transformer_smote.log CHANGED
The diff for this file is too large to render. See raw diff
 
Analysis_code/5.optima/run_bash/ft_transformer/ft_transformer_smotenc_ctgan20000.log ADDED
The diff for this file is too large to render. See raw diff
 
Analysis_code/5.optima/run_bash/ft_transformer/run_ft_transformer_smotenc_ctgan20000.sh CHANGED
@@ -45,7 +45,7 @@ for file in "${FILES[@]}"; do
45
  FILE_START=$(date +%s)
46
 
47
  # Python 슀크립트 μ‹€ν–‰ (GPU 0번 μ„€μ •)
48
- if CUDA_VISIBLE_DEVICES=1 python3 -u "$filepath"; then
49
  FILE_END=$(date +%s)
50
  FILE_DURATION=$((FILE_END - FILE_START))
51
  echo ""
 
45
  FILE_START=$(date +%s)
46
 
47
  # Python 슀크립트 μ‹€ν–‰ (GPU 0번 μ„€μ •)
48
+ if CUDA_VISIBLE_DEVICES=0 python3 -u "$filepath"; then
49
  FILE_END=$(date +%s)
50
  FILE_DURATION=$((FILE_END - FILE_START))
51
  echo ""
Analysis_code/5.optima/run_bash/resnet_like/resnet_like_pure.log CHANGED
The diff for this file is too large to render. See raw diff
 
Analysis_code/5.optima/run_bash/resnet_like/resnet_like_smote.log CHANGED
The diff for this file is too large to render. See raw diff
 
Analysis_code/5.optima/run_bash/resnet_like/resnet_like_smotenc_ctgan20000.log ADDED
@@ -0,0 +1,343 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ nohup: ignoring input
2
+ /bin/bash: /opt/conda/lib/libtinfo.so.6: no version information available (required by /bin/bash)
3
+ ==========================================
4
+ ResNet-Like SMOTENC CTGAN20000 파일 μ‹€ν–‰ μ‹œμž‘
5
+ μ‹œμž‘ μ‹œκ°„: 2025-12-25 16:59:09
6
+ GPU: 0번 (CUDA_VISIBLE_DEVICES=0)
7
+ ==========================================
8
+
9
+ ----------------------------------------
10
+ μ‹€ν–‰ 쀑: resnet_like_smotenc_ctgan20000/resnet_like_smotenc_ctgan20000_busan.py
11
+ μ‹œμž‘ μ‹œκ°„: 2025-12-25 16:59:09
12
+ ----------------------------------------
13
+ [I 2025-12-25 16:59:11,068] A new study created in memory with name: no-name-07508dc2-d1e4-4e1f-80e4-6cb900bd0bcc
14
+ Fold 1 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9429593495934959, 1: 0.9205079365079365, 2: 1.1721238580321771} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21000, 2: 16492})
15
+ Fold 2 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9440162601626017, 1: 0.917172195892575, 2: 1.1759332401612284} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21100, 2: 16457})
16
+ Fold 3 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9437235772357724, 1: 0.9168878357030016, 2: 1.1768558509236167} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21100, 2: 16439})
17
+ [I 2025-12-25 17:00:25,723] Trial 0 finished with value: 0.4301307659086353 and parameters: {'d_main': 224, 'd_hidden': 256, 'n_blocks': 2, 'dropout_first': 0.10156371676747883, 'dropout_second': 0.022497091377821434, 'lr': 0.003727359142958118, 'weight_decay': 0.00038742389242139776, 'batch_size': 128}. Best is trial 0 with value: 0.4301307659086353.
18
+
19
+ ================================================================================
20
+ Trial 0 μ™„λ£Œ
21
+ Value (CSI): 0.430131
22
+ Parameters: {'d_main': 224, 'd_hidden': 256, 'n_blocks': 2, 'dropout_first': 0.10156371676747883, 'dropout_second': 0.022497091377821434, 'lr': 0.003727359142958118, 'weight_decay': 0.00038742389242139776, 'batch_size': 128}
23
+ Best Value (CSI): 0.430131
24
+ Best Trial: 0
25
+ Best Parameters: {'d_main': 224, 'd_hidden': 256, 'n_blocks': 2, 'dropout_first': 0.10156371676747883, 'dropout_second': 0.022497091377821434, 'lr': 0.003727359142958118, 'weight_decay': 0.00038742389242139776, 'batch_size': 128}
26
+ ================================================================================
27
+
28
+ Fold 1 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9429593495934959, 1: 0.9205079365079365, 2: 1.1721238580321771} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21000, 2: 16492})
29
+ Fold 2 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9440162601626017, 1: 0.917172195892575, 2: 1.1759332401612284} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21100, 2: 16457})
30
+ Fold 3 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9437235772357724, 1: 0.9168878357030016, 2: 1.1768558509236167} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21100, 2: 16439})
31
+ [I 2025-12-25 17:09:00,346] Trial 1 finished with value: 0.34866160612685354 and parameters: {'d_main': 192, 'd_hidden': 320, 'n_blocks': 4, 'dropout_first': 0.27116449295886247, 'dropout_second': 0.012755481146547676, 'lr': 1.6105326621013827e-05, 'weight_decay': 0.09159870455148564, 'batch_size': 32}. Best is trial 0 with value: 0.4301307659086353.
32
+
33
+ ================================================================================
34
+ Trial 1 μ™„λ£Œ
35
+ Value (CSI): 0.348662
36
+ Parameters: {'d_main': 192, 'd_hidden': 320, 'n_blocks': 4, 'dropout_first': 0.27116449295886247, 'dropout_second': 0.012755481146547676, 'lr': 1.6105326621013827e-05, 'weight_decay': 0.09159870455148564, 'batch_size': 32}
37
+ Best Value (CSI): 0.430131
38
+ Best Trial: 0
39
+ Best Parameters: {'d_main': 224, 'd_hidden': 256, 'n_blocks': 2, 'dropout_first': 0.10156371676747883, 'dropout_second': 0.022497091377821434, 'lr': 0.003727359142958118, 'weight_decay': 0.00038742389242139776, 'batch_size': 128}
40
+ ================================================================================
41
+
42
+ Fold 1 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9429593495934959, 1: 0.9205079365079365, 2: 1.1721238580321771} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21000, 2: 16492})
43
+ Fold 2 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9440162601626017, 1: 0.917172195892575, 2: 1.1759332401612284} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21100, 2: 16457})
44
+ Fold 3 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9437235772357724, 1: 0.9168878357030016, 2: 1.1768558509236167} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21100, 2: 16439})
45
+ [I 2025-12-25 17:10:23,826] Trial 2 finished with value: 0.4298469011463266 and parameters: {'d_main': 160, 'd_hidden': 64, 'n_blocks': 5, 'dropout_first': 0.2779266822607858, 'dropout_second': 0.08398746485299519, 'lr': 0.009843086320286042, 'weight_decay': 0.00023371434986427377, 'batch_size': 256}. Best is trial 0 with value: 0.4301307659086353.
46
+
47
+ ================================================================================
48
+ Trial 2 μ™„λ£Œ
49
+ Value (CSI): 0.429847
50
+ Parameters: {'d_main': 160, 'd_hidden': 64, 'n_blocks': 5, 'dropout_first': 0.2779266822607858, 'dropout_second': 0.08398746485299519, 'lr': 0.009843086320286042, 'weight_decay': 0.00023371434986427377, 'batch_size': 256}
51
+ Best Value (CSI): 0.430131
52
+ Best Trial: 0
53
+ Best Parameters: {'d_main': 224, 'd_hidden': 256, 'n_blocks': 2, 'dropout_first': 0.10156371676747883, 'dropout_second': 0.022497091377821434, 'lr': 0.003727359142958118, 'weight_decay': 0.00038742389242139776, 'batch_size': 128}
54
+ ================================================================================
55
+
56
+ Fold 1 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9429593495934959, 1: 0.9205079365079365, 2: 1.1721238580321771} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21000, 2: 16492})
57
+ Fold 2 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9440162601626017, 1: 0.917172195892575, 2: 1.1759332401612284} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21100, 2: 16457})
58
+ Fold 3 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9437235772357724, 1: 0.9168878357030016, 2: 1.1768558509236167} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21100, 2: 16439})
59
+ [I 2025-12-25 17:14:03,913] Trial 3 finished with value: 0.3845075126216187 and parameters: {'d_main': 64, 'd_hidden': 384, 'n_blocks': 2, 'dropout_first': 0.2753804618350803, 'dropout_second': 0.024769672838715607, 'lr': 0.00044363391353525626, 'weight_decay': 0.0002347247602245694, 'batch_size': 32}. Best is trial 0 with value: 0.4301307659086353.
60
+
61
+ ================================================================================
62
+ Trial 3 μ™„λ£Œ
63
+ Value (CSI): 0.384508
64
+ Parameters: {'d_main': 64, 'd_hidden': 384, 'n_blocks': 2, 'dropout_first': 0.2753804618350803, 'dropout_second': 0.024769672838715607, 'lr': 0.00044363391353525626, 'weight_decay': 0.0002347247602245694, 'batch_size': 32}
65
+ Best Value (CSI): 0.430131
66
+ Best Trial: 0
67
+ Best Parameters: {'d_main': 224, 'd_hidden': 256, 'n_blocks': 2, 'dropout_first': 0.10156371676747883, 'dropout_second': 0.022497091377821434, 'lr': 0.003727359142958118, 'weight_decay': 0.00038742389242139776, 'batch_size': 128}
68
+ ================================================================================
69
+
70
+ Fold 1 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9429593495934959, 1: 0.9205079365079365, 2: 1.1721238580321771} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21000, 2: 16492})
71
+ Fold 2 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9440162601626017, 1: 0.917172195892575, 2: 1.1759332401612284} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21100, 2: 16457})
72
+ Fold 3 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9437235772357724, 1: 0.9168878357030016, 2: 1.1768558509236167} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21100, 2: 16439})
73
+ [I 2025-12-25 17:21:11,653] Trial 4 finished with value: 0.3309491353413913 and parameters: {'d_main': 256, 'd_hidden': 384, 'n_blocks': 4, 'dropout_first': 0.28310034806911577, 'dropout_second': 0.006932423729906057, 'lr': 1.136506228965239e-05, 'weight_decay': 0.0036183895571627513, 'batch_size': 32}. Best is trial 0 with value: 0.4301307659086353.
74
+
75
+ ================================================================================
76
+ Trial 4 μ™„λ£Œ
77
+ Value (CSI): 0.330949
78
+ Parameters: {'d_main': 256, 'd_hidden': 384, 'n_blocks': 4, 'dropout_first': 0.28310034806911577, 'dropout_second': 0.006932423729906057, 'lr': 1.136506228965239e-05, 'weight_decay': 0.0036183895571627513, 'batch_size': 32}
79
+ Best Value (CSI): 0.430131
80
+ Best Trial: 0
81
+ Best Parameters: {'d_main': 224, 'd_hidden': 256, 'n_blocks': 2, 'dropout_first': 0.10156371676747883, 'dropout_second': 0.022497091377821434, 'lr': 0.003727359142958118, 'weight_decay': 0.00038742389242139776, 'batch_size': 128}
82
+ ================================================================================
83
+
84
+ Fold 1 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9429593495934959, 1: 0.9205079365079365, 2: 1.1721238580321771} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21000, 2: 16492})
85
+ Fold 2 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9440162601626017, 1: 0.917172195892575, 2: 1.1759332401612284} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21100, 2: 16457})
86
+ Fold 3 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9437235772357724, 1: 0.9168878357030016, 2: 1.1768558509236167} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21100, 2: 16439})
87
+ [I 2025-12-25 17:22:14,266] Trial 5 finished with value: 0.3899932374661734 and parameters: {'d_main': 128, 'd_hidden': 384, 'n_blocks': 2, 'dropout_first': 0.2844695689401958, 'dropout_second': 0.1572561815094476, 'lr': 0.0008860248969002365, 'weight_decay': 0.0037909150895975353, 'batch_size': 256}. Best is trial 0 with value: 0.4301307659086353.
88
+
89
+ ================================================================================
90
+ Trial 5 μ™„λ£Œ
91
+ Value (CSI): 0.389993
92
+ Parameters: {'d_main': 128, 'd_hidden': 384, 'n_blocks': 2, 'dropout_first': 0.2844695689401958, 'dropout_second': 0.1572561815094476, 'lr': 0.0008860248969002365, 'weight_decay': 0.0037909150895975353, 'batch_size': 256}
93
+ Best Value (CSI): 0.430131
94
+ Best Trial: 0
95
+ Best Parameters: {'d_main': 224, 'd_hidden': 256, 'n_blocks': 2, 'dropout_first': 0.10156371676747883, 'dropout_second': 0.022497091377821434, 'lr': 0.003727359142958118, 'weight_decay': 0.00038742389242139776, 'batch_size': 128}
96
+ ================================================================================
97
+
98
+ Fold 1 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9429593495934959, 1: 0.9205079365079365, 2: 1.1721238580321771} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21000, 2: 16492})
99
+ [I 2025-12-25 17:22:55,435] Trial 6 pruned.
100
+
101
+ ================================================================================
102
+ Trial 6 μ™„λ£Œ
103
+ Value (CSI): 0.241888
104
+ Parameters: {'d_main': 128, 'd_hidden': 64, 'n_blocks': 5, 'dropout_first': 0.1806643232253578, 'dropout_second': 0.020619058831649786, 'lr': 1.3595828033778886e-05, 'weight_decay': 0.00022285919677684105, 'batch_size': 32}
105
+ Best Value (CSI): 0.430131
106
+ Best Trial: 0
107
+ Best Parameters: {'d_main': 224, 'd_hidden': 256, 'n_blocks': 2, 'dropout_first': 0.10156371676747883, 'dropout_second': 0.022497091377821434, 'lr': 0.003727359142958118, 'weight_decay': 0.00038742389242139776, 'batch_size': 128}
108
+ ================================================================================
109
+
110
+ Fold 1 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9429593495934959, 1: 0.9205079365079365, 2: 1.1721238580321771} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21000, 2: 16492})
111
+ Fold 2 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9440162601626017, 1: 0.917172195892575, 2: 1.1759332401612284} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21100, 2: 16457})
112
+ Fold 3 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9437235772357724, 1: 0.9168878357030016, 2: 1.1768558509236167} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21100, 2: 16439})
113
+ [I 2025-12-25 17:24:23,815] Trial 7 finished with value: 0.40834348300054496 and parameters: {'d_main': 160, 'd_hidden': 320, 'n_blocks': 4, 'dropout_first': 0.1717426678254888, 'dropout_second': 0.0934178652146346, 'lr': 0.0007575854806569902, 'weight_decay': 0.01534216484408197, 'batch_size': 128}. Best is trial 0 with value: 0.4301307659086353.
114
+
115
+ ================================================================================
116
+ Trial 7 μ™„λ£Œ
117
+ Value (CSI): 0.408343
118
+ Parameters: {'d_main': 160, 'd_hidden': 320, 'n_blocks': 4, 'dropout_first': 0.1717426678254888, 'dropout_second': 0.0934178652146346, 'lr': 0.0007575854806569902, 'weight_decay': 0.01534216484408197, 'batch_size': 128}
119
+ Best Value (CSI): 0.430131
120
+ Best Trial: 0
121
+ Best Parameters: {'d_main': 224, 'd_hidden': 256, 'n_blocks': 2, 'dropout_first': 0.10156371676747883, 'dropout_second': 0.022497091377821434, 'lr': 0.003727359142958118, 'weight_decay': 0.00038742389242139776, 'batch_size': 128}
122
+ ================================================================================
123
+
124
+ Fold 1 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9429593495934959, 1: 0.9205079365079365, 2: 1.1721238580321771} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21000, 2: 16492})
125
+ [I 2025-12-25 17:24:32,169] Trial 8 pruned.
126
+
127
+ ================================================================================
128
+ Trial 8 μ™„λ£Œ
129
+ Value (CSI): 0.209073
130
+ Parameters: {'d_main': 224, 'd_hidden': 384, 'n_blocks': 3, 'dropout_first': 0.14151506978958192, 'dropout_second': 0.07948320800127218, 'lr': 1.004046861765609e-05, 'weight_decay': 0.007637138900375409, 'batch_size': 256}
131
+ Best Value (CSI): 0.430131
132
+ Best Trial: 0
133
+ Best Parameters: {'d_main': 224, 'd_hidden': 256, 'n_blocks': 2, 'dropout_first': 0.10156371676747883, 'dropout_second': 0.022497091377821434, 'lr': 0.003727359142958118, 'weight_decay': 0.00038742389242139776, 'batch_size': 128}
134
+ ================================================================================
135
+
136
+ Fold 1 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9429593495934959, 1: 0.9205079365079365, 2: 1.1721238580321771} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21000, 2: 16492})
137
+ [I 2025-12-25 17:25:12,850] Trial 9 pruned.
138
+
139
+ ================================================================================
140
+ Trial 9 μ™„λ£Œ
141
+ Value (CSI): 0.208511
142
+ Parameters: {'d_main': 128, 'd_hidden': 256, 'n_blocks': 4, 'dropout_first': 0.2317115408820841, 'dropout_second': 0.14700723302643884, 'lr': 1.2612737429485679e-05, 'weight_decay': 0.04037328340694686, 'batch_size': 32}
143
+ Best Value (CSI): 0.430131
144
+ Best Trial: 0
145
+ Best Parameters: {'d_main': 224, 'd_hidden': 256, 'n_blocks': 2, 'dropout_first': 0.10156371676747883, 'dropout_second': 0.022497091377821434, 'lr': 0.003727359142958118, 'weight_decay': 0.00038742389242139776, 'batch_size': 128}
146
+ ================================================================================
147
+
148
+ Fold 1 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9429593495934959, 1: 0.9205079365079365, 2: 1.1721238580321771} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21000, 2: 16492})
149
+ Fold 2 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9440162601626017, 1: 0.917172195892575, 2: 1.1759332401612284} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21100, 2: 16457})
150
+ Fold 3 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9437235772357724, 1: 0.9168878357030016, 2: 1.1768558509236167} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21100, 2: 16439})
151
+ [I 2025-12-25 17:26:41,245] Trial 10 finished with value: 0.45568873098715307 and parameters: {'d_main': 256, 'd_hidden': 512, 'n_blocks': 3, 'dropout_first': 0.1045169105881141, 'dropout_second': 0.05372596450552944, 'lr': 0.00943776593390798, 'weight_decay': 0.0010575459741554466, 'batch_size': 128}. Best is trial 10 with value: 0.45568873098715307.
152
+
153
+ ================================================================================
154
+ Trial 10 μ™„λ£Œ
155
+ Value (CSI): 0.455689
156
+ Parameters: {'d_main': 256, 'd_hidden': 512, 'n_blocks': 3, 'dropout_first': 0.1045169105881141, 'dropout_second': 0.05372596450552944, 'lr': 0.00943776593390798, 'weight_decay': 0.0010575459741554466, 'batch_size': 128}
157
+ Best Value (CSI): 0.455689
158
+ Best Trial: 10
159
+ Best Parameters: {'d_main': 256, 'd_hidden': 512, 'n_blocks': 3, 'dropout_first': 0.1045169105881141, 'dropout_second': 0.05372596450552944, 'lr': 0.00943776593390798, 'weight_decay': 0.0010575459741554466, 'batch_size': 128}
160
+ ================================================================================
161
+
162
+ Fold 1 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9429593495934959, 1: 0.9205079365079365, 2: 1.1721238580321771} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21000, 2: 16492})
163
+ Fold 2 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9440162601626017, 1: 0.917172195892575, 2: 1.1759332401612284} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21100, 2: 16457})
164
+ Fold 3 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9437235772357724, 1: 0.9168878357030016, 2: 1.1768558509236167} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21100, 2: 16439})
165
+ [I 2025-12-25 17:27:47,879] Trial 11 finished with value: 0.42197542511098435 and parameters: {'d_main': 256, 'd_hidden': 192, 'n_blocks': 3, 'dropout_first': 0.11027426804047841, 'dropout_second': 0.04995550792691689, 'lr': 0.009638623921852399, 'weight_decay': 0.0007413784723226764, 'batch_size': 128}. Best is trial 10 with value: 0.45568873098715307.
166
+
167
+ ================================================================================
168
+ Trial 11 μ™„λ£Œ
169
+ Value (CSI): 0.421975
170
+ Parameters: {'d_main': 256, 'd_hidden': 192, 'n_blocks': 3, 'dropout_first': 0.11027426804047841, 'dropout_second': 0.04995550792691689, 'lr': 0.009638623921852399, 'weight_decay': 0.0007413784723226764, 'batch_size': 128}
171
+ Best Value (CSI): 0.455689
172
+ Best Trial: 10
173
+ Best Parameters: {'d_main': 256, 'd_hidden': 512, 'n_blocks': 3, 'dropout_first': 0.1045169105881141, 'dropout_second': 0.05372596450552944, 'lr': 0.00943776593390798, 'weight_decay': 0.0010575459741554466, 'batch_size': 128}
174
+ ================================================================================
175
+
176
+ Fold 1 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9429593495934959, 1: 0.9205079365079365, 2: 1.1721238580321771} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21000, 2: 16492})
177
+ Fold 2 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9440162601626017, 1: 0.917172195892575, 2: 1.1759332401612284} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21100, 2: 16457})
178
+ Fold 3 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9437235772357724, 1: 0.9168878357030016, 2: 1.1768558509236167} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21100, 2: 16439})
179
+ [I 2025-12-25 17:29:29,987] Trial 12 finished with value: 0.44433747481934155 and parameters: {'d_main': 224, 'd_hidden': 512, 'n_blocks': 2, 'dropout_first': 0.10499188234645457, 'dropout_second': 0.05134622783384764, 'lr': 0.0031463084726703365, 'weight_decay': 0.0008736315412771812, 'batch_size': 64}. Best is trial 10 with value: 0.45568873098715307.
180
+
181
+ ================================================================================
182
+ Trial 12 μ™„λ£Œ
183
+ Value (CSI): 0.444337
184
+ Parameters: {'d_main': 224, 'd_hidden': 512, 'n_blocks': 2, 'dropout_first': 0.10499188234645457, 'dropout_second': 0.05134622783384764, 'lr': 0.0031463084726703365, 'weight_decay': 0.0008736315412771812, 'batch_size': 64}
185
+ Best Value (CSI): 0.455689
186
+ Best Trial: 10
187
+ Best Parameters: {'d_main': 256, 'd_hidden': 512, 'n_blocks': 3, 'dropout_first': 0.1045169105881141, 'dropout_second': 0.05372596450552944, 'lr': 0.00943776593390798, 'weight_decay': 0.0010575459741554466, 'batch_size': 128}
188
+ ================================================================================
189
+
190
+ Fold 1 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9429593495934959, 1: 0.9205079365079365, 2: 1.1721238580321771} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21000, 2: 16492})
191
+ Fold 2 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9440162601626017, 1: 0.917172195892575, 2: 1.1759332401612284} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21100, 2: 16457})
192
+ Fold 3 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9437235772357724, 1: 0.9168878357030016, 2: 1.1768558509236167} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21100, 2: 16439})
193
+ [I 2025-12-25 17:31:13,317] Trial 13 finished with value: 0.42346682809059244 and parameters: {'d_main': 224, 'd_hidden': 512, 'n_blocks': 3, 'dropout_first': 0.38531025057952983, 'dropout_second': 0.05770605194787165, 'lr': 0.0029524632666265583, 'weight_decay': 0.0010566025989832471, 'batch_size': 64}. Best is trial 10 with value: 0.45568873098715307.
194
+
195
+ ================================================================================
196
+ Trial 13 μ™„λ£Œ
197
+ Value (CSI): 0.423467
198
+ Parameters: {'d_main': 224, 'd_hidden': 512, 'n_blocks': 3, 'dropout_first': 0.38531025057952983, 'dropout_second': 0.05770605194787165, 'lr': 0.0029524632666265583, 'weight_decay': 0.0010566025989832471, 'batch_size': 64}
199
+ Best Value (CSI): 0.455689
200
+ Best Trial: 10
201
+ Best Parameters: {'d_main': 256, 'd_hidden': 512, 'n_blocks': 3, 'dropout_first': 0.1045169105881141, 'dropout_second': 0.05372596450552944, 'lr': 0.00943776593390798, 'weight_decay': 0.0010575459741554466, 'batch_size': 128}
202
+ ================================================================================
203
+
204
+ Fold 1 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9429593495934959, 1: 0.9205079365079365, 2: 1.1721238580321771} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21000, 2: 16492})
205
+ Fold 2 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9440162601626017, 1: 0.917172195892575, 2: 1.1759332401612284} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21100, 2: 16457})
206
+ Fold 3 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9437235772357724, 1: 0.9168878357030016, 2: 1.1768558509236167} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21100, 2: 16439})
207
+ [I 2025-12-25 17:32:54,124] Trial 14 finished with value: 0.43889187275926794 and parameters: {'d_main': 256, 'd_hidden': 512, 'n_blocks': 2, 'dropout_first': 0.13826103711935042, 'dropout_second': 0.12295366379536615, 'lr': 0.002792553877523577, 'weight_decay': 0.0012655524586531054, 'batch_size': 64}. Best is trial 10 with value: 0.45568873098715307.
208
+
209
+ ================================================================================
210
+ Trial 14 μ™„λ£Œ
211
+ Value (CSI): 0.438892
212
+ Parameters: {'d_main': 256, 'd_hidden': 512, 'n_blocks': 2, 'dropout_first': 0.13826103711935042, 'dropout_second': 0.12295366379536615, 'lr': 0.002792553877523577, 'weight_decay': 0.0012655524586531054, 'batch_size': 64}
213
+ Best Value (CSI): 0.455689
214
+ Best Trial: 10
215
+ Best Parameters: {'d_main': 256, 'd_hidden': 512, 'n_blocks': 3, 'dropout_first': 0.1045169105881141, 'dropout_second': 0.05372596450552944, 'lr': 0.00943776593390798, 'weight_decay': 0.0010575459741554466, 'batch_size': 128}
216
+ ================================================================================
217
+
218
+ Fold 1 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9429593495934959, 1: 0.9205079365079365, 2: 1.1721238580321771} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21000, 2: 16492})
219
+ [I 2025-12-25 17:33:10,457] Trial 15 pruned.
220
+
221
+ ================================================================================
222
+ Trial 15 μ™„λ£Œ
223
+ Value (CSI): 0.382440
224
+ Parameters: {'d_main': 192, 'd_hidden': 512, 'n_blocks': 3, 'dropout_first': 0.11005239342556938, 'dropout_second': 0.05282313351206165, 'lr': 0.0014744290756182105, 'weight_decay': 0.0014332063342351049, 'batch_size': 64}
225
+ Best Value (CSI): 0.455689
226
+ Best Trial: 10
227
+ Best Parameters: {'d_main': 256, 'd_hidden': 512, 'n_blocks': 3, 'dropout_first': 0.1045169105881141, 'dropout_second': 0.05372596450552944, 'lr': 0.00943776593390798, 'weight_decay': 0.0010575459741554466, 'batch_size': 128}
228
+ ================================================================================
229
+
230
+ Fold 1 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9429593495934959, 1: 0.9205079365079365, 2: 1.1721238580321771} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21000, 2: 16492})
231
+ [I 2025-12-25 17:33:19,805] Trial 16 pruned.
232
+
233
+ ================================================================================
234
+ Trial 16 μ™„λ£Œ
235
+ Value (CSI): 0.353548
236
+ Parameters: {'d_main': 192, 'd_hidden': 448, 'n_blocks': 2, 'dropout_first': 0.1888472184430823, 'dropout_second': 0.18995712629771064, 'lr': 0.00021100251322998348, 'weight_decay': 0.00011516083231935141, 'batch_size': 128}
237
+ Best Value (CSI): 0.455689
238
+ Best Trial: 10
239
+ Best Parameters: {'d_main': 256, 'd_hidden': 512, 'n_blocks': 3, 'dropout_first': 0.1045169105881141, 'dropout_second': 0.05372596450552944, 'lr': 0.00943776593390798, 'weight_decay': 0.0010575459741554466, 'batch_size': 128}
240
+ ================================================================================
241
+
242
+ Fold 1 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9429593495934959, 1: 0.9205079365079365, 2: 1.1721238580321771} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21000, 2: 16492})
243
+ [I 2025-12-25 17:33:36,528] Trial 17 pruned.
244
+
245
+ ================================================================================
246
+ Trial 17 μ™„λ£Œ
247
+ Value (CSI): 0.369598
248
+ Parameters: {'d_main': 224, 'd_hidden': 448, 'n_blocks': 3, 'dropout_first': 0.148777791202791, 'dropout_second': 0.06125371639544101, 'lr': 0.004966293343601136, 'weight_decay': 0.002047276938146628, 'batch_size': 64}
249
+ Best Value (CSI): 0.455689
250
+ Best Trial: 10
251
+ Best Parameters: {'d_main': 256, 'd_hidden': 512, 'n_blocks': 3, 'dropout_first': 0.1045169105881141, 'dropout_second': 0.05372596450552944, 'lr': 0.00943776593390798, 'weight_decay': 0.0010575459741554466, 'batch_size': 128}
252
+ ================================================================================
253
+
254
+ Fold 1 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9429593495934959, 1: 0.9205079365079365, 2: 1.1721238580321771} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21000, 2: 16492})
255
+ Fold 2 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9440162601626017, 1: 0.917172195892575, 2: 1.1759332401612284} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21100, 2: 16457})
256
+ Fold 3 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9437235772357724, 1: 0.9168878357030016, 2: 1.1768558509236167} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21100, 2: 16439})
257
+ [I 2025-12-25 17:35:19,256] Trial 18 finished with value: 0.43554917854930214 and parameters: {'d_main': 64, 'd_hidden': 448, 'n_blocks': 2, 'dropout_first': 0.10183795686380824, 'dropout_second': 0.11092680117376562, 'lr': 0.009728600379224997, 'weight_decay': 0.0006966213827491482, 'batch_size': 64}. Best is trial 10 with value: 0.45568873098715307.
258
+
259
+ ================================================================================
260
+ Trial 18 μ™„λ£Œ
261
+ Value (CSI): 0.435549
262
+ Parameters: {'d_main': 64, 'd_hidden': 448, 'n_blocks': 2, 'dropout_first': 0.10183795686380824, 'dropout_second': 0.11092680117376562, 'lr': 0.009728600379224997, 'weight_decay': 0.0006966213827491482, 'batch_size': 64}
263
+ Best Value (CSI): 0.455689
264
+ Best Trial: 10
265
+ Best Parameters: {'d_main': 256, 'd_hidden': 512, 'n_blocks': 3, 'dropout_first': 0.1045169105881141, 'dropout_second': 0.05372596450552944, 'lr': 0.00943776593390798, 'weight_decay': 0.0010575459741554466, 'batch_size': 128}
266
+ ================================================================================
267
+
268
+ Fold 1 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9429593495934959, 1: 0.9205079365079365, 2: 1.1721238580321771} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21000, 2: 16492})
269
+ [I 2025-12-25 17:35:29,604] Trial 19 pruned.
270
+
271
+ ================================================================================
272
+ Trial 19 μ™„λ£Œ
273
+ Value (CSI): 0.374277
274
+ Parameters: {'d_main': 256, 'd_hidden': 128, 'n_blocks': 3, 'dropout_first': 0.20458013340839706, 'dropout_second': 0.03936539506356172, 'lr': 0.002044084446572949, 'weight_decay': 0.0005937198842313242, 'batch_size': 128}
275
+ Best Value (CSI): 0.455689
276
+ Best Trial: 10
277
+ Best Parameters: {'d_main': 256, 'd_hidden': 512, 'n_blocks': 3, 'dropout_first': 0.1045169105881141, 'dropout_second': 0.05372596450552944, 'lr': 0.00943776593390798, 'weight_decay': 0.0010575459741554466, 'batch_size': 128}
278
+ ================================================================================
279
+
280
+ Fold 1 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9429593495934959, 1: 0.9205079365079365, 2: 1.1721238580321771} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21000, 2: 16492})
281
+ [I 2025-12-25 17:35:38,703] Trial 20 pruned.
282
+
283
+ ================================================================================
284
+ Trial 20 μ™„λ£Œ
285
+ Value (CSI): 0.279321
286
+ Parameters: {'d_main': 224, 'd_hidden': 512, 'n_blocks': 2, 'dropout_first': 0.13947875703479234, 'dropout_second': 0.07418201307448172, 'lr': 0.005384115015845719, 'weight_decay': 0.0025611933143003217, 'batch_size': 128}
287
+ Best Value (CSI): 0.455689
288
+ Best Trial: 10
289
+ Best Parameters: {'d_main': 256, 'd_hidden': 512, 'n_blocks': 3, 'dropout_first': 0.1045169105881141, 'dropout_second': 0.05372596450552944, 'lr': 0.00943776593390798, 'weight_decay': 0.0010575459741554466, 'batch_size': 128}
290
+ ================================================================================
291
+
292
+ Fold 1 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9429593495934959, 1: 0.9205079365079365, 2: 1.1721238580321771} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21000, 2: 16492})
293
+ [I 2025-12-25 17:35:53,337] Trial 21 pruned.
294
+
295
+ ================================================================================
296
+ Trial 21 μ™„λ£Œ
297
+ Value (CSI): 0.362717
298
+ Parameters: {'d_main': 256, 'd_hidden': 512, 'n_blocks': 2, 'dropout_first': 0.13626921113677734, 'dropout_second': 0.10030012446906778, 'lr': 0.002037737924762407, 'weight_decay': 0.001347899826038143, 'batch_size': 64}
299
+ Best Value (CSI): 0.455689
300
+ Best Trial: 10
301
+ Best Parameters: {'d_main': 256, 'd_hidden': 512, 'n_blocks': 3, 'dropout_first': 0.1045169105881141, 'dropout_second': 0.05372596450552944, 'lr': 0.00943776593390798, 'weight_decay': 0.0010575459741554466, 'batch_size': 128}
302
+ ================================================================================
303
+
304
+ Fold 1 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9429593495934959, 1: 0.9205079365079365, 2: 1.1721238580321771} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21000, 2: 16492})
305
+ [I 2025-12-25 17:36:08,009] Trial 22 pruned.
306
+
307
+ ================================================================================
308
+ Trial 22 μ™„λ£Œ
309
+ Value (CSI): 0.343606
310
+ Parameters: {'d_main': 256, 'd_hidden': 448, 'n_blocks': 2, 'dropout_first': 0.1293383333282218, 'dropout_second': 0.11731304342724859, 'lr': 0.004738846815538181, 'weight_decay': 0.0013996905007623878, 'batch_size': 64}
311
+ Best Value (CSI): 0.455689
312
+ Best Trial: 10
313
+ Best Parameters: {'d_main': 256, 'd_hidden': 512, 'n_blocks': 3, 'dropout_first': 0.1045169105881141, 'dropout_second': 0.05372596450552944, 'lr': 0.00943776593390798, 'weight_decay': 0.0010575459741554466, 'batch_size': 128}
314
+ ================================================================================
315
+
316
+ Fold 1 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9429593495934959, 1: 0.9205079365079365, 2: 1.1721238580321771} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21000, 2: 16492})
317
+ Fold 2 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9440162601626017, 1: 0.917172195892575, 2: 1.1759332401612284} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21100, 2: 16457})
318
+ Fold 3 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9437235772357724, 1: 0.9168878357030016, 2: 1.1768558509236167} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21100, 2: 16439})
319
+ [I 2025-12-25 17:38:04,053] Trial 23 finished with value: 0.4283188153909843 and parameters: {'d_main': 256, 'd_hidden': 512, 'n_blocks': 3, 'dropout_first': 0.15657019116372836, 'dropout_second': 0.03579914728063769, 'lr': 0.0024610566433362767, 'weight_decay': 0.0005158217464369175, 'batch_size': 64}. Best is trial 10 with value: 0.45568873098715307.
320
+
321
+ ================================================================================
322
+ Trial 23 μ™„λ£Œ
323
+ Value (CSI): 0.428319
324
+ Parameters: {'d_main': 256, 'd_hidden': 512, 'n_blocks': 3, 'dropout_first': 0.15657019116372836, 'dropout_second': 0.03579914728063769, 'lr': 0.0024610566433362767, 'weight_decay': 0.0005158217464369175, 'batch_size': 64}
325
+ Best Value (CSI): 0.455689
326
+ Best Trial: 10
327
+ Best Parameters: {'d_main': 256, 'd_hidden': 512, 'n_blocks': 3, 'dropout_first': 0.1045169105881141, 'dropout_second': 0.05372596450552944, 'lr': 0.00943776593390798, 'weight_decay': 0.0010575459741554466, 'batch_size': 128}
328
+ ================================================================================
329
+
330
+ Fold 1 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9429593495934959, 1: 0.9205079365079365, 2: 1.1721238580321771} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21000, 2: 16492})
331
+ [I 2025-12-25 17:38:20,002] Trial 24 pruned.
332
+
333
+ ================================================================================
334
+ Trial 24 μ™„λ£Œ
335
+ Value (CSI): 0.366959
336
+ Parameters: {'d_main': 192, 'd_hidden': 448, 'n_blocks': 2, 'dropout_first': 0.12366099999313362, 'dropout_second': 0.066707588276286, 'lr': 0.0054929082954202554, 'weight_decay': 0.0010668358869719242, 'batch_size': 64}
337
+ Best Value (CSI): 0.455689
338
+ Best Trial: 10
339
+ Best Parameters: {'d_main': 256, 'd_hidden': 512, 'n_blocks': 3, 'dropout_first': 0.1045169105881141, 'dropout_second': 0.05372596450552944, 'lr': 0.00943776593390798, 'weight_decay': 0.0010575459741554466, 'batch_size': 128}
340
+ ================================================================================
341
+
342
+ Fold 1 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9429593495934959, 1: 0.9205079365079365, 2: 1.1721238580321771} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21000, 2: 16492})
343
+ Fold 2 - ν΄λž˜μŠ€λ³„ κ°€μ€‘μΉ˜: {0: 0.9440162601626017, 1: 0.917172195892575, 2: 1.1759332401612284} (ν΄λž˜μŠ€λ³„ μƒ˜ν”Œ 수: {0: 20500, 1: 21100, 2: 16457})