aromidvar committed on
Commit
e802234
·
verified ·
1 Parent(s): af32ced

Update core/train_eval.py

Browse files
Files changed (1) hide show
  1. core/train_eval.py +50 -23
core/train_eval.py CHANGED
@@ -1,5 +1,4 @@
1
  # core/train_eval.py
2
-
3
  import numpy as np
4
  import pandas as pd
5
  import torch
@@ -164,8 +163,9 @@ def train_and_evaluate(
164
  selected_features = select_features(
165
  df, features, target, selector_method, importance_threshold
166
  )
 
167
 
168
- # --- FIX: Properly unpack preprocess_data return (avoid tuple issue) ---
169
  (
170
  X,
171
  y,
@@ -177,14 +177,24 @@ def train_and_evaluate(
177
  updated_feature_cols,
178
  ) = preprocess_data(df, selected_features, target, window, horizon)
179
 
 
 
 
 
 
 
 
 
 
180
  if X.shape[0] < 10:
181
  return {"error": f"Insufficient data samples: {X.shape[0]}"}
182
 
183
- # Train/test split
184
  train_size = int((1 - test_split) * len(X))
185
  X_train, X_test = X[:train_size], X[train_size:]
186
  y_train, y_test = y[:train_size], y[train_size:]
187
 
 
188
  train_dataset = TensorDataset(
189
  torch.tensor(X_train, dtype=torch.float32),
190
  torch.tensor(y_train, dtype=torch.float32),
@@ -209,6 +219,7 @@ def train_and_evaluate(
209
  try:
210
  output = StringIO()
211
  sys.stdout = output
 
212
  summary(model, input_size=(window, input_size))
213
  sys.stdout = sys.__stdout__
214
  logging.debug(output.getvalue())
@@ -233,21 +244,25 @@ def train_and_evaluate(
233
  model.train()
234
  running_loss = 0.0
235
  for batch_X, batch_y in train_loader:
236
- batch_X, batch_y = batch_X.to(device), batch_y.to(device)
 
237
  optimizer.zero_grad()
238
  outputs = model(batch_X)
239
  loss = criterion(outputs, batch_y)
240
  loss.backward()
241
  optimizer.step()
242
  running_loss += loss.item() * batch_X.size(0)
 
243
  epoch_train_loss = running_loss / len(train_loader.dataset)
244
  train_losses.append(epoch_train_loss)
245
 
 
246
  model.eval()
247
  running_val = 0.0
248
  with torch.no_grad():
249
  for batch_X, batch_y in test_loader:
250
- batch_X, batch_y = batch_X.to(device), batch_y.to(device)
 
251
  outputs = model(batch_X)
252
  v_loss = criterion(outputs, batch_y)
253
  running_val += v_loss.item() * batch_X.size(0)
@@ -257,40 +272,47 @@ def train_and_evaluate(
257
  if scheduler:
258
  scheduler.step(epoch_val_loss)
259
 
 
 
260
  # ---------------- Evaluation ----------------
261
  model.eval()
262
  with torch.no_grad():
263
- y_pred_scaled = model(torch.tensor(X_test, dtype=torch.float32).to(device)).cpu().numpy()
 
 
264
  y_test_unscaled = target_scaler.inverse_transform(y_test.reshape(-1, horizon)).flatten()
265
  y_pred_unscaled = target_scaler.inverse_transform(y_pred_scaled.reshape(-1, horizon)).flatten()
266
 
267
  precision, recall = compute_precision_recall(y_test_unscaled, y_pred_unscaled)
268
 
269
  metrics = {
270
- "R2": r2_score(y_test_unscaled, y_pred_unscaled),
271
- "MAPE": mean_absolute_percentage_error(y_test_unscaled, y_pred_unscaled),
272
- "RMSE": np.sqrt(mean_squared_error(y_test_unscaled, y_pred_unscaled)),
273
- "MAE": mean_absolute_error(y_test_unscaled, y_pred_unscaled),
274
- "DirAcc": directional_accuracy(y_test_unscaled, y_pred_unscaled),
275
- "MASE": mase(
276
- y_test_unscaled,
277
- y_pred_unscaled,
278
- target_scaler.inverse_transform(y_train.reshape(-1, horizon)).flatten(),
 
 
279
  ),
280
- "Volatility": compute_volatility(y_pred_unscaled),
281
- "Sharpe": compute_sharpe_ratio(y_pred_unscaled),
282
- "Precision": precision,
283
- "Recall": recall,
284
  }
285
 
 
286
  latest_data = torch.tensor(X[-1:], dtype=torch.float32).to(device)
287
  with torch.no_grad():
288
- latest_prediction = model(latest_data).cpu().numpy()
289
  latest_prediction = target_scaler.inverse_transform(
290
- latest_prediction.reshape(-1, horizon)
291
  ).flatten()
292
 
293
- return {
294
  "model": model,
295
  "train_loss": train_losses,
296
  "val_loss": val_losses,
@@ -305,8 +327,13 @@ def train_and_evaluate(
305
  "dropout": dropout,
306
  "window": window,
307
  },
 
 
308
  }
309
 
 
 
 
310
  except Exception as e:
311
  logging.error(f"Error in train_and_evaluate: {str(e)}")
312
- return {"error": str(e)}
 
1
  # core/train_eval.py
 
2
  import numpy as np
3
  import pandas as pd
4
  import torch
 
163
  selected_features = select_features(
164
  df, features, target, selector_method, importance_threshold
165
  )
166
+ logging.info(f"Selected features: {selected_features}")
167
 
168
+ # --- MUST unpack preprocess_data properly (avoid tuple misuse) ---
169
  (
170
  X,
171
  y,
 
177
  updated_feature_cols,
178
  ) = preprocess_data(df, selected_features, target, window, horizon)
179
 
180
+ X = np.asarray(X)
181
+ y = np.asarray(y)
182
+
183
+ if X.ndim != 3:
184
+ raise ValueError(f"Preprocessed X must be 3D (samples, window, features). Got shape: {X.shape}")
185
+ if y.ndim == 1:
186
+ # ensure y has shape (samples, horizon)
187
+ y = y.reshape(-1, horizon)
188
+
189
  if X.shape[0] < 10:
190
  return {"error": f"Insufficient data samples: {X.shape[0]}"}
191
 
192
+ # Train/test split (simple slice to preserve time order)
193
  train_size = int((1 - test_split) * len(X))
194
  X_train, X_test = X[:train_size], X[train_size:]
195
  y_train, y_test = y[:train_size], y[train_size:]
196
 
197
+ # Build datasets (do NOT move to device here; move in training loop)
198
  train_dataset = TensorDataset(
199
  torch.tensor(X_train, dtype=torch.float32),
200
  torch.tensor(y_train, dtype=torch.float32),
 
219
  try:
220
  output = StringIO()
221
  sys.stdout = output
222
+ # summary expects (channels, seq_len) for some models, here we show (seq_len, features)
223
  summary(model, input_size=(window, input_size))
224
  sys.stdout = sys.__stdout__
225
  logging.debug(output.getvalue())
 
244
  model.train()
245
  running_loss = 0.0
246
  for batch_X, batch_y in train_loader:
247
+ batch_X = batch_X.to(device)
248
+ batch_y = batch_y.to(device)
249
  optimizer.zero_grad()
250
  outputs = model(batch_X)
251
  loss = criterion(outputs, batch_y)
252
  loss.backward()
253
  optimizer.step()
254
  running_loss += loss.item() * batch_X.size(0)
255
+
256
  epoch_train_loss = running_loss / len(train_loader.dataset)
257
  train_losses.append(epoch_train_loss)
258
 
259
+ # validation
260
  model.eval()
261
  running_val = 0.0
262
  with torch.no_grad():
263
  for batch_X, batch_y in test_loader:
264
+ batch_X = batch_X.to(device)
265
+ batch_y = batch_y.to(device)
266
  outputs = model(batch_X)
267
  v_loss = criterion(outputs, batch_y)
268
  running_val += v_loss.item() * batch_X.size(0)
 
272
  if scheduler:
273
  scheduler.step(epoch_val_loss)
274
 
275
+ logging.debug(f"Epoch {epoch+1}/{epochs} train={epoch_train_loss:.6f} val={epoch_val_loss:.6f}")
276
+
277
  # ---------------- Evaluation ----------------
278
  model.eval()
279
  with torch.no_grad():
280
+ X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
281
+ y_pred_scaled = model(X_test_tensor).cpu().numpy()
282
+
283
  y_test_unscaled = target_scaler.inverse_transform(y_test.reshape(-1, horizon)).flatten()
284
  y_pred_unscaled = target_scaler.inverse_transform(y_pred_scaled.reshape(-1, horizon)).flatten()
285
 
286
  precision, recall = compute_precision_recall(y_test_unscaled, y_pred_unscaled)
287
 
288
  metrics = {
289
+ "R2": float(r2_score(y_test_unscaled, y_pred_unscaled)),
290
+ "MAPE": float(mean_absolute_percentage_error(y_test_unscaled, y_pred_unscaled)),
291
+ "RMSE": float(np.sqrt(mean_squared_error(y_test_unscaled, y_pred_unscaled))),
292
+ "MAE": float(mean_absolute_error(y_test_unscaled, y_pred_unscaled)),
293
+ "DirAcc": float(directional_accuracy(y_test_unscaled, y_pred_unscaled)),
294
+ "MASE": float(
295
+ mase(
296
+ y_test_unscaled,
297
+ y_pred_unscaled,
298
+ target_scaler.inverse_transform(y_train.reshape(-1, horizon)).flatten(),
299
+ )
300
  ),
301
+ "Volatility": float(compute_volatility(y_pred_unscaled)),
302
+ "Sharpe": float(compute_sharpe_ratio(y_pred_unscaled)),
303
+ "Precision": float(np.nan if np.isnan(precision) else precision),
304
+ "Recall": float(np.nan if np.isnan(recall) else recall),
305
  }
306
 
307
+ # Latest prediction (use last window from original X)
308
  latest_data = torch.tensor(X[-1:], dtype=torch.float32).to(device)
309
  with torch.no_grad():
310
+ latest_prediction_scaled = model(latest_data).cpu().numpy()
311
  latest_prediction = target_scaler.inverse_transform(
312
+ latest_prediction_scaled.reshape(-1, horizon)
313
  ).flatten()
314
 
315
+ result = {
316
  "model": model,
317
  "train_loss": train_losses,
318
  "val_loss": val_losses,
 
327
  "dropout": dropout,
328
  "window": window,
329
  },
330
+ "scalers": {"feature_scaler": feature_scaler, "target_scaler": target_scaler},
331
+ "features": updated_feature_cols,
332
  }
333
 
334
+ logging.info("Training and evaluation completed successfully")
335
+ return result
336
+
337
  except Exception as e:
338
  logging.error(f"Error in train_and_evaluate: {str(e)}")
339
+ return {"error": str(e)}