ifieryarrows committed on
Commit
cea6239
·
verified ·
1 Parent(s): 2c56f6b

Sync from GitHub (tests passed)

Browse files
Files changed (3) hide show
  1. app/ai_engine.py +6 -1
  2. app/features.py +2 -2
  3. app/inference.py +5 -4
app/ai_engine.py CHANGED
@@ -687,7 +687,7 @@ def train_xgboost_model(
687
  latest_path = model_dir / f"xgb_{target_symbol.replace('=', '_')}_latest.json"
688
  model.save_model(str(latest_path))
689
 
690
- # Save metrics
691
  metrics = {
692
  "target_symbol": target_symbol,
693
  "trained_at": datetime.now(timezone.utc).isoformat(),
@@ -699,6 +699,11 @@ def train_xgboost_model(
699
  "val_rmse": val_rmse,
700
  "best_iteration": model.best_iteration,
701
  "feature_count": len(feature_names),
 
 
 
 
 
702
  }
703
 
704
  metrics_path = model_dir / f"xgb_{target_symbol.replace('=', '_')}_latest.metrics.json"
 
687
  latest_path = model_dir / f"xgb_{target_symbol.replace('=', '_')}_latest.json"
688
  model.save_model(str(latest_path))
689
 
690
+ # Save metrics (including training symbols audit)
691
  metrics = {
692
  "target_symbol": target_symbol,
693
  "trained_at": datetime.now(timezone.utc).isoformat(),
 
699
  "val_rmse": val_rmse,
700
  "best_iteration": model.best_iteration,
701
  "feature_count": len(feature_names),
702
+ # Audit: which symbols were used for training
703
+ "symbol_set_name": settings.symbol_set,
704
+ "training_symbols": settings.training_symbols,
705
+ "training_symbols_hash": settings.training_symbols_hash,
706
+ "training_symbols_source": settings.training_symbols_source,
707
  }
708
 
709
  metrics_path = model_dir / f"xgb_{target_symbol.replace('=', '_')}_latest.metrics.json"
app/features.py CHANGED
@@ -236,8 +236,8 @@ def align_to_target_calendar(
236
  # Reindex to target calendar
237
  reindexed = df.reindex(target_index)
238
 
239
- # Limited forward-fill
240
- reindexed = reindexed.ffill(limit=max_ffill)
241
 
242
  aligned[symbol] = reindexed
243
 
 
236
  # Reindex to target calendar
237
  reindexed = df.reindex(target_index)
238
 
239
+ # Limited forward-fill (infer_objects fixes future downcasting warning)
240
+ reindexed = reindexed.ffill(limit=max_ffill).infer_objects(copy=False)
241
 
242
  aligned[symbol] = reindexed
243
 
app/inference.py CHANGED
@@ -250,10 +250,11 @@ def build_features_for_prediction(
250
  # Get latest row
251
  latest = all_features.iloc[[-1]].copy()
252
 
253
- # Ensure we have all required features
254
- for feat in feature_names:
255
- if feat not in latest.columns:
256
- latest[feat] = 0.0
 
257
 
258
  # Select only the features the model expects
259
  latest = latest[feature_names]
 
250
  # Get latest row
251
  latest = all_features.iloc[[-1]].copy()
252
 
253
+ # Ensure we have all required features (avoid fragmented DataFrame)
254
+ missing_feats = {feat: 0.0 for feat in feature_names if feat not in latest.columns}
255
+ if missing_feats:
256
+ missing_df = pd.DataFrame(missing_feats, index=latest.index)
257
+ latest = pd.concat([latest, missing_df], axis=1)
258
 
259
  # Select only the features the model expects
260
  latest = latest[feature_names]