Spaces:
Running
Running
Sync from GitHub (tests passed)
Browse files
- app/ai_engine.py +6 -1
- app/features.py +2 -2
- app/inference.py +5 -4
app/ai_engine.py
CHANGED
|
@@ -687,7 +687,7 @@ def train_xgboost_model(
|
|
| 687 |
latest_path = model_dir / f"xgb_{target_symbol.replace('=', '_')}_latest.json"
|
| 688 |
model.save_model(str(latest_path))
|
| 689 |
|
| 690 |
-
# Save metrics
|
| 691 |
metrics = {
|
| 692 |
"target_symbol": target_symbol,
|
| 693 |
"trained_at": datetime.now(timezone.utc).isoformat(),
|
|
@@ -699,6 +699,11 @@ def train_xgboost_model(
|
|
| 699 |
"val_rmse": val_rmse,
|
| 700 |
"best_iteration": model.best_iteration,
|
| 701 |
"feature_count": len(feature_names),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 702 |
}
|
| 703 |
|
| 704 |
metrics_path = model_dir / f"xgb_{target_symbol.replace('=', '_')}_latest.metrics.json"
|
|
|
|
| 687 |
latest_path = model_dir / f"xgb_{target_symbol.replace('=', '_')}_latest.json"
|
| 688 |
model.save_model(str(latest_path))
|
| 689 |
|
| 690 |
+
# Save metrics (including training symbols audit)
|
| 691 |
metrics = {
|
| 692 |
"target_symbol": target_symbol,
|
| 693 |
"trained_at": datetime.now(timezone.utc).isoformat(),
|
|
|
|
| 699 |
"val_rmse": val_rmse,
|
| 700 |
"best_iteration": model.best_iteration,
|
| 701 |
"feature_count": len(feature_names),
|
| 702 |
+
# Audit: which symbols were used for training
|
| 703 |
+
"symbol_set_name": settings.symbol_set,
|
| 704 |
+
"training_symbols": settings.training_symbols,
|
| 705 |
+
"training_symbols_hash": settings.training_symbols_hash,
|
| 706 |
+
"training_symbols_source": settings.training_symbols_source,
|
| 707 |
}
|
| 708 |
|
| 709 |
metrics_path = model_dir / f"xgb_{target_symbol.replace('=', '_')}_latest.metrics.json"
|
app/features.py
CHANGED
|
@@ -236,8 +236,8 @@ def align_to_target_calendar(
|
|
| 236 |
# Reindex to target calendar
|
| 237 |
reindexed = df.reindex(target_index)
|
| 238 |
|
| 239 |
-
# Limited forward-fill
|
| 240 |
-
reindexed = reindexed.ffill(limit=max_ffill)
|
| 241 |
|
| 242 |
aligned[symbol] = reindexed
|
| 243 |
|
|
|
|
| 236 |
# Reindex to target calendar
|
| 237 |
reindexed = df.reindex(target_index)
|
| 238 |
|
| 239 |
+
# Limited forward-fill (infer_objects fixes future downcasting warning)
|
| 240 |
+
reindexed = reindexed.ffill(limit=max_ffill).infer_objects(copy=False)
|
| 241 |
|
| 242 |
aligned[symbol] = reindexed
|
| 243 |
|
app/inference.py
CHANGED
|
@@ -250,10 +250,11 @@ def build_features_for_prediction(
|
|
| 250 |
# Get latest row
|
| 251 |
latest = all_features.iloc[[-1]].copy()
|
| 252 |
|
| 253 |
-
# Ensure we have all required features
|
| 254 |
-
for feat in feature_names:
|
| 255 |
-
if feat not in latest.columns:
|
| 256 |
-
latest[feat] = 0.0
|
|
|
|
| 257 |
|
| 258 |
# Select only the features the model expects
|
| 259 |
latest = latest[feature_names]
|
|
|
|
| 250 |
# Get latest row
|
| 251 |
latest = all_features.iloc[[-1]].copy()
|
| 252 |
|
| 253 |
+
# Ensure we have all required features (avoid fragmented DataFrame)
|
| 254 |
+
missing_feats = {feat: 0.0 for feat in feature_names if feat not in latest.columns}
|
| 255 |
+
if missing_feats:
|
| 256 |
+
missing_df = pd.DataFrame(missing_feats, index=latest.index)
|
| 257 |
+
latest = pd.concat([latest, missing_df], axis=1)
|
| 258 |
|
| 259 |
# Select only the features the model expects
|
| 260 |
latest = latest[feature_names]
|