ShadowGard3n committed on
Commit
db023ce
·
1 Parent(s): 1ce5332

Changes in deployment+

Browse files
Files changed (2) hide show
  1. routes/predictions.py +27 -23
  2. services/market_services.py +23 -21
routes/predictions.py CHANGED
@@ -208,10 +208,10 @@ router = APIRouter()
208
  MODELS_DIR = 'models'
209
  models = {}
210
 
 
211
  if os.path.exists(MODELS_DIR):
212
  for model_file in os.listdir(MODELS_DIR):
213
  if model_file.endswith('.pkl'):
214
- # Normalize filename
215
  commodity_name = model_file.replace('.pkl', '').replace('_', '/')
216
  try:
217
  models[commodity_name] = joblib.load(os.path.join(MODELS_DIR, model_file))
@@ -241,54 +241,60 @@ def predict_commodity_price(commodity: str):
241
 
242
  target_commodity = commodity.upper()
243
 
244
- # Normalize keys in models dict to handle potential mismatches
245
- # (Optional robustness step, relies on loading logic)
246
  if target_commodity not in models:
247
  raise HTTPException(status_code=404, detail=f"Model for '{commodity}' not found.")
248
 
249
  model = models[target_commodity]
250
 
 
251
  df_commodity = DF_FULL[DF_FULL['commodity'].str.upper() == target_commodity]
252
  if df_commodity.empty:
253
  raise HTTPException(status_code=404, detail="No historical data found for commodity")
254
 
 
255
  df_daily = df_commodity.groupby(df_commodity.index).agg({'modal_price': 'mean'})
256
  last_known_date = df_daily.index.max()
257
 
258
- # Generate Recent History
259
- start_context_date = last_known_date - pd.Timedelta(days=90)
260
-
261
- # _create_features now safely handles NaNs, but for history we have full data
262
- df_featured = _create_features(df_daily)
263
- test_df = df_featured.loc[df_featured.index >= start_context_date]
264
-
265
  recent_data = []
266
- if not test_df.empty:
267
- FEATURES = [col for col in test_df.columns if col != 'modal_price']
268
-
269
- # CRITICAL FIX: Explicitly cast to DataFrame to preserve feature names for XGBoost
270
- X_input = pd.DataFrame(test_df[FEATURES].values, columns=FEATURES, index=test_df.index)
271
 
272
- try:
 
 
 
 
273
  predictions = model.predict(X_input)
 
274
  for date, actual, pred in zip(test_df.index, test_df['modal_price'], predictions):
275
  recent_data.append({
276
  "date": date.strftime('%Y-%m-%d'),
277
  "actual_price": float(actual),
278
  "predicted_price": float(pred)
279
  })
280
- except Exception as e:
281
- print(f"Warning: Could not generate history validation: {e}")
282
 
283
- # Generate Future Forecast
 
284
  try:
285
  daily_forecast_df = get_market_prediction(model, DF_FULL, target_commodity, last_known_date)
286
 
287
- future_data = []
288
  for date, row in daily_forecast_df.iterrows():
 
 
 
 
 
 
 
 
289
  future_data.append({
290
  "date": date.strftime('%Y-%m-%d'),
291
- "forecast_price": float(row['forecast'])
292
  })
293
 
294
  except Exception as e:
@@ -310,7 +316,6 @@ async def get_market_price(request: MarketPriceRequest):
310
  async def get_current_weather(city: str):
311
  try:
312
  weather_data = await get_weather_data_for_city(city)
313
- # ... (rest of the weather logic remains unchanged)
314
  current_data = weather_data.get("current", {})
315
  location_data = weather_data.get("location", {})
316
  air_quality_data = current_data.get("air_quality", {})
@@ -335,7 +340,6 @@ async def get_current_weather(city: str):
335
  async def get_weather_forecast(city: str, days: int = Query(default=1, ge=1, le=14)):
336
  try:
337
  forecast_data = await get_weather_forecast_for_city(city, days)
338
- # ... (rest of forecast logic remains unchanged)
339
  location_data = forecast_data.get("location", {})
340
  processed_forecast_days = []
341
  for day_data in forecast_data.get("forecast", {}).get("forecastday", []):
 
208
  MODELS_DIR = 'models'
209
  models = {}
210
 
211
+ # Ensure models dir exists and load models
212
  if os.path.exists(MODELS_DIR):
213
  for model_file in os.listdir(MODELS_DIR):
214
  if model_file.endswith('.pkl'):
 
215
  commodity_name = model_file.replace('.pkl', '').replace('_', '/')
216
  try:
217
  models[commodity_name] = joblib.load(os.path.join(MODELS_DIR, model_file))
 
241
 
242
  target_commodity = commodity.upper()
243
 
 
 
244
  if target_commodity not in models:
245
  raise HTTPException(status_code=404, detail=f"Model for '{commodity}' not found.")
246
 
247
  model = models[target_commodity]
248
 
249
+ # Check history
250
  df_commodity = DF_FULL[DF_FULL['commodity'].str.upper() == target_commodity]
251
  if df_commodity.empty:
252
  raise HTTPException(status_code=404, detail="No historical data found for commodity")
253
 
254
+ # Get last known date
255
  df_daily = df_commodity.groupby(df_commodity.index).agg({'modal_price': 'mean'})
256
  last_known_date = df_daily.index.max()
257
 
258
+ # --- 1. Recent History Validation ---
 
 
 
 
 
 
259
  recent_data = []
260
+ try:
261
+ start_context_date = last_known_date - pd.Timedelta(days=90)
262
+ df_featured = _create_features(df_daily)
263
+ test_df = df_featured.loc[df_featured.index >= start_context_date].dropna()
 
264
 
265
+ if not test_df.empty:
266
+ FEATURES = [col for col in test_df.columns if col != 'modal_price']
267
+ # FIX: Use DataFrame with columns for XGBoost
268
+ X_input = pd.DataFrame(test_df[FEATURES].values, columns=FEATURES, index=test_df.index)
269
+
270
  predictions = model.predict(X_input)
271
+
272
  for date, actual, pred in zip(test_df.index, test_df['modal_price'], predictions):
273
  recent_data.append({
274
  "date": date.strftime('%Y-%m-%d'),
275
  "actual_price": float(actual),
276
  "predicted_price": float(pred)
277
  })
278
+ except Exception as e:
279
+ print(f"Warning: Could not generate history validation: {e}")
280
 
281
+ # --- 2. Future Forecast ---
282
+ future_data = []
283
  try:
284
  daily_forecast_df = get_market_prediction(model, DF_FULL, target_commodity, last_known_date)
285
 
 
286
  for date, row in daily_forecast_df.iterrows():
287
+ price = row['forecast']
288
+
289
+ # --- CRITICAL FIX: Handle NaN values to prevent JSON crash ---
290
+ if pd.isna(price) or np.isnan(price):
291
+ final_price = None
292
+ else:
293
+ final_price = float(price)
294
+
295
  future_data.append({
296
  "date": date.strftime('%Y-%m-%d'),
297
+ "forecast_price": final_price
298
  })
299
 
300
  except Exception as e:
 
316
  async def get_current_weather(city: str):
317
  try:
318
  weather_data = await get_weather_data_for_city(city)
 
319
  current_data = weather_data.get("current", {})
320
  location_data = weather_data.get("location", {})
321
  air_quality_data = current_data.get("air_quality", {})
 
340
  async def get_weather_forecast(city: str, days: int = Query(default=1, ge=1, le=14)):
341
  try:
342
  forecast_data = await get_weather_forecast_for_city(city, days)
 
343
  location_data = forecast_data.get("location", {})
344
  processed_forecast_days = []
345
  for day_data in forecast_data.get("forecast", {}).get("forecastday", []):
services/market_services.py CHANGED
@@ -13,7 +13,7 @@ models = {}
13
  if os.path.exists(MODELS_DIR):
14
  for model_file in os.listdir(MODELS_DIR):
15
  if model_file.endswith('.pkl'):
16
- # Normalize filename to commodity name
17
  commodity_name = model_file.replace('.pkl', '').replace('_', '/')
18
  try:
19
  models[commodity_name] = joblib.load(os.path.join(MODELS_DIR, model_file))
@@ -48,56 +48,58 @@ def _create_features(df):
48
  df['rolling_mean_30'] = df['modal_price'].shift(1).rolling(window=30).mean()
49
  df['rolling_std_30'] = df['modal_price'].shift(1).rolling(window=30).std()
50
 
51
- # CRITICAL FIX: Only drop rows where FEATURES are NaN, keep the target row even if modal_price is NaN
52
- # We drop based on the longest lag (30 days) to ensure features are valid
53
- return df.dropna(subset=['rolling_std_30', 'price_lag_30'])
54
 
55
  def get_market_prediction(model, df_full, commodity, last_known_date):
56
  """
57
  Iteratively predicts the next 180 days.
58
  """
 
59
  df_commodity = df_full[df_full['commodity'].str.upper() == commodity.upper()]
60
  df_daily = df_commodity.groupby(df_commodity.index).agg({'modal_price': 'mean'})
61
 
 
62
  future_dates = pd.date_range(start=last_known_date + pd.Timedelta(days=1), periods=180, freq='D')
63
 
64
  future_df = pd.DataFrame(index=future_dates)
65
  future_df['modal_price'] = np.nan
66
 
 
67
  df_extended = pd.concat([df_daily, future_df])
68
 
69
- # Pre-calculate feature names once to ensure consistency
70
- sample_features = _create_features(df_daily.tail(40))
71
- FEATURES = [col for col in sample_features.columns if col != 'modal_price']
 
72
 
73
  for date in future_dates:
74
- # We need enough history to calculate lags
75
  subset = df_extended.loc[:date]
 
 
76
  if len(subset) < 35: continue
77
 
78
- # Create features
79
- # Note: This returns a DataFrame
80
  featured_subset = _create_features(subset)
81
 
82
- if featured_subset.empty: continue
83
-
84
- # Get the row for the specific date we are predicting
85
- # Because we fixed _create_features, this row should exist now
86
- if date not in featured_subset.index:
87
- continue
88
-
89
  featured_row = featured_subset.loc[[date]]
90
 
91
- # CRITICAL FIX: Ensure input is a DataFrame with strict column names
92
- X_input = pd.DataFrame(featured_row[FEATURES].values, columns=FEATURES)
93
-
94
  try:
 
95
  prediction = model.predict(X_input)[0]
96
- # Update the dataframe
97
  df_extended.loc[date, 'modal_price'] = prediction
98
  except Exception as e:
 
99
  print(f"Prediction error for {date}: {e}")
100
  break
 
101
 
102
  daily_forecast_df = df_extended.loc[future_dates].copy()
103
  daily_forecast_df.rename(columns={'modal_price': 'forecast'}, inplace=True)
 
13
  if os.path.exists(MODELS_DIR):
14
  for model_file in os.listdir(MODELS_DIR):
15
  if model_file.endswith('.pkl'):
16
+ # Normalize filename to commodity name (handle slash replacement if needed)
17
  commodity_name = model_file.replace('.pkl', '').replace('_', '/')
18
  try:
19
  models[commodity_name] = joblib.load(os.path.join(MODELS_DIR, model_file))
 
48
  df['rolling_mean_30'] = df['modal_price'].shift(1).rolling(window=30).mean()
49
  df['rolling_std_30'] = df['modal_price'].shift(1).rolling(window=30).std()
50
 
51
+ # Return features. Note: We do NOT dropna here because we need to generate
52
+ # features for the future row even if it has NaNs initially.
53
+ return df
54
 
55
  def get_market_prediction(model, df_full, commodity, last_known_date):
56
  """
57
  Iteratively predicts the next 180 days.
58
  """
59
+ # 1. Filter data for the commodity
60
  df_commodity = df_full[df_full['commodity'].str.upper() == commodity.upper()]
61
  df_daily = df_commodity.groupby(df_commodity.index).agg({'modal_price': 'mean'})
62
 
63
+ # 2. Setup future dates
64
  future_dates = pd.date_range(start=last_known_date + pd.Timedelta(days=1), periods=180, freq='D')
65
 
66
  future_df = pd.DataFrame(index=future_dates)
67
  future_df['modal_price'] = np.nan
68
 
69
+ # 3. Combine history and future placeholder
70
  df_extended = pd.concat([df_daily, future_df])
71
 
72
+ # 4. Get the correct list of features from a valid sample
73
+ # We take the last 50 days of history and drop incomplete rows to determine feature columns
74
+ valid_sample = _create_features(df_daily.tail(50)).dropna()
75
+ FEATURES = [col for col in valid_sample.columns if col != 'modal_price']
76
 
77
  for date in future_dates:
78
+ # We need historical context to calculate rolling averages/lags
79
  subset = df_extended.loc[:date]
80
+
81
+ # Safety check: ensure we have enough data for 30-day rolling window
82
  if len(subset) < 35: continue
83
 
84
+ # Generate features for this specific date
 
85
  featured_subset = _create_features(subset)
86
 
87
+ # Extract the row for the current prediction date
88
+ if date not in featured_subset.index: continue
 
 
 
 
 
89
  featured_row = featured_subset.loc[[date]]
90
 
91
+ # --- CRITICAL FIX START ---
92
+ # XGBoost requires a DataFrame with specific column names.
93
+ # We explicitly recreate the DataFrame to ensure headers are present.
94
  try:
95
+ X_input = pd.DataFrame(featured_row[FEATURES].values, columns=FEATURES, index=featured_row.index)
96
  prediction = model.predict(X_input)[0]
 
97
  df_extended.loc[date, 'modal_price'] = prediction
98
  except Exception as e:
99
+ # Log the error and stop forecasting; the remaining future dates keep their NaN placeholder
100
  print(f"Prediction error for {date}: {e}")
101
  break
102
+ # --- CRITICAL FIX END ---
103
 
104
  daily_forecast_df = df_extended.loc[future_dates].copy()
105
  daily_forecast_df.rename(columns={'modal_price': 'forecast'}, inplace=True)