ShadowGard3n committed on
Commit
d76b061
·
1 Parent(s): d3c3adb

small changes

Browse files
Files changed (2) hide show
  1. routes/predictions.py +85 -85
  2. services/market_services.py +136 -136
routes/predictions.py CHANGED
@@ -242,87 +242,8 @@ def predict_fertilizer(request: FertilizerPredictionRequest):
242
 
243
 
244
 
245
- @router.get("/api/predict/{commodity}")
246
- def predict_commodity_price(commodity: str):
247
- if DF_FULL is None:
248
- raise HTTPException(status_code=500, detail="Server Error: Dataset not loaded.")
249
-
250
- # 2. Check if Model exists (Normalize to Upper Case)
251
- target_commodity = commodity.upper()
252
-
253
- if target_commodity not in models:
254
- raise HTTPException(status_code=404, detail=f"Model for '{commodity}' not found.")
255
-
256
- model = models[target_commodity]
257
-
258
- # 3. Check if we have history for this commodity
259
- df_commodity = DF_FULL[DF_FULL['commodity'].str.upper() == target_commodity]
260
- if df_commodity.empty:
261
- raise HTTPException(status_code=404, detail="No historical data found for commodity")
262
-
263
- # 4. Get the last known date
264
- df_daily = df_commodity.groupby(df_commodity.index).agg({'modal_price': 'mean'})
265
- last_known_date = df_daily.index.max()
266
-
267
- # 5. Generate Recent History (for comparison chart)
268
- start_context_date = last_known_date - pd.Timedelta(days=90)
269
- df_featured = _create_features(df_daily)
270
- test_df = df_featured.loc[df_featured.index >= start_context_date]
271
-
272
- recent_data = []
273
- if not test_df.empty:
274
- FEATURES = [col for col in test_df.columns if col != 'modal_price']
275
- try:
276
- # We try to disable feature check here too just in case
277
- try:
278
- model.get_booster().feature_names = None
279
- except:
280
- pass
281
-
282
- # Use values here as well to be safe
283
- input_values = test_df[FEATURES].values
284
- predictions = model.predict(input_values)
285
-
286
- for date, actual, pred in zip(test_df.index, test_df['modal_price'], predictions):
287
- recent_data.append({
288
- "date": date.strftime('%Y-%m-%d'),
289
- "actual_price": float(actual),
290
- "predicted_price": float(pred)
291
- })
292
- except Exception as e:
293
- print(f"Warning: Could not generate history validation: {e}")
294
-
295
- # 6. Generate Future Forecast
296
- try:
297
- # Calls the helper which now correctly returns a DataFrame
298
- daily_forecast_df = get_market_prediction(model, DF_FULL, target_commodity, last_known_date)
299
-
300
- future_data = []
301
- # YOUR ORIGINAL LOOP NOW WORKS BECAUSE IT'S A DATAFRAME AGAIN
302
- for date, row in daily_forecast_df.iterrows():
303
- future_data.append({
304
- "date": date.strftime('%Y-%m-%d'),
305
- "forecast_price": float(row['forecast'])
306
- })
307
-
308
- except Exception as e:
309
- print(f"Forecast Error: {e}")
310
- raise HTTPException(status_code=500, detail=f"Prediction failed: {str(e)}")
311
-
312
- # Returns the exact structure your frontend expects
313
- return {
314
- "commodity": commodity,
315
- "recent_data": recent_data,
316
- "forecast_data": future_data
317
- }
318
-
319
  # @router.get("/api/predict/{commodity}")
320
  # def predict_commodity_price(commodity: str):
321
- # # result = get_market_prediction(commodity)
322
- # # if "error" in result:
323
- # # raise HTTPException(status_code=404, detail=result["error"])
324
- # # return result
325
-
326
  # if DF_FULL is None:
327
  # raise HTTPException(status_code=500, detail="Server Error: Dataset not loaded.")
328
 
@@ -344,7 +265,6 @@ def predict_commodity_price(commodity: str):
344
  # last_known_date = df_daily.index.max()
345
 
346
  # # 5. Generate Recent History (for comparison chart)
347
- # # Get last 90 days of actual data
348
  # start_context_date = last_known_date - pd.Timedelta(days=90)
349
  # df_featured = _create_features(df_daily)
350
  # test_df = df_featured.loc[df_featured.index >= start_context_date]
@@ -353,7 +273,16 @@ def predict_commodity_price(commodity: str):
353
  # if not test_df.empty:
354
  # FEATURES = [col for col in test_df.columns if col != 'modal_price']
355
  # try:
356
- # predictions = model.predict(test_df[FEATURES])
 
 
 
 
 
 
 
 
 
357
  # for date, actual, pred in zip(test_df.index, test_df['modal_price'], predictions):
358
  # recent_data.append({
359
  # "date": date.strftime('%Y-%m-%d'),
@@ -363,13 +292,13 @@ def predict_commodity_price(commodity: str):
363
  # except Exception as e:
364
  # print(f"Warning: Could not generate history validation: {e}")
365
 
366
- # # 6. Generate Future Forecast (Calling the helper function correctly!)
367
  # try:
368
- # # HERE IS THE FIX: We pass all 4 arguments required by the helper
369
- # print(model, DF_FULL, target_commodity, last_known_date)
370
  # daily_forecast_df = get_market_prediction(model, DF_FULL, target_commodity, last_known_date)
371
- # print(daily_forecast_df)
372
  # future_data = []
 
373
  # for date, row in daily_forecast_df.iterrows():
374
  # future_data.append({
375
  # "date": date.strftime('%Y-%m-%d'),
@@ -380,11 +309,82 @@ def predict_commodity_price(commodity: str):
380
  # print(f"Forecast Error: {e}")
381
  # raise HTTPException(status_code=500, detail=f"Prediction failed: {str(e)}")
382
 
 
383
  # return {
384
  # "commodity": commodity,
385
  # "recent_data": recent_data,
386
  # "forecast_data": future_data
387
  # }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
388
 
389
  @router.post(
390
  "/api/marketPrice",
 
242
 
243
 
244
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
  # @router.get("/api/predict/{commodity}")
246
  # def predict_commodity_price(commodity: str):
 
 
 
 
 
247
  # if DF_FULL is None:
248
  # raise HTTPException(status_code=500, detail="Server Error: Dataset not loaded.")
249
 
 
265
  # last_known_date = df_daily.index.max()
266
 
267
  # # 5. Generate Recent History (for comparison chart)
 
268
  # start_context_date = last_known_date - pd.Timedelta(days=90)
269
  # df_featured = _create_features(df_daily)
270
  # test_df = df_featured.loc[df_featured.index >= start_context_date]
 
273
  # if not test_df.empty:
274
  # FEATURES = [col for col in test_df.columns if col != 'modal_price']
275
  # try:
276
+ # # We try to disable feature check here too just in case
277
+ # try:
278
+ # model.get_booster().feature_names = None
279
+ # except:
280
+ # pass
281
+
282
+ # # Use values here as well to be safe
283
+ # input_values = test_df[FEATURES].values
284
+ # predictions = model.predict(input_values)
285
+
286
  # for date, actual, pred in zip(test_df.index, test_df['modal_price'], predictions):
287
  # recent_data.append({
288
  # "date": date.strftime('%Y-%m-%d'),
 
292
  # except Exception as e:
293
  # print(f"Warning: Could not generate history validation: {e}")
294
 
295
+ # # 6. Generate Future Forecast
296
  # try:
297
+ # # Calls the helper which now correctly returns a DataFrame
 
298
  # daily_forecast_df = get_market_prediction(model, DF_FULL, target_commodity, last_known_date)
299
+
300
  # future_data = []
301
+ # # YOUR ORIGINAL LOOP NOW WORKS BECAUSE IT'S A DATAFRAME AGAIN
302
  # for date, row in daily_forecast_df.iterrows():
303
  # future_data.append({
304
  # "date": date.strftime('%Y-%m-%d'),
 
309
  # print(f"Forecast Error: {e}")
310
  # raise HTTPException(status_code=500, detail=f"Prediction failed: {str(e)}")
311
 
312
+ # # Returns the exact structure your frontend expects
313
  # return {
314
  # "commodity": commodity,
315
  # "recent_data": recent_data,
316
  # "forecast_data": future_data
317
  # }
318
+
319
@router.get("/api/predict/{commodity}")
def predict_commodity_price(commodity: str):
    """Return recent actual-vs-predicted prices and a future forecast.

    Args:
        commodity: Commodity name from the URL path. It is upper-cased
            before being used to look up the model and filter the dataset.

    Returns:
        A dict with three keys:
        ``commodity`` (the name exactly as requested),
        ``recent_data`` (list of ``date`` / ``actual_price`` /
        ``predicted_price`` dicts covering the 90 days up to the last
        known date; empty if the history comparison could not be built),
        ``forecast_data`` (list of ``date`` / ``forecast_price`` dicts
        built from the frame returned by ``get_market_prediction``).

    Raises:
        HTTPException: 500 if the dataset is not loaded or the forecast
            fails; 404 if there is no model or no history for the commodity.
    """
    if DF_FULL is None:
        raise HTTPException(status_code=500, detail="Server Error: Dataset not loaded.")

    # Models are keyed by upper-cased commodity name.
    target_commodity = commodity.upper()
    if target_commodity not in models:
        raise HTTPException(status_code=404, detail=f"Model for '{commodity}' not found.")
    model = models[target_commodity]

    df_commodity = DF_FULL[DF_FULL['commodity'].str.upper() == target_commodity]
    if df_commodity.empty:
        raise HTTPException(status_code=404, detail="No historical data found for commodity")

    # Collapse rows sharing the same index value into a single mean price.
    df_daily = df_commodity.groupby(df_commodity.index).agg({'modal_price': 'mean'})
    last_known_date = df_daily.index.max()

    # Recent history (last 90 days) for the actual-vs-predicted comparison chart.
    start_context_date = last_known_date - pd.Timedelta(days=90)
    df_featured = _create_features(df_daily)
    test_df = df_featured.loc[df_featured.index >= start_context_date]

    recent_data = []
    if not test_df.empty:
        FEATURES = [col for col in test_df.columns if col != 'modal_price']
        try:
            predictions = model.predict(test_df[FEATURES])
            for date, actual, pred in zip(test_df.index, test_df['modal_price'], predictions):
                recent_data.append({
                    "date": date.strftime('%Y-%m-%d'),
                    "actual_price": float(actual),
                    "predicted_price": float(pred),
                })
        except Exception as e:
            # Best effort: the comparison chart is optional, so a failure here
            # is reported but does not abort the request.
            print(f"Warning: Could not generate history validation: {e}")

    # Future forecast. The helper takes the model, the full dataset, the
    # normalized commodity name and the last known date.
    try:
        daily_forecast_df = get_market_prediction(model, DF_FULL, target_commodity, last_known_date)
        # Dates the helper could not predict are left as NaN; NaN is not valid
        # JSON, so those entries are omitted instead of breaking the response.
        future_data = [
            {
                "date": date.strftime('%Y-%m-%d'),
                "forecast_price": float(value),
            }
            for date, value in daily_forecast_df['forecast'].items()
            if pd.notna(value)
        ]
    except Exception as e:
        print(f"Forecast Error: {e}")
        raise HTTPException(status_code=500, detail=f"Prediction failed: {str(e)}") from e

    return {
        "commodity": commodity,
        "recent_data": recent_data,
        "forecast_data": future_data,
    }
388
 
389
  @router.post(
390
  "/api/marketPrice",
services/market_services.py CHANGED
@@ -1,132 +1,9 @@
1
- # import pandas as pd
2
- # import numpy as np
3
- # import joblib
4
- # import os
5
- # from fastapi import APIRouter, HTTPException
6
- # from pydantic import BaseModel
7
- # from typing import List, Dict, Optional
8
-
9
- # # --- Load Models & Data ---
10
- # MODELS_DIR = 'models'
11
- # models = {}
12
-
13
- # feature_names = [
14
- # 'dayofweek', 'dayofyear', 'month', 'year', 'quarter', 'weekofyear',
15
- # 'price_lag_7', 'price_lag_14', 'price_lag_30', 'rolling_mean_30', 'rolling_std_30'
16
- # ]
17
-
18
- # # Ensure models dir exists
19
- # if os.path.exists(MODELS_DIR):
20
- # for model_file in os.listdir(MODELS_DIR):
21
- # if model_file.endswith('.pkl'):
22
- # commodity_name = model_file.replace('.pkl', '').replace('_', '/')
23
- # models[commodity_name] = joblib.load(os.path.join(MODELS_DIR, model_file))
24
- # print(f"✅ Model loaded for: {commodity_name}")
25
-
26
- # try:
27
- # # Ensure your CSV is accessible
28
- # DF_FULL = pd.read_csv('final_output.csv', parse_dates=['created_at'], index_col='created_at')
29
- # print("✅ Dataset loaded.")
30
- # except FileNotFoundError:
31
- # print("❌ 'final_output.csv' not found. Predictions will fail.")
32
- # DF_FULL = None
33
-
34
- # # --- Helper Functions ---
35
-
36
- # # def _create_features(df):
37
- # # df = df.copy()
38
- # # df['dayofweek'] = df.index.dayofweek
39
- # # df['dayofyear'] = df.index.dayofyear
40
- # # df['month'] = df.index.month
41
- # # df['year'] = df.index.year
42
- # # df['quarter'] = df.index.quarter
43
- # # df['weekofyear'] = df.index.isocalendar().week.astype(int)
44
- # # # Lags and Rolling features
45
- # # df['price_lag_7'] = df['modal_price'].shift(7)
46
- # # df['price_lag_14'] = df['modal_price'].shift(14)
47
- # # df['price_lag_30'] = df['modal_price'].shift(30)
48
- # # df['rolling_mean_30'] = df['modal_price'].shift(1).rolling(window=30).mean()
49
- # # df['rolling_std_30'] = df['modal_price'].shift(1).rolling(window=30).std()
50
- # # return df.dropna()
51
-
52
-
53
- # def _create_features(df):
54
- # df = df.copy()
55
- # df['dayofweek'] = df.index.dayofweek
56
- # df['dayofyear'] = df.index.dayofyear
57
- # df['month'] = df.index.month
58
- # df['year'] = df.index.year
59
- # df['quarter'] = df.index.quarter
60
- # df['weekofyear'] = df.index.isocalendar().week.astype(int)
61
-
62
- # # Lags and Rolling features
63
- # df['price_lag_7'] = df['modal_price'].shift(7)
64
- # df['price_lag_14'] = df['modal_price'].shift(14)
65
- # df['price_lag_30'] = df['modal_price'].shift(30)
66
- # df['rolling_mean_30'] = df['modal_price'].shift(1).rolling(window=30).mean()
67
- # df['rolling_std_30'] = df['modal_price'].shift(1).rolling(window=30).std()
68
-
69
- # # REMOVE .dropna() here! We need the last row even if modal_price is NaN.
70
- # return df
71
-
72
- # def get_market_prediction(model, df_full, commodity, last_known_date):
73
- # """
74
- # Iteratively predicts the next 180 days.
75
- # """
76
- # print(f"--- PREDICTION STARTED FOR {commodity} ---")
77
-
78
- # # --- FIX START: DISABLE FEATURE NAME CHECK ---
79
- # # This tells XGBoost: "Don't check column names, just trust the order of numbers I give you"
80
- # try:
81
- # booster = model.get_booster()
82
- # booster.feature_names = None
83
- # except Exception:
84
- # pass # If it fails, we proceed anyway
85
- # # --- FIX END ---
86
-
87
- # df_commodity = df_full[df_full['commodity'] == commodity]
88
- # df_daily = df_commodity.groupby(df_commodity.index).agg({'modal_price': 'mean'})
89
-
90
- # future_dates = pd.date_range(start=last_known_date + pd.Timedelta(days=1), periods=180, freq='D')
91
-
92
- # future_df = pd.DataFrame(index=future_dates)
93
- # future_df['modal_price'] = np.nan
94
-
95
- # df_extended = pd.concat([df_daily, future_df])
96
-
97
- # for date in future_dates:
98
- # subset = df_extended.loc[:date]
99
- # if len(subset) < 30: continue
100
-
101
- # featured_subset = _create_features(subset)
102
- # target_row = featured_subset.iloc[-1]
103
-
104
- # if target_row[feature_names].isna().any():
105
- # continue
106
-
107
- # # 1. Extract values as a plain Numpy Array
108
- # # Reshape to (1, 11) -> 1 row, 11 features
109
- # input_values = target_row[feature_names].values.reshape(1, -1)
110
-
111
- # # 2. Predict using the Numpy Array directly (Faster & Safer now)
112
- # # Since we disabled feature_names check, this will now work.
113
- # prediction = model.predict(input_values)[0]
114
-
115
- # df_extended.loc[date, 'modal_price'] = prediction
116
-
117
- # daily_forecast_df = df_extended.loc[future_dates].copy()
118
- # daily_forecast_df.rename(columns={'modal_price': 'forecast'}, inplace=True)
119
-
120
- # print("--- PREDICTION SUCCESS ---")
121
- # return daily_forecast_df
122
-
123
-
124
-
125
  import pandas as pd
126
  import numpy as np
127
  import joblib
128
  import os
129
  from fastapi import APIRouter, HTTPException
 
130
  from typing import List, Dict, Optional
131
 
132
  # --- Load Models & Data ---
@@ -156,6 +33,23 @@ except FileNotFoundError:
156
 
157
  # --- Helper Functions ---
158
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  def _create_features(df):
160
  df = df.copy()
161
  df['dayofweek'] = df.index.dayofweek
@@ -172,24 +66,23 @@ def _create_features(df):
172
  df['rolling_mean_30'] = df['modal_price'].shift(1).rolling(window=30).mean()
173
  df['rolling_std_30'] = df['modal_price'].shift(1).rolling(window=30).std()
174
 
175
- # CRITICAL: Do NOT dropna() here, or we lose the row we are trying to predict
176
  return df
177
 
178
  def get_market_prediction(model, df_full, commodity, last_known_date):
179
  """
180
  Iteratively predicts the next 180 days.
181
- Returns a DataFrame so the route can iterate over it.
182
  """
183
  print(f"--- PREDICTION STARTED FOR {commodity} ---")
184
 
185
- # --- FIX 1: DISABLE FEATURE NAME CHECK ---
186
- # This prevents the "X does not have valid feature names" error
187
  try:
188
  booster = model.get_booster()
189
  booster.feature_names = None
190
  except Exception:
191
- pass
192
- # -----------------------------------------
193
 
194
  df_commodity = df_full[df_full['commodity'] == commodity]
195
  df_daily = df_commodity.groupby(df_commodity.index).agg({'modal_price': 'mean'})
@@ -208,24 +101,131 @@ def get_market_prediction(model, df_full, commodity, last_known_date):
208
  featured_subset = _create_features(subset)
209
  target_row = featured_subset.iloc[-1]
210
 
211
- # Only check if features are NaN (modal_price is allowed to be NaN)
212
  if target_row[feature_names].isna().any():
213
  continue
214
 
215
- # --- FIX 2: Pass Pure Values ---
216
  # Reshape to (1, 11) -> 1 row, 11 features
217
  input_values = target_row[feature_names].values.reshape(1, -1)
218
 
219
- # Predict using the values (bypassing column name check)
 
220
  prediction = model.predict(input_values)[0]
221
 
222
  df_extended.loc[date, 'modal_price'] = prediction
223
 
224
- # Extract just the future part
225
  daily_forecast_df = df_extended.loc[future_dates].copy()
226
  daily_forecast_df.rename(columns={'modal_price': 'forecast'}, inplace=True)
227
 
228
  print("--- PREDICTION SUCCESS ---")
229
-
230
- # Returns DataFrame (Matches your route code)
231
  return daily_forecast_df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import pandas as pd
2
  import numpy as np
3
  import joblib
4
  import os
5
  from fastapi import APIRouter, HTTPException
6
+ from pydantic import BaseModel
7
  from typing import List, Dict, Optional
8
 
9
  # --- Load Models & Data ---
 
33
 
34
  # --- Helper Functions ---
35
 
36
+ # def _create_features(df):
37
+ # df = df.copy()
38
+ # df['dayofweek'] = df.index.dayofweek
39
+ # df['dayofyear'] = df.index.dayofyear
40
+ # df['month'] = df.index.month
41
+ # df['year'] = df.index.year
42
+ # df['quarter'] = df.index.quarter
43
+ # df['weekofyear'] = df.index.isocalendar().week.astype(int)
44
+ # # Lags and Rolling features
45
+ # df['price_lag_7'] = df['modal_price'].shift(7)
46
+ # df['price_lag_14'] = df['modal_price'].shift(14)
47
+ # df['price_lag_30'] = df['modal_price'].shift(30)
48
+ # df['rolling_mean_30'] = df['modal_price'].shift(1).rolling(window=30).mean()
49
+ # df['rolling_std_30'] = df['modal_price'].shift(1).rolling(window=30).std()
50
+ # return df.dropna()
51
+
52
+
53
  def _create_features(df):
54
  df = df.copy()
55
  df['dayofweek'] = df.index.dayofweek
 
66
  df['rolling_mean_30'] = df['modal_price'].shift(1).rolling(window=30).mean()
67
  df['rolling_std_30'] = df['modal_price'].shift(1).rolling(window=30).std()
68
 
69
+ # REMOVE .dropna() here! We need the last row even if modal_price is NaN.
70
  return df
71
 
72
  def get_market_prediction(model, df_full, commodity, last_known_date):
73
  """
74
  Iteratively predicts the next 180 days.
 
75
  """
76
  print(f"--- PREDICTION STARTED FOR {commodity} ---")
77
 
78
+ # --- FIX START: DISABLE FEATURE NAME CHECK ---
79
+ # This tells XGBoost: "Don't check column names, just trust the order of numbers I give you"
80
  try:
81
  booster = model.get_booster()
82
  booster.feature_names = None
83
  except Exception:
84
+ pass # If it fails, we proceed anyway
85
+ # --- FIX END ---
86
 
87
  df_commodity = df_full[df_full['commodity'] == commodity]
88
  df_daily = df_commodity.groupby(df_commodity.index).agg({'modal_price': 'mean'})
 
101
  featured_subset = _create_features(subset)
102
  target_row = featured_subset.iloc[-1]
103
 
 
104
  if target_row[feature_names].isna().any():
105
  continue
106
 
107
+ # 1. Extract values as a plain Numpy Array
108
  # Reshape to (1, 11) -> 1 row, 11 features
109
  input_values = target_row[feature_names].values.reshape(1, -1)
110
 
111
+ # 2. Predict using the Numpy Array directly (Faster & Safer now)
112
+ # Since we disabled feature_names check, this will now work.
113
  prediction = model.predict(input_values)[0]
114
 
115
  df_extended.loc[date, 'modal_price'] = prediction
116
 
 
117
  daily_forecast_df = df_extended.loc[future_dates].copy()
118
  daily_forecast_df.rename(columns={'modal_price': 'forecast'}, inplace=True)
119
 
120
  print("--- PREDICTION SUCCESS ---")
 
 
121
  return daily_forecast_df
122
+
123
+
124
+
125
+ # import pandas as pd
126
+ # import numpy as np
127
+ # import joblib
128
+ # import os
129
+ # from fastapi import APIRouter, HTTPException
130
+ # from typing import List, Dict, Optional
131
+
132
+ # # --- Load Models & Data ---
133
+ # MODELS_DIR = 'models'
134
+ # models = {}
135
+
136
+ # feature_names = [
137
+ # 'dayofweek', 'dayofyear', 'month', 'year', 'quarter', 'weekofyear',
138
+ # 'price_lag_7', 'price_lag_14', 'price_lag_30', 'rolling_mean_30', 'rolling_std_30'
139
+ # ]
140
+
141
+ # # Ensure models dir exists
142
+ # if os.path.exists(MODELS_DIR):
143
+ # for model_file in os.listdir(MODELS_DIR):
144
+ # if model_file.endswith('.pkl'):
145
+ # commodity_name = model_file.replace('.pkl', '').replace('_', '/')
146
+ # models[commodity_name] = joblib.load(os.path.join(MODELS_DIR, model_file))
147
+ # print(f"✅ Model loaded for: {commodity_name}")
148
+
149
+ # try:
150
+ # # Ensure your CSV is accessible
151
+ # DF_FULL = pd.read_csv('final_output.csv', parse_dates=['created_at'], index_col='created_at')
152
+ # print("✅ Dataset loaded.")
153
+ # except FileNotFoundError:
154
+ # print("❌ 'final_output.csv' not found. Predictions will fail.")
155
+ # DF_FULL = None
156
+
157
+ # # --- Helper Functions ---
158
+
159
+ # def _create_features(df):
160
+ # df = df.copy()
161
+ # df['dayofweek'] = df.index.dayofweek
162
+ # df['dayofyear'] = df.index.dayofyear
163
+ # df['month'] = df.index.month
164
+ # df['year'] = df.index.year
165
+ # df['quarter'] = df.index.quarter
166
+ # df['weekofyear'] = df.index.isocalendar().week.astype(int)
167
+
168
+ # # Lags and Rolling features
169
+ # df['price_lag_7'] = df['modal_price'].shift(7)
170
+ # df['price_lag_14'] = df['modal_price'].shift(14)
171
+ # df['price_lag_30'] = df['modal_price'].shift(30)
172
+ # df['rolling_mean_30'] = df['modal_price'].shift(1).rolling(window=30).mean()
173
+ # df['rolling_std_30'] = df['modal_price'].shift(1).rolling(window=30).std()
174
+
175
+ # # CRITICAL: Do NOT dropna() here, or we lose the row we are trying to predict
176
+ # return df
177
+
178
+ # def get_market_prediction(model, df_full, commodity, last_known_date):
179
+ # """
180
+ # Iteratively predicts the next 180 days.
181
+ # Returns a DataFrame so the route can iterate over it.
182
+ # """
183
+ # print(f"--- PREDICTION STARTED FOR {commodity} ---")
184
+
185
+ # # --- FIX 1: DISABLE FEATURE NAME CHECK ---
186
+ # # This prevents the "X does not have valid feature names" error
187
+ # try:
188
+ # booster = model.get_booster()
189
+ # booster.feature_names = None
190
+ # except Exception:
191
+ # pass
192
+ # # -----------------------------------------
193
+
194
+ # df_commodity = df_full[df_full['commodity'] == commodity]
195
+ # df_daily = df_commodity.groupby(df_commodity.index).agg({'modal_price': 'mean'})
196
+
197
+ # future_dates = pd.date_range(start=last_known_date + pd.Timedelta(days=1), periods=180, freq='D')
198
+
199
+ # future_df = pd.DataFrame(index=future_dates)
200
+ # future_df['modal_price'] = np.nan
201
+
202
+ # df_extended = pd.concat([df_daily, future_df])
203
+
204
+ # for date in future_dates:
205
+ # subset = df_extended.loc[:date]
206
+ # if len(subset) < 30: continue
207
+
208
+ # featured_subset = _create_features(subset)
209
+ # target_row = featured_subset.iloc[-1]
210
+
211
+ # # Only check if features are NaN (modal_price is allowed to be NaN)
212
+ # if target_row[feature_names].isna().any():
213
+ # continue
214
+
215
+ # # --- FIX 2: Pass Pure Values ---
216
+ # # Reshape to (1, 11) -> 1 row, 11 features
217
+ # input_values = target_row[feature_names].values.reshape(1, -1)
218
+
219
+ # # Predict using the values (bypassing column name check)
220
+ # prediction = model.predict(input_values)[0]
221
+
222
+ # df_extended.loc[date, 'modal_price'] = prediction
223
+
224
+ # # Extract just the future part
225
+ # daily_forecast_df = df_extended.loc[future_dates].copy()
226
+ # daily_forecast_df.rename(columns={'modal_price': 'forecast'}, inplace=True)
227
+
228
+ # print("--- PREDICTION SUCCESS ---")
229
+
230
+ # # Returns DataFrame (Matches your route code)
231
+ # return daily_forecast_df