ShadowGard3n committed on
Commit
a0df58b
·
1 Parent(s): baa3ca8

Changes in deployment++++

Browse files
requirements.txt CHANGED
@@ -1,10 +1,10 @@
1
  fastapi
2
  uvicorn[standard]
3
- scikit-learn==1.2.2
4
  numpy
5
- xgboost==1.7.6
6
  joblib
7
- pandas==1.5.3
8
  matplotlib
9
  httpx
10
  python-dotenv
 
1
  fastapi
2
  uvicorn[standard]
3
+ scikit-learn
4
  numpy
5
+ xgboost
6
  joblib
7
+ pandas
8
  matplotlib
9
  httpx
10
  python-dotenv
routes/predictions.py CHANGED
@@ -201,25 +201,22 @@ from services.weather_service import get_weather_data_for_city, AIR_QUALITY_MAP,
201
  import os
202
  import joblib
203
  import pandas as pd
204
- import numpy as np
205
-
206
  router = APIRouter()
207
 
 
208
  MODELS_DIR = 'models'
209
  models = {}
210
 
211
- # Ensure models dir exists and load models
212
  if os.path.exists(MODELS_DIR):
213
  for model_file in os.listdir(MODELS_DIR):
214
  if model_file.endswith('.pkl'):
215
  commodity_name = model_file.replace('.pkl', '').replace('_', '/')
216
- try:
217
- models[commodity_name] = joblib.load(os.path.join(MODELS_DIR, model_file))
218
- print(f"✅ Model loaded for: {commodity_name}")
219
- except Exception as e:
220
- print(f"❌ Failed to load model {commodity_name}: {e}")
221
 
222
  try:
 
223
  DF_FULL = pd.read_csv('final_output.csv', parse_dates=['created_at'], index_col='created_at')
224
  print("✅ Dataset loaded.")
225
  except FileNotFoundError:
@@ -236,9 +233,15 @@ def predict_fertilizer(request: FertilizerPredictionRequest):
236
 
237
  @router.get("/api/predict/{commodity}")
238
  def predict_commodity_price(commodity: str):
 
 
 
 
 
239
  if DF_FULL is None:
240
  raise HTTPException(status_code=500, detail="Server Error: Dataset not loaded.")
241
 
 
242
  target_commodity = commodity.upper()
243
 
244
  if target_commodity not in models:
@@ -246,56 +249,45 @@ def predict_commodity_price(commodity: str):
246
 
247
  model = models[target_commodity]
248
 
249
- # Check history
250
  df_commodity = DF_FULL[DF_FULL['commodity'].str.upper() == target_commodity]
251
  if df_commodity.empty:
252
  raise HTTPException(status_code=404, detail="No historical data found for commodity")
253
 
254
- # Get last known date
255
  df_daily = df_commodity.groupby(df_commodity.index).agg({'modal_price': 'mean'})
256
  last_known_date = df_daily.index.max()
257
 
258
- # --- 1. Recent History Validation ---
 
 
 
 
 
259
  recent_data = []
260
- try:
261
- start_context_date = last_known_date - pd.Timedelta(days=90)
262
- df_featured = _create_features(df_daily)
263
- test_df = df_featured.loc[df_featured.index >= start_context_date].dropna()
264
-
265
- if not test_df.empty:
266
- FEATURES = [col for col in test_df.columns if col != 'modal_price']
267
-
268
- # FIX: Ensure DataFrame format for XGBoost
269
- X_input = pd.DataFrame(test_df[FEATURES].values, columns=FEATURES, index=test_df.index)
270
-
271
- predictions = model.predict(X_input)
272
-
273
  for date, actual, pred in zip(test_df.index, test_df['modal_price'], predictions):
274
  recent_data.append({
275
  "date": date.strftime('%Y-%m-%d'),
276
  "actual_price": float(actual),
277
  "predicted_price": float(pred)
278
  })
279
- except Exception as e:
280
- print(f"Warning: Could not generate history validation: {e}")
281
 
282
- # --- 2. Future Forecast ---
283
- future_data = []
284
  try:
 
285
  daily_forecast_df = get_market_prediction(model, DF_FULL, target_commodity, last_known_date)
286
 
 
287
  for date, row in daily_forecast_df.iterrows():
288
- price = row['forecast']
289
-
290
- # --- CRITICAL FIX: Handle NaN values safely ---
291
- if pd.isna(price) or np.isnan(price):
292
- final_price = None
293
- else:
294
- final_price = float(price)
295
-
296
  future_data.append({
297
  "date": date.strftime('%Y-%m-%d'),
298
- "forecast_price": final_price
299
  })
300
 
301
  except Exception as e:
@@ -308,7 +300,12 @@ def predict_commodity_price(commodity: str):
308
  "forecast_data": future_data
309
  }
310
 
311
- @router.post("/api/marketPrice", response_model=List[MarketPriceData])
 
 
 
 
 
312
  async def get_market_price(request: MarketPriceRequest):
313
  market_data = await fetch_market_data(request)
314
  return market_data
@@ -317,12 +314,15 @@ async def get_market_price(request: MarketPriceRequest):
317
  async def get_current_weather(city: str):
318
  try:
319
  weather_data = await get_weather_data_for_city(city)
 
320
  current_data = weather_data.get("current", {})
321
  location_data = weather_data.get("location", {})
322
  air_quality_data = current_data.get("air_quality", {})
 
323
  aqi_index = air_quality_data.get("us-epa-index")
 
324
 
325
- return WeatherResponse(
326
  location_name=location_data.get("name", "N/A"),
327
  temperature_c=current_data.get("temp_c"),
328
  condition=current_data.get("condition", {}).get("text", "N/A"),
@@ -330,22 +330,32 @@ async def get_current_weather(city: str):
330
  wind_kph=current_data.get("wind_kph"),
331
  cloud=current_data.get("cloud"),
332
  is_day=current_data.get("is_day"),
333
- air_quality=AIR_QUALITY_MAP.get(aqi_index, "Unknown")
334
  )
 
 
 
335
  except HTTPException as e:
336
  raise e
337
  except Exception as e:
338
  raise HTTPException(status_code=500, detail=f"An unexpected error occurred: {str(e)}")
 
339
 
340
- @router.get("/weather/forecast/{city}", response_model=ForecastResponse)
341
- async def get_weather_forecast(city: str, days: int = Query(default=1, ge=1, le=14)):
 
 
 
342
  try:
343
  forecast_data = await get_weather_forecast_for_city(city, days)
 
344
  location_data = forecast_data.get("location", {})
 
 
345
  processed_forecast_days = []
346
- for day_data in forecast_data.get("forecast", {}).get("forecastday", []):
347
  day_details = day_data.get("day", {})
348
- processed_forecast_days.append(DayForecast(
349
  date=day_data.get("date"),
350
  maxtemp_c=day_details.get("maxtemp_c"),
351
  mintemp_c=day_details.get("mintemp_c"),
@@ -354,12 +364,16 @@ async def get_weather_forecast(city: str, days: int = Query(default=1, ge=1, le=
354
  daily_chance_of_rain=day_details.get("daily_chance_of_rain", 0),
355
  avghumidity=day_details.get("avghumidity", 0),
356
  maxwind_kph=day_details.get("maxwind_kph", 0.0),
357
- ))
358
-
359
- return ForecastResponse(
 
360
  location_name=location_data.get("name", "N/A"),
361
  forecast_days=processed_forecast_days
362
  )
 
 
 
363
  except HTTPException as e:
364
  raise e
365
  except Exception as e:
 
201
  import os
202
  import joblib
203
  import pandas as pd
 
 
204
  router = APIRouter()
205
 
206
+
207
  MODELS_DIR = 'models'
208
  models = {}
209
 
210
+ # Ensure models dir exists
211
  if os.path.exists(MODELS_DIR):
212
  for model_file in os.listdir(MODELS_DIR):
213
  if model_file.endswith('.pkl'):
214
  commodity_name = model_file.replace('.pkl', '').replace('_', '/')
215
+ models[commodity_name] = joblib.load(os.path.join(MODELS_DIR, model_file))
216
+ print(f"✅ Model loaded for: {commodity_name}")
 
 
 
217
 
218
  try:
219
+ # Ensure your CSV is accessible
220
  DF_FULL = pd.read_csv('final_output.csv', parse_dates=['created_at'], index_col='created_at')
221
  print("✅ Dataset loaded.")
222
  except FileNotFoundError:
 
233
 
234
  @router.get("/api/predict/{commodity}")
235
  def predict_commodity_price(commodity: str):
236
+ # result = get_market_prediction(commodity)
237
+ # if "error" in result:
238
+ # raise HTTPException(status_code=404, detail=result["error"])
239
+ # return result
240
+
241
  if DF_FULL is None:
242
  raise HTTPException(status_code=500, detail="Server Error: Dataset not loaded.")
243
 
244
+ # 2. Check if Model exists (Normalize to Upper Case)
245
  target_commodity = commodity.upper()
246
 
247
  if target_commodity not in models:
 
249
 
250
  model = models[target_commodity]
251
 
252
+ # 3. Check if we have history for this commodity
253
  df_commodity = DF_FULL[DF_FULL['commodity'].str.upper() == target_commodity]
254
  if df_commodity.empty:
255
  raise HTTPException(status_code=404, detail="No historical data found for commodity")
256
 
257
+ # 4. Get the last known date
258
  df_daily = df_commodity.groupby(df_commodity.index).agg({'modal_price': 'mean'})
259
  last_known_date = df_daily.index.max()
260
 
261
+ # 5. Generate Recent History (for comparison chart)
262
+ # Get last 90 days of actual data
263
+ start_context_date = last_known_date - pd.Timedelta(days=90)
264
+ df_featured = _create_features(df_daily)
265
+ test_df = df_featured.loc[df_featured.index >= start_context_date]
266
+
267
  recent_data = []
268
+ if not test_df.empty:
269
+ FEATURES = [col for col in test_df.columns if col != 'modal_price']
270
+ try:
271
+ predictions = model.predict(test_df[FEATURES])
 
 
 
 
 
 
 
 
 
272
  for date, actual, pred in zip(test_df.index, test_df['modal_price'], predictions):
273
  recent_data.append({
274
  "date": date.strftime('%Y-%m-%d'),
275
  "actual_price": float(actual),
276
  "predicted_price": float(pred)
277
  })
278
+ except Exception as e:
279
+ print(f"Warning: Could not generate history validation: {e}")
280
 
281
+ # 6. Generate Future Forecast (Calling the helper function correctly!)
 
282
  try:
283
+ # HERE IS THE FIX: We pass all 4 arguments required by the helper
284
  daily_forecast_df = get_market_prediction(model, DF_FULL, target_commodity, last_known_date)
285
 
286
+ future_data = []
287
  for date, row in daily_forecast_df.iterrows():
 
 
 
 
 
 
 
 
288
  future_data.append({
289
  "date": date.strftime('%Y-%m-%d'),
290
+ "forecast_price": float(row['forecast'])
291
  })
292
 
293
  except Exception as e:
 
300
  "forecast_data": future_data
301
  }
302
 
303
+ @router.post(
304
+ "/api/marketPrice",
305
+ response_model=List[MarketPriceData],
306
+ summary="Fetch Agricultural Market Prices",
307
+ description="Retrieves daily market price data for a specific commodity, state, and APMC over the last 7 days."
308
+ )
309
  async def get_market_price(request: MarketPriceRequest):
310
  market_data = await fetch_market_data(request)
311
  return market_data
 
314
  async def get_current_weather(city: str):
315
  try:
316
  weather_data = await get_weather_data_for_city(city)
317
+
318
  current_data = weather_data.get("current", {})
319
  location_data = weather_data.get("location", {})
320
  air_quality_data = current_data.get("air_quality", {})
321
+
322
  aqi_index = air_quality_data.get("us-epa-index")
323
+ air_quality_description = AIR_QUALITY_MAP.get(aqi_index, "Unknown")
324
 
325
+ response_data = WeatherResponse(
326
  location_name=location_data.get("name", "N/A"),
327
  temperature_c=current_data.get("temp_c"),
328
  condition=current_data.get("condition", {}).get("text", "N/A"),
 
330
  wind_kph=current_data.get("wind_kph"),
331
  cloud=current_data.get("cloud"),
332
  is_day=current_data.get("is_day"),
333
+ air_quality=air_quality_description
334
  )
335
+
336
+ return response_data
337
+
338
  except HTTPException as e:
339
  raise e
340
  except Exception as e:
341
  raise HTTPException(status_code=500, detail=f"An unexpected error occurred: {str(e)}")
342
+
343
 
344
+ @router.get("/weather/forecast/{city}", response_model=ForecastResponse, summary="Get Weather Forecast")
345
+ async def get_weather_forecast(city: str, days: int = Query(default=1, ge=1, le=14, description="Number of days to forecast (between 1 and 14).")):
346
+ """
347
+ Retrieves the weather forecast for a specific city for a given number of days.
348
+ """
349
  try:
350
  forecast_data = await get_weather_forecast_for_city(city, days)
351
+
352
  location_data = forecast_data.get("location", {})
353
+ forecast_days_raw = forecast_data.get("forecast", {}).get("forecastday", [])
354
+
355
  processed_forecast_days = []
356
+ for day_data in forecast_days_raw:
357
  day_details = day_data.get("day", {})
358
+ processed_day = DayForecast(
359
  date=day_data.get("date"),
360
  maxtemp_c=day_details.get("maxtemp_c"),
361
  mintemp_c=day_details.get("mintemp_c"),
 
364
  daily_chance_of_rain=day_details.get("daily_chance_of_rain", 0),
365
  avghumidity=day_details.get("avghumidity", 0),
366
  maxwind_kph=day_details.get("maxwind_kph", 0.0),
367
+ )
368
+ processed_forecast_days.append(processed_day)
369
+
370
+ response_data = ForecastResponse(
371
  location_name=location_data.get("name", "N/A"),
372
  forecast_days=processed_forecast_days
373
  )
374
+
375
+ return response_data
376
+
377
  except HTTPException as e:
378
  raise e
379
  except Exception as e:
services/market_services.py CHANGED
@@ -10,18 +10,16 @@ from typing import List, Dict, Optional
10
  MODELS_DIR = 'models'
11
  models = {}
12
 
 
13
  if os.path.exists(MODELS_DIR):
14
  for model_file in os.listdir(MODELS_DIR):
15
  if model_file.endswith('.pkl'):
16
- # Normalize filename to commodity name
17
  commodity_name = model_file.replace('.pkl', '').replace('_', '/')
18
- try:
19
- models[commodity_name] = joblib.load(os.path.join(MODELS_DIR, model_file))
20
- print(f"✅ Model loaded for: {commodity_name}")
21
- except Exception as e:
22
- print(f"❌ Failed to load model {commodity_name}: {e}")
23
 
24
  try:
 
25
  DF_FULL = pd.read_csv('final_output.csv', parse_dates=['created_at'], index_col='created_at')
26
  print("✅ Dataset loaded.")
27
  except FileNotFoundError:
@@ -38,73 +36,57 @@ def _create_features(df):
38
  df['year'] = df.index.year
39
  df['quarter'] = df.index.quarter
40
  df['weekofyear'] = df.index.isocalendar().week.astype(int)
41
-
42
- # Lag features
43
  df['price_lag_7'] = df['modal_price'].shift(7)
44
  df['price_lag_14'] = df['modal_price'].shift(14)
45
  df['price_lag_30'] = df['modal_price'].shift(30)
46
-
47
- # Rolling window features
48
  df['rolling_mean_30'] = df['modal_price'].shift(1).rolling(window=30).mean()
49
  df['rolling_std_30'] = df['modal_price'].shift(1).rolling(window=30).std()
50
-
51
- # CRITICAL: Do NOT dropna() here. We need the future row (which has NaNs) to survive
52
- # so we can predict it.
53
- return df
54
 
55
  def get_market_prediction(model, df_full, commodity, last_known_date):
56
  """
57
  Iteratively predicts the next 180 days.
58
  """
59
- # 1. Filter data for the commodity
60
- df_commodity = df_full[df_full['commodity'].str.upper() == commodity.upper()]
61
  df_daily = df_commodity.groupby(df_commodity.index).agg({'modal_price': 'mean'})
62
 
63
- # 2. Setup future dates
64
  future_dates = pd.date_range(start=last_known_date + pd.Timedelta(days=1), periods=180, freq='D')
65
 
 
66
  future_df = pd.DataFrame(index=future_dates)
67
  future_df['modal_price'] = np.nan
68
 
69
- # 3. Combine history and future placeholder
70
  df_extended = pd.concat([df_daily, future_df])
71
 
72
- # 4. Determine feature columns from a valid historical sample
73
- valid_sample = _create_features(df_daily.tail(50)).dropna()
74
- FEATURES = [col for col in valid_sample.columns if col != 'modal_price']
75
-
76
  for date in future_dates:
77
- # We need historical context to calculate rolling averages/lags
 
78
  subset = df_extended.loc[:date]
 
79
 
80
- # Safety check: ensure we have enough data for 30-day rolling window
81
- if len(subset) < 35: continue
82
 
83
- # Generate features
84
- featured_subset = _create_features(subset)
 
85
 
86
- # Get the row for the prediction date
87
- if date not in featured_subset.index: continue
88
- featured_row = featured_subset.loc[[date]]
89
 
90
- # --- FIX: Ensure strict DataFrame format for XGBoost ---
91
- try:
92
- # Reconstruct DataFrame with explicit columns to satisfy XGBoost
93
- X_input = pd.DataFrame(featured_row[FEATURES].values, columns=FEATURES, index=featured_row.index)
94
-
95
- prediction = model.predict(X_input)[0]
96
- df_extended.loc[date, 'modal_price'] = prediction
97
- except Exception as e:
98
- # If prediction fails, we break. The NaNs will remain and be handled by the route.
99
- print(f"Prediction error for {date}: {e}")
100
- break
101
 
 
102
  daily_forecast_df = df_extended.loc[future_dates].copy()
103
  daily_forecast_df.rename(columns={'modal_price': 'forecast'}, inplace=True)
104
 
105
  return daily_forecast_df
106
 
107
 
 
 
108
  # import pandas as pd
109
  # import numpy as np
110
  # import joblib
 
10
  MODELS_DIR = 'models'
11
  models = {}
12
 
13
+ # Ensure models dir exists
14
  if os.path.exists(MODELS_DIR):
15
  for model_file in os.listdir(MODELS_DIR):
16
  if model_file.endswith('.pkl'):
 
17
  commodity_name = model_file.replace('.pkl', '').replace('_', '/')
18
+ models[commodity_name] = joblib.load(os.path.join(MODELS_DIR, model_file))
19
+ print(f"✅ Model loaded for: {commodity_name}")
 
 
 
20
 
21
  try:
22
+ # Ensure your CSV is accessible
23
  DF_FULL = pd.read_csv('final_output.csv', parse_dates=['created_at'], index_col='created_at')
24
  print("✅ Dataset loaded.")
25
  except FileNotFoundError:
 
36
  df['year'] = df.index.year
37
  df['quarter'] = df.index.quarter
38
  df['weekofyear'] = df.index.isocalendar().week.astype(int)
39
+ # Lags and Rolling features
 
40
  df['price_lag_7'] = df['modal_price'].shift(7)
41
  df['price_lag_14'] = df['modal_price'].shift(14)
42
  df['price_lag_30'] = df['modal_price'].shift(30)
 
 
43
  df['rolling_mean_30'] = df['modal_price'].shift(1).rolling(window=30).mean()
44
  df['rolling_std_30'] = df['modal_price'].shift(1).rolling(window=30).std()
45
+ return df.dropna()
 
 
 
46
 
47
  def get_market_prediction(model, df_full, commodity, last_known_date):
48
  """
49
  Iteratively predicts the next 180 days.
50
  """
51
+ df_commodity = df_full[df_full['commodity'] == commodity]
 
52
  df_daily = df_commodity.groupby(df_commodity.index).agg({'modal_price': 'mean'})
53
 
 
54
  future_dates = pd.date_range(start=last_known_date + pd.Timedelta(days=1), periods=180, freq='D')
55
 
56
+ # Create a container for future data
57
  future_df = pd.DataFrame(index=future_dates)
58
  future_df['modal_price'] = np.nan
59
 
60
+ # Append future placeholder to history so we can calculate lags on the fly
61
  df_extended = pd.concat([df_daily, future_df])
62
 
 
 
 
 
63
  for date in future_dates:
64
+ # Create features for the specific day (uses previous days' data for lags)
65
+ # Note: We take a slice up to 'date' to generate features dynamically
66
  subset = df_extended.loc[:date]
67
+ if len(subset) < 30: continue # Safety check for rolling windows
68
 
69
+ featured_row = _create_features(subset).iloc[-1:]
 
70
 
71
+ if featured_row.empty: continue
72
+
73
+ FEATURES = [col for col in featured_row.columns if col != 'modal_price']
74
 
75
+ # Predict
76
+ prediction = model.predict(featured_row[FEATURES])[0]
 
77
 
78
+ # Update the dataframe so the next loop can use this prediction for its lag features
79
+ df_extended.loc[date, 'modal_price'] = prediction
 
 
 
 
 
 
 
 
 
80
 
81
+ # Extract just the future part
82
  daily_forecast_df = df_extended.loc[future_dates].copy()
83
  daily_forecast_df.rename(columns={'modal_price': 'forecast'}, inplace=True)
84
 
85
  return daily_forecast_df
86
 
87
 
88
+
89
+
90
  # import pandas as pd
91
  # import numpy as np
92
  # import joblib