Spaces:

ShadowGard3n
/

AgroVision-Backend

Running

App Files Files Community

ShadowGard3n committed 21 days ago

Commit

db023ce

1 Parent(s): 1ce5332

Changes in deployment+

Browse files

Files changed (2) hide show

routes/predictions.py +27 -23
services/market_services.py +23 -21

routes/predictions.py CHANGED Viewed

@@ -208,10 +208,10 @@ router = APIRouter()
 MODELS_DIR = 'models'
 models = {}
 if os.path.exists(MODELS_DIR):
     for model_file in os.listdir(MODELS_DIR):
         if model_file.endswith('.pkl'):
-            # Normalize filename
             commodity_name = model_file.replace('.pkl', '').replace('_', '/')
             try:
                 models[commodity_name] = joblib.load(os.path.join(MODELS_DIR, model_file))
@@ -241,54 +241,60 @@ def predict_commodity_price(commodity: str):
     target_commodity = commodity.upper()
-    # Normalize keys in models dict to handle potential mismatches
-    # (Optional robustness step, relies on loading logic)
     if target_commodity not in models:
         raise HTTPException(status_code=404, detail=f"Model for '{commodity}' not found.")
     model = models[target_commodity]
     df_commodity = DF_FULL[DF_FULL['commodity'].str.upper() == target_commodity]
     if df_commodity.empty:
          raise HTTPException(status_code=404, detail="No historical data found for commodity")
     df_daily = df_commodity.groupby(df_commodity.index).agg({'modal_price': 'mean'})
     last_known_date = df_daily.index.max()
-    # Generate Recent History
-    start_context_date = last_known_date - pd.Timedelta(days=90)
-    # _create_features now safely handles NaNs, but for history we have full data
-    df_featured = _create_features(df_daily)
-    test_df = df_featured.loc[df_featured.index >= start_context_date]
     recent_data = []
-    if not test_df.empty:
-        FEATURES = [col for col in test_df.columns if col != 'modal_price']
-        # CRITICAL FIX: Explicitly cast to DataFrame to preserve feature names for XGBoost
-        X_input = pd.DataFrame(test_df[FEATURES].values, columns=FEATURES, index=test_df.index)
-        try:
             predictions = model.predict(X_input)
             for date, actual, pred in zip(test_df.index, test_df['modal_price'], predictions):
                 recent_data.append({
                     "date": date.strftime('%Y-%m-%d'),
                     "actual_price": float(actual),
                     "predicted_price": float(pred)
                 })
-        except Exception as e:
-            print(f"Warning: Could not generate history validation: {e}")
-    # Generate Future Forecast
     try:
         daily_forecast_df = get_market_prediction(model, DF_FULL, target_commodity, last_known_date)
-        future_data = []
         for date, row in daily_forecast_df.iterrows():
             future_data.append({
                 "date": date.strftime('%Y-%m-%d'),
-                "forecast_price": float(row['forecast'])
             })
     except Exception as e:
@@ -310,7 +316,6 @@ async def get_market_price(request: MarketPriceRequest):
 async def get_current_weather(city: str):
     try:
         weather_data = await get_weather_data_for_city(city)
-        # ... (rest of the weather logic remains unchanged)
         current_data = weather_data.get("current", {})
         location_data = weather_data.get("location", {})
         air_quality_data = current_data.get("air_quality", {})
@@ -335,7 +340,6 @@ async def get_current_weather(city: str):
 async def get_weather_forecast(city: str, days: int = Query(default=1, ge=1, le=14)):
     try:
         forecast_data = await get_weather_forecast_for_city(city, days)
-        # ... (rest of forecast logic remains unchanged)
         location_data = forecast_data.get("location", {})
         processed_forecast_days = []
         for day_data in forecast_data.get("forecast", {}).get("forecastday", []):

 MODELS_DIR = 'models'
 models = {}
+# Ensure models dir exists and load models
 if os.path.exists(MODELS_DIR):
     for model_file in os.listdir(MODELS_DIR):
         if model_file.endswith('.pkl'):
             commodity_name = model_file.replace('.pkl', '').replace('_', '/')
             try:
                 models[commodity_name] = joblib.load(os.path.join(MODELS_DIR, model_file))
     target_commodity = commodity.upper()
     if target_commodity not in models:
         raise HTTPException(status_code=404, detail=f"Model for '{commodity}' not found.")
     model = models[target_commodity]
+    # Check history
     df_commodity = DF_FULL[DF_FULL['commodity'].str.upper() == target_commodity]
     if df_commodity.empty:
          raise HTTPException(status_code=404, detail="No historical data found for commodity")
+    # Get last known date
     df_daily = df_commodity.groupby(df_commodity.index).agg({'modal_price': 'mean'})
     last_known_date = df_daily.index.max()
+    # --- 1. Recent History Validation ---
     recent_data = []
+    try:
+        start_context_date = last_known_date - pd.Timedelta(days=90)
+        df_featured = _create_features(df_daily)
+        test_df = df_featured.loc[df_featured.index >= start_context_date].dropna()
+        if not test_df.empty:
+            FEATURES = [col for col in test_df.columns if col != 'modal_price']
+            # FIX: Use DataFrame with columns for XGBoost
+            X_input = pd.DataFrame(test_df[FEATURES].values, columns=FEATURES, index=test_df.index)
             predictions = model.predict(X_input)
             for date, actual, pred in zip(test_df.index, test_df['modal_price'], predictions):
                 recent_data.append({
                     "date": date.strftime('%Y-%m-%d'),
                     "actual_price": float(actual),
                     "predicted_price": float(pred)
                 })
+    except Exception as e:
+        print(f"Warning: Could not generate history validation: {e}")
+    # --- 2. Future Forecast ---
+    future_data = []
     try:
         daily_forecast_df = get_market_prediction(model, DF_FULL, target_commodity, last_known_date)
         for date, row in daily_forecast_df.iterrows():
+            price = row['forecast']
+            # --- CRITICAL FIX: Handle NaN values to prevent JSON crash ---
+            if pd.isna(price) or np.isnan(price):
+                final_price = None
+            else:
+                final_price = float(price)
             future_data.append({
                 "date": date.strftime('%Y-%m-%d'),
+                "forecast_price": final_price
             })
     except Exception as e:
 async def get_current_weather(city: str):
     try:
         weather_data = await get_weather_data_for_city(city)
         current_data = weather_data.get("current", {})
         location_data = weather_data.get("location", {})
         air_quality_data = current_data.get("air_quality", {})
 async def get_weather_forecast(city: str, days: int = Query(default=1, ge=1, le=14)):
     try:
         forecast_data = await get_weather_forecast_for_city(city, days)
         location_data = forecast_data.get("location", {})
         processed_forecast_days = []
         for day_data in forecast_data.get("forecast", {}).get("forecastday", []):

services/market_services.py CHANGED Viewed

@@ -13,7 +13,7 @@ models = {}
 if os.path.exists(MODELS_DIR):
     for model_file in os.listdir(MODELS_DIR):
         if model_file.endswith('.pkl'):
-            # Normalize filename to commodity name
             commodity_name = model_file.replace('.pkl', '').replace('_', '/')
             try:
                 models[commodity_name] = joblib.load(os.path.join(MODELS_DIR, model_file))
@@ -48,56 +48,58 @@ def _create_features(df):
     df['rolling_mean_30'] = df['modal_price'].shift(1).rolling(window=30).mean()
     df['rolling_std_30'] = df['modal_price'].shift(1).rolling(window=30).std()
-    # CRITICAL FIX: Only drop rows where FEATURES are NaN, keep the target row even if modal_price is NaN
-    # We drop based on the longest lag (30 days) to ensure features are valid
-    return df.dropna(subset=['rolling_std_30', 'price_lag_30'])
 def get_market_prediction(model, df_full, commodity, last_known_date):
     """
     Iteratively predicts the next 180 days.
     """
     df_commodity = df_full[df_full['commodity'].str.upper() == commodity.upper()]
     df_daily = df_commodity.groupby(df_commodity.index).agg({'modal_price': 'mean'})
     future_dates = pd.date_range(start=last_known_date + pd.Timedelta(days=1), periods=180, freq='D')
     future_df = pd.DataFrame(index=future_dates)
     future_df['modal_price'] = np.nan
     df_extended = pd.concat([df_daily, future_df])
-    # Pre-calculate feature names once to ensure consistency
-    sample_features = _create_features(df_daily.tail(40))
-    FEATURES = [col for col in sample_features.columns if col != 'modal_price']
     for date in future_dates:
-        # We need enough history to calculate lags
         subset = df_extended.loc[:date]
         if len(subset) < 35: continue
-        # Create features
-        # Note: This returns a DataFrame
         featured_subset = _create_features(subset)
-        if featured_subset.empty: continue
-        # Get the row for the specific date we are predicting
-        # Because we fixed _create_features, this row should exist now
-        if date not in featured_subset.index:
-            continue
         featured_row = featured_subset.loc[[date]]
-        # CRITICAL FIX: Ensure input is a DataFrame with strict column names
-        X_input = pd.DataFrame(featured_row[FEATURES].values, columns=FEATURES)
         try:
             prediction = model.predict(X_input)[0]
-            # Update the dataframe
             df_extended.loc[date, 'modal_price'] = prediction
         except Exception as e:
             print(f"Prediction error for {date}: {e}")
             break
     daily_forecast_df = df_extended.loc[future_dates].copy()
     daily_forecast_df.rename(columns={'modal_price': 'forecast'}, inplace=True)

 if os.path.exists(MODELS_DIR):
     for model_file in os.listdir(MODELS_DIR):
         if model_file.endswith('.pkl'):
+            # Normalize filename to commodity name (handle slash replacement if needed)
             commodity_name = model_file.replace('.pkl', '').replace('_', '/')
             try:
                 models[commodity_name] = joblib.load(os.path.join(MODELS_DIR, model_file))
     df['rolling_mean_30'] = df['modal_price'].shift(1).rolling(window=30).mean()
     df['rolling_std_30'] = df['modal_price'].shift(1).rolling(window=30).std()
+    # Return features. Note: We do NOT dropna here because we need to generate
+    # features for the future row even if it has NaNs initially.
+    return df
 def get_market_prediction(model, df_full, commodity, last_known_date):
     """
     Iteratively predicts the next 180 days.
     """
+    # 1. Filter data for the commodity
     df_commodity = df_full[df_full['commodity'].str.upper() == commodity.upper()]
     df_daily = df_commodity.groupby(df_commodity.index).agg({'modal_price': 'mean'})
+    # 2. Setup future dates
     future_dates = pd.date_range(start=last_known_date + pd.Timedelta(days=1), periods=180, freq='D')
     future_df = pd.DataFrame(index=future_dates)
     future_df['modal_price'] = np.nan
+    # 3. Combine history and future placeholder
     df_extended = pd.concat([df_daily, future_df])
+    # 4. Get the correct list of features from a valid sample
+    # We take the last 50 days of history, build features, and drop incomplete rows to determine feature columns
+    valid_sample = _create_features(df_daily.tail(50)).dropna()
+    FEATURES = [col for col in valid_sample.columns if col != 'modal_price']
     for date in future_dates:
+        # We need historical context to calculate rolling averages/lags
         subset = df_extended.loc[:date]
+        # Safety check: ensure we have enough data for 30-day rolling window
         if len(subset) < 35: continue
+        # Generate features for this specific date
         featured_subset = _create_features(subset)
+        # Extract the row for the current prediction date
+        if date not in featured_subset.index: continue
         featured_row = featured_subset.loc[[date]]
+        # --- CRITICAL FIX START ---
+        # XGBoost requires a DataFrame with specific column names.
+        # We explicitly recreate the DataFrame to ensure headers are present.
         try:
+            X_input = pd.DataFrame(featured_row[FEATURES].values, columns=FEATURES, index=featured_row.index)
             prediction = model.predict(X_input)[0]
             df_extended.loc[date, 'modal_price'] = prediction
         except Exception as e:
+            # Log the error and stop forecasting; remaining future dates keep their NaN placeholder
             print(f"Prediction error for {date}: {e}")
             break
+        # --- CRITICAL FIX END ---
     daily_forecast_df = df_extended.loc[future_dates].copy()
     daily_forecast_df.rename(columns={'modal_price': 'forecast'}, inplace=True)