Spaces:
Running
Running
Commit
·
db023ce
1
Parent(s):
1ce5332
Changes in deployment+
Browse files- routes/predictions.py +27 -23
- services/market_services.py +23 -21
routes/predictions.py
CHANGED
|
@@ -208,10 +208,10 @@ router = APIRouter()
|
|
| 208 |
MODELS_DIR = 'models'
|
| 209 |
models = {}
|
| 210 |
|
|
|
|
| 211 |
if os.path.exists(MODELS_DIR):
|
| 212 |
for model_file in os.listdir(MODELS_DIR):
|
| 213 |
if model_file.endswith('.pkl'):
|
| 214 |
-
# Normalize filename
|
| 215 |
commodity_name = model_file.replace('.pkl', '').replace('_', '/')
|
| 216 |
try:
|
| 217 |
models[commodity_name] = joblib.load(os.path.join(MODELS_DIR, model_file))
|
|
@@ -241,54 +241,60 @@ def predict_commodity_price(commodity: str):
|
|
| 241 |
|
| 242 |
target_commodity = commodity.upper()
|
| 243 |
|
| 244 |
-
# Normalize keys in models dict to handle potential mismatches
|
| 245 |
-
# (Optional robustness step, relies on loading logic)
|
| 246 |
if target_commodity not in models:
|
| 247 |
raise HTTPException(status_code=404, detail=f"Model for '{commodity}' not found.")
|
| 248 |
|
| 249 |
model = models[target_commodity]
|
| 250 |
|
|
|
|
| 251 |
df_commodity = DF_FULL[DF_FULL['commodity'].str.upper() == target_commodity]
|
| 252 |
if df_commodity.empty:
|
| 253 |
raise HTTPException(status_code=404, detail="No historical data found for commodity")
|
| 254 |
|
|
|
|
| 255 |
df_daily = df_commodity.groupby(df_commodity.index).agg({'modal_price': 'mean'})
|
| 256 |
last_known_date = df_daily.index.max()
|
| 257 |
|
| 258 |
-
#
|
| 259 |
-
start_context_date = last_known_date - pd.Timedelta(days=90)
|
| 260 |
-
|
| 261 |
-
# _create_features now safely handles NaNs, but for history we have full data
|
| 262 |
-
df_featured = _create_features(df_daily)
|
| 263 |
-
test_df = df_featured.loc[df_featured.index >= start_context_date]
|
| 264 |
-
|
| 265 |
recent_data = []
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
X_input = pd.DataFrame(test_df[FEATURES].values, columns=FEATURES, index=test_df.index)
|
| 271 |
|
| 272 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 273 |
predictions = model.predict(X_input)
|
|
|
|
| 274 |
for date, actual, pred in zip(test_df.index, test_df['modal_price'], predictions):
|
| 275 |
recent_data.append({
|
| 276 |
"date": date.strftime('%Y-%m-%d'),
|
| 277 |
"actual_price": float(actual),
|
| 278 |
"predicted_price": float(pred)
|
| 279 |
})
|
| 280 |
-
|
| 281 |
-
|
| 282 |
|
| 283 |
-
#
|
|
|
|
| 284 |
try:
|
| 285 |
daily_forecast_df = get_market_prediction(model, DF_FULL, target_commodity, last_known_date)
|
| 286 |
|
| 287 |
-
future_data = []
|
| 288 |
for date, row in daily_forecast_df.iterrows():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 289 |
future_data.append({
|
| 290 |
"date": date.strftime('%Y-%m-%d'),
|
| 291 |
-
"forecast_price":
|
| 292 |
})
|
| 293 |
|
| 294 |
except Exception as e:
|
|
@@ -310,7 +316,6 @@ async def get_market_price(request: MarketPriceRequest):
|
|
| 310 |
async def get_current_weather(city: str):
|
| 311 |
try:
|
| 312 |
weather_data = await get_weather_data_for_city(city)
|
| 313 |
-
# ... (rest of the weather logic remains unchanged)
|
| 314 |
current_data = weather_data.get("current", {})
|
| 315 |
location_data = weather_data.get("location", {})
|
| 316 |
air_quality_data = current_data.get("air_quality", {})
|
|
@@ -335,7 +340,6 @@ async def get_current_weather(city: str):
|
|
| 335 |
async def get_weather_forecast(city: str, days: int = Query(default=1, ge=1, le=14)):
|
| 336 |
try:
|
| 337 |
forecast_data = await get_weather_forecast_for_city(city, days)
|
| 338 |
-
# ... (rest of forecast logic remains unchanged)
|
| 339 |
location_data = forecast_data.get("location", {})
|
| 340 |
processed_forecast_days = []
|
| 341 |
for day_data in forecast_data.get("forecast", {}).get("forecastday", []):
|
|
|
|
| 208 |
MODELS_DIR = 'models'
|
| 209 |
models = {}
|
| 210 |
|
| 211 |
+
# Ensure models dir exists and load models
|
| 212 |
if os.path.exists(MODELS_DIR):
|
| 213 |
for model_file in os.listdir(MODELS_DIR):
|
| 214 |
if model_file.endswith('.pkl'):
|
|
|
|
| 215 |
commodity_name = model_file.replace('.pkl', '').replace('_', '/')
|
| 216 |
try:
|
| 217 |
models[commodity_name] = joblib.load(os.path.join(MODELS_DIR, model_file))
|
|
|
|
| 241 |
|
| 242 |
target_commodity = commodity.upper()
|
| 243 |
|
|
|
|
|
|
|
| 244 |
if target_commodity not in models:
|
| 245 |
raise HTTPException(status_code=404, detail=f"Model for '{commodity}' not found.")
|
| 246 |
|
| 247 |
model = models[target_commodity]
|
| 248 |
|
| 249 |
+
# Check history
|
| 250 |
df_commodity = DF_FULL[DF_FULL['commodity'].str.upper() == target_commodity]
|
| 251 |
if df_commodity.empty:
|
| 252 |
raise HTTPException(status_code=404, detail="No historical data found for commodity")
|
| 253 |
|
| 254 |
+
# Get last known date
|
| 255 |
df_daily = df_commodity.groupby(df_commodity.index).agg({'modal_price': 'mean'})
|
| 256 |
last_known_date = df_daily.index.max()
|
| 257 |
|
| 258 |
+
# --- 1. Recent History Validation ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 259 |
recent_data = []
|
| 260 |
+
try:
|
| 261 |
+
start_context_date = last_known_date - pd.Timedelta(days=90)
|
| 262 |
+
df_featured = _create_features(df_daily)
|
| 263 |
+
test_df = df_featured.loc[df_featured.index >= start_context_date].dropna()
|
|
|
|
| 264 |
|
| 265 |
+
if not test_df.empty:
|
| 266 |
+
FEATURES = [col for col in test_df.columns if col != 'modal_price']
|
| 267 |
+
# FIX: Use DataFrame with columns for XGBoost
|
| 268 |
+
X_input = pd.DataFrame(test_df[FEATURES].values, columns=FEATURES, index=test_df.index)
|
| 269 |
+
|
| 270 |
predictions = model.predict(X_input)
|
| 271 |
+
|
| 272 |
for date, actual, pred in zip(test_df.index, test_df['modal_price'], predictions):
|
| 273 |
recent_data.append({
|
| 274 |
"date": date.strftime('%Y-%m-%d'),
|
| 275 |
"actual_price": float(actual),
|
| 276 |
"predicted_price": float(pred)
|
| 277 |
})
|
| 278 |
+
except Exception as e:
|
| 279 |
+
print(f"Warning: Could not generate history validation: {e}")
|
| 280 |
|
| 281 |
+
# --- 2. Future Forecast ---
|
| 282 |
+
future_data = []
|
| 283 |
try:
|
| 284 |
daily_forecast_df = get_market_prediction(model, DF_FULL, target_commodity, last_known_date)
|
| 285 |
|
|
|
|
| 286 |
for date, row in daily_forecast_df.iterrows():
|
| 287 |
+
price = row['forecast']
|
| 288 |
+
|
| 289 |
+
# --- CRITICAL FIX: Handle NaN values to prevent JSON crash ---
|
| 290 |
+
if pd.isna(price) or np.isnan(price):
|
| 291 |
+
final_price = None
|
| 292 |
+
else:
|
| 293 |
+
final_price = float(price)
|
| 294 |
+
|
| 295 |
future_data.append({
|
| 296 |
"date": date.strftime('%Y-%m-%d'),
|
| 297 |
+
"forecast_price": final_price
|
| 298 |
})
|
| 299 |
|
| 300 |
except Exception as e:
|
|
|
|
| 316 |
async def get_current_weather(city: str):
|
| 317 |
try:
|
| 318 |
weather_data = await get_weather_data_for_city(city)
|
|
|
|
| 319 |
current_data = weather_data.get("current", {})
|
| 320 |
location_data = weather_data.get("location", {})
|
| 321 |
air_quality_data = current_data.get("air_quality", {})
|
|
|
|
| 340 |
async def get_weather_forecast(city: str, days: int = Query(default=1, ge=1, le=14)):
|
| 341 |
try:
|
| 342 |
forecast_data = await get_weather_forecast_for_city(city, days)
|
|
|
|
| 343 |
location_data = forecast_data.get("location", {})
|
| 344 |
processed_forecast_days = []
|
| 345 |
for day_data in forecast_data.get("forecast", {}).get("forecastday", []):
|
services/market_services.py
CHANGED
|
@@ -13,7 +13,7 @@ models = {}
|
|
| 13 |
if os.path.exists(MODELS_DIR):
|
| 14 |
for model_file in os.listdir(MODELS_DIR):
|
| 15 |
if model_file.endswith('.pkl'):
|
| 16 |
-
# Normalize filename to commodity name
|
| 17 |
commodity_name = model_file.replace('.pkl', '').replace('_', '/')
|
| 18 |
try:
|
| 19 |
models[commodity_name] = joblib.load(os.path.join(MODELS_DIR, model_file))
|
|
@@ -48,56 +48,58 @@ def _create_features(df):
|
|
| 48 |
df['rolling_mean_30'] = df['modal_price'].shift(1).rolling(window=30).mean()
|
| 49 |
df['rolling_std_30'] = df['modal_price'].shift(1).rolling(window=30).std()
|
| 50 |
|
| 51 |
-
#
|
| 52 |
-
#
|
| 53 |
-
return df
|
| 54 |
|
| 55 |
def get_market_prediction(model, df_full, commodity, last_known_date):
|
| 56 |
"""
|
| 57 |
Iteratively predicts the next 180 days.
|
| 58 |
"""
|
|
|
|
| 59 |
df_commodity = df_full[df_full['commodity'].str.upper() == commodity.upper()]
|
| 60 |
df_daily = df_commodity.groupby(df_commodity.index).agg({'modal_price': 'mean'})
|
| 61 |
|
|
|
|
| 62 |
future_dates = pd.date_range(start=last_known_date + pd.Timedelta(days=1), periods=180, freq='D')
|
| 63 |
|
| 64 |
future_df = pd.DataFrame(index=future_dates)
|
| 65 |
future_df['modal_price'] = np.nan
|
| 66 |
|
|
|
|
| 67 |
df_extended = pd.concat([df_daily, future_df])
|
| 68 |
|
| 69 |
-
#
|
| 70 |
-
|
| 71 |
-
|
|
|
|
| 72 |
|
| 73 |
for date in future_dates:
|
| 74 |
-
# We need
|
| 75 |
subset = df_extended.loc[:date]
|
|
|
|
|
|
|
| 76 |
if len(subset) < 35: continue
|
| 77 |
|
| 78 |
-
#
|
| 79 |
-
# Note: This returns a DataFrame
|
| 80 |
featured_subset = _create_features(subset)
|
| 81 |
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
# Get the row for the specific date we are predicting
|
| 85 |
-
# Because we fixed _create_features, this row should exist now
|
| 86 |
-
if date not in featured_subset.index:
|
| 87 |
-
continue
|
| 88 |
-
|
| 89 |
featured_row = featured_subset.loc[[date]]
|
| 90 |
|
| 91 |
-
# CRITICAL FIX
|
| 92 |
-
|
| 93 |
-
|
| 94 |
try:
|
|
|
|
| 95 |
prediction = model.predict(X_input)[0]
|
| 96 |
-
# Update the dataframe
|
| 97 |
df_extended.loc[date, 'modal_price'] = prediction
|
| 98 |
except Exception as e:
|
|
|
|
| 99 |
print(f"Prediction error for {date}: {e}")
|
| 100 |
break
|
|
|
|
| 101 |
|
| 102 |
daily_forecast_df = df_extended.loc[future_dates].copy()
|
| 103 |
daily_forecast_df.rename(columns={'modal_price': 'forecast'}, inplace=True)
|
|
|
|
| 13 |
if os.path.exists(MODELS_DIR):
|
| 14 |
for model_file in os.listdir(MODELS_DIR):
|
| 15 |
if model_file.endswith('.pkl'):
|
| 16 |
+
# Normalize filename to commodity name (handle slash replacement if needed)
|
| 17 |
commodity_name = model_file.replace('.pkl', '').replace('_', '/')
|
| 18 |
try:
|
| 19 |
models[commodity_name] = joblib.load(os.path.join(MODELS_DIR, model_file))
|
|
|
|
| 48 |
df['rolling_mean_30'] = df['modal_price'].shift(1).rolling(window=30).mean()
|
| 49 |
df['rolling_std_30'] = df['modal_price'].shift(1).rolling(window=30).std()
|
| 50 |
|
| 51 |
+
# Return features. Note: We do NOT dropna here because we need to generate
|
| 52 |
+
# features for the future row even if it has NaNs initially.
|
| 53 |
+
return df
|
| 54 |
|
| 55 |
def get_market_prediction(model, df_full, commodity, last_known_date):
|
| 56 |
"""
|
| 57 |
Iteratively predicts the next 180 days.
|
| 58 |
"""
|
| 59 |
+
# 1. Filter data for the commodity
|
| 60 |
df_commodity = df_full[df_full['commodity'].str.upper() == commodity.upper()]
|
| 61 |
df_daily = df_commodity.groupby(df_commodity.index).agg({'modal_price': 'mean'})
|
| 62 |
|
| 63 |
+
# 2. Setup future dates
|
| 64 |
future_dates = pd.date_range(start=last_known_date + pd.Timedelta(days=1), periods=180, freq='D')
|
| 65 |
|
| 66 |
future_df = pd.DataFrame(index=future_dates)
|
| 67 |
future_df['modal_price'] = np.nan
|
| 68 |
|
| 69 |
+
# 3. Combine history and future placeholder
|
| 70 |
df_extended = pd.concat([df_daily, future_df])
|
| 71 |
|
| 72 |
+
# 4. Get the correct list of features from a valid sample
|
| 73 |
+
# Build features on the last 50 history rows (rows with incomplete features are dropped) only to read the feature column names
|
| 74 |
+
valid_sample = _create_features(df_daily.tail(50)).dropna()
|
| 75 |
+
FEATURES = [col for col in valid_sample.columns if col != 'modal_price']
|
| 76 |
|
| 77 |
for date in future_dates:
|
| 78 |
+
# We need historical context to calculate rolling averages/lags
|
| 79 |
subset = df_extended.loc[:date]
|
| 80 |
+
|
| 81 |
+
# Safety check: ensure we have enough data for 30-day rolling window
|
| 82 |
if len(subset) < 35: continue
|
| 83 |
|
| 84 |
+
# Generate features for this specific date
|
|
|
|
| 85 |
featured_subset = _create_features(subset)
|
| 86 |
|
| 87 |
+
# Extract the row for the current prediction date
|
| 88 |
+
if date not in featured_subset.index: continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
featured_row = featured_subset.loc[[date]]
|
| 90 |
|
| 91 |
+
# --- CRITICAL FIX START ---
|
| 92 |
+
# XGBoost requires a DataFrame with specific column names.
|
| 93 |
+
# We explicitly recreate the DataFrame to ensure headers are present.
|
| 94 |
try:
|
| 95 |
+
X_input = pd.DataFrame(featured_row[FEATURES].values, columns=FEATURES, index=featured_row.index)
|
| 96 |
prediction = model.predict(X_input)[0]
|
|
|
|
| 97 |
df_extended.loc[date, 'modal_price'] = prediction
|
| 98 |
except Exception as e:
|
| 99 |
+
# Log the error and stop forecasting; the remaining future dates keep their NaN placeholder
|
| 100 |
print(f"Prediction error for {date}: {e}")
|
| 101 |
break
|
| 102 |
+
# --- CRITICAL FIX END ---
|
| 103 |
|
| 104 |
daily_forecast_df = df_extended.loc[future_dates].copy()
|
| 105 |
daily_forecast_df.rename(columns={'modal_price': 'forecast'}, inplace=True)
|