# (Removed non-Python artifact scraped from the hosting page header:
#  "Spaces: / Sleeping / Sleeping".)
import pandas as pd
import numpy as np
import joblib
import os
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from typing import List, Dict, Optional

# --- Load Models & Data ---
MODELS_DIR = 'models'
models = {}
feature_names = [
    'dayofweek', 'dayofyear', 'month', 'year', 'quarter', 'weekofyear',
    'price_lag_7', 'price_lag_14', 'price_lag_30', 'rolling_mean_30', 'rolling_std_30'
]

# Scan MODELS_DIR for pickled models (nothing happens if the directory is
# absent). The file name encodes the commodity: underscores stand in for
# '/' (e.g. "Paddy_Dhan.pkl" -> "Paddy/Dhan").
if os.path.exists(MODELS_DIR):
    pkl_files = [name for name in os.listdir(MODELS_DIR) if name.endswith('.pkl')]
    for pkl_name in pkl_files:
        commodity_name = pkl_name.replace('.pkl', '').replace('_', '/')
        models[commodity_name] = joblib.load(os.path.join(MODELS_DIR, pkl_name))
        print(f"β Model loaded for: {commodity_name}")

try:
    # Ensure your CSV is accessible
    DF_FULL = pd.read_csv('final_output.csv', parse_dates=['created_at'], index_col='created_at')
    print("β Dataset loaded.")
except FileNotFoundError:
    print("β 'final_output.csv' not found. Predictions will fail.")
    DF_FULL = None
# --- Helper Functions ---
| def _create_features(df): | |
| df = df.copy() | |
| df['dayofweek'] = df.index.dayofweek | |
| df['dayofyear'] = df.index.dayofyear | |
| df['month'] = df.index.month | |
| df['year'] = df.index.year | |
| df['quarter'] = df.index.quarter | |
| df['weekofyear'] = df.index.isocalendar().week.astype(int) | |
| # Lags and Rolling features | |
| df['price_lag_7'] = df['modal_price'].shift(7) | |
| df['price_lag_14'] = df['modal_price'].shift(14) | |
| df['price_lag_30'] = df['modal_price'].shift(30) | |
| df['rolling_mean_30'] = df['modal_price'].shift(1).rolling(window=30).mean() | |
| df['rolling_std_30'] = df['modal_price'].shift(1).rolling(window=30).std() | |
| # REMOVE .dropna() here! We need the last row even if modal_price is NaN. | |
| return df | |
| def get_market_prediction(model, df_full, commodity, last_known_date): | |
| """ | |
| Iteratively predicts the next 180 days. | |
| """ | |
| print(f"--- PREDICTION STARTED FOR {commodity} ---") | |
| # --- FIX START: DISABLE FEATURE NAME CHECK --- | |
| # This tells XGBoost: "Don't check column names, just trust the order of numbers I give you" | |
| try: | |
| booster = model.get_booster() | |
| booster.feature_names = None | |
| except Exception: | |
| pass # If it fails, we proceed anyway | |
| # --- FIX END --- | |
| df_commodity = df_full[df_full['commodity'] == commodity] | |
| df_daily = df_commodity.groupby(df_commodity.index).agg({'modal_price': 'mean'}) | |
| future_dates = pd.date_range(start=last_known_date + pd.Timedelta(days=1), periods=180, freq='D') | |
| future_df = pd.DataFrame(index=future_dates) | |
| future_df['modal_price'] = np.nan | |
| df_extended = pd.concat([df_daily, future_df]) | |
| for date in future_dates: | |
| subset = df_extended.loc[:date] | |
| if len(subset) < 30: continue | |
| featured_subset = _create_features(subset) | |
| target_row = featured_subset.iloc[-1] | |
| if target_row[feature_names].isna().any(): | |
| continue | |
| # 1. Extract values as a plain Numpy Array | |
| # Reshape to (1, 11) -> 1 row, 11 features | |
| input_values = target_row[feature_names].values.reshape(1, -1) | |
| # 2. Predict using the Numpy Array directly (Faster & Safer now) | |
| # Since we disabled feature_names check, this will now work. | |
| prediction = model.predict(input_values)[0] | |
| df_extended.loc[date, 'modal_price'] = prediction | |
| daily_forecast_df = df_extended.loc[future_dates].copy() | |
| daily_forecast_df.rename(columns={'modal_price': 'forecast'}, inplace=True) | |
| print("--- PREDICTION SUCCESS ---") | |
| return daily_forecast_df | |
# NOTE: A fully commented-out duplicate of this module (the pre-fix version)
# previously lived here; it has been removed as dead code. Recover it from
# version control history if ever needed.