# AgroVision-Backend / services/market_services.py
# (header reconstructed from repository-scrape residue:
#  author ShadowGard3n, commit d76b061 "small changes")
import pandas as pd
import numpy as np
import joblib
import os
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from typing import List, Dict, Optional
# --- Load Models & Data ---
MODELS_DIR = 'models'

# Mapping of commodity name -> fitted regression model (one .pkl per commodity).
models = {}

# Feature columns every per-commodity model expects, in training order.
# Order matters: prediction passes a bare numpy row with name-checking disabled.
feature_names = [
    'dayofweek', 'dayofyear', 'month', 'year', 'quarter', 'weekofyear',
    'price_lag_7', 'price_lag_14', 'price_lag_30', 'rolling_mean_30', 'rolling_std_30'
]

# Load every pickled model from MODELS_DIR (skipped entirely if the dir is absent).
# File names encode the commodity: '_' in the file name maps back to '/'.
if os.path.exists(MODELS_DIR):
    for model_file in os.listdir(MODELS_DIR):
        if model_file.endswith('.pkl'):
            commodity_name = model_file.replace('.pkl', '').replace('_', '/')
            try:
                models[commodity_name] = joblib.load(os.path.join(MODELS_DIR, model_file))
                print(f"βœ… Model loaded for: {commodity_name}")
            except Exception as exc:
                # A corrupt or version-incompatible pickle must not crash module
                # import; skip it so the remaining commodities keep working.
                print(f"❌ Failed to load model '{model_file}': {exc}")

try:
    # Historical price data indexed by timestamp; required for feature generation.
    DF_FULL = pd.read_csv('final_output.csv', parse_dates=['created_at'], index_col='created_at')
    print("βœ… Dataset loaded.")
except FileNotFoundError:
    print("❌ 'final_output.csv' not found. Predictions will fail.")
    DF_FULL = None
# --- Helper Functions ---
# (Removed: superseded commented-out draft of _create_features; the live
#  version below differs only in keeping NaN rows instead of calling dropna().)
def _create_features(df):
df = df.copy()
df['dayofweek'] = df.index.dayofweek
df['dayofyear'] = df.index.dayofyear
df['month'] = df.index.month
df['year'] = df.index.year
df['quarter'] = df.index.quarter
df['weekofyear'] = df.index.isocalendar().week.astype(int)
# Lags and Rolling features
df['price_lag_7'] = df['modal_price'].shift(7)
df['price_lag_14'] = df['modal_price'].shift(14)
df['price_lag_30'] = df['modal_price'].shift(30)
df['rolling_mean_30'] = df['modal_price'].shift(1).rolling(window=30).mean()
df['rolling_std_30'] = df['modal_price'].shift(1).rolling(window=30).std()
# REMOVE .dropna() here! We need the last row even if modal_price is NaN.
return df
def get_market_prediction(model, df_full, commodity, last_known_date):
    """Forecast daily modal prices for ``commodity`` over the next 180 days.

    Walks forward one day at a time: each day's prediction is written back
    into the series so later days can build their lag/rolling features from
    earlier forecasts.

    Args:
        model: fitted regressor with a ``predict`` method (XGBoost-style).
        df_full: history with a DatetimeIndex and 'commodity'/'modal_price' columns.
        commodity: commodity name to filter ``df_full`` on.
        last_known_date: last date with observed data; forecasting starts the day after.

    Returns:
        DataFrame indexed by the 180 future dates with a single 'forecast'
        column (NaN for days that could not be predicted).
    """
    print(f"--- PREDICTION STARTED FOR {commodity} ---")

    # Disable XGBoost's feature-name validation so plain numpy rows are
    # accepted; column order must therefore match `feature_names` exactly.
    try:
        model.get_booster().feature_names = None
    except Exception:
        pass  # models without a booster simply skip this step

    history = df_full[df_full['commodity'] == commodity]
    daily = history.groupby(history.index).agg({'modal_price': 'mean'})

    horizon = pd.date_range(start=last_known_date + pd.Timedelta(days=1), periods=180, freq='D')
    placeholder = pd.DataFrame(index=horizon)
    placeholder['modal_price'] = np.nan
    series = pd.concat([daily, placeholder])

    for day in horizon:
        window = series.loc[:day]
        if len(window) < 30:
            continue
        last_row = _create_features(window).iloc[-1]
        feats = last_row[feature_names]
        if feats.isna().any():
            # Not enough history yet to populate every lag/rolling feature.
            continue
        # Predict from a (1, n_features) numpy array; name checking is off above.
        series.loc[day, 'modal_price'] = model.predict(feats.values.reshape(1, -1))[0]

    forecast = series.loc[horizon].copy()
    forecast.rename(columns={'modal_price': 'forecast'}, inplace=True)
    print("--- PREDICTION SUCCESS ---")
    return forecast
# (Removed: commented-out earlier revision of this entire module — it was a
#  functional duplicate of the live code above, kept only as scrape residue.)