# (Removed non-Python artifact scraped from the hosting page header:
#  "Spaces: / Sleeping / Sleeping".)
import pandas as pd
import numpy as np
import joblib
import os
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from typing import List, Dict, Optional

# --- Load Models & Data ---
MODELS_DIR = 'models'
models = {}
feature_names = [
    'dayofweek', 'dayofyear', 'month', 'year', 'quarter', 'weekofyear',
    'price_lag_7', 'price_lag_14', 'price_lag_30', 'rolling_mean_30', 'rolling_std_30'
]

# Scan MODELS_DIR for pickled models (nothing happens if the directory is
# absent). The file name encodes the commodity: underscores stand in for
# '/' (e.g. "Paddy_Dhan.pkl" -> "Paddy/Dhan").
if os.path.exists(MODELS_DIR):
    pkl_files = [name for name in os.listdir(MODELS_DIR) if name.endswith('.pkl')]
    for pkl_name in pkl_files:
        commodity_name = pkl_name.replace('.pkl', '').replace('_', '/')
        models[commodity_name] = joblib.load(os.path.join(MODELS_DIR, pkl_name))
        print(f"β Model loaded for: {commodity_name}")

try:
    # Ensure your CSV is accessible
    DF_FULL = pd.read_csv('final_output.csv', parse_dates=['created_at'], index_col='created_at')
    print("β Dataset loaded.")
except FileNotFoundError:
    print("β 'final_output.csv' not found. Predictions will fail.")
    DF_FULL = None
# --- Helper Functions ---
| def _create_features(df): | |
| df = df.copy() | |
| df['dayofweek'] = df.index.dayofweek | |
| df['dayofyear'] = df.index.dayofyear | |
| df['month'] = df.index.month | |
| df['year'] = df.index.year | |
| df['quarter'] = df.index.quarter | |
| df['weekofyear'] = df.index.isocalendar().week.astype(int) | |
| # Lags and Rolling features | |
| df['price_lag_7'] = df['modal_price'].shift(7) | |
| df['price_lag_14'] = df['modal_price'].shift(14) | |
| df['price_lag_30'] = df['modal_price'].shift(30) | |
| df['rolling_mean_30'] = df['modal_price'].shift(1).rolling(window=30).mean() | |
| df['rolling_std_30'] = df['modal_price'].shift(1).rolling(window=30).std() | |
| # REMOVE .dropna() here! We need the last row even if modal_price is NaN. | |
| return df | |
| def get_market_prediction(model, df_full, commodity, last_known_date): | |
| """ | |
| Iteratively predicts the next 180 days. | |
| """ | |
| print(f"--- PREDICTION STARTED FOR {commodity} ---") | |
| # --- FIX START: DISABLE FEATURE NAME CHECK --- | |
| # This tells XGBoost: "Don't check column names, just trust the order of numbers I give you" | |
| try: | |
| booster = model.get_booster() | |
| booster.feature_names = None | |
| except Exception: | |
| pass # If it fails, we proceed anyway | |
| # --- FIX END --- | |
| df_commodity = df_full[df_full['commodity'] == commodity] | |
| df_daily = df_commodity.groupby(df_commodity.index).agg({'modal_price': 'mean'}) | |
| future_dates = pd.date_range(start=last_known_date + pd.Timedelta(days=1), periods=180, freq='D') | |
| future_df = pd.DataFrame(index=future_dates) | |
| future_df['modal_price'] = np.nan | |
| df_extended = pd.concat([df_daily, future_df]) | |
| for date in future_dates: | |
| subset = df_extended.loc[:date] | |
| if len(subset) < 30: continue | |
| featured_subset = _create_features(subset) | |
| target_row = featured_subset.iloc[-1] | |
| if target_row[feature_names].isna().any(): | |
| continue | |
| # 1. Extract values as a plain Numpy Array | |
| # Reshape to (1, 11) -> 1 row, 11 features | |
| input_values = target_row[feature_names].values.reshape(1, -1) | |
| # 2. Predict using the Numpy Array directly (Faster & Safer now) | |
| # Since we disabled feature_names check, this will now work. | |
| prediction = model.predict(input_values)[0] | |
| df_extended.loc[date, 'modal_price'] = prediction | |
| daily_forecast_df = df_extended.loc[future_dates].copy() | |
| daily_forecast_df.rename(columns={'modal_price': 'forecast'}, inplace=True) | |
| print("--- PREDICTION SUCCESS ---") | |
| return daily_forecast_df | |
# NOTE: A fully commented-out duplicate of this module (the pre-fix version)
# previously lived here; it has been removed as dead code. Recover it from
# version control history if ever needed.