ThejasRao commited on
Commit
fa4fc8b
·
1 Parent(s): 3029a46

Update agripredict app

Browse files
.env ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ MONGO_URI=mongodb+srv://Agripredict:TjXSvMhOis49qH8E@cluster0.gek7n.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0
2
+
app.py DELETED
@@ -1,1625 +0,0 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import numpy as np
4
- from pymongo import MongoClient
5
- from sklearn.model_selection import train_test_split, GridSearchCV
6
- from sklearn.metrics import mean_squared_error, mean_absolute_error
7
- from xgboost import XGBRegressor
8
- from st_aggrid import AgGrid, GridOptionsBuilder, DataReturnMode, GridUpdateMode
9
- from sklearn.preprocessing import MinMaxScaler
10
- from datetime import datetime, timedelta
11
- import plotly.express as px
12
- import plotly.graph_objects as go
13
- import calendar
14
- import certifi
15
- import requests
16
- from werkzeug.security import generate_password_hash, check_password_hash
17
- from bs4 import BeautifulSoup
18
- import json
19
- from itertools import product
20
- from tqdm import tqdm
21
- import io
22
- from statsmodels.tsa.statespace.sarimax import SARIMAX
23
- from datetime import datetime, timedelta
24
-
25
def generate_date_ranges(start_date: str, end_date: str):
    """Yield one (date, date) tuple per day from *start_date* to *end_date*, inclusive.

    Both bounds are strings in "%d %b %Y" form (e.g. "01 Jan 2024"); each
    yielded pair repeats the same formatted day twice, i.e. a single-day range.
    """
    fmt = "%d %b %Y"
    day = datetime.strptime(start_date, fmt)
    final = datetime.strptime(end_date, fmt)
    step = timedelta(days=1)
    while day <= final:
        stamp = day.strftime(fmt)
        yield stamp, stamp
        day += step
32
-
33
import os

# MongoDB connection setup.
# SECURITY FIX: the connection string (with username and password) was
# previously hard-coded here.  Read it from the environment instead — the
# repository's .env file defines MONGO_URI, and the hosting environment is
# expected to export it.  The credential committed earlier should be rotated.
mongo_uri = os.environ.get("MONGO_URI")
if not mongo_uri:
    st.error("MongoDB URI is not set!")
    st.stop()
else:
    # tlsCAFile=certifi.where() pins the CA bundle so TLS verification works
    # in environments with an incomplete system certificate store.
    client = MongoClient(mongo_uri, tlsCAFile=certifi.where())
    db = client["AgriPredict"]
    collection = db["WhiteSesame"]
    best_params_collection = db["BestParams"]
    best_params_collection_1m = db["BestParams_1m"]
    best_params_collection_3m = db["BestParams_3m"]
    impExp = db["impExp"]
    users_collection = db["user"]
    market_price_data = db["new_data_price"]
47
-
48
# State -> list of mandi (market yard) names.
# NOTE(review): how this mapping is consumed (presumably to populate
# state/market filter widgets) is not visible in this chunk — confirm
# against the UI code before renaming any entry; the strings must match
# the values stored in the price collections exactly.
state_market_dict = {
    "Karnataka": [
        "Kalburgi",
        "Basava Kalayana",
        "Lingasugur",
        "Kustagi",
        "Bangalore",
        "Bagalakot",
        "Hubli (Amaragol)"
    ],
    "Gujarat": [
        "Siddhpur",
        "Jasdan",
        "Gondal",
        "Morbi",
        "Botad",
        "Visavadar",
        "Dahod",
        "Rajkot",
        "Junagadh",
        "Savarkundla",
        "Bhavnagar",
        "Rajula",
        "Dhoraji",
        "Amreli",
        "Mahuva(Station Road)",
        "Mansa",
        "Porbandar",
        "Dasada Patadi",
        "Halvad",
        "Chotila",
        "Bhanvad",
        "Dhansura",
        "Babra",
        "Upleta",
        "Palitana",
        "Jetpur(Dist.Rajkot)",
        "S.Mandvi",
        "Mandvi",
        "Khambha",
        "Kadi",
        "Taleja",
        "Himatnagar",
        "Lakhani",
        "Rapar",
        "Una",
        "Dhari",
        "Bagasara",
        "Jam Jodhpur",
        "Veraval",
        "Dhragradhra",
        "Deesa"
    ],
    "Uttar Pradesh": [
        "Bangarmau",
        "Sultanpur",
        "Maudaha",
        "Mauranipur",
        "Lalitpur",
        "Konch",
        "Muskara",
        "Raath",
        "Varipaal",
        "Auraiya",
        "Orai",
        "Banda",
        "Kishunpur",
        "Ait",
        "Jhansi",
        "Kurara",
        "Chirgaon",
        "Charkhari",
        "Moth",
        "Jalaun",
        "Sirsaganj",
        "Shikohabad"
    ],
    "Madhya Pradesh": [
        "Naugaon",
        "Mehar",
        "Kailaras",
        "Datia",
        "LavKush Nagar(Laundi)",
        "Ajaygarh",
        "Rajnagar",
        "Sevda",
        "Neemuch",
        "Sheopurkalan",
        "Lashkar",
        "Alampur",
        "Niwadi",
        "Dabra",
        "Ujjain",
        "Bijawar",
        "Sidhi",
        "Barad",
        "Pohari",
        "Shahagarh",
        "Lateri",
        "Banapura",
        "Panna",
        "Garhakota",
        "Katni",
        "Chhatarpur",
        "Beohari",
        "Satna",
        "Sabalgarh",
        "Hanumana",
        "Bhander",
        "Banmorkalan",
        "Jaora",
        "Bagli",
        "Singroli"
    ],
    "Telangana": [
        "Warangal"
    ]
}
166
def create_forecasting_features(df):
    """Engineer calendar, lag, rolling, and seasonal features for the 14-day model.

    Expects a 'Reported Date' column (or a DatetimeIndex) plus the target
    column 'Modal Price (Rs./Quintal)'.  Returns a new frame with the feature
    columns appended and 'Reported Date' restored as a regular column.
    """
    df = df.copy()
    # Work on a DatetimeIndex so the offset arithmetic on df.index below works.
    if not isinstance(df.index, pd.DatetimeIndex):
        df = df.set_index('Reported Date')
        df.index = pd.to_datetime(df.index)

    # Timestamp -> price lookup used for exact-offset lag features
    # (dates absent from the history map to NaN).
    target_map = df['Modal Price (Rs./Quintal)'].to_dict()

    # Calendar components.
    df['dayofweek'] = df.index.dayofweek
    df['quarter'] = df.index.quarter
    df['month'] = df.index.month
    df['year'] = df.index.year
    df['dayofyear'] = df.index.dayofyear
    df['weekofyear'] = df.index.isocalendar().week

    # Price observed exactly N days / months earlier.
    df['lag14'] = (df.index - pd.Timedelta(days=14)).map(target_map)
    df['lag28'] = (df.index - pd.Timedelta(days=28)).map(target_map)
    df['lag56'] = (df.index - pd.Timedelta(days=56)).map(target_map)
    df['lag_3months'] = (df.index - pd.DateOffset(months=3)).map(target_map)
    df['lag_6months'] = (df.index - pd.DateOffset(months=6)).map(target_map)
    # Trailing rolling statistics (min_periods=1 so early rows are not NaN-only).
    for window in [7, 14, 28]:
        df[f'rolling_mean_{window}'] = df['Modal Price (Rs./Quintal)'].rolling(window=window, min_periods=1).mean()
        df[f'rolling_std_{window}'] = df['Modal Price (Rs./Quintal)'].rolling(window=window, min_periods=1).std()

    # Exponential moving averages and per-period mean encodings of the target.
    df['ema7'] = df['Modal Price (Rs./Quintal)'].ewm(span=7, adjust=False).mean()
    df['ema14'] = df['Modal Price (Rs./Quintal)'].ewm(span=14, adjust=False).mean()
    df['monthly_avg'] = df.groupby('month')['Modal Price (Rs./Quintal)'].transform('mean')
    df['weekly_avg'] = df.groupby('weekofyear')['Modal Price (Rs./Quintal)'].transform('mean')
    df['dayofweek_avg'] = df.groupby('dayofweek')['Modal Price (Rs./Quintal)'].transform('mean')

    # Fourier terms encoding annual and 14-day cycles.
    df['fourier_sin_365'] = np.sin(2 * np.pi * df.index.dayofyear / 365)
    df['fourier_cos_365'] = np.cos(2 * np.pi * df.index.dayofyear / 365)
    df['fourier_sin_14'] = np.sin(2 * np.pi * df.index.dayofyear / 14)
    df['fourier_cos_14'] = np.cos(2 * np.pi * df.index.dayofyear / 14)

    # NOTE(review): .map(target_map).min()/.max() reduce over the WHOLE mapped
    # series, so recent_min_14 / recent_max_14 are constant columns, not
    # rolling 14-day extrema — this looks unintended; confirm before relying
    # on these features.
    df['recent_min_14'] = (df.index - pd.Timedelta(days=14)).map(target_map).min()
    df['recent_max_14'] = (df.index - pd.Timedelta(days=14)).map(target_map).max()
    df['recent_range_14'] = df['recent_max_14'] - df['recent_min_14']

    # Year-level mean encoding and an expanding (all-history-so-far) mean.
    df['yearly_avg'] = df.groupby('year')['Modal Price (Rs./Quintal)'].transform('mean')
    df['cumulative_mean'] = df['Modal Price (Rs./Quintal)'].expanding().mean()

    return df.reset_index()
209
-
210
def create_forecasting_features_1m(df):
    """Engineer features for the 1-month (30-day) model.

    Same structure as create_forecasting_features, but with 30/60/90-day lags
    and windows and 6/12-month seasonal lags.  Expects 'Reported Date' and
    'Modal Price (Rs./Quintal)'; returns a new frame with features appended.
    """
    df = df.copy()
    # Work on a DatetimeIndex so the offset arithmetic on df.index below works.
    if not isinstance(df.index, pd.DatetimeIndex):
        df = df.set_index('Reported Date')
        df.index = pd.to_datetime(df.index)

    # Timestamp -> price lookup for exact-offset lags (missing dates -> NaN).
    target_map = df['Modal Price (Rs./Quintal)'].to_dict()

    # Calendar components.
    df['dayofweek'] = df.index.dayofweek
    df['quarter'] = df.index.quarter
    df['month'] = df.index.month
    df['year'] = df.index.year
    df['dayofyear'] = df.index.dayofyear
    df['weekofyear'] = df.index.isocalendar().week

    # Price observed exactly N days / months earlier.
    df['lag_30'] = (df.index - pd.Timedelta(days=30)).map(target_map)
    df['lag_60'] = (df.index - pd.Timedelta(days=60)).map(target_map)
    df['lag_90'] = (df.index - pd.Timedelta(days=90)).map(target_map)
    df['lag_6months'] = (df.index - pd.DateOffset(months=6)).map(target_map)
    df['lag_12months'] = (df.index - pd.DateOffset(months=12)).map(target_map)

    # Trailing rolling statistics.
    for window in [30, 60, 90]:
        df[f'rolling_mean_{window}'] = df['Modal Price (Rs./Quintal)'].rolling(window=window, min_periods=1).mean()
        df[f'rolling_std_{window}'] = df['Modal Price (Rs./Quintal)'].rolling(window=window, min_periods=1).std()

    # Exponential moving averages.
    df['ema_30'] = df['Modal Price (Rs./Quintal)'].ewm(span=30, adjust=False).mean()
    df['ema_60'] = df['Modal Price (Rs./Quintal)'].ewm(span=60, adjust=False).mean()

    # Per-period mean encodings of the target.
    df['monthly_avg'] = df.groupby('month')['Modal Price (Rs./Quintal)'].transform('mean')
    df['weekly_avg'] = df.groupby('weekofyear')['Modal Price (Rs./Quintal)'].transform('mean')
    df['dayofweek_avg'] = df.groupby('dayofweek')['Modal Price (Rs./Quintal)'].transform('mean')

    # Fourier terms encoding annual and 30-day cycles.
    df['fourier_sin_365'] = np.sin(2 * np.pi * df.index.dayofyear / 365)
    df['fourier_cos_365'] = np.cos(2 * np.pi * df.index.dayofyear / 365)
    df['fourier_sin_30'] = np.sin(2 * np.pi * df.index.dayofyear / 30)
    df['fourier_cos_30'] = np.cos(2 * np.pi * df.index.dayofyear / 30)

    # NOTE(review): as in create_forecasting_features, .min()/.max() here
    # reduce to scalars, producing constant columns rather than rolling
    # extrema — confirm intent.
    df['recent_min_30'] = (df.index - pd.Timedelta(days=30)).map(target_map).min()
    df['recent_max_30'] = (df.index - pd.Timedelta(days=30)).map(target_map).max()
    df['recent_range_30'] = df['recent_max_30'] - df['recent_min_30']

    # Year-level mean encoding and an expanding (all-history-so-far) mean.
    df['yearly_avg'] = df.groupby('year')['Modal Price (Rs./Quintal)'].transform('mean')
    df['cumulative_mean'] = df['Modal Price (Rs./Quintal)'].expanding().mean()

    return df.reset_index()
255
-
256
def create_forecasting_features_3m(df):
    """Engineer features for the 3-month (90-day) model.

    Same structure as create_forecasting_features, but with quarterly-to-annual
    lags and 90/180/270/365-day windows.  Expects 'Reported Date' and
    'Modal Price (Rs./Quintal)'; returns a new frame with features appended.
    """
    df = df.copy()
    # Work on a DatetimeIndex so the offset arithmetic on df.index below works.
    if not isinstance(df.index, pd.DatetimeIndex):
        df = df.set_index('Reported Date')
        df.index = pd.to_datetime(df.index)

    # Timestamp -> price lookup for exact-offset lags (missing dates -> NaN).
    target_map = df['Modal Price (Rs./Quintal)'].to_dict()

    # Calendar components.
    df['dayofweek'] = df.index.dayofweek
    df['quarter'] = df.index.quarter
    df['month'] = df.index.month
    df['year'] = df.index.year
    df['dayofyear'] = df.index.dayofyear
    df['weekofyear'] = df.index.isocalendar().week

    # Price observed exactly N months earlier (quarterly steps up to a year).
    df['lag_3months'] = (df.index - pd.DateOffset(months=3)).map(target_map)
    df['lag_6months'] = (df.index - pd.DateOffset(months=6)).map(target_map)
    df['lag_9months'] = (df.index - pd.DateOffset(months=9)).map(target_map)
    df['lag_12months'] = (df.index - pd.DateOffset(months=12)).map(target_map)

    # Trailing rolling statistics.
    for window in [90, 180, 270, 365]:
        df[f'rolling_mean_{window}'] = df['Modal Price (Rs./Quintal)'].rolling(window=window, min_periods=1).mean()
        df[f'rolling_std_{window}'] = df['Modal Price (Rs./Quintal)'].rolling(window=window, min_periods=1).std()

    # Exponential moving averages and per-period mean encodings.
    df['ema90'] = df['Modal Price (Rs./Quintal)'].ewm(span=90, adjust=False).mean()
    df['ema180'] = df['Modal Price (Rs./Quintal)'].ewm(span=180, adjust=False).mean()
    df['monthly_avg'] = df.groupby('month')['Modal Price (Rs./Quintal)'].transform('mean')
    df['weekly_avg'] = df.groupby('weekofyear')['Modal Price (Rs./Quintal)'].transform('mean')
    df['dayofweek_avg'] = df.groupby('dayofweek')['Modal Price (Rs./Quintal)'].transform('mean')

    # Fourier terms encoding quarterly and monthly cycles.
    df['fourier_sin_90'] = np.sin(2 * np.pi * df.index.dayofyear / 90)
    df['fourier_cos_90'] = np.cos(2 * np.pi * df.index.dayofyear / 90)
    df['fourier_sin_30'] = np.sin(2 * np.pi * df.index.dayofyear / 30)
    df['fourier_cos_30'] = np.cos(2 * np.pi * df.index.dayofyear / 30)

    # NOTE(review): as in the other feature builders, .min()/.max() reduce to
    # scalars, producing constant columns rather than rolling extrema —
    # confirm intent.
    df['recent_min_90'] = (df.index - pd.Timedelta(days=90)).map(target_map).min()
    df['recent_max_90'] = (df.index - pd.Timedelta(days=90)).map(target_map).max()
    df['recent_range_90'] = df['recent_max_90'] - df['recent_min_90']

    # Year-level mean encoding and an expanding (all-history-so-far) mean.
    df['yearly_avg'] = df.groupby('year')['Modal Price (Rs./Quintal)'].transform('mean')
    df['cumulative_mean'] = df['Modal Price (Rs./Quintal)'].expanding().mean()

    return df.reset_index()
299
-
300
-
301
def preprocess_data(df):
    """Normalise raw price records to one row per calendar day.

    Keeps only the date and modal-price columns, averages duplicate dates,
    reindexes onto a continuous daily range between the first and last
    report, and fills gaps forward then backward so every day has a price.
    """
    # .copy() so the column assignment below does not raise
    # SettingWithCopyWarning (we were writing into a slice of the caller's frame).
    df = df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    df['Reported Date'] = pd.to_datetime(df['Reported Date'])
    # Average out multiple reports recorded on the same day.
    df = df.groupby('Reported Date', as_index=False).mean()
    # Reindex onto every calendar day in the observed span.
    full_date_range = pd.date_range(df['Reported Date'].min(), df['Reported Date'].max())
    df = df.set_index('Reported Date').reindex(full_date_range).rename_axis('Reported Date').reset_index()

    # fillna(method=...) is deprecated in pandas 2.x; .ffill()/.bfill() are
    # the supported equivalents with identical semantics.
    df['Modal Price (Rs./Quintal)'] = (
        df['Modal Price (Rs./Quintal)'].ffill().bfill()
    )
    return df
312
-
313
-
314
def train_and_evaluate(df):
    """Grid-search an XGBoost regressor for the 14-day horizon and plot results.

    Splits the feature-engineered frame chronologically at 2024-01-01
    (train before, test after), exhaustively searches a small hyper-parameter
    grid, reports RMSE/MAE and a train/test/predicted chart via Streamlit,
    and returns the winning hyper-parameter dict.

    NOTE(review): parameters are selected by model.score() on the *test*
    split, i.e. the test set is used for model selection, so the reported
    metrics are optimistic — confirm this is acceptable.
    """
    import streamlit as st
    progress_bar = st.progress(0)
    def update_tuning_progress(current, total):
        # st.progress expects an int percentage in [0, 100].
        progress = int((current / total) * 100)
        progress_bar.progress(progress)

    df = create_forecasting_features(df)

    # Chronological split: everything before 2024 trains, 2024 onward tests.
    train_df = df[df['Reported Date'] < '2024-01-01']
    test_df = df[df['Reported Date'] >= '2024-01-01']

    X_train = train_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
    y_train = train_df['Modal Price (Rs./Quintal)']
    X_test = test_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
    y_test = test_df['Modal Price (Rs./Quintal)']
    st.write("Performing hyperparameter tuning...")
    param_grid = {
        'learning_rate': [0.01, 0.1, 0.2],
        'max_depth': [3, 5, 7],
        'n_estimators': [50, 100, 150],
        'booster': ['gbtree', 'dart']
    }

    model = XGBRegressor()
    # Total combination count; used only to drive the progress bar.
    param_combinations = len(param_grid['learning_rate']) * len(param_grid['max_depth']) * \
                         len(param_grid['n_estimators']) * len(param_grid['booster'])

    current_combination = 0

    def custom_grid_search():
        # Exhaustive search over the grid, keeping the combination with the
        # best R^2 on the held-out test split.
        nonlocal current_combination
        best_score = float('-inf')
        best_params = None
        for learning_rate in param_grid['learning_rate']:
            for max_depth in param_grid['max_depth']:
                for n_estimators in param_grid['n_estimators']:
                    for booster in param_grid['booster']:
                        model.set_params(
                            learning_rate=learning_rate,
                            max_depth=max_depth,
                            n_estimators=n_estimators,
                            booster=booster
                        )
                        model.fit(X_train, y_train)
                        score = model.score(X_test, y_test)
                        if score > best_score:
                            best_score = score
                            best_params = {
                                'learning_rate': learning_rate,
                                'max_depth': max_depth,
                                'n_estimators': n_estimators,
                                'booster': booster
                            }
                        current_combination += 1
                        update_tuning_progress(current_combination, param_combinations)
        return best_params

    best_params = custom_grid_search()

    st.write("Training the best model and making predictions...")
    best_model = XGBRegressor(**best_params)
    best_model.fit(X_train, y_train)
    y_pred = best_model.predict(X_test)

    # NOTE(review): squared=False is deprecated (and removed in newer
    # scikit-learn in favour of root_mean_squared_error) — confirm the
    # pinned scikit-learn version.
    rmse = mean_squared_error(y_test, y_pred, squared=False)
    mae = mean_absolute_error(y_test, y_pred)
    st.write(f"RMSE: {rmse}")
    st.write(f"MAE: {mae}")

    # Build one long frame labelled Train/Test/Predicted for plotting.
    train_plot_df = train_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    train_plot_df['Type'] = 'Train'

    test_plot_df = test_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    test_plot_df['Type'] = 'Test'

    predicted_plot_df = test_df[['Reported Date']].copy()
    predicted_plot_df['Modal Price (Rs./Quintal)'] = y_pred
    predicted_plot_df['Type'] = 'Predicted'

    plot_df = pd.concat([train_plot_df, test_plot_df, predicted_plot_df])

    fig = go.Figure()

    for plot_type, color, dash in [('Train', 'blue', None), ('Test', 'orange', None),
                                   ('Predicted', 'green', 'dot')]:
        data = plot_df[plot_df['Type'] == plot_type]
        fig.add_trace(go.Scatter(
            x=data['Reported Date'],
            y=data['Modal Price (Rs./Quintal)'],
            mode='lines',
            name=f"{plot_type} Data",
            line=dict(color=color, dash=dash)
        ))

    fig.update_layout(
        title="Train, Test, and Predicted Data",
        xaxis_title="Date",
        yaxis_title="Modal Price (Rs./Quintal)",
        template="plotly_white"
    )

    st.plotly_chart(fig, use_container_width=True)

    return best_params
419
-
420
def train_and_evaluate_1m(df):
    """Grid-search an XGBoost regressor for the 30-day horizon and plot results.

    Like train_and_evaluate, but uses the 1-month feature set and limits the
    test window to the 30 days following the 2024-01-01 split.  Returns the
    winning hyper-parameter dict.

    NOTE(review): as in train_and_evaluate, parameters are selected by score
    on the test split itself, so reported metrics are optimistic.
    """
    import streamlit as st
    import pandas as pd
    import plotly.graph_objects as go
    from xgboost import XGBRegressor
    from sklearn.metrics import mean_squared_error, mean_absolute_error

    progress_bar = st.progress(0)

    def update_tuning_progress(current, total):
        # st.progress expects an int percentage in [0, 100].
        progress = int((current / total) * 100)
        progress_bar.progress(progress)

    df = create_forecasting_features_1m(df)

    # Train on everything before the split; test on the next 30 days only.
    split_date = pd.to_datetime("2024-01-01")
    test_horizon = pd.DateOffset(days=30)

    train_df = df[df['Reported Date'] < split_date]
    test_df = df[(df['Reported Date'] >= split_date) & (df['Reported Date'] < split_date + test_horizon)]

    X_train = train_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
    y_train = train_df['Modal Price (Rs./Quintal)']
    X_test = test_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
    y_test = test_df['Modal Price (Rs./Quintal)']

    st.write("Performing hyperparameter tuning...")
    param_grid = {
        'learning_rate': [0.01, 0.1, 0.2],
        'max_depth': [3, 5, 7],
        'n_estimators': [50, 100, 150],
        'booster': ['gbtree', 'dart']
    }

    model = XGBRegressor()
    # Total combination count; used only to drive the progress bar.
    param_combinations = len(param_grid['learning_rate']) * len(param_grid['max_depth']) * \
                         len(param_grid['n_estimators']) * len(param_grid['booster'])

    current_combination = 0

    def custom_grid_search():
        # Exhaustive search, keeping the combination with the best test R^2.
        nonlocal current_combination
        best_score = float('-inf')
        best_params = None
        for learning_rate in param_grid['learning_rate']:
            for max_depth in param_grid['max_depth']:
                for n_estimators in param_grid['n_estimators']:
                    for booster in param_grid['booster']:
                        model.set_params(
                            learning_rate=learning_rate,
                            max_depth=max_depth,
                            n_estimators=n_estimators,
                            booster=booster
                        )
                        model.fit(X_train, y_train)
                        score = model.score(X_test, y_test)
                        if score > best_score:
                            best_score = score
                            best_params = {
                                'learning_rate': learning_rate,
                                'max_depth': max_depth,
                                'n_estimators': n_estimators,
                                'booster': booster
                            }
                        current_combination += 1
                        update_tuning_progress(current_combination, param_combinations)
        return best_params

    best_params = custom_grid_search()
    st.write("Training the best model and making predictions...")
    best_model = XGBRegressor(**best_params)
    best_model.fit(X_train, y_train)
    y_pred = best_model.predict(X_test)

    # NOTE(review): squared=False is deprecated in newer scikit-learn
    # (root_mean_squared_error is the replacement) — confirm pinned version.
    rmse = mean_squared_error(y_test, y_pred, squared=False)
    mae = mean_absolute_error(y_test, y_pred)
    st.write(f"RMSE: {rmse}")
    st.write(f"MAE: {mae}")

    # Build one long frame labelled Train/Test/Predicted for plotting.
    train_plot_df = train_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    train_plot_df['Type'] = 'Train'

    test_plot_df = test_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    test_plot_df['Type'] = 'Test'

    predicted_plot_df = test_df[['Reported Date']].copy()
    predicted_plot_df['Modal Price (Rs./Quintal)'] = y_pred
    predicted_plot_df['Type'] = 'Predicted'

    plot_df = pd.concat([train_plot_df, test_plot_df, predicted_plot_df])

    fig = go.Figure()

    for plot_type, color, dash in [('Train', 'blue', None), ('Test', 'orange', None),
                                   ('Predicted', 'green', 'dot')]:
        data = plot_df[plot_df['Type'] == plot_type]
        fig.add_trace(go.Scatter(
            x=data['Reported Date'],
            y=data['Modal Price (Rs./Quintal)'],
            mode='lines',
            name=f"{plot_type} Data",
            line=dict(color=color, dash=dash)
        ))

    fig.update_layout(
        title="Train, Test, and Predicted Data",
        xaxis_title="Date",
        yaxis_title="Modal Price (Rs./Quintal)",
        template="plotly_white"
    )

    st.plotly_chart(fig, use_container_width=True)

    return best_params
534
-
535
def train_and_evaluate_3m(df):
    """Grid-search an XGBoost regressor for the 90-day horizon and plot results.

    Like train_and_evaluate, but uses the 3-month feature set and splits
    chronologically at 2023-10-01 (so the test period is roughly one quarter
    earlier than the other horizons).  Returns the winning hyper-parameter dict.

    NOTE(review): as in train_and_evaluate, parameters are selected by score
    on the test split itself, so reported metrics are optimistic.
    """
    import streamlit as st
    progress_bar = st.progress(0)
    def update_tuning_progress(current, total):
        # st.progress expects an int percentage in [0, 100].
        progress = int((current / total) * 100)
        progress_bar.progress(progress)

    df = create_forecasting_features_3m(df)
    # Chronological split at 2023-10-01.
    train_df = df[df['Reported Date'] < '2023-10-01']
    test_df = df[df['Reported Date'] >= '2023-10-01']

    X_train = train_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
    y_train = train_df['Modal Price (Rs./Quintal)']
    X_test = test_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
    y_test = test_df['Modal Price (Rs./Quintal)']

    st.write("Performing hyperparameter tuning...")
    param_grid = {
        'learning_rate': [0.01, 0.1, 0.2],
        'max_depth': [3, 5, 7],
        'n_estimators': [50, 100, 150],
        'booster': ['gbtree', 'dart']
    }

    model = XGBRegressor()
    # Total combination count; used only to drive the progress bar.
    param_combinations = len(param_grid['learning_rate']) * len(param_grid['max_depth']) * \
                         len(param_grid['n_estimators']) * len(param_grid['booster'])

    current_combination = 0

    def custom_grid_search():
        # Exhaustive search, keeping the combination with the best test R^2.
        nonlocal current_combination
        best_score = float('-inf')
        best_params = None
        for learning_rate in param_grid['learning_rate']:
            for max_depth in param_grid['max_depth']:
                for n_estimators in param_grid['n_estimators']:
                    for booster in param_grid['booster']:
                        model.set_params(
                            learning_rate=learning_rate,
                            max_depth=max_depth,
                            n_estimators=n_estimators,
                            booster=booster
                        )
                        model.fit(X_train, y_train)
                        score = model.score(X_test, y_test)
                        if score > best_score:
                            best_score = score
                            best_params = {
                                'learning_rate': learning_rate,
                                'max_depth': max_depth,
                                'n_estimators': n_estimators,
                                'booster': booster
                            }
                        current_combination += 1
                        update_tuning_progress(current_combination, param_combinations)
        return best_params

    best_params = custom_grid_search()
    st.write("Training the best model and making predictions...")
    best_model = XGBRegressor(**best_params)
    best_model.fit(X_train, y_train)
    y_pred = best_model.predict(X_test)

    # NOTE(review): squared=False is deprecated in newer scikit-learn
    # (root_mean_squared_error is the replacement) — confirm pinned version.
    rmse = mean_squared_error(y_test, y_pred, squared=False)
    mae = mean_absolute_error(y_test, y_pred)
    st.write(f"RMSE: {rmse}")
    st.write(f"MAE: {mae}")

    # Build one long frame labelled Train/Test/Predicted for plotting.
    train_plot_df = train_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    train_plot_df['Type'] = 'Train'

    test_plot_df = test_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    test_plot_df['Type'] = 'Test'

    predicted_plot_df = test_df[['Reported Date']].copy()
    predicted_plot_df['Modal Price (Rs./Quintal)'] = y_pred
    predicted_plot_df['Type'] = 'Predicted'

    plot_df = pd.concat([train_plot_df, test_plot_df, predicted_plot_df])

    fig = go.Figure()

    for plot_type, color, dash in [('Train', 'blue', None), ('Test', 'orange', None),
                                   ('Predicted', 'green', 'dot')]:
        data = plot_df[plot_df['Type'] == plot_type]
        fig.add_trace(go.Scatter(
            x=data['Reported Date'],
            y=data['Modal Price (Rs./Quintal)'],
            mode='lines',
            name=f"{plot_type} Data",
            line=dict(color=color, dash=dash)
        ))

    fig.update_layout(
        title="Train, Test, and Predicted Data",
        xaxis_title="Date",
        yaxis_title="Modal Price (Rs./Quintal)",
        template="plotly_white"
    )

    st.plotly_chart(fig, use_container_width=True)

    return best_params
639
-
640
def forecast_next_14_days(df, _best_params, key):
    """Fit the tuned model on all history and forecast the next 14 days.

    Appends 14 future dates, rebuilds features over the combined frame,
    trains on the historical portion, predicts the future portion, then
    plots the result and offers it for download.
    """
    # Mongo documents passed in via forecast() also carry metadata keys
    # (_id, filter_key, last_updated) that XGBRegressor must not receive.
    hyperparams = {k: v for k, v in _best_params.items()
                   if k not in ('_id', 'filter_key', 'last_updated')}

    last_date = df['Reported Date'].max()
    future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=14)
    future_df = pd.DataFrame({'Reported Date': future_dates})
    full_df = pd.concat([df, future_df], ignore_index=True)
    full_df = create_forecasting_features(full_df)

    # .copy() so the prediction column assigned below lands on an independent
    # frame rather than a view (avoids SettingWithCopyWarning / lost writes).
    original_df = full_df[full_df['Reported Date'] <= last_date].copy()
    future_df = full_df[full_df['Reported Date'] > last_date].copy()

    X_train = original_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'], errors='ignore')
    y_train = original_df['Modal Price (Rs./Quintal)']
    X_future = future_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'], errors='ignore')

    model = XGBRegressor(**hyperparams)
    model.fit(X_train, y_train)

    future_predictions = model.predict(X_future)
    future_df['Modal Price (Rs./Quintal)'] = future_predictions
    plot_data(original_df, future_df, last_date, 14)
    download_button(future_df, key)
661
-
662
def forecast_next_30_days(df, _best_params, key):
    """Fit the tuned model on all history and forecast the next 30 days.

    Mirrors forecast_next_14_days with the 1-month feature set.
    """
    # Strip Mongo metadata keys before handing the dict to XGBRegressor.
    hyperparams = {k: v for k, v in _best_params.items()
                   if k not in ('_id', 'filter_key', 'last_updated')}

    last_date = df['Reported Date'].max()
    future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=30)
    future_df = pd.DataFrame({'Reported Date': future_dates})
    full_df = pd.concat([df, future_df], ignore_index=True)
    full_df = create_forecasting_features_1m(full_df)

    # .copy() so the prediction column assigned below lands on an independent
    # frame rather than a view (avoids SettingWithCopyWarning / lost writes).
    original_df = full_df[full_df['Reported Date'] <= last_date].copy()
    future_df = full_df[full_df['Reported Date'] > last_date].copy()

    X_train = original_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'], errors='ignore')
    y_train = original_df['Modal Price (Rs./Quintal)']
    X_future = future_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'], errors='ignore')

    model = XGBRegressor(**hyperparams)
    model.fit(X_train, y_train)

    future_predictions = model.predict(X_future)
    future_df['Modal Price (Rs./Quintal)'] = future_predictions
    plot_data(original_df, future_df, last_date, 30)
    download_button(future_df, key)
683
-
684
def forecast_next_90_days(df, _best_params, key):
    """Fit the tuned model on all history and forecast the next 90 days.

    Mirrors forecast_next_14_days with the 3-month feature set.
    """
    # Strip Mongo metadata keys before handing the dict to XGBRegressor.
    hyperparams = {k: v for k, v in _best_params.items()
                   if k not in ('_id', 'filter_key', 'last_updated')}

    last_date = df['Reported Date'].max()
    future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=90)
    future_df = pd.DataFrame({'Reported Date': future_dates})
    full_df = pd.concat([df, future_df], ignore_index=True)
    full_df = create_forecasting_features_3m(full_df)

    # .copy() so the prediction column assigned below lands on an independent
    # frame rather than a view (avoids SettingWithCopyWarning / lost writes).
    original_df = full_df[full_df['Reported Date'] <= last_date].copy()
    future_df = full_df[full_df['Reported Date'] > last_date].copy()

    X_train = original_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'], errors='ignore')
    y_train = original_df['Modal Price (Rs./Quintal)']
    X_future = future_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'], errors='ignore')

    model = XGBRegressor(**hyperparams)
    model.fit(X_train, y_train)

    future_predictions = model.predict(X_future)
    future_df['Modal Price (Rs./Quintal)'] = future_predictions
    plot_data(original_df, future_df, last_date, 90)
    download_button(future_df, key)
705
-
706
def plot_data(original_df, future_df, last_date, days):
    """Plot the last *days* days of actual prices against the forecast.

    Shows one solid blue 'Actual' trace and one dashed red 'Forecasted'
    trace in a single Plotly chart rendered via Streamlit.
    """
    actual_df = original_df[original_df['Reported Date'] >= (last_date - pd.Timedelta(days=days))].copy()
    actual_df['Type'] = 'Actual'

    future_plot_df = future_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    future_plot_df['Type'] = 'Forecasted'
    # Prepend the last actual observation to the forecast series so the two
    # traces join visually.  BUG FIX: it must be relabelled 'Forecasted' —
    # with its original 'Actual' label it was filtered out of the forecast
    # trace below, leaving a visible gap between the two lines.
    last_actual_point = actual_df.sort_values('Reported Date').iloc[[-1]].copy()
    last_actual_point['Type'] = 'Forecasted'
    future_plot_df = pd.concat([last_actual_point, future_plot_df])

    plot_df = pd.concat([actual_df, future_plot_df])
    fig = go.Figure()
    for plot_type, color, dash in [('Actual', 'blue', 'solid'), ('Forecasted', 'red', 'dash')]:
        data = plot_df[plot_df['Type'] == plot_type]
        fig.add_trace(go.Scatter(
            x=data['Reported Date'],
            y=data['Modal Price (Rs./Quintal)'],
            mode='lines',
            name=f"{plot_type} Data",
            line=dict(color=color, dash=dash)
        ))

    fig.update_layout(
        title="Actual vs Forecasted Modal Price (Rs./Quintal)",
        xaxis_title="Date",
        yaxis_title="Modal Price (Rs./Quintal)",
        template="plotly_white"
    )

    st.plotly_chart(fig, use_container_width=True)
735
-
736
-
737
def download_button(future_df, key):
    """Offer the forecasted date/price series as an .xlsx download in Streamlit."""
    download_df = future_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    download_df['Reported Date'] = download_df['Reported Date'].dt.strftime('%Y-%m-%d')
    # Write the workbook into an in-memory buffer and rewind it for Streamlit.
    towrite = io.BytesIO()
    download_df.to_excel(towrite, index=False, engine='xlsxwriter')
    towrite.seek(0)
    # BUG FIX: the file is .xlsx, so serve the OOXML spreadsheet MIME type;
    # 'application/vnd.ms-excel' is the legacy .xls type and can confuse
    # browsers/Excel about the format.
    st.download_button(label="Download Forecasted Values",
                       data=towrite,
                       file_name=f'forecasted_prices_{key}.xlsx',
                       mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')
747
-
748
-
749
-
750
def fetch_and_process_data(query_filter, collection):
    """Load documents matching *query_filter* and return a preprocessed frame.

    Returns None — after showing a Streamlit warning or error — when no
    documents match or when the query/preprocessing fails.
    """
    try:
        records = list(collection.find(query_filter))
        if not records:
            st.warning("⚠️ No data found for the selected filter.")
            return None
        frame = pd.DataFrame(records)
        st.write("Preprocessing data...")
        return preprocess_data(frame)
    except Exception as e:
        st.error(f"❌ Error fetching data 1: {e}")
        return None
765
-
766
def save_best_params(collection, filter_key, best_params):
    """Upsert the tuned hyper-parameters for *filter_key* into *collection*.

    Mutates *best_params* in place by stamping it with the filter key and
    the current timestamp before writing.
    """
    best_params["filter_key"] = filter_key
    best_params["last_updated"] = datetime.now().isoformat()

    # A single atomic upsert replaces the previous find-then-replace/insert
    # pair, which could race with a concurrent writer and insert duplicates.
    collection.replace_one({"filter_key": filter_key}, best_params, upsert=True)
775
-
776
def get_best_params(filter_key, collection):
    """Return the stored hyper-parameter document for *filter_key*, or None."""
    return collection.find_one({"filter_key": filter_key})
779
-
780
def train_and_forecast(df, filter_key, days):
    """Tune a model for the given horizon, persist its parameters, and forecast.

    *days* must be 14, 30, or 90; any other value (or df is None) does nothing.
    """
    if df is not None:
        if days==14:
            best_params = train_and_evaluate(df)
            # BUG FIX: save_best_params takes (collection, filter_key,
            # best_params); the arguments were previously passed as
            # (filter_key, best_params, collection), writing garbage.
            save_best_params(best_params_collection, filter_key, best_params)
            forecast_next_14_days(df, best_params, filter_key)
        elif days==30:
            best_params = train_and_evaluate_1m(df)
            save_best_params(best_params_collection_1m, filter_key, best_params)
            forecast_next_30_days(df, best_params, filter_key)
        elif days==90:
            best_params = train_and_evaluate_3m(df)
            save_best_params(best_params_collection_3m, filter_key, best_params)
            forecast_next_90_days(df, best_params, filter_key)

# NOTE(review): these accumulators appear to collect failed scrape dates,
# but the code that populates them is not visible in this chunk — confirm.
failed_dates_data = []
failed_dates_market = []
796
-
797
def forecast(df, filter_key, days):
    """Forecast *days* ahead using previously stored hyper-parameters.

    Looks up the tuned parameters for *filter_key* in the collection matching
    the horizon and runs the corresponding forecast; warns when the model has
    not been trained yet.  Horizons other than 14/30/90 are ignored.
    """
    horizons = {
        14: (best_params_collection, forecast_next_14_days),
        30: (best_params_collection_1m, forecast_next_30_days),
        90: (best_params_collection_3m, forecast_next_90_days),
    }
    if days not in horizons:
        return
    params_collection, run_forecast = horizons[days]
    record = get_best_params(filter_key, params_collection)
    if record:
        st.info(f"ℹ️ The model was trained on {record['last_updated']}.")
        run_forecast(df, record, filter_key)
    else:
        st.warning("⚠️ Model is not trained yet. Please train the model first.")
819
-
820
def collection_to_dataframe(collection, drop_id=True):
    """
    Converts a MongoDB collection to a pandas DataFrame.

    Args:
        collection: MongoDB collection object.
        drop_id (bool): Whether to drop the '_id' column. Default is True.

    Returns:
        pd.DataFrame: DataFrame containing the collection data.
    """
    frame = pd.DataFrame(list(collection.find()))
    if drop_id and '_id' in frame.columns:
        frame = frame.drop(columns=['_id'])
    return frame
838
-
839
-
840
-
841
def editable_spreadsheet():
    """Upload a sowing report and predict production for a region/season.

    Reads an uploaded .xlsx, collects region/season/area through a form, and
    hands the matching rows to process_dataframe; shows an error when the
    inputs are invalid or nothing matches.
    """
    st.title("Sowing Report Prediction Model")
    uploaded_file = st.file_uploader("Upload your Excel file", type=['xlsx'])
    if uploaded_file is None:
        return

    df_excel = pd.read_excel(uploaded_file)
    st.write("Excel data loaded:", df_excel)

    with st.form("input_form"):
        input_region = st.text_input("Enter Region to Filter By", placeholder="Region Name")
        input_season = st.text_input("Enter Season to Filter By", placeholder="Season (e.g., Winter)")
        input_area = st.number_input("Enter Area (in hectares) for Production Calculation", min_value=0.0, format="%.2f")
        submit_button = st.form_submit_button("Calculate Production")

    if not submit_button:
        return

    if not (input_region and input_season and input_area > 0):
        st.error("Please enter valid region, season, and area to proceed.")
        return

    # Case-insensitive match on both region and season.
    region_match = df_excel['Region'].str.lower() == input_region.lower()
    season_match = df_excel['Season'].str.lower() == input_season.lower()
    filtered_df = df_excel[region_match & season_match]

    if filtered_df.empty:
        st.error("No data found for the specified region and season.")
    else:
        process_dataframe(filtered_df, input_area)
868
-
869
def process_dataframe(df, area):
    """Project production volume from a filtered sowing report.

    Production is estimated as the mean of the 'Yield' column times the
    sown *area*.

    Args:
        df: Filtered sowing-report rows; must contain a 'Yield' column.
        area: Area (hectares) to scale the average yield by.

    Side effects:
        Renders a Streamlit success or error message.
    """
    if 'Yield' not in df.columns:
        st.error("The DataFrame does not contain a necessary 'Yield' column for calculation.")
        return

    average_yield = df['Yield'].mean()
    # mean() of an empty or all-NaN column is NaN; the original happily
    # reported "nan units" — surface a real error instead.
    if pd.isna(average_yield):
        st.error("The 'Yield' column has no usable values for calculation.")
        return

    predicted_production = average_yield * area
    st.success(f"The predicted Production Volume for the specified region and season is: {predicted_production:.2f} units")
876
-
877
-
878
-
879
def display_statistics(df):
    """Render the national market statistics dashboard.

    Collapses the raw per-market rows to one record per day (mean modal
    price, summed arrivals) and renders: latest figures, same-day history
    across years, monthly/yearly averages, largest daily price moves, price
    extremes over the past year, and a rolling-feature data snapshot.

    Args:
        df: Raw rows from the price collection; must contain
            'Reported Date', 'Modal Price (Rs./Quintal)' and
            'Arrivals (Tonnes)' columns.
    """
    st.title("📊 National Market Statistics Dashboard")
    # Page-local CSS for headings and the green "highlight" explainer boxes.
    st.markdown("""
        <style>
        h1 {
            color: #2e7d32;
            font-size: 36px;
            font-weight: bold;
        }
        h3 {
            color: #388e3c;
            font-size: 28px;
            font-weight: 600;
        }
        p {
            font-size: 16px;
            line-height: 1.6;
        }
        .highlight {
            background-color: #f1f8e9;
            padding: 10px;
            border-radius: 8px;
            font-size: 16px;
            color: #2e7d32;
            font-weight: 500;
        }
        </style>
    """, unsafe_allow_html=True)
    df['Reported Date'] = pd.to_datetime(df['Reported Date'])
    # One row per calendar day: average price across markets, total arrivals.
    national_data = df.groupby('Reported Date').agg({
        'Modal Price (Rs./Quintal)': 'mean',
        'Arrivals (Tonnes)': 'sum'
    }).reset_index()

    st.subheader("🗓️ Key Statistics")
    latest_date = national_data['Reported Date'].max()
    latest_price = national_data[national_data['Reported Date'] == latest_date]['Modal Price (Rs./Quintal)'].mean()
    national_data['Arrivals (Tonnes)'] = pd.to_numeric(national_data['Arrivals (Tonnes)'], errors='coerce')
    latest_arrivals = national_data[national_data['Reported Date'] == latest_date]['Arrivals (Tonnes)'].sum()

    st.markdown("<p class='highlight'>This section provides the most recent statistics for the market. It includes the latest available date, the average price of commodities, and the total quantity of goods arriving at the market. These metrics offer an up-to-date snapshot of market conditions.</p>", unsafe_allow_html=True)
    st.write(f"**Latest Date**: {latest_date.strftime('%Y-%m-%d')}")
    st.write(f"**Latest Modal Price**: {latest_price:.2f} Rs./Quintal")
    st.write(f"**Latest Arrivals**: {float(latest_arrivals):.2f} Tonnes")

    st.subheader("📆 This Day in Previous Years")
    st.markdown("<p class='highlight'>This table shows the modal price and total arrivals for this exact day across previous years. It provides a historical perspective to compare against current market conditions. This section examines historical data for the same day in previous years. By analyzing trends for this specific day, you can identify seasonal patterns, supply-demand changes, or any deviations that might warrant closer attention.</p>", unsafe_allow_html=True)
    today = latest_date
    # Same day-of-year across all years present in the data.
    previous_years_data = national_data[national_data['Reported Date'].dt.dayofyear == today.dayofyear]

    if not previous_years_data.empty:
        # NOTE(review): assignment on a filtered slice — pandas may emit a
        # SettingWithCopyWarning here; a .copy() before mutating would silence it.
        previous_years_data['Year'] = previous_years_data['Reported Date'].dt.year.astype(str)
        display_data = (previous_years_data[['Year', 'Modal Price (Rs./Quintal)', 'Arrivals (Tonnes)']]
                        .sort_values(by='Year', ascending=False)
                        .reset_index(drop=True))
        st.table(display_data)
    else:
        st.write("No historical data available for this day in previous years.")

    st.subheader("📅 Monthly Averages Over Years")
    st.markdown("<p class='highlight'>This section displays the average modal prices and arrivals for each month across all years. It helps identify seasonal trends and peak activity months, which can be crucial for inventory planning and market predictions.</p>", unsafe_allow_html=True)
    national_data['Month'] = national_data['Reported Date'].dt.month
    monthly_avg_price = national_data.groupby('Month')['Modal Price (Rs./Quintal)'].mean().reset_index()
    monthly_avg_arrivals = national_data.groupby('Month')['Arrivals (Tonnes)'].mean().reset_index()
    monthly_avg = pd.merge(monthly_avg_price, monthly_avg_arrivals, on='Month')
    monthly_avg['Month'] = monthly_avg['Month'].apply(lambda x: calendar.month_name[x])
    monthly_avg.columns = ['Month', 'Average Modal Price (Rs./Quintal)', 'Average Arrivals (Tonnes)']
    st.write(monthly_avg)
    st.subheader("📆 Yearly Averages")
    st.markdown("<p class='highlight'>Yearly averages provide insights into long-term trends in pricing and arrivals. By examining these values, you can detect overall growth, stability, or volatility in the market.</p>", unsafe_allow_html=True)
    national_data['Year'] = national_data['Reported Date'].dt.year
    yearly_avg_price = national_data.groupby('Year')['Modal Price (Rs./Quintal)'].mean().reset_index()
    # NOTE(review): arrivals are summed per year but the column is then
    # labelled "Average Arrivals (Tonnes)" below — label and aggregation
    # disagree; confirm which is intended.
    yearly_sum_arrivals = national_data.groupby('Year')['Arrivals (Tonnes)'].sum().reset_index()
    yearly_avg = pd.merge(yearly_avg_price, yearly_sum_arrivals, on='Year')
    yearly_avg['Year'] = yearly_avg['Year'].apply(lambda x: f"{int(x)}")
    yearly_avg.columns = ['Year', 'Average Modal Price (Rs./Quintal)', 'Average Arrivals (Tonnes)']
    st.write(yearly_avg)

    st.subheader("📈 Largest Daily Price Changes (Past Year)")
    st.markdown("<p class='highlight'>This analysis identifies the most significant daily price changes in the past year. These fluctuations can highlight periods of market volatility, potentially caused by external factors like weather, policy changes, or supply chain disruptions.</p>", unsafe_allow_html=True)
    one_year_ago = latest_date - pd.DateOffset(years=1)
    recent_data = national_data[national_data['Reported Date'] >= one_year_ago]
    # NOTE(review): chained assignment on a slice again (SettingWithCopyWarning).
    recent_data['Daily Change (%)'] = recent_data['Modal Price (Rs./Quintal)'].pct_change() * 100
    largest_changes = recent_data[['Reported Date', 'Modal Price (Rs./Quintal)', 'Daily Change (%)']].nlargest(5, 'Daily Change (%)')
    largest_changes['Reported Date'] = largest_changes['Reported Date'].dt.date
    largest_changes = largest_changes.reset_index(drop=True)
    st.write(largest_changes)

    st.subheader("🏆 Top 5 Highest and Lowest Prices (Past Year)")
    st.markdown("<p class='highlight'>This section highlights the highest and lowest prices over the past year. These values reflect the extremes of market dynamics, helping to understand price ceilings and floors in the recent period.</p>", unsafe_allow_html=True)
    highest_prices = recent_data.nlargest(5, 'Modal Price (Rs./Quintal)')[['Reported Date', 'Modal Price (Rs./Quintal)']]
    lowest_prices = recent_data.nsmallest(5, 'Modal Price (Rs./Quintal)')[['Reported Date', 'Modal Price (Rs./Quintal)']]
    highest_prices['Reported Date'] = highest_prices['Reported Date'].dt.date
    lowest_prices['Reported Date'] = lowest_prices['Reported Date'].dt.date
    highest_prices = highest_prices.reset_index(drop=True)
    lowest_prices = lowest_prices.reset_index(drop=True)
    st.write("**Top 5 Highest Prices**")
    st.write(highest_prices)
    st.write("**Top 5 Lowest Prices**")
    st.write(lowest_prices)

    st.subheader("🗂️ Data Snapshot")
    st.markdown("<p class='highlight'>This snapshot provides a concise overview of the latest data, including rolling averages and lagged values. These metrics help identify short-term trends and lagged effects in pricing.</p>", unsafe_allow_html=True)
    # 14-day rolling mean and a 14-day lag: the same features the forecast
    # models consume.
    national_data['Rolling Mean (14 Days)'] = national_data['Modal Price (Rs./Quintal)'].rolling(window=14).mean()
    national_data['Lag (14 Days)'] = national_data['Modal Price (Rs./Quintal)'].shift(14)
    national_data['Reported Date'] = national_data['Reported Date'].dt.date
    national_data = national_data.sort_values(by='Reported Date', ascending=False)
    st.dataframe(national_data.head(14).reset_index(drop=True), use_container_width=True, height=525)

    # NOTE(review): the diff view strips indentation, so it is ambiguous
    # whether this call sits inside display_statistics or at module level;
    # placed here because the sowing-report tool appears on the Statistics
    # page — confirm against the original file.
    editable_spreadsheet()
989
-
990
-
991
def parse_table_with_rowspan(table):
    """Flatten an HTML table into rows of text, expanding rowspan cells.

    A cell declaring ``rowspan=k`` is repeated into the same output column
    of the next ``k - 1`` rows, so every returned row carries a complete set
    of values even where the HTML omitted spanned cells.

    Args:
        table: A BeautifulSoup ``<table>`` element.

    Returns:
        list[list[str]]: Stripped cell text per row, header row included.
    """
    parsed_rows = []
    # Maps output-column index -> {"value": str, "rows_left": int} for cells
    # from earlier rows that still span into upcoming rows.
    pending = {}

    for tr in table.find_all("tr"):
        cells = tr.find_all(["td", "th"])
        current = []
        next_cell = 0  # index into this row's own <td>/<th> elements
        out_col = 0    # output column currently being filled

        while next_cell < len(cells) or out_col in pending:
            if out_col in pending:
                # A cell from an earlier row occupies this column.
                carried = pending[out_col]
                current.append(carried["value"])
                carried["rows_left"] -= 1
                if carried["rows_left"] == 0:
                    del pending[out_col]
                out_col += 1
            elif next_cell < len(cells):
                cell = cells[next_cell]
                text = cell.get_text(strip=True)
                span = int(cell.get("rowspan", 1))

                current.append(text)
                if span > 1:
                    pending[out_col] = {"value": text, "rows_left": span - 1}

                next_cell += 1
                out_col += 1

        parsed_rows.append(current)

    return parsed_rows
1026
-
1027
-
1028
def fetch_and_store_data():
    """Incrementally scrape Agmarknet modal-price data into ``collection``.

    Determines the last stored 'Reported Date', then fetches month-sized
    windows from Agmarknet (via ScraperAPI) up to yesterday, keeps only
    'White' variety / 'FAQ' grade rows, cleans types, writes an audit CSV
    per window, and bulk-inserts the records.

    Side effects: network requests, local CSV files, MongoDB inserts,
    stdout progress prints.
    """
    # NOTE(review): secret committed in source — move this key to an
    # environment variable / secrets store and rotate it.
    SCRAPER_API_KEY = "8842750a88db7513a1d19325745437cc"
    # Resume from the day after the newest stored record; full backfill from
    # 2019-01-01 when the collection is empty.
    latest_doc = collection.find_one(sort=[("Reported Date", -1)])
    from_date = (latest_doc["Reported Date"] + timedelta(days=1)) if latest_doc else datetime(2019, 1, 1)
    to_date = datetime.now() - timedelta(days=1)

    print(f"📦 Modal Data → From: {from_date.strftime('%d-%b-%Y')} To: {to_date.strftime('%d-%b-%Y')}")

    # Walk forward one calendar month at a time.
    current = from_date.replace(day=1)
    while current <= to_date:
        start_of_range = max(current, from_date)
        # Last day of `current`'s month: day 28 + 4 days always lands in the
        # next month; snap to its 1st and step back one day.
        end_of_range = (current.replace(day=28) + timedelta(days=4)).replace(day=1) - timedelta(days=1)
        if end_of_range > to_date:
            end_of_range = to_date

        date_from_str = start_of_range.strftime('%d-%b-%Y')
        date_to_str = end_of_range.strftime('%d-%b-%Y')

        print(f"\n📅 Fetching data from {date_from_str} to {date_to_str}")

        # Tx_Commodity=11 is Sesamum; Tx_Trend=2 selects the modal-price view.
        target_url = (
            "https://agmarknet.gov.in/SearchCmmMkt.aspx"
            f"?Tx_Commodity=11&Tx_State=0&Tx_District=0&Tx_Market=0"
            f"&DateFrom={date_from_str}&DateTo={date_to_str}"
            f"&Fr_Date={date_from_str}&To_Date={date_to_str}"
            "&Tx_Trend=2"
            "&Tx_CommodityHead=Sesamum(Sesame,Gingelly,Til)"
            "&Tx_StateHead=--Select--"
            "&Tx_DistrictHead=--Select--"
            "&Tx_MarketHead=--Select--"
        )

        payload = {
            "api_key": SCRAPER_API_KEY,
            "url": target_url
        }

        try:
            response = requests.get("https://api.scraperapi.com/", params=payload)
            soup = BeautifulSoup(response.text, "html.parser")
            table = soup.find("table", {"class": "tableagmark_new"})

            if not table or not table.find_all("tr"):
                print("❌ No table found.")
                current = (current + timedelta(days=32)).replace(day=1)
                continue

            # First parsed row is the header; rowspans already expanded.
            all_rows = parse_table_with_rowspan(table)
            headers = all_rows[0]
            rows = all_rows[1:]

            df_raw = pd.DataFrame(rows, columns=headers)
            print(f"🔍 Raw rows fetched: {len(df_raw)}")

            # Clean invalid state/district/market names
            required_columns = ["State Name", "District Name", "Market Name"]
            if all(col in df_raw.columns for col in required_columns):
                df_raw = df_raw[
                    (df_raw["State Name"].str.strip() != "-") &
                    (df_raw["District Name"].str.strip() != "-") &
                    (df_raw["Market Name"].str.strip() != "-")
                ]
                print(f"✅ Rows after filtering: {len(df_raw)}")
            else:
                print("⚠️ One or more expected columns are missing. Skipping filter.")

            # Filter by variety + grade: only White / FAQ sesame is tracked.
            df_raw = df_raw[
                (df_raw["Variety"].str.strip().str.lower() == "white") &
                (df_raw["Grade"].str.strip().str.upper() == "FAQ")
            ]
            print(f"✅ Filtered rows with 'White' variety and 'FAQ' grade: {len(df_raw)}")

            # Parse and clean dates; drop rows whose date fails to parse.
            df_raw["Reported Date Parsed"] = pd.to_datetime(
                df_raw["Reported Date"].str.strip(), format='%d %b %Y', errors='coerce'
            )
            df_raw = df_raw[df_raw["Reported Date Parsed"].notna()].copy()
            df_raw["Reported Date"] = df_raw["Reported Date Parsed"]
            df_raw.drop(columns=["Reported Date Parsed"], inplace=True)

            # Type conversions (nullable Int64 keeps unparseable prices as <NA>).
            df_raw["Modal Price (Rs./Quintal)"] = pd.to_numeric(
                df_raw["Modal Price (Rs./Quintal)"], errors='coerce'
            ).round().astype("Int64")
            df_raw["Arrivals (Tonnes)"] = pd.to_numeric(
                df_raw["Arrivals (Tonnes)"], errors='coerce'
            ).astype("float64")
            df_raw["State Name"] = df_raw["State Name"].astype("string")
            df_raw["Market Name"] = df_raw["Market Name"].astype("string")

            # Write cleaned CSV (local audit trail of what was inserted).
            raw_csv_filename = f"clean_raw_modal_data_{start_of_range.strftime('%b_%Y')}.csv"
            df_raw.to_csv(raw_csv_filename, index=False)
            print(f"📄 Cleaned raw data CSV written to: {raw_csv_filename}")

            # Insert to DB
            records = df_raw.to_dict(orient="records")
            if records:
                collection.insert_many(records)
                print(f"✅ Inserted {len(records)} records for {current.strftime('%b %Y')}")
            else:
                print("⚠️ No valid records after final filtering.")

        except Exception as e:
            # Best-effort per-month fetch: log and move on to the next window.
            print(f"🔥 Exception during {current.strftime('%b %Y')} fetch: {e}")

        current = (current + timedelta(days=32)).replace(day=1)
1136
-
1137
def fetch_and_store_data_market():
    """Incrementally scrape per-market Agmarknet prices into ``market_price_data``.

    Near-duplicate of ``fetch_and_store_data`` but targets the market-level
    view (Tx_Trend=0), parses 'Price Date' instead of 'Reported Date', and
    also keeps Min/Max prices.

    NOTE(review): the month-walking/fetch/clean skeleton is copy-pasted from
    ``fetch_and_store_data``; a shared helper parameterized on (trend,
    date column, target collection) would remove the duplication.

    Side effects: network requests, local CSV files, MongoDB inserts,
    stdout progress prints.
    """
    # NOTE(review): same committed secret as fetch_and_store_data — rotate
    # and move to configuration.
    SCRAPER_API_KEY = "8842750a88db7513a1d19325745437cc"
    latest_doc = market_price_data.find_one(sort=[("Reported Date", -1)])
    from_date = (latest_doc["Reported Date"] + timedelta(days=1)) if latest_doc else datetime(2019, 1, 1)
    to_date = datetime.now() - timedelta(days=1)

    print(f"📦 Market Data → From: {from_date.strftime('%d-%b-%Y')} To: {to_date.strftime('%d-%b-%Y')}")

    current = from_date.replace(day=1)
    while current <= to_date:
        start_of_range = max(current, from_date)
        # Last day of the month (see fetch_and_store_data for the trick).
        end_of_range = (current.replace(day=28) + timedelta(days=4)).replace(day=1) - timedelta(days=1)
        if end_of_range > to_date:
            end_of_range = to_date

        date_from_str = start_of_range.strftime('%d-%b-%Y')
        date_to_str = end_of_range.strftime('%d-%b-%Y')

        print(f"\n📅 Fetching data from {date_from_str} to {date_to_str}")

        # Tx_Trend=0 selects the per-market price listing.
        target_url = (
            "https://agmarknet.gov.in/SearchCmmMkt.aspx"
            f"?Tx_Commodity=11&Tx_State=0&Tx_District=0&Tx_Market=0"
            f"&DateFrom={date_from_str}&DateTo={date_to_str}"
            f"&Fr_Date={date_from_str}&To_Date={date_to_str}"
            "&Tx_Trend=0"
            "&Tx_CommodityHead=Sesamum(Sesame,Gingelly,Til)"
            "&Tx_StateHead=--Select--"
            "&Tx_DistrictHead=--Select--"
            "&Tx_MarketHead=--Select--"
        )

        payload = {
            "api_key": SCRAPER_API_KEY,
            "url": target_url
        }

        try:
            response = requests.get("https://api.scraperapi.com/", params=payload)
            soup = BeautifulSoup(response.text, "html.parser")
            table = soup.find("table", {"class": "tableagmark_new"})

            if not table or not table.find_all("tr"):
                print("❌ No table found.")
                current = (current + timedelta(days=32)).replace(day=1)
                continue

            all_rows = parse_table_with_rowspan(table)
            headers = all_rows[0]
            rows = all_rows[1:]

            # ✅ Filter out irrelevant columns based on available data
            # NOTE(review): required_columns is declared but never used below.
            required_columns = ["Sl no.", "District Name", "Market Name", "Commodity", "Variety", "Grade", "Min Price (Rs./Quintal)", "Max Price (Rs./Quintal)", "Modal Price (Rs./Quintal)", "Price Date"]
            df_raw = pd.DataFrame(rows, columns=headers)

            # Remove rows with invalid or missing location data
            df_raw = df_raw[
                (df_raw["District Name"].str.strip() != "-") &
                (df_raw["Market Name"].str.strip() != "-")
            ]
            print(f"✅ Rows after filtering invalid locations: {len(df_raw)}")

            # ✅ Filter for variety and grade
            df_raw = df_raw[
                (df_raw["Variety"].str.strip().str.lower() == "white") &
                (df_raw["Grade"].str.strip().str.upper() == "FAQ")
            ]
            print(f"✅ Filtered rows with 'White' variety and 'FAQ' grade: {len(df_raw)}")

            # ✅ Parse 'Price Date' as 'Reported Date' (this view names the
            # date column differently from the modal view).
            df_raw["Reported Date Parsed"] = pd.to_datetime(
                df_raw["Price Date"].str.strip(), format='%d %b %Y', errors='coerce'
            )
            df_raw = df_raw[df_raw["Reported Date Parsed"].notna()].copy()
            df_raw["Reported Date"] = df_raw["Reported Date Parsed"]
            df_raw.drop(columns=["Reported Date Parsed"], inplace=True)

            # ✅ Type conversions (nullable Int64 keeps bad values as <NA>).
            df_raw["Modal Price (Rs./Quintal)"] = pd.to_numeric(
                df_raw["Modal Price (Rs./Quintal)"], errors='coerce'
            ).round().astype("Int64")
            df_raw["Min Price (Rs./Quintal)"] = pd.to_numeric(
                df_raw["Min Price (Rs./Quintal)"], errors='coerce'
            ).round().astype("Int64")
            df_raw["Max Price (Rs./Quintal)"] = pd.to_numeric(
                df_raw["Max Price (Rs./Quintal)"], errors='coerce'
            ).round().astype("Int64")
            df_raw["District Name"] = df_raw["District Name"].astype("string")
            df_raw["Market Name"] = df_raw["Market Name"].astype("string")

            # ✅ Save CSV for audit
            raw_csv_filename = f"clean_raw_market_data_{start_of_range.strftime('%b_%Y')}.csv"
            df_raw.to_csv(raw_csv_filename, index=False)
            print(f"📄 CSV saved: {raw_csv_filename}")

            # ✅ Insert into MongoDB
            records = df_raw.to_dict(orient="records")
            if records:
                market_price_data.insert_many(records)
                print(f"✅ Inserted {len(records)} records for {current.strftime('%b %Y')}")
            else:
                print("⚠️ No valid records after final filtering.")

        except Exception as e:
            # Best-effort: log the failed month and continue with the next.
            print(f"🔥 Exception during {current.strftime('%b %Y')} fetch: {e}")

        current = (current + timedelta(days=32)).replace(day=1)
1244
-
1245
-
1246
-
1247
def get_dataframe_from_collection(collection):
    """Return every document of *collection* as a DataFrame without '_id'.

    NOTE: functionally equivalent to ``collection_to_dataframe(collection)``;
    kept as a separate entry point for existing callers.
    """
    frame = pd.DataFrame(list(collection.find()))
    # errors='ignore' also covers the empty-collection case.
    return frame.drop(columns=["_id"], errors="ignore")
1254
-
1255
def authenticate_user(username, password):
    """Validate a username/password pair against the users collection.

    Args:
        username: Login name to look up.
        password: Plain-text password to check against the stored hash.

    Returns:
        bool: True only when the user exists and the hash matches.
    """
    user = users_collection.find_one({"username": username})
    return bool(user and check_password_hash(user['password'], password))
1260
-
1261
- st.markdown("""
1262
- <style>
1263
- /* Main layout adjustments */
1264
- .main { max-width: 1200px; margin: 0 auto; }
1265
-
1266
- /* Header style */
1267
- h1 {
1268
- color: #4CAF50;
1269
- font-family: 'Arial Black', sans-serif;
1270
- }
1271
-
1272
- /* Button Styling */
1273
- .stButton>button {
1274
- background-color: #4CAF50;
1275
- color: white;
1276
- font-size: 16px;
1277
- border-radius: 12px;
1278
- padding: 12px 20px;
1279
- margin: 10px auto;
1280
- border: none;
1281
- cursor: pointer;
1282
- transition: background-color 0.4s ease, transform 0.3s ease, box-shadow 0.3s ease;
1283
- box-shadow: 0 4px 6px rgba(0, 0, 0, 0.2);
1284
- }
1285
-
1286
- /* Hover Effects for Button */
1287
- .stButton>button:hover {
1288
- background-color: #2196F3; /* Change color on hover */
1289
- color: #ffffff; /* Ensure text is readable */
1290
- transform: scale(1.1) rotate(-2deg); /* Slight zoom and tilt */
1291
- box-shadow: 0 8px 12px rgba(0, 0, 0, 0.3); /* Enhance shadow effect */
1292
- }
1293
-
1294
- /* Animation Effect */
1295
- .stButton>button:after {
1296
- content: '';
1297
- position: absolute;
1298
- top: 0;
1299
- left: 0;
1300
- right: 0;
1301
- bottom: 0;
1302
- border-radius: 12px;
1303
- background: linear-gradient(45deg, #4CAF50, #2196F3, #FFC107, #FF5722);
1304
- z-index: -1; /* Ensure gradient stays behind the button */
1305
- opacity: 0;
1306
- transition: opacity 0.5s ease;
1307
- }
1308
-
1309
- /* Glow Effect on Hover */
1310
- .stButton>button:hover:after {
1311
- opacity: 1;
1312
- animation: glowing 2s infinite alternate;
1313
- }
1314
-
1315
- /* Keyframes for Glow Animation */
1316
- @keyframes glowing {
1317
- 0% { box-shadow: 0 0 5px #4CAF50, 0 0 10px #4CAF50; }
1318
- 100% { box-shadow: 0 0 20px #2196F3, 0 0 30px #2196F3; }
1319
- }
1320
-
1321
- /* Responsive Design */
1322
- @media (max-width: 768px) {
1323
- .stButton>button {
1324
- width: 100%;
1325
- font-size: 14px;
1326
- }
1327
- h1 {
1328
- font-size: 24px;
1329
- }
1330
- }
1331
- </style>
1332
- """, unsafe_allow_html=True)
1333
# ---------------------------------------------------------------------------
# Top-level app flow: authentication gate, then the four dashboard pages
# (Statistics / Plots / Predictions / Exim). Streamlit re-runs this whole
# script on every interaction, so all state lives in st.session_state.
# NOTE(review): indentation below is reconstructed — the diff view this was
# recovered from strips leading whitespace; verify nesting against the
# original file.
# ---------------------------------------------------------------------------
if 'authenticated' not in st.session_state:
    st.session_state.authenticated = False

if st.session_state.get("authenticated", False):
    st.title("🌾 AgriPredict Dashboard")

    # Manual refresh: scrape both the modal-price and per-market feeds.
    if st.button("Get Live Data Feed"):
        st.write("🔄 Fetching fresh data from Modal + Agmarknet...")
        fetch_and_store_data()
        fetch_and_store_data_market()

    view_mode = st.radio("", ["Statistics", "Plots", "Predictions", "Exim"], horizontal=True)

    if view_mode == "Plots":
        st.sidebar.header("Filters")

        selected_period = st.sidebar.selectbox(
            "Select Time Period",
            ["2 Weeks", "1 Month", "3 Months", "1 Year", "5 Years"],
            index=1
        )
        # NOTE(review): "2 Years" is mapped here but not offered in the
        # selectbox above — dead entry or missing option.
        period_mapping = {
            "2 Weeks": 14,
            "1 Month": 30,
            "3 Months": 90,
            "1 Year": 365,
            "2 Years": 730,
            "5 Years": 1825
        }
        st.session_state["selected_period"] = period_mapping[selected_period]

        state_options = list(state_market_dict.keys()) + ['India']
        selected_state = st.sidebar.selectbox("Select State", state_options)

        market_wise = False
        query_filter = {}

        # Build the Mongo filter: market > state > whole country.
        if selected_state != 'India':
            market_wise = st.sidebar.checkbox("Market Wise Analysis")
            if market_wise:
                markets = state_market_dict.get(selected_state, [])
                st.write(f"✅ Available markets for {selected_state}: {markets}")
                selected_market = st.sidebar.selectbox("Select Market", markets)
                query_filter = {"Market Name": selected_market}
            else:
                query_filter = {"State Name": selected_state}
        else:
            query_filter = {"State Name": {"$exists": True}}

        query_filter["Reported Date"] = {
            "$gte": datetime.now() - timedelta(days=st.session_state["selected_period"])
        }

        data_type = st.sidebar.radio("Select Data Type", ["Price", "Volume", "Both"])

        st.write(f"🧪 Final Mongo Query Filter: `{query_filter}`")

        if st.sidebar.button("✨ Let's go!"):
            try:
                df_market_grouped = pd.DataFrame()
                df_grouped = pd.DataFrame()

                # MARKET-WISE: per-market prices come from market_price_data.
                if "Market Name" in query_filter:
                    st.info("📊 Market-level data mode enabled")
                    market_cursor = market_price_data.find(query_filter)
                    market_data = list(market_cursor)
                    st.write(f"📄 Market rows fetched: {len(market_data)}")

                    if market_data:
                        df_market = pd.DataFrame(market_data)
                        df_market['Reported Date'] = pd.to_datetime(df_market['Reported Date'], errors='coerce')
                        df_market["Modal Price (Rs./Quintal)"] = pd.to_numeric(df_market["Modal Price (Rs./Quintal)"], errors='coerce')
                        df_market_grouped = df_market.groupby('Reported Date', as_index=False).agg({
                            'Modal Price (Rs./Quintal)': 'mean'
                        }).dropna()
                        # Reindex to a continuous daily calendar; gap-fill
                        # forward then backward so the line is unbroken.
                        # NOTE(review): fillna(method=...) is deprecated in
                        # modern pandas — .ffill()/.bfill() is the current form.
                        date_range = pd.date_range(df_market_grouped['Reported Date'].min(), df_market_grouped['Reported Date'].max())
                        df_market_grouped = df_market_grouped.set_index('Reported Date').reindex(date_range).rename_axis('Reported Date').reset_index()
                        df_market_grouped['Modal Price (Rs./Quintal)'] = df_market_grouped['Modal Price (Rs./Quintal)'].fillna(method='ffill').fillna(method='bfill')

                # STATE/NATIONAL-WISE: aggregated rows from the main collection.
                st.info("📥 Fetching state-level or national data...")
                cursor = collection.find(query_filter)
                data = list(cursor)
                st.write(f"📄 Total rows fetched from collection: {len(data)}")

                if data:
                    df = pd.DataFrame(data)
                    df['Reported Date'] = pd.to_datetime(df['Reported Date'], errors='coerce')
                    df['Arrivals (Tonnes)'] = pd.to_numeric(df['Arrivals (Tonnes)'], errors='coerce')
                    df['Modal Price (Rs./Quintal)'] = pd.to_numeric(df['Modal Price (Rs./Quintal)'], errors='coerce')

                    df_grouped = df.groupby('Reported Date', as_index=False).agg({
                        'Arrivals (Tonnes)': 'sum',
                        'Modal Price (Rs./Quintal)': 'mean'
                    }).dropna()

                    date_range = pd.date_range(df_grouped['Reported Date'].min(), df_grouped['Reported Date'].max())
                    df_grouped = df_grouped.set_index('Reported Date').reindex(date_range).rename_axis('Reported Date').reset_index()
                    df_grouped['Arrivals (Tonnes)'] = df_grouped['Arrivals (Tonnes)'].fillna(method='ffill').fillna(method='bfill')
                    df_grouped['Modal Price (Rs./Quintal)'] = df_grouped['Modal Price (Rs./Quintal)'].fillna(method='ffill').fillna(method='bfill')

                    st.subheader(f"📈 Trends for {selected_state} ({'Market: ' + selected_market if market_wise else 'State-wide'})")

                    fig = go.Figure()

                    if data_type == "Both":
                        # Price and volume live on very different scales;
                        # min-max scale both so they share one axis.
                        scaler = MinMaxScaler()
                        df_grouped[['Scaled Price', 'Scaled Arrivals']] = scaler.fit_transform(
                            df_grouped[['Modal Price (Rs./Quintal)', 'Arrivals (Tonnes)']]
                        )

                        fig.add_trace(go.Scatter(
                            x=df_grouped['Reported Date'],
                            y=df_grouped['Scaled Price'],
                            mode='lines',
                            name='Scaled Modal Price',
                            line=dict(color='green'),
                        ))

                        fig.add_trace(go.Scatter(
                            x=df_grouped['Reported Date'],
                            y=df_grouped['Scaled Arrivals'],
                            mode='lines',
                            name='Scaled Arrivals',
                            line=dict(color='blue'),
                        ))

                    elif data_type == "Price":
                        # Prefer the market-level series when one was built.
                        price_df = df_market_grouped if not df_market_grouped.empty else df_grouped
                        fig.add_trace(go.Scatter(
                            x=price_df['Reported Date'],
                            y=price_df["Modal Price (Rs./Quintal)"],
                            mode='lines',
                            name='Modal Price',
                            line=dict(color='green'),
                        ))

                    elif data_type == "Volume":
                        fig.add_trace(go.Scatter(
                            x=df_grouped['Reported Date'],
                            y=df_grouped['Arrivals (Tonnes)'],
                            mode='lines',
                            name='Arrivals',
                            line=dict(color='blue'),
                        ))

                    fig.update_layout(
                        title="📊 Agricultural Trends",
                        xaxis_title="Date",
                        yaxis_title="Value (Scaled if Both)",
                        template="plotly_white"
                    )
                    st.plotly_chart(fig, use_container_width=True)

                else:
                    st.warning("⚠️ No data found for the selected filter range and region.")

            except Exception as e:
                st.error(f"❌ Error fetching data 2: {e}")
                st.exception(e)

    elif view_mode == "Predictions":
        st.subheader("📊 Model Analysis")
        sub_option = st.radio("Select one of the following", ["India", "States", "Market"], horizontal=True)
        sub_timeline = st.radio("Select one of the following horizons", ["14 days", "1 month", "3 month"], horizontal=True)
        if sub_option == "States":
            states = ["Karnataka", "Madhya Pradesh", "Gujarat", "Uttar Pradesh", "Telangana"]
            selected_state = st.selectbox("Select State for Model Training", states)
            filter_key = f"state_{selected_state}"

            if st.button("Forecast"):
                query_filter = {"State Name": selected_state}
                df = fetch_and_process_data(query_filter, collection)
                if sub_timeline == "14 days":
                    forecast(df, filter_key, 14)
                elif sub_timeline == "1 month":
                    forecast(df, filter_key, 30)
                else:
                    forecast(df, filter_key, 90)
        elif sub_option == "Market":
            market_options = ["Rajkot", "Neemuch", "Kalburgi", "Warangal"]
            selected_market = st.selectbox("Select Market for Model Training", market_options)
            filter_key = f"market_{selected_market}"
            if st.button("Forecast"):
                query_filter = {"Market Name": selected_market}
                # NOTE(review): debug output — dumps all rows after a
                # hard-coded date; presumably left in from development.
                comparison_date = pd.to_datetime("18 Feb 2025")
                df = fetch_and_process_data(query_filter, market_price_data)
                st.write(df[df["Reported Date"] > comparison_date])
                if sub_timeline == "14 days":
                    forecast(df, filter_key, 14)
                elif sub_timeline == "1 month":
                    forecast(df, filter_key, 30)
                else:
                    forecast(df, filter_key, 90)

        elif sub_option == "India":
            # NOTE(review): this impExp read is dead — df is overwritten
            # inside the button handler, and the `if True:` wrapper is a
            # leftover no-op.
            df = collection_to_dataframe(impExp)
            if True:
                if st.button("Forecast"):
                    query_filter = {}
                    df = fetch_and_process_data(query_filter, collection)
                    if sub_timeline == "14 days":
                        forecast(df, "India", 14)
                    elif sub_timeline == "1 month":
                        forecast(df, "India", 30)
                    else:
                        forecast(df, "India", 90)

    elif view_mode == "Statistics":
        # NOTE(review): `document` is unused.
        document = collection.find_one()
        df = get_dataframe_from_collection(collection)
        display_statistics(df)
    elif view_mode == "Exim":
        # Import/export dashboard from the impExp collection.
        df = collection_to_dataframe(impExp)

        plot_option = st.radio(
            "Select the data to visualize:",
            ["Import Price", "Import Quantity", "Export Price", "Export Quantity"],
            horizontal=True
        )

        time_period = st.selectbox(
            "Select time period:",
            ["1 Month", "6 Months", "1 Year", "2 Years"]
        )

        df["Reported Date"] = pd.to_datetime(df["Reported Date"], format="%Y-%m-%d")
        if time_period == "1 Month":
            start_date = pd.Timestamp.now() - pd.DateOffset(months=1)
        elif time_period == "6 Months":
            start_date = pd.Timestamp.now() - pd.DateOffset(months=6)
        elif time_period == "1 Year":
            start_date = pd.Timestamp.now() - pd.DateOffset(years=1)
        elif time_period == "2 Years":
            start_date = pd.Timestamp.now() - pd.DateOffset(years=2)

        filtered_df = df[df["Reported Date"] >= start_date]
        if plot_option == "Import Price":
            grouped_df = (
                filtered_df.groupby("Reported Date", as_index=False)["VALUE_IMPORT"]
                .mean()
                .rename(columns={"VALUE_IMPORT": "Average Import Price"})
            )
            y_axis_label = "Average Import Price (Rs.)"
        elif plot_option == "Import Quantity":
            grouped_df = (
                filtered_df.groupby("Reported Date", as_index=False)["QUANTITY_IMPORT"]
                .sum()
                .rename(columns={"QUANTITY_IMPORT": "Total Import Quantity"})
            )
            y_axis_label = "Total Import Quantity (Tonnes)"
        elif plot_option == "Export Price":
            grouped_df = (
                filtered_df.groupby("Reported Date", as_index=False)["VALUE_EXPORT"]
                .mean()
                .rename(columns={"VALUE_EXPORT": "Average Export Price"})
            )
            y_axis_label = "Average Export Price (Rs.)"
        elif plot_option == "Export Quantity":
            # NOTE(review): BUG — groups QUANTITY_IMPORT but labels it
            # "Total Export Quantity"; should presumably be QUANTITY_EXPORT.
            grouped_df = (
                filtered_df.groupby("Reported Date", as_index=False)["QUANTITY_IMPORT"]
                .sum()
                .rename(columns={"QUANTITY_IMPORT": "Total Export Quantity"})
            )
            y_axis_label = "Total Export Quantity (Tonnes)"

        fig = px.line(
            grouped_df,
            x="Reported Date",
            y=grouped_df.columns[1],
            title=f"{plot_option} Over Time",
            labels={"Reported Date": "Date", grouped_df.columns[1]: y_axis_label},
        )
        st.plotly_chart(fig)


else:
    # Unauthenticated: show the login form and gate everything else behind it.
    with st.form("login_form"):
        st.subheader("Please log in")

        username = st.text_input("Username")
        password = st.text_input("Password", type="password")
        login_button = st.form_submit_button("Login")

        if login_button:
            if authenticate_user(username, password):
                st.session_state.authenticated = True
                st.session_state['username'] = username
                st.write("Login successful!")
                st.rerun()
            else:
                st.error("Invalid username or password")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/agri_predict/__init__.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """AgriPredict package entrypoint.
2
+
3
+ Expose commonly used helpers for the Streamlit app.
4
+ """
5
+ from .config import get_collections
6
+ from .features import (
7
+ create_forecasting_features,
8
+ create_forecasting_features_1m,
9
+ create_forecasting_features_3m,
10
+ )
11
+ from .data import (
12
+ preprocess_data,
13
+ fetch_and_process_data,
14
+ fetch_and_store_data,
15
+ get_dataframe_from_collection,
16
+ collection_to_dataframe,
17
+ )
18
+ from .models import (
19
+ train_and_evaluate,
20
+ train_and_evaluate_1m,
21
+ train_and_evaluate_3m,
22
+ train_and_forecast,
23
+ forecast,
24
+ forecast_next_14_days,
25
+ forecast_next_30_days,
26
+ forecast_next_90_days,
27
+ )
28
+ from .plotting import plot_data, download_button, display_statistics
29
+ from .utils import (
30
+ save_best_params,
31
+ get_best_params,
32
+ authenticate_user,
33
+ collection_to_dataframe as utils_collection_to_dataframe,
34
+ )
35
+ from .scraper import api_client, AgmarknetAPIClient
36
+
37
+ __all__ = [
38
+ "get_collections",
39
+ "api_client",
40
+ "AgmarknetAPIClient",
41
+ ]
src/agri_predict/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (1.31 kB). View file
 
src/agri_predict/__pycache__/config.cpython-312.pyc ADDED
Binary file (1.28 kB). View file
 
src/agri_predict/__pycache__/constants.cpython-312.pyc ADDED
Binary file (1.36 kB). View file
 
src/agri_predict/__pycache__/data.cpython-312.pyc ADDED
Binary file (5.29 kB). View file
 
src/agri_predict/__pycache__/features.cpython-312.pyc ADDED
Binary file (13 kB). View file
 
src/agri_predict/__pycache__/models.cpython-312.pyc ADDED
Binary file (11.6 kB). View file
 
src/agri_predict/__pycache__/plotting.cpython-312.pyc ADDED
Binary file (15.3 kB). View file
 
src/agri_predict/__pycache__/scraper.cpython-312.pyc ADDED
Binary file (12.5 kB). View file
 
src/agri_predict/__pycache__/utils.cpython-312.pyc ADDED
Binary file (2.18 kB). View file
 
src/agri_predict/config.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+ from pymongo import MongoClient
4
+ import certifi
5
+ from datetime import datetime
6
+
7
+ load_dotenv()
8
+
9
+
10
def get_collections():
    """Open a TLS-verified MongoDB client and return the app's collections.

    `MONGO_URI` is read from the environment (populated by dotenv at import
    time). Raises RuntimeError when the URI is absent.
    """
    uri = os.getenv("MONGO_URI")
    if not uri:
        raise RuntimeError("MONGO_URI is not set in the environment")

    db = MongoClient(uri, tlsCAFile=certifi.where())["AgriPredict"]
    collection_names = {
        "collection": "WhiteSesame",
        "best_params_collection": "BestParams",
        "best_params_collection_1m": "BestParams_1m",
        "best_params_collection_3m": "BestParams_3m",
        "impExp": "impExp",
        "users_collection": "user",
    }
    return {alias: db[name] for alias, name in collection_names.items()}
src/agri_predict/constants.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Mapping of Indian states to the Agmarknet market (mandi) names tracked for
# white sesame prices. Keys drive the state selector in the UI; values are
# passed verbatim to the market query filters, so spellings (including the
# scraped oddities like "Dhragradhra") must not be "corrected" here.
state_market_dict = {
    "Karnataka": [
        "Kalburgi",
        "Basava Kalayana",
        "Lingasugur",
        "Kustagi",
        "Bangalore",
        "Bagalakot",
        "Hubli (Amaragol)"
    ],
    "Gujarat": [
        "Siddhpur",
        "Jasdan",
        "Gondal",
        "Morbi",
        "Botad",
        "Visavadar",
        "Dahod",
        "Rajkot",
        "Junagadh",
        "Savarkundla",
        "Bhavnagar",
        "Rajula",
        "Dhoraji",
        "Amreli",
        "Mahuva(Station Road)",
        "Mansa",
        "Porbandar",
        "Dasada Patadi",
        "Halvad",
        "Chotila",
        "Bhanvad",
        "Dhansura",
        "Babra",
        "Upleta",
        "Palitana",
        "Jetpur(Dist.Rajkot)",
        "S.Mandvi",
        "Mandvi",
        "Khambha",
        "Kadi",
        "Taleja",
        "Himatnagar",
        "Lakhani",
        "Rapar",
        "Una",
        "Dhari",
        "Bagasara",
        "Jam Jodhpur",
        "Veraval",
        "Dhragradhra",
        "Deesa"
    ],
    "Uttar Pradesh": [
        "Bangarmau",
        "Sultanpur",
        "Maudaha",
        "Mauranipur",
        "Lalitpur",
        "Konch",
        "Muskara",
        "Raath",
        "Varipaal",
        "Auraiya",
        "Orai",
        "Banda",
        "Kishunpur",
        "Ait",
        "Jhansi",
        "Kurara",
        "Chirgaon",
        "Charkhari",
        "Moth",
        "Jalaun",
        "Sirsaganj",
        "Shikohabad"
    ],
    "Madhya Pradesh": [
        "Naugaon",
        "Mehar",
        "Kailaras",
        "Datia",
        "LavKush Nagar(Laundi)",
        "Ajaygarh",
        "Rajnagar",
        "Sevda",
        "Neemuch",
        "Sheopurkalan",
        "Lashkar",
        "Alampur",
        "Niwadi",
        "Dabra",
        "Ujjain",
        "Bijawar",
        "Sidhi",
        "Barad",
        "Pohari",
        "Shahagarh",
        "Lateri",
        "Banapura",
        "Panna",
        "Garhakota",
        "Katni",
        "Chhatarpur",
        "Beohari",
        "Satna",
        "Sabalgarh",
        "Hanumana",
        "Bhander",
        "Banmorkalan",
        "Jaora",
        "Bagli",
        "Singroli"
    ],
    "Telangana": [
        "Warangal"
    ]
}
src/agri_predict/data.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from datetime import datetime, timedelta
3
+
4
+ from .config import get_collections
5
+ from .scraper import api_client
6
+
7
+
8
def preprocess_data(df: pd.DataFrame) -> pd.DataFrame:
    """Collapse raw price rows to one mean modal price per calendar day.

    Duplicate dates are averaged, the date axis is made contiguous over the
    observed span, and gap days are forward-filled (then backward-filled to
    cover a leading gap).
    """
    daily = df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    daily['Reported Date'] = pd.to_datetime(daily['Reported Date'])
    daily = daily.groupby('Reported Date', as_index=False).mean()

    all_days = pd.date_range(daily['Reported Date'].min(), daily['Reported Date'].max())
    daily = (
        daily.set_index('Reported Date')
        .reindex(all_days)
        .rename_axis('Reported Date')
        .reset_index()
    )
    daily['Modal Price (Rs./Quintal)'] = daily['Modal Price (Rs./Quintal)'].ffill().bfill()
    return daily
16
+
17
+
18
def fetch_and_process_data(query_filter: dict):
    """Load documents matching `query_filter` from the price collection.

    Returns a preprocessed daily DataFrame, or None when the query matches
    nothing or any error occurs (deliberately best-effort: callers treat
    None as "no data").
    """
    try:
        collection = get_collections()['collection']
        records = list(collection.find(query_filter))
        if not records:
            return None
        return preprocess_data(pd.DataFrame(records))
    except Exception:
        # Swallowing broadly is intentional here; the UI handles None.
        return None
31
+
32
+
33
def fetch_and_store_data():
    """Incrementally fetch Agmarknet rows and append them to MongoDB.

    Fetches from the day after the latest stored "Reported Date" (or from
    2000-01-01 for an empty collection) up to yesterday, keeps only the
    "White" variety, coerces field types, and inserts one document per row.

    Returns:
        pd.DataFrame: the newly stored rows, or None when nothing was fetched.
    """
    cols = get_collections()
    collection = cols['collection']

    # Newest stored date via a descending sort on "Reported Date".
    latest_doc = collection.find_one(sort=[("Reported Date", -1)])
    latest_date = latest_doc["Reported Date"] if latest_doc and "Reported Date" in latest_doc else None

    # Calculate the fetch window: resume after the last stored day.
    if latest_date:
        from_date = latest_date + timedelta(days=1)
    else:
        from_date = datetime(2000, 1, 1)

    to_date = datetime.now() - timedelta(days=1)

    # Format dates for the API (YYYY-MM-DD).
    from_date_str = from_date.strftime('%Y-%m-%d')
    to_date_str = to_date.strftime('%Y-%m-%d')

    # Fetch data using the Agmarknet API client.
    responses = api_client.fetch_date_range(from_date_str, to_date_str)

    if not responses:
        return None

    # Parse raw responses into one DataFrame.
    df = api_client.parse_multiple_responses_to_dataframe(responses)

    if df.empty:
        return None

    # Filter for the White variety only.
    df = df[df['Variety'] == "White"]

    # Ensure proper data types for MongoDB.
    df["Reported Date"] = pd.to_datetime(df["Reported Date"])

    # Convert numeric fields with proper type handling.
    # NOTE(review): Min/Max prices are round-tripped through to_numeric and
    # then stored as *strings* — presumably to match the existing document
    # schema; confirm before changing.
    df["Modal Price (Rs./Quintal)"] = pd.to_numeric(df["Modal Price (Rs./Quintal)"], errors='coerce').astype('Int64')
    df["Min Price (Rs./Quintal)"] = pd.to_numeric(df["Min Price (Rs./Quintal)"], errors='coerce').astype(str)
    df["Max Price (Rs./Quintal)"] = pd.to_numeric(df["Max Price (Rs./Quintal)"], errors='coerce').astype(str)
    df["Arrivals (Tonnes)"] = pd.to_numeric(df["Arrivals (Tonnes)"], errors='coerce').astype(float)

    # Sort by date so documents are inserted chronologically.
    df.sort_values(by="Reported Date", inplace=True)

    # Insert into MongoDB one document per row.
    for _, row in df.iterrows():
        doc = row.to_dict()
        # Replace NaN with None so Mongo stores nulls, not float('nan').
        for key in doc:
            if pd.isna(doc[key]):
                doc[key] = None
        collection.insert_one(doc)

    return df
97
+
98
+
99
def get_dataframe_from_collection(collection):
    """Dump every document in `collection` into a DataFrame, without Mongo's `_id`."""
    frame = pd.DataFrame(list(collection.find()))
    return frame.drop(columns=['_id']) if '_id' in frame.columns else frame
105
+
106
+
107
def collection_to_dataframe(collection, drop_id=True):
    """Materialise `collection` as a DataFrame.

    When `drop_id` is true (the default), Mongo's `_id` column is removed.
    """
    docs = [doc for doc in collection.find()]
    frame = pd.DataFrame(docs)
    if drop_id and '_id' in frame.columns:
        frame = frame.drop(columns=['_id'])
    return frame
src/agri_predict/features.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+
4
+
5
+ def _ensure_datetime_index(df):
6
+ df = df.copy()
7
+ if not isinstance(df.index, pd.DatetimeIndex):
8
+ df = df.set_index('Reported Date')
9
+ df.index = pd.to_datetime(df.index)
10
+ return df
11
+
12
+
13
def create_forecasting_features(df):
    """Add calendar, lag, rolling, seasonal-average and Fourier features for
    the 14-day model, returning the frame with 'Reported Date' as a column.
    """
    # Normalise to a DatetimeIndex (copy; callers' frame is untouched).
    frame = df.copy()
    if not isinstance(frame.index, pd.DatetimeIndex):
        frame = frame.set_index('Reported Date')
        frame.index = pd.to_datetime(frame.index)

    price = frame['Modal Price (Rs./Quintal)']
    target_map = price.to_dict()
    idx = frame.index

    # Calendar components.
    frame['dayofweek'] = idx.dayofweek
    frame['quarter'] = idx.quarter
    frame['month'] = idx.month
    frame['year'] = idx.year
    frame['dayofyear'] = idx.dayofyear
    frame['weekofyear'] = idx.isocalendar().week

    # Fixed-day and calendar-month lags of the target.
    for name, delta in [('lag14', pd.Timedelta(days=14)),
                        ('lag28', pd.Timedelta(days=28)),
                        ('lag56', pd.Timedelta(days=56)),
                        ('lag_3months', pd.DateOffset(months=3)),
                        ('lag_6months', pd.DateOffset(months=6))]:
        frame[name] = (idx - delta).map(target_map)

    # Trailing rolling statistics.
    for window in (7, 14, 28):
        frame[f'rolling_mean_{window}'] = price.rolling(window=window, min_periods=1).mean()
        frame[f'rolling_std_{window}'] = price.rolling(window=window, min_periods=1).std()

    frame['ema7'] = price.ewm(span=7, adjust=False).mean()
    frame['ema14'] = price.ewm(span=14, adjust=False).mean()

    # Seasonal group means (leak across time by design of the original).
    frame['monthly_avg'] = frame.groupby('month')['Modal Price (Rs./Quintal)'].transform('mean')
    frame['weekly_avg'] = frame.groupby('weekofyear')['Modal Price (Rs./Quintal)'].transform('mean')
    frame['dayofweek_avg'] = frame.groupby('dayofweek')['Modal Price (Rs./Quintal)'].transform('mean')

    # Fourier terms for yearly and fortnightly seasonality.
    day = idx.dayofyear
    frame['fourier_sin_365'] = np.sin(2 * np.pi * day / 365)
    frame['fourier_cos_365'] = np.cos(2 * np.pi * day / 365)
    frame['fourier_sin_14'] = np.sin(2 * np.pi * day / 14)
    frame['fourier_cos_14'] = np.cos(2 * np.pi * day / 14)

    # NOTE(review): these reduce to single scalars broadcast to every row
    # (min/max over the whole 14-day-shifted series), not rolling extrema.
    shifted = (idx - pd.Timedelta(days=14)).map(target_map)
    frame['recent_min_14'] = shifted.min()
    frame['recent_max_14'] = shifted.max()
    frame['recent_range_14'] = frame['recent_max_14'] - frame['recent_min_14']

    frame['yearly_avg'] = frame.groupby('year')['Modal Price (Rs./Quintal)'].transform('mean')
    frame['cumulative_mean'] = price.expanding().mean()

    return frame.reset_index()
52
+
53
+
54
def create_forecasting_features_1m(df):
    """Feature engineering for the 1-month (30-day) model.

    Same template as `create_forecasting_features` but with monthly-scale
    lags (30/60/90 days, 6/12 months), 30/60/90-day rolling stats and
    30-day Fourier terms. Returns the frame with 'Reported Date' restored
    as a column.
    """
    df = _ensure_datetime_index(df)
    # Date -> price lookup used to realise the lag columns below.
    target_map = df['Modal Price (Rs./Quintal)'].to_dict()

    # Calendar components.
    df['dayofweek'] = df.index.dayofweek
    df['quarter'] = df.index.quarter
    df['month'] = df.index.month
    df['year'] = df.index.year
    df['dayofyear'] = df.index.dayofyear
    df['weekofyear'] = df.index.isocalendar().week

    # Fixed-day and calendar-month lags; dates without a match become NaN.
    df['lag_30'] = (df.index - pd.Timedelta(days=30)).map(target_map)
    df['lag_60'] = (df.index - pd.Timedelta(days=60)).map(target_map)
    df['lag_90'] = (df.index - pd.Timedelta(days=90)).map(target_map)
    df['lag_6months'] = (df.index - pd.DateOffset(months=6)).map(target_map)
    df['lag_12months'] = (df.index - pd.DateOffset(months=12)).map(target_map)

    # Trailing rolling statistics at monthly scales.
    for window in [30, 60, 90]:
        df[f'rolling_mean_{window}'] = df['Modal Price (Rs./Quintal)'].rolling(window=window, min_periods=1).mean()
        df[f'rolling_std_{window}'] = df['Modal Price (Rs./Quintal)'].rolling(window=window, min_periods=1).std()

    df['ema_30'] = df['Modal Price (Rs./Quintal)'].ewm(span=30, adjust=False).mean()
    df['ema_60'] = df['Modal Price (Rs./Quintal)'].ewm(span=60, adjust=False).mean()

    # Seasonal group means computed over the whole frame.
    df['monthly_avg'] = df.groupby('month')['Modal Price (Rs./Quintal)'].transform('mean')
    df['weekly_avg'] = df.groupby('weekofyear')['Modal Price (Rs./Quintal)'].transform('mean')
    df['dayofweek_avg'] = df.groupby('dayofweek')['Modal Price (Rs./Quintal)'].transform('mean')

    # Fourier terms for yearly and 30-day seasonality.
    df['fourier_sin_365'] = np.sin(2 * np.pi * df.index.dayofyear / 365)
    df['fourier_cos_365'] = np.cos(2 * np.pi * df.index.dayofyear / 365)
    df['fourier_sin_30'] = np.sin(2 * np.pi * df.index.dayofyear / 30)
    df['fourier_cos_30'] = np.cos(2 * np.pi * df.index.dayofyear / 30)

    # NOTE(review): .min()/.max() collapse the 30-day-shifted series to a
    # single scalar broadcast to every row — not rolling extrema.
    df['recent_min_30'] = (df.index - pd.Timedelta(days=30)).map(target_map).min()
    df['recent_max_30'] = (df.index - pd.Timedelta(days=30)).map(target_map).max()
    df['recent_range_30'] = df['recent_max_30'] - df['recent_min_30']

    df['yearly_avg'] = df.groupby('year')['Modal Price (Rs./Quintal)'].transform('mean')
    df['cumulative_mean'] = df['Modal Price (Rs./Quintal)'].expanding().mean()

    return df.reset_index()
95
+
96
+
97
def create_forecasting_features_3m(df):
    """Feature engineering for the 3-month (90-day) model.

    Same template as `create_forecasting_features` but with quarterly-scale
    lags (3/6/9/12 months), 90–365-day rolling stats and 90/30-day Fourier
    terms. Returns the frame with 'Reported Date' restored as a column.
    """
    df = _ensure_datetime_index(df)
    # Date -> price lookup used to realise the lag columns below.
    target_map = df['Modal Price (Rs./Quintal)'].to_dict()

    # Calendar components.
    df['dayofweek'] = df.index.dayofweek
    df['quarter'] = df.index.quarter
    df['month'] = df.index.month
    df['year'] = df.index.year
    df['dayofyear'] = df.index.dayofyear
    df['weekofyear'] = df.index.isocalendar().week

    # Calendar-month lags; dates without a match become NaN.
    df['lag_3months'] = (df.index - pd.DateOffset(months=3)).map(target_map)
    df['lag_6months'] = (df.index - pd.DateOffset(months=6)).map(target_map)
    df['lag_9months'] = (df.index - pd.DateOffset(months=9)).map(target_map)
    df['lag_12months'] = (df.index - pd.DateOffset(months=12)).map(target_map)

    # Trailing rolling statistics at quarterly-to-yearly scales.
    for window in [90, 180, 270, 365]:
        df[f'rolling_mean_{window}'] = df['Modal Price (Rs./Quintal)'].rolling(window=window, min_periods=1).mean()
        df[f'rolling_std_{window}'] = df['Modal Price (Rs./Quintal)'].rolling(window=window, min_periods=1).std()

    df['ema90'] = df['Modal Price (Rs./Quintal)'].ewm(span=90, adjust=False).mean()
    df['ema180'] = df['Modal Price (Rs./Quintal)'].ewm(span=180, adjust=False).mean()

    # Seasonal group means computed over the whole frame.
    df['monthly_avg'] = df.groupby('month')['Modal Price (Rs./Quintal)'].transform('mean')
    df['weekly_avg'] = df.groupby('weekofyear')['Modal Price (Rs./Quintal)'].transform('mean')
    df['dayofweek_avg'] = df.groupby('dayofweek')['Modal Price (Rs./Quintal)'].transform('mean')

    # Fourier terms for 90-day and 30-day seasonality.
    df['fourier_sin_90'] = np.sin(2 * np.pi * df.index.dayofyear / 90)
    df['fourier_cos_90'] = np.cos(2 * np.pi * df.index.dayofyear / 90)
    df['fourier_sin_30'] = np.sin(2 * np.pi * df.index.dayofyear / 30)
    df['fourier_cos_30'] = np.cos(2 * np.pi * df.index.dayofyear / 30)

    # NOTE(review): .min()/.max() collapse the 90-day-shifted series to a
    # single scalar broadcast to every row — not rolling extrema.
    df['recent_min_90'] = (df.index - pd.Timedelta(days=90)).map(target_map).min()
    df['recent_max_90'] = (df.index - pd.Timedelta(days=90)).map(target_map).max()
    df['recent_range_90'] = df['recent_max_90'] - df['recent_min_90']

    df['yearly_avg'] = df.groupby('year')['Modal Price (Rs./Quintal)'].transform('mean')
    df['cumulative_mean'] = df['Modal Price (Rs./Quintal)'].expanding().mean()

    return df.reset_index()
src/agri_predict/models.py ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from xgboost import XGBRegressor
4
+ from sklearn.metrics import mean_squared_error, mean_absolute_error
5
+ from sklearn.preprocessing import MinMaxScaler
6
+ from .features import (
7
+ create_forecasting_features,
8
+ create_forecasting_features_1m,
9
+ create_forecasting_features_3m,
10
+ )
11
+ from .plotting import plot_data, download_button
12
+ from .config import get_collections
13
+
14
+
15
def _custom_grid_search(X_train, y_train, X_test, y_test, param_grid, progress_bar):
    """Exhaustive grid search over four XGBoost hyper-parameters.

    Fits one candidate per combination, scores it on the held-out split via
    `model.score` (the estimator's built-in higher-is-better score), and
    advances the Streamlit `progress_bar` after each fit. Returns the
    best-scoring parameter dict.

    NOTE(review): a single XGBRegressor instance is reused via set_params
    and refit for every combination; selection is on the same test split
    later used for reporting.
    """
    model = XGBRegressor()
    # Total number of combinations, for progress reporting.
    param_combinations = len(param_grid['learning_rate']) * len(param_grid['max_depth']) * \
        len(param_grid['n_estimators']) * len(param_grid['booster'])
    current_combination = 0
    best_score = float('-inf')
    best_params = None

    for learning_rate in param_grid['learning_rate']:
        for max_depth in param_grid['max_depth']:
            for n_estimators in param_grid['n_estimators']:
                for booster in param_grid['booster']:
                    model.set_params(
                        learning_rate=learning_rate,
                        max_depth=max_depth,
                        n_estimators=n_estimators,
                        booster=booster
                    )
                    model.fit(X_train, y_train)
                    score = model.score(X_test, y_test)
                    if score > best_score:
                        best_score = score
                        best_params = {
                            'learning_rate': learning_rate,
                            'max_depth': max_depth,
                            'n_estimators': n_estimators,
                            'booster': booster
                        }
                    current_combination += 1
                    # Progress as an integer percentage of the grid covered.
                    progress_bar.progress(int((current_combination / param_combinations) * 100))
    return best_params
46
+
47
+
48
def _train_and_evaluate_generic(df, feature_fn, split_date, progress_bar):
    """Feature-engineer `df`, time-split at `split_date`, grid-search an
    XGBRegressor, report hold-out RMSE/MAE, and plot train/test/predicted.

    Args:
        df: daily price frame with 'Reported Date' and the modal-price column.
        feature_fn: one of the create_forecasting_features* functions.
        split_date: boundary between train (before) and test (on/after).
        progress_bar: Streamlit progress widget advanced by the grid search.

    Returns:
        dict: the best hyper-parameters found.
    """
    df = feature_fn(df)
    train_df = df[df['Reported Date'] < split_date]
    test_df = df[df['Reported Date'] >= split_date]

    X_train = train_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
    y_train = train_df['Modal Price (Rs./Quintal)']
    X_test = test_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
    y_test = test_df['Modal Price (Rs./Quintal)']

    param_grid = {
        'learning_rate': [0.01, 0.1, 0.2],
        'max_depth': [3, 5, 7],
        'n_estimators': [50, 100, 150],
        'booster': ['gbtree', 'dart']
    }

    st.write("Performing hyperparameter tuning...")
    best_params = _custom_grid_search(X_train, y_train, X_test, y_test, param_grid, progress_bar)

    st.write("Training the best model and making predictions...")
    best_model = XGBRegressor(**best_params)
    best_model.fit(X_train, y_train)
    y_pred = best_model.predict(X_test)

    # BUG FIX: mean_squared_error returns MSE; take the square root so the
    # value reported as "RMSE" really is the root-mean-squared error.
    rmse = mean_squared_error(y_test, y_pred) ** 0.5
    mae = mean_absolute_error(y_test, y_pred)
    st.write(f"RMSE: {rmse}")
    st.write(f"MAE: {mae}")

    # Assemble one long frame tagged by segment for plotting.
    train_plot_df = train_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    train_plot_df['Type'] = 'Train'
    test_plot_df = test_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    test_plot_df['Type'] = 'Test'
    predicted_plot_df = test_df[['Reported Date']].copy()
    predicted_plot_df['Modal Price (Rs./Quintal)'] = y_pred
    predicted_plot_df['Type'] = 'Predicted'
    plot_df = pd.concat([train_plot_df, test_plot_df, predicted_plot_df])

    import plotly.graph_objects as go
    fig = go.Figure()
    for plot_type, color, dash in [('Train', 'blue', None), ('Test', 'orange', None), ('Predicted', 'green', 'dot')]:
        data = plot_df[plot_df['Type'] == plot_type]
        fig.add_trace(go.Scatter(
            x=data['Reported Date'],
            y=data['Modal Price (Rs./Quintal)'],
            mode='lines',
            name=f"{plot_type} Data",
            line=dict(color=color, dash=dash)
        ))
    fig.update_layout(title="Train, Test, and Predicted Data", xaxis_title="Date", yaxis_title="Modal Price (Rs./Quintal)", template="plotly_white")
    st.plotly_chart(fig, width='stretch')

    return best_params
103
+
104
+
105
def train_and_evaluate(df):
    """Tune and evaluate the 14-day model (train/test split at 2024-01-01)."""
    bar = st.progress(0)
    return _train_and_evaluate_generic(df, create_forecasting_features, '2024-01-01', bar)
108
+
109
+
110
def train_and_evaluate_1m(df):
    """Tune and evaluate the 1-month model (train/test split at 2023-01-01)."""
    bar = st.progress(0)
    return _train_and_evaluate_generic(df, create_forecasting_features_1m, pd.to_datetime('2023-01-01'), bar)
113
+
114
+
115
def train_and_evaluate_3m(df):
    """Tune and evaluate the 3-month model (train/test split at 2023-01-01)."""
    bar = st.progress(0)
    return _train_and_evaluate_generic(df, create_forecasting_features_3m, pd.to_datetime('2023-01-01'), bar)
118
+
119
+
120
def forecast_next_14_days(df, _best_params, key):
    """Fit on all history with `_best_params`, predict 14 days ahead, plot
    actual vs forecast, and offer a CSV download keyed by `key`."""
    horizon = 14
    last_date = df['Reported Date'].max()
    upcoming = pd.DataFrame({
        'Reported Date': pd.date_range(start=last_date + pd.Timedelta(days=1), periods=horizon)
    })

    # Featurise history and horizon together so lags/rollings line up.
    featured = create_forecasting_features(pd.concat([df, upcoming], ignore_index=True))
    history = featured[featured['Reported Date'] <= last_date].copy()
    horizon_df = featured[featured['Reported Date'] > last_date].copy()

    def _features(d):
        return d.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'], errors='ignore')

    model = XGBRegressor(**_best_params)
    model.fit(_features(history), history['Modal Price (Rs./Quintal)'])
    horizon_df['Modal Price (Rs./Quintal)'] = model.predict(_features(horizon_df))

    plot_data(history, horizon_df, last_date, model, horizon)
    download_button(horizon_df, key)
137
+
138
+
139
def forecast_next_30_days(df, _best_params, key):
    """Fit on all history with `_best_params`, predict 30 days ahead, plot
    actual vs forecast, and offer a CSV download keyed by `key`."""
    horizon = 30
    last_date = df['Reported Date'].max()
    upcoming = pd.DataFrame({
        'Reported Date': pd.date_range(start=last_date + pd.Timedelta(days=1), periods=horizon)
    })

    # Featurise history and horizon together so lags/rollings line up.
    featured = create_forecasting_features_1m(pd.concat([df, upcoming], ignore_index=True))
    history = featured[featured['Reported Date'] <= last_date].copy()
    horizon_df = featured[featured['Reported Date'] > last_date].copy()

    def _features(d):
        return d.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'], errors='ignore')

    model = XGBRegressor(**_best_params)
    model.fit(_features(history), history['Modal Price (Rs./Quintal)'])
    horizon_df['Modal Price (Rs./Quintal)'] = model.predict(_features(horizon_df))

    plot_data(history, horizon_df, last_date, model, horizon)
    download_button(horizon_df, key)
156
+
157
+
158
def forecast_next_90_days(df, _best_params, key):
    """Fit on all history with `_best_params`, predict 90 days ahead, plot
    actual vs forecast, and offer a CSV download keyed by `key`."""
    horizon = 90
    last_date = df['Reported Date'].max()
    upcoming = pd.DataFrame({
        'Reported Date': pd.date_range(start=last_date + pd.Timedelta(days=1), periods=horizon)
    })

    # Featurise history and horizon together so lags/rollings line up.
    featured = create_forecasting_features_3m(pd.concat([df, upcoming], ignore_index=True))
    history = featured[featured['Reported Date'] <= last_date].copy()
    horizon_df = featured[featured['Reported Date'] > last_date].copy()

    def _features(d):
        return d.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'], errors='ignore')

    model = XGBRegressor(**_best_params)
    model.fit(_features(history), history['Modal Price (Rs./Quintal)'])
    horizon_df['Modal Price (Rs./Quintal)'] = model.predict(_features(horizon_df))

    plot_data(history, horizon_df, last_date, model, horizon)
    download_button(horizon_df, key)
175
+
176
+
177
def train_and_forecast(df, filter_key, days):
    """Retrain the model for the given horizon (14/30/90 days), persist the
    winning hyper-parameters keyed by `filter_key`, then render the forecast.
    Does nothing when `df` is None or `days` is unrecognised."""
    cols = get_collections()
    if df is None:
        return

    # Dispatch table: horizon -> (tuner, params store, renderer).
    plan = {
        14: (train_and_evaluate, cols['best_params_collection'], forecast_next_14_days),
        30: (train_and_evaluate_1m, cols['best_params_collection_1m'], forecast_next_30_days),
        90: (train_and_evaluate_3m, cols['best_params_collection_3m'], forecast_next_90_days),
    }
    if days not in plan:
        return
    tune, store, render = plan[days]

    best_params = tune(df)
    store.replace_one(
        {'filter_key': filter_key},
        {**best_params, 'filter_key': filter_key, 'last_updated': pd.Timestamp.now().isoformat()},
        upsert=True,
    )
    render(df, best_params, filter_key)
192
+
193
+
194
def get_best_params(filter_key, collection):
    """Return the stored hyper-parameter record for `filter_key`, or None."""
    return collection.find_one({"filter_key": filter_key})
197
+
198
+
199
def forecast(df, filter_key, days):
    """Forecast with previously stored hyper-parameters for the chosen
    horizon; warns when no trained record exists for `filter_key`."""
    cols = get_collections()

    # Dispatch table: horizon -> (params collection, renderer).
    lookup = {
        14: (cols['best_params_collection'], forecast_next_14_days),
        30: (cols['best_params_collection_1m'], forecast_next_30_days),
        90: (cols['best_params_collection_3m'], forecast_next_90_days),
    }
    if days not in lookup:
        return
    collection, render = lookup[days]

    record = get_best_params(filter_key, collection)
    if record:
        st.info(f"ℹ️ The model was trained on {record['last_updated']}.")
        render(df, record, filter_key)
    else:
        st.warning("⚠️ Model is not trained yet. Please train the model first.")
src/agri_predict/plotting.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import plotly.graph_objects as go
4
+ import io
5
+ import calendar
6
+ import numpy as np
7
+
8
+
9
def plot_data(original_df, future_df, last_date, model, days):
    """Plot model-fitted recent prices against the forecast horizon.

    The last *days* of history are re-scored through *model* (so the
    "Actual" trace shows the model's in-sample fit on the recent window),
    then joined to *future_df*; the two traces share the final fitted point
    so the solid and dashed lines connect visually.
    """
    window_start = last_date - pd.Timedelta(days=days)
    recent = original_df[original_df['Reported Date'] > window_start]

    fitted = recent[['Reported Date']].copy()
    features = recent.drop(
        columns=['Modal Price (Rs./Quintal)', 'Reported Date'], errors='ignore')
    fitted['Modal Price (Rs./Quintal)'] = model.predict(features)
    fitted['Type'] = 'Actual'

    forecast_part = future_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    forecast_part['Type'] = 'Forecasted'
    # Duplicate the final fitted point into the forecast trace so the
    # dashed line starts exactly where the solid line ends.
    bridge = fitted.iloc[[-1]].copy()
    bridge['Type'] = 'Forecasted'
    forecast_part = pd.concat([bridge, forecast_part])

    combined = pd.concat([fitted, forecast_part])

    fig = go.Figure()
    trace_styles = [('Actual', 'blue', 'solid'), ('Forecasted', 'red', 'dash')]
    for label, color, dash in trace_styles:
        subset = combined[combined['Type'] == label]
        fig.add_trace(go.Scatter(
            x=subset['Reported Date'],
            y=subset['Modal Price (Rs./Quintal)'],
            mode='lines',
            name=f"{label} Data",
            line=dict(color=color, dash=dash),
        ))
    fig.update_layout(
        title="Actual vs Forecasted Modal Price (Rs./Quintal)",
        xaxis_title="Date",
        yaxis_title="Modal Price (Rs./Quintal)",
        template="plotly_white",
    )
    st.plotly_chart(fig, width='stretch')
29
+
30
+
31
def download_button(future_df: pd.DataFrame, key: str):
    """Render a Streamlit button that downloads the forecast as an .xlsx file.

    Args:
        future_df: Forecast frame; must contain 'Reported Date' and
            'Modal Price (Rs./Quintal)' columns.
        key: Identifier appended to the download file name and used as the
            widget key so several buttons can coexist on one page.
    """
    export_df = future_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    export_df['Reported Date'] = pd.to_datetime(export_df['Reported Date']).dt.strftime('%Y-%m-%d')
    buffer = io.BytesIO()
    with pd.ExcelWriter(buffer, engine='xlsxwriter') as writer:
        export_df.to_excel(writer, index=False, sheet_name='Forecast')
    buffer.seek(0)
    st.download_button(
        label="Download Forecast as Excel",
        data=buffer,
        file_name=f"forecast_{key}.xlsx",
        # Explicit MIME type so browsers treat the payload as an Excel file
        # instead of Streamlit's generic octet-stream default.
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        # Unique widget key prevents duplicate-element errors when the button
        # is rendered for several filter keys in the same app run.
        key=f"download_{key}",
    )
39
+
40
+
41
def display_statistics(df):
    """Render the national market statistics dashboard in Streamlit.

    Aggregates the raw market frame to one row per date (mean modal price,
    summed arrivals) and renders: latest-day key figures, same-day values in
    previous years, monthly and yearly aggregates, the largest daily price
    moves of the past year, price extremes, and a rolling-feature snapshot.
    Finishes by rendering the sowing-report prediction tool.

    Args:
        df: Market data with 'Reported Date', 'Modal Price (Rs./Quintal)'
            and 'Arrivals (Tonnes)' columns. Note: 'Reported Date' is
            converted to datetime in place on the caller's frame.
    """
    st.title("📊 National Market Statistics Dashboard")
    st.markdown("""
        <style>
        h1 {
            color: #2e7d32;
            font-size: 36px;
            font-weight: bold;
        }
        h3 {
            color: #388e3c;
            font-size: 28px;
            font-weight: 600;
        }
        p {
            font-size: 16px;
            line-height: 1.6;
        }
        .highlight {
            background-color: #f1f8e9;
            padding: 10px;
            border-radius: 8px;
            font-size: 16px;
            color: #2e7d32;
            font-weight: 500;
        }
        </style>
    """, unsafe_allow_html=True)

    # Ensure 'Reported Date' is in datetime format
    df['Reported Date'] = pd.to_datetime(df['Reported Date'])
    # One row per calendar day: mean price across markets, total arrivals.
    national_data = df.groupby('Reported Date').agg({
        'Modal Price (Rs./Quintal)': 'mean',
        'Arrivals (Tonnes)': 'sum'
    }).reset_index()

    st.subheader("🗓️ Key Statistics")
    latest_date = national_data['Reported Date'].max()
    latest_price = national_data[national_data['Reported Date'] == latest_date]['Modal Price (Rs./Quintal)'].mean()
    latest_arrivals = national_data[national_data['Reported Date'] == latest_date]['Arrivals (Tonnes)'].sum()

    st.markdown("<p class='highlight'>This section provides the most recent statistics for the market. It includes the latest available date, the average price of commodities, and the total quantity of goods arriving at the market. These metrics offer an up-to-date snapshot of market conditions.</p>", unsafe_allow_html=True)
    st.write(f"**Latest Date**: {latest_date.strftime('%Y-%m-%d')}")
    st.write(f"**Latest Modal Price**: {latest_price:.2f} Rs./Quintal")
    st.write(f"**Latest Arrivals**: {latest_arrivals:.2f} Tonnes")

    st.subheader("📆 This Day in Previous Years")
    st.markdown("<p class='highlight'>This table shows the modal price and total arrivals for this exact day across previous years. It provides a historical perspective to compare against current market conditions. This section examines historical data for the same day in previous years. By analyzing trends for this specific day, you can identify seasonal patterns, supply-demand changes, or any deviations that might warrant closer attention.</p>", unsafe_allow_html=True)
    today = latest_date
    # Bug fix: take an explicit copy so adding the 'Year' column below does
    # not mutate a view of national_data (SettingWithCopyWarning).
    previous_years_data = national_data[national_data['Reported Date'].dt.dayofyear == today.dayofyear].copy()

    if not previous_years_data.empty:
        previous_years_data['Year'] = previous_years_data['Reported Date'].dt.year.astype(str)
        display_data = (previous_years_data[['Year', 'Modal Price (Rs./Quintal)', 'Arrivals (Tonnes)']]
                        .sort_values(by='Year', ascending=False)
                        .reset_index(drop=True))
        st.table(display_data)
    else:
        st.write("No historical data available for this day in previous years.")

    st.subheader("📅 Monthly Averages Over Years")
    st.markdown("<p class='highlight'>This section displays the average modal prices and arrivals for each month across all years. It helps identify seasonal trends and peak activity months, which can be crucial for inventory planning and market predictions.</p>", unsafe_allow_html=True)
    national_data['Month'] = national_data['Reported Date'].dt.month
    monthly_avg_price = national_data.groupby('Month')['Modal Price (Rs./Quintal)'].mean().reset_index()
    monthly_avg_arrivals = national_data.groupby('Month')['Arrivals (Tonnes)'].mean().reset_index()
    monthly_avg = pd.merge(monthly_avg_price, monthly_avg_arrivals, on='Month')
    monthly_avg['Month'] = monthly_avg['Month'].apply(lambda x: calendar.month_name[x])
    monthly_avg.columns = ['Month', 'Average Modal Price (Rs./Quintal)', 'Average Arrivals (Tonnes)']
    st.write(monthly_avg)

    st.subheader("📆 Yearly Averages")
    st.markdown("<p class='highlight'>Yearly averages provide insights into long-term trends in pricing and arrivals. By examining these values, you can detect overall growth, stability, or volatility in the market.</p>", unsafe_allow_html=True)
    national_data['Year'] = national_data['Reported Date'].dt.year
    yearly_avg_price = national_data.groupby('Year')['Modal Price (Rs./Quintal)'].mean().reset_index()
    yearly_sum_arrivals = national_data.groupby('Year')['Arrivals (Tonnes)'].sum().reset_index()
    yearly_avg = pd.merge(yearly_avg_price, yearly_sum_arrivals, on='Year')
    yearly_avg['Year'] = yearly_avg['Year'].apply(lambda x: f"{int(x)}")
    # Bug fix: arrivals are SUMMED per year above, so label them as totals
    # (the previous 'Average Arrivals (Tonnes)' header was misleading).
    yearly_avg.columns = ['Year', 'Average Modal Price (Rs./Quintal)', 'Total Arrivals (Tonnes)']
    st.write(yearly_avg)

    st.subheader("📈 Largest Daily Price Changes (Past Year)")
    st.markdown("<p class='highlight'>This analysis identifies the most significant daily price changes in the past year. These fluctuations can highlight periods of market volatility, potentially caused by external factors like weather, policy changes, or supply chain disruptions.</p>", unsafe_allow_html=True)
    one_year_ago = latest_date - pd.DateOffset(years=1)
    # Bug fix: copy the slice so the derived 'Daily Change (%)' column is
    # added to an independent frame, not a view of national_data.
    recent_data = national_data[national_data['Reported Date'] >= one_year_ago].copy()
    recent_data['Daily Change (%)'] = recent_data['Modal Price (Rs./Quintal)'].pct_change() * 100
    largest_changes = recent_data[['Reported Date', 'Modal Price (Rs./Quintal)', 'Daily Change (%)']].nlargest(5, 'Daily Change (%)')
    largest_changes['Reported Date'] = largest_changes['Reported Date'].dt.date
    largest_changes = largest_changes.reset_index(drop=True)
    st.write(largest_changes)

    st.subheader("🏆 Top 5 Highest and Lowest Prices (Past Year)")
    st.markdown("<p class='highlight'>This section highlights the highest and lowest prices over the past year. These values reflect the extremes of market dynamics, helping to understand price ceilings and floors in the recent period.</p>", unsafe_allow_html=True)
    highest_prices = recent_data.nlargest(5, 'Modal Price (Rs./Quintal)')[['Reported Date', 'Modal Price (Rs./Quintal)']]
    lowest_prices = recent_data.nsmallest(5, 'Modal Price (Rs./Quintal)')[['Reported Date', 'Modal Price (Rs./Quintal)']]
    highest_prices['Reported Date'] = highest_prices['Reported Date'].dt.date
    lowest_prices['Reported Date'] = lowest_prices['Reported Date'].dt.date
    highest_prices = highest_prices.reset_index(drop=True)
    lowest_prices = lowest_prices.reset_index(drop=True)
    st.write("**Top 5 Highest Prices**")
    st.write(highest_prices)
    st.write("**Top 5 Lowest Prices**")
    st.write(lowest_prices)

    st.subheader("🗂️ Data Snapshot")
    st.markdown("<p class='highlight'>This snapshot provides a concise overview of the latest data, including rolling averages and lagged values. These metrics help identify short-term trends and lagged effects in pricing.</p>", unsafe_allow_html=True)
    national_data['Rolling Mean (14 Days)'] = national_data['Modal Price (Rs./Quintal)'].rolling(window=14).mean()
    national_data['Lag (14 Days)'] = national_data['Modal Price (Rs./Quintal)'].shift(14)
    national_data['Reported Date'] = national_data['Reported Date'].dt.date
    national_data = national_data.sort_values(by='Reported Date', ascending=False)
    st.dataframe(national_data.head(14).reset_index(drop=True), width='stretch', height=525)

    editable_spreadsheet()
153
+
154
+
155
def editable_spreadsheet():
    """Render the sowing-report tool.

    Lets the user upload an Excel sheet, filter it by region and season, and
    predict production volume for a given sowing area via process_dataframe.
    """
    st.title("Sowing Report Prediction Model")

    # Excel file uploader
    uploaded_file = st.file_uploader("Upload your Excel file", type=['xlsx'])

    # Check if an Excel file is uploaded
    if uploaded_file is not None:
        # Read the Excel file
        df_excel = pd.read_excel(uploaded_file)

        # Display the DataFrame from the Excel file
        st.write("Excel data loaded:", df_excel)

        # Robustness fix: the filter below indexes these columns directly; a
        # sheet without them would otherwise crash the app with a KeyError.
        missing = {'Region', 'Season'} - set(df_excel.columns)
        if missing:
            st.error(f"The uploaded file is missing required column(s): {', '.join(sorted(missing))}.")
            return

        # Form for inputting filtering options and area for calculation
        with st.form("input_form"):
            input_region = st.text_input("Enter Region to Filter By", placeholder="Region Name")
            input_season = st.text_input("Enter Season to Filter By", placeholder="Season (e.g., Winter)")
            input_area = st.number_input("Enter Area (in hectares) for Production Calculation", min_value=0.0, format="%.2f")
            submit_button = st.form_submit_button("Calculate Production")

        if submit_button:
            if input_region and input_season and input_area > 0:
                # Filter data by the region and season specified
                filtered_df = df_excel[
                    (df_excel['Region'].str.lower() == input_region.lower()) &
                    (df_excel['Season'].str.lower() == input_season.lower())
                ]

                if not filtered_df.empty:
                    process_dataframe(filtered_df, input_area)
                else:
                    st.error("No data found for the specified region and season.")
            else:
                st.error("Please enter valid region, season, and area to proceed.")
190
+
191
+
192
def process_dataframe(df, area):
    """Estimate production volume as mean yield times area and report it.

    Requires a 'Yield' column in *df*; otherwise an error is shown instead.
    """
    if 'Yield' not in df.columns:
        st.error("The DataFrame does not contain a necessary 'Yield' column for calculation.")
        return
    average_yield = df['Yield'].mean()
    predicted_production = average_yield * area
    st.success(f"The predicted Production Volume for the specified region and season is: {predicted_production:.2f} units")
src/agri_predict/scraper.py ADDED
@@ -0,0 +1,282 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """API-based scraper for agmarknet.gov.in using direct API calls."""
2
+
3
+ import requests
4
+ import pandas as pd
5
+ from datetime import datetime, timedelta
6
+ from typing import Optional, List, Dict, Any
7
+ import logging
8
+ from pathlib import Path
9
+
10
# Configure logging
logger = logging.getLogger(__name__)
handler = logging.StreamHandler()
formatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
handler.setFormatter(formatter)
# Bug fix: only attach the handler if none exists yet. Re-executing this
# module (e.g. Streamlit reruns, notebook reloads) would otherwise stack a
# new StreamHandler each time and duplicate every log line.
if not logger.handlers:
    logger.addHandler(handler)
logger.setLevel(logging.INFO)
19
+
20
+
21
class AgmarknetAPIClient:
    """Client for the Agmarknet prices-and-arrivals API.

    Wraps per-day fetches of the Sesamum market report and provides parsers
    that flatten API responses into pandas DataFrames.
    """

    BASE_URL = "https://api.agmarknet.gov.in/v1/prices-and-arrivals/market-report/specific"

    # Fixed parameters
    COMMODITY_GROUP_ID = 3  # Commodity group for Sesamum
    COMMODITY_ID = 11  # Sesamum(Sesame,Gingelly,Til)
    INCLUDE_EXCEL = "false"

    # Timeout in seconds
    TIMEOUT = 30

    def __init__(self):
        """Initialize API client with a shared session (connection reuse)."""
        self.session = requests.Session()
        logger.info("Agmarknet API client initialized")

    def _log_api_call(self, date_str: str, url: str, status_code: int,
                      records_count: int = 0):
        """Log API call details.

        Args:
            date_str: Date string (YYYY-MM-DD)
            url: Full URL called
            status_code: HTTP status code
            records_count: Number of records fetched
        """
        logger.info(
            f"API CALL | Date: {date_str} | Status: {status_code} | "
            f"Records: {records_count} | URL: {url}"
        )

    def fetch_market_data(self, date: str) -> Optional[Dict[str, Any]]:
        """Fetch market data for a specific date.

        Args:
            date: Date string in format YYYY-MM-DD

        Returns:
            API response dictionary, or None on any network/HTTP/JSON error
            or when the API reports failure (errors are logged, not raised).
        """
        url = (
            f"{self.BASE_URL}?date={date}&"
            f"commodityGroupId={self.COMMODITY_GROUP_ID}&"
            f"commodityId={self.COMMODITY_ID}&"
            f"includeExcel={self.INCLUDE_EXCEL}"
        )

        try:
            logger.info(f"Fetching data for date: {date}")
            response = self.session.get(url, timeout=self.TIMEOUT)
            response.raise_for_status()

            data = response.json()

            if data.get("success"):
                # Count total records
                total_records = self._count_records(data)
                self._log_api_call(date, url, response.status_code, total_records)
                logger.info(
                    f"✅ Successfully fetched data | Date: {date} | "
                    f"Total records: {total_records}"
                )
                return data
            else:
                logger.error(
                    f"❌ API returned failure | Date: {date} | "
                    f"Message: {data.get('message', 'Unknown error')}"
                )
                return None

        except requests.exceptions.Timeout:
            logger.error(f"❌ Timeout error for date: {date}")
            return None
        except requests.exceptions.HTTPError as e:
            logger.error(f"❌ HTTP error for date: {date} | Status: {e.response.status_code}")
            return None
        except requests.exceptions.RequestException as e:
            logger.error(f"❌ Request error for date: {date} | Error: {str(e)}")
            return None
        except ValueError as e:
            # response.json() raises ValueError on non-JSON bodies.
            logger.error(f"❌ JSON decode error for date: {date} | Error: {str(e)}")
            return None

    def fetch_date_range(self, start_date: str, end_date: str) -> List[Dict[str, Any]]:
        """Fetch market data for a date range (inclusive), one call per day.

        Args:
            start_date: Start date (YYYY-MM-DD)
            end_date: End date (YYYY-MM-DD)

        Returns:
            List of successful API response dictionaries; days that failed
            are skipped. Empty list on invalid input.
        """
        logger.info(f"Starting date range fetch | From: {start_date} To: {end_date}")

        try:
            start = datetime.strptime(start_date, "%Y-%m-%d")
            end = datetime.strptime(end_date, "%Y-%m-%d")
        except ValueError as e:
            logger.error(f"❌ Invalid date format | Error: {str(e)}")
            return []

        if start > end:
            # Bug fix: plain string (was an f-string with no placeholders).
            logger.error("❌ Start date cannot be after end date")
            return []

        results = []
        current = start

        logger.info(f"Fetching {(end - start).days + 1} days of data...")

        while current <= end:
            date_str = current.strftime("%Y-%m-%d")
            data = self.fetch_market_data(date_str)

            if data:
                results.append(data)

            current += timedelta(days=1)

        logger.info(
            f"✅ Completed date range fetch | "
            f"Total days: {(end - start).days + 1} | "
            f"Successful fetches: {len(results)}"
        )

        return results

    @staticmethod
    def _count_records(data: Dict[str, Any]) -> int:
        """Count total data entries across all states and markets.

        Args:
            data: API response dictionary

        Returns:
            Total number of records
        """
        count = 0
        states = data.get("states", [])

        for state in states:
            markets = state.get("markets", [])
            for market in markets:
                market_data = market.get("data", [])
                count += len(market_data)

        return count

    @staticmethod
    def parse_response_to_dataframe(api_response: Dict[str, Any]) -> pd.DataFrame:
        """Parse one API response into a flat DataFrame.

        Args:
            api_response: API response dictionary

        Returns:
            Flattened DataFrame with one row per market data entry.
        """
        records = []

        # Extract report date from title.
        # Format: "Market wise Daily Report for Sesamum(Sesame,Gingelly,Til) on 01-Nov-2025"
        title = api_response.get("title", "")
        reported_date = None
        if " on " in title:
            date_part = title.split(" on ")[-1].strip()
            try:
                reported_date = pd.to_datetime(date_part, format="%d-%b-%Y")
            except (TypeError, ValueError):
                # Bug fix: was a bare `except:`, which also swallowed
                # KeyboardInterrupt/SystemExit; only parse failures belong here.
                reported_date = None

        commodity_name = api_response.get("commodityName", "")
        states = api_response.get("states", [])

        for state in states:
            state_name = state.get("stateName", "")
            state_id = state.get("stateId")

            markets = state.get("markets", [])
            for market in markets:
                market_name = market.get("marketName", "")
                # Remove "APMC" suffix if present
                if market_name.endswith(" APMC"):
                    market_name = market_name[:-5].strip()
                market_id = market.get("marketId")

                market_data = market.get("data", [])
                for entry in market_data:
                    record = {
                        "Reported Date": reported_date,
                        "State Name": state_name,
                        "District Name": state_name,  # Using state name as district for now
                        "Market Name": market_name,
                        "Variety": entry.get("variety"),
                        "Group": "Oil Seeds",
                        "Arrivals (Tonnes)": entry.get("arrivals"),
                        "Min Price (Rs./Quintal)": entry.get("minimumPrice"),
                        "Max Price (Rs./Quintal)": entry.get("maximumPrice"),
                        "Modal Price (Rs./Quintal)": entry.get("modalPrice"),
                        "Grade": entry.get("grade"),
                    }
                    records.append(record)

        df = pd.DataFrame(records)

        logger.info(f"Parsed API response to DataFrame | Records: {len(df)}")

        return df

    @staticmethod
    def parse_multiple_responses_to_dataframe(
        responses: List[Dict[str, Any]]
    ) -> pd.DataFrame:
        """Parse multiple API responses into a single DataFrame.

        Args:
            responses: List of API response dictionaries

        Returns:
            Combined DataFrame (empty when *responses* is empty).
        """
        dfs = []

        for response in responses:
            df = AgmarknetAPIClient.parse_response_to_dataframe(response)
            dfs.append(df)

        # Robustness fix: pd.concat raises ValueError on an empty list; an
        # empty input should simply yield an empty frame.
        if not dfs:
            logger.info("No API responses to combine; returning empty DataFrame")
            return pd.DataFrame()

        combined_df = pd.concat(dfs, ignore_index=True)

        logger.info(
            f"Combined {len(responses)} API responses into DataFrame | "
            f"Total records: {len(combined_df)}"
        )

        return combined_df

    def export_response_to_file(self, api_response: Dict[str, Any],
                                filename: str = "api_response.json"):
        """Export API response to a JSON file (errors are logged, not raised).

        Args:
            api_response: API response dictionary
            filename: Output filename
        """
        import json

        filepath = Path(filename)

        try:
            with open(filepath, 'w') as f:
                json.dump(api_response, f, indent=2)

            logger.info(f"✅ Exported API response to file | Path: {filepath}")
        except Exception as e:
            logger.error(f"❌ Failed to export API response | Error: {str(e)}")
279
+
280
+
281
# Global client instance
# NOTE(review): instantiated at import time, so importing this module creates
# a requests.Session immediately; all importers share this singleton (and its
# connection pool). Confirm that import-time construction is intended.
api_client = AgmarknetAPIClient()
src/agri_predict/utils.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from werkzeug.security import check_password_hash, generate_password_hash
3
+
4
+
5
def collection_to_dataframe(collection, drop_id=True):
    """Materialise a MongoDB-style collection into a pandas DataFrame.

    Args:
        collection: Any object exposing ``find()`` that yields dict records.
        drop_id: When True (default), drop Mongo's ``_id`` column if present.
    """
    frame = pd.DataFrame(list(collection.find()))
    if drop_id and '_id' in frame.columns:
        frame = frame.drop(columns=['_id'])
    return frame
11
+
12
+
13
def authenticate_user(username: str, password: str, users_collection=None) -> bool:
    """Check *username*/*password* against the stored password hash.

    Args:
        username: Username to authenticate.
        password: Plain-text password to verify against the stored hash.
        users_collection: Optional cached users collection (avoids a fresh
            connection). If None, the collection is fetched from app config.

    Returns:
        True when the user exists and the password matches, else False.
    """
    if users_collection is None:
        from .config import get_collections
        users_collection = get_collections()['users_collection']

    record = users_collection.find_one({"username": username})
    return bool(record and check_password_hash(record.get('password', ''), password))
30
+
31
+
32
def save_best_params(collection, filter_key, best_params):
    """Upsert tuned hyperparameters for *filter_key*, stamped with save time.

    Builds a merged document (so the caller's dict is never mutated) keyed by
    ``filter_key`` and replaces any existing record, inserting if absent.
    """
    document = {
        **best_params,
        "filter_key": filter_key,
        "last_updated": pd.Timestamp.now().isoformat(),
    }
    collection.replace_one({"filter_key": filter_key}, document, upsert=True)
37
+
38
+
39
def get_best_params(filter_key, collection):
    """Fetch the stored hyperparameter record for *filter_key* (None if absent)."""
    query = {"filter_key": filter_key}
    return collection.find_one(query)