Upload 5 files
- README.md +34 -0
- remove_recent_data.py +60 -0
- requirements.txt +15 -0
- streamlit_app.py +249 -0
- update_all_models.py +225 -0
README.md
ADDED
@@ -0,0 +1,34 @@
# AgriPredict (Refactor)

This repository contains a refactored, modularized version of the Streamlit-based AgriPredict dashboard.

Structure:

- `src/agri_predict` - package with modules:
  - `config.py` - env & MongoDB connection helpers
  - `constants.py` - shared constants (state/market mapping)
  - `features.py` - feature engineering functions
  - `data.py` - data access, preprocessing and scraping helpers
  - `models.py` - model training, grid search and forecasting
  - `plotting.py` - plotting and download helpers
  - `utils.py` - authentication and utility functions
- `streamlit_app.py` - Streamlit entrypoint
- `requirements.txt` - Python dependencies

Run locally:

1. Create a virtualenv and install dependencies:

```bash
python -m venv .venv
source .venv/bin/activate
pip install -r requirements.txt
```

2. Set `MONGO_URI` in a `.env` file at project root.

3. Start the app:

```bash
streamlit run streamlit_app.py
```
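Note that the `src/agri_predict` package itself (including `config.py`) is not part of this upload, so the connection helpers are not shown. Purely as an illustration, a minimal `get_collections()` consistent with how the scripts below call it might look like the sketch that follows; the database and collection names are assumptions, while the returned keys (`collection`, `impExp`, `users_collection`, `best_params_collection*`) mirror the callers in this upload.

```python
# Hypothetical sketch of src/agri_predict/config.py (not included in this upload).
# Database and collection names are assumptions; only the returned keys are
# inferred from the scripts below.
import os

import certifi
from dotenv import load_dotenv
from pymongo import MongoClient


def get_collections():
    """Connect to MongoDB using MONGO_URI from .env and return named collections."""
    load_dotenv()  # reads MONGO_URI from the .env file at project root
    uri = os.getenv("MONGO_URI")
    if not uri:
        raise ValueError("MONGO_URI is not set; add it to a .env file at project root")

    client = MongoClient(uri, tlsCAFile=certifi.where())
    db = client["agri_predict"]  # assumed database name
    return {
        "collection": db["mandi_prices"],            # daily market records
        "impExp": db["import_export"],               # import/export data
        "users_collection": db["users"],             # login credentials
        "best_params_collection": db["best_params_14d"],
        "best_params_collection_1m": db["best_params_1m"],
        "best_params_collection_3m": db["best_params_3m"],
    }
```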
remove_recent_data.py
ADDED
@@ -0,0 +1,60 @@
"""Script to remove data after October 25, 2025 from MongoDB for testing the scraper."""

from datetime import datetime
from src.agri_predict.config import get_collections


def remove_data_after_date(cutoff_date_str="2025-10-25"):
    """Remove all data after the specified date.

    Args:
        cutoff_date_str: Date string in format YYYY-MM-DD
    """
    cutoff_date = datetime.strptime(cutoff_date_str, "%Y-%m-%d")

    cols = get_collections()
    collection = cols['collection']

    # Count documents before deletion
    before_count = collection.count_documents({})
    after_cutoff_count = collection.count_documents({
        "Reported Date": {"$gt": cutoff_date}
    })

    print("📊 Database Status:")
    print(f"   Total documents: {before_count}")
    print(f"   Documents after {cutoff_date_str}: {after_cutoff_count}")

    if after_cutoff_count == 0:
        print(f"✅ No documents found after {cutoff_date_str}")
        return

    # Delete documents
    result = collection.delete_many({
        "Reported Date": {"$gt": cutoff_date}
    })

    print("\n🗑️ Deletion Results:")
    print(f"   Deleted {result.deleted_count} documents")

    # Verify deletion
    remaining_count = collection.count_documents({})
    latest_doc = collection.find_one(sort=[("Reported Date", -1)])

    print("\n✅ After Deletion:")
    print(f"   Total documents: {remaining_count}")
    if latest_doc:
        latest_date = latest_doc.get("Reported Date")
        print(f"   Latest date in database: {latest_date.strftime('%Y-%m-%d') if latest_date else 'Unknown'}")
    else:
        print("   Database is empty")

if __name__ == "__main__":
    cutoff = "2025-10-10"

    print("="*60)
    print(f"🧹 Cleaning MongoDB Data After {cutoff}")
    print("="*60 + "\n")

    remove_data_after_date(cutoff)

    print("\n" + "="*60)
    print("✅ Cleanup Complete - Ready to test scraper!")
    print("="*60)
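For ad-hoc testing, the helper can also be imported and called with an explicit cutoff instead of the one hardcoded in `__main__`; a minimal example (run from the project root so the import resolves):

```python
# Run the same cleanup with a cutoff supplied at the call site.
from remove_recent_data import remove_data_after_date

remove_data_after_date("2025-10-25")  # deletes documents with "Reported Date" > 2025-10-25
```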
requirements.txt
ADDED
@@ -0,0 +1,15 @@
streamlit
pandas
numpy
scikit-learn
xgboost
pymongo
python-dotenv
plotly
certifi
werkzeug
statsmodels
openpyxl
xlsxwriter
tqdm
requests
streamlit_app.py
ADDED
@@ -0,0 +1,249 @@
"""Streamlit entrypoint for AgriPredict (refactored).

Run with: `streamlit run streamlit_app.py` from project root.
"""
import streamlit as st
from datetime import datetime, timedelta
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

from src.agri_predict import (
    fetch_and_process_data,
    fetch_and_store_data,
    preprocess_data,
    train_and_forecast,
    forecast,
    collection_to_dataframe,
    get_dataframe_from_collection,
)
from src.agri_predict.constants import state_market_dict
from src.agri_predict.utils import authenticate_user
from src.agri_predict.config import get_collections


st.set_page_config(layout="wide")


@st.cache_resource
def get_cached_collections():
    """Cache MongoDB collections to avoid reconnecting on every page load."""
    return get_collections()


st.markdown("""
<style>
.main { max-width: 1200px; margin: 0 auto; }
h1 { color: #4CAF50; font-family: 'Arial Black', sans-serif; }
.stButton>button { background-color: #4CAF50; color: white; }
</style>
""", unsafe_allow_html=True)

if 'authenticated' not in st.session_state:
    st.session_state.authenticated = False

if st.session_state.authenticated:
    # Get cached collections only after authentication
    try:
        cols = get_cached_collections()
    except Exception as exc:
        st.error(f"Configuration error: {exc}")
        st.stop()

    collection = cols['collection']
    impExp = cols['impExp']

    st.title("🌾 AgriPredict Dashboard")
    if st.button("Get Live Data Feed"):
        fetch_and_store_data()

    view_mode = st.radio("View Mode", ["Statistics", "Plots", "Predictions", "Exim"], horizontal=True, label_visibility="collapsed")

    if view_mode == "Plots":
        st.sidebar.header("Filters")
        selected_period = st.sidebar.selectbox("Select Time Period", ["2 Weeks", "1 Month", "3 Months", "1 Year", "2 Years", "5 Years"], index=1)
        period_mapping = {"2 Weeks": 14, "1 Month": 30, "3 Months": 90, "1 Year": 365, "2 Years": 730, "5 Years": 1825}
        st.session_state.selected_period = period_mapping[selected_period]

        state_options = list(state_market_dict.keys()) + ['India']
        selected_state = st.sidebar.selectbox("Select State", state_options)

        market_wise = False
        if selected_state != 'India':
            market_wise = st.sidebar.checkbox("Market Wise Analysis")
            if market_wise:
                markets = state_market_dict.get(selected_state, [])
                selected_market = st.sidebar.selectbox("Select Market", markets)
                query_filter = {"State Name": selected_state, "Market Name": selected_market}
            else:
                query_filter = {"State Name": selected_state}
        else:
            query_filter = {}

        data_type = st.sidebar.radio("Select Data Type", ["Price", "Volume", "Both"])
        query_filter["Reported Date"] = {"$gte": datetime.now() - timedelta(days=st.session_state.selected_period)}

        if st.sidebar.button("✨ Let's go!"):
            try:
                cursor = collection.find(query_filter)
                data = list(cursor)
                if data:
                    df = pd.DataFrame(data)
                    df['Reported Date'] = pd.to_datetime(df['Reported Date'])
                    df_grouped = df.groupby('Reported Date', as_index=False).agg({'Arrivals (Tonnes)': 'sum', 'Modal Price (Rs./Quintal)': 'mean'})
                    date_range = pd.date_range(start=df_grouped['Reported Date'].min(), end=df_grouped['Reported Date'].max())
                    df_grouped = df_grouped.set_index('Reported Date').reindex(date_range).rename_axis('Reported Date').reset_index()
                    df_grouped['Arrivals (Tonnes)'] = df_grouped['Arrivals (Tonnes)'].ffill().bfill()
                    df_grouped['Modal Price (Rs./Quintal)'] = df_grouped['Modal Price (Rs./Quintal)'].ffill().bfill()
                    st.subheader(f"📈 Trends for {selected_state} ({'Market: ' + selected_market if market_wise else 'State'})")
                    if data_type == "Both":
                        scaler = MinMaxScaler()
                        df_grouped[['Scaled Price', 'Scaled Arrivals']] = scaler.fit_transform(df_grouped[['Modal Price (Rs./Quintal)', 'Arrivals (Tonnes)']])
                        import plotly.graph_objects as go
                        fig = go.Figure()
                        fig.add_trace(go.Scatter(x=df_grouped['Reported Date'], y=df_grouped['Scaled Price'], mode='lines', name='Scaled Price', line=dict(width=1, color='green')))
                        fig.add_trace(go.Scatter(x=df_grouped['Reported Date'], y=df_grouped['Scaled Arrivals'], mode='lines', name='Scaled Arrivals', line=dict(width=1, color='blue')))
                        fig.update_layout(title="Price and Arrivals Trend", xaxis_title='Date', yaxis_title='Scaled Values', template='plotly_white')
                        st.plotly_chart(fig, width='stretch')
                    elif data_type == "Price":
                        import plotly.graph_objects as go
                        fig = go.Figure()
                        fig.add_trace(go.Scatter(x=df_grouped['Reported Date'], y=df_grouped['Modal Price (Rs./Quintal)'], mode='lines', name='Modal Price', line=dict(width=1, color='green')))
                        fig.update_layout(title="Modal Price Trend", xaxis_title='Date', yaxis_title='Price (Rs./Quintal)', template='plotly_white')
                        st.plotly_chart(fig, width='stretch')
                    else:
                        import plotly.graph_objects as go
                        fig = go.Figure()
                        fig.add_trace(go.Scatter(x=df_grouped['Reported Date'], y=df_grouped['Arrivals (Tonnes)'], mode='lines', name='Arrivals', line=dict(width=1, color='blue')))
                        fig.update_layout(title="Arrivals Trend", xaxis_title='Date', yaxis_title='Volume (in Tonnes)', template='plotly_white')
                        st.plotly_chart(fig, width='stretch')
                else:
                    st.warning("⚠️ No data found for the selected filters.")
            except Exception as e:
                st.error(f"❌ Error fetching data: {e}")

    elif view_mode == "Predictions":
        st.subheader("📊 Model Analysis")
        sub_option = st.radio("Select one of the following", ["India", "States", "Market"], horizontal=True)
        sub_timeline = st.radio("Select one of the following horizons", ["14 days", "1 month", "3 month"], horizontal=True)
        if sub_option == "States":
            states = ["Karnataka", "Madhya Pradesh", "Gujarat", "Uttar Pradesh", "Telangana"]
            selected_state = st.selectbox("Select State for Model Training", states)
            filter_key = f"state_{selected_state}"
            if st.button("Train and Forecast"):
                query_filter = {"State Name": selected_state}
                df = fetch_and_process_data(query_filter)
                if sub_timeline == "14 days":
                    train_and_forecast(df, filter_key, 14)
                elif sub_timeline == "1 month":
                    train_and_forecast(df, filter_key, 30)
                else:
                    train_and_forecast(df, filter_key, 90)
            if st.button("Forecast"):
                query_filter = {"State Name": selected_state}
                df = fetch_and_process_data(query_filter)
                if sub_timeline == "14 days":
                    forecast(df, filter_key, 14)
                elif sub_timeline == "1 month":
                    forecast(df, filter_key, 30)
                else:
                    forecast(df, filter_key, 90)
        elif sub_option == "Market":
            market_options = ["Rajkot", "Gondal", "Kalburgi", "Amreli"]
            selected_market = st.selectbox("Select Market for Model Training", market_options)
            filter_key = f"market_{selected_market}"
            if st.button("Train and Forecast"):
                query_filter = {"Market Name": selected_market}
                df = fetch_and_process_data(query_filter)
                if sub_timeline == "14 days":
                    train_and_forecast(df, filter_key, 14)
                elif sub_timeline == "1 month":
                    train_and_forecast(df, filter_key, 30)
                else:
                    train_and_forecast(df, filter_key, 90)
            if st.button("Forecast"):
                query_filter = {"Market Name": selected_market}
                df = fetch_and_process_data(query_filter)
                if sub_timeline == "14 days":
                    forecast(df, filter_key, 14)
                elif sub_timeline == "1 month":
                    forecast(df, filter_key, 30)
                else:
                    forecast(df, filter_key, 90)
        elif sub_option == "India":
            df = collection_to_dataframe(impExp)
            if st.button("Train and Forecast"):
                query_filter = {}
                df = fetch_and_process_data(query_filter)
                if sub_timeline == "14 days":
                    train_and_forecast(df, "India", 14)
                elif sub_timeline == "1 month":
                    train_and_forecast(df, "India", 30)
                else:
                    train_and_forecast(df, "India", 90)
            if st.button("Forecast"):
                query_filter = {}
                df = fetch_and_process_data(query_filter)
                if sub_timeline == "14 days":
                    forecast(df, "India", 14)
                elif sub_timeline == "1 month":
                    forecast(df, "India", 30)
                else:
                    forecast(df, "India", 90)

    elif view_mode == "Statistics":
        document = collection.find_one()
        df = get_dataframe_from_collection(collection)
        from src.agri_predict.plotting import display_statistics
        display_statistics(df)
    elif view_mode == "Exim":
        df = collection_to_dataframe(impExp)
        plot_option = st.radio("Select the data to visualize:", ["Import Price", "Import Quantity", "Export Price", "Export Quantity"], horizontal=True)
        time_period = st.selectbox("Select time period:", ["1 Month", "6 Months", "1 Year", "2 Years"])
        df["Reported Date"] = pd.to_datetime(df["Reported Date"], format="%Y-%m-%d")
        if time_period == "1 Month":
            start_date = pd.Timestamp.now() - pd.DateOffset(months=1)
        elif time_period == "6 Months":
            start_date = pd.Timestamp.now() - pd.DateOffset(months=6)
        elif time_period == "1 Year":
            start_date = pd.Timestamp.now() - pd.DateOffset(years=1)
        else:
            start_date = pd.Timestamp.now() - pd.DateOffset(years=2)
        filtered_df = df[df["Reported Date"] >= start_date]
        if plot_option == "Import Price":
            grouped_df = filtered_df.groupby("Reported Date", as_index=False)["VALUE_IMPORT"].mean().rename(columns={"VALUE_IMPORT": "Average Import Price"})
            y_axis_label = "Average Import Price (Rs.)"
        elif plot_option == "Import Quantity":
            grouped_df = filtered_df.groupby("Reported Date", as_index=False)["QUANTITY_IMPORT"].sum().rename(columns={"QUANTITY_IMPORT": "Total Import Quantity"})
            y_axis_label = "Total Import Quantity (Tonnes)"
        elif plot_option == "Export Price":
            grouped_df = filtered_df.groupby("Reported Date", as_index=False)["VALUE_EXPORT"].mean().rename(columns={"VALUE_EXPORT": "Average Export Price"})
            y_axis_label = "Average Export Price (Rs.)"
        else:
            grouped_df = filtered_df.groupby("Reported Date", as_index=False)["QUANTITY_EXPORT"].sum().rename(columns={"QUANTITY_EXPORT": "Total Export Quantity"})
            y_axis_label = "Total Export Quantity (Tonnes)"
        import plotly.express as px
        fig = px.line(grouped_df, x="Reported Date", y=grouped_df.columns[1], title=f"{plot_option} Over Time", labels={"Reported Date": "Date", grouped_df.columns[1]: y_axis_label})
        st.plotly_chart(fig)

else:
    with st.form("login_form"):
        st.subheader("Please log in")
        username = st.text_input("Username")
        password = st.text_input("Password", type="password")
        login_button = st.form_submit_button("Login")
        if login_button:
            # Get cached collections for authentication
            try:
                cols = get_cached_collections()
                users_collection = cols['users_collection']
            except Exception as exc:
                st.error(f"Database connection error: {exc}")
                st.stop()

            if authenticate_user(username, password, users_collection):
                st.session_state.authenticated = True
                st.session_state['username'] = username
                st.write("Login successful!")
                st.rerun()
            else:
                st.error("Invalid username or password")
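For reference, the queries and group-bys above assume documents shaped roughly as follows. The field names are taken from the code; the values are invented, and `QUANTITY_EXPORT` is assumed as the counterpart of the import fields.

```python
from datetime import datetime

# Illustrative document shapes inferred from the field names used above; values are made up.
sample_price_record = {
    "Reported Date": datetime(2025, 10, 1),   # stored as a BSON datetime (queried with $gte/$gt)
    "State Name": "Gujarat",
    "Market Name": "Rajkot",
    "Arrivals (Tonnes)": 812.5,
    "Modal Price (Rs./Quintal)": 5430.0,
}

sample_exim_record = {
    "Reported Date": "2025-10-01",             # parsed with pd.to_datetime(format="%Y-%m-%d")
    "VALUE_IMPORT": 61200.0,
    "QUANTITY_IMPORT": 145.0,
    "VALUE_EXPORT": 58800.0,
    "QUANTITY_EXPORT": 230.0,                  # assumed counterpart of QUANTITY_IMPORT
}
```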
update_all_models.py
ADDED
@@ -0,0 +1,225 @@
"""
Script to train and update all models for India, States, and Markets.
Run this script to update all forecasting models without using the UI.
"""

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from xgboost import XGBRegressor
from tqdm import tqdm

from src.agri_predict import fetch_and_process_data
from src.agri_predict.constants import state_market_dict
from src.agri_predict.features import (
    create_forecasting_features,
    create_forecasting_features_1m,
    create_forecasting_features_3m,
)
from src.agri_predict.config import get_collections

# Define forecast horizons
FORECAST_HORIZONS = [14, 30, 90]  # 14 days, 1 month, 3 months


def train_model_batch(df, filter_key, days):
    """Train model without UI components for batch processing."""
    cols = get_collections()

    # Select feature creation function based on horizon
    if days == 14:
        df_features = create_forecasting_features(df)
        split_date = '2024-01-01'
        collection_key = 'best_params_collection'
    elif days == 30:
        df_features = create_forecasting_features_1m(df)
        split_date = '2023-01-01'
        collection_key = 'best_params_collection_1m'
    else:  # 90 days
        df_features = create_forecasting_features_3m(df)
        split_date = '2023-01-01'
        collection_key = 'best_params_collection_3m'

    # Split data
    train_df = df_features[df_features['Reported Date'] < split_date]
    test_df = df_features[df_features['Reported Date'] >= split_date]

    X_train = train_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
    y_train = train_df['Modal Price (Rs./Quintal)']
    X_test = test_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
    y_test = test_df['Modal Price (Rs./Quintal)']

    # Hyperparameter tuning with progress bar
    param_grid = {
        'learning_rate': [0.01, 0.1, 0.2],
        'max_depth': [3, 5, 7],
        'n_estimators': [50, 100, 150],
        'booster': ['gbtree', 'dart']
    }

    model = XGBRegressor()
    best_score = float('-inf')
    best_params = None

    total_combinations = len(param_grid['learning_rate']) * len(param_grid['max_depth']) * \
                         len(param_grid['n_estimators']) * len(param_grid['booster'])

    with tqdm(total=total_combinations, desc="Tuning hyperparameters") as pbar:
        for learning_rate in param_grid['learning_rate']:
            for max_depth in param_grid['max_depth']:
                for n_estimators in param_grid['n_estimators']:
                    for booster in param_grid['booster']:
                        model.set_params(
                            learning_rate=learning_rate,
                            max_depth=max_depth,
                            n_estimators=n_estimators,
                            booster=booster
                        )
                        model.fit(X_train, y_train)
                        score = model.score(X_test, y_test)
                        if score > best_score:
                            best_score = score
                            best_params = {
                                'learning_rate': learning_rate,
                                'max_depth': max_depth,
                                'n_estimators': n_estimators,
                                'booster': booster
                            }
                        pbar.update(1)

    # Train final model with best params
    best_model = XGBRegressor(**best_params)
    best_model.fit(X_train, y_train)
    y_pred = best_model.predict(X_test)

    # Calculate metrics
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mae = mean_absolute_error(y_test, y_pred)

    # Save to MongoDB (cast metrics to plain floats so BSON can encode them)
    cols[collection_key].replace_one(
        {'filter_key': filter_key},
        {
            **best_params,
            'filter_key': filter_key,
            'last_updated': pd.Timestamp.now().isoformat(),
            'rmse': float(rmse),
            'mae': float(mae),
            'score': float(best_score)
        },
        upsert=True
    )

    return best_params, rmse, mae

def update_india_models():
    """Update models for all of India."""
    print("\n" + "="*60)
    print("UPDATING INDIA MODELS")
    print("="*60)

    query_filter = {}
    df = fetch_and_process_data(query_filter)

    if df is not None:
        for days in FORECAST_HORIZONS:
            horizon_name = "14 days" if days == 14 else "1 month" if days == 30 else "3 months"
            print(f"\n[India] Training {horizon_name} forecast model...")
            try:
                best_params, rmse, mae = train_model_batch(df, "India", days)
                print(f"✅ [India] {horizon_name} model updated successfully")
                print(f"   RMSE: {rmse:.2f}, MAE: {mae:.2f}")
            except Exception as e:
                print(f"❌ [India] Error updating {horizon_name} model: {e}")
    else:
        print("❌ [India] No data available")


def update_state_models():
    """Update models for all states."""
    print("\n" + "="*60)
    print("UPDATING STATE MODELS")
    print("="*60)

    states = ["Karnataka", "Madhya Pradesh", "Gujarat", "Uttar Pradesh", "Telangana"]

    for state in states:
        print(f"\n--- Processing State: {state} ---")
        query_filter = {"State Name": state}
        df = fetch_and_process_data(query_filter)

        if df is not None:
            filter_key = f"state_{state}"
            for days in FORECAST_HORIZONS:
                horizon_name = "14 days" if days == 14 else "1 month" if days == 30 else "3 months"
                print(f"[{state}] Training {horizon_name} forecast model...")
                try:
                    best_params, rmse, mae = train_model_batch(df, filter_key, days)
                    print(f"✅ [{state}] {horizon_name} model updated successfully")
                    print(f"   RMSE: {rmse:.2f}, MAE: {mae:.2f}")
                except Exception as e:
                    print(f"❌ [{state}] Error updating {horizon_name} model: {e}")
        else:
            print(f"❌ [{state}] No data available")


def update_market_models():
    """Update models for specific markets."""
    print("\n" + "="*60)
    print("UPDATING MARKET MODELS")
    print("="*60)

    markets = ["Rajkot", "Gondal", "Kalburgi", "Amreli"]

    for market in markets:
        print(f"\n--- Processing Market: {market} ---")
        query_filter = {"Market Name": market}
        df = fetch_and_process_data(query_filter)

        if df is not None:
            filter_key = f"market_{market}"
            for days in FORECAST_HORIZONS:
                horizon_name = "14 days" if days == 14 else "1 month" if days == 30 else "3 months"
                print(f"[{market}] Training {horizon_name} forecast model...")
                try:
                    best_params, rmse, mae = train_model_batch(df, filter_key, days)
                    print(f"✅ [{market}] {horizon_name} model updated successfully")
                    print(f"   RMSE: {rmse:.2f}, MAE: {mae:.2f}")
                except Exception as e:
                    print(f"❌ [{market}] Error updating {horizon_name} model: {e}")
        else:
            print(f"❌ [{market}] No data available")


def main():
    """Main function to update all models."""
    print("\n" + "🌾" * 30)
    print("AGRIPREDICT - BATCH MODEL UPDATE")
    print("🌾" * 30)
    print("\nThis script will train and update all forecasting models.")
    print("This may take several minutes to complete.\n")

    try:
        # Update India models
        update_india_models()

        # Update State models
        update_state_models()

        # Update Market models
        update_market_models()

        print("\n" + "="*60)
        print("✅ ALL MODELS UPDATED SUCCESSFULLY")
        print("="*60)

    except KeyboardInterrupt:
        print("\n\n⚠️ Process interrupted by user")
    except Exception as e:
        print(f"\n\n❌ Fatal error: {e}")


if __name__ == "__main__":
    main()
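Each horizon writes its tuned parameters (plus RMSE, MAE and a timestamp) into its own collection, keyed by `filter_key`. The `forecast` and `train_and_forecast` helpers in `src/agri_predict/models.py` are not part of this upload; purely as an illustration, reading a stored parameter set back and rebuilding the model could look like the sketch below (the function name and usage are assumptions, only the stored field names come from `train_model_batch`).

```python
# Hypothetical consumer of the parameters saved by train_model_batch; the real
# logic lives in src/agri_predict/models.py, which is not included here.
from xgboost import XGBRegressor

from src.agri_predict.config import get_collections


def load_tuned_model(filter_key, collection_key="best_params_collection"):
    """Rebuild an XGBRegressor from the tuned parameters stored in MongoDB."""
    cols = get_collections()
    doc = cols[collection_key].find_one({"filter_key": filter_key})
    if doc is None:
        raise ValueError(f"No tuned parameters stored for {filter_key!r}")

    params = {k: doc[k] for k in ("learning_rate", "max_depth", "n_estimators", "booster")}
    return XGBRegressor(**params)  # must still be fit on current data before predicting
```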