ThejasRao commited on
Commit
a0a43bb
·
verified ·
1 Parent(s): a5fb5d2

Actual file

Browse files
Files changed (1) hide show
  1. app.py +1474 -232
app.py CHANGED
@@ -1,15 +1,27 @@
1
  import streamlit as st
2
- import plotly.graph_objects as go
3
- from pymongo import MongoClient
4
- from datetime import datetime, timedelta
5
  import pandas as pd
 
 
 
 
 
 
6
  from sklearn.preprocessing import MinMaxScaler
 
 
 
 
7
  import certifi
 
 
 
8
  import json
9
- import os
10
-
11
- mongo_uri = "mongodb+srv://Agripredict:TjXSvMhOis49qH8E@cluster0.gek7n.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0"
 
12
 
 
13
  if not mongo_uri:
14
  st.error("MongoDB URI is not set!")
15
  st.stop()
@@ -18,257 +30,1487 @@ else:
18
  client = MongoClient(mongo_uri, tlsCAFile=certifi.where())
19
  db = client["AgriPredict"]
20
  collection = db["WhiteSesame"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
- # CSS to increase the width of the container
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  st.markdown("""
24
  <style>
25
- /* Adjust the width of the main container */
26
- .main {
27
- max-width: 1200px; /* Increase the width */
28
- margin: 0 auto; /* Center the container */
29
- }
30
 
31
- /* Main background */
32
- body {
33
- background-color: #f9f9f9;
 
34
  }
35
 
36
- /* Title styling */
37
- h1 {
38
- color: #4CAF50;
39
- font-family: 'Arial Black', sans-serif;
40
- }
41
-
42
- /* Buttons */
43
  .stButton>button {
44
- background-color: #4CAF50;
45
- color: white;
46
- font-size: 14px;
47
- border-radius: 8px;
48
- padding: 10px 20px;
49
- margin: 5px;
50
- white-space: nowrap;
51
- }
52
- .stButton>button:hover {
53
- background-color: #45a049;
54
  }
55
 
56
- /* Selectbox styling */
57
- .stSelectbox>div {
58
- padding: 10px;
59
- background-color: #ffffff;
60
- border: 1px solid #e6e6e6;
61
- border-radius: 8px;
62
  }
63
 
64
- /* Checkbox styling */
65
- .stCheckbox>label {
66
- font-size: 14px;
67
- color: #555;
 
 
 
 
 
 
 
 
 
68
  }
69
 
70
- /* Containers */
71
- .stContainer {
72
- border-radius: 12px;
73
- padding: 20px;
74
- background-color: #ffffff;
75
- box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
76
  }
77
 
78
- /* Chart area */
79
- .plotly-graph-div {
80
- border-radius: 12px;
81
- box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
82
  }
83
 
84
- /* Footer */
85
- footer {
86
- font-size: 12px;
87
- text-align: center;
88
- color: #888;
89
- padding: 10px;
 
 
 
90
  }
91
  </style>
92
  """, unsafe_allow_html=True)
 
 
93
 
94
- st.title("🌾 AgriPredict Dashboard")
95
-
96
- # Load the state-market dictionary from the JSON file
97
- with open('all_state_market_dict.json', 'r') as file:
98
- state_market_dict = json.load(file)
99
-
100
- # UI for Dashboard
101
- with st.container():
102
- with st.expander("AgriPredict Dashboard", expanded=True):
103
- # Adjust the columns to fit more elements within the container
104
- col1, col2, col3, col4, col5, col6, col7 = st.columns([1.5, 1.5, 1.5, 1.5, 1.5, 3, 3])
105
-
106
- # Buttons for periods
107
- with col1:
108
- if st.button('2W', key='2_weeks'):
109
- st.session_state.selected_period = 14
110
-
111
- with col2:
112
- if st.button('1M', key='1_month'):
113
- st.session_state.selected_period = 30
114
-
115
- with col3:
116
- if st.button('3M', key='3_months'):
117
- st.session_state.selected_period = 90
118
-
119
- with col4:
120
- if st.button('1Y', key='1_year'):
121
- st.session_state.selected_period = 365
122
-
123
- with col5:
124
- if st.button('5Y', key='5_year'):
125
- st.session_state.selected_period = 1825
126
-
127
- # Dropdown for states
128
- with col6:
129
- states = list(state_market_dict.keys())
130
- selected_state = st.selectbox(
131
- "Choose a state",
132
- states,
133
- key="state_selectbox",
134
- index=0
135
- )
136
-
137
- # Dropdown for selecting between Price, Volume, or Both
138
- with col7:
139
- data_type = st.selectbox(
140
- "Select Data Type",
141
- ["Price", "Volume", "Both"]
142
- )
143
 
144
- # Checkbox for market-wise analysis
145
- st.write("")
146
- with st.container():
147
- market_wise = st.checkbox("Market wise", key="market_checkbox")
148
-
149
- if market_wise:
150
- # Get markets for the selected state
151
- markets = state_market_dict.get(selected_state, [])
152
- selected_market = st.selectbox(
153
- "Choose a market",
154
- markets,
155
- key="market_selectbox",
156
- index=0
157
- )
158
- query_filter = {"state": selected_state, "Market Name": selected_market}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  else:
160
- query_filter = {"state": selected_state}
161
-
 
 
 
 
 
 
162
  # Add date filtering based on selected period
163
- if 'selected_period' in st.session_state:
164
- days_period = st.session_state.selected_period
165
- query_filter["Reported Date"] = {
166
- "$gte": datetime.now() - timedelta(days=days_period)
167
- }
168
-
169
- # Fetch data from MongoDB
170
- try:
171
- cursor = collection.find(query_filter)
172
- data = list(cursor)
173
-
174
- if data:
175
- # Convert MongoDB data to a DataFrame
176
- df = pd.DataFrame(data)
177
- df['Reported Date'] = pd.to_datetime(df['Reported Date'])
178
-
179
- # Group by Reported Date
180
- df_grouped = (
181
- df.groupby('Reported Date', as_index=False)
182
- .agg({
183
- 'Arrivals (Tonnes)': 'sum',
184
- 'Modal Price (Rs./Quintal)': 'mean'
185
- })
186
- )
187
-
188
- # Create a complete date range
189
- date_range = pd.date_range(
190
- start=df_grouped['Reported Date'].min(),
191
- end=df_grouped['Reported Date'].max()
192
- )
193
- df_grouped = df_grouped.set_index('Reported Date').reindex(date_range).rename_axis('Reported Date').reset_index()
194
-
195
- # Fill missing values
196
- df_grouped['Arrivals (Tonnes)'] = df_grouped['Arrivals (Tonnes)'].fillna(
197
- method='ffill').fillna(method='bfill')
198
- df_grouped['Modal Price (Rs./Quintal)'] = df_grouped['Modal Price (Rs./Quintal)'].fillna(
199
- method='ffill').fillna(method='bfill')
200
-
201
- st.subheader(f"📈 Trend Graph for {selected_state} ({'Market: ' + selected_market if market_wise else 'State'})")
202
-
203
- if data_type == "Both":
204
- # Min-Max Scaling
205
- scaler = MinMaxScaler()
206
- df_grouped[['Scaled Price', 'Scaled Arrivals']] = scaler.fit_transform(
207
- df_grouped[['Modal Price (Rs./Quintal)', 'Arrivals (Tonnes)']]
208
- )
209
-
210
- fig = go.Figure()
211
-
212
- # Plot Scaled Price with actual values on hover
213
- fig.add_trace(go.Scatter(
214
- x=df_grouped['Reported Date'],
215
- y=df_grouped['Scaled Price'],
216
- mode='lines',
217
- name='Scaled Price',
218
- line=dict(width=1, color='green'),
219
- text=df_grouped['Modal Price (Rs./Quintal)'], # Actual Modal Price values
220
- hovertemplate='Date: %{x}<br>Scaled Price: %{y:.2f}<br>Actual Price: %{text:.2f}<extra></extra>'
221
- ))
222
-
223
- # Plot Scaled Arrivals with actual values on hover
224
- fig.add_trace(go.Scatter(
225
- x=df_grouped['Reported Date'],
226
- y=df_grouped['Scaled Arrivals'],
227
- mode='lines',
228
- name='Scaled Arrivals',
229
- line=dict(width=1, color='blue'),
230
- text=df_grouped['Arrivals (Tonnes)'], # Actual Arrivals values
231
- hovertemplate='Date: %{x}<br>Scaled Arrivals: %{y:.2f}<br>Actual Arrivals: %{text:.2f}<extra></extra>'
232
- ))
233
-
234
- fig.update_layout(
235
- title="Price and Arrivals Trend",
236
- xaxis_title='Date',
237
- yaxis_title='Scaled Values',
238
- template='plotly_white'
239
  )
240
- st.plotly_chart(fig)
241
-
242
- elif data_type == "Price":
243
- # Plot Modal Price
244
- fig = go.Figure()
245
- fig.add_trace(go.Scatter(
246
- x=df_grouped['Reported Date'],
247
- y=df_grouped['Modal Price (Rs./Quintal)'],
248
- mode='lines',
249
- name='Modal Price',
250
- line=dict(width=1, color='green')
251
- ))
252
- fig.update_layout(title="Modal Price Trend", xaxis_title='Date', yaxis_title='Price', template='plotly_white')
253
- st.plotly_chart(fig)
254
-
255
- elif data_type == "Volume":
256
- # Plot Arrivals (Tonnes)
257
- fig = go.Figure()
258
- fig.add_trace(go.Scatter(
259
- x=df_grouped['Reported Date'],
260
- y=df_grouped['Arrivals (Tonnes)'],
261
- mode='lines',
262
- name='Arrivals',
263
- line=dict(width=1, color='blue')
264
- ))
265
- fig.update_layout(title="Arrivals Trend", xaxis_title='Date', yaxis_title='Volume', template='plotly_white')
266
- st.plotly_chart(fig)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
 
 
 
 
 
 
 
 
268
  else:
269
- st.warning("⚠️ No relevant data found for the selected options.")
270
- else:
271
- st.warning("⚠️ No data found for the selected filters.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
272
 
273
- except Exception as e:
274
- st.error(f"❌ Error fetching data: {e}")
 
 
 
 
 
 
 
1
  import streamlit as st
 
 
 
2
  import pandas as pd
3
+ import numpy as np
4
+ from pymongo import MongoClient
5
+ from sklearn.model_selection import train_test_split, GridSearchCV
6
+ from sklearn.metrics import mean_squared_error, mean_absolute_error
7
+ from xgboost import XGBRegressor
8
+ from st_aggrid import AgGrid, GridOptionsBuilder, DataReturnMode, GridUpdateMode
9
  from sklearn.preprocessing import MinMaxScaler
10
+ from datetime import datetime, timedelta
11
+ import plotly.express as px
12
+ import plotly.graph_objects as go
13
+ import calendar
14
  import certifi
15
+ import requests
16
+ from werkzeug.security import generate_password_hash, check_password_hash
17
+ from bs4 import BeautifulSoup
18
  import json
19
+ from itertools import product
20
+ from tqdm import tqdm
21
+ import io
22
+ from statsmodels.tsa.statespace.sarimax import SARIMAX
23
 
24
# Read the MongoDB connection string from Streamlit secrets.
# BUG FIX / SECURITY: the original passed the full connection URI — including
# the database password — as the *key* to st.secrets. That both leaks the
# credential in source control and raises KeyError at runtime. Store the URI
# in .streamlit/secrets.toml under the key "mongo_uri" instead.
mongo_uri = st.secrets["mongo_uri"]
if not mongo_uri:
    st.error("MongoDB URI is not set!")
    st.stop()

# Connect using certifi's CA bundle so TLS certificate verification works
# consistently across platforms.
client = MongoClient(mongo_uri, tlsCAFile=certifi.where())
db = client["AgriPredict"]
collection = db["WhiteSesame"]                    # raw daily mandi price/arrival records
best_params_collection = db["BestParams"]         # tuned XGBoost params (daily horizon)
best_params_collection_1m = db["BestParams_1m"]   # tuned params (1-month horizon)
best_params_collection_3m = db["BestParams_3m"]   # tuned params (3-month horizon)
impExp = db["impExp"]                             # import/export figures
users_collection = db["user"]                     # user credentials for login
38
+
39
# Static mapping of state -> list of mandi (market) names for which white
# sesame price data is available. Used to populate the state and market
# dropdowns in the dashboard UI and to scope MongoDB query filters.
state_market_dict = {
    "Karnataka": [
        "Kalburgi",
        "Basava Kalayana",
        "Lingasugur",
        "Kustagi",
        "Bangalore",
        "Bagalakot",
        "Hubli (Amaragol)"
    ],
    "Gujarat": [
        "Siddhpur",
        "Jasdan",
        "Gondal",
        "Morbi",
        "Botad",
        "Visavadar",
        "Dahod",
        "Rajkot",
        "Junagadh",
        "Savarkundla",
        "Bhavnagar",
        "Rajula",
        "Dhoraji",
        "Amreli",
        "Mahuva(Station Road)",
        "Mansa",
        "Porbandar",
        "Dasada Patadi",
        "Halvad",
        "Chotila",
        "Bhanvad",
        "Dhansura",
        "Babra",
        "Upleta",
        "Palitana",
        "Jetpur(Dist.Rajkot)",
        "S.Mandvi",
        "Mandvi",
        "Khambha",
        "Kadi",
        "Taleja",
        "Himatnagar",
        "Lakhani",
        "Rapar",
        "Una",
        "Dhari",
        "Bagasara",
        "Jam Jodhpur",
        "Veraval",
        "Dhragradhra",
        "Deesa"
    ],
    "Uttar Pradesh": [
        "Bangarmau",
        "Sultanpur",
        "Maudaha",
        "Mauranipur",
        "Lalitpur",
        "Konch",
        "Muskara",
        "Raath",
        "Varipaal",
        "Auraiya",
        "Orai",
        "Banda",
        "Kishunpur",
        "Ait",
        "Jhansi",
        "Kurara",
        "Chirgaon",
        "Charkhari",
        "Moth",
        "Jalaun",
        "Sirsaganj",
        "Shikohabad"
    ],
    "Madhya Pradesh": [
        "Naugaon",
        "Mehar",
        "Kailaras",
        "Datia",
        "LavKush Nagar(Laundi)",
        "Ajaygarh",
        "Rajnagar",
        "Sevda",
        "Neemuch",
        "Sheopurkalan",
        "Lashkar",
        "Alampur",
        "Niwadi",
        "Dabra",
        "Ujjain",
        "Bijawar",
        "Sidhi",
        "Barad",
        "Pohari",
        "Shahagarh",
        "Lateri",
        "Banapura",
        "Panna",
        "Garhakota",
        "Katni",
        "Chhatarpur",
        "Beohari",
        "Satna",
        "Sabalgarh",
        "Hanumana",
        "Bhander",
        "Banmorkalan",
        "Jaora",
        "Bagli",
        "Singroli"
    ],
    "Telangana": [
        "Warangal"
    ]
}
157
def create_forecasting_features(df):
    """Build calendar, lag, rolling and seasonal features for the daily model.

    Parameters
    ----------
    df : DataFrame with a 'Reported Date' column (or a DatetimeIndex) and a
        'Modal Price (Rs./Quintal)' target column.

    Returns
    -------
    DataFrame with 'Reported Date' restored as a column and the engineered
    feature columns appended. The input frame is not mutated.
    """
    df = df.copy()
    if not isinstance(df.index, pd.DatetimeIndex):
        df = df.set_index('Reported Date')
    df.index = pd.to_datetime(df.index)

    price = df['Modal Price (Rs./Quintal)']
    # Date -> price lookup used to fetch lagged values by calendar offset
    # (yields NaN when the offset date is absent from the series).
    target_map = price.to_dict()

    # Calendar features.
    df['dayofweek'] = df.index.dayofweek
    df['quarter'] = df.index.quarter
    df['month'] = df.index.month
    df['year'] = df.index.year
    df['dayofyear'] = df.index.dayofyear
    df['weekofyear'] = df.index.isocalendar().week

    # Calendar-offset lags.
    df['lag14'] = (df.index - pd.Timedelta(days=14)).map(target_map)
    df['lag28'] = (df.index - pd.Timedelta(days=28)).map(target_map)
    df['lag56'] = (df.index - pd.Timedelta(days=56)).map(target_map)
    df['lag_3months'] = (df.index - pd.DateOffset(months=3)).map(target_map)
    df['lag_6months'] = (df.index - pd.DateOffset(months=6)).map(target_map)

    # Rolling statistics (min_periods=1 avoids NaNs at the start of the series).
    for window in [7, 14, 28]:
        df[f'rolling_mean_{window}'] = price.rolling(window=window, min_periods=1).mean()
        df[f'rolling_std_{window}'] = price.rolling(window=window, min_periods=1).std()

    df['ema7'] = price.ewm(span=7, adjust=False).mean()
    df['ema14'] = price.ewm(span=14, adjust=False).mean()
    df['monthly_avg'] = df.groupby('month')['Modal Price (Rs./Quintal)'].transform('mean')
    df['weekly_avg'] = df.groupby('weekofyear')['Modal Price (Rs./Quintal)'].transform('mean')
    df['dayofweek_avg'] = df.groupby('dayofweek')['Modal Price (Rs./Quintal)'].transform('mean')

    # Fourier terms capturing yearly and bi-weekly seasonality.
    df['fourier_sin_365'] = np.sin(2 * np.pi * df.index.dayofyear / 365)
    df['fourier_cos_365'] = np.cos(2 * np.pi * df.index.dayofyear / 365)
    df['fourier_sin_14'] = np.sin(2 * np.pi * df.index.dayofyear / 14)
    df['fourier_cos_14'] = np.cos(2 * np.pi * df.index.dayofyear / 14)

    # BUG FIX: the original computed `.map(target_map).min()` / `.max()`, a
    # single scalar over the whole series, so these "recent" features were
    # constant columns. A trailing rolling window gives the intended per-row
    # statistic over the last 14 days.
    df['recent_min_14'] = price.rolling(window=14, min_periods=1).min()
    df['recent_max_14'] = price.rolling(window=14, min_periods=1).max()
    df['recent_range_14'] = df['recent_max_14'] - df['recent_min_14']

    df['yearly_avg'] = df.groupby('year')['Modal Price (Rs./Quintal)'].transform('mean')
    df['cumulative_mean'] = price.expanding().mean()

    return df.reset_index()
200
+
201
def create_forecasting_features_1m(df):
    """Build calendar, lag, rolling and seasonal features for the 1-month model.

    Same contract as ``create_forecasting_features`` but with monthly-scale
    lags and windows (30/60/90 days). The input frame is not mutated.
    """
    df = df.copy()
    if not isinstance(df.index, pd.DatetimeIndex):
        df = df.set_index('Reported Date')
    df.index = pd.to_datetime(df.index)

    price = df['Modal Price (Rs./Quintal)']
    # Date -> price lookup for calendar-offset lags (NaN on missing dates).
    target_map = price.to_dict()

    # Calendar features.
    df['dayofweek'] = df.index.dayofweek
    df['quarter'] = df.index.quarter
    df['month'] = df.index.month
    df['year'] = df.index.year
    df['dayofyear'] = df.index.dayofyear
    df['weekofyear'] = df.index.isocalendar().week

    # Calendar-offset lags at monthly scales.
    df['lag_30'] = (df.index - pd.Timedelta(days=30)).map(target_map)
    df['lag_60'] = (df.index - pd.Timedelta(days=60)).map(target_map)
    df['lag_90'] = (df.index - pd.Timedelta(days=90)).map(target_map)
    df['lag_6months'] = (df.index - pd.DateOffset(months=6)).map(target_map)
    df['lag_12months'] = (df.index - pd.DateOffset(months=12)).map(target_map)

    # Rolling statistics (min_periods=1 avoids NaNs at the series start).
    for window in [30, 60, 90]:
        df[f'rolling_mean_{window}'] = price.rolling(window=window, min_periods=1).mean()
        df[f'rolling_std_{window}'] = price.rolling(window=window, min_periods=1).std()

    df['ema_30'] = price.ewm(span=30, adjust=False).mean()
    df['ema_60'] = price.ewm(span=60, adjust=False).mean()

    df['monthly_avg'] = df.groupby('month')['Modal Price (Rs./Quintal)'].transform('mean')
    df['weekly_avg'] = df.groupby('weekofyear')['Modal Price (Rs./Quintal)'].transform('mean')
    df['dayofweek_avg'] = df.groupby('dayofweek')['Modal Price (Rs./Quintal)'].transform('mean')

    # Fourier terms for yearly and monthly seasonality.
    df['fourier_sin_365'] = np.sin(2 * np.pi * df.index.dayofyear / 365)
    df['fourier_cos_365'] = np.cos(2 * np.pi * df.index.dayofyear / 365)
    df['fourier_sin_30'] = np.sin(2 * np.pi * df.index.dayofyear / 30)
    df['fourier_cos_30'] = np.cos(2 * np.pi * df.index.dayofyear / 30)

    # BUG FIX: the original reduced `.map(target_map)` with `.min()`/`.max()`
    # to a scalar, producing constant columns. Use a 30-day trailing rolling
    # window for a true per-row recent min/max.
    df['recent_min_30'] = price.rolling(window=30, min_periods=1).min()
    df['recent_max_30'] = price.rolling(window=30, min_periods=1).max()
    df['recent_range_30'] = df['recent_max_30'] - df['recent_min_30']

    df['yearly_avg'] = df.groupby('year')['Modal Price (Rs./Quintal)'].transform('mean')
    df['cumulative_mean'] = price.expanding().mean()

    return df.reset_index()
246
+
247
def create_forecasting_features_3m(df):
    """Build calendar, lag, rolling and seasonal features for the 3-month model.

    Same contract as ``create_forecasting_features`` but with quarterly-scale
    lags and windows (90/180/270/365 days). The input frame is not mutated.
    """
    df = df.copy()
    if not isinstance(df.index, pd.DatetimeIndex):
        df = df.set_index('Reported Date')
    df.index = pd.to_datetime(df.index)

    price = df['Modal Price (Rs./Quintal)']
    # Date -> price lookup for calendar-offset lags (NaN on missing dates).
    target_map = price.to_dict()

    # Calendar features.
    df['dayofweek'] = df.index.dayofweek
    df['quarter'] = df.index.quarter
    df['month'] = df.index.month
    df['year'] = df.index.year
    df['dayofyear'] = df.index.dayofyear
    df['weekofyear'] = df.index.isocalendar().week

    # Calendar-offset lags at quarterly scales.
    df['lag_3months'] = (df.index - pd.DateOffset(months=3)).map(target_map)
    df['lag_6months'] = (df.index - pd.DateOffset(months=6)).map(target_map)
    df['lag_9months'] = (df.index - pd.DateOffset(months=9)).map(target_map)
    df['lag_12months'] = (df.index - pd.DateOffset(months=12)).map(target_map)

    # Rolling statistics (min_periods=1 avoids NaNs at the series start).
    for window in [90, 180, 270, 365]:
        df[f'rolling_mean_{window}'] = price.rolling(window=window, min_periods=1).mean()
        df[f'rolling_std_{window}'] = price.rolling(window=window, min_periods=1).std()

    df['ema90'] = price.ewm(span=90, adjust=False).mean()
    df['ema180'] = price.ewm(span=180, adjust=False).mean()
    df['monthly_avg'] = df.groupby('month')['Modal Price (Rs./Quintal)'].transform('mean')
    df['weekly_avg'] = df.groupby('weekofyear')['Modal Price (Rs./Quintal)'].transform('mean')
    df['dayofweek_avg'] = df.groupby('dayofweek')['Modal Price (Rs./Quintal)'].transform('mean')

    # Fourier terms for quarterly and monthly seasonality.
    df['fourier_sin_90'] = np.sin(2 * np.pi * df.index.dayofyear / 90)
    df['fourier_cos_90'] = np.cos(2 * np.pi * df.index.dayofyear / 90)
    df['fourier_sin_30'] = np.sin(2 * np.pi * df.index.dayofyear / 30)
    df['fourier_cos_30'] = np.cos(2 * np.pi * df.index.dayofyear / 30)

    # BUG FIX: the original reduced `.map(target_map)` with `.min()`/`.max()`
    # to a scalar, producing constant columns. Use a 90-day trailing rolling
    # window for a true per-row recent min/max.
    df['recent_min_90'] = price.rolling(window=90, min_periods=1).min()
    df['recent_max_90'] = price.rolling(window=90, min_periods=1).max()
    df['recent_range_90'] = df['recent_max_90'] - df['recent_min_90']

    df['yearly_avg'] = df.groupby('year')['Modal Price (Rs./Quintal)'].transform('mean')
    df['cumulative_mean'] = price.expanding().mean()

    return df.reset_index()
290
+
291
+
292
def preprocess_data(df):
    """Reduce raw records to a continuous daily price series.

    Keeps only 'Reported Date' and 'Modal Price (Rs./Quintal)', averages
    duplicate dates, reindexes onto a complete daily calendar between the
    first and last observation, and fills gaps by forward- then
    backward-fill.

    Parameters
    ----------
    df : DataFrame containing at least the two columns named above.

    Returns
    -------
    DataFrame with one row per calendar day and no missing prices.
    """
    # Copy explicitly so the caller's frame is never mutated (the original
    # assigned into a column of a slice, risking SettingWithCopyWarning).
    df = df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    df['Reported Date'] = pd.to_datetime(df['Reported Date'])

    # Average duplicate reports for the same date (e.g. multiple markets).
    df = df.groupby('Reported Date', as_index=False).mean()

    # Reindex onto every calendar day between the first and last observation.
    full_date_range = pd.date_range(df['Reported Date'].min(), df['Reported Date'].max())
    df = df.set_index('Reported Date').reindex(full_date_range).rename_axis('Reported Date').reset_index()

    # Fill gaps: forward-fill, then backward-fill for leading NaNs.
    # (fillna(method=...) is deprecated in modern pandas; use ffill/bfill.)
    df['Modal Price (Rs./Quintal)'] = df['Modal Price (Rs./Quintal)'].ffill().bfill()
    return df
311
+
312
def train_and_evaluate(df):
    """Tune, train and evaluate an XGBoost price model on a daily split.

    Trains on rows before 2024-01-01 and tests on everything after, running a
    small manual grid search (so a Streamlit progress bar can be updated after
    every fit), reporting RMSE/MAE, plotting train/test/predicted series, and
    returning the best-scoring hyper-parameter dict.
    """
    progress_bar = st.progress(0)

    def update_tuning_progress(current, total):
        # Streamlit progress expects an int percentage in [0, 100].
        progress_bar.progress(int((current / total) * 100))

    df = create_forecasting_features(df)

    # Time-based split: everything before 2024 trains, the rest tests.
    train_df = df[df['Reported Date'] < '2024-01-01']
    test_df = df[df['Reported Date'] >= '2024-01-01']

    X_train = train_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
    y_train = train_df['Modal Price (Rs./Quintal)']
    X_test = test_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
    y_test = test_df['Modal Price (Rs./Quintal)']

    st.write("Performing hyperparameter tuning...")
    param_grid = {
        'learning_rate': [0.01, 0.1, 0.2],
        'max_depth': [3, 5, 7],
        'n_estimators': [50, 100, 150],
        'booster': ['gbtree', 'dart']
    }

    model = XGBRegressor()
    # itertools.product (already imported at module level) replaces the
    # original four nested loops; the combination count falls out for free.
    combos = list(product(param_grid['learning_rate'], param_grid['max_depth'],
                          param_grid['n_estimators'], param_grid['booster']))

    # NOTE(review): model selection scores against the *test* set, so the
    # reported metrics are optimistically biased — consider a separate
    # validation split.
    best_score = float('-inf')
    best_params = None
    for i, (learning_rate, max_depth, n_estimators, booster) in enumerate(combos, start=1):
        model.set_params(
            learning_rate=learning_rate,
            max_depth=max_depth,
            n_estimators=n_estimators,
            booster=booster
        )
        model.fit(X_train, y_train)
        score = model.score(X_test, y_test)
        if score > best_score:
            best_score = score
            best_params = {
                'learning_rate': learning_rate,
                'max_depth': max_depth,
                'n_estimators': n_estimators,
                'booster': booster
            }
        update_tuning_progress(i, len(combos))

    # Retrain with the winning parameters and predict the test window.
    st.write("Training the best model and making predictions...")
    best_model = XGBRegressor(**best_params)
    best_model.fit(X_train, y_train)
    y_pred = best_model.predict(X_test)

    # RMSE via sqrt(MSE): the `squared=False` keyword was deprecated and then
    # removed in scikit-learn 1.6, so take the root explicitly.
    rmse = mean_squared_error(y_test, y_pred) ** 0.5
    mae = mean_absolute_error(y_test, y_pred)
    st.write(f"RMSE: {rmse}")
    st.write(f"MAE: {mae}")

    # Assemble one long frame tagged by series type for plotting.
    train_plot_df = train_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    train_plot_df['Type'] = 'Train'

    test_plot_df = test_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    test_plot_df['Type'] = 'Test'

    predicted_plot_df = test_df[['Reported Date']].copy()
    predicted_plot_df['Modal Price (Rs./Quintal)'] = y_pred
    predicted_plot_df['Type'] = 'Predicted'

    plot_df = pd.concat([train_plot_df, test_plot_df, predicted_plot_df])

    fig = go.Figure()
    for plot_type, color, dash in [('Train', 'blue', None), ('Test', 'orange', None),
                                   ('Predicted', 'green', 'dot')]:
        data = plot_df[plot_df['Type'] == plot_type]
        fig.add_trace(go.Scatter(
            x=data['Reported Date'],
            y=data['Modal Price (Rs./Quintal)'],
            mode='lines',
            name=f"{plot_type} Data",
            line=dict(color=color, dash=dash)
        ))

    fig.update_layout(
        title="Train, Test, and Predicted Data",
        xaxis_title="Date",
        yaxis_title="Modal Price (Rs./Quintal)",
        template="plotly_white"
    )
    st.plotly_chart(fig, use_container_width=True)

    return best_params
429
+
430
def train_and_evaluate_1m(df):
    """Tune, train and evaluate an XGBoost price model for a 1-month horizon.

    Trains on rows before 2024-01-01 and tests on the 30 days that follow,
    running a small manual grid search with a Streamlit progress bar,
    reporting RMSE/MAE, plotting train/test/predicted series, and returning
    the best-scoring hyper-parameter dict.

    (The redundant function-scope imports of streamlit/pandas/plotly/xgboost/
    sklearn in the original were removed — all are imported at module level.)
    """
    progress_bar = st.progress(0)

    def update_tuning_progress(current, total):
        # Streamlit progress expects an int percentage in [0, 100].
        progress_bar.progress(int((current / total) * 100))

    df = create_forecasting_features_1m(df)

    # 1-month test window immediately after the split date.
    split_date = pd.to_datetime("2024-01-01")
    test_horizon = pd.DateOffset(days=30)

    train_df = df[df['Reported Date'] < split_date]
    test_df = df[(df['Reported Date'] >= split_date) & (df['Reported Date'] < split_date + test_horizon)]

    X_train = train_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
    y_train = train_df['Modal Price (Rs./Quintal)']
    X_test = test_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
    y_test = test_df['Modal Price (Rs./Quintal)']

    st.write("Performing hyperparameter tuning...")
    param_grid = {
        'learning_rate': [0.01, 0.1, 0.2],
        'max_depth': [3, 5, 7],
        'n_estimators': [50, 100, 150],
        'booster': ['gbtree', 'dart']
    }

    model = XGBRegressor()
    # itertools.product (already imported at module level) replaces the
    # original four nested loops.
    combos = list(product(param_grid['learning_rate'], param_grid['max_depth'],
                          param_grid['n_estimators'], param_grid['booster']))

    # NOTE(review): model selection scores against the *test* set, so the
    # reported metrics are optimistically biased.
    best_score = float('-inf')
    best_params = None
    for i, (learning_rate, max_depth, n_estimators, booster) in enumerate(combos, start=1):
        model.set_params(
            learning_rate=learning_rate,
            max_depth=max_depth,
            n_estimators=n_estimators,
            booster=booster
        )
        model.fit(X_train, y_train)
        score = model.score(X_test, y_test)
        if score > best_score:
            best_score = score
            best_params = {
                'learning_rate': learning_rate,
                'max_depth': max_depth,
                'n_estimators': n_estimators,
                'booster': booster
            }
        update_tuning_progress(i, len(combos))

    # Retrain with the winning parameters and predict the test window.
    st.write("Training the best model and making predictions...")
    best_model = XGBRegressor(**best_params)
    best_model.fit(X_train, y_train)
    y_pred = best_model.predict(X_test)

    # RMSE via sqrt(MSE): the `squared=False` keyword was deprecated and then
    # removed in scikit-learn 1.6, so take the root explicitly.
    rmse = mean_squared_error(y_test, y_pred) ** 0.5
    mae = mean_absolute_error(y_test, y_pred)
    st.write(f"RMSE: {rmse}")
    st.write(f"MAE: {mae}")

    # Assemble one long frame tagged by series type for plotting.
    train_plot_df = train_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    train_plot_df['Type'] = 'Train'

    test_plot_df = test_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    test_plot_df['Type'] = 'Test'

    predicted_plot_df = test_df[['Reported Date']].copy()
    predicted_plot_df['Modal Price (Rs./Quintal)'] = y_pred
    predicted_plot_df['Type'] = 'Predicted'

    plot_df = pd.concat([train_plot_df, test_plot_df, predicted_plot_df])

    fig = go.Figure()
    for plot_type, color, dash in [('Train', 'blue', None), ('Test', 'orange', None),
                                   ('Predicted', 'green', 'dot')]:
        data = plot_df[plot_df['Type'] == plot_type]
        fig.add_trace(go.Scatter(
            x=data['Reported Date'],
            y=data['Modal Price (Rs./Quintal)'],
            mode='lines',
            name=f"{plot_type} Data",
            line=dict(color=color, dash=dash)
        ))

    fig.update_layout(
        title="Train, Test, and Predicted Data",
        xaxis_title="Date",
        yaxis_title="Modal Price (Rs./Quintal)",
        template="plotly_white"
    )
    st.plotly_chart(fig, use_container_width=True)

    return best_params
554
+
555
def train_and_evaluate_3m(df):
    """Tune, train and evaluate an XGBoost model for the 3-month horizon.

    Splits the series at 2023-10-01 into train/test, grid-searches XGBoost
    hyperparameters, reports RMSE/MAE on the hold-out period, plots the
    train/test/predicted series, and returns the best parameter dict.

    Args:
        df: Raw price history with 'Reported Date' and
            'Modal Price (Rs./Quintal)' columns.

    Returns:
        dict: Best hyperparameters found by the grid search.
    """
    # Progress bar driven by the grid-search loop below.
    progress_bar = st.progress(0)

    def update_tuning_progress(current, total):
        # Map completed-combination count to the 0-100 integer st.progress expects.
        progress_bar.progress(int((current / total) * 100))

    df = create_forecasting_features_3m(df)
    # Fixed chronological split: everything before Oct 2023 trains the model.
    train_df = df[df['Reported Date'] < '2023-10-01']
    test_df = df[df['Reported Date'] >= '2023-10-01']

    X_train = train_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
    y_train = train_df['Modal Price (Rs./Quintal)']
    X_test = test_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
    y_test = test_df['Modal Price (Rs./Quintal)']

    st.write("Performing hyperparameter tuning...")
    param_grid = {
        'learning_rate': [0.01, 0.1, 0.2],
        'max_depth': [3, 5, 7],
        'n_estimators': [50, 100, 150],
        'booster': ['gbtree', 'dart']
    }

    model = XGBRegressor()
    param_combinations = (len(param_grid['learning_rate'])
                          * len(param_grid['max_depth'])
                          * len(param_grid['n_estimators'])
                          * len(param_grid['booster']))
    current_combination = 0  # counter shared with the nested search

    # NOTE(review): candidates are scored on the test split itself, so the
    # reported metrics are optimistically biased; a separate validation split
    # would be cleaner. Preserved as-is to keep behaviour identical.
    def custom_grid_search():
        nonlocal current_combination
        best_score = float('-inf')
        best_params = None
        for learning_rate in param_grid['learning_rate']:
            for max_depth in param_grid['max_depth']:
                for n_estimators in param_grid['n_estimators']:
                    for booster in param_grid['booster']:
                        model.set_params(
                            learning_rate=learning_rate,
                            max_depth=max_depth,
                            n_estimators=n_estimators,
                            booster=booster
                        )
                        model.fit(X_train, y_train)
                        score = model.score(X_test, y_test)
                        if score > best_score:
                            best_score = score
                            best_params = {
                                'learning_rate': learning_rate,
                                'max_depth': max_depth,
                                'n_estimators': n_estimators,
                                'booster': booster
                            }
                        current_combination += 1
                        update_tuning_progress(current_combination, param_combinations)
        return best_params

    best_params = custom_grid_search()

    # Refit once with the winning configuration and evaluate on the hold-out.
    st.write("Training the best model and making predictions...")
    best_model = XGBRegressor(**best_params)
    best_model.fit(X_train, y_train)
    y_pred = best_model.predict(X_test)

    # RMSE via sqrt: mean_squared_error(..., squared=False) is deprecated and
    # was removed in scikit-learn 1.6, so this form is version-safe.
    rmse = mean_squared_error(y_test, y_pred) ** 0.5
    mae = mean_absolute_error(y_test, y_pred)
    st.write(f"RMSE: {rmse}")
    st.write(f"MAE: {mae}")

    # Assemble one long frame tagged by segment so each trace can be filtered.
    train_plot_df = train_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    train_plot_df['Type'] = 'Train'

    test_plot_df = test_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    test_plot_df['Type'] = 'Test'

    predicted_plot_df = test_df[['Reported Date']].copy()
    predicted_plot_df['Modal Price (Rs./Quintal)'] = y_pred
    predicted_plot_df['Type'] = 'Predicted'

    plot_df = pd.concat([train_plot_df, test_plot_df, predicted_plot_df])

    fig = go.Figure()
    for plot_type, color, dash in [('Train', 'blue', None), ('Test', 'orange', None),
                                   ('Predicted', 'green', 'dot')]:
        data = plot_df[plot_df['Type'] == plot_type]
        fig.add_trace(go.Scatter(
            x=data['Reported Date'],
            y=data['Modal Price (Rs./Quintal)'],
            mode='lines',
            name=f"{plot_type} Data",
            line=dict(color=color, dash=dash)
        ))

    fig.update_layout(
        title="Train, Test, and Predicted Data",
        xaxis_title="Date",
        yaxis_title="Modal Price (Rs./Quintal)",
        template="plotly_white"
    )

    st.plotly_chart(fig, use_container_width=True)

    # Return best parameters
    return best_params
670
+
671
def forecast_next_14_days(df, _best_params, key):
    """Train on the full history and forecast the next 14 days of modal prices.

    Args:
        df: History with 'Reported Date' and 'Modal Price (Rs./Quintal)'.
        _best_params: XGBoost hyperparameter dict from a prior tuning run.
        key: Identifier used to label the download file.
    """
    last_date = df['Reported Date'].max()
    future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=14)
    future_df = pd.DataFrame({'Reported Date': future_dates})

    # Build features over history + horizon together so lag/rolling features
    # computed by create_forecasting_features span the boundary.
    full_df = pd.concat([df, future_df], ignore_index=True)
    full_df = create_forecasting_features(full_df)

    # .copy() the slices: we assign a column to future_df below, which on a
    # plain boolean-mask view triggers SettingWithCopy behaviour.
    original_df = full_df[full_df['Reported Date'] <= last_date].copy()
    future_df = full_df[full_df['Reported Date'] > last_date].copy()

    X_train = original_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'], errors='ignore')
    y_train = original_df['Modal Price (Rs./Quintal)']
    X_future = future_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'], errors='ignore')

    model = XGBRegressor(**_best_params)
    model.fit(X_train, y_train)

    future_predictions = model.predict(X_future)
    future_df['Modal Price (Rs./Quintal)'] = future_predictions

    # Plot actual vs forecast, then offer the forecast for download.
    plot_data(original_df, future_df, last_date, model, 14)
    download_button(future_df, key)
696
+
697
def forecast_next_30_days(df, _best_params, key):
    """Train on the full history and forecast the next 30 days of modal prices.

    Args:
        df: History with 'Reported Date' and 'Modal Price (Rs./Quintal)'.
        _best_params: XGBoost hyperparameter dict from a prior tuning run.
        key: Identifier used to label the download file.
    """
    last_date = df['Reported Date'].max()
    future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=30)
    future_df = pd.DataFrame({'Reported Date': future_dates})

    # Build features over history + horizon together so lag/rolling features
    # computed by create_forecasting_features_1m span the boundary.
    full_df = pd.concat([df, future_df], ignore_index=True)
    full_df = create_forecasting_features_1m(full_df)

    # .copy() the slices: we assign a column to future_df below, which on a
    # plain boolean-mask view triggers SettingWithCopy behaviour.
    original_df = full_df[full_df['Reported Date'] <= last_date].copy()
    future_df = full_df[full_df['Reported Date'] > last_date].copy()

    X_train = original_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'], errors='ignore')
    y_train = original_df['Modal Price (Rs./Quintal)']
    X_future = future_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'], errors='ignore')

    model = XGBRegressor(**_best_params)
    model.fit(X_train, y_train)

    future_predictions = model.predict(X_future)
    future_df['Modal Price (Rs./Quintal)'] = future_predictions

    # Plot actual vs forecast, then offer the forecast for download.
    plot_data(original_df, future_df, last_date, model, 30)
    download_button(future_df, key)
722
+
723
def forecast_next_90_days(df, _best_params, key):
    """Train on the full history and forecast the next 90 days of modal prices.

    Args:
        df: History with 'Reported Date' and 'Modal Price (Rs./Quintal)'.
        _best_params: XGBoost hyperparameter dict from a prior tuning run.
        key: Identifier used to label the download file.
    """
    last_date = df['Reported Date'].max()
    future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=90)
    future_df = pd.DataFrame({'Reported Date': future_dates})

    # Build features over history + horizon together so lag/rolling features
    # computed by create_forecasting_features_3m span the boundary.
    full_df = pd.concat([df, future_df], ignore_index=True)
    full_df = create_forecasting_features_3m(full_df)

    # .copy() the slices: we assign a column to future_df below, which on a
    # plain boolean-mask view triggers SettingWithCopy behaviour.
    original_df = full_df[full_df['Reported Date'] <= last_date].copy()
    future_df = full_df[full_df['Reported Date'] > last_date].copy()

    X_train = original_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'], errors='ignore')
    y_train = original_df['Modal Price (Rs./Quintal)']
    X_future = future_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'], errors='ignore')

    model = XGBRegressor(**_best_params)
    model.fit(X_train, y_train)

    future_predictions = model.predict(X_future)
    future_df['Modal Price (Rs./Quintal)'] = future_predictions

    # Plot actual vs forecast, then offer the forecast for download.
    plot_data(original_df, future_df, last_date, model, 90)
    download_button(future_df, key)
748
+
749
def plot_data(original_df, future_df, last_date, model, days):
    """Plot the model's in-sample fit for the trailing *days* days alongside
    the *days*-day forecast held in *future_df*.

    Args:
        original_df: Featurized history (includes the target column).
        future_df: Forecast rows with predicted 'Modal Price (Rs./Quintal)'.
        last_date: Final date of the real history.
        model: Fitted regressor used to re-predict the trailing window.
        days: Length of both the look-back window and the forecast horizon.
    """
    # Re-predict the trailing window so the "Actual" trace is the model's fit.
    window = original_df[original_df['Reported Date'] > (last_date - pd.Timedelta(days=days))]
    fitted = window[['Reported Date']].copy()
    fitted['Modal Price (Rs./Quintal)'] = model.predict(
        window.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'], errors='ignore'))
    fitted['Type'] = 'Actual'

    forecast_trace = future_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    forecast_trace['Type'] = 'Forecasted'
    # Duplicate the last fitted point into the forecast trace so the lines join.
    bridge = fitted.iloc[[-1]].copy()
    bridge['Type'] = 'Forecasted'
    forecast_trace = pd.concat([bridge, forecast_trace])
    combined = pd.concat([fitted, forecast_trace])

    fig = go.Figure()
    for label, colour, style in (('Actual', 'blue', 'solid'), ('Forecasted', 'red', 'dash')):
        segment = combined[combined['Type'] == label]
        fig.add_trace(go.Scatter(
            x=segment['Reported Date'],
            y=segment['Modal Price (Rs./Quintal)'],
            mode='lines',
            name=f"{label} Data",
            line=dict(color=colour, dash=style)))
    fig.update_layout(
        title="Actual vs Forecasted Modal Price (Rs./Quintal)",
        xaxis_title="Date",
        yaxis_title="Modal Price (Rs./Quintal)",
        template="plotly_white")
    st.plotly_chart(fig, use_container_width=True)
769
+
770
def download_button(future_df, key):
    """Offer the forecasted prices as an .xlsx download.

    Exports only 'Reported Date' (formatted YYYY-MM-DD) and the modal price.

    Args:
        future_df: Forecast rows with a datetime 'Reported Date' column.
        key: Suffix used to make the download file name unique.
    """
    # Keep only the two user-facing columns.
    download_df = future_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()

    # Render dates as plain YYYY-MM-DD strings in the spreadsheet.
    download_df['Reported Date'] = download_df['Reported Date'].dt.strftime('%Y-%m-%d')

    # Write to an in-memory Excel file without the index.
    towrite = io.BytesIO()
    download_df.to_excel(towrite, index=False, engine='xlsxwriter')
    towrite.seek(0)

    # Correct MIME for .xlsx; 'application/vnd.ms-excel' is the legacy .xls type.
    st.download_button(label="Download Forecasted Values",
                       data=towrite,
                       file_name=f'forecasted_prices_{key}.xlsx',
                       mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')
787
+
788
+
789
+
790
def fetch_and_process_data(query_filter):
    """Fetch documents matching *query_filter* from MongoDB and preprocess them.

    Returns:
        A preprocessed DataFrame, or None when nothing matched or an error
        occurred (reported through the Streamlit UI).
    """
    try:
        records = list(collection.find(query_filter))
        if not records:
            st.warning("⚠️ No data found for the selected filter.")
            return None
        st.write("Preprocessing data...")
        return preprocess_data(pd.DataFrame(records))
    except Exception as e:
        # Surface fetch/preprocess failures in the UI rather than crashing.
        st.error(f"❌ Error fetching data: {e}")
        return None
805
+
806
def save_best_params(collection, filter_key, best_params):
    """Upsert the best hyperparameters for *filter_key* into *collection*.

    The caller's dict is NOT mutated: metadata ('filter_key', 'last_updated')
    is added to a copy, so the original can still be splatted into
    XGBRegressor(**best_params) afterwards.

    Args:
        collection: MongoDB collection holding one document per filter key.
        filter_key: Identifier for the state/market/horizon combination.
        best_params: Hyperparameter dict produced by tuning.
    """
    # Work on a copy so metadata keys don't leak into the caller's dict.
    doc = dict(best_params)
    doc["filter_key"] = filter_key
    doc["last_updated"] = datetime.now().isoformat()

    existing_entry = collection.find_one({"filter_key": filter_key})
    if existing_entry:
        collection.replace_one({"filter_key": filter_key}, doc)
    else:
        collection.insert_one(doc)
815
+
816
def get_best_params(filter_key, collection):
    """Return the stored best-parameter document for *filter_key*, or None."""
    return collection.find_one({"filter_key": filter_key})
820
# Function to handle training and forecasting
def train_and_forecast(df, filter_key, days):
    """Train the horizon-specific model, persist its parameters, and forecast.

    Fix: save_best_params expects (collection, filter_key, best_params); the
    previous call order passed the collection last, which crashed on
    filter_key.find_one. A copy of best_params is handed to the saver so any
    metadata it attaches cannot leak into the hyperparameters used below.

    Args:
        df: Preprocessed price history (None is a no-op).
        filter_key: Identifier under which the tuned parameters are stored.
        days: Forecast horizon — 14, 30 or 90.
    """
    if df is not None:
        if days == 14:
            best_params = train_and_evaluate(df)
            save_best_params(best_params_collection, filter_key, dict(best_params))
            forecast_next_14_days(df, best_params, filter_key)
        elif days == 30:
            best_params = train_and_evaluate_1m(df)
            save_best_params(best_params_collection_1m, filter_key, dict(best_params))
            forecast_next_30_days(df, best_params, filter_key)
        elif days == 90:
            best_params = train_and_evaluate_3m(df)
            save_best_params(best_params_collection_3m, filter_key, dict(best_params))
            forecast_next_90_days(df, best_params, filter_key)
836
+
837
def forecast(df, filter_key, days):
    """Forecast using previously stored hyperparameters for the given horizon.

    Fix: the stored MongoDB record carries metadata ('_id', 'filter_key',
    'last_updated') that must not be splatted into XGBRegressor; only genuine
    hyperparameters are forwarded.

    Args:
        df: Preprocessed price history.
        filter_key: Identifier the tuned parameters were stored under.
        days: Forecast horizon — 14, 30 or 90.
    """
    # Map horizon -> (parameter store, forecasting routine).
    horizons = {
        14: (best_params_collection, forecast_next_14_days),
        30: (best_params_collection_1m, forecast_next_30_days),
        90: (best_params_collection_3m, forecast_next_90_days),
    }
    if days not in horizons:
        return
    params_collection, forecast_fn = horizons[days]

    record = get_best_params(filter_key, params_collection)
    if not record:
        st.warning("⚠️ Model is not trained yet. Please train the model first.")
        return

    st.info(f"ℹ️ The model was trained on {record['last_updated']}.")
    # Strip storage metadata so only hyperparameters reach the model.
    hyperparams = {k: v for k, v in record.items()
                   if k not in ('_id', 'filter_key', 'last_updated')}
    forecast_fn(df, hyperparams, filter_key)
859
+
860
def collection_to_dataframe(collection, drop_id=True):
    """
    Converts a MongoDB collection to a pandas DataFrame.

    Args:
        collection: MongoDB collection object.
        drop_id (bool): Whether to drop the '_id' column. Default is True.

    Returns:
        pd.DataFrame: DataFrame containing the collection data.
    """
    # Materialize every document and load it straight into pandas.
    frame = pd.DataFrame(list(collection.find()))

    # Mongo's internal '_id' column is rarely useful downstream.
    if drop_id and '_id' in frame.columns:
        frame = frame.drop(columns=['_id'])

    return frame
882
+
883
+
884
+
885
def editable_spreadsheet():
    """Upload a sowing-report Excel sheet and predict production volume for a
    user-chosen region, season, and sown area."""
    st.title("Sowing Report Prediction Model")

    # Nothing to do until a workbook has been uploaded.
    uploaded_file = st.file_uploader("Upload your Excel file", type=['xlsx'])
    if uploaded_file is None:
        return

    sheet = pd.read_excel(uploaded_file)
    st.write("Excel data loaded:", sheet)

    # Gather the filter criteria and the sown area in one form submission.
    with st.form("input_form"):
        region = st.text_input("Enter Region to Filter By", placeholder="Region Name")
        season = st.text_input("Enter Season to Filter By", placeholder="Season (e.g., Winter)")
        area = st.number_input("Enter Area (in hectares) for Production Calculation", min_value=0.0, format="%.2f")
        submitted = st.form_submit_button("Calculate Production")

    if not submitted:
        return

    if not (region and season and area > 0):
        st.error("Please enter valid region, season, and area to proceed.")
        return

    # Case-insensitive match on both region and season.
    mask = ((sheet['Region'].str.lower() == region.lower())
            & (sheet['Season'].str.lower() == season.lower()))
    matching_rows = sheet[mask]

    if matching_rows.empty:
        st.error("No data found for the specified region and season.")
    else:
        process_dataframe(matching_rows, area)
920
+
921
def process_dataframe(df, area):
    """Report predicted production volume as mean yield times sown *area*."""
    if 'Yield' not in df.columns:
        # Without a yield column there is nothing to compute.
        st.error("The DataFrame does not contain a necessary 'Yield' column for calculation.")
        return
    predicted_production = df['Yield'].mean() * area
    st.success(f"The predicted Production Volume for the specified region and season is: {predicted_production:.2f} units")
928
+
929
+
930
+
931
def display_statistics(df):
    """Render the national market statistics dashboard.

    Aggregates the raw market rows to one national row per day (mean price,
    summed arrivals) and renders: latest figures, same-day history, monthly
    and yearly aggregates, largest daily moves, price extremes, a recent-data
    snapshot, and finally the sowing-report uploader.
    """
    st.title("📊 National Market Statistics Dashboard")
    # Page-local CSS for headings and the highlighted explainer paragraphs.
    st.markdown("""
    <style>
    h1 {
        color: #2e7d32;
        font-size: 36px;
        font-weight: bold;
    }
    h3 {
        color: #388e3c;
        font-size: 28px;
        font-weight: 600;
    }
    p {
        font-size: 16px;
        line-height: 1.6;
    }
    .highlight {
        background-color: #f1f8e9;
        padding: 10px;
        border-radius: 8px;
        font-size: 16px;
        color: #2e7d32;
        font-weight: 500;
    }
    </style>
    """, unsafe_allow_html=True)

    # Collapse to one national row per day: mean price, total arrivals.
    df['Reported Date'] = pd.to_datetime(df['Reported Date'])
    national_data = df.groupby('Reported Date').agg({
        'Modal Price (Rs./Quintal)': 'mean',
        'Arrivals (Tonnes)': 'sum'
    }).reset_index()

    st.subheader("🗓️ Key Statistics")
    # After the groupby there is one row per date, so mean()/sum() here just
    # extract that single row's values.
    latest_date = national_data['Reported Date'].max()
    latest_price = national_data[national_data['Reported Date'] == latest_date]['Modal Price (Rs./Quintal)'].mean()
    latest_arrivals = national_data[national_data['Reported Date'] == latest_date]['Arrivals (Tonnes)'].sum()

    st.markdown("<p class='highlight'>This section provides the most recent statistics for the market. It includes the latest available date, the average price of commodities, and the total quantity of goods arriving at the market. These metrics offer an up-to-date snapshot of market conditions.</p>", unsafe_allow_html=True)
    st.write(f"**Latest Date**: {latest_date.strftime('%Y-%m-%d')}")
    st.write(f"**Latest Modal Price**: {latest_price:.2f} Rs./Quintal")
    st.write(f"**Latest Arrivals**: {latest_arrivals:.2f} Tonnes")

    st.subheader("📆 This Day in Previous Years")
    st.markdown("<p class='highlight'>This table shows the modal price and total arrivals for this exact day across previous years. It provides a historical perspective to compare against current market conditions. This section examines historical data for the same day in previous years. By analyzing trends for this specific day, you can identify seasonal patterns, supply-demand changes, or any deviations that might warrant closer attention.</p>", unsafe_allow_html=True)
    today = latest_date
    # Match by day-of-year to find the same calendar day in earlier years.
    previous_years_data = national_data[national_data['Reported Date'].dt.dayofyear == today.dayofyear]

    if not previous_years_data.empty:
        # NOTE(review): assigning into this mask-derived frame can raise a
        # SettingWithCopyWarning; consider .copy() if warnings appear.
        previous_years_data['Year'] = previous_years_data['Reported Date'].dt.year.astype(str)
        display_data = (previous_years_data[['Year', 'Modal Price (Rs./Quintal)', 'Arrivals (Tonnes)']]
                        .sort_values(by='Year', ascending=False)
                        .reset_index(drop=True))
        st.table(display_data)
    else:
        st.write("No historical data available for this day in previous years.")

    st.subheader("📅 Monthly Averages Over Years")
    st.markdown("<p class='highlight'>This section displays the average modal prices and arrivals for each month across all years. It helps identify seasonal trends and peak activity months, which can be crucial for inventory planning and market predictions.</p>", unsafe_allow_html=True)
    national_data['Month'] = national_data['Reported Date'].dt.month
    monthly_avg_price = national_data.groupby('Month')['Modal Price (Rs./Quintal)'].mean().reset_index()
    monthly_avg_arrivals = national_data.groupby('Month')['Arrivals (Tonnes)'].mean().reset_index()
    monthly_avg = pd.merge(monthly_avg_price, monthly_avg_arrivals, on='Month')
    # Replace month numbers with month names for display.
    monthly_avg['Month'] = monthly_avg['Month'].apply(lambda x: calendar.month_name[x])
    monthly_avg.columns = ['Month', 'Average Modal Price (Rs./Quintal)', 'Average Arrivals (Tonnes)']
    st.write(monthly_avg)
    st.subheader("📆 Yearly Averages")
    st.markdown("<p class='highlight'>Yearly averages provide insights into long-term trends in pricing and arrivals. By examining these values, you can detect overall growth, stability, or volatility in the market.</p>", unsafe_allow_html=True)
    national_data['Year'] = national_data['Reported Date'].dt.year
    yearly_avg_price = national_data.groupby('Year')['Modal Price (Rs./Quintal)'].mean().reset_index()
    yearly_sum_arrivals = national_data.groupby('Year')['Arrivals (Tonnes)'].sum().reset_index()
    yearly_avg = pd.merge(yearly_avg_price, yearly_sum_arrivals, on='Year')
    yearly_avg['Year'] = yearly_avg['Year'].apply(lambda x: f"{int(x)}")
    # NOTE(review): the arrivals column holds the yearly *sum* (see
    # yearly_sum_arrivals above) but is labelled "Average" — confirm label.
    yearly_avg.columns = ['Year', 'Average Modal Price (Rs./Quintal)', 'Average Arrivals (Tonnes)']
    st.write(yearly_avg)

    st.subheader("📈 Largest Daily Price Changes (Past Year)")
    st.markdown("<p class='highlight'>This analysis identifies the most significant daily price changes in the past year. These fluctuations can highlight periods of market volatility, potentially caused by external factors like weather, policy changes, or supply chain disruptions.</p>", unsafe_allow_html=True)
    one_year_ago = latest_date - pd.DateOffset(years=1)
    recent_data = national_data[national_data['Reported Date'] >= one_year_ago]
    # Day-over-day percentage change of the national mean price.
    recent_data['Daily Change (%)'] = recent_data['Modal Price (Rs./Quintal)'].pct_change() * 100
    largest_changes = recent_data[['Reported Date', 'Modal Price (Rs./Quintal)', 'Daily Change (%)']].nlargest(5, 'Daily Change (%)')
    largest_changes['Reported Date'] = largest_changes['Reported Date'].dt.date
    largest_changes = largest_changes.reset_index(drop=True)
    st.write(largest_changes)

    st.subheader("🏆 Top 5 Highest and Lowest Prices (Past Year)")
    st.markdown("<p class='highlight'>This section highlights the highest and lowest prices over the past year. These values reflect the extremes of market dynamics, helping to understand price ceilings and floors in the recent period.</p>", unsafe_allow_html=True)
    highest_prices = recent_data.nlargest(5, 'Modal Price (Rs./Quintal)')[['Reported Date', 'Modal Price (Rs./Quintal)']]
    lowest_prices = recent_data.nsmallest(5, 'Modal Price (Rs./Quintal)')[['Reported Date', 'Modal Price (Rs./Quintal)']]
    highest_prices['Reported Date'] = highest_prices['Reported Date'].dt.date
    lowest_prices['Reported Date'] = lowest_prices['Reported Date'].dt.date
    highest_prices = highest_prices.reset_index(drop=True)
    lowest_prices = lowest_prices.reset_index(drop=True)
    st.write("**Top 5 Highest Prices**")
    st.write(highest_prices)
    st.write("**Top 5 Lowest Prices**")
    st.write(lowest_prices)

    st.subheader("🗂️ Data Snapshot")
    st.markdown("<p class='highlight'>This snapshot provides a concise overview of the latest data, including rolling averages and lagged values. These metrics help identify short-term trends and lagged effects in pricing.</p>", unsafe_allow_html=True)
    # Derived columns: 14-day rolling mean and a 14-day lag of the price.
    national_data['Rolling Mean (14 Days)'] = national_data['Modal Price (Rs./Quintal)'].rolling(window=14).mean()
    national_data['Lag (14 Days)'] = national_data['Modal Price (Rs./Quintal)'].shift(14)
    national_data['Reported Date'] = national_data['Reported Date'].dt.date
    national_data = national_data.sort_values(by='Reported Date', ascending=False)
    st.dataframe(national_data.head(14).reset_index(drop=True), use_container_width=True, height=525)

    # The sowing-report uploader is embedded at the bottom of this page.
    editable_spreadsheet()
1042
+
1043
+
1044
+
1045
def fetch_and_store_data():
    """Scrape new White Sesame records from Agmarknet and append them to MongoDB.

    Resumes from the day after the newest stored record (or "01 Jan 2000" on
    first run) up to yesterday. Only rows with Variety == "White" are kept.

    Returns:
        The newly inserted DataFrame, or None if the request failed or the
        expected table was not present in the response.
    """
    # Find the most recent stored date so we only fetch the delta.
    latest_doc = collection.find_one(sort=[("Reported Date", -1)])
    if latest_doc and "Reported Date" in latest_doc:
        latest_date = latest_doc["Reported Date"]
    else:
        latest_date = None

    if latest_date:
        from_date = (latest_date + timedelta(days=1)).strftime('%d %b %Y')
    else:
        # No stored data yet: fetch the full available history.
        from_date = "01 Jan 2000"

    to_date = (datetime.now() - timedelta(days=1)).strftime('%d %b %Y')

    # Build the Agmarknet query URL (commodity 11 = Sesamum).
    base_url = "https://agmarknet.gov.in/SearchCmmMkt.aspx"
    params = {
        "Tx_Commodity": "11",
        "Tx_State": "0",
        "Tx_District": "0",
        "Tx_Market": "0",
        "DateFrom": from_date,
        "DateTo": to_date,
        "Fr_Date": from_date,
        "To_Date": to_date,
        "Tx_Trend": "2",
        "Tx_CommodityHead": "Sesamum(Sesame,Gingelly,Til)",
        "Tx_StateHead": "--Select--",
        "Tx_DistrictHead": "--Select--",
        "Tx_MarketHead": "--Select--"
    }
    full_url = f"{base_url}?{'&'.join(f'{k}={v}' for k, v in params.items())}"

    # SECURITY(review): the ScraperAPI key is hard-coded in source; move it to
    # st.secrets or an environment variable before publishing.
    api_url = "https://api.scraperapi.com"
    api_key = "bbbbde6b56c0fde1e2a61c914eb22d14"
    scraperapi_params = {
        'api_key': api_key,
        'url': full_url
    }

    response = requests.get(api_url, params=scraperapi_params)
    if response.status_code != 200:
        print(f"Failed to fetch data with status code: {response.status_code}")
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find("table", {"class": "tableagmark_new"})
    if not table:
        # Page came back but without the expected results table.
        return None

    headers = [th.get_text(strip=True) for th in table.find_all("th")]
    rows = []
    for row in table.find_all("tr")[1:]:  # skip the header row
        cells = [td.get_text(strip=True) for td in row.find_all("td")]
        if cells:
            rows.append(cells)

    df = pd.DataFrame(rows, columns=headers)
    df = df[df['Variety'] == "White"]
    # Unparseable dates become NaT and are dropped.
    df["Reported Date"] = pd.to_datetime(df["Reported Date"], format='%d %b %Y', errors='coerce')
    df.dropna(subset=["Reported Date"], inplace=True)
    df.sort_values(by="Reported Date", inplace=True)
    df.rename(columns={"State Name": "state"}, inplace=True)

    # Normalize column dtypes to match the stored schema.
    df["Modal Price (Rs./Quintal)"] = pd.to_numeric(df["Modal Price (Rs./Quintal)"], errors='coerce').astype("int64")
    df["Arrivals (Tonnes)"] = pd.to_numeric(df["Arrivals (Tonnes)"], errors='coerce').astype("float64")
    df["state"] = df["state"].astype("string")
    df["Market Name"] = df["Market Name"].astype("string")

    # Single bulk insert instead of one round-trip per row; insert_many
    # raises on an empty list, hence the guard.
    if not df.empty:
        collection.insert_many(df.to_dict("records"))

    return df
1123
+
1124
+
1125
+
1126
def get_dataframe_from_collection(collection):
    """Load every document in *collection* into a DataFrame, minus Mongo's '_id'."""
    # Pull all documents and hand them straight to pandas.
    frame = pd.DataFrame(list(collection.find()))

    # Strip the MongoDB-internal identifier column when present.
    if "_id" in frame.columns:
        frame = frame.drop(columns=["_id"])

    return frame
1138
+
1139
def authenticate_user(username, password):
    """Return True when *username* exists and *password* matches its hash."""
    record = users_collection.find_one({"username": username})
    # Short-circuits to False for unknown users; otherwise defers to the hash check.
    return bool(record) and check_password_hash(record['password'], password)
1144
+
1145
# CSS for responsive and professional design
# Injected once at app start; styles headers and all Streamlit buttons
# (hover, gradient glow animation) and adds a mobile breakpoint.
st.markdown("""
<style>
/* Main layout adjustments */
.main { max-width: 1200px; margin: 0 auto; }

/* Header style */
h1 {
    color: #4CAF50;
    font-family: 'Arial Black', sans-serif;
}

/* Button Styling */
.stButton>button {
    background-color: #4CAF50;
    color: white;
    font-size: 16px;
    border-radius: 12px;
    padding: 12px 20px;
    margin: 10px auto;
    border: none;
    cursor: pointer;
    transition: background-color 0.4s ease, transform 0.3s ease, box-shadow 0.3s ease;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.2);
}

/* Hover Effects for Button */
.stButton>button:hover {
    background-color: #2196F3; /* Change color on hover */
    color: #ffffff; /* Ensure text is readable */
    transform: scale(1.1) rotate(-2deg); /* Slight zoom and tilt */
    box-shadow: 0 8px 12px rgba(0, 0, 0, 0.3); /* Enhance shadow effect */
}

/* Animation Effect */
.stButton>button:after {
    content: '';
    position: absolute;
    top: 0;
    left: 0;
    right: 0;
    bottom: 0;
    border-radius: 12px;
    background: linear-gradient(45deg, #4CAF50, #2196F3, #FFC107, #FF5722);
    z-index: -1; /* Ensure gradient stays behind the button */
    opacity: 0;
    transition: opacity 0.5s ease;
}

/* Glow Effect on Hover */
.stButton>button:hover:after {
    opacity: 1;
    animation: glowing 2s infinite alternate;
}

/* Keyframes for Glow Animation */
@keyframes glowing {
    0% { box-shadow: 0 0 5px #4CAF50, 0 0 10px #4CAF50; }
    100% { box-shadow: 0 0 20px #2196F3, 0 0 30px #2196F3; }
}

/* Responsive Design */
@media (max-width: 768px) {
    .stButton>button {
        width: 100%;
        font-size: 14px;
    }
    h1 {
        font-size: 24px;
    }
}
</style>
""", unsafe_allow_html=True)
1218
# Initialise the auth flag once per session; Streamlit reruns the script on
# every interaction, so the guard prevents logging the user out on rerun.
if 'authenticated' not in st.session_state:
    st.session_state.authenticated = False
1220
 
1221
+ if st.session_state.authenticated:
1222
+ st.title("🌾 AgriPredict Dashboard")
1223
+ if st.button("Get Live Data Feed"):
1224
+ fetch_and_store_data()
1225
+ # Top-level radio buttons for switching views
1226
+ view_mode = st.radio("", ["Statistics", "Plots", "Predictions", "Exim"], horizontal=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1227
 
1228
+ if view_mode == "Plots":
1229
+ st.sidebar.header("Filters")
1230
+ selected_period = st.sidebar.selectbox(
1231
+ "Select Time Period",
1232
+ ["2 Weeks", "1 Month", "3 Months", "1 Year", "5 Years"],
1233
+ index=1
1234
+ )
1235
+ period_mapping = {
1236
+ "2 Weeks": 14,
1237
+ "1 Month": 30,
1238
+ "3 Months": 90,
1239
+ "1 Year": 365,
1240
+ "2 Years": 730,
1241
+ "5 Years": 1825
1242
+ }
1243
+ st.session_state.selected_period = period_mapping[selected_period]
1244
+
1245
+ # Add 'India' option to the list of states
1246
+ state_options = list(state_market_dict.keys()) + ['India']
1247
+ selected_state = st.sidebar.selectbox("Select", state_options)
1248
+
1249
+ market_wise = False
1250
+ if selected_state != 'India':
1251
+ market_wise = st.sidebar.checkbox("Market Wise Analysis")
1252
+ if market_wise:
1253
+ markets = state_market_dict.get(selected_state, [])
1254
+ selected_market = st.sidebar.selectbox("Select Market", markets)
1255
+ query_filter = {"state": selected_state, "Market Name": selected_market}
1256
+ else:
1257
+ query_filter = {"state": selected_state}
1258
  else:
1259
+ query_filter = {} # For India, no specific state filter
1260
+
1261
+ # Dropdown for data type
1262
+ data_type = st.sidebar.radio(
1263
+ "Select Data Type",
1264
+ ["Price", "Volume", "Both"]
1265
+ )
1266
+
1267
  # Add date filtering based on selected period
1268
+ query_filter["Reported Date"] = {
1269
+ "$gte": datetime.now() - timedelta(days=st.session_state.selected_period)
1270
+ }
1271
+
1272
+ # Submit button to trigger the query and plot
1273
+ if st.sidebar.button("✨ Let's go!"):
1274
+ # Fetch data from MongoDB
1275
+ try:
1276
+ cursor = collection.find(query_filter)
1277
+ data = list(cursor)
1278
+
1279
+ if data:
1280
+ # Convert MongoDB data to a DataFrame
1281
+ df = pd.DataFrame(data)
1282
+ df['Reported Date'] = pd.to_datetime(df['Reported Date'])
1283
+
1284
+ if selected_state == 'India':
1285
+ # Aggregate data for all of India
1286
+ df_grouped = df.groupby('Reported Date', as_index=False).agg({
1287
+ 'Arrivals (Tonnes)': 'sum',
1288
+ 'Modal Price (Rs./Quintal)': 'mean'
1289
+ })
1290
+ else:
1291
+ # Regular grouping by Reported Date
1292
+ df_grouped = df.groupby('Reported Date', as_index=False).agg({
1293
+ 'Arrivals (Tonnes)': 'sum',
1294
+ 'Modal Price (Rs./Quintal)': 'mean'
1295
+ })
1296
+
1297
+ # Create a complete date range
1298
+ date_range = pd.date_range(
1299
+ start=df_grouped['Reported Date'].min(),
1300
+ end=df_grouped['Reported Date'].max()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1301
  )
1302
+ df_grouped = df_grouped.set_index('Reported Date').reindex(date_range).rename_axis('Reported Date').reset_index()
1303
+
1304
+ # Fill missing values
1305
+ df_grouped['Arrivals (Tonnes)'] = df_grouped['Arrivals (Tonnes)'].fillna(method='ffill').fillna(method='bfill')
1306
+ df_grouped['Modal Price (Rs./Quintal)'] = df_grouped['Modal Price (Rs./Quintal)'].fillna(method='ffill').fillna(method='bfill')
1307
+
1308
+ st.subheader(f"📈 Trends for {selected_state} ({'Market: ' + selected_market if market_wise else 'State'})")
1309
+
1310
+ if data_type == "Both":
1311
+ # Min-Max Scaling
1312
+ scaler = MinMaxScaler()
1313
+ df_grouped[['Scaled Price', 'Scaled Arrivals']] = scaler.fit_transform(
1314
+ df_grouped[['Modal Price (Rs./Quintal)', 'Arrivals (Tonnes)']]
1315
+ )
1316
+
1317
+ fig = go.Figure()
1318
+
1319
+ fig.add_trace(go.Scatter(
1320
+ x=df_grouped['Reported Date'],
1321
+ y=df_grouped['Scaled Price'],
1322
+ mode='lines',
1323
+ name='Scaled Price',
1324
+ line=dict(width=1, color='green'),
1325
+ text=df_grouped['Modal Price (Rs./Quintal)'],
1326
+ hovertemplate='Date: %{x}<br>Scaled Price: %{y:.2f}<br>Actual Price: %{text:.2f}<extra></extra>'
1327
+ ))
1328
+
1329
+ fig.add_trace(go.Scatter(
1330
+ x=df_grouped['Reported Date'],
1331
+ y=df_grouped['Scaled Arrivals'],
1332
+ mode='lines',
1333
+ name='Scaled Arrivals',
1334
+ line=dict(width=1, color='blue'),
1335
+ text=df_grouped['Arrivals (Tonnes)'],
1336
+ hovertemplate='Date: %{x}<br>Scaled Arrivals: %{y:.2f}<br>Actual Arrivals: %{text:.2f}<extra></extra>'
1337
+ ))
1338
+
1339
+ fig.update_layout(
1340
+ title="Price and Arrivals Trend",
1341
+ xaxis_title='Date',
1342
+ yaxis_title='Scaled Values',
1343
+ template='plotly_white'
1344
+ )
1345
+ st.plotly_chart(fig, use_container_width=True)
1346
+
1347
+ elif data_type == "Price":
1348
+ # Plot Modal Price
1349
+ fig = go.Figure()
1350
+ fig.add_trace(go.Scatter(
1351
+ x=df_grouped['Reported Date'],
1352
+ y=df_grouped['Modal Price (Rs./Quintal)'],
1353
+ mode='lines',
1354
+ name='Modal Price',
1355
+ line=dict(width=1, color='green')
1356
+ ))
1357
+ fig.update_layout(title="Modal Price Trend", xaxis_title='Date', yaxis_title='Price (/Quintall)', template='plotly_white')
1358
+ st.plotly_chart(fig, use_container_width=True)
1359
+
1360
+ elif data_type == "Volume":
1361
+ # Plot Arrivals (Tonnes)
1362
+ fig = go.Figure()
1363
+ fig.add_trace(go.Scatter(
1364
+ x=df_grouped['Reported Date'],
1365
+ y=df_grouped['Arrivals (Tonnes)'],
1366
+ mode='lines',
1367
+ name='Arrivals',
1368
+ line=dict(width=1, color='blue')
1369
+ ))
1370
+ fig.update_layout(title="Arrivals Trend", xaxis_title='Date', yaxis_title='Volume (in Tonnes)', template='plotly_white')
1371
+ st.plotly_chart(fig, use_container_width=True)
1372
+
1373
        # Pairs with an `if data:` guard earlier in this (off-screen) try body:
        # reached when the Mongo query returned no documents.
        else:
            st.warning("⚠️ No data found for the selected filters.")

    # Closes the try that wraps the fetch/processing above; any failure is
    # surfaced directly in the Streamlit UI instead of crashing the app.
    except Exception as e:
        st.error(f"❌ Error fetching data: {e}")
1378
+ elif view_mode == "Predictions":
1379
+ st.subheader("📊 Model Analysis")
1380
+ sub_option = st.radio("Select one of the following", ["India", "States", "Market"], horizontal=True)
1381
+ sub_timeline = st.radio("Select one of the following horizons", ["14 days", "1 month", "3 month"], horizontal=True)
1382
+ if sub_option == "States":
1383
+ states = ["Karnataka", "Madhya Pradesh", "Gujarat", "Uttar Pradesh", "Telangana"]
1384
+ selected_state = st.selectbox("Select State for Model Training", states)
1385
+ filter_key = f"state_{selected_state}" # Unique key for each state
1386
 
1387
+ if st.button("Forecast"):
1388
+ query_filter = {"state": selected_state}
1389
+ df = fetch_and_process_data(query_filter)
1390
+ if sub_timeline == "14 days":
1391
+ forecast(df, filter_key, 14)
1392
+ elif sub_timeline == "1 month":
1393
+ forecast(df, filter_key, 30)
1394
  else:
1395
+ forecast(df, filter_key, 90)
1396
+ elif sub_option == "Market":
1397
+ market_options = ["Rajkot", "Neemuch", "Kalburgi", "Warangal"]
1398
+ selected_market = st.selectbox("Select Market for Model Training", market_options)
1399
+ filter_key = f"market_{selected_market}" # Unique key for each market
1400
+ if st.button("Forecast"):
1401
+ query_filter = {"Market Name": selected_market}
1402
+ df = fetch_and_process_data(query_filter)
1403
+ if sub_timeline == "14 days":
1404
+ forecast(df, filter_key, 14)
1405
+ elif sub_timeline == "1 month":
1406
+ forecast(df, filter_key, 30)
1407
+ else:
1408
+ forecast(df, filter_key, 90)
1409
+
1410
+ elif sub_option == "India":
1411
+ df = collection_to_dataframe(impExp)
1412
+ if True:
1413
+ if st.button("Forecast"):
1414
+ query_filter = {}
1415
+ df = fetch_and_process_data(query_filter)
1416
+ if sub_timeline == "14 days":
1417
+ forecast(df, "India", 14)
1418
+ elif sub_timeline == "1 month":
1419
+ forecast(df, "India", 30)
1420
+ else:
1421
+ forecast(df, "India", 90)
1422
+
1423
+ elif view_mode=="Statistics":
1424
+ document = collection.find_one()
1425
+ print(document)
1426
+ df = get_dataframe_from_collection(collection)
1427
+ print(df)
1428
+ display_statistics(df)
1429
+ elif view_mode == "Exim":
1430
+ df = collection_to_dataframe(impExp)
1431
+
1432
+ # Add radio buttons for user selection
1433
+ plot_option = st.radio(
1434
+ "Select the data to visualize:",
1435
+ ["Import Price", "Import Quantity", "Export Price", "Export Quantity"],
1436
+ horizontal=True
1437
+ )
1438
+
1439
+ # Dropdown for time period selection
1440
+ time_period = st.selectbox(
1441
+ "Select time period:",
1442
+ ["1 Month", "6 Months", "1 Year", "2 Years"]
1443
+ )
1444
+
1445
+ # Convert Reported Date to datetime
1446
+ df["Reported Date"] = pd.to_datetime(df["Reported Date"], format="%Y-%m-%d")
1447
+
1448
+ # Filter data based on the time period
1449
+ if time_period == "1 Month":
1450
+ start_date = pd.Timestamp.now() - pd.DateOffset(months=1)
1451
+ elif time_period == "6 Months":
1452
+ start_date = pd.Timestamp.now() - pd.DateOffset(months=6)
1453
+ elif time_period == "1 Year":
1454
+ start_date = pd.Timestamp.now() - pd.DateOffset(years=1)
1455
+ elif time_period == "2 Years":
1456
+ start_date = pd.Timestamp.now() - pd.DateOffset(years=2)
1457
+
1458
+ filtered_df = df[df["Reported Date"] >= start_date]
1459
+
1460
+ # Process data based on the selected option
1461
+ if plot_option == "Import Price":
1462
+ grouped_df = (
1463
+ filtered_df.groupby("Reported Date", as_index=False)["VALUE_IMPORT"]
1464
+ .mean()
1465
+ .rename(columns={"VALUE_IMPORT": "Average Import Price"})
1466
+ )
1467
+ y_axis_label = "Average Import Price (Rs.)"
1468
+ elif plot_option == "Import Quantity":
1469
+ grouped_df = (
1470
+ filtered_df.groupby("Reported Date", as_index=False)["QUANTITY_IMPORT"]
1471
+ .sum()
1472
+ .rename(columns={"QUANTITY_IMPORT": "Total Import Quantity"})
1473
+ )
1474
+ y_axis_label = "Total Import Quantity (Tonnes)"
1475
+ elif plot_option == "Export Price":
1476
+ grouped_df = (
1477
+ filtered_df.groupby("Reported Date", as_index=False)["VALUE_EXPORT"]
1478
+ .mean()
1479
+ .rename(columns={"VALUE_EXPORT": "Average Export Price"})
1480
+ )
1481
+ y_axis_label = "Average Export Price (Rs.)"
1482
+ elif plot_option == "Export Quantity":
1483
+ grouped_df = (
1484
+ filtered_df.groupby("Reported Date", as_index=False)["QUANTITY_IMPORT"]
1485
+ .sum()
1486
+ .rename(columns={"QUANTITY_IMPORT": "Total Export Quantity"})
1487
+ )
1488
+ y_axis_label = "Total Export Quantity (Tonnes)"
1489
+
1490
+ # Plot using Plotly
1491
+ fig = px.line(
1492
+ grouped_df,
1493
+ x="Reported Date",
1494
+ y=grouped_df.columns[1], # Dynamic y-axis column name
1495
+ title=f"{plot_option} Over Time",
1496
+ labels={"Reported Date": "Date", grouped_df.columns[1]: y_axis_label},
1497
+ )
1498
+ st.plotly_chart(fig)
1499
+
1500
+
1501
+ else:
1502
+ with st.form("login_form"):
1503
+ st.subheader("Please log in")
1504
+
1505
+ username = st.text_input("Username")
1506
+ password = st.text_input("Password", type="password")
1507
+ login_button = st.form_submit_button("Login")
1508
 
1509
+ if login_button:
1510
+ if authenticate_user(username, password):
1511
+ st.session_state.authenticated = True # Set the authentication state to True
1512
+ st.session_state['username'] = username # Store username in session state
1513
+ st.write("Login successful!")
1514
+ st.rerun() # Page will automatically rerun to show the protected content
1515
+ else:
1516
+ st.error("Invalid username or password")