ThejasRao commited on
Commit
fa4fc8b
·
1 Parent(s): 3029a46

Update agripredict app

Browse files
.env ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ MONGO_URI=mongodb+srv://Agripredict:TjXSvMhOis49qH8E@cluster0.gek7n.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0
2
+
app.py DELETED
@@ -1,1625 +0,0 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import numpy as np
4
- from pymongo import MongoClient
5
- from sklearn.model_selection import train_test_split, GridSearchCV
6
- from sklearn.metrics import mean_squared_error, mean_absolute_error
7
- from xgboost import XGBRegressor
8
- from st_aggrid import AgGrid, GridOptionsBuilder, DataReturnMode, GridUpdateMode
9
- from sklearn.preprocessing import MinMaxScaler
10
- from datetime import datetime, timedelta
11
- import plotly.express as px
12
- import plotly.graph_objects as go
13
- import calendar
14
- import certifi
15
- import requests
16
- from werkzeug.security import generate_password_hash, check_password_hash
17
- from bs4 import BeautifulSoup
18
- import json
19
- from itertools import product
20
- from tqdm import tqdm
21
- import io
22
- from statsmodels.tsa.statespace.sarimax import SARIMAX
23
- from datetime import datetime, timedelta
24
-
25
def generate_date_ranges(start_date: str, end_date: str):
    """Yield one (date, date) tuple per day from *start_date* to *end_date*, inclusive.

    Both bounds are strings in "%d %b %Y" form (e.g. "01 Jan 2024"); each
    yielded pair repeats the same formatted day twice, i.e. a single-day range.
    """
    fmt = "%d %b %Y"
    day = datetime.strptime(start_date, fmt)
    final = datetime.strptime(end_date, fmt)
    step = timedelta(days=1)
    while day <= final:
        stamp = day.strftime(fmt)
        yield stamp, stamp
        day += step
32
-
33
import os

# MongoDB connection setup.
# SECURITY FIX: the connection string (with username and password) was
# previously hard-coded here.  Read it from the environment instead — the
# repository's .env file defines MONGO_URI, and the hosting environment is
# expected to export it.  The credential committed earlier should be rotated.
mongo_uri = os.environ.get("MONGO_URI")
if not mongo_uri:
    st.error("MongoDB URI is not set!")
    st.stop()
else:
    # tlsCAFile=certifi.where() pins the CA bundle so TLS verification works
    # in environments with an incomplete system certificate store.
    client = MongoClient(mongo_uri, tlsCAFile=certifi.where())
    db = client["AgriPredict"]
    collection = db["WhiteSesame"]
    best_params_collection = db["BestParams"]
    best_params_collection_1m = db["BestParams_1m"]
    best_params_collection_3m = db["BestParams_3m"]
    impExp = db["impExp"]
    users_collection = db["user"]
    market_price_data = db["new_data_price"]
47
-
48
# State -> list of mandi (market yard) names.
# NOTE(review): how this mapping is consumed (presumably to populate
# state/market filter widgets) is not visible in this chunk — confirm
# against the UI code before renaming any entry; the strings must match
# the values stored in the price collections exactly.
state_market_dict = {
    "Karnataka": [
        "Kalburgi",
        "Basava Kalayana",
        "Lingasugur",
        "Kustagi",
        "Bangalore",
        "Bagalakot",
        "Hubli (Amaragol)"
    ],
    "Gujarat": [
        "Siddhpur",
        "Jasdan",
        "Gondal",
        "Morbi",
        "Botad",
        "Visavadar",
        "Dahod",
        "Rajkot",
        "Junagadh",
        "Savarkundla",
        "Bhavnagar",
        "Rajula",
        "Dhoraji",
        "Amreli",
        "Mahuva(Station Road)",
        "Mansa",
        "Porbandar",
        "Dasada Patadi",
        "Halvad",
        "Chotila",
        "Bhanvad",
        "Dhansura",
        "Babra",
        "Upleta",
        "Palitana",
        "Jetpur(Dist.Rajkot)",
        "S.Mandvi",
        "Mandvi",
        "Khambha",
        "Kadi",
        "Taleja",
        "Himatnagar",
        "Lakhani",
        "Rapar",
        "Una",
        "Dhari",
        "Bagasara",
        "Jam Jodhpur",
        "Veraval",
        "Dhragradhra",
        "Deesa"
    ],
    "Uttar Pradesh": [
        "Bangarmau",
        "Sultanpur",
        "Maudaha",
        "Mauranipur",
        "Lalitpur",
        "Konch",
        "Muskara",
        "Raath",
        "Varipaal",
        "Auraiya",
        "Orai",
        "Banda",
        "Kishunpur",
        "Ait",
        "Jhansi",
        "Kurara",
        "Chirgaon",
        "Charkhari",
        "Moth",
        "Jalaun",
        "Sirsaganj",
        "Shikohabad"
    ],
    "Madhya Pradesh": [
        "Naugaon",
        "Mehar",
        "Kailaras",
        "Datia",
        "LavKush Nagar(Laundi)",
        "Ajaygarh",
        "Rajnagar",
        "Sevda",
        "Neemuch",
        "Sheopurkalan",
        "Lashkar",
        "Alampur",
        "Niwadi",
        "Dabra",
        "Ujjain",
        "Bijawar",
        "Sidhi",
        "Barad",
        "Pohari",
        "Shahagarh",
        "Lateri",
        "Banapura",
        "Panna",
        "Garhakota",
        "Katni",
        "Chhatarpur",
        "Beohari",
        "Satna",
        "Sabalgarh",
        "Hanumana",
        "Bhander",
        "Banmorkalan",
        "Jaora",
        "Bagli",
        "Singroli"
    ],
    "Telangana": [
        "Warangal"
    ]
}
166
def create_forecasting_features(df):
    """Engineer calendar, lag, rolling, and seasonal features for the 14-day model.

    Expects a 'Reported Date' column (or a DatetimeIndex) plus the target
    column 'Modal Price (Rs./Quintal)'.  Returns a new frame with the feature
    columns appended and 'Reported Date' restored as a regular column.
    """
    df = df.copy()
    # Work on a DatetimeIndex so the offset arithmetic on df.index below works.
    if not isinstance(df.index, pd.DatetimeIndex):
        df = df.set_index('Reported Date')
        df.index = pd.to_datetime(df.index)

    # Timestamp -> price lookup used for exact-offset lag features
    # (dates absent from the history map to NaN).
    target_map = df['Modal Price (Rs./Quintal)'].to_dict()

    # Calendar components.
    df['dayofweek'] = df.index.dayofweek
    df['quarter'] = df.index.quarter
    df['month'] = df.index.month
    df['year'] = df.index.year
    df['dayofyear'] = df.index.dayofyear
    df['weekofyear'] = df.index.isocalendar().week

    # Price observed exactly N days / months earlier.
    df['lag14'] = (df.index - pd.Timedelta(days=14)).map(target_map)
    df['lag28'] = (df.index - pd.Timedelta(days=28)).map(target_map)
    df['lag56'] = (df.index - pd.Timedelta(days=56)).map(target_map)
    df['lag_3months'] = (df.index - pd.DateOffset(months=3)).map(target_map)
    df['lag_6months'] = (df.index - pd.DateOffset(months=6)).map(target_map)
    # Trailing rolling statistics (min_periods=1 so early rows are not NaN-only).
    for window in [7, 14, 28]:
        df[f'rolling_mean_{window}'] = df['Modal Price (Rs./Quintal)'].rolling(window=window, min_periods=1).mean()
        df[f'rolling_std_{window}'] = df['Modal Price (Rs./Quintal)'].rolling(window=window, min_periods=1).std()

    # Exponential moving averages and per-period mean encodings of the target.
    df['ema7'] = df['Modal Price (Rs./Quintal)'].ewm(span=7, adjust=False).mean()
    df['ema14'] = df['Modal Price (Rs./Quintal)'].ewm(span=14, adjust=False).mean()
    df['monthly_avg'] = df.groupby('month')['Modal Price (Rs./Quintal)'].transform('mean')
    df['weekly_avg'] = df.groupby('weekofyear')['Modal Price (Rs./Quintal)'].transform('mean')
    df['dayofweek_avg'] = df.groupby('dayofweek')['Modal Price (Rs./Quintal)'].transform('mean')

    # Fourier terms encoding annual and 14-day cycles.
    df['fourier_sin_365'] = np.sin(2 * np.pi * df.index.dayofyear / 365)
    df['fourier_cos_365'] = np.cos(2 * np.pi * df.index.dayofyear / 365)
    df['fourier_sin_14'] = np.sin(2 * np.pi * df.index.dayofyear / 14)
    df['fourier_cos_14'] = np.cos(2 * np.pi * df.index.dayofyear / 14)

    # NOTE(review): .map(target_map).min()/.max() reduce over the WHOLE mapped
    # series, so recent_min_14 / recent_max_14 are constant columns, not
    # rolling 14-day extrema — this looks unintended; confirm before relying
    # on these features.
    df['recent_min_14'] = (df.index - pd.Timedelta(days=14)).map(target_map).min()
    df['recent_max_14'] = (df.index - pd.Timedelta(days=14)).map(target_map).max()
    df['recent_range_14'] = df['recent_max_14'] - df['recent_min_14']

    # Year-level mean encoding and an expanding (all-history-so-far) mean.
    df['yearly_avg'] = df.groupby('year')['Modal Price (Rs./Quintal)'].transform('mean')
    df['cumulative_mean'] = df['Modal Price (Rs./Quintal)'].expanding().mean()

    return df.reset_index()
209
-
210
def create_forecasting_features_1m(df):
    """Engineer features for the 1-month (30-day) model.

    Same structure as create_forecasting_features, but with 30/60/90-day lags
    and windows and 6/12-month seasonal lags.  Expects 'Reported Date' and
    'Modal Price (Rs./Quintal)'; returns a new frame with features appended.
    """
    df = df.copy()
    # Work on a DatetimeIndex so the offset arithmetic on df.index below works.
    if not isinstance(df.index, pd.DatetimeIndex):
        df = df.set_index('Reported Date')
        df.index = pd.to_datetime(df.index)

    # Timestamp -> price lookup for exact-offset lags (missing dates -> NaN).
    target_map = df['Modal Price (Rs./Quintal)'].to_dict()

    # Calendar components.
    df['dayofweek'] = df.index.dayofweek
    df['quarter'] = df.index.quarter
    df['month'] = df.index.month
    df['year'] = df.index.year
    df['dayofyear'] = df.index.dayofyear
    df['weekofyear'] = df.index.isocalendar().week

    # Price observed exactly N days / months earlier.
    df['lag_30'] = (df.index - pd.Timedelta(days=30)).map(target_map)
    df['lag_60'] = (df.index - pd.Timedelta(days=60)).map(target_map)
    df['lag_90'] = (df.index - pd.Timedelta(days=90)).map(target_map)
    df['lag_6months'] = (df.index - pd.DateOffset(months=6)).map(target_map)
    df['lag_12months'] = (df.index - pd.DateOffset(months=12)).map(target_map)

    # Trailing rolling statistics.
    for window in [30, 60, 90]:
        df[f'rolling_mean_{window}'] = df['Modal Price (Rs./Quintal)'].rolling(window=window, min_periods=1).mean()
        df[f'rolling_std_{window}'] = df['Modal Price (Rs./Quintal)'].rolling(window=window, min_periods=1).std()

    # Exponential moving averages.
    df['ema_30'] = df['Modal Price (Rs./Quintal)'].ewm(span=30, adjust=False).mean()
    df['ema_60'] = df['Modal Price (Rs./Quintal)'].ewm(span=60, adjust=False).mean()

    # Per-period mean encodings of the target.
    df['monthly_avg'] = df.groupby('month')['Modal Price (Rs./Quintal)'].transform('mean')
    df['weekly_avg'] = df.groupby('weekofyear')['Modal Price (Rs./Quintal)'].transform('mean')
    df['dayofweek_avg'] = df.groupby('dayofweek')['Modal Price (Rs./Quintal)'].transform('mean')

    # Fourier terms encoding annual and 30-day cycles.
    df['fourier_sin_365'] = np.sin(2 * np.pi * df.index.dayofyear / 365)
    df['fourier_cos_365'] = np.cos(2 * np.pi * df.index.dayofyear / 365)
    df['fourier_sin_30'] = np.sin(2 * np.pi * df.index.dayofyear / 30)
    df['fourier_cos_30'] = np.cos(2 * np.pi * df.index.dayofyear / 30)

    # NOTE(review): as in create_forecasting_features, .min()/.max() here
    # reduce to scalars, producing constant columns rather than rolling
    # extrema — confirm intent.
    df['recent_min_30'] = (df.index - pd.Timedelta(days=30)).map(target_map).min()
    df['recent_max_30'] = (df.index - pd.Timedelta(days=30)).map(target_map).max()
    df['recent_range_30'] = df['recent_max_30'] - df['recent_min_30']

    # Year-level mean encoding and an expanding (all-history-so-far) mean.
    df['yearly_avg'] = df.groupby('year')['Modal Price (Rs./Quintal)'].transform('mean')
    df['cumulative_mean'] = df['Modal Price (Rs./Quintal)'].expanding().mean()

    return df.reset_index()
255
-
256
def create_forecasting_features_3m(df):
    """Engineer features for the 3-month (90-day) model.

    Same structure as create_forecasting_features, but with quarterly-to-annual
    lags and 90/180/270/365-day windows.  Expects 'Reported Date' and
    'Modal Price (Rs./Quintal)'; returns a new frame with features appended.
    """
    df = df.copy()
    # Work on a DatetimeIndex so the offset arithmetic on df.index below works.
    if not isinstance(df.index, pd.DatetimeIndex):
        df = df.set_index('Reported Date')
        df.index = pd.to_datetime(df.index)

    # Timestamp -> price lookup for exact-offset lags (missing dates -> NaN).
    target_map = df['Modal Price (Rs./Quintal)'].to_dict()

    # Calendar components.
    df['dayofweek'] = df.index.dayofweek
    df['quarter'] = df.index.quarter
    df['month'] = df.index.month
    df['year'] = df.index.year
    df['dayofyear'] = df.index.dayofyear
    df['weekofyear'] = df.index.isocalendar().week

    # Price observed exactly N months earlier (quarterly steps up to a year).
    df['lag_3months'] = (df.index - pd.DateOffset(months=3)).map(target_map)
    df['lag_6months'] = (df.index - pd.DateOffset(months=6)).map(target_map)
    df['lag_9months'] = (df.index - pd.DateOffset(months=9)).map(target_map)
    df['lag_12months'] = (df.index - pd.DateOffset(months=12)).map(target_map)

    # Trailing rolling statistics.
    for window in [90, 180, 270, 365]:
        df[f'rolling_mean_{window}'] = df['Modal Price (Rs./Quintal)'].rolling(window=window, min_periods=1).mean()
        df[f'rolling_std_{window}'] = df['Modal Price (Rs./Quintal)'].rolling(window=window, min_periods=1).std()

    # Exponential moving averages and per-period mean encodings.
    df['ema90'] = df['Modal Price (Rs./Quintal)'].ewm(span=90, adjust=False).mean()
    df['ema180'] = df['Modal Price (Rs./Quintal)'].ewm(span=180, adjust=False).mean()
    df['monthly_avg'] = df.groupby('month')['Modal Price (Rs./Quintal)'].transform('mean')
    df['weekly_avg'] = df.groupby('weekofyear')['Modal Price (Rs./Quintal)'].transform('mean')
    df['dayofweek_avg'] = df.groupby('dayofweek')['Modal Price (Rs./Quintal)'].transform('mean')

    # Fourier terms encoding quarterly and monthly cycles.
    df['fourier_sin_90'] = np.sin(2 * np.pi * df.index.dayofyear / 90)
    df['fourier_cos_90'] = np.cos(2 * np.pi * df.index.dayofyear / 90)
    df['fourier_sin_30'] = np.sin(2 * np.pi * df.index.dayofyear / 30)
    df['fourier_cos_30'] = np.cos(2 * np.pi * df.index.dayofyear / 30)

    # NOTE(review): as in the other feature builders, .min()/.max() reduce to
    # scalars, producing constant columns rather than rolling extrema —
    # confirm intent.
    df['recent_min_90'] = (df.index - pd.Timedelta(days=90)).map(target_map).min()
    df['recent_max_90'] = (df.index - pd.Timedelta(days=90)).map(target_map).max()
    df['recent_range_90'] = df['recent_max_90'] - df['recent_min_90']

    # Year-level mean encoding and an expanding (all-history-so-far) mean.
    df['yearly_avg'] = df.groupby('year')['Modal Price (Rs./Quintal)'].transform('mean')
    df['cumulative_mean'] = df['Modal Price (Rs./Quintal)'].expanding().mean()

    return df.reset_index()
299
-
300
-
301
def preprocess_data(df):
    """Normalise raw price records to one row per calendar day.

    Keeps only the date and modal-price columns, averages duplicate dates,
    reindexes onto a continuous daily range between the first and last
    report, and fills gaps forward then backward so every day has a price.
    """
    # .copy() so the column assignment below does not raise
    # SettingWithCopyWarning (we were writing into a slice of the caller's frame).
    df = df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    df['Reported Date'] = pd.to_datetime(df['Reported Date'])
    # Average out multiple reports recorded on the same day.
    df = df.groupby('Reported Date', as_index=False).mean()
    # Reindex onto every calendar day in the observed span.
    full_date_range = pd.date_range(df['Reported Date'].min(), df['Reported Date'].max())
    df = df.set_index('Reported Date').reindex(full_date_range).rename_axis('Reported Date').reset_index()

    # fillna(method=...) is deprecated in pandas 2.x; .ffill()/.bfill() are
    # the supported equivalents with identical semantics.
    df['Modal Price (Rs./Quintal)'] = (
        df['Modal Price (Rs./Quintal)'].ffill().bfill()
    )
    return df
312
-
313
-
314
def train_and_evaluate(df):
    """Grid-search an XGBoost regressor for the 14-day horizon and plot results.

    Splits the feature-engineered frame chronologically at 2024-01-01
    (train before, test after), exhaustively searches a small hyper-parameter
    grid, reports RMSE/MAE and a train/test/predicted chart via Streamlit,
    and returns the winning hyper-parameter dict.

    NOTE(review): parameters are selected by model.score() on the *test*
    split, i.e. the test set is used for model selection, so the reported
    metrics are optimistic — confirm this is acceptable.
    """
    import streamlit as st
    progress_bar = st.progress(0)
    def update_tuning_progress(current, total):
        # st.progress expects an int percentage in [0, 100].
        progress = int((current / total) * 100)
        progress_bar.progress(progress)

    df = create_forecasting_features(df)

    # Chronological split: everything before 2024 trains, 2024 onward tests.
    train_df = df[df['Reported Date'] < '2024-01-01']
    test_df = df[df['Reported Date'] >= '2024-01-01']

    X_train = train_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
    y_train = train_df['Modal Price (Rs./Quintal)']
    X_test = test_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
    y_test = test_df['Modal Price (Rs./Quintal)']
    st.write("Performing hyperparameter tuning...")
    param_grid = {
        'learning_rate': [0.01, 0.1, 0.2],
        'max_depth': [3, 5, 7],
        'n_estimators': [50, 100, 150],
        'booster': ['gbtree', 'dart']
    }

    model = XGBRegressor()
    # Total combination count; used only to drive the progress bar.
    param_combinations = len(param_grid['learning_rate']) * len(param_grid['max_depth']) * \
                         len(param_grid['n_estimators']) * len(param_grid['booster'])

    current_combination = 0

    def custom_grid_search():
        # Exhaustive search over the grid, keeping the combination with the
        # best R^2 on the held-out test split.
        nonlocal current_combination
        best_score = float('-inf')
        best_params = None
        for learning_rate in param_grid['learning_rate']:
            for max_depth in param_grid['max_depth']:
                for n_estimators in param_grid['n_estimators']:
                    for booster in param_grid['booster']:
                        model.set_params(
                            learning_rate=learning_rate,
                            max_depth=max_depth,
                            n_estimators=n_estimators,
                            booster=booster
                        )
                        model.fit(X_train, y_train)
                        score = model.score(X_test, y_test)
                        if score > best_score:
                            best_score = score
                            best_params = {
                                'learning_rate': learning_rate,
                                'max_depth': max_depth,
                                'n_estimators': n_estimators,
                                'booster': booster
                            }
                        current_combination += 1
                        update_tuning_progress(current_combination, param_combinations)
        return best_params

    best_params = custom_grid_search()

    st.write("Training the best model and making predictions...")
    best_model = XGBRegressor(**best_params)
    best_model.fit(X_train, y_train)
    y_pred = best_model.predict(X_test)

    # NOTE(review): squared=False is deprecated (and removed in newer
    # scikit-learn in favour of root_mean_squared_error) — confirm the
    # pinned scikit-learn version.
    rmse = mean_squared_error(y_test, y_pred, squared=False)
    mae = mean_absolute_error(y_test, y_pred)
    st.write(f"RMSE: {rmse}")
    st.write(f"MAE: {mae}")

    # Build one long frame labelled Train/Test/Predicted for plotting.
    train_plot_df = train_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    train_plot_df['Type'] = 'Train'

    test_plot_df = test_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    test_plot_df['Type'] = 'Test'

    predicted_plot_df = test_df[['Reported Date']].copy()
    predicted_plot_df['Modal Price (Rs./Quintal)'] = y_pred
    predicted_plot_df['Type'] = 'Predicted'

    plot_df = pd.concat([train_plot_df, test_plot_df, predicted_plot_df])

    fig = go.Figure()

    for plot_type, color, dash in [('Train', 'blue', None), ('Test', 'orange', None),
                                   ('Predicted', 'green', 'dot')]:
        data = plot_df[plot_df['Type'] == plot_type]
        fig.add_trace(go.Scatter(
            x=data['Reported Date'],
            y=data['Modal Price (Rs./Quintal)'],
            mode='lines',
            name=f"{plot_type} Data",
            line=dict(color=color, dash=dash)
        ))

    fig.update_layout(
        title="Train, Test, and Predicted Data",
        xaxis_title="Date",
        yaxis_title="Modal Price (Rs./Quintal)",
        template="plotly_white"
    )

    st.plotly_chart(fig, use_container_width=True)

    return best_params
419
-
420
def train_and_evaluate_1m(df):
    """Grid-search an XGBoost regressor for the 30-day horizon and plot results.

    Like train_and_evaluate, but uses the 1-month feature set and limits the
    test window to the 30 days following the 2024-01-01 split.  Returns the
    winning hyper-parameter dict.

    NOTE(review): as in train_and_evaluate, parameters are selected by score
    on the test split itself, so reported metrics are optimistic.
    """
    import streamlit as st
    import pandas as pd
    import plotly.graph_objects as go
    from xgboost import XGBRegressor
    from sklearn.metrics import mean_squared_error, mean_absolute_error

    progress_bar = st.progress(0)

    def update_tuning_progress(current, total):
        # st.progress expects an int percentage in [0, 100].
        progress = int((current / total) * 100)
        progress_bar.progress(progress)

    df = create_forecasting_features_1m(df)

    # Train on everything before the split; test on the next 30 days only.
    split_date = pd.to_datetime("2024-01-01")
    test_horizon = pd.DateOffset(days=30)

    train_df = df[df['Reported Date'] < split_date]
    test_df = df[(df['Reported Date'] >= split_date) & (df['Reported Date'] < split_date + test_horizon)]

    X_train = train_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
    y_train = train_df['Modal Price (Rs./Quintal)']
    X_test = test_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
    y_test = test_df['Modal Price (Rs./Quintal)']

    st.write("Performing hyperparameter tuning...")
    param_grid = {
        'learning_rate': [0.01, 0.1, 0.2],
        'max_depth': [3, 5, 7],
        'n_estimators': [50, 100, 150],
        'booster': ['gbtree', 'dart']
    }

    model = XGBRegressor()
    # Total combination count; used only to drive the progress bar.
    param_combinations = len(param_grid['learning_rate']) * len(param_grid['max_depth']) * \
                         len(param_grid['n_estimators']) * len(param_grid['booster'])

    current_combination = 0

    def custom_grid_search():
        # Exhaustive search, keeping the combination with the best test R^2.
        nonlocal current_combination
        best_score = float('-inf')
        best_params = None
        for learning_rate in param_grid['learning_rate']:
            for max_depth in param_grid['max_depth']:
                for n_estimators in param_grid['n_estimators']:
                    for booster in param_grid['booster']:
                        model.set_params(
                            learning_rate=learning_rate,
                            max_depth=max_depth,
                            n_estimators=n_estimators,
                            booster=booster
                        )
                        model.fit(X_train, y_train)
                        score = model.score(X_test, y_test)
                        if score > best_score:
                            best_score = score
                            best_params = {
                                'learning_rate': learning_rate,
                                'max_depth': max_depth,
                                'n_estimators': n_estimators,
                                'booster': booster
                            }
                        current_combination += 1
                        update_tuning_progress(current_combination, param_combinations)
        return best_params

    best_params = custom_grid_search()
    st.write("Training the best model and making predictions...")
    best_model = XGBRegressor(**best_params)
    best_model.fit(X_train, y_train)
    y_pred = best_model.predict(X_test)

    # NOTE(review): squared=False is deprecated in newer scikit-learn
    # (root_mean_squared_error is the replacement) — confirm pinned version.
    rmse = mean_squared_error(y_test, y_pred, squared=False)
    mae = mean_absolute_error(y_test, y_pred)
    st.write(f"RMSE: {rmse}")
    st.write(f"MAE: {mae}")

    # Build one long frame labelled Train/Test/Predicted for plotting.
    train_plot_df = train_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    train_plot_df['Type'] = 'Train'

    test_plot_df = test_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    test_plot_df['Type'] = 'Test'

    predicted_plot_df = test_df[['Reported Date']].copy()
    predicted_plot_df['Modal Price (Rs./Quintal)'] = y_pred
    predicted_plot_df['Type'] = 'Predicted'

    plot_df = pd.concat([train_plot_df, test_plot_df, predicted_plot_df])

    fig = go.Figure()

    for plot_type, color, dash in [('Train', 'blue', None), ('Test', 'orange', None),
                                   ('Predicted', 'green', 'dot')]:
        data = plot_df[plot_df['Type'] == plot_type]
        fig.add_trace(go.Scatter(
            x=data['Reported Date'],
            y=data['Modal Price (Rs./Quintal)'],
            mode='lines',
            name=f"{plot_type} Data",
            line=dict(color=color, dash=dash)
        ))

    fig.update_layout(
        title="Train, Test, and Predicted Data",
        xaxis_title="Date",
        yaxis_title="Modal Price (Rs./Quintal)",
        template="plotly_white"
    )

    st.plotly_chart(fig, use_container_width=True)

    return best_params
534
-
535
def train_and_evaluate_3m(df):
    """Grid-search an XGBoost regressor for the 90-day horizon and plot results.

    Like train_and_evaluate, but uses the 3-month feature set and splits
    chronologically at 2023-10-01 (so the test period is roughly one quarter
    earlier than the other horizons).  Returns the winning hyper-parameter dict.

    NOTE(review): as in train_and_evaluate, parameters are selected by score
    on the test split itself, so reported metrics are optimistic.
    """
    import streamlit as st
    progress_bar = st.progress(0)
    def update_tuning_progress(current, total):
        # st.progress expects an int percentage in [0, 100].
        progress = int((current / total) * 100)
        progress_bar.progress(progress)

    df = create_forecasting_features_3m(df)
    # Chronological split at 2023-10-01.
    train_df = df[df['Reported Date'] < '2023-10-01']
    test_df = df[df['Reported Date'] >= '2023-10-01']

    X_train = train_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
    y_train = train_df['Modal Price (Rs./Quintal)']
    X_test = test_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
    y_test = test_df['Modal Price (Rs./Quintal)']

    st.write("Performing hyperparameter tuning...")
    param_grid = {
        'learning_rate': [0.01, 0.1, 0.2],
        'max_depth': [3, 5, 7],
        'n_estimators': [50, 100, 150],
        'booster': ['gbtree', 'dart']
    }

    model = XGBRegressor()
    # Total combination count; used only to drive the progress bar.
    param_combinations = len(param_grid['learning_rate']) * len(param_grid['max_depth']) * \
                         len(param_grid['n_estimators']) * len(param_grid['booster'])

    current_combination = 0

    def custom_grid_search():
        # Exhaustive search, keeping the combination with the best test R^2.
        nonlocal current_combination
        best_score = float('-inf')
        best_params = None
        for learning_rate in param_grid['learning_rate']:
            for max_depth in param_grid['max_depth']:
                for n_estimators in param_grid['n_estimators']:
                    for booster in param_grid['booster']:
                        model.set_params(
                            learning_rate=learning_rate,
                            max_depth=max_depth,
                            n_estimators=n_estimators,
                            booster=booster
                        )
                        model.fit(X_train, y_train)
                        score = model.score(X_test, y_test)
                        if score > best_score:
                            best_score = score
                            best_params = {
                                'learning_rate': learning_rate,
                                'max_depth': max_depth,
                                'n_estimators': n_estimators,
                                'booster': booster
                            }
                        current_combination += 1
                        update_tuning_progress(current_combination, param_combinations)
        return best_params

    best_params = custom_grid_search()
    st.write("Training the best model and making predictions...")
    best_model = XGBRegressor(**best_params)
    best_model.fit(X_train, y_train)
    y_pred = best_model.predict(X_test)

    # NOTE(review): squared=False is deprecated in newer scikit-learn
    # (root_mean_squared_error is the replacement) — confirm pinned version.
    rmse = mean_squared_error(y_test, y_pred, squared=False)
    mae = mean_absolute_error(y_test, y_pred)
    st.write(f"RMSE: {rmse}")
    st.write(f"MAE: {mae}")

    # Build one long frame labelled Train/Test/Predicted for plotting.
    train_plot_df = train_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    train_plot_df['Type'] = 'Train'

    test_plot_df = test_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    test_plot_df['Type'] = 'Test'

    predicted_plot_df = test_df[['Reported Date']].copy()
    predicted_plot_df['Modal Price (Rs./Quintal)'] = y_pred
    predicted_plot_df['Type'] = 'Predicted'

    plot_df = pd.concat([train_plot_df, test_plot_df, predicted_plot_df])

    fig = go.Figure()

    for plot_type, color, dash in [('Train', 'blue', None), ('Test', 'orange', None),
                                   ('Predicted', 'green', 'dot')]:
        data = plot_df[plot_df['Type'] == plot_type]
        fig.add_trace(go.Scatter(
            x=data['Reported Date'],
            y=data['Modal Price (Rs./Quintal)'],
            mode='lines',
            name=f"{plot_type} Data",
            line=dict(color=color, dash=dash)
        ))

    fig.update_layout(
        title="Train, Test, and Predicted Data",
        xaxis_title="Date",
        yaxis_title="Modal Price (Rs./Quintal)",
        template="plotly_white"
    )

    st.plotly_chart(fig, use_container_width=True)

    return best_params
639
-
640
def forecast_next_14_days(df, _best_params, key):
    """Fit the tuned model on all history and forecast the next 14 days.

    Appends 14 future dates, rebuilds features over the combined frame,
    trains on the historical portion, predicts the future portion, then
    plots the result and offers it for download.
    """
    # Mongo documents passed in via forecast() also carry metadata keys
    # (_id, filter_key, last_updated) that XGBRegressor must not receive.
    hyperparams = {k: v for k, v in _best_params.items()
                   if k not in ('_id', 'filter_key', 'last_updated')}

    last_date = df['Reported Date'].max()
    future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=14)
    future_df = pd.DataFrame({'Reported Date': future_dates})
    full_df = pd.concat([df, future_df], ignore_index=True)
    full_df = create_forecasting_features(full_df)

    # .copy() so the prediction column assigned below lands on an independent
    # frame rather than a view (avoids SettingWithCopyWarning / lost writes).
    original_df = full_df[full_df['Reported Date'] <= last_date].copy()
    future_df = full_df[full_df['Reported Date'] > last_date].copy()

    X_train = original_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'], errors='ignore')
    y_train = original_df['Modal Price (Rs./Quintal)']
    X_future = future_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'], errors='ignore')

    model = XGBRegressor(**hyperparams)
    model.fit(X_train, y_train)

    future_predictions = model.predict(X_future)
    future_df['Modal Price (Rs./Quintal)'] = future_predictions
    plot_data(original_df, future_df, last_date, 14)
    download_button(future_df, key)
661
-
662
def forecast_next_30_days(df, _best_params, key):
    """Fit the tuned model on all history and forecast the next 30 days.

    Mirrors forecast_next_14_days with the 1-month feature set.
    """
    # Strip Mongo metadata keys before handing the dict to XGBRegressor.
    hyperparams = {k: v for k, v in _best_params.items()
                   if k not in ('_id', 'filter_key', 'last_updated')}

    last_date = df['Reported Date'].max()
    future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=30)
    future_df = pd.DataFrame({'Reported Date': future_dates})
    full_df = pd.concat([df, future_df], ignore_index=True)
    full_df = create_forecasting_features_1m(full_df)

    # .copy() so the prediction column assigned below lands on an independent
    # frame rather than a view (avoids SettingWithCopyWarning / lost writes).
    original_df = full_df[full_df['Reported Date'] <= last_date].copy()
    future_df = full_df[full_df['Reported Date'] > last_date].copy()

    X_train = original_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'], errors='ignore')
    y_train = original_df['Modal Price (Rs./Quintal)']
    X_future = future_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'], errors='ignore')

    model = XGBRegressor(**hyperparams)
    model.fit(X_train, y_train)

    future_predictions = model.predict(X_future)
    future_df['Modal Price (Rs./Quintal)'] = future_predictions
    plot_data(original_df, future_df, last_date, 30)
    download_button(future_df, key)
683
-
684
def forecast_next_90_days(df, _best_params, key):
    """Fit the tuned model on all history and forecast the next 90 days.

    Mirrors forecast_next_14_days with the 3-month feature set.
    """
    # Strip Mongo metadata keys before handing the dict to XGBRegressor.
    hyperparams = {k: v for k, v in _best_params.items()
                   if k not in ('_id', 'filter_key', 'last_updated')}

    last_date = df['Reported Date'].max()
    future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=90)
    future_df = pd.DataFrame({'Reported Date': future_dates})
    full_df = pd.concat([df, future_df], ignore_index=True)
    full_df = create_forecasting_features_3m(full_df)

    # .copy() so the prediction column assigned below lands on an independent
    # frame rather than a view (avoids SettingWithCopyWarning / lost writes).
    original_df = full_df[full_df['Reported Date'] <= last_date].copy()
    future_df = full_df[full_df['Reported Date'] > last_date].copy()

    X_train = original_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'], errors='ignore')
    y_train = original_df['Modal Price (Rs./Quintal)']
    X_future = future_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'], errors='ignore')

    model = XGBRegressor(**hyperparams)
    model.fit(X_train, y_train)

    future_predictions = model.predict(X_future)
    future_df['Modal Price (Rs./Quintal)'] = future_predictions
    plot_data(original_df, future_df, last_date, 90)
    download_button(future_df, key)
705
-
706
def plot_data(original_df, future_df, last_date, days):
    """Plot the last *days* days of actual prices against the forecast.

    Shows one solid blue 'Actual' trace and one dashed red 'Forecasted'
    trace in a single Plotly chart rendered via Streamlit.
    """
    actual_df = original_df[original_df['Reported Date'] >= (last_date - pd.Timedelta(days=days))].copy()
    actual_df['Type'] = 'Actual'

    future_plot_df = future_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    future_plot_df['Type'] = 'Forecasted'
    # Prepend the last actual observation to the forecast series so the two
    # traces join visually.  BUG FIX: it must be relabelled 'Forecasted' —
    # with its original 'Actual' label it was filtered out of the forecast
    # trace below, leaving a visible gap between the two lines.
    last_actual_point = actual_df.sort_values('Reported Date').iloc[[-1]].copy()
    last_actual_point['Type'] = 'Forecasted'
    future_plot_df = pd.concat([last_actual_point, future_plot_df])

    plot_df = pd.concat([actual_df, future_plot_df])
    fig = go.Figure()
    for plot_type, color, dash in [('Actual', 'blue', 'solid'), ('Forecasted', 'red', 'dash')]:
        data = plot_df[plot_df['Type'] == plot_type]
        fig.add_trace(go.Scatter(
            x=data['Reported Date'],
            y=data['Modal Price (Rs./Quintal)'],
            mode='lines',
            name=f"{plot_type} Data",
            line=dict(color=color, dash=dash)
        ))

    fig.update_layout(
        title="Actual vs Forecasted Modal Price (Rs./Quintal)",
        xaxis_title="Date",
        yaxis_title="Modal Price (Rs./Quintal)",
        template="plotly_white"
    )

    st.plotly_chart(fig, use_container_width=True)
735
-
736
-
737
def download_button(future_df, key):
    """Offer the forecasted date/price series as an .xlsx download in Streamlit."""
    download_df = future_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    download_df['Reported Date'] = download_df['Reported Date'].dt.strftime('%Y-%m-%d')
    # Write the workbook into an in-memory buffer and rewind it for Streamlit.
    towrite = io.BytesIO()
    download_df.to_excel(towrite, index=False, engine='xlsxwriter')
    towrite.seek(0)
    # BUG FIX: the file is .xlsx, so serve the OOXML spreadsheet MIME type;
    # 'application/vnd.ms-excel' is the legacy .xls type and can confuse
    # browsers/Excel about the format.
    st.download_button(label="Download Forecasted Values",
                       data=towrite,
                       file_name=f'forecasted_prices_{key}.xlsx',
                       mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')
747
-
748
-
749
-
750
def fetch_and_process_data(query_filter, collection):
    """Load documents matching *query_filter* and return a preprocessed frame.

    Returns None — after showing a Streamlit warning or error — when no
    documents match or when the query/preprocessing fails.
    """
    try:
        records = list(collection.find(query_filter))
        if not records:
            st.warning("⚠️ No data found for the selected filter.")
            return None
        frame = pd.DataFrame(records)
        st.write("Preprocessing data...")
        return preprocess_data(frame)
    except Exception as e:
        st.error(f"❌ Error fetching data 1: {e}")
        return None
765
-
766
def save_best_params(collection, filter_key, best_params):
    """Upsert the tuned hyper-parameters for *filter_key* into *collection*.

    Mutates *best_params* in place by stamping it with the filter key and
    the current timestamp before writing.
    """
    best_params["filter_key"] = filter_key
    best_params["last_updated"] = datetime.now().isoformat()

    # A single atomic upsert replaces the previous find-then-replace/insert
    # pair, which could race with a concurrent writer and insert duplicates.
    collection.replace_one({"filter_key": filter_key}, best_params, upsert=True)
775
-
776
def get_best_params(filter_key, collection):
    """Return the stored hyper-parameter document for *filter_key*, or None."""
    return collection.find_one({"filter_key": filter_key})
779
-
780
def train_and_forecast(df, filter_key, days):
    """Tune a model for the given horizon, persist its parameters, and forecast.

    *days* must be 14, 30, or 90; any other value (or df is None) does nothing.
    """
    if df is not None:
        if days==14:
            best_params = train_and_evaluate(df)
            # BUG FIX: save_best_params takes (collection, filter_key,
            # best_params); the arguments were previously passed as
            # (filter_key, best_params, collection), writing garbage.
            save_best_params(best_params_collection, filter_key, best_params)
            forecast_next_14_days(df, best_params, filter_key)
        elif days==30:
            best_params = train_and_evaluate_1m(df)
            save_best_params(best_params_collection_1m, filter_key, best_params)
            forecast_next_30_days(df, best_params, filter_key)
        elif days==90:
            best_params = train_and_evaluate_3m(df)
            save_best_params(best_params_collection_3m, filter_key, best_params)
            forecast_next_90_days(df, best_params, filter_key)

# NOTE(review): these accumulators appear to collect failed scrape dates,
# but the code that populates them is not visible in this chunk — confirm.
failed_dates_data = []
failed_dates_market = []
796
-
797
def forecast(df, filter_key, days):
    """Forecast *days* ahead using previously stored hyper-parameters.

    Looks up the tuned parameters for *filter_key* in the collection matching
    the horizon and runs the corresponding forecast; warns when the model has
    not been trained yet.  Horizons other than 14/30/90 are ignored.
    """
    horizons = {
        14: (best_params_collection, forecast_next_14_days),
        30: (best_params_collection_1m, forecast_next_30_days),
        90: (best_params_collection_3m, forecast_next_90_days),
    }
    if days not in horizons:
        return
    params_collection, run_forecast = horizons[days]
    record = get_best_params(filter_key, params_collection)
    if record:
        st.info(f"ℹ️ The model was trained on {record['last_updated']}.")
        run_forecast(df, record, filter_key)
    else:
        st.warning("⚠️ Model is not trained yet. Please train the model first.")
819
-
820
def collection_to_dataframe(collection, drop_id=True):
    """
    Converts a MongoDB collection to a pandas DataFrame.

    Args:
        collection: MongoDB collection object.
        drop_id (bool): Whether to drop the '_id' column. Default is True.

    Returns:
        pd.DataFrame: DataFrame containing the collection data.
    """
    frame = pd.DataFrame(list(collection.find()))
    if drop_id and '_id' in frame.columns:
        frame = frame.drop(columns=['_id'])
    return frame
838
-
839
-
840
-
841
def editable_spreadsheet():
    """Upload a sowing report and predict production for a region/season.

    Reads an uploaded .xlsx, collects region/season/area through a form, and
    hands the matching rows to process_dataframe; shows an error when the
    inputs are invalid or nothing matches.
    """
    st.title("Sowing Report Prediction Model")
    uploaded_file = st.file_uploader("Upload your Excel file", type=['xlsx'])
    if uploaded_file is None:
        return

    df_excel = pd.read_excel(uploaded_file)
    st.write("Excel data loaded:", df_excel)

    with st.form("input_form"):
        input_region = st.text_input("Enter Region to Filter By", placeholder="Region Name")
        input_season = st.text_input("Enter Season to Filter By", placeholder="Season (e.g., Winter)")
        input_area = st.number_input("Enter Area (in hectares) for Production Calculation", min_value=0.0, format="%.2f")
        submit_button = st.form_submit_button("Calculate Production")

    if not submit_button:
        return

    if not (input_region and input_season and input_area > 0):
        st.error("Please enter valid region, season, and area to proceed.")
        return

    # Case-insensitive match on both region and season.
    region_match = df_excel['Region'].str.lower() == input_region.lower()
    season_match = df_excel['Season'].str.lower() == input_season.lower()
    filtered_df = df_excel[region_match & season_match]

    if filtered_df.empty:
        st.error("No data found for the specified region and season.")
    else:
        process_dataframe(filtered_df, input_area)
868
-
869
def process_dataframe(df, area):
    """Project production volume from a filtered sowing report.

    Production is estimated as the mean of the 'Yield' column times the
    sown *area*.

    Args:
        df: Filtered sowing-report rows; must contain a 'Yield' column.
        area: Area (hectares) to scale the average yield by.

    Side effects:
        Renders a Streamlit success or error message.
    """
    if 'Yield' not in df.columns:
        st.error("The DataFrame does not contain a necessary 'Yield' column for calculation.")
        return

    average_yield = df['Yield'].mean()
    # mean() of an empty or all-NaN column is NaN; the original happily
    # reported "nan units" — surface a real error instead.
    if pd.isna(average_yield):
        st.error("The 'Yield' column has no usable values for calculation.")
        return

    predicted_production = average_yield * area
    st.success(f"The predicted Production Volume for the specified region and season is: {predicted_production:.2f} units")
876
-
877
-
878
-
879
def display_statistics(df):
    """Render the national market statistics dashboard.

    Collapses the raw per-market rows to one record per day (mean modal
    price, summed arrivals) and renders: latest figures, same-day history
    across years, monthly/yearly averages, largest daily price moves, price
    extremes over the past year, and a rolling-feature data snapshot.

    Args:
        df: Raw rows from the price collection; must contain
            'Reported Date', 'Modal Price (Rs./Quintal)' and
            'Arrivals (Tonnes)' columns.
    """
    st.title("📊 National Market Statistics Dashboard")
    # Page-local CSS for headings and the green "highlight" explainer boxes.
    st.markdown("""
        <style>
        h1 {
            color: #2e7d32;
            font-size: 36px;
            font-weight: bold;
        }
        h3 {
            color: #388e3c;
            font-size: 28px;
            font-weight: 600;
        }
        p {
            font-size: 16px;
            line-height: 1.6;
        }
        .highlight {
            background-color: #f1f8e9;
            padding: 10px;
            border-radius: 8px;
            font-size: 16px;
            color: #2e7d32;
            font-weight: 500;
        }
        </style>
    """, unsafe_allow_html=True)
    df['Reported Date'] = pd.to_datetime(df['Reported Date'])
    # One row per calendar day: average price across markets, total arrivals.
    national_data = df.groupby('Reported Date').agg({
        'Modal Price (Rs./Quintal)': 'mean',
        'Arrivals (Tonnes)': 'sum'
    }).reset_index()

    st.subheader("🗓️ Key Statistics")
    latest_date = national_data['Reported Date'].max()
    latest_price = national_data[national_data['Reported Date'] == latest_date]['Modal Price (Rs./Quintal)'].mean()
    national_data['Arrivals (Tonnes)'] = pd.to_numeric(national_data['Arrivals (Tonnes)'], errors='coerce')
    latest_arrivals = national_data[national_data['Reported Date'] == latest_date]['Arrivals (Tonnes)'].sum()

    st.markdown("<p class='highlight'>This section provides the most recent statistics for the market. It includes the latest available date, the average price of commodities, and the total quantity of goods arriving at the market. These metrics offer an up-to-date snapshot of market conditions.</p>", unsafe_allow_html=True)
    st.write(f"**Latest Date**: {latest_date.strftime('%Y-%m-%d')}")
    st.write(f"**Latest Modal Price**: {latest_price:.2f} Rs./Quintal")
    st.write(f"**Latest Arrivals**: {float(latest_arrivals):.2f} Tonnes")

    st.subheader("📆 This Day in Previous Years")
    st.markdown("<p class='highlight'>This table shows the modal price and total arrivals for this exact day across previous years. It provides a historical perspective to compare against current market conditions. This section examines historical data for the same day in previous years. By analyzing trends for this specific day, you can identify seasonal patterns, supply-demand changes, or any deviations that might warrant closer attention.</p>", unsafe_allow_html=True)
    today = latest_date
    # Same day-of-year across all years present in the data.
    previous_years_data = national_data[national_data['Reported Date'].dt.dayofyear == today.dayofyear]

    if not previous_years_data.empty:
        # NOTE(review): assignment on a filtered slice — pandas may emit a
        # SettingWithCopyWarning here; a .copy() before mutating would silence it.
        previous_years_data['Year'] = previous_years_data['Reported Date'].dt.year.astype(str)
        display_data = (previous_years_data[['Year', 'Modal Price (Rs./Quintal)', 'Arrivals (Tonnes)']]
                        .sort_values(by='Year', ascending=False)
                        .reset_index(drop=True))
        st.table(display_data)
    else:
        st.write("No historical data available for this day in previous years.")

    st.subheader("📅 Monthly Averages Over Years")
    st.markdown("<p class='highlight'>This section displays the average modal prices and arrivals for each month across all years. It helps identify seasonal trends and peak activity months, which can be crucial for inventory planning and market predictions.</p>", unsafe_allow_html=True)
    national_data['Month'] = national_data['Reported Date'].dt.month
    monthly_avg_price = national_data.groupby('Month')['Modal Price (Rs./Quintal)'].mean().reset_index()
    monthly_avg_arrivals = national_data.groupby('Month')['Arrivals (Tonnes)'].mean().reset_index()
    monthly_avg = pd.merge(monthly_avg_price, monthly_avg_arrivals, on='Month')
    monthly_avg['Month'] = monthly_avg['Month'].apply(lambda x: calendar.month_name[x])
    monthly_avg.columns = ['Month', 'Average Modal Price (Rs./Quintal)', 'Average Arrivals (Tonnes)']
    st.write(monthly_avg)
    st.subheader("📆 Yearly Averages")
    st.markdown("<p class='highlight'>Yearly averages provide insights into long-term trends in pricing and arrivals. By examining these values, you can detect overall growth, stability, or volatility in the market.</p>", unsafe_allow_html=True)
    national_data['Year'] = national_data['Reported Date'].dt.year
    yearly_avg_price = national_data.groupby('Year')['Modal Price (Rs./Quintal)'].mean().reset_index()
    # NOTE(review): arrivals are summed per year but the column is then
    # labelled "Average Arrivals (Tonnes)" below — label and aggregation
    # disagree; confirm which is intended.
    yearly_sum_arrivals = national_data.groupby('Year')['Arrivals (Tonnes)'].sum().reset_index()
    yearly_avg = pd.merge(yearly_avg_price, yearly_sum_arrivals, on='Year')
    yearly_avg['Year'] = yearly_avg['Year'].apply(lambda x: f"{int(x)}")
    yearly_avg.columns = ['Year', 'Average Modal Price (Rs./Quintal)', 'Average Arrivals (Tonnes)']
    st.write(yearly_avg)

    st.subheader("📈 Largest Daily Price Changes (Past Year)")
    st.markdown("<p class='highlight'>This analysis identifies the most significant daily price changes in the past year. These fluctuations can highlight periods of market volatility, potentially caused by external factors like weather, policy changes, or supply chain disruptions.</p>", unsafe_allow_html=True)
    one_year_ago = latest_date - pd.DateOffset(years=1)
    recent_data = national_data[national_data['Reported Date'] >= one_year_ago]
    # NOTE(review): chained assignment on a slice again (SettingWithCopyWarning).
    recent_data['Daily Change (%)'] = recent_data['Modal Price (Rs./Quintal)'].pct_change() * 100
    largest_changes = recent_data[['Reported Date', 'Modal Price (Rs./Quintal)', 'Daily Change (%)']].nlargest(5, 'Daily Change (%)')
    largest_changes['Reported Date'] = largest_changes['Reported Date'].dt.date
    largest_changes = largest_changes.reset_index(drop=True)
    st.write(largest_changes)

    st.subheader("🏆 Top 5 Highest and Lowest Prices (Past Year)")
    st.markdown("<p class='highlight'>This section highlights the highest and lowest prices over the past year. These values reflect the extremes of market dynamics, helping to understand price ceilings and floors in the recent period.</p>", unsafe_allow_html=True)
    highest_prices = recent_data.nlargest(5, 'Modal Price (Rs./Quintal)')[['Reported Date', 'Modal Price (Rs./Quintal)']]
    lowest_prices = recent_data.nsmallest(5, 'Modal Price (Rs./Quintal)')[['Reported Date', 'Modal Price (Rs./Quintal)']]
    highest_prices['Reported Date'] = highest_prices['Reported Date'].dt.date
    lowest_prices['Reported Date'] = lowest_prices['Reported Date'].dt.date
    highest_prices = highest_prices.reset_index(drop=True)
    lowest_prices = lowest_prices.reset_index(drop=True)
    st.write("**Top 5 Highest Prices**")
    st.write(highest_prices)
    st.write("**Top 5 Lowest Prices**")
    st.write(lowest_prices)

    st.subheader("🗂️ Data Snapshot")
    st.markdown("<p class='highlight'>This snapshot provides a concise overview of the latest data, including rolling averages and lagged values. These metrics help identify short-term trends and lagged effects in pricing.</p>", unsafe_allow_html=True)
    # 14-day rolling mean and a 14-day lag: the same features the forecast
    # models consume.
    national_data['Rolling Mean (14 Days)'] = national_data['Modal Price (Rs./Quintal)'].rolling(window=14).mean()
    national_data['Lag (14 Days)'] = national_data['Modal Price (Rs./Quintal)'].shift(14)
    national_data['Reported Date'] = national_data['Reported Date'].dt.date
    national_data = national_data.sort_values(by='Reported Date', ascending=False)
    st.dataframe(national_data.head(14).reset_index(drop=True), use_container_width=True, height=525)

    # NOTE(review): the diff view strips indentation, so it is ambiguous
    # whether this call sits inside display_statistics or at module level;
    # placed here because the sowing-report tool appears on the Statistics
    # page — confirm against the original file.
    editable_spreadsheet()
989
-
990
-
991
def parse_table_with_rowspan(table):
    """Flatten an HTML table into rows of text, expanding rowspan cells.

    A cell declaring ``rowspan=k`` is repeated into the same output column
    of the next ``k - 1`` rows, so every returned row carries a complete set
    of values even where the HTML omitted spanned cells.

    Args:
        table: A BeautifulSoup ``<table>`` element.

    Returns:
        list[list[str]]: Stripped cell text per row, header row included.
    """
    parsed_rows = []
    # Maps output-column index -> {"value": str, "rows_left": int} for cells
    # from earlier rows that still span into upcoming rows.
    pending = {}

    for tr in table.find_all("tr"):
        cells = tr.find_all(["td", "th"])
        current = []
        next_cell = 0  # index into this row's own <td>/<th> elements
        out_col = 0    # output column currently being filled

        while next_cell < len(cells) or out_col in pending:
            if out_col in pending:
                # A cell from an earlier row occupies this column.
                carried = pending[out_col]
                current.append(carried["value"])
                carried["rows_left"] -= 1
                if carried["rows_left"] == 0:
                    del pending[out_col]
                out_col += 1
            elif next_cell < len(cells):
                cell = cells[next_cell]
                text = cell.get_text(strip=True)
                span = int(cell.get("rowspan", 1))

                current.append(text)
                if span > 1:
                    pending[out_col] = {"value": text, "rows_left": span - 1}

                next_cell += 1
                out_col += 1

        parsed_rows.append(current)

    return parsed_rows
1026
-
1027
-
1028
def fetch_and_store_data():
    """Incrementally scrape Agmarknet modal-price data into ``collection``.

    Determines the last stored 'Reported Date', then fetches month-sized
    windows from Agmarknet (via ScraperAPI) up to yesterday, keeps only
    'White' variety / 'FAQ' grade rows, cleans types, writes an audit CSV
    per window, and bulk-inserts the records.

    Side effects: network requests, local CSV files, MongoDB inserts,
    stdout progress prints.
    """
    # NOTE(review): secret committed in source — move this key to an
    # environment variable / secrets store and rotate it.
    SCRAPER_API_KEY = "8842750a88db7513a1d19325745437cc"
    # Resume from the day after the newest stored record; full backfill from
    # 2019-01-01 when the collection is empty.
    latest_doc = collection.find_one(sort=[("Reported Date", -1)])
    from_date = (latest_doc["Reported Date"] + timedelta(days=1)) if latest_doc else datetime(2019, 1, 1)
    to_date = datetime.now() - timedelta(days=1)

    print(f"📦 Modal Data → From: {from_date.strftime('%d-%b-%Y')} To: {to_date.strftime('%d-%b-%Y')}")

    # Walk forward one calendar month at a time.
    current = from_date.replace(day=1)
    while current <= to_date:
        start_of_range = max(current, from_date)
        # Last day of `current`'s month: day 28 + 4 days always lands in the
        # next month; snap to its 1st and step back one day.
        end_of_range = (current.replace(day=28) + timedelta(days=4)).replace(day=1) - timedelta(days=1)
        if end_of_range > to_date:
            end_of_range = to_date

        date_from_str = start_of_range.strftime('%d-%b-%Y')
        date_to_str = end_of_range.strftime('%d-%b-%Y')

        print(f"\n📅 Fetching data from {date_from_str} to {date_to_str}")

        # Tx_Commodity=11 is Sesamum; Tx_Trend=2 selects the modal-price view.
        target_url = (
            "https://agmarknet.gov.in/SearchCmmMkt.aspx"
            f"?Tx_Commodity=11&Tx_State=0&Tx_District=0&Tx_Market=0"
            f"&DateFrom={date_from_str}&DateTo={date_to_str}"
            f"&Fr_Date={date_from_str}&To_Date={date_to_str}"
            "&Tx_Trend=2"
            "&Tx_CommodityHead=Sesamum(Sesame,Gingelly,Til)"
            "&Tx_StateHead=--Select--"
            "&Tx_DistrictHead=--Select--"
            "&Tx_MarketHead=--Select--"
        )

        payload = {
            "api_key": SCRAPER_API_KEY,
            "url": target_url
        }

        try:
            response = requests.get("https://api.scraperapi.com/", params=payload)
            soup = BeautifulSoup(response.text, "html.parser")
            table = soup.find("table", {"class": "tableagmark_new"})

            if not table or not table.find_all("tr"):
                print("❌ No table found.")
                current = (current + timedelta(days=32)).replace(day=1)
                continue

            # First parsed row is the header; rowspans already expanded.
            all_rows = parse_table_with_rowspan(table)
            headers = all_rows[0]
            rows = all_rows[1:]

            df_raw = pd.DataFrame(rows, columns=headers)
            print(f"🔍 Raw rows fetched: {len(df_raw)}")

            # Clean invalid state/district/market names
            required_columns = ["State Name", "District Name", "Market Name"]
            if all(col in df_raw.columns for col in required_columns):
                df_raw = df_raw[
                    (df_raw["State Name"].str.strip() != "-") &
                    (df_raw["District Name"].str.strip() != "-") &
                    (df_raw["Market Name"].str.strip() != "-")
                ]
                print(f"✅ Rows after filtering: {len(df_raw)}")
            else:
                print("⚠️ One or more expected columns are missing. Skipping filter.")

            # Filter by variety + grade: only White / FAQ sesame is tracked.
            df_raw = df_raw[
                (df_raw["Variety"].str.strip().str.lower() == "white") &
                (df_raw["Grade"].str.strip().str.upper() == "FAQ")
            ]
            print(f"✅ Filtered rows with 'White' variety and 'FAQ' grade: {len(df_raw)}")

            # Parse and clean dates; drop rows whose date fails to parse.
            df_raw["Reported Date Parsed"] = pd.to_datetime(
                df_raw["Reported Date"].str.strip(), format='%d %b %Y', errors='coerce'
            )
            df_raw = df_raw[df_raw["Reported Date Parsed"].notna()].copy()
            df_raw["Reported Date"] = df_raw["Reported Date Parsed"]
            df_raw.drop(columns=["Reported Date Parsed"], inplace=True)

            # Type conversions (nullable Int64 keeps unparseable prices as <NA>).
            df_raw["Modal Price (Rs./Quintal)"] = pd.to_numeric(
                df_raw["Modal Price (Rs./Quintal)"], errors='coerce'
            ).round().astype("Int64")
            df_raw["Arrivals (Tonnes)"] = pd.to_numeric(
                df_raw["Arrivals (Tonnes)"], errors='coerce'
            ).astype("float64")
            df_raw["State Name"] = df_raw["State Name"].astype("string")
            df_raw["Market Name"] = df_raw["Market Name"].astype("string")

            # Write cleaned CSV (local audit trail of what was inserted).
            raw_csv_filename = f"clean_raw_modal_data_{start_of_range.strftime('%b_%Y')}.csv"
            df_raw.to_csv(raw_csv_filename, index=False)
            print(f"📄 Cleaned raw data CSV written to: {raw_csv_filename}")

            # Insert to DB
            records = df_raw.to_dict(orient="records")
            if records:
                collection.insert_many(records)
                print(f"✅ Inserted {len(records)} records for {current.strftime('%b %Y')}")
            else:
                print("⚠️ No valid records after final filtering.")

        except Exception as e:
            # Best-effort per-month fetch: log and move on to the next window.
            print(f"🔥 Exception during {current.strftime('%b %Y')} fetch: {e}")

        current = (current + timedelta(days=32)).replace(day=1)
1136
-
1137
def fetch_and_store_data_market():
    """Incrementally scrape per-market Agmarknet prices into ``market_price_data``.

    Near-duplicate of ``fetch_and_store_data`` but targets the market-level
    view (Tx_Trend=0), parses 'Price Date' instead of 'Reported Date', and
    also keeps Min/Max prices.

    NOTE(review): the month-walking/fetch/clean skeleton is copy-pasted from
    ``fetch_and_store_data``; a shared helper parameterized on (trend,
    date column, target collection) would remove the duplication.

    Side effects: network requests, local CSV files, MongoDB inserts,
    stdout progress prints.
    """
    # NOTE(review): same committed secret as fetch_and_store_data — rotate
    # and move to configuration.
    SCRAPER_API_KEY = "8842750a88db7513a1d19325745437cc"
    latest_doc = market_price_data.find_one(sort=[("Reported Date", -1)])
    from_date = (latest_doc["Reported Date"] + timedelta(days=1)) if latest_doc else datetime(2019, 1, 1)
    to_date = datetime.now() - timedelta(days=1)

    print(f"📦 Market Data → From: {from_date.strftime('%d-%b-%Y')} To: {to_date.strftime('%d-%b-%Y')}")

    current = from_date.replace(day=1)
    while current <= to_date:
        start_of_range = max(current, from_date)
        # Last day of the month (see fetch_and_store_data for the trick).
        end_of_range = (current.replace(day=28) + timedelta(days=4)).replace(day=1) - timedelta(days=1)
        if end_of_range > to_date:
            end_of_range = to_date

        date_from_str = start_of_range.strftime('%d-%b-%Y')
        date_to_str = end_of_range.strftime('%d-%b-%Y')

        print(f"\n📅 Fetching data from {date_from_str} to {date_to_str}")

        # Tx_Trend=0 selects the per-market price listing.
        target_url = (
            "https://agmarknet.gov.in/SearchCmmMkt.aspx"
            f"?Tx_Commodity=11&Tx_State=0&Tx_District=0&Tx_Market=0"
            f"&DateFrom={date_from_str}&DateTo={date_to_str}"
            f"&Fr_Date={date_from_str}&To_Date={date_to_str}"
            "&Tx_Trend=0"
            "&Tx_CommodityHead=Sesamum(Sesame,Gingelly,Til)"
            "&Tx_StateHead=--Select--"
            "&Tx_DistrictHead=--Select--"
            "&Tx_MarketHead=--Select--"
        )

        payload = {
            "api_key": SCRAPER_API_KEY,
            "url": target_url
        }

        try:
            response = requests.get("https://api.scraperapi.com/", params=payload)
            soup = BeautifulSoup(response.text, "html.parser")
            table = soup.find("table", {"class": "tableagmark_new"})

            if not table or not table.find_all("tr"):
                print("❌ No table found.")
                current = (current + timedelta(days=32)).replace(day=1)
                continue

            all_rows = parse_table_with_rowspan(table)
            headers = all_rows[0]
            rows = all_rows[1:]

            # ✅ Filter out irrelevant columns based on available data
            # NOTE(review): required_columns is declared but never used below.
            required_columns = ["Sl no.", "District Name", "Market Name", "Commodity", "Variety", "Grade", "Min Price (Rs./Quintal)", "Max Price (Rs./Quintal)", "Modal Price (Rs./Quintal)", "Price Date"]
            df_raw = pd.DataFrame(rows, columns=headers)

            # Remove rows with invalid or missing location data
            df_raw = df_raw[
                (df_raw["District Name"].str.strip() != "-") &
                (df_raw["Market Name"].str.strip() != "-")
            ]
            print(f"✅ Rows after filtering invalid locations: {len(df_raw)}")

            # ✅ Filter for variety and grade
            df_raw = df_raw[
                (df_raw["Variety"].str.strip().str.lower() == "white") &
                (df_raw["Grade"].str.strip().str.upper() == "FAQ")
            ]
            print(f"✅ Filtered rows with 'White' variety and 'FAQ' grade: {len(df_raw)}")

            # ✅ Parse 'Price Date' as 'Reported Date' (this view names the
            # date column differently from the modal view).
            df_raw["Reported Date Parsed"] = pd.to_datetime(
                df_raw["Price Date"].str.strip(), format='%d %b %Y', errors='coerce'
            )
            df_raw = df_raw[df_raw["Reported Date Parsed"].notna()].copy()
            df_raw["Reported Date"] = df_raw["Reported Date Parsed"]
            df_raw.drop(columns=["Reported Date Parsed"], inplace=True)

            # ✅ Type conversions (nullable Int64 keeps bad values as <NA>).
            df_raw["Modal Price (Rs./Quintal)"] = pd.to_numeric(
                df_raw["Modal Price (Rs./Quintal)"], errors='coerce'
            ).round().astype("Int64")
            df_raw["Min Price (Rs./Quintal)"] = pd.to_numeric(
                df_raw["Min Price (Rs./Quintal)"], errors='coerce'
            ).round().astype("Int64")
            df_raw["Max Price (Rs./Quintal)"] = pd.to_numeric(
                df_raw["Max Price (Rs./Quintal)"], errors='coerce'
            ).round().astype("Int64")
            df_raw["District Name"] = df_raw["District Name"].astype("string")
            df_raw["Market Name"] = df_raw["Market Name"].astype("string")

            # ✅ Save CSV for audit
            raw_csv_filename = f"clean_raw_market_data_{start_of_range.strftime('%b_%Y')}.csv"
            df_raw.to_csv(raw_csv_filename, index=False)
            print(f"📄 CSV saved: {raw_csv_filename}")

            # ✅ Insert into MongoDB
            records = df_raw.to_dict(orient="records")
            if records:
                market_price_data.insert_many(records)
                print(f"✅ Inserted {len(records)} records for {current.strftime('%b %Y')}")
            else:
                print("⚠️ No valid records after final filtering.")

        except Exception as e:
            # Best-effort: log the failed month and continue with the next.
            print(f"🔥 Exception during {current.strftime('%b %Y')} fetch: {e}")

        current = (current + timedelta(days=32)).replace(day=1)
1244
-
1245
-
1246
-
1247
def get_dataframe_from_collection(collection):
    """Return every document of *collection* as a DataFrame without '_id'.

    NOTE: functionally equivalent to ``collection_to_dataframe(collection)``;
    kept as a separate entry point for existing callers.
    """
    frame = pd.DataFrame(list(collection.find()))
    # errors='ignore' also covers the empty-collection case.
    return frame.drop(columns=["_id"], errors="ignore")
1254
-
1255
def authenticate_user(username, password):
    """Validate a username/password pair against the users collection.

    Args:
        username: Login name to look up.
        password: Plain-text password to check against the stored hash.

    Returns:
        bool: True only when the user exists and the hash matches.
    """
    user = users_collection.find_one({"username": username})
    return bool(user and check_password_hash(user['password'], password))
1260
-
1261
- st.markdown("""
1262
- <style>
1263
- /* Main layout adjustments */
1264
- .main { max-width: 1200px; margin: 0 auto; }
1265
-
1266
- /* Header style */
1267
- h1 {
1268
- color: #4CAF50;
1269
- font-family: 'Arial Black', sans-serif;
1270
- }
1271
-
1272
- /* Button Styling */
1273
- .stButton>button {
1274
- background-color: #4CAF50;
1275
- color: white;
1276
- font-size: 16px;
1277
- border-radius: 12px;
1278
- padding: 12px 20px;
1279
- margin: 10px auto;
1280
- border: none;
1281
- cursor: pointer;
1282
- transition: background-color 0.4s ease, transform 0.3s ease, box-shadow 0.3s ease;
1283
- box-shadow: 0 4px 6px rgba(0, 0, 0, 0.2);
1284
- }
1285
-
1286
- /* Hover Effects for Button */
1287
- .stButton>button:hover {
1288
- background-color: #2196F3; /* Change color on hover */
1289
- color: #ffffff; /* Ensure text is readable */
1290
- transform: scale(1.1) rotate(-2deg); /* Slight zoom and tilt */
1291
- box-shadow: 0 8px 12px rgba(0, 0, 0, 0.3); /* Enhance shadow effect */
1292
- }
1293
-
1294
- /* Animation Effect */
1295
- .stButton>button:after {
1296
- content: '';
1297
- position: absolute;
1298
- top: 0;
1299
- left: 0;
1300
- right: 0;
1301
- bottom: 0;
1302
- border-radius: 12px;
1303
- background: linear-gradient(45deg, #4CAF50, #2196F3, #FFC107, #FF5722);
1304
- z-index: -1; /* Ensure gradient stays behind the button */
1305
- opacity: 0;
1306
- transition: opacity 0.5s ease;
1307
- }
1308
-
1309
- /* Glow Effect on Hover */
1310
- .stButton>button:hover:after {
1311
- opacity: 1;
1312
- animation: glowing 2s infinite alternate;
1313
- }
1314
-
1315
- /* Keyframes for Glow Animation */
1316
- @keyframes glowing {
1317
- 0% { box-shadow: 0 0 5px #4CAF50, 0 0 10px #4CAF50; }
1318
- 100% { box-shadow: 0 0 20px #2196F3, 0 0 30px #2196F3; }
1319
- }
1320
-
1321
- /* Responsive Design */
1322
- @media (max-width: 768px) {
1323
- .stButton>button {
1324
- width: 100%;
1325
- font-size: 14px;
1326
- }
1327
- h1 {
1328
- font-size: 24px;
1329
- }
1330
- }
1331
- </style>
1332
- """, unsafe_allow_html=True)
1333
# ---------------------------------------------------------------------------
# Top-level app flow: authentication gate, then the four dashboard pages
# (Statistics / Plots / Predictions / Exim). Streamlit re-runs this whole
# script on every interaction, so all state lives in st.session_state.
# NOTE(review): indentation below is reconstructed — the diff view this was
# recovered from strips leading whitespace; verify nesting against the
# original file.
# ---------------------------------------------------------------------------
if 'authenticated' not in st.session_state:
    st.session_state.authenticated = False

if st.session_state.get("authenticated", False):
    st.title("🌾 AgriPredict Dashboard")

    # Manual refresh: scrape both the modal-price and per-market feeds.
    if st.button("Get Live Data Feed"):
        st.write("🔄 Fetching fresh data from Modal + Agmarknet...")
        fetch_and_store_data()
        fetch_and_store_data_market()

    view_mode = st.radio("", ["Statistics", "Plots", "Predictions", "Exim"], horizontal=True)

    if view_mode == "Plots":
        st.sidebar.header("Filters")

        selected_period = st.sidebar.selectbox(
            "Select Time Period",
            ["2 Weeks", "1 Month", "3 Months", "1 Year", "5 Years"],
            index=1
        )
        # NOTE(review): "2 Years" is mapped here but not offered in the
        # selectbox above — dead entry or missing option.
        period_mapping = {
            "2 Weeks": 14,
            "1 Month": 30,
            "3 Months": 90,
            "1 Year": 365,
            "2 Years": 730,
            "5 Years": 1825
        }
        st.session_state["selected_period"] = period_mapping[selected_period]

        state_options = list(state_market_dict.keys()) + ['India']
        selected_state = st.sidebar.selectbox("Select State", state_options)

        market_wise = False
        query_filter = {}

        # Build the Mongo filter: market > state > whole country.
        if selected_state != 'India':
            market_wise = st.sidebar.checkbox("Market Wise Analysis")
            if market_wise:
                markets = state_market_dict.get(selected_state, [])
                st.write(f"✅ Available markets for {selected_state}: {markets}")
                selected_market = st.sidebar.selectbox("Select Market", markets)
                query_filter = {"Market Name": selected_market}
            else:
                query_filter = {"State Name": selected_state}
        else:
            query_filter = {"State Name": {"$exists": True}}

        query_filter["Reported Date"] = {
            "$gte": datetime.now() - timedelta(days=st.session_state["selected_period"])
        }

        data_type = st.sidebar.radio("Select Data Type", ["Price", "Volume", "Both"])

        st.write(f"🧪 Final Mongo Query Filter: `{query_filter}`")

        if st.sidebar.button("✨ Let's go!"):
            try:
                df_market_grouped = pd.DataFrame()
                df_grouped = pd.DataFrame()

                # MARKET-WISE: per-market prices come from market_price_data.
                if "Market Name" in query_filter:
                    st.info("📊 Market-level data mode enabled")
                    market_cursor = market_price_data.find(query_filter)
                    market_data = list(market_cursor)
                    st.write(f"📄 Market rows fetched: {len(market_data)}")

                    if market_data:
                        df_market = pd.DataFrame(market_data)
                        df_market['Reported Date'] = pd.to_datetime(df_market['Reported Date'], errors='coerce')
                        df_market["Modal Price (Rs./Quintal)"] = pd.to_numeric(df_market["Modal Price (Rs./Quintal)"], errors='coerce')
                        df_market_grouped = df_market.groupby('Reported Date', as_index=False).agg({
                            'Modal Price (Rs./Quintal)': 'mean'
                        }).dropna()
                        # Reindex to a continuous daily calendar; gap-fill
                        # forward then backward so the line is unbroken.
                        # NOTE(review): fillna(method=...) is deprecated in
                        # modern pandas — .ffill()/.bfill() is the current form.
                        date_range = pd.date_range(df_market_grouped['Reported Date'].min(), df_market_grouped['Reported Date'].max())
                        df_market_grouped = df_market_grouped.set_index('Reported Date').reindex(date_range).rename_axis('Reported Date').reset_index()
                        df_market_grouped['Modal Price (Rs./Quintal)'] = df_market_grouped['Modal Price (Rs./Quintal)'].fillna(method='ffill').fillna(method='bfill')

                # STATE/NATIONAL-WISE: aggregated rows from the main collection.
                st.info("📥 Fetching state-level or national data...")
                cursor = collection.find(query_filter)
                data = list(cursor)
                st.write(f"📄 Total rows fetched from collection: {len(data)}")

                if data:
                    df = pd.DataFrame(data)
                    df['Reported Date'] = pd.to_datetime(df['Reported Date'], errors='coerce')
                    df['Arrivals (Tonnes)'] = pd.to_numeric(df['Arrivals (Tonnes)'], errors='coerce')
                    df['Modal Price (Rs./Quintal)'] = pd.to_numeric(df['Modal Price (Rs./Quintal)'], errors='coerce')

                    df_grouped = df.groupby('Reported Date', as_index=False).agg({
                        'Arrivals (Tonnes)': 'sum',
                        'Modal Price (Rs./Quintal)': 'mean'
                    }).dropna()

                    date_range = pd.date_range(df_grouped['Reported Date'].min(), df_grouped['Reported Date'].max())
                    df_grouped = df_grouped.set_index('Reported Date').reindex(date_range).rename_axis('Reported Date').reset_index()
                    df_grouped['Arrivals (Tonnes)'] = df_grouped['Arrivals (Tonnes)'].fillna(method='ffill').fillna(method='bfill')
                    df_grouped['Modal Price (Rs./Quintal)'] = df_grouped['Modal Price (Rs./Quintal)'].fillna(method='ffill').fillna(method='bfill')

                    st.subheader(f"📈 Trends for {selected_state} ({'Market: ' + selected_market if market_wise else 'State-wide'})")

                    fig = go.Figure()

                    if data_type == "Both":
                        # Price and volume live on very different scales;
                        # min-max scale both so they share one axis.
                        scaler = MinMaxScaler()
                        df_grouped[['Scaled Price', 'Scaled Arrivals']] = scaler.fit_transform(
                            df_grouped[['Modal Price (Rs./Quintal)', 'Arrivals (Tonnes)']]
                        )

                        fig.add_trace(go.Scatter(
                            x=df_grouped['Reported Date'],
                            y=df_grouped['Scaled Price'],
                            mode='lines',
                            name='Scaled Modal Price',
                            line=dict(color='green'),
                        ))

                        fig.add_trace(go.Scatter(
                            x=df_grouped['Reported Date'],
                            y=df_grouped['Scaled Arrivals'],
                            mode='lines',
                            name='Scaled Arrivals',
                            line=dict(color='blue'),
                        ))

                    elif data_type == "Price":
                        # Prefer the market-level series when one was built.
                        price_df = df_market_grouped if not df_market_grouped.empty else df_grouped
                        fig.add_trace(go.Scatter(
                            x=price_df['Reported Date'],
                            y=price_df["Modal Price (Rs./Quintal)"],
                            mode='lines',
                            name='Modal Price',
                            line=dict(color='green'),
                        ))

                    elif data_type == "Volume":
                        fig.add_trace(go.Scatter(
                            x=df_grouped['Reported Date'],
                            y=df_grouped['Arrivals (Tonnes)'],
                            mode='lines',
                            name='Arrivals',
                            line=dict(color='blue'),
                        ))

                    fig.update_layout(
                        title="📊 Agricultural Trends",
                        xaxis_title="Date",
                        yaxis_title="Value (Scaled if Both)",
                        template="plotly_white"
                    )
                    st.plotly_chart(fig, use_container_width=True)

                else:
                    st.warning("⚠️ No data found for the selected filter range and region.")

            except Exception as e:
                st.error(f"❌ Error fetching data 2: {e}")
                st.exception(e)

    elif view_mode == "Predictions":
        st.subheader("📊 Model Analysis")
        sub_option = st.radio("Select one of the following", ["India", "States", "Market"], horizontal=True)
        sub_timeline = st.radio("Select one of the following horizons", ["14 days", "1 month", "3 month"], horizontal=True)
        if sub_option == "States":
            states = ["Karnataka", "Madhya Pradesh", "Gujarat", "Uttar Pradesh", "Telangana"]
            selected_state = st.selectbox("Select State for Model Training", states)
            filter_key = f"state_{selected_state}"

            if st.button("Forecast"):
                query_filter = {"State Name": selected_state}
                df = fetch_and_process_data(query_filter, collection)
                if sub_timeline == "14 days":
                    forecast(df, filter_key, 14)
                elif sub_timeline == "1 month":
                    forecast(df, filter_key, 30)
                else:
                    forecast(df, filter_key, 90)
        elif sub_option == "Market":
            market_options = ["Rajkot", "Neemuch", "Kalburgi", "Warangal"]
            selected_market = st.selectbox("Select Market for Model Training", market_options)
            filter_key = f"market_{selected_market}"
            if st.button("Forecast"):
                query_filter = {"Market Name": selected_market}
                # NOTE(review): debug output — dumps all rows after a
                # hard-coded date; presumably left in from development.
                comparison_date = pd.to_datetime("18 Feb 2025")
                df = fetch_and_process_data(query_filter, market_price_data)
                st.write(df[df["Reported Date"] > comparison_date])
                if sub_timeline == "14 days":
                    forecast(df, filter_key, 14)
                elif sub_timeline == "1 month":
                    forecast(df, filter_key, 30)
                else:
                    forecast(df, filter_key, 90)

        elif sub_option == "India":
            # NOTE(review): this impExp read is dead — df is overwritten
            # inside the button handler, and the `if True:` wrapper is a
            # leftover no-op.
            df = collection_to_dataframe(impExp)
            if True:
                if st.button("Forecast"):
                    query_filter = {}
                    df = fetch_and_process_data(query_filter, collection)
                    if sub_timeline == "14 days":
                        forecast(df, "India", 14)
                    elif sub_timeline == "1 month":
                        forecast(df, "India", 30)
                    else:
                        forecast(df, "India", 90)

    elif view_mode == "Statistics":
        # NOTE(review): `document` is unused.
        document = collection.find_one()
        df = get_dataframe_from_collection(collection)
        display_statistics(df)
    elif view_mode == "Exim":
        # Import/export dashboard from the impExp collection.
        df = collection_to_dataframe(impExp)

        plot_option = st.radio(
            "Select the data to visualize:",
            ["Import Price", "Import Quantity", "Export Price", "Export Quantity"],
            horizontal=True
        )

        time_period = st.selectbox(
            "Select time period:",
            ["1 Month", "6 Months", "1 Year", "2 Years"]
        )

        df["Reported Date"] = pd.to_datetime(df["Reported Date"], format="%Y-%m-%d")
        if time_period == "1 Month":
            start_date = pd.Timestamp.now() - pd.DateOffset(months=1)
        elif time_period == "6 Months":
            start_date = pd.Timestamp.now() - pd.DateOffset(months=6)
        elif time_period == "1 Year":
            start_date = pd.Timestamp.now() - pd.DateOffset(years=1)
        elif time_period == "2 Years":
            start_date = pd.Timestamp.now() - pd.DateOffset(years=2)

        filtered_df = df[df["Reported Date"] >= start_date]
        if plot_option == "Import Price":
            grouped_df = (
                filtered_df.groupby("Reported Date", as_index=False)["VALUE_IMPORT"]
                .mean()
                .rename(columns={"VALUE_IMPORT": "Average Import Price"})
            )
            y_axis_label = "Average Import Price (Rs.)"
        elif plot_option == "Import Quantity":
            grouped_df = (
                filtered_df.groupby("Reported Date", as_index=False)["QUANTITY_IMPORT"]
                .sum()
                .rename(columns={"QUANTITY_IMPORT": "Total Import Quantity"})
            )
            y_axis_label = "Total Import Quantity (Tonnes)"
        elif plot_option == "Export Price":
            grouped_df = (
                filtered_df.groupby("Reported Date", as_index=False)["VALUE_EXPORT"]
                .mean()
                .rename(columns={"VALUE_EXPORT": "Average Export Price"})
            )
            y_axis_label = "Average Export Price (Rs.)"
        elif plot_option == "Export Quantity":
            # NOTE(review): BUG — groups QUANTITY_IMPORT but labels it
            # "Total Export Quantity"; should presumably be QUANTITY_EXPORT.
            grouped_df = (
                filtered_df.groupby("Reported Date", as_index=False)["QUANTITY_IMPORT"]
                .sum()
                .rename(columns={"QUANTITY_IMPORT": "Total Export Quantity"})
            )
            y_axis_label = "Total Export Quantity (Tonnes)"

        fig = px.line(
            grouped_df,
            x="Reported Date",
            y=grouped_df.columns[1],
            title=f"{plot_option} Over Time",
            labels={"Reported Date": "Date", grouped_df.columns[1]: y_axis_label},
        )
        st.plotly_chart(fig)


else:
    # Unauthenticated: show the login form and gate everything else behind it.
    with st.form("login_form"):
        st.subheader("Please log in")

        username = st.text_input("Username")
        password = st.text_input("Password", type="password")
        login_button = st.form_submit_button("Login")

        if login_button:
            if authenticate_user(username, password):
                st.session_state.authenticated = True
                st.session_state['username'] = username
                st.write("Login successful!")
                st.rerun()
            else:
                st.error("Invalid username or password")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/agri_predict/__init__.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """AgriPredict package entrypoint.
2
+
3
+ Expose commonly used helpers for the Streamlit app.
4
+ """
5
+ from .config import get_collections
6
+ from .features import (
7
+ create_forecasting_features,
8
+ create_forecasting_features_1m,
9
+ create_forecasting_features_3m,
10
+ )
11
+ from .data import (
12
+ preprocess_data,
13
+ fetch_and_process_data,
14
+ fetch_and_store_data,
15
+ get_dataframe_from_collection,
16
+ collection_to_dataframe,
17
+ )
18
+ from .models import (
19
+ train_and_evaluate,
20
+ train_and_evaluate_1m,
21
+ train_and_evaluate_3m,
22
+ train_and_forecast,
23
+ forecast,
24
+ forecast_next_14_days,
25
+ forecast_next_30_days,
26
+ forecast_next_90_days,
27
+ )
28
+ from .plotting import plot_data, download_button, display_statistics
29
+ from .utils import (
30
+ save_best_params,
31
+ get_best_params,
32
+ authenticate_user,
33
+ collection_to_dataframe as utils_collection_to_dataframe,
34
+ )
35
+ from .scraper import api_client, AgmarknetAPIClient
36
+
37
+ __all__ = [
38
+ "get_collections",
39
+ "api_client",
40
+ "AgmarknetAPIClient",
41
+ ]
src/agri_predict/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (1.31 kB). View file
 
src/agri_predict/__pycache__/config.cpython-312.pyc ADDED
Binary file (1.28 kB). View file
 
src/agri_predict/__pycache__/constants.cpython-312.pyc ADDED
Binary file (1.36 kB). View file
 
src/agri_predict/__pycache__/data.cpython-312.pyc ADDED
Binary file (5.29 kB). View file
 
src/agri_predict/__pycache__/features.cpython-312.pyc ADDED
Binary file (13 kB). View file
 
src/agri_predict/__pycache__/models.cpython-312.pyc ADDED
Binary file (11.6 kB). View file
 
src/agri_predict/__pycache__/plotting.cpython-312.pyc ADDED
Binary file (15.3 kB). View file
 
src/agri_predict/__pycache__/scraper.cpython-312.pyc ADDED
Binary file (12.5 kB). View file
 
src/agri_predict/__pycache__/utils.cpython-312.pyc ADDED
Binary file (2.18 kB). View file
 
src/agri_predict/config.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+ from pymongo import MongoClient
4
+ import certifi
5
+ from datetime import datetime
6
+
7
+ load_dotenv()
8
+
9
+
10
def get_collections():
    """Open a TLS-verified MongoDB client and return the app's collections.

    `MONGO_URI` is read from the environment (populated by dotenv at import
    time). Raises RuntimeError when the URI is absent.
    """
    uri = os.getenv("MONGO_URI")
    if not uri:
        raise RuntimeError("MONGO_URI is not set in the environment")

    db = MongoClient(uri, tlsCAFile=certifi.where())["AgriPredict"]
    collection_names = {
        "collection": "WhiteSesame",
        "best_params_collection": "BestParams",
        "best_params_collection_1m": "BestParams_1m",
        "best_params_collection_3m": "BestParams_3m",
        "impExp": "impExp",
        "users_collection": "user",
    }
    return {alias: db[name] for alias, name in collection_names.items()}
src/agri_predict/constants.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Mapping of Indian states to the Agmarknet market (mandi) names tracked for
# white sesame prices. Keys drive the state selector in the UI; values are
# passed verbatim to the market query filters, so spellings (including the
# scraped oddities like "Dhragradhra") must not be "corrected" here.
state_market_dict = {
    "Karnataka": [
        "Kalburgi",
        "Basava Kalayana",
        "Lingasugur",
        "Kustagi",
        "Bangalore",
        "Bagalakot",
        "Hubli (Amaragol)"
    ],
    "Gujarat": [
        "Siddhpur",
        "Jasdan",
        "Gondal",
        "Morbi",
        "Botad",
        "Visavadar",
        "Dahod",
        "Rajkot",
        "Junagadh",
        "Savarkundla",
        "Bhavnagar",
        "Rajula",
        "Dhoraji",
        "Amreli",
        "Mahuva(Station Road)",
        "Mansa",
        "Porbandar",
        "Dasada Patadi",
        "Halvad",
        "Chotila",
        "Bhanvad",
        "Dhansura",
        "Babra",
        "Upleta",
        "Palitana",
        "Jetpur(Dist.Rajkot)",
        "S.Mandvi",
        "Mandvi",
        "Khambha",
        "Kadi",
        "Taleja",
        "Himatnagar",
        "Lakhani",
        "Rapar",
        "Una",
        "Dhari",
        "Bagasara",
        "Jam Jodhpur",
        "Veraval",
        "Dhragradhra",
        "Deesa"
    ],
    "Uttar Pradesh": [
        "Bangarmau",
        "Sultanpur",
        "Maudaha",
        "Mauranipur",
        "Lalitpur",
        "Konch",
        "Muskara",
        "Raath",
        "Varipaal",
        "Auraiya",
        "Orai",
        "Banda",
        "Kishunpur",
        "Ait",
        "Jhansi",
        "Kurara",
        "Chirgaon",
        "Charkhari",
        "Moth",
        "Jalaun",
        "Sirsaganj",
        "Shikohabad"
    ],
    "Madhya Pradesh": [
        "Naugaon",
        "Mehar",
        "Kailaras",
        "Datia",
        "LavKush Nagar(Laundi)",
        "Ajaygarh",
        "Rajnagar",
        "Sevda",
        "Neemuch",
        "Sheopurkalan",
        "Lashkar",
        "Alampur",
        "Niwadi",
        "Dabra",
        "Ujjain",
        "Bijawar",
        "Sidhi",
        "Barad",
        "Pohari",
        "Shahagarh",
        "Lateri",
        "Banapura",
        "Panna",
        "Garhakota",
        "Katni",
        "Chhatarpur",
        "Beohari",
        "Satna",
        "Sabalgarh",
        "Hanumana",
        "Bhander",
        "Banmorkalan",
        "Jaora",
        "Bagli",
        "Singroli"
    ],
    "Telangana": [
        "Warangal"
    ]
}
src/agri_predict/data.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from datetime import datetime, timedelta
3
+
4
+ from .config import get_collections
5
+ from .scraper import api_client
6
+
7
+
8
def preprocess_data(df: pd.DataFrame) -> pd.DataFrame:
    """Collapse raw price rows to one mean modal price per calendar day.

    Duplicate dates are averaged, the date axis is made contiguous over the
    observed span, and gap days are forward-filled (then backward-filled to
    cover a leading gap).
    """
    daily = df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    daily['Reported Date'] = pd.to_datetime(daily['Reported Date'])
    daily = daily.groupby('Reported Date', as_index=False).mean()

    all_days = pd.date_range(daily['Reported Date'].min(), daily['Reported Date'].max())
    daily = (
        daily.set_index('Reported Date')
        .reindex(all_days)
        .rename_axis('Reported Date')
        .reset_index()
    )
    daily['Modal Price (Rs./Quintal)'] = daily['Modal Price (Rs./Quintal)'].ffill().bfill()
    return daily
16
+
17
+
18
def fetch_and_process_data(query_filter: dict):
    """Load documents matching `query_filter` from the price collection.

    Returns a preprocessed daily DataFrame, or None when the query matches
    nothing or any error occurs (deliberately best-effort: callers treat
    None as "no data").
    """
    try:
        collection = get_collections()['collection']
        records = list(collection.find(query_filter))
        if not records:
            return None
        return preprocess_data(pd.DataFrame(records))
    except Exception:
        # Swallowing broadly is intentional here; the UI handles None.
        return None
31
+
32
+
33
def fetch_and_store_data():
    """Incrementally fetch Agmarknet rows and append them to MongoDB.

    Fetches from the day after the latest stored "Reported Date" (or from
    2000-01-01 for an empty collection) up to yesterday, keeps only the
    "White" variety, coerces field types, and inserts one document per row.

    Returns:
        pd.DataFrame: the newly stored rows, or None when nothing was fetched.
    """
    cols = get_collections()
    collection = cols['collection']

    # Newest stored date via a descending sort on "Reported Date".
    latest_doc = collection.find_one(sort=[("Reported Date", -1)])
    latest_date = latest_doc["Reported Date"] if latest_doc and "Reported Date" in latest_doc else None

    # Calculate the fetch window: resume after the last stored day.
    if latest_date:
        from_date = latest_date + timedelta(days=1)
    else:
        from_date = datetime(2000, 1, 1)

    to_date = datetime.now() - timedelta(days=1)

    # Format dates for the API (YYYY-MM-DD).
    from_date_str = from_date.strftime('%Y-%m-%d')
    to_date_str = to_date.strftime('%Y-%m-%d')

    # Fetch data using the Agmarknet API client.
    responses = api_client.fetch_date_range(from_date_str, to_date_str)

    if not responses:
        return None

    # Parse raw responses into one DataFrame.
    df = api_client.parse_multiple_responses_to_dataframe(responses)

    if df.empty:
        return None

    # Filter for the White variety only.
    df = df[df['Variety'] == "White"]

    # Ensure proper data types for MongoDB.
    df["Reported Date"] = pd.to_datetime(df["Reported Date"])

    # Convert numeric fields with proper type handling.
    # NOTE(review): Min/Max prices are round-tripped through to_numeric and
    # then stored as *strings* — presumably to match the existing document
    # schema; confirm before changing.
    df["Modal Price (Rs./Quintal)"] = pd.to_numeric(df["Modal Price (Rs./Quintal)"], errors='coerce').astype('Int64')
    df["Min Price (Rs./Quintal)"] = pd.to_numeric(df["Min Price (Rs./Quintal)"], errors='coerce').astype(str)
    df["Max Price (Rs./Quintal)"] = pd.to_numeric(df["Max Price (Rs./Quintal)"], errors='coerce').astype(str)
    df["Arrivals (Tonnes)"] = pd.to_numeric(df["Arrivals (Tonnes)"], errors='coerce').astype(float)

    # Sort by date so documents are inserted chronologically.
    df.sort_values(by="Reported Date", inplace=True)

    # Insert into MongoDB one document per row.
    for _, row in df.iterrows():
        doc = row.to_dict()
        # Replace NaN with None so Mongo stores nulls, not float('nan').
        for key in doc:
            if pd.isna(doc[key]):
                doc[key] = None
        collection.insert_one(doc)

    return df
97
+
98
+
99
def get_dataframe_from_collection(collection):
    """Dump every document in `collection` into a DataFrame, without Mongo's `_id`."""
    frame = pd.DataFrame(list(collection.find()))
    return frame.drop(columns=['_id']) if '_id' in frame.columns else frame
105
+
106
+
107
def collection_to_dataframe(collection, drop_id=True):
    """Materialise `collection` as a DataFrame.

    When `drop_id` is true (the default), Mongo's `_id` column is removed.
    """
    docs = [doc for doc in collection.find()]
    frame = pd.DataFrame(docs)
    if drop_id and '_id' in frame.columns:
        frame = frame.drop(columns=['_id'])
    return frame
src/agri_predict/features.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+
4
+
5
+ def _ensure_datetime_index(df):
6
+ df = df.copy()
7
+ if not isinstance(df.index, pd.DatetimeIndex):
8
+ df = df.set_index('Reported Date')
9
+ df.index = pd.to_datetime(df.index)
10
+ return df
11
+
12
+
13
def create_forecasting_features(df):
    """Add calendar, lag, rolling, seasonal-average and Fourier features for
    the 14-day model, returning the frame with 'Reported Date' as a column.
    """
    # Normalise to a DatetimeIndex (copy; callers' frame is untouched).
    frame = df.copy()
    if not isinstance(frame.index, pd.DatetimeIndex):
        frame = frame.set_index('Reported Date')
        frame.index = pd.to_datetime(frame.index)

    price = frame['Modal Price (Rs./Quintal)']
    target_map = price.to_dict()
    idx = frame.index

    # Calendar components.
    frame['dayofweek'] = idx.dayofweek
    frame['quarter'] = idx.quarter
    frame['month'] = idx.month
    frame['year'] = idx.year
    frame['dayofyear'] = idx.dayofyear
    frame['weekofyear'] = idx.isocalendar().week

    # Fixed-day and calendar-month lags of the target.
    for name, delta in [('lag14', pd.Timedelta(days=14)),
                        ('lag28', pd.Timedelta(days=28)),
                        ('lag56', pd.Timedelta(days=56)),
                        ('lag_3months', pd.DateOffset(months=3)),
                        ('lag_6months', pd.DateOffset(months=6))]:
        frame[name] = (idx - delta).map(target_map)

    # Trailing rolling statistics.
    for window in (7, 14, 28):
        frame[f'rolling_mean_{window}'] = price.rolling(window=window, min_periods=1).mean()
        frame[f'rolling_std_{window}'] = price.rolling(window=window, min_periods=1).std()

    frame['ema7'] = price.ewm(span=7, adjust=False).mean()
    frame['ema14'] = price.ewm(span=14, adjust=False).mean()

    # Seasonal group means (leak across time by design of the original).
    frame['monthly_avg'] = frame.groupby('month')['Modal Price (Rs./Quintal)'].transform('mean')
    frame['weekly_avg'] = frame.groupby('weekofyear')['Modal Price (Rs./Quintal)'].transform('mean')
    frame['dayofweek_avg'] = frame.groupby('dayofweek')['Modal Price (Rs./Quintal)'].transform('mean')

    # Fourier terms for yearly and fortnightly seasonality.
    day = idx.dayofyear
    frame['fourier_sin_365'] = np.sin(2 * np.pi * day / 365)
    frame['fourier_cos_365'] = np.cos(2 * np.pi * day / 365)
    frame['fourier_sin_14'] = np.sin(2 * np.pi * day / 14)
    frame['fourier_cos_14'] = np.cos(2 * np.pi * day / 14)

    # NOTE(review): these reduce to single scalars broadcast to every row
    # (min/max over the whole 14-day-shifted series), not rolling extrema.
    shifted = (idx - pd.Timedelta(days=14)).map(target_map)
    frame['recent_min_14'] = shifted.min()
    frame['recent_max_14'] = shifted.max()
    frame['recent_range_14'] = frame['recent_max_14'] - frame['recent_min_14']

    frame['yearly_avg'] = frame.groupby('year')['Modal Price (Rs./Quintal)'].transform('mean')
    frame['cumulative_mean'] = price.expanding().mean()

    return frame.reset_index()
52
+
53
+
54
def create_forecasting_features_1m(df):
    """Feature engineering for the 1-month (30-day) model.

    Same template as `create_forecasting_features` but with monthly-scale
    lags (30/60/90 days, 6/12 months), 30/60/90-day rolling stats and
    30-day Fourier terms. Returns the frame with 'Reported Date' restored
    as a column.
    """
    df = _ensure_datetime_index(df)
    # Date -> price lookup used to realise the lag columns below.
    target_map = df['Modal Price (Rs./Quintal)'].to_dict()

    # Calendar components.
    df['dayofweek'] = df.index.dayofweek
    df['quarter'] = df.index.quarter
    df['month'] = df.index.month
    df['year'] = df.index.year
    df['dayofyear'] = df.index.dayofyear
    df['weekofyear'] = df.index.isocalendar().week

    # Fixed-day and calendar-month lags; dates without a match become NaN.
    df['lag_30'] = (df.index - pd.Timedelta(days=30)).map(target_map)
    df['lag_60'] = (df.index - pd.Timedelta(days=60)).map(target_map)
    df['lag_90'] = (df.index - pd.Timedelta(days=90)).map(target_map)
    df['lag_6months'] = (df.index - pd.DateOffset(months=6)).map(target_map)
    df['lag_12months'] = (df.index - pd.DateOffset(months=12)).map(target_map)

    # Trailing rolling statistics at monthly scales.
    for window in [30, 60, 90]:
        df[f'rolling_mean_{window}'] = df['Modal Price (Rs./Quintal)'].rolling(window=window, min_periods=1).mean()
        df[f'rolling_std_{window}'] = df['Modal Price (Rs./Quintal)'].rolling(window=window, min_periods=1).std()

    df['ema_30'] = df['Modal Price (Rs./Quintal)'].ewm(span=30, adjust=False).mean()
    df['ema_60'] = df['Modal Price (Rs./Quintal)'].ewm(span=60, adjust=False).mean()

    # Seasonal group means computed over the whole frame.
    df['monthly_avg'] = df.groupby('month')['Modal Price (Rs./Quintal)'].transform('mean')
    df['weekly_avg'] = df.groupby('weekofyear')['Modal Price (Rs./Quintal)'].transform('mean')
    df['dayofweek_avg'] = df.groupby('dayofweek')['Modal Price (Rs./Quintal)'].transform('mean')

    # Fourier terms for yearly and 30-day seasonality.
    df['fourier_sin_365'] = np.sin(2 * np.pi * df.index.dayofyear / 365)
    df['fourier_cos_365'] = np.cos(2 * np.pi * df.index.dayofyear / 365)
    df['fourier_sin_30'] = np.sin(2 * np.pi * df.index.dayofyear / 30)
    df['fourier_cos_30'] = np.cos(2 * np.pi * df.index.dayofyear / 30)

    # NOTE(review): .min()/.max() collapse the 30-day-shifted series to a
    # single scalar broadcast to every row — not rolling extrema.
    df['recent_min_30'] = (df.index - pd.Timedelta(days=30)).map(target_map).min()
    df['recent_max_30'] = (df.index - pd.Timedelta(days=30)).map(target_map).max()
    df['recent_range_30'] = df['recent_max_30'] - df['recent_min_30']

    df['yearly_avg'] = df.groupby('year')['Modal Price (Rs./Quintal)'].transform('mean')
    df['cumulative_mean'] = df['Modal Price (Rs./Quintal)'].expanding().mean()

    return df.reset_index()
95
+
96
+
97
def create_forecasting_features_3m(df):
    """Feature engineering for the 3-month (90-day) model.

    Same template as `create_forecasting_features` but with quarterly-scale
    lags (3/6/9/12 months), 90–365-day rolling stats and 90/30-day Fourier
    terms. Returns the frame with 'Reported Date' restored as a column.
    """
    df = _ensure_datetime_index(df)
    # Date -> price lookup used to realise the lag columns below.
    target_map = df['Modal Price (Rs./Quintal)'].to_dict()

    # Calendar components.
    df['dayofweek'] = df.index.dayofweek
    df['quarter'] = df.index.quarter
    df['month'] = df.index.month
    df['year'] = df.index.year
    df['dayofyear'] = df.index.dayofyear
    df['weekofyear'] = df.index.isocalendar().week

    # Calendar-month lags; dates without a match become NaN.
    df['lag_3months'] = (df.index - pd.DateOffset(months=3)).map(target_map)
    df['lag_6months'] = (df.index - pd.DateOffset(months=6)).map(target_map)
    df['lag_9months'] = (df.index - pd.DateOffset(months=9)).map(target_map)
    df['lag_12months'] = (df.index - pd.DateOffset(months=12)).map(target_map)

    # Trailing rolling statistics at quarterly-to-yearly scales.
    for window in [90, 180, 270, 365]:
        df[f'rolling_mean_{window}'] = df['Modal Price (Rs./Quintal)'].rolling(window=window, min_periods=1).mean()
        df[f'rolling_std_{window}'] = df['Modal Price (Rs./Quintal)'].rolling(window=window, min_periods=1).std()

    df['ema90'] = df['Modal Price (Rs./Quintal)'].ewm(span=90, adjust=False).mean()
    df['ema180'] = df['Modal Price (Rs./Quintal)'].ewm(span=180, adjust=False).mean()

    # Seasonal group means computed over the whole frame.
    df['monthly_avg'] = df.groupby('month')['Modal Price (Rs./Quintal)'].transform('mean')
    df['weekly_avg'] = df.groupby('weekofyear')['Modal Price (Rs./Quintal)'].transform('mean')
    df['dayofweek_avg'] = df.groupby('dayofweek')['Modal Price (Rs./Quintal)'].transform('mean')

    # Fourier terms for 90-day and 30-day seasonality.
    df['fourier_sin_90'] = np.sin(2 * np.pi * df.index.dayofyear / 90)
    df['fourier_cos_90'] = np.cos(2 * np.pi * df.index.dayofyear / 90)
    df['fourier_sin_30'] = np.sin(2 * np.pi * df.index.dayofyear / 30)
    df['fourier_cos_30'] = np.cos(2 * np.pi * df.index.dayofyear / 30)

    # NOTE(review): .min()/.max() collapse the 90-day-shifted series to a
    # single scalar broadcast to every row — not rolling extrema.
    df['recent_min_90'] = (df.index - pd.Timedelta(days=90)).map(target_map).min()
    df['recent_max_90'] = (df.index - pd.Timedelta(days=90)).map(target_map).max()
    df['recent_range_90'] = df['recent_max_90'] - df['recent_min_90']

    df['yearly_avg'] = df.groupby('year')['Modal Price (Rs./Quintal)'].transform('mean')
    df['cumulative_mean'] = df['Modal Price (Rs./Quintal)'].expanding().mean()

    return df.reset_index()
src/agri_predict/models.py ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from xgboost import XGBRegressor
4
+ from sklearn.metrics import mean_squared_error, mean_absolute_error
5
+ from sklearn.preprocessing import MinMaxScaler
6
+ from .features import (
7
+ create_forecasting_features,
8
+ create_forecasting_features_1m,
9
+ create_forecasting_features_3m,
10
+ )
11
+ from .plotting import plot_data, download_button
12
+ from .config import get_collections
13
+
14
+
15
def _custom_grid_search(X_train, y_train, X_test, y_test, param_grid, progress_bar):
    """Exhaustive grid search over four XGBoost hyper-parameters.

    Fits one candidate per combination, scores it on the held-out split via
    `model.score` (the estimator's built-in higher-is-better score), and
    advances the Streamlit `progress_bar` after each fit. Returns the
    best-scoring parameter dict.

    NOTE(review): a single XGBRegressor instance is reused via set_params
    and refit for every combination; selection is on the same test split
    later used for reporting.
    """
    model = XGBRegressor()
    # Total number of combinations, for progress reporting.
    param_combinations = len(param_grid['learning_rate']) * len(param_grid['max_depth']) * \
        len(param_grid['n_estimators']) * len(param_grid['booster'])
    current_combination = 0
    best_score = float('-inf')
    best_params = None

    for learning_rate in param_grid['learning_rate']:
        for max_depth in param_grid['max_depth']:
            for n_estimators in param_grid['n_estimators']:
                for booster in param_grid['booster']:
                    model.set_params(
                        learning_rate=learning_rate,
                        max_depth=max_depth,
                        n_estimators=n_estimators,
                        booster=booster
                    )
                    model.fit(X_train, y_train)
                    score = model.score(X_test, y_test)
                    if score > best_score:
                        best_score = score
                        best_params = {
                            'learning_rate': learning_rate,
                            'max_depth': max_depth,
                            'n_estimators': n_estimators,
                            'booster': booster
                        }
                    current_combination += 1
                    # Progress as an integer percentage of the grid covered.
                    progress_bar.progress(int((current_combination / param_combinations) * 100))
    return best_params
46
+
47
+
48
def _train_and_evaluate_generic(df, feature_fn, split_date, progress_bar):
    """Feature-engineer `df`, time-split at `split_date`, grid-search an
    XGBRegressor, report hold-out RMSE/MAE, and plot train/test/predicted.

    Args:
        df: daily price frame with 'Reported Date' and the modal-price column.
        feature_fn: one of the create_forecasting_features* functions.
        split_date: boundary between train (before) and test (on/after).
        progress_bar: Streamlit progress widget advanced by the grid search.

    Returns:
        dict: the best hyper-parameters found.
    """
    df = feature_fn(df)
    train_df = df[df['Reported Date'] < split_date]
    test_df = df[df['Reported Date'] >= split_date]

    X_train = train_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
    y_train = train_df['Modal Price (Rs./Quintal)']
    X_test = test_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
    y_test = test_df['Modal Price (Rs./Quintal)']

    param_grid = {
        'learning_rate': [0.01, 0.1, 0.2],
        'max_depth': [3, 5, 7],
        'n_estimators': [50, 100, 150],
        'booster': ['gbtree', 'dart']
    }

    st.write("Performing hyperparameter tuning...")
    best_params = _custom_grid_search(X_train, y_train, X_test, y_test, param_grid, progress_bar)

    st.write("Training the best model and making predictions...")
    best_model = XGBRegressor(**best_params)
    best_model.fit(X_train, y_train)
    y_pred = best_model.predict(X_test)

    # BUG FIX: mean_squared_error returns MSE; take the square root so the
    # value reported as "RMSE" really is the root-mean-squared error.
    rmse = mean_squared_error(y_test, y_pred) ** 0.5
    mae = mean_absolute_error(y_test, y_pred)
    st.write(f"RMSE: {rmse}")
    st.write(f"MAE: {mae}")

    # Assemble one long frame tagged by segment for plotting.
    train_plot_df = train_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    train_plot_df['Type'] = 'Train'
    test_plot_df = test_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    test_plot_df['Type'] = 'Test'
    predicted_plot_df = test_df[['Reported Date']].copy()
    predicted_plot_df['Modal Price (Rs./Quintal)'] = y_pred
    predicted_plot_df['Type'] = 'Predicted'
    plot_df = pd.concat([train_plot_df, test_plot_df, predicted_plot_df])

    import plotly.graph_objects as go
    fig = go.Figure()
    for plot_type, color, dash in [('Train', 'blue', None), ('Test', 'orange', None), ('Predicted', 'green', 'dot')]:
        data = plot_df[plot_df['Type'] == plot_type]
        fig.add_trace(go.Scatter(
            x=data['Reported Date'],
            y=data['Modal Price (Rs./Quintal)'],
            mode='lines',
            name=f"{plot_type} Data",
            line=dict(color=color, dash=dash)
        ))
    fig.update_layout(title="Train, Test, and Predicted Data", xaxis_title="Date", yaxis_title="Modal Price (Rs./Quintal)", template="plotly_white")
    st.plotly_chart(fig, width='stretch')

    return best_params
103
+
104
+
105
def train_and_evaluate(df):
    """Tune and evaluate the 14-day model (train/test split at 2024-01-01)."""
    bar = st.progress(0)
    return _train_and_evaluate_generic(df, create_forecasting_features, '2024-01-01', bar)
108
+
109
+
110
def train_and_evaluate_1m(df):
    """Tune and evaluate the 1-month model (train/test split at 2023-01-01)."""
    bar = st.progress(0)
    return _train_and_evaluate_generic(df, create_forecasting_features_1m, pd.to_datetime('2023-01-01'), bar)
113
+
114
+
115
def train_and_evaluate_3m(df):
    """Tune and evaluate the 3-month model (train/test split at 2023-01-01)."""
    bar = st.progress(0)
    return _train_and_evaluate_generic(df, create_forecasting_features_3m, pd.to_datetime('2023-01-01'), bar)
118
+
119
+
120
def forecast_next_14_days(df, _best_params, key):
    """Fit on all history with `_best_params`, predict 14 days ahead, plot
    actual vs forecast, and offer a CSV download keyed by `key`."""
    horizon = 14
    last_date = df['Reported Date'].max()
    upcoming = pd.DataFrame({
        'Reported Date': pd.date_range(start=last_date + pd.Timedelta(days=1), periods=horizon)
    })

    # Featurise history and horizon together so lags/rollings line up.
    featured = create_forecasting_features(pd.concat([df, upcoming], ignore_index=True))
    history = featured[featured['Reported Date'] <= last_date].copy()
    horizon_df = featured[featured['Reported Date'] > last_date].copy()

    def _features(d):
        return d.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'], errors='ignore')

    model = XGBRegressor(**_best_params)
    model.fit(_features(history), history['Modal Price (Rs./Quintal)'])
    horizon_df['Modal Price (Rs./Quintal)'] = model.predict(_features(horizon_df))

    plot_data(history, horizon_df, last_date, model, horizon)
    download_button(horizon_df, key)
137
+
138
+
139
def forecast_next_30_days(df, _best_params, key):
    """Fit on all history with `_best_params`, predict 30 days ahead, plot
    actual vs forecast, and offer a CSV download keyed by `key`."""
    horizon = 30
    last_date = df['Reported Date'].max()
    upcoming = pd.DataFrame({
        'Reported Date': pd.date_range(start=last_date + pd.Timedelta(days=1), periods=horizon)
    })

    # Featurise history and horizon together so lags/rollings line up.
    featured = create_forecasting_features_1m(pd.concat([df, upcoming], ignore_index=True))
    history = featured[featured['Reported Date'] <= last_date].copy()
    horizon_df = featured[featured['Reported Date'] > last_date].copy()

    def _features(d):
        return d.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'], errors='ignore')

    model = XGBRegressor(**_best_params)
    model.fit(_features(history), history['Modal Price (Rs./Quintal)'])
    horizon_df['Modal Price (Rs./Quintal)'] = model.predict(_features(horizon_df))

    plot_data(history, horizon_df, last_date, model, horizon)
    download_button(horizon_df, key)
156
+
157
+
158
def forecast_next_90_days(df, _best_params, key):
    """Fit on all history with `_best_params`, predict 90 days ahead, plot
    actual vs forecast, and offer a CSV download keyed by `key`."""
    horizon = 90
    last_date = df['Reported Date'].max()
    upcoming = pd.DataFrame({
        'Reported Date': pd.date_range(start=last_date + pd.Timedelta(days=1), periods=horizon)
    })

    # Featurise history and horizon together so lags/rollings line up.
    featured = create_forecasting_features_3m(pd.concat([df, upcoming], ignore_index=True))
    history = featured[featured['Reported Date'] <= last_date].copy()
    horizon_df = featured[featured['Reported Date'] > last_date].copy()

    def _features(d):
        return d.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'], errors='ignore')

    model = XGBRegressor(**_best_params)
    model.fit(_features(history), history['Modal Price (Rs./Quintal)'])
    horizon_df['Modal Price (Rs./Quintal)'] = model.predict(_features(horizon_df))

    plot_data(history, horizon_df, last_date, model, horizon)
    download_button(horizon_df, key)
175
+
176
+
177
def train_and_forecast(df, filter_key, days):
    """Retrain the model for the given horizon (14/30/90 days), persist the
    winning hyper-parameters keyed by `filter_key`, then render the forecast.
    Does nothing when `df` is None or `days` is unrecognised."""
    cols = get_collections()
    if df is None:
        return

    # Dispatch table: horizon -> (tuner, params store, renderer).
    plan = {
        14: (train_and_evaluate, cols['best_params_collection'], forecast_next_14_days),
        30: (train_and_evaluate_1m, cols['best_params_collection_1m'], forecast_next_30_days),
        90: (train_and_evaluate_3m, cols['best_params_collection_3m'], forecast_next_90_days),
    }
    if days not in plan:
        return
    tune, store, render = plan[days]

    best_params = tune(df)
    store.replace_one(
        {'filter_key': filter_key},
        {**best_params, 'filter_key': filter_key, 'last_updated': pd.Timestamp.now().isoformat()},
        upsert=True,
    )
    render(df, best_params, filter_key)
192
+
193
+
194
def get_best_params(filter_key, collection):
    """Return the stored hyper-parameter record for `filter_key`, or None."""
    return collection.find_one({"filter_key": filter_key})
197
+
198
+
199
def forecast(df, filter_key, days):
    """Forecast with previously stored hyper-parameters for the chosen
    horizon; warns when no trained record exists for `filter_key`."""
    cols = get_collections()

    # Dispatch table: horizon -> (params collection, renderer).
    lookup = {
        14: (cols['best_params_collection'], forecast_next_14_days),
        30: (cols['best_params_collection_1m'], forecast_next_30_days),
        90: (cols['best_params_collection_3m'], forecast_next_90_days),
    }
    if days not in lookup:
        return
    collection, render = lookup[days]

    record = get_best_params(filter_key, collection)
    if record:
        st.info(f"ℹ️ The model was trained on {record['last_updated']}.")
        render(df, record, filter_key)
    else:
        st.warning("⚠️ Model is not trained yet. Please train the model first.")
src/agri_predict/plotting.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import plotly.graph_objects as go
4
+ import io
5
+ import calendar
6
+ import numpy as np
7
+
8
+
9
def plot_data(original_df, future_df, last_date, model, days):
    """Plot model-fitted recent prices against the forecast horizon.

    The last *days* of history are re-scored through *model* (so the
    "Actual" trace shows the model's in-sample fit on the recent window),
    then joined to *future_df*; the two traces share the final fitted point
    so the solid and dashed lines connect visually.
    """
    window_start = last_date - pd.Timedelta(days=days)
    recent = original_df[original_df['Reported Date'] > window_start]

    fitted = recent[['Reported Date']].copy()
    features = recent.drop(
        columns=['Modal Price (Rs./Quintal)', 'Reported Date'], errors='ignore')
    fitted['Modal Price (Rs./Quintal)'] = model.predict(features)
    fitted['Type'] = 'Actual'

    forecast_part = future_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    forecast_part['Type'] = 'Forecasted'
    # Duplicate the final fitted point into the forecast trace so the
    # dashed line starts exactly where the solid line ends.
    bridge = fitted.iloc[[-1]].copy()
    bridge['Type'] = 'Forecasted'
    forecast_part = pd.concat([bridge, forecast_part])

    combined = pd.concat([fitted, forecast_part])

    fig = go.Figure()
    trace_styles = [('Actual', 'blue', 'solid'), ('Forecasted', 'red', 'dash')]
    for label, color, dash in trace_styles:
        subset = combined[combined['Type'] == label]
        fig.add_trace(go.Scatter(
            x=subset['Reported Date'],
            y=subset['Modal Price (Rs./Quintal)'],
            mode='lines',
            name=f"{label} Data",
            line=dict(color=color, dash=dash),
        ))
    fig.update_layout(
        title="Actual vs Forecasted Modal Price (Rs./Quintal)",
        xaxis_title="Date",
        yaxis_title="Modal Price (Rs./Quintal)",
        template="plotly_white",
    )
    st.plotly_chart(fig, width='stretch')
29
+
30
+
31
def download_button(future_df: pd.DataFrame, key: str):
    """Render a Streamlit button that downloads the forecast as an .xlsx file.

    Args:
        future_df: Forecast frame; must contain 'Reported Date' and
            'Modal Price (Rs./Quintal)' columns.
        key: Identifier appended to the download file name and used as the
            widget key so several buttons can coexist on one page.
    """
    export_df = future_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    export_df['Reported Date'] = pd.to_datetime(export_df['Reported Date']).dt.strftime('%Y-%m-%d')
    buffer = io.BytesIO()
    with pd.ExcelWriter(buffer, engine='xlsxwriter') as writer:
        export_df.to_excel(writer, index=False, sheet_name='Forecast')
    buffer.seek(0)
    st.download_button(
        label="Download Forecast as Excel",
        data=buffer,
        file_name=f"forecast_{key}.xlsx",
        # Explicit MIME type so browsers treat the payload as an Excel file
        # instead of Streamlit's generic octet-stream default.
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        # Unique widget key prevents duplicate-element errors when the button
        # is rendered for several filter keys in the same app run.
        key=f"download_{key}",
    )
39
+
40
+
41
def display_statistics(df):
    """Render the national market statistics dashboard in Streamlit.

    Aggregates the raw market frame to one row per date (mean modal price,
    summed arrivals) and renders: latest-day key figures, same-day values in
    previous years, monthly and yearly aggregates, the largest daily price
    moves of the past year, price extremes, and a rolling-feature snapshot.
    Finishes by rendering the sowing-report prediction tool.

    Args:
        df: Market data with 'Reported Date', 'Modal Price (Rs./Quintal)'
            and 'Arrivals (Tonnes)' columns. Note: 'Reported Date' is
            converted to datetime in place on the caller's frame.
    """
    st.title("📊 National Market Statistics Dashboard")
    st.markdown("""
        <style>
        h1 {
            color: #2e7d32;
            font-size: 36px;
            font-weight: bold;
        }
        h3 {
            color: #388e3c;
            font-size: 28px;
            font-weight: 600;
        }
        p {
            font-size: 16px;
            line-height: 1.6;
        }
        .highlight {
            background-color: #f1f8e9;
            padding: 10px;
            border-radius: 8px;
            font-size: 16px;
            color: #2e7d32;
            font-weight: 500;
        }
        </style>
    """, unsafe_allow_html=True)

    # Ensure 'Reported Date' is in datetime format
    df['Reported Date'] = pd.to_datetime(df['Reported Date'])
    # One row per calendar day: mean price across markets, total arrivals.
    national_data = df.groupby('Reported Date').agg({
        'Modal Price (Rs./Quintal)': 'mean',
        'Arrivals (Tonnes)': 'sum'
    }).reset_index()

    st.subheader("🗓️ Key Statistics")
    latest_date = national_data['Reported Date'].max()
    latest_price = national_data[national_data['Reported Date'] == latest_date]['Modal Price (Rs./Quintal)'].mean()
    latest_arrivals = national_data[national_data['Reported Date'] == latest_date]['Arrivals (Tonnes)'].sum()

    st.markdown("<p class='highlight'>This section provides the most recent statistics for the market. It includes the latest available date, the average price of commodities, and the total quantity of goods arriving at the market. These metrics offer an up-to-date snapshot of market conditions.</p>", unsafe_allow_html=True)
    st.write(f"**Latest Date**: {latest_date.strftime('%Y-%m-%d')}")
    st.write(f"**Latest Modal Price**: {latest_price:.2f} Rs./Quintal")
    st.write(f"**Latest Arrivals**: {latest_arrivals:.2f} Tonnes")

    st.subheader("📆 This Day in Previous Years")
    st.markdown("<p class='highlight'>This table shows the modal price and total arrivals for this exact day across previous years. It provides a historical perspective to compare against current market conditions. This section examines historical data for the same day in previous years. By analyzing trends for this specific day, you can identify seasonal patterns, supply-demand changes, or any deviations that might warrant closer attention.</p>", unsafe_allow_html=True)
    today = latest_date
    # Bug fix: take an explicit copy so adding the 'Year' column below does
    # not mutate a view of national_data (SettingWithCopyWarning).
    previous_years_data = national_data[national_data['Reported Date'].dt.dayofyear == today.dayofyear].copy()

    if not previous_years_data.empty:
        previous_years_data['Year'] = previous_years_data['Reported Date'].dt.year.astype(str)
        display_data = (previous_years_data[['Year', 'Modal Price (Rs./Quintal)', 'Arrivals (Tonnes)']]
                        .sort_values(by='Year', ascending=False)
                        .reset_index(drop=True))
        st.table(display_data)
    else:
        st.write("No historical data available for this day in previous years.")

    st.subheader("📅 Monthly Averages Over Years")
    st.markdown("<p class='highlight'>This section displays the average modal prices and arrivals for each month across all years. It helps identify seasonal trends and peak activity months, which can be crucial for inventory planning and market predictions.</p>", unsafe_allow_html=True)
    national_data['Month'] = national_data['Reported Date'].dt.month
    monthly_avg_price = national_data.groupby('Month')['Modal Price (Rs./Quintal)'].mean().reset_index()
    monthly_avg_arrivals = national_data.groupby('Month')['Arrivals (Tonnes)'].mean().reset_index()
    monthly_avg = pd.merge(monthly_avg_price, monthly_avg_arrivals, on='Month')
    monthly_avg['Month'] = monthly_avg['Month'].apply(lambda x: calendar.month_name[x])
    monthly_avg.columns = ['Month', 'Average Modal Price (Rs./Quintal)', 'Average Arrivals (Tonnes)']
    st.write(monthly_avg)

    st.subheader("📆 Yearly Averages")
    st.markdown("<p class='highlight'>Yearly averages provide insights into long-term trends in pricing and arrivals. By examining these values, you can detect overall growth, stability, or volatility in the market.</p>", unsafe_allow_html=True)
    national_data['Year'] = national_data['Reported Date'].dt.year
    yearly_avg_price = national_data.groupby('Year')['Modal Price (Rs./Quintal)'].mean().reset_index()
    yearly_sum_arrivals = national_data.groupby('Year')['Arrivals (Tonnes)'].sum().reset_index()
    yearly_avg = pd.merge(yearly_avg_price, yearly_sum_arrivals, on='Year')
    yearly_avg['Year'] = yearly_avg['Year'].apply(lambda x: f"{int(x)}")
    # Bug fix: arrivals are SUMMED per year above, so label them as totals
    # (the previous 'Average Arrivals (Tonnes)' header was misleading).
    yearly_avg.columns = ['Year', 'Average Modal Price (Rs./Quintal)', 'Total Arrivals (Tonnes)']
    st.write(yearly_avg)

    st.subheader("📈 Largest Daily Price Changes (Past Year)")
    st.markdown("<p class='highlight'>This analysis identifies the most significant daily price changes in the past year. These fluctuations can highlight periods of market volatility, potentially caused by external factors like weather, policy changes, or supply chain disruptions.</p>", unsafe_allow_html=True)
    one_year_ago = latest_date - pd.DateOffset(years=1)
    # Bug fix: copy the slice so the derived 'Daily Change (%)' column is
    # added to an independent frame, not a view of national_data.
    recent_data = national_data[national_data['Reported Date'] >= one_year_ago].copy()
    recent_data['Daily Change (%)'] = recent_data['Modal Price (Rs./Quintal)'].pct_change() * 100
    largest_changes = recent_data[['Reported Date', 'Modal Price (Rs./Quintal)', 'Daily Change (%)']].nlargest(5, 'Daily Change (%)')
    largest_changes['Reported Date'] = largest_changes['Reported Date'].dt.date
    largest_changes = largest_changes.reset_index(drop=True)
    st.write(largest_changes)

    st.subheader("🏆 Top 5 Highest and Lowest Prices (Past Year)")
    st.markdown("<p class='highlight'>This section highlights the highest and lowest prices over the past year. These values reflect the extremes of market dynamics, helping to understand price ceilings and floors in the recent period.</p>", unsafe_allow_html=True)
    highest_prices = recent_data.nlargest(5, 'Modal Price (Rs./Quintal)')[['Reported Date', 'Modal Price (Rs./Quintal)']]
    lowest_prices = recent_data.nsmallest(5, 'Modal Price (Rs./Quintal)')[['Reported Date', 'Modal Price (Rs./Quintal)']]
    highest_prices['Reported Date'] = highest_prices['Reported Date'].dt.date
    lowest_prices['Reported Date'] = lowest_prices['Reported Date'].dt.date
    highest_prices = highest_prices.reset_index(drop=True)
    lowest_prices = lowest_prices.reset_index(drop=True)
    st.write("**Top 5 Highest Prices**")
    st.write(highest_prices)
    st.write("**Top 5 Lowest Prices**")
    st.write(lowest_prices)

    st.subheader("🗂️ Data Snapshot")
    st.markdown("<p class='highlight'>This snapshot provides a concise overview of the latest data, including rolling averages and lagged values. These metrics help identify short-term trends and lagged effects in pricing.</p>", unsafe_allow_html=True)
    national_data['Rolling Mean (14 Days)'] = national_data['Modal Price (Rs./Quintal)'].rolling(window=14).mean()
    national_data['Lag (14 Days)'] = national_data['Modal Price (Rs./Quintal)'].shift(14)
    national_data['Reported Date'] = national_data['Reported Date'].dt.date
    national_data = national_data.sort_values(by='Reported Date', ascending=False)
    st.dataframe(national_data.head(14).reset_index(drop=True), width='stretch', height=525)

    editable_spreadsheet()
153
+
154
+
155
def editable_spreadsheet():
    """Render the sowing-report tool.

    Lets the user upload an Excel sheet, filter it by region and season, and
    predict production volume for a given sowing area via process_dataframe.
    """
    st.title("Sowing Report Prediction Model")

    # Excel file uploader
    uploaded_file = st.file_uploader("Upload your Excel file", type=['xlsx'])

    # Check if an Excel file is uploaded
    if uploaded_file is not None:
        # Read the Excel file
        df_excel = pd.read_excel(uploaded_file)

        # Display the DataFrame from the Excel file
        st.write("Excel data loaded:", df_excel)

        # Robustness fix: the filter below indexes these columns directly; a
        # sheet without them would otherwise crash the app with a KeyError.
        missing = {'Region', 'Season'} - set(df_excel.columns)
        if missing:
            st.error(f"The uploaded file is missing required column(s): {', '.join(sorted(missing))}.")
            return

        # Form for inputting filtering options and area for calculation
        with st.form("input_form"):
            input_region = st.text_input("Enter Region to Filter By", placeholder="Region Name")
            input_season = st.text_input("Enter Season to Filter By", placeholder="Season (e.g., Winter)")
            input_area = st.number_input("Enter Area (in hectares) for Production Calculation", min_value=0.0, format="%.2f")
            submit_button = st.form_submit_button("Calculate Production")

        if submit_button:
            if input_region and input_season and input_area > 0:
                # Filter data by the region and season specified
                filtered_df = df_excel[
                    (df_excel['Region'].str.lower() == input_region.lower()) &
                    (df_excel['Season'].str.lower() == input_season.lower())
                ]

                if not filtered_df.empty:
                    process_dataframe(filtered_df, input_area)
                else:
                    st.error("No data found for the specified region and season.")
            else:
                st.error("Please enter valid region, season, and area to proceed.")
190
+
191
+
192
def process_dataframe(df, area):
    """Estimate production volume as mean yield times area and report it.

    Requires a 'Yield' column in *df*; otherwise an error is shown instead.
    """
    if 'Yield' not in df.columns:
        st.error("The DataFrame does not contain a necessary 'Yield' column for calculation.")
        return
    average_yield = df['Yield'].mean()
    predicted_production = average_yield * area
    st.success(f"The predicted Production Volume for the specified region and season is: {predicted_production:.2f} units")
src/agri_predict/scraper.py ADDED
@@ -0,0 +1,282 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """API-based scraper for agmarknet.gov.in using direct API calls."""
2
+
3
+ import requests
4
+ import pandas as pd
5
+ from datetime import datetime, timedelta
6
+ from typing import Optional, List, Dict, Any
7
+ import logging
8
+ from pathlib import Path
9
+
10
# Configure logging
logger = logging.getLogger(__name__)
handler = logging.StreamHandler()
formatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
handler.setFormatter(formatter)
# Bug fix: only attach the handler if none exists yet. Re-executing this
# module (e.g. Streamlit reruns, notebook reloads) would otherwise stack a
# new StreamHandler each time and duplicate every log line.
if not logger.handlers:
    logger.addHandler(handler)
logger.setLevel(logging.INFO)
19
+
20
+
21
class AgmarknetAPIClient:
    """Client for the Agmarknet prices-and-arrivals API.

    Wraps per-day fetches of the Sesamum market report and provides parsers
    that flatten API responses into pandas DataFrames.
    """

    BASE_URL = "https://api.agmarknet.gov.in/v1/prices-and-arrivals/market-report/specific"

    # Fixed parameters
    COMMODITY_GROUP_ID = 3  # Commodity group for Sesamum
    COMMODITY_ID = 11  # Sesamum(Sesame,Gingelly,Til)
    INCLUDE_EXCEL = "false"

    # Timeout in seconds
    TIMEOUT = 30

    def __init__(self):
        """Initialize API client with a shared session (connection reuse)."""
        self.session = requests.Session()
        logger.info("Agmarknet API client initialized")

    def _log_api_call(self, date_str: str, url: str, status_code: int,
                      records_count: int = 0):
        """Log API call details.

        Args:
            date_str: Date string (YYYY-MM-DD)
            url: Full URL called
            status_code: HTTP status code
            records_count: Number of records fetched
        """
        logger.info(
            f"API CALL | Date: {date_str} | Status: {status_code} | "
            f"Records: {records_count} | URL: {url}"
        )

    def fetch_market_data(self, date: str) -> Optional[Dict[str, Any]]:
        """Fetch market data for a specific date.

        Args:
            date: Date string in format YYYY-MM-DD

        Returns:
            API response dictionary, or None on any network/HTTP/JSON error
            or when the API reports failure (errors are logged, not raised).
        """
        url = (
            f"{self.BASE_URL}?date={date}&"
            f"commodityGroupId={self.COMMODITY_GROUP_ID}&"
            f"commodityId={self.COMMODITY_ID}&"
            f"includeExcel={self.INCLUDE_EXCEL}"
        )

        try:
            logger.info(f"Fetching data for date: {date}")
            response = self.session.get(url, timeout=self.TIMEOUT)
            response.raise_for_status()

            data = response.json()

            if data.get("success"):
                # Count total records
                total_records = self._count_records(data)
                self._log_api_call(date, url, response.status_code, total_records)
                logger.info(
                    f"✅ Successfully fetched data | Date: {date} | "
                    f"Total records: {total_records}"
                )
                return data
            else:
                logger.error(
                    f"❌ API returned failure | Date: {date} | "
                    f"Message: {data.get('message', 'Unknown error')}"
                )
                return None

        except requests.exceptions.Timeout:
            logger.error(f"❌ Timeout error for date: {date}")
            return None
        except requests.exceptions.HTTPError as e:
            logger.error(f"❌ HTTP error for date: {date} | Status: {e.response.status_code}")
            return None
        except requests.exceptions.RequestException as e:
            logger.error(f"❌ Request error for date: {date} | Error: {str(e)}")
            return None
        except ValueError as e:
            # response.json() raises ValueError on non-JSON bodies.
            logger.error(f"❌ JSON decode error for date: {date} | Error: {str(e)}")
            return None

    def fetch_date_range(self, start_date: str, end_date: str) -> List[Dict[str, Any]]:
        """Fetch market data for a date range (inclusive), one call per day.

        Args:
            start_date: Start date (YYYY-MM-DD)
            end_date: End date (YYYY-MM-DD)

        Returns:
            List of successful API response dictionaries; days that failed
            are skipped. Empty list on invalid input.
        """
        logger.info(f"Starting date range fetch | From: {start_date} To: {end_date}")

        try:
            start = datetime.strptime(start_date, "%Y-%m-%d")
            end = datetime.strptime(end_date, "%Y-%m-%d")
        except ValueError as e:
            logger.error(f"❌ Invalid date format | Error: {str(e)}")
            return []

        if start > end:
            # Bug fix: plain string (was an f-string with no placeholders).
            logger.error("❌ Start date cannot be after end date")
            return []

        results = []
        current = start

        logger.info(f"Fetching {(end - start).days + 1} days of data...")

        while current <= end:
            date_str = current.strftime("%Y-%m-%d")
            data = self.fetch_market_data(date_str)

            if data:
                results.append(data)

            current += timedelta(days=1)

        logger.info(
            f"✅ Completed date range fetch | "
            f"Total days: {(end - start).days + 1} | "
            f"Successful fetches: {len(results)}"
        )

        return results

    @staticmethod
    def _count_records(data: Dict[str, Any]) -> int:
        """Count total data entries across all states and markets.

        Args:
            data: API response dictionary

        Returns:
            Total number of records
        """
        count = 0
        states = data.get("states", [])

        for state in states:
            markets = state.get("markets", [])
            for market in markets:
                market_data = market.get("data", [])
                count += len(market_data)

        return count

    @staticmethod
    def parse_response_to_dataframe(api_response: Dict[str, Any]) -> pd.DataFrame:
        """Parse one API response into a flat DataFrame.

        Args:
            api_response: API response dictionary

        Returns:
            Flattened DataFrame with one row per market data entry.
        """
        records = []

        # Extract report date from title.
        # Format: "Market wise Daily Report for Sesamum(Sesame,Gingelly,Til) on 01-Nov-2025"
        title = api_response.get("title", "")
        reported_date = None
        if " on " in title:
            date_part = title.split(" on ")[-1].strip()
            try:
                reported_date = pd.to_datetime(date_part, format="%d-%b-%Y")
            except (TypeError, ValueError):
                # Bug fix: was a bare `except:`, which also swallowed
                # KeyboardInterrupt/SystemExit; only parse failures belong here.
                reported_date = None

        commodity_name = api_response.get("commodityName", "")
        states = api_response.get("states", [])

        for state in states:
            state_name = state.get("stateName", "")
            state_id = state.get("stateId")

            markets = state.get("markets", [])
            for market in markets:
                market_name = market.get("marketName", "")
                # Remove "APMC" suffix if present
                if market_name.endswith(" APMC"):
                    market_name = market_name[:-5].strip()
                market_id = market.get("marketId")

                market_data = market.get("data", [])
                for entry in market_data:
                    record = {
                        "Reported Date": reported_date,
                        "State Name": state_name,
                        "District Name": state_name,  # Using state name as district for now
                        "Market Name": market_name,
                        "Variety": entry.get("variety"),
                        "Group": "Oil Seeds",
                        "Arrivals (Tonnes)": entry.get("arrivals"),
                        "Min Price (Rs./Quintal)": entry.get("minimumPrice"),
                        "Max Price (Rs./Quintal)": entry.get("maximumPrice"),
                        "Modal Price (Rs./Quintal)": entry.get("modalPrice"),
                        "Grade": entry.get("grade"),
                    }
                    records.append(record)

        df = pd.DataFrame(records)

        logger.info(f"Parsed API response to DataFrame | Records: {len(df)}")

        return df

    @staticmethod
    def parse_multiple_responses_to_dataframe(
        responses: List[Dict[str, Any]]
    ) -> pd.DataFrame:
        """Parse multiple API responses into a single DataFrame.

        Args:
            responses: List of API response dictionaries

        Returns:
            Combined DataFrame (empty when *responses* is empty).
        """
        dfs = []

        for response in responses:
            df = AgmarknetAPIClient.parse_response_to_dataframe(response)
            dfs.append(df)

        # Robustness fix: pd.concat raises ValueError on an empty list; an
        # empty input should simply yield an empty frame.
        if not dfs:
            logger.info("No API responses to combine; returning empty DataFrame")
            return pd.DataFrame()

        combined_df = pd.concat(dfs, ignore_index=True)

        logger.info(
            f"Combined {len(responses)} API responses into DataFrame | "
            f"Total records: {len(combined_df)}"
        )

        return combined_df

    def export_response_to_file(self, api_response: Dict[str, Any],
                                filename: str = "api_response.json"):
        """Export API response to a JSON file (errors are logged, not raised).

        Args:
            api_response: API response dictionary
            filename: Output filename
        """
        import json

        filepath = Path(filename)

        try:
            with open(filepath, 'w') as f:
                json.dump(api_response, f, indent=2)

            logger.info(f"✅ Exported API response to file | Path: {filepath}")
        except Exception as e:
            logger.error(f"❌ Failed to export API response | Error: {str(e)}")
279
+
280
+
281
# Global client instance
# NOTE(review): instantiated at import time, so importing this module creates
# a requests.Session immediately; all importers share this singleton (and its
# connection pool). Confirm that import-time construction is intended.
api_client = AgmarknetAPIClient()
src/agri_predict/utils.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from werkzeug.security import check_password_hash, generate_password_hash
3
+
4
+
5
def collection_to_dataframe(collection, drop_id=True):
    """Materialise a MongoDB-style collection into a pandas DataFrame.

    Args:
        collection: Any object exposing ``find()`` that yields dict records.
        drop_id: When True (default), drop Mongo's ``_id`` column if present.
    """
    frame = pd.DataFrame(list(collection.find()))
    if drop_id and '_id' in frame.columns:
        frame = frame.drop(columns=['_id'])
    return frame
11
+
12
+
13
def authenticate_user(username: str, password: str, users_collection=None) -> bool:
    """Check *username*/*password* against the stored password hash.

    Args:
        username: Username to authenticate.
        password: Plain-text password to verify against the stored hash.
        users_collection: Optional cached users collection (avoids a fresh
            connection). If None, the collection is fetched from app config.

    Returns:
        True when the user exists and the password matches, else False.
    """
    if users_collection is None:
        from .config import get_collections
        users_collection = get_collections()['users_collection']

    record = users_collection.find_one({"username": username})
    return bool(record and check_password_hash(record.get('password', ''), password))
30
+
31
+
32
def save_best_params(collection, filter_key, best_params):
    """Upsert tuned hyperparameters for *filter_key*, stamped with save time.

    Builds a merged document (so the caller's dict is never mutated) keyed by
    ``filter_key`` and replaces any existing record, inserting if absent.
    """
    document = {
        **best_params,
        "filter_key": filter_key,
        "last_updated": pd.Timestamp.now().isoformat(),
    }
    collection.replace_one({"filter_key": filter_key}, document, upsert=True)
37
+
38
+
39
def get_best_params(filter_key, collection):
    """Fetch the stored hyperparameter record for *filter_key* (None if absent)."""
    query = {"filter_key": filter_key}
    return collection.find_one(query)