Alaiy committed on
Commit
662dc00
·
verified ·
1 Parent(s): 270fa01

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +364 -393
app.py CHANGED
@@ -20,13 +20,21 @@ from itertools import product
20
  from tqdm import tqdm
21
  import io
22
  from statsmodels.tsa.statespace.sarimax import SARIMAX
 
 
 
 
 
 
 
 
 
23
 
24
  mongo_uri = "mongodb+srv://Agripredict:TjXSvMhOis49qH8E@cluster0.gek7n.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0"
25
  if not mongo_uri:
26
  st.error("MongoDB URI is not set!")
27
  st.stop()
28
  else:
29
- # Connect to MongoDB with SSL certificate validation
30
  client = MongoClient(mongo_uri, tlsCAFile=certifi.where())
31
  db = client["AgriPredict"]
32
  collection = db["WhiteSesame"]
@@ -291,39 +299,27 @@ def create_forecasting_features_3m(df):
291
 
292
 
293
def preprocess_data(df):
    """Clean and regularize the raw price data into a contiguous daily series.

    Steps:
      1. Keep only the date and modal-price columns.
      2. Parse dates and average duplicate same-day observations.
      3. Reindex onto the full daily date range (inserting missing days).
      4. Fill gaps by forward-fill, then backward-fill (covers a leading gap).

    Args:
        df: DataFrame containing at least 'Reported Date' and
            'Modal Price (Rs./Quintal)' columns.

    Returns:
        DataFrame with one row per calendar day between the min and max
        observed dates, columns 'Reported Date' (datetime64) and
        'Modal Price (Rs./Quintal)' (gap-filled daily mean price).
    """
    # .copy() avoids SettingWithCopy warnings and mutating the caller's frame.
    df = df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    df['Reported Date'] = pd.to_datetime(df['Reported Date'])

    # Collapse multiple reports on the same day to their mean price.
    df = df.groupby('Reported Date', as_index=False).mean()

    # Reindex onto every calendar day so downstream lag/rolling features
    # see a regular daily frequency.
    full_date_range = pd.date_range(df['Reported Date'].min(), df['Reported Date'].max())
    df = (
        df.set_index('Reported Date')
        .reindex(full_date_range)
        .rename_axis('Reported Date')
        .reset_index()
    )

    # ffill/bfill replace the deprecated fillna(method=...) form; bfill only
    # matters if the very first day(s) were missing.
    df['Modal Price (Rs./Quintal)'] = (
        df['Modal Price (Rs./Quintal)'].ffill().bfill()
    )
    return df
312
 
 
313
  def train_and_evaluate(df):
314
  import streamlit as st
315
-
316
- # Add progress bar for hyperparameter tuning
317
  progress_bar = st.progress(0)
318
-
319
- # Helper function to update progress during hyperparameter tuning
320
  def update_tuning_progress(current, total):
321
  progress = int((current / total) * 100)
322
  progress_bar.progress(progress)
323
 
324
  df = create_forecasting_features(df)
325
 
326
- # Split the data into training and testing sets
327
  train_df = df[df['Reported Date'] < '2024-01-01']
328
  test_df = df[df['Reported Date'] >= '2024-01-01']
329
 
@@ -331,8 +327,6 @@ def train_and_evaluate(df):
331
  y_train = train_df['Modal Price (Rs./Quintal)']
332
  X_test = test_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
333
  y_test = test_df['Modal Price (Rs./Quintal)']
334
-
335
- # Hyperparameter tuning
336
  st.write("Performing hyperparameter tuning...")
337
  param_grid = {
338
  'learning_rate': [0.01, 0.1, 0.2],
@@ -345,7 +339,7 @@ def train_and_evaluate(df):
345
  param_combinations = len(param_grid['learning_rate']) * len(param_grid['max_depth']) * \
346
  len(param_grid['n_estimators']) * len(param_grid['booster'])
347
 
348
- current_combination = 0 # Counter for combinations
349
 
350
  def custom_grid_search():
351
  nonlocal current_combination
@@ -371,26 +365,22 @@ def train_and_evaluate(df):
371
  'n_estimators': n_estimators,
372
  'booster': booster
373
  }
374
- # Update progress bar
375
  current_combination += 1
376
  update_tuning_progress(current_combination, param_combinations)
377
  return best_params
378
 
379
  best_params = custom_grid_search()
380
-
381
- # Train the best model with the identified parameters
382
  st.write("Training the best model and making predictions...")
383
  best_model = XGBRegressor(**best_params)
384
  best_model.fit(X_train, y_train)
385
  y_pred = best_model.predict(X_test)
386
 
387
- # Metrics
388
  rmse = mean_squared_error(y_test, y_pred, squared=False)
389
  mae = mean_absolute_error(y_test, y_pred)
390
  st.write(f"RMSE: {rmse}")
391
  st.write(f"MAE: {mae}")
392
-
393
- # Prepare data for plotting
394
  train_plot_df = train_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
395
  train_plot_df['Type'] = 'Train'
396
 
@@ -425,7 +415,6 @@ def train_and_evaluate(df):
425
 
426
  st.plotly_chart(fig, use_container_width=True)
427
 
428
- # Return best parameters
429
  return best_params
430
 
431
  def train_and_evaluate_1m(df):
@@ -435,19 +424,16 @@ def train_and_evaluate_1m(df):
435
  from xgboost import XGBRegressor
436
  from sklearn.metrics import mean_squared_error, mean_absolute_error
437
 
438
- # Add progress bar for hyperparameter tuning
439
  progress_bar = st.progress(0)
440
 
441
- # Helper function to update progress during hyperparameter tuning
442
  def update_tuning_progress(current, total):
443
  progress = int((current / total) * 100)
444
  progress_bar.progress(progress)
445
 
446
  df = create_forecasting_features_1m(df)
447
-
448
- # Define train-test split for a 1-month horizon
449
  split_date = pd.to_datetime("2024-01-01")
450
- test_horizon = pd.DateOffset(days=30) # 1-month horizon
451
 
452
  train_df = df[df['Reported Date'] < split_date]
453
  test_df = df[(df['Reported Date'] >= split_date) & (df['Reported Date'] < split_date + test_horizon)]
@@ -457,7 +443,6 @@ def train_and_evaluate_1m(df):
457
  X_test = test_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
458
  y_test = test_df['Modal Price (Rs./Quintal)']
459
 
460
- # Hyperparameter tuning
461
  st.write("Performing hyperparameter tuning...")
462
  param_grid = {
463
  'learning_rate': [0.01, 0.1, 0.2],
@@ -470,7 +455,7 @@ def train_and_evaluate_1m(df):
470
  param_combinations = len(param_grid['learning_rate']) * len(param_grid['max_depth']) * \
471
  len(param_grid['n_estimators']) * len(param_grid['booster'])
472
 
473
- current_combination = 0 # Counter for combinations
474
 
475
  def custom_grid_search():
476
  nonlocal current_combination
@@ -496,26 +481,21 @@ def train_and_evaluate_1m(df):
496
  'n_estimators': n_estimators,
497
  'booster': booster
498
  }
499
- # Update progress bar
500
  current_combination += 1
501
  update_tuning_progress(current_combination, param_combinations)
502
  return best_params
503
 
504
  best_params = custom_grid_search()
505
-
506
- # Train the best model with the identified parameters
507
  st.write("Training the best model and making predictions...")
508
  best_model = XGBRegressor(**best_params)
509
  best_model.fit(X_train, y_train)
510
  y_pred = best_model.predict(X_test)
511
 
512
- # Metrics
513
  rmse = mean_squared_error(y_test, y_pred, squared=False)
514
  mae = mean_absolute_error(y_test, y_pred)
515
  st.write(f"RMSE: {rmse}")
516
  st.write(f"MAE: {mae}")
517
 
518
- # Prepare data for plotting
519
  train_plot_df = train_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
520
  train_plot_df['Type'] = 'Train'
521
 
@@ -550,16 +530,11 @@ def train_and_evaluate_1m(df):
550
 
551
  st.plotly_chart(fig, use_container_width=True)
552
 
553
- # Return best parameters
554
  return best_params
555
 
556
  def train_and_evaluate_3m(df):
557
  import streamlit as st
558
-
559
- # Add progress bar for hyperparameter tuning
560
  progress_bar = st.progress(0)
561
-
562
- # Helper function to update progress during hyperparameter tuning
563
  def update_tuning_progress(current, total):
564
  progress = int((current / total) * 100)
565
  progress_bar.progress(progress)
@@ -573,7 +548,6 @@ def train_and_evaluate_3m(df):
573
  X_test = test_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
574
  y_test = test_df['Modal Price (Rs./Quintal)']
575
 
576
- # Hyperparameter tuning
577
  st.write("Performing hyperparameter tuning...")
578
  param_grid = {
579
  'learning_rate': [0.01, 0.1, 0.2],
@@ -586,7 +560,7 @@ def train_and_evaluate_3m(df):
586
  param_combinations = len(param_grid['learning_rate']) * len(param_grid['max_depth']) * \
587
  len(param_grid['n_estimators']) * len(param_grid['booster'])
588
 
589
- current_combination = 0 # Counter for combinations
590
 
591
  def custom_grid_search():
592
  nonlocal current_combination
@@ -612,26 +586,21 @@ def train_and_evaluate_3m(df):
612
  'n_estimators': n_estimators,
613
  'booster': booster
614
  }
615
- # Update progress bar
616
  current_combination += 1
617
  update_tuning_progress(current_combination, param_combinations)
618
  return best_params
619
 
620
  best_params = custom_grid_search()
621
-
622
- # Train the best model with the identified parameters
623
  st.write("Training the best model and making predictions...")
624
  best_model = XGBRegressor(**best_params)
625
  best_model.fit(X_train, y_train)
626
  y_pred = best_model.predict(X_test)
627
 
628
- # Metrics
629
  rmse = mean_squared_error(y_test, y_pred, squared=False)
630
  mae = mean_absolute_error(y_test, y_pred)
631
  st.write(f"RMSE: {rmse}")
632
  st.write(f"MAE: {mae}")
633
 
634
- # Prepare data for plotting
635
  train_plot_df = train_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
636
  train_plot_df['Type'] = 'Train'
637
 
@@ -666,15 +635,12 @@ def train_and_evaluate_3m(df):
666
 
667
  st.plotly_chart(fig, use_container_width=True)
668
 
669
- # Return best parameters
670
  return best_params
671
 
672
  def forecast_next_14_days(df, _best_params, key):
673
  last_date = df['Reported Date'].max()
674
  future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=14)
675
  future_df = pd.DataFrame({'Reported Date': future_dates})
676
-
677
- # Assuming 'create_forecasting_features' function is defined elsewhere
678
  full_df = pd.concat([df, future_df], ignore_index=True)
679
  full_df = create_forecasting_features(full_df)
680
 
@@ -690,8 +656,6 @@ def forecast_next_14_days(df, _best_params, key):
690
 
691
  future_predictions = model.predict(X_future)
692
  future_df['Modal Price (Rs./Quintal)'] = future_predictions
693
-
694
- # Pass model to plot_data
695
  plot_data(original_df, future_df, last_date, 14)
696
  download_button(future_df, key)
697
 
@@ -699,8 +663,6 @@ def forecast_next_30_days(df, _best_params, key):
699
  last_date = df['Reported Date'].max()
700
  future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=30)
701
  future_df = pd.DataFrame({'Reported Date': future_dates})
702
-
703
- # Assuming 'create_forecasting_features' function is defined elsewhere
704
  full_df = pd.concat([df, future_df], ignore_index=True)
705
  full_df = create_forecasting_features_1m(full_df)
706
 
@@ -716,8 +678,6 @@ def forecast_next_30_days(df, _best_params, key):
716
 
717
  future_predictions = model.predict(X_future)
718
  future_df['Modal Price (Rs./Quintal)'] = future_predictions
719
-
720
- # Pass model to plot_data
721
  plot_data(original_df, future_df, last_date, 30)
722
  download_button(future_df, key)
723
 
@@ -725,8 +685,6 @@ def forecast_next_90_days(df, _best_params, key):
725
  last_date = df['Reported Date'].max()
726
  future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=90)
727
  future_df = pd.DataFrame({'Reported Date': future_dates})
728
-
729
- # Assuming 'create_forecasting_features' function is defined elsewhere
730
  full_df = pd.concat([df, future_df], ignore_index=True)
731
  full_df = create_forecasting_features_3m(full_df)
732
 
@@ -742,29 +700,19 @@ def forecast_next_90_days(df, _best_params, key):
742
 
743
  future_predictions = model.predict(X_future)
744
  future_df['Modal Price (Rs./Quintal)'] = future_predictions
745
-
746
- # Pass model to plot_data
747
  plot_data(original_df, future_df, last_date, 90)
748
  download_button(future_df, key)
749
 
750
  def plot_data(original_df, future_df, last_date, days):
751
- # Filter original_df for the period you want to plot.
752
  actual_df = original_df[original_df['Reported Date'] >= (last_date - pd.Timedelta(days=days))].copy()
753
  actual_df['Type'] = 'Actual'
754
-
755
- # Prepare the future_df (predicted data) and mark it as forecasted.
756
  future_plot_df = future_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
757
  future_plot_df['Type'] = 'Forecasted'
758
-
759
- # Get the last actual data point from actual_df.
760
- # Ensure the DataFrame is sorted by date.
761
  last_actual_point = actual_df.sort_values('Reported Date').iloc[[-1]].copy()
762
  future_plot_df = pd.concat([last_actual_point, future_plot_df])
763
-
764
- # Combine both actual and forecasted data for plotting.
765
  plot_df = pd.concat([actual_df, future_plot_df])
766
-
767
- # Create the plot.
768
  fig = go.Figure()
769
  for plot_type, color, dash in [('Actual', 'blue', 'solid'), ('Forecasted', 'red', 'dash')]:
770
  data = plot_df[plot_df['Type'] == plot_type]
@@ -787,18 +735,11 @@ def plot_data(original_df, future_df, last_date, days):
787
 
788
 
789
  def download_button(future_df, key):
790
- # Create a new DataFrame with only 'Reported Date' and 'Modal Price (Rs./Quintal)'
791
  download_df = future_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
792
-
793
- # Format 'Reported Date' to display only the date in YYYY-MM-DD format
794
  download_df['Reported Date'] = download_df['Reported Date'].dt.strftime('%Y-%m-%d')
795
-
796
- # Write to Excel without the index
797
  towrite = io.BytesIO()
798
- download_df.to_excel(towrite, index=False, engine='xlsxwriter') # Using 'xlsxwriter' for the Excel engine
799
  towrite.seek(0)
800
-
801
- # Create a download button for the Excel file
802
  st.download_button(label="Download Forecasted Values",
803
  data=towrite,
804
  file_name=f'forecasted_prices_{key}.xlsx',
@@ -831,15 +772,13 @@ def save_best_params(collection, filter_key, best_params):
831
  collection.replace_one({"filter_key": filter_key}, best_params)
832
  else:
833
  collection.insert_one(best_params)
834
-
835
- # Function to retrieve best_params from MongoDB
836
def get_best_params(filter_key, collection):
    """Look up the stored best hyperparameters for *filter_key*.

    Args:
        filter_key: Unique key identifying the state/market/model combination.
        collection: MongoDB collection holding saved parameter documents.

    Returns:
        The matching document (dict) or None when no record exists.
    """
    return collection.find_one({"filter_key": filter_key})
839
- # Function to handle training and forecasting
840
  def train_and_forecast(df, filter_key, days):
841
  if df is not None:
842
- # Train the model and save parameters to MongoDB
843
  if days==14:
844
  best_params = train_and_evaluate(df)
845
  save_best_params(filter_key, best_params, best_params_collection)
@@ -852,6 +791,8 @@ def train_and_forecast(df, filter_key, days):
852
  best_params = train_and_evaluate_3m(df)
853
  save_best_params(filter_key, best_params, best_params_collection_3m)
854
  forecast_next_90_days(df, best_params, filter_key)
 
 
855
 
856
  def forecast(df, filter_key, days):
857
  if days==14:
@@ -887,13 +828,9 @@ def collection_to_dataframe(collection, drop_id=True):
887
  Returns:
888
  pd.DataFrame: DataFrame containing the collection data.
889
  """
890
- # Fetch all documents from the collection
891
  documents = list(collection.find())
892
 
893
- # Convert to a pandas DataFrame
894
  df = pd.DataFrame(documents)
895
-
896
- # Drop the MongoDB "_id" column if specified
897
  if drop_id and '_id' in df.columns:
898
  df = df.drop(columns=['_id'])
899
 
@@ -903,19 +840,12 @@ def collection_to_dataframe(collection, drop_id=True):
903
 
904
  def editable_spreadsheet():
905
  st.title("Sowing Report Prediction Model")
906
-
907
- # Excel file uploader
908
  uploaded_file = st.file_uploader("Upload your Excel file", type=['xlsx'])
909
-
910
- # Check if an Excel file is uploaded
911
  if uploaded_file is not None:
912
- # Read the Excel file
913
  df_excel = pd.read_excel(uploaded_file)
914
-
915
- # Display the DataFrame from the Excel file
916
  st.write("Excel data loaded:", df_excel)
917
 
918
- # Form for inputting filtering options and area for calculation
919
  with st.form("input_form"):
920
  input_region = st.text_input("Enter Region to Filter By", placeholder="Region Name")
921
  input_season = st.text_input("Enter Season to Filter By", placeholder="Season (e.g., Winter)")
@@ -924,7 +854,6 @@ def editable_spreadsheet():
924
 
925
  if submit_button:
926
  if input_region and input_season and input_area > 0:
927
- # Filter data by the region and season specified
928
  filtered_df = df_excel[
929
  (df_excel['Region'].str.lower() == input_region.lower()) &
930
  (df_excel['Season'].str.lower() == input_season.lower())
@@ -975,8 +904,6 @@ def display_statistics(df):
975
  }
976
  </style>
977
  """, unsafe_allow_html=True)
978
-
979
- # Ensure 'Reported Date' is in datetime format
980
  df['Reported Date'] = pd.to_datetime(df['Reported Date'])
981
  national_data = df.groupby('Reported Date').agg({
982
  'Modal Price (Rs./Quintal)': 'mean',
@@ -986,12 +913,13 @@ def display_statistics(df):
986
  st.subheader("🗓️ Key Statistics")
987
  latest_date = national_data['Reported Date'].max()
988
  latest_price = national_data[national_data['Reported Date'] == latest_date]['Modal Price (Rs./Quintal)'].mean()
 
989
  latest_arrivals = national_data[national_data['Reported Date'] == latest_date]['Arrivals (Tonnes)'].sum()
990
 
991
  st.markdown("<p class='highlight'>This section provides the most recent statistics for the market. It includes the latest available date, the average price of commodities, and the total quantity of goods arriving at the market. These metrics offer an up-to-date snapshot of market conditions.</p>", unsafe_allow_html=True)
992
  st.write(f"**Latest Date**: {latest_date.strftime('%Y-%m-%d')}")
993
  st.write(f"**Latest Modal Price**: {latest_price:.2f} Rs./Quintal")
994
- st.write(f"**Latest Arrivals**: {latest_arrivals:.2f} Tonnes")
995
 
996
  st.subheader("📆 This Day in Previous Years")
997
  st.markdown("<p class='highlight'>This table shows the modal price and total arrivals for this exact day across previous years. It provides a historical perspective to compare against current market conditions. This section examines historical data for the same day in previous years. By analyzing trends for this specific day, you can identify seasonal patterns, supply-demand changes, or any deviations that might warrant closer attention.</p>", unsafe_allow_html=True)
@@ -1060,166 +988,259 @@ def display_statistics(df):
1060
  editable_spreadsheet()
1061
 
1062
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1063
 
1064
  def fetch_and_store_data():
 
1065
  latest_doc = collection.find_one(sort=[("Reported Date", -1)])
1066
- if latest_doc and "Reported Date" in latest_doc:
1067
- latest_date = latest_doc["Reported Date"]
1068
- else:
1069
- latest_date = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1070
 
1071
- if latest_date:
1072
- from_date = (latest_date + timedelta(days=1)).strftime('%d %b %Y')
1073
- else:
1074
- # If no latest date, set a default from_date
1075
- from_date = "01 Jan 2000"
1076
-
1077
- to_date = (datetime.now() - timedelta(days=1)).strftime('%d %b %Y')
1078
- from_date_obj = datetime.strptime(from_date, '%d %b %Y')
1079
- to_date_obj = datetime.strptime(to_date, '%d %b %Y')
1080
- if to_date_obj < from_date_obj:
1081
- print("Data already scraped")
1082
- return None
1083
- # Build the URL to be requested
1084
- base_url = "https://agmarknet.gov.in/SearchCmmMkt.aspx"
1085
- params = {
1086
- "Tx_Commodity": "11",
1087
- "Tx_State": "0",
1088
- "Tx_District": "0",
1089
- "Tx_Market": "0",
1090
- "DateFrom": from_date,
1091
- "DateTo": to_date,
1092
- "Fr_Date": from_date,
1093
- "To_Date": to_date,
1094
- "Tx_Trend": "2",
1095
- "Tx_CommodityHead": "Sesamum(Sesame,Gingelly,Til)",
1096
- "Tx_StateHead": "--Select--",
1097
- "Tx_DistrictHead": "--Select--",
1098
- "Tx_MarketHead": "--Select--"
1099
- }
1100
-
1101
- full_url = f"{base_url}?{'&'.join(f'{k}={v}' for k, v in params.items())}"
1102
- api_url = "https://api.scraperapi.com"
1103
- api_key = "bbbbde6b56c0fde1e2a61c914eb22d14"
1104
- scraperapi_params = {
1105
- 'api_key': api_key,
1106
- 'url': full_url
1107
- }
1108
 
1109
- response = requests.get(api_url, params=scraperapi_params)
1110
-
1111
- if response.status_code == 200:
1112
- soup = BeautifulSoup(response.content, 'html.parser')
1113
- table = soup.find("table", {"class": "tableagmark_new"})
1114
- if table:
1115
- headers = [th.get_text(strip=True) for th in table.find_all("th")]
1116
- rows = [[td.get_text(strip=True) for td in row.find_all("td")] for row in table.find_all("tr")[1:]]
1117
-
1118
- df = pd.DataFrame(rows, columns=headers)
1119
- df = df[df['Variety']=="White"]
1120
- df["Reported Date"] = pd.to_datetime(df["Reported Date"], format='%d %b %Y', errors='coerce')
1121
- df.dropna(subset=["Reported Date"], inplace=True)
1122
- df.sort_values(by="Reported Date", inplace=True)
1123
- df.rename(columns={"State Name": "state"}, inplace=True)
1124
-
1125
- # Type casting for the columns
1126
- df["Modal Price (Rs./Quintal)"] = pd.to_numeric(df["Modal Price (Rs./Quintal)"], errors='coerce').astype("int64")
1127
- df["Arrivals (Tonnes)"] = pd.to_numeric(df["Arrivals (Tonnes)"], errors='coerce').astype("float64")
1128
- df["state"] = df["state"].astype("string")
1129
- df["Market Name"] = df["Market Name"].astype("string")
1130
- records = df.to_dict(orient="records")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1131
  if records:
1132
  collection.insert_many(records)
1133
- print(f"Inserted {len(records)} new records into MongoDB.")
1134
  else:
1135
- print("No new records to insert.")
1136
 
1137
- return df
 
1138
 
1139
- else:
1140
- print(f"Failed to fetch data with status code: {response.status_code}")
1141
- return None
1142
 
1143
-
1144
  def fetch_and_store_data_market():
 
1145
  latest_doc = market_price_data.find_one(sort=[("Reported Date", -1)])
1146
- if latest_doc and "Reported Date" in latest_doc:
1147
- latest_date = latest_doc["Reported Date"]
1148
- else:
1149
- latest_date = None
1150
-
1151
- if latest_date:
1152
- from_date = (latest_date + timedelta(days=1)).strftime('%d %b %Y')
1153
- else:
1154
- # If no latest date, set a default from_date
1155
- from_date = "01 Jan 2000"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1156
 
1157
- to_date = (datetime.now() - timedelta(days=1)).strftime('%d %b %Y')
1158
- from_date_obj = datetime.strptime(from_date, '%d %b %Y')
1159
- to_date_obj = datetime.strptime(to_date, '%d %b %Y')
1160
-
1161
- if to_date_obj <= from_date_obj:
1162
- st.write("Data already scraped")
1163
- return None
1164
- base_url = "https://agmarknet.gov.in/SearchCmmMkt.aspx"
1165
- params = {
1166
- "Tx_Commodity": "11",
1167
- "Tx_State": "0",
1168
- "Tx_District": "0",
1169
- "Tx_Market": "0",
1170
- "DateFrom": from_date,
1171
- "DateTo": to_date,
1172
- "Fr_Date": from_date,
1173
- "To_Date": to_date,
1174
- "Tx_Trend": "0",
1175
- "Tx_CommodityHead": "Sesamum(Sesame,Gingelly,Til)",
1176
- "Tx_StateHead": "--Select--",
1177
- "Tx_DistrictHead": "--Select--",
1178
- "Tx_MarketHead": "--Select--"
1179
  }
1180
-
1181
- full_url = f"{base_url}?{'&'.join(f'{k}={v}' for k, v in params.items())}"
1182
- api_url = "https://api.scraperapi.com"
1183
- api_key = "8842750a88db7513a1d19325745437cc"
1184
- scraperapi_params = {
1185
- 'api_key': api_key,
1186
- 'url': full_url
1187
- }
1188
 
1189
- response = requests.get(api_url, params=scraperapi_params)
1190
- if response.status_code == 200:
1191
- soup = BeautifulSoup(response.content, 'html.parser')
1192
- table = soup.find("table", {"class": "tableagmark_new"})
1193
- if table:
1194
- headers = [th.get_text(strip=True) for th in table.find_all("th")]
1195
- rows = []
1196
-
1197
- for row in table.find_all("tr")[1:]:
1198
- cells = [td.get_text(strip=True) for td in row.find_all("td")]
1199
- if cells:
1200
- rows.append(cells)
1201
- df = pd.DataFrame(rows, columns=headers)
1202
- df = df[df['Variety']=="White"]
1203
- df["Price Date"] = pd.to_datetime(df["Price Date"], format='%d %b %Y', errors='coerce')
1204
- df.dropna(subset=["Price Date"], inplace=True)
1205
- df.sort_values(by="Price Date", inplace=True)
1206
- df = df[df["Grade"]=="FAQ"]
1207
- df["Modal Price (Rs./Quintal)"] = pd.to_numeric(df["Modal Price (Rs./Quintal)"], errors='coerce').astype("int64")
1208
- df["Market Name"] = df["Market Name"].astype("string")
1209
- df.rename(columns={"Price Date": "Reported Date"}, inplace=True)
1210
- records = df.to_dict(orient="records")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1211
  if records:
1212
  market_price_data.insert_many(records)
1213
- print(f"Inserted {len(records)} new records into MongoDB.")
1214
  else:
1215
- print("No new records to insert.")
1216
- return df
1217
- else:
1218
- st.write("No table found")
1219
 
1220
- else:
1221
- st.write(f"Failed to fetch data with status code: {response.status_code}")
1222
- return None
 
1223
 
1224
 
1225
 
@@ -1312,208 +1333,165 @@ st.markdown("""
1312
  if 'authenticated' not in st.session_state:
1313
  st.session_state.authenticated = False
1314
 
1315
- if st.session_state.authenticated:
1316
  st.title("🌾 AgriPredict Dashboard")
 
1317
  if st.button("Get Live Data Feed"):
 
1318
  fetch_and_store_data()
1319
  fetch_and_store_data_market()
 
1320
  view_mode = st.radio("", ["Statistics", "Plots", "Predictions", "Exim"], horizontal=True)
1321
 
1322
  if view_mode == "Plots":
1323
  st.sidebar.header("Filters")
 
1324
  selected_period = st.sidebar.selectbox(
1325
  "Select Time Period",
1326
- ["2 Weeks", "1 Month", "2 Months", "3 Months", "6 Months", "1 Year", "2 Years", "5 Years"],
1327
  index=1
1328
  )
1329
  period_mapping = {
1330
  "2 Weeks": 14,
1331
  "1 Month": 30,
1332
- "2 Months": 60,
1333
  "3 Months": 90,
1334
- "6 Months": 180,
1335
  "1 Year": 365,
1336
  "2 Years": 730,
1337
  "5 Years": 1825
1338
  }
1339
- st.session_state.selected_period = period_mapping[selected_period]
1340
-
1341
- # Add 'India' option to the list of states
1342
  state_options = list(state_market_dict.keys()) + ['India']
1343
- selected_state = st.sidebar.selectbox("Select", state_options)
1344
-
1345
  market_wise = False
 
 
1346
  if selected_state != 'India':
1347
  market_wise = st.sidebar.checkbox("Market Wise Analysis")
1348
  if market_wise:
1349
  markets = state_market_dict.get(selected_state, [])
 
1350
  selected_market = st.sidebar.selectbox("Select Market", markets)
1351
  query_filter = {"Market Name": selected_market}
1352
  else:
1353
- query_filter = {"state": selected_state}
1354
  else:
1355
- query_filter = {} # For India, no specific state filter
1356
-
1357
- # Dropdown for data type
1358
- data_type = st.sidebar.radio(
1359
- "Select Data Type",
1360
- ["Price", "Volume", "Both"]
1361
- )
1362
-
1363
- # Add date filtering based on selected period
1364
  query_filter["Reported Date"] = {
1365
- "$gte": datetime.now() - timedelta(days=st.session_state.selected_period)
1366
  }
1367
-
1368
- # Submit button to trigger the query and plot
 
 
 
1369
  if st.sidebar.button("✨ Let's go!"):
1370
  try:
1371
- df_market_grouped = []
 
 
 
1372
  if "Market Name" in query_filter:
 
1373
  market_cursor = market_price_data.find(query_filter)
1374
  market_data = list(market_cursor)
1375
- df_market = pd.DataFrame(market_data)
1376
- df_market_grouped = df_market.groupby('Reported Date', as_index=False).agg({
1377
- 'Modal Price (Rs./Quintal)': 'mean'
1378
- })
1379
- date_range = pd.date_range(
1380
- start=df_market_grouped['Reported Date'].min(),
1381
- end=df_market_grouped['Reported Date'].max()
1382
- )
1383
- df_market_grouped = df_market_grouped.set_index('Reported Date').reindex(date_range).rename_axis('Reported Date').reset_index()
1384
- df_market_grouped['Modal Price (Rs./Quintal)'] = df_market_grouped['Modal Price (Rs./Quintal)'].fillna(method='ffill').fillna(method='bfill')
1385
-
1386
-
 
 
 
1387
  cursor = collection.find(query_filter)
1388
  data = list(cursor)
1389
-
 
1390
  if data:
1391
- # Convert MongoDB data to a DataFrame
1392
  df = pd.DataFrame(data)
1393
- df['Reported Date'] = pd.to_datetime(df['Reported Date'])
1394
-
1395
- if selected_state == 'India':
1396
- # Aggregate data for all of India
1397
- df_grouped = df.groupby('Reported Date', as_index=False).agg({
1398
- 'Arrivals (Tonnes)': 'sum',
1399
- 'Modal Price (Rs./Quintal)': 'mean'
1400
- })
1401
- else:
1402
- # Regular grouping by Reported Date
1403
- df_grouped = df.groupby('Reported Date', as_index=False).agg({
1404
- 'Arrivals (Tonnes)': 'sum',
1405
- 'Modal Price (Rs./Quintal)': 'mean'
1406
- })
1407
-
1408
- # Create a complete date range
1409
- date_range = pd.date_range(
1410
- start=df_grouped['Reported Date'].min(),
1411
- end=df_grouped['Reported Date'].max()
1412
- )
1413
  df_grouped = df_grouped.set_index('Reported Date').reindex(date_range).rename_axis('Reported Date').reset_index()
1414
-
1415
- # Fill missing values
1416
  df_grouped['Arrivals (Tonnes)'] = df_grouped['Arrivals (Tonnes)'].fillna(method='ffill').fillna(method='bfill')
1417
  df_grouped['Modal Price (Rs./Quintal)'] = df_grouped['Modal Price (Rs./Quintal)'].fillna(method='ffill').fillna(method='bfill')
1418
-
1419
- st.subheader(f"📈 Trends for {selected_state} ({'Market: ' + selected_market if market_wise else 'State'})")
1420
-
 
 
1421
  if data_type == "Both":
1422
- # Min-Max Scaling
1423
  scaler = MinMaxScaler()
1424
  df_grouped[['Scaled Price', 'Scaled Arrivals']] = scaler.fit_transform(
1425
  df_grouped[['Modal Price (Rs./Quintal)', 'Arrivals (Tonnes)']]
1426
  )
1427
- if "Market Name" in query_filter:
1428
- df_market_grouped['Scaled Price'] = scaler.fit_transform(
1429
- df_market_grouped[["Modal Price (Rs./Quintal)"]]
1430
- )
1431
-
1432
- fig = go.Figure()
1433
-
1434
- fig.add_trace(go.Scatter(
1435
- x=df_market_grouped['Reported Date'],
1436
- y=df_market_grouped['Scaled Price'],
1437
- mode='lines',
1438
- name='Scaled Price',
1439
- line=dict(width=1, color='green'),
1440
- text=df_market_grouped['Modal Price (Rs./Quintal)'],
1441
- hovertemplate='Date: %{x}<br>Scaled Price: %{y:.2f}<br>Actual Price: %{text:.2f}<extra></extra>'
1442
- ))
1443
- else:
1444
- fig = go.Figure()
1445
-
1446
- fig.add_trace(go.Scatter(
1447
- x=df_grouped['Reported Date'],
1448
- y=df_grouped['Scaled Price'],
1449
- mode='lines',
1450
- name='Scaled Price',
1451
- line=dict(width=1, color='green'),
1452
- text=df_grouped['Modal Price (Rs./Quintal)'],
1453
- hovertemplate='Date: %{x}<br>Scaled Price: %{y:.2f}<br>Actual Price: %{text:.2f}<extra></extra>'
1454
- ))
1455
-
1456
  fig.add_trace(go.Scatter(
1457
  x=df_grouped['Reported Date'],
1458
  y=df_grouped['Scaled Arrivals'],
1459
  mode='lines',
1460
  name='Scaled Arrivals',
1461
- line=dict(width=1, color='blue'),
1462
- text=df_grouped['Arrivals (Tonnes)'],
1463
- hovertemplate='Date: %{x}<br>Scaled Arrivals: %{y:.2f}<br>Actual Arrivals: %{text:.2f}<extra></extra>'
1464
  ))
1465
-
1466
- fig.update_layout(
1467
- title="Price and Arrivals Trend",
1468
- xaxis_title='Date',
1469
- yaxis_title='Scaled Values',
1470
- template='plotly_white'
1471
- )
1472
- st.plotly_chart(fig, use_container_width=True)
1473
-
1474
  elif data_type == "Price":
1475
- # Plot Modal Price
1476
- if "Market Name" in query_filter:
1477
- fig = go.Figure()
1478
- fig.add_trace(go.Scatter(
1479
- x=df_market_grouped['Reported Date'],
1480
- y=df_market_grouped['Modal Price (Rs./Quintal)'],
1481
- mode='lines',
1482
- name='Modal Price',
1483
- line=dict(width=1, color='green')
1484
- ))
1485
- fig.update_layout(title="Modal Price Trend", xaxis_title='Date', yaxis_title='Price (/Quintall)', template='plotly_white')
1486
- st.plotly_chart(fig, use_container_width=True)
1487
- else:
1488
- fig = go.Figure()
1489
- fig.add_trace(go.Scatter(
1490
- x=df_grouped['Reported Date'],
1491
- y=df_grouped['Modal Price (Rs./Quintal)'],
1492
- mode='lines',
1493
- name='Modal Price',
1494
- line=dict(width=1, color='green')
1495
- ))
1496
- fig.update_layout(title="Modal Price Trend", xaxis_title='Date', yaxis_title='Price (/Quintall)', template='plotly_white')
1497
- st.plotly_chart(fig, use_container_width=True)
1498
-
1499
  elif data_type == "Volume":
1500
- # Plot Arrivals (Tonnes)
1501
- fig = go.Figure()
1502
  fig.add_trace(go.Scatter(
1503
  x=df_grouped['Reported Date'],
1504
  y=df_grouped['Arrivals (Tonnes)'],
1505
  mode='lines',
1506
  name='Arrivals',
1507
- line=dict(width=1, color='blue')
1508
  ))
1509
- fig.update_layout(title="Arrivals Trend", xaxis_title='Date', yaxis_title='Volume (in Tonnes)', template='plotly_white')
1510
- st.plotly_chart(fig, use_container_width=True)
1511
-
 
 
 
 
 
 
1512
  else:
1513
- st.warning("⚠️ No data found for the selected filters.")
1514
-
1515
  except Exception as e:
1516
  st.error(f"❌ Error fetching data 2: {e}")
 
 
1517
  elif view_mode == "Predictions":
1518
  st.subheader("📊 Model Analysis")
1519
  sub_option = st.radio("Select one of the following", ["India", "States", "Market"], horizontal=True)
@@ -1521,10 +1499,10 @@ if st.session_state.authenticated:
1521
  if sub_option == "States":
1522
  states = ["Karnataka", "Madhya Pradesh", "Gujarat", "Uttar Pradesh", "Telangana"]
1523
  selected_state = st.selectbox("Select State for Model Training", states)
1524
- filter_key = f"state_{selected_state}" # Unique key for each state
1525
 
1526
  if st.button("Forecast"):
1527
- query_filter = {"state": selected_state}
1528
  df = fetch_and_process_data(query_filter, collection)
1529
  if sub_timeline == "14 days":
1530
  forecast(df, filter_key, 14)
@@ -1535,11 +1513,12 @@ if st.session_state.authenticated:
1535
  elif sub_option == "Market":
1536
  market_options = ["Rajkot", "Neemuch", "Kalburgi", "Warangal"]
1537
  selected_market = st.selectbox("Select Market for Model Training", market_options)
1538
- filter_key = f"market_{selected_market}" # Unique key for each market
1539
  if st.button("Forecast"):
1540
  query_filter = {"Market Name": selected_market}
1541
  comparison_date = pd.to_datetime("18 Feb 2025")
1542
  df = fetch_and_process_data(query_filter, market_price_data)
 
1543
  if sub_timeline == "14 days":
1544
  forecast(df, filter_key, 14)
1545
  elif sub_timeline == "1 month":
@@ -1566,24 +1545,19 @@ if st.session_state.authenticated:
1566
  display_statistics(df)
1567
  elif view_mode == "Exim":
1568
  df = collection_to_dataframe(impExp)
1569
-
1570
- # Add radio buttons for user selection
1571
  plot_option = st.radio(
1572
  "Select the data to visualize:",
1573
  ["Import Price", "Import Quantity", "Export Price", "Export Quantity"],
1574
  horizontal=True
1575
  )
1576
-
1577
- # Dropdown for time period selection
1578
  time_period = st.selectbox(
1579
  "Select time period:",
1580
  ["1 Month", "6 Months", "1 Year", "2 Years"]
1581
  )
1582
 
1583
- # Convert Reported Date to datetime
1584
  df["Reported Date"] = pd.to_datetime(df["Reported Date"], format="%Y-%m-%d")
1585
-
1586
- # Filter data based on the time period
1587
  if time_period == "1 Month":
1588
  start_date = pd.Timestamp.now() - pd.DateOffset(months=1)
1589
  elif time_period == "6 Months":
@@ -1594,8 +1568,6 @@ if st.session_state.authenticated:
1594
  start_date = pd.Timestamp.now() - pd.DateOffset(years=2)
1595
 
1596
  filtered_df = df[df["Reported Date"] >= start_date]
1597
-
1598
- # Process data based on the selected option
1599
  if plot_option == "Import Price":
1600
  grouped_df = (
1601
  filtered_df.groupby("Reported Date", as_index=False)["VALUE_IMPORT"]
@@ -1624,12 +1596,11 @@ if st.session_state.authenticated:
1624
  .rename(columns={"QUANTITY_IMPORT": "Total Export Quantity"})
1625
  )
1626
  y_axis_label = "Total Export Quantity (Tonnes)"
1627
-
1628
- # Plot using Plotly
1629
  fig = px.line(
1630
  grouped_df,
1631
  x="Reported Date",
1632
- y=grouped_df.columns[1], # Dynamic y-axis column name
1633
  title=f"{plot_option} Over Time",
1634
  labels={"Reported Date": "Date", grouped_df.columns[1]: y_axis_label},
1635
  )
@@ -1646,9 +1617,9 @@ else:
1646
 
1647
  if login_button:
1648
  if authenticate_user(username, password):
1649
- st.session_state.authenticated = True # Set the authentication state to True
1650
- st.session_state['username'] = username # Store username in session state
1651
  st.write("Login successful!")
1652
- st.rerun() # Page will automatically rerun to show the protected content
1653
  else:
1654
  st.error("Invalid username or password")
 
20
  from tqdm import tqdm
21
  import io
22
  from statsmodels.tsa.statespace.sarimax import SARIMAX
23
+ from datetime import datetime, timedelta
24
+
25
def generate_date_ranges(start_date: str, end_date: str):
    """Yield one-day (from, to) date-string pairs covering the inclusive span.

    Both arguments and all yielded strings use the "%d %b %Y" format
    (e.g. "01 Jan 2020"). Yields nothing when start_date > end_date.
    """
    cursor = datetime.strptime(start_date, "%d %b %Y")
    stop = datetime.strptime(end_date, "%d %b %Y")
    one_day = timedelta(days=1)
    while cursor <= stop:
        stamp = cursor.strftime("%d %b %Y")
        yield (stamp, stamp)
        cursor += one_day
32
 
33
  mongo_uri = "mongodb+srv://Agripredict:TjXSvMhOis49qH8E@cluster0.gek7n.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0"
34
  if not mongo_uri:
35
  st.error("MongoDB URI is not set!")
36
  st.stop()
37
  else:
 
38
  client = MongoClient(mongo_uri, tlsCAFile=certifi.where())
39
  db = client["AgriPredict"]
40
  collection = db["WhiteSesame"]
 
299
 
300
 
301
def preprocess_data(df):
    """Return a daily, gap-free modal-price series built from raw market rows.

    Keeps only 'Reported Date' and 'Modal Price (Rs./Quintal)', averages
    duplicate same-day observations, reindexes onto a complete daily calendar
    between the first and last observed date, and fills the resulting gaps by
    forward- then backward-fill.

    Parameters
    ----------
    df : pd.DataFrame
        Raw rows containing at least 'Reported Date' and
        'Modal Price (Rs./Quintal)' columns.

    Returns
    -------
    pd.DataFrame
        Two columns ('Reported Date', 'Modal Price (Rs./Quintal)'), one row
        per calendar day, no missing prices.
    """
    # Copy the two-column slice so the caller's frame is never mutated and
    # pandas does not emit SettingWithCopyWarning on the assignments below
    # (the original assigned into a view of the input frame).
    df = df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    df['Reported Date'] = pd.to_datetime(df['Reported Date'])

    # Average duplicate observations reported on the same day.
    df = df.groupby('Reported Date', as_index=False).mean()

    # Reindex onto every calendar day between the first and last observation;
    # newly introduced days carry NaN prices until the fill step.
    full_date_range = pd.date_range(df['Reported Date'].min(), df['Reported Date'].max())
    df = df.set_index('Reported Date').reindex(full_date_range).rename_axis('Reported Date').reset_index()

    # ffill()/bfill() replace the deprecated fillna(method=...) API
    # (removed in pandas 3.0); behavior is identical.
    df['Modal Price (Rs./Quintal)'] = (
        df['Modal Price (Rs./Quintal)'].ffill().bfill()
    )
    return df
312
 
313
+
314
  def train_and_evaluate(df):
315
  import streamlit as st
 
 
316
  progress_bar = st.progress(0)
 
 
317
  def update_tuning_progress(current, total):
318
  progress = int((current / total) * 100)
319
  progress_bar.progress(progress)
320
 
321
  df = create_forecasting_features(df)
322
 
 
323
  train_df = df[df['Reported Date'] < '2024-01-01']
324
  test_df = df[df['Reported Date'] >= '2024-01-01']
325
 
 
327
  y_train = train_df['Modal Price (Rs./Quintal)']
328
  X_test = test_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
329
  y_test = test_df['Modal Price (Rs./Quintal)']
 
 
330
  st.write("Performing hyperparameter tuning...")
331
  param_grid = {
332
  'learning_rate': [0.01, 0.1, 0.2],
 
339
  param_combinations = len(param_grid['learning_rate']) * len(param_grid['max_depth']) * \
340
  len(param_grid['n_estimators']) * len(param_grid['booster'])
341
 
342
+ current_combination = 0
343
 
344
  def custom_grid_search():
345
  nonlocal current_combination
 
365
  'n_estimators': n_estimators,
366
  'booster': booster
367
  }
 
368
  current_combination += 1
369
  update_tuning_progress(current_combination, param_combinations)
370
  return best_params
371
 
372
  best_params = custom_grid_search()
373
+
 
374
  st.write("Training the best model and making predictions...")
375
  best_model = XGBRegressor(**best_params)
376
  best_model.fit(X_train, y_train)
377
  y_pred = best_model.predict(X_test)
378
 
 
379
  rmse = mean_squared_error(y_test, y_pred, squared=False)
380
  mae = mean_absolute_error(y_test, y_pred)
381
  st.write(f"RMSE: {rmse}")
382
  st.write(f"MAE: {mae}")
383
+
 
384
  train_plot_df = train_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
385
  train_plot_df['Type'] = 'Train'
386
 
 
415
 
416
  st.plotly_chart(fig, use_container_width=True)
417
 
 
418
  return best_params
419
 
420
  def train_and_evaluate_1m(df):
 
424
  from xgboost import XGBRegressor
425
  from sklearn.metrics import mean_squared_error, mean_absolute_error
426
 
 
427
  progress_bar = st.progress(0)
428
 
 
429
  def update_tuning_progress(current, total):
430
  progress = int((current / total) * 100)
431
  progress_bar.progress(progress)
432
 
433
  df = create_forecasting_features_1m(df)
434
+
 
435
  split_date = pd.to_datetime("2024-01-01")
436
+ test_horizon = pd.DateOffset(days=30)
437
 
438
  train_df = df[df['Reported Date'] < split_date]
439
  test_df = df[(df['Reported Date'] >= split_date) & (df['Reported Date'] < split_date + test_horizon)]
 
443
  X_test = test_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
444
  y_test = test_df['Modal Price (Rs./Quintal)']
445
 
 
446
  st.write("Performing hyperparameter tuning...")
447
  param_grid = {
448
  'learning_rate': [0.01, 0.1, 0.2],
 
455
  param_combinations = len(param_grid['learning_rate']) * len(param_grid['max_depth']) * \
456
  len(param_grid['n_estimators']) * len(param_grid['booster'])
457
 
458
+ current_combination = 0
459
 
460
  def custom_grid_search():
461
  nonlocal current_combination
 
481
  'n_estimators': n_estimators,
482
  'booster': booster
483
  }
 
484
  current_combination += 1
485
  update_tuning_progress(current_combination, param_combinations)
486
  return best_params
487
 
488
  best_params = custom_grid_search()
 
 
489
  st.write("Training the best model and making predictions...")
490
  best_model = XGBRegressor(**best_params)
491
  best_model.fit(X_train, y_train)
492
  y_pred = best_model.predict(X_test)
493
 
 
494
  rmse = mean_squared_error(y_test, y_pred, squared=False)
495
  mae = mean_absolute_error(y_test, y_pred)
496
  st.write(f"RMSE: {rmse}")
497
  st.write(f"MAE: {mae}")
498
 
 
499
  train_plot_df = train_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
500
  train_plot_df['Type'] = 'Train'
501
 
 
530
 
531
  st.plotly_chart(fig, use_container_width=True)
532
 
 
533
  return best_params
534
 
535
  def train_and_evaluate_3m(df):
536
  import streamlit as st
 
 
537
  progress_bar = st.progress(0)
 
 
538
  def update_tuning_progress(current, total):
539
  progress = int((current / total) * 100)
540
  progress_bar.progress(progress)
 
548
  X_test = test_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
549
  y_test = test_df['Modal Price (Rs./Quintal)']
550
 
 
551
  st.write("Performing hyperparameter tuning...")
552
  param_grid = {
553
  'learning_rate': [0.01, 0.1, 0.2],
 
560
  param_combinations = len(param_grid['learning_rate']) * len(param_grid['max_depth']) * \
561
  len(param_grid['n_estimators']) * len(param_grid['booster'])
562
 
563
+ current_combination = 0
564
 
565
  def custom_grid_search():
566
  nonlocal current_combination
 
586
  'n_estimators': n_estimators,
587
  'booster': booster
588
  }
 
589
  current_combination += 1
590
  update_tuning_progress(current_combination, param_combinations)
591
  return best_params
592
 
593
  best_params = custom_grid_search()
 
 
594
  st.write("Training the best model and making predictions...")
595
  best_model = XGBRegressor(**best_params)
596
  best_model.fit(X_train, y_train)
597
  y_pred = best_model.predict(X_test)
598
 
 
599
  rmse = mean_squared_error(y_test, y_pred, squared=False)
600
  mae = mean_absolute_error(y_test, y_pred)
601
  st.write(f"RMSE: {rmse}")
602
  st.write(f"MAE: {mae}")
603
 
 
604
  train_plot_df = train_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
605
  train_plot_df['Type'] = 'Train'
606
 
 
635
 
636
  st.plotly_chart(fig, use_container_width=True)
637
 
 
638
  return best_params
639
 
640
  def forecast_next_14_days(df, _best_params, key):
641
  last_date = df['Reported Date'].max()
642
  future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=14)
643
  future_df = pd.DataFrame({'Reported Date': future_dates})
 
 
644
  full_df = pd.concat([df, future_df], ignore_index=True)
645
  full_df = create_forecasting_features(full_df)
646
 
 
656
 
657
  future_predictions = model.predict(X_future)
658
  future_df['Modal Price (Rs./Quintal)'] = future_predictions
 
 
659
  plot_data(original_df, future_df, last_date, 14)
660
  download_button(future_df, key)
661
 
 
663
  last_date = df['Reported Date'].max()
664
  future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=30)
665
  future_df = pd.DataFrame({'Reported Date': future_dates})
 
 
666
  full_df = pd.concat([df, future_df], ignore_index=True)
667
  full_df = create_forecasting_features_1m(full_df)
668
 
 
678
 
679
  future_predictions = model.predict(X_future)
680
  future_df['Modal Price (Rs./Quintal)'] = future_predictions
 
 
681
  plot_data(original_df, future_df, last_date, 30)
682
  download_button(future_df, key)
683
 
 
685
  last_date = df['Reported Date'].max()
686
  future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=90)
687
  future_df = pd.DataFrame({'Reported Date': future_dates})
 
 
688
  full_df = pd.concat([df, future_df], ignore_index=True)
689
  full_df = create_forecasting_features_3m(full_df)
690
 
 
700
 
701
  future_predictions = model.predict(X_future)
702
  future_df['Modal Price (Rs./Quintal)'] = future_predictions
 
 
703
  plot_data(original_df, future_df, last_date, 90)
704
  download_button(future_df, key)
705
 
706
  def plot_data(original_df, future_df, last_date, days):
 
707
  actual_df = original_df[original_df['Reported Date'] >= (last_date - pd.Timedelta(days=days))].copy()
708
  actual_df['Type'] = 'Actual'
709
+
 
710
  future_plot_df = future_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
711
  future_plot_df['Type'] = 'Forecasted'
 
 
 
712
  last_actual_point = actual_df.sort_values('Reported Date').iloc[[-1]].copy()
713
  future_plot_df = pd.concat([last_actual_point, future_plot_df])
714
+
 
715
  plot_df = pd.concat([actual_df, future_plot_df])
 
 
716
  fig = go.Figure()
717
  for plot_type, color, dash in [('Actual', 'blue', 'solid'), ('Forecasted', 'red', 'dash')]:
718
  data = plot_df[plot_df['Type'] == plot_type]
 
735
 
736
 
737
  def download_button(future_df, key):
 
738
  download_df = future_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
 
 
739
  download_df['Reported Date'] = download_df['Reported Date'].dt.strftime('%Y-%m-%d')
 
 
740
  towrite = io.BytesIO()
741
+ download_df.to_excel(towrite, index=False, engine='xlsxwriter')
742
  towrite.seek(0)
 
 
743
  st.download_button(label="Download Forecasted Values",
744
  data=towrite,
745
  file_name=f'forecasted_prices_{key}.xlsx',
 
772
  collection.replace_one({"filter_key": filter_key}, best_params)
773
  else:
774
  collection.insert_one(best_params)
775
+
 
776
def get_best_params(filter_key, collection):
    """Fetch the saved hyper-parameter document for *filter_key*.

    Returns the first matching MongoDB document, or None when no parameters
    have been stored under this key yet.
    """
    return collection.find_one({"filter_key": filter_key})
779
+
780
  def train_and_forecast(df, filter_key, days):
781
  if df is not None:
 
782
  if days==14:
783
  best_params = train_and_evaluate(df)
784
  save_best_params(filter_key, best_params, best_params_collection)
 
791
  best_params = train_and_evaluate_3m(df)
792
  save_best_params(filter_key, best_params, best_params_collection_3m)
793
  forecast_next_90_days(df, best_params, filter_key)
794
+ failed_dates_data = []
795
+ failed_dates_market = []
796
 
797
  def forecast(df, filter_key, days):
798
  if days==14:
 
828
  Returns:
829
  pd.DataFrame: DataFrame containing the collection data.
830
  """
 
831
  documents = list(collection.find())
832
 
 
833
  df = pd.DataFrame(documents)
 
 
834
  if drop_id and '_id' in df.columns:
835
  df = df.drop(columns=['_id'])
836
 
 
840
 
841
  def editable_spreadsheet():
842
  st.title("Sowing Report Prediction Model")
 
 
843
  uploaded_file = st.file_uploader("Upload your Excel file", type=['xlsx'])
844
+
 
845
  if uploaded_file is not None:
 
846
  df_excel = pd.read_excel(uploaded_file)
 
 
847
  st.write("Excel data loaded:", df_excel)
848
 
 
849
  with st.form("input_form"):
850
  input_region = st.text_input("Enter Region to Filter By", placeholder="Region Name")
851
  input_season = st.text_input("Enter Season to Filter By", placeholder="Season (e.g., Winter)")
 
854
 
855
  if submit_button:
856
  if input_region and input_season and input_area > 0:
 
857
  filtered_df = df_excel[
858
  (df_excel['Region'].str.lower() == input_region.lower()) &
859
  (df_excel['Season'].str.lower() == input_season.lower())
 
904
  }
905
  </style>
906
  """, unsafe_allow_html=True)
 
 
907
  df['Reported Date'] = pd.to_datetime(df['Reported Date'])
908
  national_data = df.groupby('Reported Date').agg({
909
  'Modal Price (Rs./Quintal)': 'mean',
 
913
  st.subheader("🗓️ Key Statistics")
914
  latest_date = national_data['Reported Date'].max()
915
  latest_price = national_data[national_data['Reported Date'] == latest_date]['Modal Price (Rs./Quintal)'].mean()
916
+ national_data['Arrivals (Tonnes)'] = pd.to_numeric(national_data['Arrivals (Tonnes)'], errors='coerce')
917
  latest_arrivals = national_data[national_data['Reported Date'] == latest_date]['Arrivals (Tonnes)'].sum()
918
 
919
  st.markdown("<p class='highlight'>This section provides the most recent statistics for the market. It includes the latest available date, the average price of commodities, and the total quantity of goods arriving at the market. These metrics offer an up-to-date snapshot of market conditions.</p>", unsafe_allow_html=True)
920
  st.write(f"**Latest Date**: {latest_date.strftime('%Y-%m-%d')}")
921
  st.write(f"**Latest Modal Price**: {latest_price:.2f} Rs./Quintal")
922
+ st.write(f"**Latest Arrivals**: {float(latest_arrivals):.2f} Tonnes")
923
 
924
  st.subheader("📆 This Day in Previous Years")
925
  st.markdown("<p class='highlight'>This table shows the modal price and total arrivals for this exact day across previous years. It provides a historical perspective to compare against current market conditions. This section examines historical data for the same day in previous years. By analyzing trends for this specific day, you can identify seasonal patterns, supply-demand changes, or any deviations that might warrant closer attention.</p>", unsafe_allow_html=True)
 
988
  editable_spreadsheet()
989
 
990
 
991
def parse_table_with_rowspan(table):
    """Flatten an HTML <table> (BeautifulSoup tag) into a list of row lists.

    Cells that carry a ``rowspan`` attribute are repeated into the following
    rows, so every output row has a value in every logical column.

    Parameters
    ----------
    table : bs4 Tag for a <table> element.

    Returns
    -------
    list[list[str]]
        One inner list per <tr>, cell text stripped of whitespace.
    """
    parsed_rows = []
    # logical column index -> {"value": carried text,
    #                          "rows_left": rows still owed that value}
    pending = {}

    for tr in table.find_all("tr"):
        cells = tr.find_all(["td", "th"])
        row_values = []
        src_idx = 0   # next unconsumed cell within this <tr>
        out_idx = 0   # logical output column currently being filled

        # Keep going while there are real cells left, or a pending rowspan
        # value owed to the current output column.
        while src_idx < len(cells) or out_idx in pending:
            if out_idx in pending:
                # This column is occupied by a rowspan from an earlier row.
                carried = pending[out_idx]
                row_values.append(carried["value"])
                carried["rows_left"] -= 1
                if carried["rows_left"] == 0:
                    del pending[out_idx]
                out_idx += 1
            elif src_idx < len(cells):
                cell = cells[src_idx]
                text = cell.get_text(strip=True)
                span = int(cell.get("rowspan", 1))

                row_values.append(text)
                if span > 1:
                    # Owe this value to the same column in the next span-1 rows.
                    pending[out_idx] = {"value": text, "rows_left": span - 1}

                src_idx += 1
                out_idx += 1

        parsed_rows.append(row_values)

    return parsed_rows
1026
+
1027
 
1028
  def fetch_and_store_data():
1029
+ SCRAPER_API_KEY = "8842750a88db7513a1d19325745437cc"
1030
  latest_doc = collection.find_one(sort=[("Reported Date", -1)])
1031
+ from_date = (latest_doc["Reported Date"] + timedelta(days=1)) if latest_doc else datetime(2019, 1, 1)
1032
+ to_date = datetime.now() - timedelta(days=1)
1033
+
1034
+ print(f"📦 Modal Data → From: {from_date.strftime('%d-%b-%Y')} To: {to_date.strftime('%d-%b-%Y')}")
1035
+
1036
+ current = from_date.replace(day=1)
1037
+ while current <= to_date:
1038
+ start_of_range = max(current, from_date)
1039
+ end_of_range = (current.replace(day=28) + timedelta(days=4)).replace(day=1) - timedelta(days=1)
1040
+ if end_of_range > to_date:
1041
+ end_of_range = to_date
1042
+
1043
+ date_from_str = start_of_range.strftime('%d-%b-%Y')
1044
+ date_to_str = end_of_range.strftime('%d-%b-%Y')
1045
+
1046
+ print(f"\n📅 Fetching data from {date_from_str} to {date_to_str}")
1047
+
1048
+ target_url = (
1049
+ "https://agmarknet.gov.in/SearchCmmMkt.aspx"
1050
+ f"?Tx_Commodity=11&Tx_State=0&Tx_District=0&Tx_Market=0"
1051
+ f"&DateFrom={date_from_str}&DateTo={date_to_str}"
1052
+ f"&Fr_Date={date_from_str}&To_Date={date_to_str}"
1053
+ "&Tx_Trend=2"
1054
+ "&Tx_CommodityHead=Sesamum(Sesame,Gingelly,Til)"
1055
+ "&Tx_StateHead=--Select--"
1056
+ "&Tx_DistrictHead=--Select--"
1057
+ "&Tx_MarketHead=--Select--"
1058
+ )
1059
 
1060
+ payload = {
1061
+ "api_key": SCRAPER_API_KEY,
1062
+ "url": target_url
1063
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1064
 
1065
+ try:
1066
+ response = requests.get("https://api.scraperapi.com/", params=payload)
1067
+ soup = BeautifulSoup(response.text, "html.parser")
1068
+ table = soup.find("table", {"class": "tableagmark_new"})
1069
+
1070
+ if not table or not table.find_all("tr"):
1071
+ print("❌ No table found.")
1072
+ current = (current + timedelta(days=32)).replace(day=1)
1073
+ continue
1074
+
1075
+ all_rows = parse_table_with_rowspan(table)
1076
+ headers = all_rows[0]
1077
+ rows = all_rows[1:]
1078
+
1079
+ df_raw = pd.DataFrame(rows, columns=headers)
1080
+ print(f"🔍 Raw rows fetched: {len(df_raw)}")
1081
+
1082
+ # Clean invalid state/district/market names
1083
+ required_columns = ["State Name", "District Name", "Market Name"]
1084
+ if all(col in df_raw.columns for col in required_columns):
1085
+ df_raw = df_raw[
1086
+ (df_raw["State Name"].str.strip() != "-") &
1087
+ (df_raw["District Name"].str.strip() != "-") &
1088
+ (df_raw["Market Name"].str.strip() != "-")
1089
+ ]
1090
+ print(f"✅ Rows after filtering: {len(df_raw)}")
1091
+ else:
1092
+ print("⚠️ One or more expected columns are missing. Skipping filter.")
1093
+
1094
+ # Filter by variety + grade
1095
+ df_raw = df_raw[
1096
+ (df_raw["Variety"].str.strip().str.lower() == "white") &
1097
+ (df_raw["Grade"].str.strip().str.upper() == "FAQ")
1098
+ ]
1099
+ print(f"✅ Filtered rows with 'White' variety and 'FAQ' grade: {len(df_raw)}")
1100
+
1101
+ # Parse and clean dates
1102
+ df_raw["Reported Date Parsed"] = pd.to_datetime(
1103
+ df_raw["Reported Date"].str.strip(), format='%d %b %Y', errors='coerce'
1104
+ )
1105
+ df_raw = df_raw[df_raw["Reported Date Parsed"].notna()].copy()
1106
+ df_raw["Reported Date"] = df_raw["Reported Date Parsed"]
1107
+ df_raw.drop(columns=["Reported Date Parsed"], inplace=True)
1108
+
1109
+ # Type conversions
1110
+ df_raw["Modal Price (Rs./Quintal)"] = pd.to_numeric(
1111
+ df_raw["Modal Price (Rs./Quintal)"], errors='coerce'
1112
+ ).round().astype("Int64")
1113
+ df_raw["Arrivals (Tonnes)"] = pd.to_numeric(
1114
+ df_raw["Arrivals (Tonnes)"], errors='coerce'
1115
+ ).astype("float64")
1116
+ df_raw["State Name"] = df_raw["State Name"].astype("string")
1117
+ df_raw["Market Name"] = df_raw["Market Name"].astype("string")
1118
+
1119
+ # Write cleaned CSV
1120
+ raw_csv_filename = f"clean_raw_modal_data_{start_of_range.strftime('%b_%Y')}.csv"
1121
+ df_raw.to_csv(raw_csv_filename, index=False)
1122
+ print(f"📄 Cleaned raw data CSV written to: {raw_csv_filename}")
1123
+
1124
+ # Insert to DB
1125
+ records = df_raw.to_dict(orient="records")
1126
  if records:
1127
  collection.insert_many(records)
1128
+ print(f"Inserted {len(records)} records for {current.strftime('%b %Y')}")
1129
  else:
1130
+ print("⚠️ No valid records after final filtering.")
1131
 
1132
+ except Exception as e:
1133
+ print(f"🔥 Exception during {current.strftime('%b %Y')} fetch: {e}")
1134
 
1135
+ current = (current + timedelta(days=32)).replace(day=1)
 
 
1136
 
 
1137
  def fetch_and_store_data_market():
1138
+ SCRAPER_API_KEY = "8842750a88db7513a1d19325745437cc"
1139
  latest_doc = market_price_data.find_one(sort=[("Reported Date", -1)])
1140
+ from_date = (latest_doc["Reported Date"] + timedelta(days=1)) if latest_doc else datetime(2019, 1, 1)
1141
+ to_date = datetime.now() - timedelta(days=1)
1142
+
1143
+ print(f"📦 Market Data → From: {from_date.strftime('%d-%b-%Y')} To: {to_date.strftime('%d-%b-%Y')}")
1144
+
1145
+ current = from_date.replace(day=1)
1146
+ while current <= to_date:
1147
+ start_of_range = max(current, from_date)
1148
+ end_of_range = (current.replace(day=28) + timedelta(days=4)).replace(day=1) - timedelta(days=1)
1149
+ if end_of_range > to_date:
1150
+ end_of_range = to_date
1151
+
1152
+ date_from_str = start_of_range.strftime('%d-%b-%Y')
1153
+ date_to_str = end_of_range.strftime('%d-%b-%Y')
1154
+
1155
+ print(f"\n📅 Fetching data from {date_from_str} to {date_to_str}")
1156
+
1157
+ target_url = (
1158
+ "https://agmarknet.gov.in/SearchCmmMkt.aspx"
1159
+ f"?Tx_Commodity=11&Tx_State=0&Tx_District=0&Tx_Market=0"
1160
+ f"&DateFrom={date_from_str}&DateTo={date_to_str}"
1161
+ f"&Fr_Date={date_from_str}&To_Date={date_to_str}"
1162
+ "&Tx_Trend=0"
1163
+ "&Tx_CommodityHead=Sesamum(Sesame,Gingelly,Til)"
1164
+ "&Tx_StateHead=--Select--"
1165
+ "&Tx_DistrictHead=--Select--"
1166
+ "&Tx_MarketHead=--Select--"
1167
+ )
1168
 
1169
+ payload = {
1170
+ "api_key": SCRAPER_API_KEY,
1171
+ "url": target_url
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1172
  }
 
 
 
 
 
 
 
 
1173
 
1174
+ try:
1175
+ response = requests.get("https://api.scraperapi.com/", params=payload)
1176
+ soup = BeautifulSoup(response.text, "html.parser")
1177
+ table = soup.find("table", {"class": "tableagmark_new"})
1178
+
1179
+ if not table or not table.find_all("tr"):
1180
+ print("❌ No table found.")
1181
+ current = (current + timedelta(days=32)).replace(day=1)
1182
+ continue
1183
+
1184
+ all_rows = parse_table_with_rowspan(table)
1185
+ headers = all_rows[0]
1186
+ rows = all_rows[1:]
1187
+
1188
+ # Filter out irrelevant columns based on available data
1189
+ required_columns = ["Sl no.", "District Name", "Market Name", "Commodity", "Variety", "Grade", "Min Price (Rs./Quintal)", "Max Price (Rs./Quintal)", "Modal Price (Rs./Quintal)", "Price Date"]
1190
+ df_raw = pd.DataFrame(rows, columns=headers)
1191
+
1192
+ # Remove rows with invalid or missing location data
1193
+ df_raw = df_raw[
1194
+ (df_raw["District Name"].str.strip() != "-") &
1195
+ (df_raw["Market Name"].str.strip() != "-")
1196
+ ]
1197
+ print(f"✅ Rows after filtering invalid locations: {len(df_raw)}")
1198
+
1199
+ # ✅ Filter for variety and grade
1200
+ df_raw = df_raw[
1201
+ (df_raw["Variety"].str.strip().str.lower() == "white") &
1202
+ (df_raw["Grade"].str.strip().str.upper() == "FAQ")
1203
+ ]
1204
+ print(f"✅ Filtered rows with 'White' variety and 'FAQ' grade: {len(df_raw)}")
1205
+
1206
+ # ✅ Parse 'Price Date' as 'Reported Date'
1207
+ df_raw["Reported Date Parsed"] = pd.to_datetime(
1208
+ df_raw["Price Date"].str.strip(), format='%d %b %Y', errors='coerce'
1209
+ )
1210
+ df_raw = df_raw[df_raw["Reported Date Parsed"].notna()].copy()
1211
+ df_raw["Reported Date"] = df_raw["Reported Date Parsed"]
1212
+ df_raw.drop(columns=["Reported Date Parsed"], inplace=True)
1213
+
1214
+ # ✅ Type conversions
1215
+ df_raw["Modal Price (Rs./Quintal)"] = pd.to_numeric(
1216
+ df_raw["Modal Price (Rs./Quintal)"], errors='coerce'
1217
+ ).round().astype("Int64")
1218
+ df_raw["Min Price (Rs./Quintal)"] = pd.to_numeric(
1219
+ df_raw["Min Price (Rs./Quintal)"], errors='coerce'
1220
+ ).round().astype("Int64")
1221
+ df_raw["Max Price (Rs./Quintal)"] = pd.to_numeric(
1222
+ df_raw["Max Price (Rs./Quintal)"], errors='coerce'
1223
+ ).round().astype("Int64")
1224
+ df_raw["District Name"] = df_raw["District Name"].astype("string")
1225
+ df_raw["Market Name"] = df_raw["Market Name"].astype("string")
1226
+
1227
+ # ✅ Save CSV for audit
1228
+ raw_csv_filename = f"clean_raw_market_data_{start_of_range.strftime('%b_%Y')}.csv"
1229
+ df_raw.to_csv(raw_csv_filename, index=False)
1230
+ print(f"📄 CSV saved: {raw_csv_filename}")
1231
+
1232
+ # ✅ Insert into MongoDB
1233
+ records = df_raw.to_dict(orient="records")
1234
  if records:
1235
  market_price_data.insert_many(records)
1236
+ print(f"Inserted {len(records)} records for {current.strftime('%b %Y')}")
1237
  else:
1238
+ print("⚠️ No valid records after final filtering.")
 
 
 
1239
 
1240
+ except Exception as e:
1241
+ print(f"🔥 Exception during {current.strftime('%b %Y')} fetch: {e}")
1242
+
1243
+ current = (current + timedelta(days=32)).replace(day=1)
1244
 
1245
 
1246
 
 
1333
  if 'authenticated' not in st.session_state:
1334
  st.session_state.authenticated = False
1335
 
1336
+ if st.session_state.get("authenticated", False):
1337
  st.title("🌾 AgriPredict Dashboard")
1338
+
1339
  if st.button("Get Live Data Feed"):
1340
+ st.write("🔄 Fetching fresh data from Modal + Agmarknet...")
1341
  fetch_and_store_data()
1342
  fetch_and_store_data_market()
1343
+
1344
  view_mode = st.radio("", ["Statistics", "Plots", "Predictions", "Exim"], horizontal=True)
1345
 
1346
  if view_mode == "Plots":
1347
  st.sidebar.header("Filters")
1348
+
1349
  selected_period = st.sidebar.selectbox(
1350
  "Select Time Period",
1351
+ ["2 Weeks", "1 Month", "3 Months", "1 Year", "5 Years"],
1352
  index=1
1353
  )
1354
  period_mapping = {
1355
  "2 Weeks": 14,
1356
  "1 Month": 30,
 
1357
  "3 Months": 90,
 
1358
  "1 Year": 365,
1359
  "2 Years": 730,
1360
  "5 Years": 1825
1361
  }
1362
+ st.session_state["selected_period"] = period_mapping[selected_period]
1363
+
 
1364
  state_options = list(state_market_dict.keys()) + ['India']
1365
+ selected_state = st.sidebar.selectbox("Select State", state_options)
1366
+
1367
  market_wise = False
1368
+ query_filter = {}
1369
+
1370
  if selected_state != 'India':
1371
  market_wise = st.sidebar.checkbox("Market Wise Analysis")
1372
  if market_wise:
1373
  markets = state_market_dict.get(selected_state, [])
1374
+ st.write(f"✅ Available markets for {selected_state}: {markets}")
1375
  selected_market = st.sidebar.selectbox("Select Market", markets)
1376
  query_filter = {"Market Name": selected_market}
1377
  else:
1378
+ query_filter = {"State Name": selected_state}
1379
  else:
1380
+ query_filter = {"State Name": {"$exists": True}}
1381
+
 
 
 
 
 
 
 
1382
  query_filter["Reported Date"] = {
1383
+ "$gte": datetime.now() - timedelta(days=st.session_state["selected_period"])
1384
  }
1385
+
1386
+ data_type = st.sidebar.radio("Select Data Type", ["Price", "Volume", "Both"])
1387
+
1388
+ st.write(f"🧪 Final Mongo Query Filter: `{query_filter}`")
1389
+
1390
  if st.sidebar.button("✨ Let's go!"):
1391
  try:
1392
+ df_market_grouped = pd.DataFrame()
1393
+ df_grouped = pd.DataFrame()
1394
+
1395
+ # MARKET-WISE
1396
  if "Market Name" in query_filter:
1397
+ st.info("📊 Market-level data mode enabled")
1398
  market_cursor = market_price_data.find(query_filter)
1399
  market_data = list(market_cursor)
1400
+ st.write(f"📄 Market rows fetched: {len(market_data)}")
1401
+
1402
+ if market_data:
1403
+ df_market = pd.DataFrame(market_data)
1404
+ df_market['Reported Date'] = pd.to_datetime(df_market['Reported Date'], errors='coerce')
1405
+ df_market["Modal Price (Rs./Quintal)"] = pd.to_numeric(df_market["Modal Price (Rs./Quintal)"], errors='coerce')
1406
+ df_market_grouped = df_market.groupby('Reported Date', as_index=False).agg({
1407
+ 'Modal Price (Rs./Quintal)': 'mean'
1408
+ }).dropna()
1409
+ date_range = pd.date_range(df_market_grouped['Reported Date'].min(), df_market_grouped['Reported Date'].max())
1410
+ df_market_grouped = df_market_grouped.set_index('Reported Date').reindex(date_range).rename_axis('Reported Date').reset_index()
1411
+ df_market_grouped['Modal Price (Rs./Quintal)'] = df_market_grouped['Modal Price (Rs./Quintal)'].fillna(method='ffill').fillna(method='bfill')
1412
+
1413
+ # STATE/NATIONAL-WISE
1414
+ st.info("📥 Fetching state-level or national data...")
1415
  cursor = collection.find(query_filter)
1416
  data = list(cursor)
1417
+ st.write(f"📄 Total rows fetched from collection: {len(data)}")
1418
+
1419
  if data:
 
1420
  df = pd.DataFrame(data)
1421
+ df['Reported Date'] = pd.to_datetime(df['Reported Date'], errors='coerce')
1422
+ df['Arrivals (Tonnes)'] = pd.to_numeric(df['Arrivals (Tonnes)'], errors='coerce')
1423
+ df['Modal Price (Rs./Quintal)'] = pd.to_numeric(df['Modal Price (Rs./Quintal)'], errors='coerce')
1424
+
1425
+ df_grouped = df.groupby('Reported Date', as_index=False).agg({
1426
+ 'Arrivals (Tonnes)': 'sum',
1427
+ 'Modal Price (Rs./Quintal)': 'mean'
1428
+ }).dropna()
1429
+
1430
+ date_range = pd.date_range(df_grouped['Reported Date'].min(), df_grouped['Reported Date'].max())
 
 
 
 
 
 
 
 
 
 
1431
  df_grouped = df_grouped.set_index('Reported Date').reindex(date_range).rename_axis('Reported Date').reset_index()
 
 
1432
  df_grouped['Arrivals (Tonnes)'] = df_grouped['Arrivals (Tonnes)'].fillna(method='ffill').fillna(method='bfill')
1433
  df_grouped['Modal Price (Rs./Quintal)'] = df_grouped['Modal Price (Rs./Quintal)'].fillna(method='ffill').fillna(method='bfill')
1434
+
1435
+ st.subheader(f"📈 Trends for {selected_state} ({'Market: ' + selected_market if market_wise else 'State-wide'})")
1436
+
1437
+ fig = go.Figure()
1438
+
1439
  if data_type == "Both":
 
1440
  scaler = MinMaxScaler()
1441
  df_grouped[['Scaled Price', 'Scaled Arrivals']] = scaler.fit_transform(
1442
  df_grouped[['Modal Price (Rs./Quintal)', 'Arrivals (Tonnes)']]
1443
  )
1444
+
1445
+ fig.add_trace(go.Scatter(
1446
+ x=df_grouped['Reported Date'],
1447
+ y=df_grouped['Scaled Price'],
1448
+ mode='lines',
1449
+ name='Scaled Modal Price',
1450
+ line=dict(color='green'),
1451
+ ))
1452
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1453
  fig.add_trace(go.Scatter(
1454
  x=df_grouped['Reported Date'],
1455
  y=df_grouped['Scaled Arrivals'],
1456
  mode='lines',
1457
  name='Scaled Arrivals',
1458
+ line=dict(color='blue'),
 
 
1459
  ))
1460
+
 
 
 
 
 
 
 
 
1461
  elif data_type == "Price":
1462
+ price_df = df_market_grouped if not df_market_grouped.empty else df_grouped
1463
+ fig.add_trace(go.Scatter(
1464
+ x=price_df['Reported Date'],
1465
+ y=price_df["Modal Price (Rs./Quintal)"],
1466
+ mode='lines',
1467
+ name='Modal Price',
1468
+ line=dict(color='green'),
1469
+ ))
1470
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1471
  elif data_type == "Volume":
 
 
1472
  fig.add_trace(go.Scatter(
1473
  x=df_grouped['Reported Date'],
1474
  y=df_grouped['Arrivals (Tonnes)'],
1475
  mode='lines',
1476
  name='Arrivals',
1477
+ line=dict(color='blue'),
1478
  ))
1479
+
1480
+ fig.update_layout(
1481
+ title="📊 Agricultural Trends",
1482
+ xaxis_title="Date",
1483
+ yaxis_title="Value (Scaled if Both)",
1484
+ template="plotly_white"
1485
+ )
1486
+ st.plotly_chart(fig, use_container_width=True)
1487
+
1488
  else:
1489
+ st.warning("⚠️ No data found for the selected filter range and region.")
1490
+
1491
  except Exception as e:
1492
  st.error(f"❌ Error fetching data 2: {e}")
1493
+ st.exception(e)
1494
+
1495
  elif view_mode == "Predictions":
1496
  st.subheader("📊 Model Analysis")
1497
  sub_option = st.radio("Select one of the following", ["India", "States", "Market"], horizontal=True)
 
1499
  if sub_option == "States":
1500
  states = ["Karnataka", "Madhya Pradesh", "Gujarat", "Uttar Pradesh", "Telangana"]
1501
  selected_state = st.selectbox("Select State for Model Training", states)
1502
+ filter_key = f"state_{selected_state}"
1503
 
1504
  if st.button("Forecast"):
1505
+ query_filter = {"State Name": selected_state}
1506
  df = fetch_and_process_data(query_filter, collection)
1507
  if sub_timeline == "14 days":
1508
  forecast(df, filter_key, 14)
 
1513
  elif sub_option == "Market":
1514
  market_options = ["Rajkot", "Neemuch", "Kalburgi", "Warangal"]
1515
  selected_market = st.selectbox("Select Market for Model Training", market_options)
1516
+ filter_key = f"market_{selected_market}"
1517
  if st.button("Forecast"):
1518
  query_filter = {"Market Name": selected_market}
1519
  comparison_date = pd.to_datetime("18 Feb 2025")
1520
  df = fetch_and_process_data(query_filter, market_price_data)
1521
+ st.write(df[df["Reported Date"]>comparison_date])
1522
  if sub_timeline == "14 days":
1523
  forecast(df, filter_key, 14)
1524
  elif sub_timeline == "1 month":
 
1545
  display_statistics(df)
1546
  elif view_mode == "Exim":
1547
  df = collection_to_dataframe(impExp)
1548
+
 
1549
  plot_option = st.radio(
1550
  "Select the data to visualize:",
1551
  ["Import Price", "Import Quantity", "Export Price", "Export Quantity"],
1552
  horizontal=True
1553
  )
1554
+
 
1555
  time_period = st.selectbox(
1556
  "Select time period:",
1557
  ["1 Month", "6 Months", "1 Year", "2 Years"]
1558
  )
1559
 
 
1560
  df["Reported Date"] = pd.to_datetime(df["Reported Date"], format="%Y-%m-%d")
 
 
1561
  if time_period == "1 Month":
1562
  start_date = pd.Timestamp.now() - pd.DateOffset(months=1)
1563
  elif time_period == "6 Months":
 
1568
  start_date = pd.Timestamp.now() - pd.DateOffset(years=2)
1569
 
1570
  filtered_df = df[df["Reported Date"] >= start_date]
 
 
1571
  if plot_option == "Import Price":
1572
  grouped_df = (
1573
  filtered_df.groupby("Reported Date", as_index=False)["VALUE_IMPORT"]
 
1596
  .rename(columns={"QUANTITY_IMPORT": "Total Export Quantity"})
1597
  )
1598
  y_axis_label = "Total Export Quantity (Tonnes)"
1599
+
 
1600
  fig = px.line(
1601
  grouped_df,
1602
  x="Reported Date",
1603
+ y=grouped_df.columns[1],
1604
  title=f"{plot_option} Over Time",
1605
  labels={"Reported Date": "Date", grouped_df.columns[1]: y_axis_label},
1606
  )
 
1617
 
1618
  if login_button:
1619
  if authenticate_user(username, password):
1620
+ st.session_state.authenticated = True
1621
+ st.session_state['username'] = username
1622
  st.write("Login successful!")
1623
+ st.rerun()
1624
  else:
1625
  st.error("Invalid username or password")