Alaiy committed on
Commit
662dc00
·
verified ·
1 Parent(s): 270fa01

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +364 -393
app.py CHANGED
@@ -20,13 +20,21 @@ from itertools import product
20
  from tqdm import tqdm
21
  import io
22
  from statsmodels.tsa.statespace.sarimax import SARIMAX
 
 
 
 
 
 
 
 
 
23
 
24
  mongo_uri = "mongodb+srv://Agripredict:TjXSvMhOis49qH8E@cluster0.gek7n.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0"
25
  if not mongo_uri:
26
  st.error("MongoDB URI is not set!")
27
  st.stop()
28
  else:
29
- # Connect to MongoDB with SSL certificate validation
30
  client = MongoClient(mongo_uri, tlsCAFile=certifi.where())
31
  db = client["AgriPredict"]
32
  collection = db["WhiteSesame"]
@@ -291,39 +299,27 @@ def create_forecasting_features_3m(df):
291
 
292
 
293
def preprocess_data(df):
    """Clean and regularize the raw price data into a contiguous daily series.

    Steps:
      1. Keep only the date and modal-price columns.
      2. Parse dates and average duplicate same-day observations.
      3. Reindex onto the full daily date range (inserting missing days).
      4. Fill gaps by forward-fill, then backward-fill (covers a leading gap).

    Args:
        df: DataFrame containing at least 'Reported Date' and
            'Modal Price (Rs./Quintal)' columns.

    Returns:
        DataFrame with one row per calendar day between the min and max
        observed dates, columns 'Reported Date' (datetime64) and
        'Modal Price (Rs./Quintal)' (gap-filled daily mean price).
    """
    # .copy() avoids SettingWithCopy warnings and mutating the caller's frame.
    df = df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    df['Reported Date'] = pd.to_datetime(df['Reported Date'])

    # Collapse multiple reports on the same day to their mean price.
    df = df.groupby('Reported Date', as_index=False).mean()

    # Reindex onto every calendar day so downstream lag/rolling features
    # see a regular daily frequency.
    full_date_range = pd.date_range(df['Reported Date'].min(), df['Reported Date'].max())
    df = (
        df.set_index('Reported Date')
        .reindex(full_date_range)
        .rename_axis('Reported Date')
        .reset_index()
    )

    # ffill/bfill replace the deprecated fillna(method=...) form; bfill only
    # matters if the very first day(s) were missing.
    df['Modal Price (Rs./Quintal)'] = (
        df['Modal Price (Rs./Quintal)'].ffill().bfill()
    )
    return df
312
 
 
313
  def train_and_evaluate(df):
314
  import streamlit as st
315
-
316
- # Add progress bar for hyperparameter tuning
317
  progress_bar = st.progress(0)
318
-
319
- # Helper function to update progress during hyperparameter tuning
320
  def update_tuning_progress(current, total):
321
  progress = int((current / total) * 100)
322
  progress_bar.progress(progress)
323
 
324
  df = create_forecasting_features(df)
325
 
326
- # Split the data into training and testing sets
327
  train_df = df[df['Reported Date'] < '2024-01-01']
328
  test_df = df[df['Reported Date'] >= '2024-01-01']
329
 
@@ -331,8 +327,6 @@ def train_and_evaluate(df):
331
  y_train = train_df['Modal Price (Rs./Quintal)']
332
  X_test = test_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
333
  y_test = test_df['Modal Price (Rs./Quintal)']
334
-
335
- # Hyperparameter tuning
336
  st.write("Performing hyperparameter tuning...")
337
  param_grid = {
338
  'learning_rate': [0.01, 0.1, 0.2],
@@ -345,7 +339,7 @@ def train_and_evaluate(df):
345
  param_combinations = len(param_grid['learning_rate']) * len(param_grid['max_depth']) * \
346
  len(param_grid['n_estimators']) * len(param_grid['booster'])
347
 
348
- current_combination = 0 # Counter for combinations
349
 
350
  def custom_grid_search():
351
  nonlocal current_combination
@@ -371,26 +365,22 @@ def train_and_evaluate(df):
371
  'n_estimators': n_estimators,
372
  'booster': booster
373
  }
374
- # Update progress bar
375
  current_combination += 1
376
  update_tuning_progress(current_combination, param_combinations)
377
  return best_params
378
 
379
  best_params = custom_grid_search()
380
-
381
- # Train the best model with the identified parameters
382
  st.write("Training the best model and making predictions...")
383
  best_model = XGBRegressor(**best_params)
384
  best_model.fit(X_train, y_train)
385
  y_pred = best_model.predict(X_test)
386
 
387
- # Metrics
388
  rmse = mean_squared_error(y_test, y_pred, squared=False)
389
  mae = mean_absolute_error(y_test, y_pred)
390
  st.write(f"RMSE: {rmse}")
391
  st.write(f"MAE: {mae}")
392
-
393
- # Prepare data for plotting
394
  train_plot_df = train_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
395
  train_plot_df['Type'] = 'Train'
396
 
@@ -425,7 +415,6 @@ def train_and_evaluate(df):
425
 
426
  st.plotly_chart(fig, use_container_width=True)
427
 
428
- # Return best parameters
429
  return best_params
430
 
431
  def train_and_evaluate_1m(df):
@@ -435,19 +424,16 @@ def train_and_evaluate_1m(df):
435
  from xgboost import XGBRegressor
436
  from sklearn.metrics import mean_squared_error, mean_absolute_error
437
 
438
- # Add progress bar for hyperparameter tuning
439
  progress_bar = st.progress(0)
440
 
441
- # Helper function to update progress during hyperparameter tuning
442
  def update_tuning_progress(current, total):
443
  progress = int((current / total) * 100)
444
  progress_bar.progress(progress)
445
 
446
  df = create_forecasting_features_1m(df)
447
-
448
- # Define train-test split for a 1-month horizon
449
  split_date = pd.to_datetime("2024-01-01")
450
- test_horizon = pd.DateOffset(days=30) # 1-month horizon
451
 
452
  train_df = df[df['Reported Date'] < split_date]
453
  test_df = df[(df['Reported Date'] >= split_date) & (df['Reported Date'] < split_date + test_horizon)]
@@ -457,7 +443,6 @@ def train_and_evaluate_1m(df):
457
  X_test = test_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
458
  y_test = test_df['Modal Price (Rs./Quintal)']
459
 
460
- # Hyperparameter tuning
461
  st.write("Performing hyperparameter tuning...")
462
  param_grid = {
463
  'learning_rate': [0.01, 0.1, 0.2],
@@ -470,7 +455,7 @@ def train_and_evaluate_1m(df):
470
  param_combinations = len(param_grid['learning_rate']) * len(param_grid['max_depth']) * \
471
  len(param_grid['n_estimators']) * len(param_grid['booster'])
472
 
473
- current_combination = 0 # Counter for combinations
474
 
475
  def custom_grid_search():
476
  nonlocal current_combination
@@ -496,26 +481,21 @@ def train_and_evaluate_1m(df):
496
  'n_estimators': n_estimators,
497
  'booster': booster
498
  }
499
- # Update progress bar
500
  current_combination += 1
501
  update_tuning_progress(current_combination, param_combinations)
502
  return best_params
503
 
504
  best_params = custom_grid_search()
505
-
506
- # Train the best model with the identified parameters
507
  st.write("Training the best model and making predictions...")
508
  best_model = XGBRegressor(**best_params)
509
  best_model.fit(X_train, y_train)
510
  y_pred = best_model.predict(X_test)
511
 
512
- # Metrics
513
  rmse = mean_squared_error(y_test, y_pred, squared=False)
514
  mae = mean_absolute_error(y_test, y_pred)
515
  st.write(f"RMSE: {rmse}")
516
  st.write(f"MAE: {mae}")
517
 
518
- # Prepare data for plotting
519
  train_plot_df = train_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
520
  train_plot_df['Type'] = 'Train'
521
 
@@ -550,16 +530,11 @@ def train_and_evaluate_1m(df):
550
 
551
  st.plotly_chart(fig, use_container_width=True)
552
 
553
- # Return best parameters
554
  return best_params
555
 
556
  def train_and_evaluate_3m(df):
557
  import streamlit as st
558
-
559
- # Add progress bar for hyperparameter tuning
560
  progress_bar = st.progress(0)
561
-
562
- # Helper function to update progress during hyperparameter tuning
563
  def update_tuning_progress(current, total):
564
  progress = int((current / total) * 100)
565
  progress_bar.progress(progress)
@@ -573,7 +548,6 @@ def train_and_evaluate_3m(df):
573
  X_test = test_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
574
  y_test = test_df['Modal Price (Rs./Quintal)']
575
 
576
- # Hyperparameter tuning
577
  st.write("Performing hyperparameter tuning...")
578
  param_grid = {
579
  'learning_rate': [0.01, 0.1, 0.2],
@@ -586,7 +560,7 @@ def train_and_evaluate_3m(df):
586
  param_combinations = len(param_grid['learning_rate']) * len(param_grid['max_depth']) * \
587
  len(param_grid['n_estimators']) * len(param_grid['booster'])
588
 
589
- current_combination = 0 # Counter for combinations
590
 
591
  def custom_grid_search():
592
  nonlocal current_combination
@@ -612,26 +586,21 @@ def train_and_evaluate_3m(df):
612
  'n_estimators': n_estimators,
613
  'booster': booster
614
  }
615
- # Update progress bar
616
  current_combination += 1
617
  update_tuning_progress(current_combination, param_combinations)
618
  return best_params
619
 
620
  best_params = custom_grid_search()
621
-
622
- # Train the best model with the identified parameters
623
  st.write("Training the best model and making predictions...")
624
  best_model = XGBRegressor(**best_params)
625
  best_model.fit(X_train, y_train)
626
  y_pred = best_model.predict(X_test)
627
 
628
- # Metrics
629
  rmse = mean_squared_error(y_test, y_pred, squared=False)
630
  mae = mean_absolute_error(y_test, y_pred)
631
  st.write(f"RMSE: {rmse}")
632
  st.write(f"MAE: {mae}")
633
 
634
- # Prepare data for plotting
635
  train_plot_df = train_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
636
  train_plot_df['Type'] = 'Train'
637
 
@@ -666,15 +635,12 @@ def train_and_evaluate_3m(df):
666
 
667
  st.plotly_chart(fig, use_container_width=True)
668
 
669
- # Return best parameters
670
  return best_params
671
 
672
  def forecast_next_14_days(df, _best_params, key):
673
  last_date = df['Reported Date'].max()
674
  future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=14)
675
  future_df = pd.DataFrame({'Reported Date': future_dates})
676
-
677
- # Assuming 'create_forecasting_features' function is defined elsewhere
678
  full_df = pd.concat([df, future_df], ignore_index=True)
679
  full_df = create_forecasting_features(full_df)
680
 
@@ -690,8 +656,6 @@ def forecast_next_14_days(df, _best_params, key):
690
 
691
  future_predictions = model.predict(X_future)
692
  future_df['Modal Price (Rs./Quintal)'] = future_predictions
693
-
694
- # Pass model to plot_data
695
  plot_data(original_df, future_df, last_date, 14)
696
  download_button(future_df, key)
697
 
@@ -699,8 +663,6 @@ def forecast_next_30_days(df, _best_params, key):
699
  last_date = df['Reported Date'].max()
700
  future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=30)
701
  future_df = pd.DataFrame({'Reported Date': future_dates})
702
-
703
- # Assuming 'create_forecasting_features' function is defined elsewhere
704
  full_df = pd.concat([df, future_df], ignore_index=True)
705
  full_df = create_forecasting_features_1m(full_df)
706
 
@@ -716,8 +678,6 @@ def forecast_next_30_days(df, _best_params, key):
716
 
717
  future_predictions = model.predict(X_future)
718
  future_df['Modal Price (Rs./Quintal)'] = future_predictions
719
-
720
- # Pass model to plot_data
721
  plot_data(original_df, future_df, last_date, 30)
722
  download_button(future_df, key)
723
 
@@ -725,8 +685,6 @@ def forecast_next_90_days(df, _best_params, key):
725
  last_date = df['Reported Date'].max()
726
  future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=90)
727
  future_df = pd.DataFrame({'Reported Date': future_dates})
728
-
729
- # Assuming 'create_forecasting_features' function is defined elsewhere
730
  full_df = pd.concat([df, future_df], ignore_index=True)
731
  full_df = create_forecasting_features_3m(full_df)
732
 
@@ -742,29 +700,19 @@ def forecast_next_90_days(df, _best_params, key):
742
 
743
  future_predictions = model.predict(X_future)
744
  future_df['Modal Price (Rs./Quintal)'] = future_predictions
745
-
746
- # Pass model to plot_data
747
  plot_data(original_df, future_df, last_date, 90)
748
  download_button(future_df, key)
749
 
750
  def plot_data(original_df, future_df, last_date, days):
751
- # Filter original_df for the period you want to plot.
752
  actual_df = original_df[original_df['Reported Date'] >= (last_date - pd.Timedelta(days=days))].copy()
753
  actual_df['Type'] = 'Actual'
754
-
755
- # Prepare the future_df (predicted data) and mark it as forecasted.
756
  future_plot_df = future_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
757
  future_plot_df['Type'] = 'Forecasted'
758
-
759
- # Get the last actual data point from actual_df.
760
- # Ensure the DataFrame is sorted by date.
761
  last_actual_point = actual_df.sort_values('Reported Date').iloc[[-1]].copy()
762
  future_plot_df = pd.concat([last_actual_point, future_plot_df])
763
-
764
- # Combine both actual and forecasted data for plotting.
765
  plot_df = pd.concat([actual_df, future_plot_df])
766
-
767
- # Create the plot.
768
  fig = go.Figure()
769
  for plot_type, color, dash in [('Actual', 'blue', 'solid'), ('Forecasted', 'red', 'dash')]:
770
  data = plot_df[plot_df['Type'] == plot_type]
@@ -787,18 +735,11 @@ def plot_data(original_df, future_df, last_date, days):
787
 
788
 
789
  def download_button(future_df, key):
790
- # Create a new DataFrame with only 'Reported Date' and 'Modal Price (Rs./Quintal)'
791
  download_df = future_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
792
-
793
- # Format 'Reported Date' to display only the date in YYYY-MM-DD format
794
  download_df['Reported Date'] = download_df['Reported Date'].dt.strftime('%Y-%m-%d')
795
-
796
- # Write to Excel without the index
797
  towrite = io.BytesIO()
798
- download_df.to_excel(towrite, index=False, engine='xlsxwriter') # Using 'xlsxwriter' for the Excel engine
799
  towrite.seek(0)
800
-
801
- # Create a download button for the Excel file
802
  st.download_button(label="Download Forecasted Values",
803
  data=towrite,
804
  file_name=f'forecasted_prices_{key}.xlsx',
@@ -831,15 +772,13 @@ def save_best_params(collection, filter_key, best_params):
831
  collection.replace_one({"filter_key": filter_key}, best_params)
832
  else:
833
  collection.insert_one(best_params)
834
-
835
- # Function to retrieve best_params from MongoDB
836
def get_best_params(filter_key, collection):
    """Look up the stored best hyperparameters for *filter_key*.

    Args:
        filter_key: Unique key identifying the state/market/model combination.
        collection: MongoDB collection holding saved parameter documents.

    Returns:
        The matching document (dict) or None when no record exists.
    """
    return collection.find_one({"filter_key": filter_key})
839
- # Function to handle training and forecasting
840
  def train_and_forecast(df, filter_key, days):
841
  if df is not None:
842
- # Train the model and save parameters to MongoDB
843
  if days==14:
844
  best_params = train_and_evaluate(df)
845
  save_best_params(filter_key, best_params, best_params_collection)
@@ -852,6 +791,8 @@ def train_and_forecast(df, filter_key, days):
852
  best_params = train_and_evaluate_3m(df)
853
  save_best_params(filter_key, best_params, best_params_collection_3m)
854
  forecast_next_90_days(df, best_params, filter_key)
 
 
855
 
856
  def forecast(df, filter_key, days):
857
  if days==14:
@@ -887,13 +828,9 @@ def collection_to_dataframe(collection, drop_id=True):
887
  Returns:
888
  pd.DataFrame: DataFrame containing the collection data.
889
  """
890
- # Fetch all documents from the collection
891
  documents = list(collection.find())
892
 
893
- # Convert to a pandas DataFrame
894
  df = pd.DataFrame(documents)
895
-
896
- # Drop the MongoDB "_id" column if specified
897
  if drop_id and '_id' in df.columns:
898
  df = df.drop(columns=['_id'])
899
 
@@ -903,19 +840,12 @@ def collection_to_dataframe(collection, drop_id=True):
903
 
904
  def editable_spreadsheet():
905
  st.title("Sowing Report Prediction Model")
906
-
907
- # Excel file uploader
908
  uploaded_file = st.file_uploader("Upload your Excel file", type=['xlsx'])
909
-
910
- # Check if an Excel file is uploaded
911
  if uploaded_file is not None:
912
- # Read the Excel file
913
  df_excel = pd.read_excel(uploaded_file)
914
-
915
- # Display the DataFrame from the Excel file
916
  st.write("Excel data loaded:", df_excel)
917
 
918
- # Form for inputting filtering options and area for calculation
919
  with st.form("input_form"):
920
  input_region = st.text_input("Enter Region to Filter By", placeholder="Region Name")
921
  input_season = st.text_input("Enter Season to Filter By", placeholder="Season (e.g., Winter)")
@@ -924,7 +854,6 @@ def editable_spreadsheet():
924
 
925
  if submit_button:
926
  if input_region and input_season and input_area > 0:
927
- # Filter data by the region and season specified
928
  filtered_df = df_excel[
929
  (df_excel['Region'].str.lower() == input_region.lower()) &
930
  (df_excel['Season'].str.lower() == input_season.lower())
@@ -975,8 +904,6 @@ def display_statistics(df):
975
  }
976
  </style>
977
  """, unsafe_allow_html=True)
978
-
979
- # Ensure 'Reported Date' is in datetime format
980
  df['Reported Date'] = pd.to_datetime(df['Reported Date'])
981
  national_data = df.groupby('Reported Date').agg({
982
  'Modal Price (Rs./Quintal)': 'mean',
@@ -986,12 +913,13 @@ def display_statistics(df):
986
  st.subheader("🗓️ Key Statistics")
987
  latest_date = national_data['Reported Date'].max()
988
  latest_price = national_data[national_data['Reported Date'] == latest_date]['Modal Price (Rs./Quintal)'].mean()
 
989
  latest_arrivals = national_data[national_data['Reported Date'] == latest_date]['Arrivals (Tonnes)'].sum()
990
 
991
  st.markdown("<p class='highlight'>This section provides the most recent statistics for the market. It includes the latest available date, the average price of commodities, and the total quantity of goods arriving at the market. These metrics offer an up-to-date snapshot of market conditions.</p>", unsafe_allow_html=True)
992
  st.write(f"**Latest Date**: {latest_date.strftime('%Y-%m-%d')}")
993
  st.write(f"**Latest Modal Price**: {latest_price:.2f} Rs./Quintal")
994
- st.write(f"**Latest Arrivals**: {latest_arrivals:.2f} Tonnes")
995
 
996
  st.subheader("📆 This Day in Previous Years")
997
  st.markdown("<p class='highlight'>This table shows the modal price and total arrivals for this exact day across previous years. It provides a historical perspective to compare against current market conditions. This section examines historical data for the same day in previous years. By analyzing trends for this specific day, you can identify seasonal patterns, supply-demand changes, or any deviations that might warrant closer attention.</p>", unsafe_allow_html=True)
@@ -1060,166 +988,259 @@ def display_statistics(df):
1060
  editable_spreadsheet()
1061
 
1062
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1063
 
1064
  def fetch_and_store_data():
 
1065
  latest_doc = collection.find_one(sort=[("Reported Date", -1)])
1066
- if latest_doc and "Reported Date" in latest_doc:
1067
- latest_date = latest_doc["Reported Date"]
1068
- else:
1069
- latest_date = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1070
 
1071
- if latest_date:
1072
- from_date = (latest_date + timedelta(days=1)).strftime('%d %b %Y')
1073
- else:
1074
- # If no latest date, set a default from_date
1075
- from_date = "01 Jan 2000"
1076
-
1077
- to_date = (datetime.now() - timedelta(days=1)).strftime('%d %b %Y')
1078
- from_date_obj = datetime.strptime(from_date, '%d %b %Y')
1079
- to_date_obj = datetime.strptime(to_date, '%d %b %Y')
1080
- if to_date_obj < from_date_obj:
1081
- print("Data already scraped")
1082
- return None
1083
- # Build the URL to be requested
1084
- base_url = "https://agmarknet.gov.in/SearchCmmMkt.aspx"
1085
- params = {
1086
- "Tx_Commodity": "11",
1087
- "Tx_State": "0",
1088
- "Tx_District": "0",
1089
- "Tx_Market": "0",
1090
- "DateFrom": from_date,
1091
- "DateTo": to_date,
1092
- "Fr_Date": from_date,
1093
- "To_Date": to_date,
1094
- "Tx_Trend": "2",
1095
- "Tx_CommodityHead": "Sesamum(Sesame,Gingelly,Til)",
1096
- "Tx_StateHead": "--Select--",
1097
- "Tx_DistrictHead": "--Select--",
1098
- "Tx_MarketHead": "--Select--"
1099
- }
1100
-
1101
- full_url = f"{base_url}?{'&'.join(f'{k}={v}' for k, v in params.items())}"
1102
- api_url = "https://api.scraperapi.com"
1103
- api_key = "bbbbde6b56c0fde1e2a61c914eb22d14"
1104
- scraperapi_params = {
1105
- 'api_key': api_key,
1106
- 'url': full_url
1107
- }
1108
 
1109
- response = requests.get(api_url, params=scraperapi_params)
1110
-
1111
- if response.status_code == 200:
1112
- soup = BeautifulSoup(response.content, 'html.parser')
1113
- table = soup.find("table", {"class": "tableagmark_new"})
1114
- if table:
1115
- headers = [th.get_text(strip=True) for th in table.find_all("th")]
1116
- rows = [[td.get_text(strip=True) for td in row.find_all("td")] for row in table.find_all("tr")[1:]]
1117
-
1118
- df = pd.DataFrame(rows, columns=headers)
1119
- df = df[df['Variety']=="White"]
1120
- df["Reported Date"] = pd.to_datetime(df["Reported Date"], format='%d %b %Y', errors='coerce')
1121
- df.dropna(subset=["Reported Date"], inplace=True)
1122
- df.sort_values(by="Reported Date", inplace=True)
1123
- df.rename(columns={"State Name": "state"}, inplace=True)
1124
-
1125
- # Type casting for the columns
1126
- df["Modal Price (Rs./Quintal)"] = pd.to_numeric(df["Modal Price (Rs./Quintal)"], errors='coerce').astype("int64")
1127
- df["Arrivals (Tonnes)"] = pd.to_numeric(df["Arrivals (Tonnes)"], errors='coerce').astype("float64")
1128
- df["state"] = df["state"].astype("string")
1129
- df["Market Name"] = df["Market Name"].astype("string")
1130
- records = df.to_dict(orient="records")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1131
  if records:
1132
  collection.insert_many(records)
1133
- print(f"Inserted {len(records)} new records into MongoDB.")
1134
  else:
1135
- print("No new records to insert.")
1136
 
1137
- return df
 
1138
 
1139
- else:
1140
- print(f"Failed to fetch data with status code: {response.status_code}")
1141
- return None
1142
 
1143
-
1144
  def fetch_and_store_data_market():
 
1145
  latest_doc = market_price_data.find_one(sort=[("Reported Date", -1)])
1146
- if latest_doc and "Reported Date" in latest_doc:
1147
- latest_date = latest_doc["Reported Date"]
1148
- else:
1149
- latest_date = None
1150
-
1151
- if latest_date:
1152
- from_date = (latest_date + timedelta(days=1)).strftime('%d %b %Y')
1153
- else:
1154
- # If no latest date, set a default from_date
1155
- from_date = "01 Jan 2000"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1156
 
1157
- to_date = (datetime.now() - timedelta(days=1)).strftime('%d %b %Y')
1158
- from_date_obj = datetime.strptime(from_date, '%d %b %Y')
1159
- to_date_obj = datetime.strptime(to_date, '%d %b %Y')
1160
-
1161
- if to_date_obj <= from_date_obj:
1162
- st.write("Data already scraped")
1163
- return None
1164
- base_url = "https://agmarknet.gov.in/SearchCmmMkt.aspx"
1165
- params = {
1166
- "Tx_Commodity": "11",
1167
- "Tx_State": "0",
1168
- "Tx_District": "0",
1169
- "Tx_Market": "0",
1170
- "DateFrom": from_date,
1171
- "DateTo": to_date,
1172
- "Fr_Date": from_date,
1173
- "To_Date": to_date,
1174
- "Tx_Trend": "0",
1175
- "Tx_CommodityHead": "Sesamum(Sesame,Gingelly,Til)",
1176
- "Tx_StateHead": "--Select--",
1177
- "Tx_DistrictHead": "--Select--",
1178
- "Tx_MarketHead": "--Select--"
1179
  }
1180
-
1181
- full_url = f"{base_url}?{'&'.join(f'{k}={v}' for k, v in params.items())}"
1182
- api_url = "https://api.scraperapi.com"
1183
- api_key = "8842750a88db7513a1d19325745437cc"
1184
- scraperapi_params = {
1185
- 'api_key': api_key,
1186
- 'url': full_url
1187
- }
1188
 
1189
- response = requests.get(api_url, params=scraperapi_params)
1190
- if response.status_code == 200:
1191
- soup = BeautifulSoup(response.content, 'html.parser')
1192
- table = soup.find("table", {"class": "tableagmark_new"})
1193
- if table:
1194
- headers = [th.get_text(strip=True) for th in table.find_all("th")]
1195
- rows = []
1196
-
1197
- for row in table.find_all("tr")[1:]:
1198
- cells = [td.get_text(strip=True) for td in row.find_all("td")]
1199
- if cells:
1200
- rows.append(cells)
1201
- df = pd.DataFrame(rows, columns=headers)
1202
- df = df[df['Variety']=="White"]
1203
- df["Price Date"] = pd.to_datetime(df["Price Date"], format='%d %b %Y', errors='coerce')
1204
- df.dropna(subset=["Price Date"], inplace=True)
1205
- df.sort_values(by="Price Date", inplace=True)
1206
- df = df[df["Grade"]=="FAQ"]
1207
- df["Modal Price (Rs./Quintal)"] = pd.to_numeric(df["Modal Price (Rs./Quintal)"], errors='coerce').astype("int64")
1208
- df["Market Name"] = df["Market Name"].astype("string")
1209
- df.rename(columns={"Price Date": "Reported Date"}, inplace=True)
1210
- records = df.to_dict(orient="records")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1211
  if records:
1212
  market_price_data.insert_many(records)
1213
- print(f"Inserted {len(records)} new records into MongoDB.")
1214
  else:
1215
- print("No new records to insert.")
1216
- return df
1217
- else:
1218
- st.write("No table found")
1219
 
1220
- else:
1221
- st.write(f"Failed to fetch data with status code: {response.status_code}")
1222
- return None
 
1223
 
1224
 
1225
 
@@ -1312,208 +1333,165 @@ st.markdown("""
1312
  if 'authenticated' not in st.session_state:
1313
  st.session_state.authenticated = False
1314
 
1315
- if st.session_state.authenticated:
1316
  st.title("🌾 AgriPredict Dashboard")
 
1317
  if st.button("Get Live Data Feed"):
 
1318
  fetch_and_store_data()
1319
  fetch_and_store_data_market()
 
1320
  view_mode = st.radio("", ["Statistics", "Plots", "Predictions", "Exim"], horizontal=True)
1321
 
1322
  if view_mode == "Plots":
1323
  st.sidebar.header("Filters")
 
1324
  selected_period = st.sidebar.selectbox(
1325
  "Select Time Period",
1326
- ["2 Weeks", "1 Month", "2 Months", "3 Months", "6 Months", "1 Year", "2 Years", "5 Years"],
1327
  index=1
1328
  )
1329
  period_mapping = {
1330
  "2 Weeks": 14,
1331
  "1 Month": 30,
1332
- "2 Months": 60,
1333
  "3 Months": 90,
1334
- "6 Months": 180,
1335
  "1 Year": 365,
1336
  "2 Years": 730,
1337
  "5 Years": 1825
1338
  }
1339
- st.session_state.selected_period = period_mapping[selected_period]
1340
-
1341
- # Add 'India' option to the list of states
1342
  state_options = list(state_market_dict.keys()) + ['India']
1343
- selected_state = st.sidebar.selectbox("Select", state_options)
1344
-
1345
  market_wise = False
 
 
1346
  if selected_state != 'India':
1347
  market_wise = st.sidebar.checkbox("Market Wise Analysis")
1348
  if market_wise:
1349
  markets = state_market_dict.get(selected_state, [])
 
1350
  selected_market = st.sidebar.selectbox("Select Market", markets)
1351
  query_filter = {"Market Name": selected_market}
1352
  else:
1353
- query_filter = {"state": selected_state}
1354
  else:
1355
- query_filter = {} # For India, no specific state filter
1356
-
1357
- # Dropdown for data type
1358
- data_type = st.sidebar.radio(
1359
- "Select Data Type",
1360
- ["Price", "Volume", "Both"]
1361
- )
1362
-
1363
- # Add date filtering based on selected period
1364
  query_filter["Reported Date"] = {
1365
- "$gte": datetime.now() - timedelta(days=st.session_state.selected_period)
1366
  }
1367
-
1368
- # Submit button to trigger the query and plot
 
 
 
1369
  if st.sidebar.button("✨ Let's go!"):
1370
  try:
1371
- df_market_grouped = []
 
 
 
1372
  if "Market Name" in query_filter:
 
1373
  market_cursor = market_price_data.find(query_filter)
1374
  market_data = list(market_cursor)
1375
- df_market = pd.DataFrame(market_data)
1376
- df_market_grouped = df_market.groupby('Reported Date', as_index=False).agg({
1377
- 'Modal Price (Rs./Quintal)': 'mean'
1378
- })
1379
- date_range = pd.date_range(
1380
- start=df_market_grouped['Reported Date'].min(),
1381
- end=df_market_grouped['Reported Date'].max()
1382
- )
1383
- df_market_grouped = df_market_grouped.set_index('Reported Date').reindex(date_range).rename_axis('Reported Date').reset_index()
1384
- df_market_grouped['Modal Price (Rs./Quintal)'] = df_market_grouped['Modal Price (Rs./Quintal)'].fillna(method='ffill').fillna(method='bfill')
1385
-
1386
-
 
 
 
1387
  cursor = collection.find(query_filter)
1388
  data = list(cursor)
1389
-
 
1390
  if data:
1391
- # Convert MongoDB data to a DataFrame
1392
  df = pd.DataFrame(data)
1393
- df['Reported Date'] = pd.to_datetime(df['Reported Date'])
1394
-
1395
- if selected_state == 'India':
1396
- # Aggregate data for all of India
1397
- df_grouped = df.groupby('Reported Date', as_index=False).agg({
1398
- 'Arrivals (Tonnes)': 'sum',
1399
- 'Modal Price (Rs./Quintal)': 'mean'
1400
- })
1401
- else:
1402
- # Regular grouping by Reported Date
1403
- df_grouped = df.groupby('Reported Date', as_index=False).agg({
1404
- 'Arrivals (Tonnes)': 'sum',
1405
- 'Modal Price (Rs./Quintal)': 'mean'
1406
- })
1407
-
1408
- # Create a complete date range
1409
- date_range = pd.date_range(
1410
- start=df_grouped['Reported Date'].min(),
1411
- end=df_grouped['Reported Date'].max()
1412
- )
1413
  df_grouped = df_grouped.set_index('Reported Date').reindex(date_range).rename_axis('Reported Date').reset_index()
1414
-
1415
- # Fill missing values
1416
  df_grouped['Arrivals (Tonnes)'] = df_grouped['Arrivals (Tonnes)'].fillna(method='ffill').fillna(method='bfill')
1417
  df_grouped['Modal Price (Rs./Quintal)'] = df_grouped['Modal Price (Rs./Quintal)'].fillna(method='ffill').fillna(method='bfill')
1418
-
1419
- st.subheader(f"📈 Trends for {selected_state} ({'Market: ' + selected_market if market_wise else 'State'})")
1420
-
 
 
1421
  if data_type == "Both":
1422
- # Min-Max Scaling
1423
  scaler = MinMaxScaler()
1424
  df_grouped[['Scaled Price', 'Scaled Arrivals']] = scaler.fit_transform(
1425
  df_grouped[['Modal Price (Rs./Quintal)', 'Arrivals (Tonnes)']]
1426
  )
1427
- if "Market Name" in query_filter:
1428
- df_market_grouped['Scaled Price'] = scaler.fit_transform(
1429
- df_market_grouped[["Modal Price (Rs./Quintal)"]]
1430
- )
1431
-
1432
- fig = go.Figure()
1433
-
1434
- fig.add_trace(go.Scatter(
1435
- x=df_market_grouped['Reported Date'],
1436
- y=df_market_grouped['Scaled Price'],
1437
- mode='lines',
1438
- name='Scaled Price',
1439
- line=dict(width=1, color='green'),
1440
- text=df_market_grouped['Modal Price (Rs./Quintal)'],
1441
- hovertemplate='Date: %{x}<br>Scaled Price: %{y:.2f}<br>Actual Price: %{text:.2f}<extra></extra>'
1442
- ))
1443
- else:
1444
- fig = go.Figure()
1445
-
1446
- fig.add_trace(go.Scatter(
1447
- x=df_grouped['Reported Date'],
1448
- y=df_grouped['Scaled Price'],
1449
- mode='lines',
1450
- name='Scaled Price',
1451
- line=dict(width=1, color='green'),
1452
- text=df_grouped['Modal Price (Rs./Quintal)'],
1453
- hovertemplate='Date: %{x}<br>Scaled Price: %{y:.2f}<br>Actual Price: %{text:.2f}<extra></extra>'
1454
- ))
1455
-
1456
  fig.add_trace(go.Scatter(
1457
  x=df_grouped['Reported Date'],
1458
  y=df_grouped['Scaled Arrivals'],
1459
  mode='lines',
1460
  name='Scaled Arrivals',
1461
- line=dict(width=1, color='blue'),
1462
- text=df_grouped['Arrivals (Tonnes)'],
1463
- hovertemplate='Date: %{x}<br>Scaled Arrivals: %{y:.2f}<br>Actual Arrivals: %{text:.2f}<extra></extra>'
1464
  ))
1465
-
1466
- fig.update_layout(
1467
- title="Price and Arrivals Trend",
1468
- xaxis_title='Date',
1469
- yaxis_title='Scaled Values',
1470
- template='plotly_white'
1471
- )
1472
- st.plotly_chart(fig, use_container_width=True)
1473
-
1474
  elif data_type == "Price":
1475
- # Plot Modal Price
1476
- if "Market Name" in query_filter:
1477
- fig = go.Figure()
1478
- fig.add_trace(go.Scatter(
1479
- x=df_market_grouped['Reported Date'],
1480
- y=df_market_grouped['Modal Price (Rs./Quintal)'],
1481
- mode='lines',
1482
- name='Modal Price',
1483
- line=dict(width=1, color='green')
1484
- ))
1485
- fig.update_layout(title="Modal Price Trend", xaxis_title='Date', yaxis_title='Price (/Quintall)', template='plotly_white')
1486
- st.plotly_chart(fig, use_container_width=True)
1487
- else:
1488
- fig = go.Figure()
1489
- fig.add_trace(go.Scatter(
1490
- x=df_grouped['Reported Date'],
1491
- y=df_grouped['Modal Price (Rs./Quintal)'],
1492
- mode='lines',
1493
- name='Modal Price',
1494
- line=dict(width=1, color='green')
1495
- ))
1496
- fig.update_layout(title="Modal Price Trend", xaxis_title='Date', yaxis_title='Price (/Quintall)', template='plotly_white')
1497
- st.plotly_chart(fig, use_container_width=True)
1498
-
1499
  elif data_type == "Volume":
1500
- # Plot Arrivals (Tonnes)
1501
- fig = go.Figure()
1502
  fig.add_trace(go.Scatter(
1503
  x=df_grouped['Reported Date'],
1504
  y=df_grouped['Arrivals (Tonnes)'],
1505
  mode='lines',
1506
  name='Arrivals',
1507
- line=dict(width=1, color='blue')
1508
  ))
1509
- fig.update_layout(title="Arrivals Trend", xaxis_title='Date', yaxis_title='Volume (in Tonnes)', template='plotly_white')
1510
- st.plotly_chart(fig, use_container_width=True)
1511
-
 
 
 
 
 
 
1512
  else:
1513
- st.warning("⚠️ No data found for the selected filters.")
1514
-
1515
  except Exception as e:
1516
  st.error(f"❌ Error fetching data 2: {e}")
 
 
1517
  elif view_mode == "Predictions":
1518
  st.subheader("📊 Model Analysis")
1519
  sub_option = st.radio("Select one of the following", ["India", "States", "Market"], horizontal=True)
@@ -1521,10 +1499,10 @@ if st.session_state.authenticated:
1521
  if sub_option == "States":
1522
  states = ["Karnataka", "Madhya Pradesh", "Gujarat", "Uttar Pradesh", "Telangana"]
1523
  selected_state = st.selectbox("Select State for Model Training", states)
1524
- filter_key = f"state_{selected_state}" # Unique key for each state
1525
 
1526
  if st.button("Forecast"):
1527
- query_filter = {"state": selected_state}
1528
  df = fetch_and_process_data(query_filter, collection)
1529
  if sub_timeline == "14 days":
1530
  forecast(df, filter_key, 14)
@@ -1535,11 +1513,12 @@ if st.session_state.authenticated:
1535
  elif sub_option == "Market":
1536
  market_options = ["Rajkot", "Neemuch", "Kalburgi", "Warangal"]
1537
  selected_market = st.selectbox("Select Market for Model Training", market_options)
1538
- filter_key = f"market_{selected_market}" # Unique key for each market
1539
  if st.button("Forecast"):
1540
  query_filter = {"Market Name": selected_market}
1541
  comparison_date = pd.to_datetime("18 Feb 2025")
1542
  df = fetch_and_process_data(query_filter, market_price_data)
 
1543
  if sub_timeline == "14 days":
1544
  forecast(df, filter_key, 14)
1545
  elif sub_timeline == "1 month":
@@ -1566,24 +1545,19 @@ if st.session_state.authenticated:
1566
  display_statistics(df)
1567
  elif view_mode == "Exim":
1568
  df = collection_to_dataframe(impExp)
1569
-
1570
- # Add radio buttons for user selection
1571
  plot_option = st.radio(
1572
  "Select the data to visualize:",
1573
  ["Import Price", "Import Quantity", "Export Price", "Export Quantity"],
1574
  horizontal=True
1575
  )
1576
-
1577
- # Dropdown for time period selection
1578
  time_period = st.selectbox(
1579
  "Select time period:",
1580
  ["1 Month", "6 Months", "1 Year", "2 Years"]
1581
  )
1582
 
1583
- # Convert Reported Date to datetime
1584
  df["Reported Date"] = pd.to_datetime(df["Reported Date"], format="%Y-%m-%d")
1585
-
1586
- # Filter data based on the time period
1587
  if time_period == "1 Month":
1588
  start_date = pd.Timestamp.now() - pd.DateOffset(months=1)
1589
  elif time_period == "6 Months":
@@ -1594,8 +1568,6 @@ if st.session_state.authenticated:
1594
  start_date = pd.Timestamp.now() - pd.DateOffset(years=2)
1595
 
1596
  filtered_df = df[df["Reported Date"] >= start_date]
1597
-
1598
- # Process data based on the selected option
1599
  if plot_option == "Import Price":
1600
  grouped_df = (
1601
  filtered_df.groupby("Reported Date", as_index=False)["VALUE_IMPORT"]
@@ -1624,12 +1596,11 @@ if st.session_state.authenticated:
1624
  .rename(columns={"QUANTITY_IMPORT": "Total Export Quantity"})
1625
  )
1626
  y_axis_label = "Total Export Quantity (Tonnes)"
1627
-
1628
- # Plot using Plotly
1629
  fig = px.line(
1630
  grouped_df,
1631
  x="Reported Date",
1632
- y=grouped_df.columns[1], # Dynamic y-axis column name
1633
  title=f"{plot_option} Over Time",
1634
  labels={"Reported Date": "Date", grouped_df.columns[1]: y_axis_label},
1635
  )
@@ -1646,9 +1617,9 @@ else:
1646
 
1647
  if login_button:
1648
  if authenticate_user(username, password):
1649
- st.session_state.authenticated = True # Set the authentication state to True
1650
- st.session_state['username'] = username # Store username in session state
1651
  st.write("Login successful!")
1652
- st.rerun() # Page will automatically rerun to show the protected content
1653
  else:
1654
  st.error("Invalid username or password")
 
20
  from tqdm import tqdm
21
  import io
22
  from statsmodels.tsa.statespace.sarimax import SARIMAX
23
+ from datetime import datetime, timedelta
24
+
25
def generate_date_ranges(start_date: str, end_date: str):
    """Yield one-day (from, to) date-string pairs covering the inclusive span.

    Both arguments and all yielded strings use the "%d %b %Y" format
    (e.g. "01 Jan 2020"). Yields nothing when start_date > end_date.
    """
    cursor = datetime.strptime(start_date, "%d %b %Y")
    stop = datetime.strptime(end_date, "%d %b %Y")
    one_day = timedelta(days=1)
    while cursor <= stop:
        stamp = cursor.strftime("%d %b %Y")
        yield (stamp, stamp)
        cursor += one_day
32
 
33
  mongo_uri = "mongodb+srv://Agripredict:TjXSvMhOis49qH8E@cluster0.gek7n.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0"
34
  if not mongo_uri:
35
  st.error("MongoDB URI is not set!")
36
  st.stop()
37
  else:
 
38
  client = MongoClient(mongo_uri, tlsCAFile=certifi.where())
39
  db = client["AgriPredict"]
40
  collection = db["WhiteSesame"]
 
299
 
300
 
301
def preprocess_data(df):
    """Return a daily, gap-free modal-price series built from raw market rows.

    Keeps only 'Reported Date' and 'Modal Price (Rs./Quintal)', averages
    duplicate same-day observations, reindexes onto a complete daily calendar
    between the first and last observed date, and fills the resulting gaps by
    forward- then backward-fill.

    Parameters
    ----------
    df : pd.DataFrame
        Raw rows containing at least 'Reported Date' and
        'Modal Price (Rs./Quintal)' columns.

    Returns
    -------
    pd.DataFrame
        Two columns ('Reported Date', 'Modal Price (Rs./Quintal)'), one row
        per calendar day, no missing prices.
    """
    # Copy the two-column slice so the caller's frame is never mutated and
    # pandas does not emit SettingWithCopyWarning on the assignments below
    # (the original assigned into a view of the input frame).
    df = df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
    df['Reported Date'] = pd.to_datetime(df['Reported Date'])

    # Average duplicate observations reported on the same day.
    df = df.groupby('Reported Date', as_index=False).mean()

    # Reindex onto every calendar day between the first and last observation;
    # newly introduced days carry NaN prices until the fill step.
    full_date_range = pd.date_range(df['Reported Date'].min(), df['Reported Date'].max())
    df = df.set_index('Reported Date').reindex(full_date_range).rename_axis('Reported Date').reset_index()

    # ffill()/bfill() replace the deprecated fillna(method=...) API
    # (removed in pandas 3.0); behavior is identical.
    df['Modal Price (Rs./Quintal)'] = (
        df['Modal Price (Rs./Quintal)'].ffill().bfill()
    )
    return df
312
 
313
+
314
  def train_and_evaluate(df):
315
  import streamlit as st
 
 
316
  progress_bar = st.progress(0)
 
 
317
  def update_tuning_progress(current, total):
318
  progress = int((current / total) * 100)
319
  progress_bar.progress(progress)
320
 
321
  df = create_forecasting_features(df)
322
 
 
323
  train_df = df[df['Reported Date'] < '2024-01-01']
324
  test_df = df[df['Reported Date'] >= '2024-01-01']
325
 
 
327
  y_train = train_df['Modal Price (Rs./Quintal)']
328
  X_test = test_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
329
  y_test = test_df['Modal Price (Rs./Quintal)']
 
 
330
  st.write("Performing hyperparameter tuning...")
331
  param_grid = {
332
  'learning_rate': [0.01, 0.1, 0.2],
 
339
  param_combinations = len(param_grid['learning_rate']) * len(param_grid['max_depth']) * \
340
  len(param_grid['n_estimators']) * len(param_grid['booster'])
341
 
342
+ current_combination = 0
343
 
344
  def custom_grid_search():
345
  nonlocal current_combination
 
365
  'n_estimators': n_estimators,
366
  'booster': booster
367
  }
 
368
  current_combination += 1
369
  update_tuning_progress(current_combination, param_combinations)
370
  return best_params
371
 
372
  best_params = custom_grid_search()
373
+
 
374
  st.write("Training the best model and making predictions...")
375
  best_model = XGBRegressor(**best_params)
376
  best_model.fit(X_train, y_train)
377
  y_pred = best_model.predict(X_test)
378
 
 
379
  rmse = mean_squared_error(y_test, y_pred, squared=False)
380
  mae = mean_absolute_error(y_test, y_pred)
381
  st.write(f"RMSE: {rmse}")
382
  st.write(f"MAE: {mae}")
383
+
 
384
  train_plot_df = train_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
385
  train_plot_df['Type'] = 'Train'
386
 
 
415
 
416
  st.plotly_chart(fig, use_container_width=True)
417
 
 
418
  return best_params
419
 
420
  def train_and_evaluate_1m(df):
 
424
  from xgboost import XGBRegressor
425
  from sklearn.metrics import mean_squared_error, mean_absolute_error
426
 
 
427
  progress_bar = st.progress(0)
428
 
 
429
  def update_tuning_progress(current, total):
430
  progress = int((current / total) * 100)
431
  progress_bar.progress(progress)
432
 
433
  df = create_forecasting_features_1m(df)
434
+
 
435
  split_date = pd.to_datetime("2024-01-01")
436
+ test_horizon = pd.DateOffset(days=30)
437
 
438
  train_df = df[df['Reported Date'] < split_date]
439
  test_df = df[(df['Reported Date'] >= split_date) & (df['Reported Date'] < split_date + test_horizon)]
 
443
  X_test = test_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
444
  y_test = test_df['Modal Price (Rs./Quintal)']
445
 
 
446
  st.write("Performing hyperparameter tuning...")
447
  param_grid = {
448
  'learning_rate': [0.01, 0.1, 0.2],
 
455
  param_combinations = len(param_grid['learning_rate']) * len(param_grid['max_depth']) * \
456
  len(param_grid['n_estimators']) * len(param_grid['booster'])
457
 
458
+ current_combination = 0
459
 
460
  def custom_grid_search():
461
  nonlocal current_combination
 
481
  'n_estimators': n_estimators,
482
  'booster': booster
483
  }
 
484
  current_combination += 1
485
  update_tuning_progress(current_combination, param_combinations)
486
  return best_params
487
 
488
  best_params = custom_grid_search()
 
 
489
  st.write("Training the best model and making predictions...")
490
  best_model = XGBRegressor(**best_params)
491
  best_model.fit(X_train, y_train)
492
  y_pred = best_model.predict(X_test)
493
 
 
494
  rmse = mean_squared_error(y_test, y_pred, squared=False)
495
  mae = mean_absolute_error(y_test, y_pred)
496
  st.write(f"RMSE: {rmse}")
497
  st.write(f"MAE: {mae}")
498
 
 
499
  train_plot_df = train_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
500
  train_plot_df['Type'] = 'Train'
501
 
 
530
 
531
  st.plotly_chart(fig, use_container_width=True)
532
 
 
533
  return best_params
534
 
535
  def train_and_evaluate_3m(df):
536
  import streamlit as st
 
 
537
  progress_bar = st.progress(0)
 
 
538
  def update_tuning_progress(current, total):
539
  progress = int((current / total) * 100)
540
  progress_bar.progress(progress)
 
548
  X_test = test_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
549
  y_test = test_df['Modal Price (Rs./Quintal)']
550
 
 
551
  st.write("Performing hyperparameter tuning...")
552
  param_grid = {
553
  'learning_rate': [0.01, 0.1, 0.2],
 
560
  param_combinations = len(param_grid['learning_rate']) * len(param_grid['max_depth']) * \
561
  len(param_grid['n_estimators']) * len(param_grid['booster'])
562
 
563
+ current_combination = 0
564
 
565
  def custom_grid_search():
566
  nonlocal current_combination
 
586
  'n_estimators': n_estimators,
587
  'booster': booster
588
  }
 
589
  current_combination += 1
590
  update_tuning_progress(current_combination, param_combinations)
591
  return best_params
592
 
593
  best_params = custom_grid_search()
 
 
594
  st.write("Training the best model and making predictions...")
595
  best_model = XGBRegressor(**best_params)
596
  best_model.fit(X_train, y_train)
597
  y_pred = best_model.predict(X_test)
598
 
 
599
  rmse = mean_squared_error(y_test, y_pred, squared=False)
600
  mae = mean_absolute_error(y_test, y_pred)
601
  st.write(f"RMSE: {rmse}")
602
  st.write(f"MAE: {mae}")
603
 
 
604
  train_plot_df = train_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
605
  train_plot_df['Type'] = 'Train'
606
 
 
635
 
636
  st.plotly_chart(fig, use_container_width=True)
637
 
 
638
  return best_params
639
 
640
  def forecast_next_14_days(df, _best_params, key):
641
  last_date = df['Reported Date'].max()
642
  future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=14)
643
  future_df = pd.DataFrame({'Reported Date': future_dates})
 
 
644
  full_df = pd.concat([df, future_df], ignore_index=True)
645
  full_df = create_forecasting_features(full_df)
646
 
 
656
 
657
  future_predictions = model.predict(X_future)
658
  future_df['Modal Price (Rs./Quintal)'] = future_predictions
 
 
659
  plot_data(original_df, future_df, last_date, 14)
660
  download_button(future_df, key)
661
 
 
663
  last_date = df['Reported Date'].max()
664
  future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=30)
665
  future_df = pd.DataFrame({'Reported Date': future_dates})
 
 
666
  full_df = pd.concat([df, future_df], ignore_index=True)
667
  full_df = create_forecasting_features_1m(full_df)
668
 
 
678
 
679
  future_predictions = model.predict(X_future)
680
  future_df['Modal Price (Rs./Quintal)'] = future_predictions
 
 
681
  plot_data(original_df, future_df, last_date, 30)
682
  download_button(future_df, key)
683
 
 
685
  last_date = df['Reported Date'].max()
686
  future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=90)
687
  future_df = pd.DataFrame({'Reported Date': future_dates})
 
 
688
  full_df = pd.concat([df, future_df], ignore_index=True)
689
  full_df = create_forecasting_features_3m(full_df)
690
 
 
700
 
701
  future_predictions = model.predict(X_future)
702
  future_df['Modal Price (Rs./Quintal)'] = future_predictions
 
 
703
  plot_data(original_df, future_df, last_date, 90)
704
  download_button(future_df, key)
705
 
706
  def plot_data(original_df, future_df, last_date, days):
 
707
  actual_df = original_df[original_df['Reported Date'] >= (last_date - pd.Timedelta(days=days))].copy()
708
  actual_df['Type'] = 'Actual'
709
+
 
710
  future_plot_df = future_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
711
  future_plot_df['Type'] = 'Forecasted'
 
 
 
712
  last_actual_point = actual_df.sort_values('Reported Date').iloc[[-1]].copy()
713
  future_plot_df = pd.concat([last_actual_point, future_plot_df])
714
+
 
715
  plot_df = pd.concat([actual_df, future_plot_df])
 
 
716
  fig = go.Figure()
717
  for plot_type, color, dash in [('Actual', 'blue', 'solid'), ('Forecasted', 'red', 'dash')]:
718
  data = plot_df[plot_df['Type'] == plot_type]
 
735
 
736
 
737
  def download_button(future_df, key):
 
738
  download_df = future_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
 
 
739
  download_df['Reported Date'] = download_df['Reported Date'].dt.strftime('%Y-%m-%d')
 
 
740
  towrite = io.BytesIO()
741
+ download_df.to_excel(towrite, index=False, engine='xlsxwriter')
742
  towrite.seek(0)
 
 
743
  st.download_button(label="Download Forecasted Values",
744
  data=towrite,
745
  file_name=f'forecasted_prices_{key}.xlsx',
 
772
  collection.replace_one({"filter_key": filter_key}, best_params)
773
  else:
774
  collection.insert_one(best_params)
775
+
 
776
def get_best_params(filter_key, collection):
    """Fetch the saved hyper-parameter document for *filter_key*.

    Returns the first matching MongoDB document, or None when no parameters
    have been stored under this key yet.
    """
    return collection.find_one({"filter_key": filter_key})
779
+
780
  def train_and_forecast(df, filter_key, days):
781
  if df is not None:
 
782
  if days==14:
783
  best_params = train_and_evaluate(df)
784
  save_best_params(filter_key, best_params, best_params_collection)
 
791
  best_params = train_and_evaluate_3m(df)
792
  save_best_params(filter_key, best_params, best_params_collection_3m)
793
  forecast_next_90_days(df, best_params, filter_key)
794
+ failed_dates_data = []
795
+ failed_dates_market = []
796
 
797
  def forecast(df, filter_key, days):
798
  if days==14:
 
828
  Returns:
829
  pd.DataFrame: DataFrame containing the collection data.
830
  """
 
831
  documents = list(collection.find())
832
 
 
833
  df = pd.DataFrame(documents)
 
 
834
  if drop_id and '_id' in df.columns:
835
  df = df.drop(columns=['_id'])
836
 
 
840
 
841
  def editable_spreadsheet():
842
  st.title("Sowing Report Prediction Model")
 
 
843
  uploaded_file = st.file_uploader("Upload your Excel file", type=['xlsx'])
844
+
 
845
  if uploaded_file is not None:
 
846
  df_excel = pd.read_excel(uploaded_file)
 
 
847
  st.write("Excel data loaded:", df_excel)
848
 
 
849
  with st.form("input_form"):
850
  input_region = st.text_input("Enter Region to Filter By", placeholder="Region Name")
851
  input_season = st.text_input("Enter Season to Filter By", placeholder="Season (e.g., Winter)")
 
854
 
855
  if submit_button:
856
  if input_region and input_season and input_area > 0:
 
857
  filtered_df = df_excel[
858
  (df_excel['Region'].str.lower() == input_region.lower()) &
859
  (df_excel['Season'].str.lower() == input_season.lower())
 
904
  }
905
  </style>
906
  """, unsafe_allow_html=True)
 
 
907
  df['Reported Date'] = pd.to_datetime(df['Reported Date'])
908
  national_data = df.groupby('Reported Date').agg({
909
  'Modal Price (Rs./Quintal)': 'mean',
 
913
  st.subheader("🗓️ Key Statistics")
914
  latest_date = national_data['Reported Date'].max()
915
  latest_price = national_data[national_data['Reported Date'] == latest_date]['Modal Price (Rs./Quintal)'].mean()
916
+ national_data['Arrivals (Tonnes)'] = pd.to_numeric(national_data['Arrivals (Tonnes)'], errors='coerce')
917
  latest_arrivals = national_data[national_data['Reported Date'] == latest_date]['Arrivals (Tonnes)'].sum()
918
 
919
  st.markdown("<p class='highlight'>This section provides the most recent statistics for the market. It includes the latest available date, the average price of commodities, and the total quantity of goods arriving at the market. These metrics offer an up-to-date snapshot of market conditions.</p>", unsafe_allow_html=True)
920
  st.write(f"**Latest Date**: {latest_date.strftime('%Y-%m-%d')}")
921
  st.write(f"**Latest Modal Price**: {latest_price:.2f} Rs./Quintal")
922
+ st.write(f"**Latest Arrivals**: {float(latest_arrivals):.2f} Tonnes")
923
 
924
  st.subheader("📆 This Day in Previous Years")
925
  st.markdown("<p class='highlight'>This table shows the modal price and total arrivals for this exact day across previous years. It provides a historical perspective to compare against current market conditions. This section examines historical data for the same day in previous years. By analyzing trends for this specific day, you can identify seasonal patterns, supply-demand changes, or any deviations that might warrant closer attention.</p>", unsafe_allow_html=True)
 
988
  editable_spreadsheet()
989
 
990
 
991
def parse_table_with_rowspan(table):
    """Flatten an HTML <table> (BeautifulSoup tag) into a list of row lists.

    Cells that carry a ``rowspan`` attribute are repeated into the following
    rows, so every output row has a value in every logical column.

    Parameters
    ----------
    table : bs4 Tag for a <table> element.

    Returns
    -------
    list[list[str]]
        One inner list per <tr>, cell text stripped of whitespace.
    """
    parsed_rows = []
    # logical column index -> {"value": carried text,
    #                          "rows_left": rows still owed that value}
    pending = {}

    for tr in table.find_all("tr"):
        cells = tr.find_all(["td", "th"])
        row_values = []
        src_idx = 0   # next unconsumed cell within this <tr>
        out_idx = 0   # logical output column currently being filled

        # Keep going while there are real cells left, or a pending rowspan
        # value owed to the current output column.
        while src_idx < len(cells) or out_idx in pending:
            if out_idx in pending:
                # This column is occupied by a rowspan from an earlier row.
                carried = pending[out_idx]
                row_values.append(carried["value"])
                carried["rows_left"] -= 1
                if carried["rows_left"] == 0:
                    del pending[out_idx]
                out_idx += 1
            elif src_idx < len(cells):
                cell = cells[src_idx]
                text = cell.get_text(strip=True)
                span = int(cell.get("rowspan", 1))

                row_values.append(text)
                if span > 1:
                    # Owe this value to the same column in the next span-1 rows.
                    pending[out_idx] = {"value": text, "rows_left": span - 1}

                src_idx += 1
                out_idx += 1

        parsed_rows.append(row_values)

    return parsed_rows
1026
+
1027
 
1028
  def fetch_and_store_data():
1029
+ SCRAPER_API_KEY = "8842750a88db7513a1d19325745437cc"
1030
  latest_doc = collection.find_one(sort=[("Reported Date", -1)])
1031
+ from_date = (latest_doc["Reported Date"] + timedelta(days=1)) if latest_doc else datetime(2019, 1, 1)
1032
+ to_date = datetime.now() - timedelta(days=1)
1033
+
1034
+ print(f"📦 Modal Data → From: {from_date.strftime('%d-%b-%Y')} To: {to_date.strftime('%d-%b-%Y')}")
1035
+
1036
+ current = from_date.replace(day=1)
1037
+ while current <= to_date:
1038
+ start_of_range = max(current, from_date)
1039
+ end_of_range = (current.replace(day=28) + timedelta(days=4)).replace(day=1) - timedelta(days=1)
1040
+ if end_of_range > to_date:
1041
+ end_of_range = to_date
1042
+
1043
+ date_from_str = start_of_range.strftime('%d-%b-%Y')
1044
+ date_to_str = end_of_range.strftime('%d-%b-%Y')
1045
+
1046
+ print(f"\n📅 Fetching data from {date_from_str} to {date_to_str}")
1047
+
1048
+ target_url = (
1049
+ "https://agmarknet.gov.in/SearchCmmMkt.aspx"
1050
+ f"?Tx_Commodity=11&Tx_State=0&Tx_District=0&Tx_Market=0"
1051
+ f"&DateFrom={date_from_str}&DateTo={date_to_str}"
1052
+ f"&Fr_Date={date_from_str}&To_Date={date_to_str}"
1053
+ "&Tx_Trend=2"
1054
+ "&Tx_CommodityHead=Sesamum(Sesame,Gingelly,Til)"
1055
+ "&Tx_StateHead=--Select--"
1056
+ "&Tx_DistrictHead=--Select--"
1057
+ "&Tx_MarketHead=--Select--"
1058
+ )
1059
 
1060
+ payload = {
1061
+ "api_key": SCRAPER_API_KEY,
1062
+ "url": target_url
1063
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1064
 
1065
+ try:
1066
+ response = requests.get("https://api.scraperapi.com/", params=payload)
1067
+ soup = BeautifulSoup(response.text, "html.parser")
1068
+ table = soup.find("table", {"class": "tableagmark_new"})
1069
+
1070
+ if not table or not table.find_all("tr"):
1071
+ print("❌ No table found.")
1072
+ current = (current + timedelta(days=32)).replace(day=1)
1073
+ continue
1074
+
1075
+ all_rows = parse_table_with_rowspan(table)
1076
+ headers = all_rows[0]
1077
+ rows = all_rows[1:]
1078
+
1079
+ df_raw = pd.DataFrame(rows, columns=headers)
1080
+ print(f"🔍 Raw rows fetched: {len(df_raw)}")
1081
+
1082
+ # Clean invalid state/district/market names
1083
+ required_columns = ["State Name", "District Name", "Market Name"]
1084
+ if all(col in df_raw.columns for col in required_columns):
1085
+ df_raw = df_raw[
1086
+ (df_raw["State Name"].str.strip() != "-") &
1087
+ (df_raw["District Name"].str.strip() != "-") &
1088
+ (df_raw["Market Name"].str.strip() != "-")
1089
+ ]
1090
+ print(f"✅ Rows after filtering: {len(df_raw)}")
1091
+ else:
1092
+ print("⚠️ One or more expected columns are missing. Skipping filter.")
1093
+
1094
+ # Filter by variety + grade
1095
+ df_raw = df_raw[
1096
+ (df_raw["Variety"].str.strip().str.lower() == "white") &
1097
+ (df_raw["Grade"].str.strip().str.upper() == "FAQ")
1098
+ ]
1099
+ print(f"✅ Filtered rows with 'White' variety and 'FAQ' grade: {len(df_raw)}")
1100
+
1101
+ # Parse and clean dates
1102
+ df_raw["Reported Date Parsed"] = pd.to_datetime(
1103
+ df_raw["Reported Date"].str.strip(), format='%d %b %Y', errors='coerce'
1104
+ )
1105
+ df_raw = df_raw[df_raw["Reported Date Parsed"].notna()].copy()
1106
+ df_raw["Reported Date"] = df_raw["Reported Date Parsed"]
1107
+ df_raw.drop(columns=["Reported Date Parsed"], inplace=True)
1108
+
1109
+ # Type conversions
1110
+ df_raw["Modal Price (Rs./Quintal)"] = pd.to_numeric(
1111
+ df_raw["Modal Price (Rs./Quintal)"], errors='coerce'
1112
+ ).round().astype("Int64")
1113
+ df_raw["Arrivals (Tonnes)"] = pd.to_numeric(
1114
+ df_raw["Arrivals (Tonnes)"], errors='coerce'
1115
+ ).astype("float64")
1116
+ df_raw["State Name"] = df_raw["State Name"].astype("string")
1117
+ df_raw["Market Name"] = df_raw["Market Name"].astype("string")
1118
+
1119
+ # Write cleaned CSV
1120
+ raw_csv_filename = f"clean_raw_modal_data_{start_of_range.strftime('%b_%Y')}.csv"
1121
+ df_raw.to_csv(raw_csv_filename, index=False)
1122
+ print(f"📄 Cleaned raw data CSV written to: {raw_csv_filename}")
1123
+
1124
+ # Insert to DB
1125
+ records = df_raw.to_dict(orient="records")
1126
  if records:
1127
  collection.insert_many(records)
1128
+ print(f"Inserted {len(records)} records for {current.strftime('%b %Y')}")
1129
  else:
1130
+ print("⚠️ No valid records after final filtering.")
1131
 
1132
+ except Exception as e:
1133
+ print(f"🔥 Exception during {current.strftime('%b %Y')} fetch: {e}")
1134
 
1135
+ current = (current + timedelta(days=32)).replace(day=1)
 
 
1136
 
 
1137
  def fetch_and_store_data_market():
1138
+ SCRAPER_API_KEY = "8842750a88db7513a1d19325745437cc"
1139
  latest_doc = market_price_data.find_one(sort=[("Reported Date", -1)])
1140
+ from_date = (latest_doc["Reported Date"] + timedelta(days=1)) if latest_doc else datetime(2019, 1, 1)
1141
+ to_date = datetime.now() - timedelta(days=1)
1142
+
1143
+ print(f"📦 Market Data → From: {from_date.strftime('%d-%b-%Y')} To: {to_date.strftime('%d-%b-%Y')}")
1144
+
1145
+ current = from_date.replace(day=1)
1146
+ while current <= to_date:
1147
+ start_of_range = max(current, from_date)
1148
+ end_of_range = (current.replace(day=28) + timedelta(days=4)).replace(day=1) - timedelta(days=1)
1149
+ if end_of_range > to_date:
1150
+ end_of_range = to_date
1151
+
1152
+ date_from_str = start_of_range.strftime('%d-%b-%Y')
1153
+ date_to_str = end_of_range.strftime('%d-%b-%Y')
1154
+
1155
+ print(f"\n📅 Fetching data from {date_from_str} to {date_to_str}")
1156
+
1157
+ target_url = (
1158
+ "https://agmarknet.gov.in/SearchCmmMkt.aspx"
1159
+ f"?Tx_Commodity=11&Tx_State=0&Tx_District=0&Tx_Market=0"
1160
+ f"&DateFrom={date_from_str}&DateTo={date_to_str}"
1161
+ f"&Fr_Date={date_from_str}&To_Date={date_to_str}"
1162
+ "&Tx_Trend=0"
1163
+ "&Tx_CommodityHead=Sesamum(Sesame,Gingelly,Til)"
1164
+ "&Tx_StateHead=--Select--"
1165
+ "&Tx_DistrictHead=--Select--"
1166
+ "&Tx_MarketHead=--Select--"
1167
+ )
1168
 
1169
+ payload = {
1170
+ "api_key": SCRAPER_API_KEY,
1171
+ "url": target_url
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1172
  }
 
 
 
 
 
 
 
 
1173
 
1174
+ try:
1175
+ response = requests.get("https://api.scraperapi.com/", params=payload)
1176
+ soup = BeautifulSoup(response.text, "html.parser")
1177
+ table = soup.find("table", {"class": "tableagmark_new"})
1178
+
1179
+ if not table or not table.find_all("tr"):
1180
+ print("❌ No table found.")
1181
+ current = (current + timedelta(days=32)).replace(day=1)
1182
+ continue
1183
+
1184
+ all_rows = parse_table_with_rowspan(table)
1185
+ headers = all_rows[0]
1186
+ rows = all_rows[1:]
1187
+
1188
+ # Filter out irrelevant columns based on available data
1189
+ required_columns = ["Sl no.", "District Name", "Market Name", "Commodity", "Variety", "Grade", "Min Price (Rs./Quintal)", "Max Price (Rs./Quintal)", "Modal Price (Rs./Quintal)", "Price Date"]
1190
+ df_raw = pd.DataFrame(rows, columns=headers)
1191
+
1192
+ # Remove rows with invalid or missing location data
1193
+ df_raw = df_raw[
1194
+ (df_raw["District Name"].str.strip() != "-") &
1195
+ (df_raw["Market Name"].str.strip() != "-")
1196
+ ]
1197
+ print(f"✅ Rows after filtering invalid locations: {len(df_raw)}")
1198
+
1199
+ # ✅ Filter for variety and grade
1200
+ df_raw = df_raw[
1201
+ (df_raw["Variety"].str.strip().str.lower() == "white") &
1202
+ (df_raw["Grade"].str.strip().str.upper() == "FAQ")
1203
+ ]
1204
+ print(f"✅ Filtered rows with 'White' variety and 'FAQ' grade: {len(df_raw)}")
1205
+
1206
+ # ✅ Parse 'Price Date' as 'Reported Date'
1207
+ df_raw["Reported Date Parsed"] = pd.to_datetime(
1208
+ df_raw["Price Date"].str.strip(), format='%d %b %Y', errors='coerce'
1209
+ )
1210
+ df_raw = df_raw[df_raw["Reported Date Parsed"].notna()].copy()
1211
+ df_raw["Reported Date"] = df_raw["Reported Date Parsed"]
1212
+ df_raw.drop(columns=["Reported Date Parsed"], inplace=True)
1213
+
1214
+ # ✅ Type conversions
1215
+ df_raw["Modal Price (Rs./Quintal)"] = pd.to_numeric(
1216
+ df_raw["Modal Price (Rs./Quintal)"], errors='coerce'
1217
+ ).round().astype("Int64")
1218
+ df_raw["Min Price (Rs./Quintal)"] = pd.to_numeric(
1219
+ df_raw["Min Price (Rs./Quintal)"], errors='coerce'
1220
+ ).round().astype("Int64")
1221
+ df_raw["Max Price (Rs./Quintal)"] = pd.to_numeric(
1222
+ df_raw["Max Price (Rs./Quintal)"], errors='coerce'
1223
+ ).round().astype("Int64")
1224
+ df_raw["District Name"] = df_raw["District Name"].astype("string")
1225
+ df_raw["Market Name"] = df_raw["Market Name"].astype("string")
1226
+
1227
+ # ✅ Save CSV for audit
1228
+ raw_csv_filename = f"clean_raw_market_data_{start_of_range.strftime('%b_%Y')}.csv"
1229
+ df_raw.to_csv(raw_csv_filename, index=False)
1230
+ print(f"📄 CSV saved: {raw_csv_filename}")
1231
+
1232
+ # ✅ Insert into MongoDB
1233
+ records = df_raw.to_dict(orient="records")
1234
  if records:
1235
  market_price_data.insert_many(records)
1236
+ print(f"Inserted {len(records)} records for {current.strftime('%b %Y')}")
1237
  else:
1238
+ print("⚠️ No valid records after final filtering.")
 
 
 
1239
 
1240
+ except Exception as e:
1241
+ print(f"🔥 Exception during {current.strftime('%b %Y')} fetch: {e}")
1242
+
1243
+ current = (current + timedelta(days=32)).replace(day=1)
1244
 
1245
 
1246
 
 
1333
  if 'authenticated' not in st.session_state:
1334
  st.session_state.authenticated = False
1335
 
1336
+ if st.session_state.get("authenticated", False):
1337
  st.title("🌾 AgriPredict Dashboard")
1338
+
1339
  if st.button("Get Live Data Feed"):
1340
+ st.write("🔄 Fetching fresh data from Modal + Agmarknet...")
1341
  fetch_and_store_data()
1342
  fetch_and_store_data_market()
1343
+
1344
  view_mode = st.radio("", ["Statistics", "Plots", "Predictions", "Exim"], horizontal=True)
1345
 
1346
  if view_mode == "Plots":
1347
  st.sidebar.header("Filters")
1348
+
1349
  selected_period = st.sidebar.selectbox(
1350
  "Select Time Period",
1351
+ ["2 Weeks", "1 Month", "3 Months", "1 Year", "5 Years"],
1352
  index=1
1353
  )
1354
  period_mapping = {
1355
  "2 Weeks": 14,
1356
  "1 Month": 30,
 
1357
  "3 Months": 90,
 
1358
  "1 Year": 365,
1359
  "2 Years": 730,
1360
  "5 Years": 1825
1361
  }
1362
+ st.session_state["selected_period"] = period_mapping[selected_period]
1363
+
 
1364
  state_options = list(state_market_dict.keys()) + ['India']
1365
+ selected_state = st.sidebar.selectbox("Select State", state_options)
1366
+
1367
  market_wise = False
1368
+ query_filter = {}
1369
+
1370
  if selected_state != 'India':
1371
  market_wise = st.sidebar.checkbox("Market Wise Analysis")
1372
  if market_wise:
1373
  markets = state_market_dict.get(selected_state, [])
1374
+ st.write(f"✅ Available markets for {selected_state}: {markets}")
1375
  selected_market = st.sidebar.selectbox("Select Market", markets)
1376
  query_filter = {"Market Name": selected_market}
1377
  else:
1378
+ query_filter = {"State Name": selected_state}
1379
  else:
1380
+ query_filter = {"State Name": {"$exists": True}}
1381
+
 
 
 
 
 
 
 
1382
  query_filter["Reported Date"] = {
1383
+ "$gte": datetime.now() - timedelta(days=st.session_state["selected_period"])
1384
  }
1385
+
1386
+ data_type = st.sidebar.radio("Select Data Type", ["Price", "Volume", "Both"])
1387
+
1388
+ st.write(f"🧪 Final Mongo Query Filter: `{query_filter}`")
1389
+
1390
  if st.sidebar.button("✨ Let's go!"):
1391
  try:
1392
+ df_market_grouped = pd.DataFrame()
1393
+ df_grouped = pd.DataFrame()
1394
+
1395
+ # MARKET-WISE
1396
  if "Market Name" in query_filter:
1397
+ st.info("📊 Market-level data mode enabled")
1398
  market_cursor = market_price_data.find(query_filter)
1399
  market_data = list(market_cursor)
1400
+ st.write(f"📄 Market rows fetched: {len(market_data)}")
1401
+
1402
+ if market_data:
1403
+ df_market = pd.DataFrame(market_data)
1404
+ df_market['Reported Date'] = pd.to_datetime(df_market['Reported Date'], errors='coerce')
1405
+ df_market["Modal Price (Rs./Quintal)"] = pd.to_numeric(df_market["Modal Price (Rs./Quintal)"], errors='coerce')
1406
+ df_market_grouped = df_market.groupby('Reported Date', as_index=False).agg({
1407
+ 'Modal Price (Rs./Quintal)': 'mean'
1408
+ }).dropna()
1409
+ date_range = pd.date_range(df_market_grouped['Reported Date'].min(), df_market_grouped['Reported Date'].max())
1410
+ df_market_grouped = df_market_grouped.set_index('Reported Date').reindex(date_range).rename_axis('Reported Date').reset_index()
1411
+ df_market_grouped['Modal Price (Rs./Quintal)'] = df_market_grouped['Modal Price (Rs./Quintal)'].fillna(method='ffill').fillna(method='bfill')
1412
+
1413
+ # STATE/NATIONAL-WISE
1414
+ st.info("📥 Fetching state-level or national data...")
1415
  cursor = collection.find(query_filter)
1416
  data = list(cursor)
1417
+ st.write(f"📄 Total rows fetched from collection: {len(data)}")
1418
+
1419
  if data:
 
1420
  df = pd.DataFrame(data)
1421
+ df['Reported Date'] = pd.to_datetime(df['Reported Date'], errors='coerce')
1422
+ df['Arrivals (Tonnes)'] = pd.to_numeric(df['Arrivals (Tonnes)'], errors='coerce')
1423
+ df['Modal Price (Rs./Quintal)'] = pd.to_numeric(df['Modal Price (Rs./Quintal)'], errors='coerce')
1424
+
1425
+ df_grouped = df.groupby('Reported Date', as_index=False).agg({
1426
+ 'Arrivals (Tonnes)': 'sum',
1427
+ 'Modal Price (Rs./Quintal)': 'mean'
1428
+ }).dropna()
1429
+
1430
+ date_range = pd.date_range(df_grouped['Reported Date'].min(), df_grouped['Reported Date'].max())
 
 
 
 
 
 
 
 
 
 
1431
  df_grouped = df_grouped.set_index('Reported Date').reindex(date_range).rename_axis('Reported Date').reset_index()
 
 
1432
  df_grouped['Arrivals (Tonnes)'] = df_grouped['Arrivals (Tonnes)'].fillna(method='ffill').fillna(method='bfill')
1433
  df_grouped['Modal Price (Rs./Quintal)'] = df_grouped['Modal Price (Rs./Quintal)'].fillna(method='ffill').fillna(method='bfill')
1434
+
1435
+ st.subheader(f"📈 Trends for {selected_state} ({'Market: ' + selected_market if market_wise else 'State-wide'})")
1436
+
1437
+ fig = go.Figure()
1438
+
1439
  if data_type == "Both":
 
1440
  scaler = MinMaxScaler()
1441
  df_grouped[['Scaled Price', 'Scaled Arrivals']] = scaler.fit_transform(
1442
  df_grouped[['Modal Price (Rs./Quintal)', 'Arrivals (Tonnes)']]
1443
  )
1444
+
1445
+ fig.add_trace(go.Scatter(
1446
+ x=df_grouped['Reported Date'],
1447
+ y=df_grouped['Scaled Price'],
1448
+ mode='lines',
1449
+ name='Scaled Modal Price',
1450
+ line=dict(color='green'),
1451
+ ))
1452
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1453
  fig.add_trace(go.Scatter(
1454
  x=df_grouped['Reported Date'],
1455
  y=df_grouped['Scaled Arrivals'],
1456
  mode='lines',
1457
  name='Scaled Arrivals',
1458
+ line=dict(color='blue'),
 
 
1459
  ))
1460
+
 
 
 
 
 
 
 
 
1461
  elif data_type == "Price":
1462
+ price_df = df_market_grouped if not df_market_grouped.empty else df_grouped
1463
+ fig.add_trace(go.Scatter(
1464
+ x=price_df['Reported Date'],
1465
+ y=price_df["Modal Price (Rs./Quintal)"],
1466
+ mode='lines',
1467
+ name='Modal Price',
1468
+ line=dict(color='green'),
1469
+ ))
1470
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1471
  elif data_type == "Volume":
 
 
1472
  fig.add_trace(go.Scatter(
1473
  x=df_grouped['Reported Date'],
1474
  y=df_grouped['Arrivals (Tonnes)'],
1475
  mode='lines',
1476
  name='Arrivals',
1477
+ line=dict(color='blue'),
1478
  ))
1479
+
1480
+ fig.update_layout(
1481
+ title="📊 Agricultural Trends",
1482
+ xaxis_title="Date",
1483
+ yaxis_title="Value (Scaled if Both)",
1484
+ template="plotly_white"
1485
+ )
1486
+ st.plotly_chart(fig, use_container_width=True)
1487
+
1488
  else:
1489
+ st.warning("⚠️ No data found for the selected filter range and region.")
1490
+
1491
  except Exception as e:
1492
  st.error(f"❌ Error fetching data 2: {e}")
1493
+ st.exception(e)
1494
+
1495
  elif view_mode == "Predictions":
1496
  st.subheader("📊 Model Analysis")
1497
  sub_option = st.radio("Select one of the following", ["India", "States", "Market"], horizontal=True)
 
1499
  if sub_option == "States":
1500
  states = ["Karnataka", "Madhya Pradesh", "Gujarat", "Uttar Pradesh", "Telangana"]
1501
  selected_state = st.selectbox("Select State for Model Training", states)
1502
+ filter_key = f"state_{selected_state}"
1503
 
1504
  if st.button("Forecast"):
1505
+ query_filter = {"State Name": selected_state}
1506
  df = fetch_and_process_data(query_filter, collection)
1507
  if sub_timeline == "14 days":
1508
  forecast(df, filter_key, 14)
 
1513
  elif sub_option == "Market":
1514
  market_options = ["Rajkot", "Neemuch", "Kalburgi", "Warangal"]
1515
  selected_market = st.selectbox("Select Market for Model Training", market_options)
1516
+ filter_key = f"market_{selected_market}"
1517
  if st.button("Forecast"):
1518
  query_filter = {"Market Name": selected_market}
1519
  comparison_date = pd.to_datetime("18 Feb 2025")
1520
  df = fetch_and_process_data(query_filter, market_price_data)
1521
+ st.write(df[df["Reported Date"]>comparison_date])
1522
  if sub_timeline == "14 days":
1523
  forecast(df, filter_key, 14)
1524
  elif sub_timeline == "1 month":
 
1545
  display_statistics(df)
1546
  elif view_mode == "Exim":
1547
  df = collection_to_dataframe(impExp)
1548
+
 
1549
  plot_option = st.radio(
1550
  "Select the data to visualize:",
1551
  ["Import Price", "Import Quantity", "Export Price", "Export Quantity"],
1552
  horizontal=True
1553
  )
1554
+
 
1555
  time_period = st.selectbox(
1556
  "Select time period:",
1557
  ["1 Month", "6 Months", "1 Year", "2 Years"]
1558
  )
1559
 
 
1560
  df["Reported Date"] = pd.to_datetime(df["Reported Date"], format="%Y-%m-%d")
 
 
1561
  if time_period == "1 Month":
1562
  start_date = pd.Timestamp.now() - pd.DateOffset(months=1)
1563
  elif time_period == "6 Months":
 
1568
  start_date = pd.Timestamp.now() - pd.DateOffset(years=2)
1569
 
1570
  filtered_df = df[df["Reported Date"] >= start_date]
 
 
1571
  if plot_option == "Import Price":
1572
  grouped_df = (
1573
  filtered_df.groupby("Reported Date", as_index=False)["VALUE_IMPORT"]
 
1596
  .rename(columns={"QUANTITY_IMPORT": "Total Export Quantity"})
1597
  )
1598
  y_axis_label = "Total Export Quantity (Tonnes)"
1599
+
 
1600
  fig = px.line(
1601
  grouped_df,
1602
  x="Reported Date",
1603
+ y=grouped_df.columns[1],
1604
  title=f"{plot_option} Over Time",
1605
  labels={"Reported Date": "Date", grouped_df.columns[1]: y_axis_label},
1606
  )
 
1617
 
1618
  if login_button:
1619
  if authenticate_user(username, password):
1620
+ st.session_state.authenticated = True
1621
+ st.session_state['username'] = username
1622
  st.write("Login successful!")
1623
+ st.rerun()
1624
  else:
1625
  st.error("Invalid username or password")