Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -20,13 +20,21 @@ from itertools import product
|
|
| 20 |
from tqdm import tqdm
|
| 21 |
import io
|
| 22 |
from statsmodels.tsa.statespace.sarimax import SARIMAX
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
import os

# The connection string carries credentials, so it is read from the
# environment (e.g. a Space/Streamlit secret named MONGO_URI) instead of
# being committed to the repository.
# NOTE(review): the credential that used to be hard-coded here has been
# published in version control and should be rotated.
mongo_uri = os.getenv("MONGO_URI")
if not mongo_uri:
    # Without a URI nothing below can work; surface the problem and halt.
    st.error("MongoDB URI is not set!")
    st.stop()
else:
    # certifi supplies the CA bundle used to validate the TLS certificate.
    client = MongoClient(mongo_uri, tlsCAFile=certifi.where())
    db = client["AgriPredict"]
    collection = db["WhiteSesame"]
|
|
@@ -291,39 +299,27 @@ def create_forecasting_features_3m(df):
|
|
| 291 |
|
| 292 |
|
| 293 |
def preprocess_data(df):
    """Build a gap-free daily modal-price series.

    Keeps only the 'Reported Date' and 'Modal Price (Rs./Quintal)' columns,
    averages prices that share a date, reindexes onto every calendar day
    between the first and last reported date, and fills the resulting gaps
    by forward fill followed by backward fill.

    Args:
        df: DataFrame containing at least 'Reported Date' (anything
            ``pd.to_datetime`` accepts) and 'Modal Price (Rs./Quintal)'.

    Returns:
        A new DataFrame with one row per day and the two columns above.
        The input frame is not modified. An empty input yields an empty
        frame instead of raising.
    """
    price_col = 'Modal Price (Rs./Quintal)'

    # Copy so the assignment below writes to an independent frame rather
    # than to a slice of the caller's data (avoids SettingWithCopyWarning).
    df = df[['Reported Date', price_col]].copy()
    df['Reported Date'] = pd.to_datetime(df['Reported Date'])

    # With no rows, min()/max() are NaT and pd.date_range would raise.
    if df.empty:
        return df.reset_index(drop=True)

    # One row per date: average all prices reported on the same day.
    df = df.groupby('Reported Date', as_index=False).mean()

    # Insert the missing calendar days as NaN rows.
    full_date_range = pd.date_range(df['Reported Date'].min(), df['Reported Date'].max())
    df = (
        df.set_index('Reported Date')
        .reindex(full_date_range)
        .rename_axis('Reported Date')
        .reset_index()
    )

    # ffill()/bfill() replace the deprecated fillna(method=...) spelling.
    df[price_col] = df[price_col].ffill().bfill()
    return df
|
| 312 |
|
|
|
|
| 313 |
def train_and_evaluate(df):
|
| 314 |
import streamlit as st
|
| 315 |
-
|
| 316 |
-
# Add progress bar for hyperparameter tuning
|
| 317 |
progress_bar = st.progress(0)
|
| 318 |
-
|
| 319 |
-
# Helper function to update progress during hyperparameter tuning
|
| 320 |
def update_tuning_progress(current, total):
|
| 321 |
progress = int((current / total) * 100)
|
| 322 |
progress_bar.progress(progress)
|
| 323 |
|
| 324 |
df = create_forecasting_features(df)
|
| 325 |
|
| 326 |
-
# Split the data into training and testing sets
|
| 327 |
train_df = df[df['Reported Date'] < '2024-01-01']
|
| 328 |
test_df = df[df['Reported Date'] >= '2024-01-01']
|
| 329 |
|
|
@@ -331,8 +327,6 @@ def train_and_evaluate(df):
|
|
| 331 |
y_train = train_df['Modal Price (Rs./Quintal)']
|
| 332 |
X_test = test_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
|
| 333 |
y_test = test_df['Modal Price (Rs./Quintal)']
|
| 334 |
-
|
| 335 |
-
# Hyperparameter tuning
|
| 336 |
st.write("Performing hyperparameter tuning...")
|
| 337 |
param_grid = {
|
| 338 |
'learning_rate': [0.01, 0.1, 0.2],
|
|
@@ -345,7 +339,7 @@ def train_and_evaluate(df):
|
|
| 345 |
param_combinations = len(param_grid['learning_rate']) * len(param_grid['max_depth']) * \
|
| 346 |
len(param_grid['n_estimators']) * len(param_grid['booster'])
|
| 347 |
|
| 348 |
-
current_combination = 0
|
| 349 |
|
| 350 |
def custom_grid_search():
|
| 351 |
nonlocal current_combination
|
|
@@ -371,26 +365,22 @@ def train_and_evaluate(df):
|
|
| 371 |
'n_estimators': n_estimators,
|
| 372 |
'booster': booster
|
| 373 |
}
|
| 374 |
-
# Update progress bar
|
| 375 |
current_combination += 1
|
| 376 |
update_tuning_progress(current_combination, param_combinations)
|
| 377 |
return best_params
|
| 378 |
|
| 379 |
best_params = custom_grid_search()
|
| 380 |
-
|
| 381 |
-
# Train the best model with the identified parameters
|
| 382 |
st.write("Training the best model and making predictions...")
|
| 383 |
best_model = XGBRegressor(**best_params)
|
| 384 |
best_model.fit(X_train, y_train)
|
| 385 |
y_pred = best_model.predict(X_test)
|
| 386 |
|
| 387 |
-
# Metrics
|
| 388 |
rmse = mean_squared_error(y_test, y_pred, squared=False)
|
| 389 |
mae = mean_absolute_error(y_test, y_pred)
|
| 390 |
st.write(f"RMSE: {rmse}")
|
| 391 |
st.write(f"MAE: {mae}")
|
| 392 |
-
|
| 393 |
-
# Prepare data for plotting
|
| 394 |
train_plot_df = train_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
|
| 395 |
train_plot_df['Type'] = 'Train'
|
| 396 |
|
|
@@ -425,7 +415,6 @@ def train_and_evaluate(df):
|
|
| 425 |
|
| 426 |
st.plotly_chart(fig, use_container_width=True)
|
| 427 |
|
| 428 |
-
# Return best parameters
|
| 429 |
return best_params
|
| 430 |
|
| 431 |
def train_and_evaluate_1m(df):
|
|
@@ -435,19 +424,16 @@ def train_and_evaluate_1m(df):
|
|
| 435 |
from xgboost import XGBRegressor
|
| 436 |
from sklearn.metrics import mean_squared_error, mean_absolute_error
|
| 437 |
|
| 438 |
-
# Add progress bar for hyperparameter tuning
|
| 439 |
progress_bar = st.progress(0)
|
| 440 |
|
| 441 |
-
# Helper function to update progress during hyperparameter tuning
|
| 442 |
def update_tuning_progress(current, total):
|
| 443 |
progress = int((current / total) * 100)
|
| 444 |
progress_bar.progress(progress)
|
| 445 |
|
| 446 |
df = create_forecasting_features_1m(df)
|
| 447 |
-
|
| 448 |
-
# Define train-test split for a 1-month horizon
|
| 449 |
split_date = pd.to_datetime("2024-01-01")
|
| 450 |
-
test_horizon = pd.DateOffset(days=30)
|
| 451 |
|
| 452 |
train_df = df[df['Reported Date'] < split_date]
|
| 453 |
test_df = df[(df['Reported Date'] >= split_date) & (df['Reported Date'] < split_date + test_horizon)]
|
|
@@ -457,7 +443,6 @@ def train_and_evaluate_1m(df):
|
|
| 457 |
X_test = test_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
|
| 458 |
y_test = test_df['Modal Price (Rs./Quintal)']
|
| 459 |
|
| 460 |
-
# Hyperparameter tuning
|
| 461 |
st.write("Performing hyperparameter tuning...")
|
| 462 |
param_grid = {
|
| 463 |
'learning_rate': [0.01, 0.1, 0.2],
|
|
@@ -470,7 +455,7 @@ def train_and_evaluate_1m(df):
|
|
| 470 |
param_combinations = len(param_grid['learning_rate']) * len(param_grid['max_depth']) * \
|
| 471 |
len(param_grid['n_estimators']) * len(param_grid['booster'])
|
| 472 |
|
| 473 |
-
current_combination = 0
|
| 474 |
|
| 475 |
def custom_grid_search():
|
| 476 |
nonlocal current_combination
|
|
@@ -496,26 +481,21 @@ def train_and_evaluate_1m(df):
|
|
| 496 |
'n_estimators': n_estimators,
|
| 497 |
'booster': booster
|
| 498 |
}
|
| 499 |
-
# Update progress bar
|
| 500 |
current_combination += 1
|
| 501 |
update_tuning_progress(current_combination, param_combinations)
|
| 502 |
return best_params
|
| 503 |
|
| 504 |
best_params = custom_grid_search()
|
| 505 |
-
|
| 506 |
-
# Train the best model with the identified parameters
|
| 507 |
st.write("Training the best model and making predictions...")
|
| 508 |
best_model = XGBRegressor(**best_params)
|
| 509 |
best_model.fit(X_train, y_train)
|
| 510 |
y_pred = best_model.predict(X_test)
|
| 511 |
|
| 512 |
-
# Metrics
|
| 513 |
rmse = mean_squared_error(y_test, y_pred, squared=False)
|
| 514 |
mae = mean_absolute_error(y_test, y_pred)
|
| 515 |
st.write(f"RMSE: {rmse}")
|
| 516 |
st.write(f"MAE: {mae}")
|
| 517 |
|
| 518 |
-
# Prepare data for plotting
|
| 519 |
train_plot_df = train_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
|
| 520 |
train_plot_df['Type'] = 'Train'
|
| 521 |
|
|
@@ -550,16 +530,11 @@ def train_and_evaluate_1m(df):
|
|
| 550 |
|
| 551 |
st.plotly_chart(fig, use_container_width=True)
|
| 552 |
|
| 553 |
-
# Return best parameters
|
| 554 |
return best_params
|
| 555 |
|
| 556 |
def train_and_evaluate_3m(df):
|
| 557 |
import streamlit as st
|
| 558 |
-
|
| 559 |
-
# Add progress bar for hyperparameter tuning
|
| 560 |
progress_bar = st.progress(0)
|
| 561 |
-
|
| 562 |
-
# Helper function to update progress during hyperparameter tuning
|
| 563 |
def update_tuning_progress(current, total):
|
| 564 |
progress = int((current / total) * 100)
|
| 565 |
progress_bar.progress(progress)
|
|
@@ -573,7 +548,6 @@ def train_and_evaluate_3m(df):
|
|
| 573 |
X_test = test_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
|
| 574 |
y_test = test_df['Modal Price (Rs./Quintal)']
|
| 575 |
|
| 576 |
-
# Hyperparameter tuning
|
| 577 |
st.write("Performing hyperparameter tuning...")
|
| 578 |
param_grid = {
|
| 579 |
'learning_rate': [0.01, 0.1, 0.2],
|
|
@@ -586,7 +560,7 @@ def train_and_evaluate_3m(df):
|
|
| 586 |
param_combinations = len(param_grid['learning_rate']) * len(param_grid['max_depth']) * \
|
| 587 |
len(param_grid['n_estimators']) * len(param_grid['booster'])
|
| 588 |
|
| 589 |
-
current_combination = 0
|
| 590 |
|
| 591 |
def custom_grid_search():
|
| 592 |
nonlocal current_combination
|
|
@@ -612,26 +586,21 @@ def train_and_evaluate_3m(df):
|
|
| 612 |
'n_estimators': n_estimators,
|
| 613 |
'booster': booster
|
| 614 |
}
|
| 615 |
-
# Update progress bar
|
| 616 |
current_combination += 1
|
| 617 |
update_tuning_progress(current_combination, param_combinations)
|
| 618 |
return best_params
|
| 619 |
|
| 620 |
best_params = custom_grid_search()
|
| 621 |
-
|
| 622 |
-
# Train the best model with the identified parameters
|
| 623 |
st.write("Training the best model and making predictions...")
|
| 624 |
best_model = XGBRegressor(**best_params)
|
| 625 |
best_model.fit(X_train, y_train)
|
| 626 |
y_pred = best_model.predict(X_test)
|
| 627 |
|
| 628 |
-
# Metrics
|
| 629 |
rmse = mean_squared_error(y_test, y_pred, squared=False)
|
| 630 |
mae = mean_absolute_error(y_test, y_pred)
|
| 631 |
st.write(f"RMSE: {rmse}")
|
| 632 |
st.write(f"MAE: {mae}")
|
| 633 |
|
| 634 |
-
# Prepare data for plotting
|
| 635 |
train_plot_df = train_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
|
| 636 |
train_plot_df['Type'] = 'Train'
|
| 637 |
|
|
@@ -666,15 +635,12 @@ def train_and_evaluate_3m(df):
|
|
| 666 |
|
| 667 |
st.plotly_chart(fig, use_container_width=True)
|
| 668 |
|
| 669 |
-
# Return best parameters
|
| 670 |
return best_params
|
| 671 |
|
| 672 |
def forecast_next_14_days(df, _best_params, key):
|
| 673 |
last_date = df['Reported Date'].max()
|
| 674 |
future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=14)
|
| 675 |
future_df = pd.DataFrame({'Reported Date': future_dates})
|
| 676 |
-
|
| 677 |
-
# Assuming 'create_forecasting_features' function is defined elsewhere
|
| 678 |
full_df = pd.concat([df, future_df], ignore_index=True)
|
| 679 |
full_df = create_forecasting_features(full_df)
|
| 680 |
|
|
@@ -690,8 +656,6 @@ def forecast_next_14_days(df, _best_params, key):
|
|
| 690 |
|
| 691 |
future_predictions = model.predict(X_future)
|
| 692 |
future_df['Modal Price (Rs./Quintal)'] = future_predictions
|
| 693 |
-
|
| 694 |
-
# Pass model to plot_data
|
| 695 |
plot_data(original_df, future_df, last_date, 14)
|
| 696 |
download_button(future_df, key)
|
| 697 |
|
|
@@ -699,8 +663,6 @@ def forecast_next_30_days(df, _best_params, key):
|
|
| 699 |
last_date = df['Reported Date'].max()
|
| 700 |
future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=30)
|
| 701 |
future_df = pd.DataFrame({'Reported Date': future_dates})
|
| 702 |
-
|
| 703 |
-
# Assuming 'create_forecasting_features' function is defined elsewhere
|
| 704 |
full_df = pd.concat([df, future_df], ignore_index=True)
|
| 705 |
full_df = create_forecasting_features_1m(full_df)
|
| 706 |
|
|
@@ -716,8 +678,6 @@ def forecast_next_30_days(df, _best_params, key):
|
|
| 716 |
|
| 717 |
future_predictions = model.predict(X_future)
|
| 718 |
future_df['Modal Price (Rs./Quintal)'] = future_predictions
|
| 719 |
-
|
| 720 |
-
# Pass model to plot_data
|
| 721 |
plot_data(original_df, future_df, last_date, 30)
|
| 722 |
download_button(future_df, key)
|
| 723 |
|
|
@@ -725,8 +685,6 @@ def forecast_next_90_days(df, _best_params, key):
|
|
| 725 |
last_date = df['Reported Date'].max()
|
| 726 |
future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=90)
|
| 727 |
future_df = pd.DataFrame({'Reported Date': future_dates})
|
| 728 |
-
|
| 729 |
-
# Assuming 'create_forecasting_features' function is defined elsewhere
|
| 730 |
full_df = pd.concat([df, future_df], ignore_index=True)
|
| 731 |
full_df = create_forecasting_features_3m(full_df)
|
| 732 |
|
|
@@ -742,29 +700,19 @@ def forecast_next_90_days(df, _best_params, key):
|
|
| 742 |
|
| 743 |
future_predictions = model.predict(X_future)
|
| 744 |
future_df['Modal Price (Rs./Quintal)'] = future_predictions
|
| 745 |
-
|
| 746 |
-
# Pass model to plot_data
|
| 747 |
plot_data(original_df, future_df, last_date, 90)
|
| 748 |
download_button(future_df, key)
|
| 749 |
|
| 750 |
def plot_data(original_df, future_df, last_date, days):
|
| 751 |
-
# Filter original_df for the period you want to plot.
|
| 752 |
actual_df = original_df[original_df['Reported Date'] >= (last_date - pd.Timedelta(days=days))].copy()
|
| 753 |
actual_df['Type'] = 'Actual'
|
| 754 |
-
|
| 755 |
-
# Prepare the future_df (predicted data) and mark it as forecasted.
|
| 756 |
future_plot_df = future_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
|
| 757 |
future_plot_df['Type'] = 'Forecasted'
|
| 758 |
-
|
| 759 |
-
# Get the last actual data point from actual_df.
|
| 760 |
-
# Ensure the DataFrame is sorted by date.
|
| 761 |
last_actual_point = actual_df.sort_values('Reported Date').iloc[[-1]].copy()
|
| 762 |
future_plot_df = pd.concat([last_actual_point, future_plot_df])
|
| 763 |
-
|
| 764 |
-
# Combine both actual and forecasted data for plotting.
|
| 765 |
plot_df = pd.concat([actual_df, future_plot_df])
|
| 766 |
-
|
| 767 |
-
# Create the plot.
|
| 768 |
fig = go.Figure()
|
| 769 |
for plot_type, color, dash in [('Actual', 'blue', 'solid'), ('Forecasted', 'red', 'dash')]:
|
| 770 |
data = plot_df[plot_df['Type'] == plot_type]
|
|
@@ -787,18 +735,11 @@ def plot_data(original_df, future_df, last_date, days):
|
|
| 787 |
|
| 788 |
|
| 789 |
def download_button(future_df, key):
|
| 790 |
-
# Create a new DataFrame with only 'Reported Date' and 'Modal Price (Rs./Quintal)'
|
| 791 |
download_df = future_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
|
| 792 |
-
|
| 793 |
-
# Format 'Reported Date' to display only the date in YYYY-MM-DD format
|
| 794 |
download_df['Reported Date'] = download_df['Reported Date'].dt.strftime('%Y-%m-%d')
|
| 795 |
-
|
| 796 |
-
# Write to Excel without the index
|
| 797 |
towrite = io.BytesIO()
|
| 798 |
-
download_df.to_excel(towrite, index=False, engine='xlsxwriter')
|
| 799 |
towrite.seek(0)
|
| 800 |
-
|
| 801 |
-
# Create a download button for the Excel file
|
| 802 |
st.download_button(label="Download Forecasted Values",
|
| 803 |
data=towrite,
|
| 804 |
file_name=f'forecasted_prices_{key}.xlsx',
|
|
@@ -831,15 +772,13 @@ def save_best_params(collection, filter_key, best_params):
|
|
| 831 |
collection.replace_one({"filter_key": filter_key}, best_params)
|
| 832 |
else:
|
| 833 |
collection.insert_one(best_params)
|
| 834 |
-
|
| 835 |
-
# Function to retrieve best_params from MongoDB
|
| 836 |
def get_best_params(filter_key, collection):
    """Look up the stored parameter document for ``filter_key``.

    Args:
        filter_key: Value matched against the document's "filter_key" field.
        collection: Object exposing ``find_one(query)``.

    Returns:
        Whatever ``collection.find_one`` returns for the query.
    """
    query = {"filter_key": filter_key}
    return collection.find_one(query)
|
| 839 |
-
|
| 840 |
def train_and_forecast(df, filter_key, days):
|
| 841 |
if df is not None:
|
| 842 |
-
# Train the model and save parameters to MongoDB
|
| 843 |
if days==14:
|
| 844 |
best_params = train_and_evaluate(df)
|
| 845 |
save_best_params(filter_key, best_params, best_params_collection)
|
|
@@ -852,6 +791,8 @@ def train_and_forecast(df, filter_key, days):
|
|
| 852 |
best_params = train_and_evaluate_3m(df)
|
| 853 |
save_best_params(filter_key, best_params, best_params_collection_3m)
|
| 854 |
forecast_next_90_days(df, best_params, filter_key)
|
|
|
|
|
|
|
| 855 |
|
| 856 |
def forecast(df, filter_key, days):
|
| 857 |
if days==14:
|
|
@@ -887,13 +828,9 @@ def collection_to_dataframe(collection, drop_id=True):
|
|
| 887 |
Returns:
|
| 888 |
pd.DataFrame: DataFrame containing the collection data.
|
| 889 |
"""
|
| 890 |
-
# Fetch all documents from the collection
|
| 891 |
documents = list(collection.find())
|
| 892 |
|
| 893 |
-
# Convert to a pandas DataFrame
|
| 894 |
df = pd.DataFrame(documents)
|
| 895 |
-
|
| 896 |
-
# Drop the MongoDB "_id" column if specified
|
| 897 |
if drop_id and '_id' in df.columns:
|
| 898 |
df = df.drop(columns=['_id'])
|
| 899 |
|
|
@@ -903,19 +840,12 @@ def collection_to_dataframe(collection, drop_id=True):
|
|
| 903 |
|
| 904 |
def editable_spreadsheet():
|
| 905 |
st.title("Sowing Report Prediction Model")
|
| 906 |
-
|
| 907 |
-
# Excel file uploader
|
| 908 |
uploaded_file = st.file_uploader("Upload your Excel file", type=['xlsx'])
|
| 909 |
-
|
| 910 |
-
# Check if an Excel file is uploaded
|
| 911 |
if uploaded_file is not None:
|
| 912 |
-
# Read the Excel file
|
| 913 |
df_excel = pd.read_excel(uploaded_file)
|
| 914 |
-
|
| 915 |
-
# Display the DataFrame from the Excel file
|
| 916 |
st.write("Excel data loaded:", df_excel)
|
| 917 |
|
| 918 |
-
# Form for inputting filtering options and area for calculation
|
| 919 |
with st.form("input_form"):
|
| 920 |
input_region = st.text_input("Enter Region to Filter By", placeholder="Region Name")
|
| 921 |
input_season = st.text_input("Enter Season to Filter By", placeholder="Season (e.g., Winter)")
|
|
@@ -924,7 +854,6 @@ def editable_spreadsheet():
|
|
| 924 |
|
| 925 |
if submit_button:
|
| 926 |
if input_region and input_season and input_area > 0:
|
| 927 |
-
# Filter data by the region and season specified
|
| 928 |
filtered_df = df_excel[
|
| 929 |
(df_excel['Region'].str.lower() == input_region.lower()) &
|
| 930 |
(df_excel['Season'].str.lower() == input_season.lower())
|
|
@@ -975,8 +904,6 @@ def display_statistics(df):
|
|
| 975 |
}
|
| 976 |
</style>
|
| 977 |
""", unsafe_allow_html=True)
|
| 978 |
-
|
| 979 |
-
# Ensure 'Reported Date' is in datetime format
|
| 980 |
df['Reported Date'] = pd.to_datetime(df['Reported Date'])
|
| 981 |
national_data = df.groupby('Reported Date').agg({
|
| 982 |
'Modal Price (Rs./Quintal)': 'mean',
|
|
@@ -986,12 +913,13 @@ def display_statistics(df):
|
|
| 986 |
st.subheader("🗓️ Key Statistics")
|
| 987 |
latest_date = national_data['Reported Date'].max()
|
| 988 |
latest_price = national_data[national_data['Reported Date'] == latest_date]['Modal Price (Rs./Quintal)'].mean()
|
|
|
|
| 989 |
latest_arrivals = national_data[national_data['Reported Date'] == latest_date]['Arrivals (Tonnes)'].sum()
|
| 990 |
|
| 991 |
st.markdown("<p class='highlight'>This section provides the most recent statistics for the market. It includes the latest available date, the average price of commodities, and the total quantity of goods arriving at the market. These metrics offer an up-to-date snapshot of market conditions.</p>", unsafe_allow_html=True)
|
| 992 |
st.write(f"**Latest Date**: {latest_date.strftime('%Y-%m-%d')}")
|
| 993 |
st.write(f"**Latest Modal Price**: {latest_price:.2f} Rs./Quintal")
|
| 994 |
-
st.write(f"**Latest Arrivals**: {latest_arrivals:.2f} Tonnes")
|
| 995 |
|
| 996 |
st.subheader("📆 This Day in Previous Years")
|
| 997 |
st.markdown("<p class='highlight'>This table shows the modal price and total arrivals for this exact day across previous years. It provides a historical perspective to compare against current market conditions. This section examines historical data for the same day in previous years. By analyzing trends for this specific day, you can identify seasonal patterns, supply-demand changes, or any deviations that might warrant closer attention.</p>", unsafe_allow_html=True)
|
|
@@ -1060,166 +988,259 @@ def display_statistics(df):
|
|
| 1060 |
editable_spreadsheet()
|
| 1061 |
|
| 1062 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1063 |
|
| 1064 |
def fetch_and_store_data():
|
|
|
|
| 1065 |
latest_doc = collection.find_one(sort=[("Reported Date", -1)])
|
| 1066 |
-
|
| 1067 |
-
|
| 1068 |
-
|
| 1069 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1070 |
|
| 1071 |
-
|
| 1072 |
-
|
| 1073 |
-
|
| 1074 |
-
|
| 1075 |
-
from_date = "01 Jan 2000"
|
| 1076 |
-
|
| 1077 |
-
to_date = (datetime.now() - timedelta(days=1)).strftime('%d %b %Y')
|
| 1078 |
-
from_date_obj = datetime.strptime(from_date, '%d %b %Y')
|
| 1079 |
-
to_date_obj = datetime.strptime(to_date, '%d %b %Y')
|
| 1080 |
-
if to_date_obj < from_date_obj:
|
| 1081 |
-
print("Data already scraped")
|
| 1082 |
-
return None
|
| 1083 |
-
# Build the URL to be requested
|
| 1084 |
-
base_url = "https://agmarknet.gov.in/SearchCmmMkt.aspx"
|
| 1085 |
-
params = {
|
| 1086 |
-
"Tx_Commodity": "11",
|
| 1087 |
-
"Tx_State": "0",
|
| 1088 |
-
"Tx_District": "0",
|
| 1089 |
-
"Tx_Market": "0",
|
| 1090 |
-
"DateFrom": from_date,
|
| 1091 |
-
"DateTo": to_date,
|
| 1092 |
-
"Fr_Date": from_date,
|
| 1093 |
-
"To_Date": to_date,
|
| 1094 |
-
"Tx_Trend": "2",
|
| 1095 |
-
"Tx_CommodityHead": "Sesamum(Sesame,Gingelly,Til)",
|
| 1096 |
-
"Tx_StateHead": "--Select--",
|
| 1097 |
-
"Tx_DistrictHead": "--Select--",
|
| 1098 |
-
"Tx_MarketHead": "--Select--"
|
| 1099 |
-
}
|
| 1100 |
-
|
| 1101 |
-
full_url = f"{base_url}?{'&'.join(f'{k}={v}' for k, v in params.items())}"
|
| 1102 |
-
api_url = "https://api.scraperapi.com"
|
| 1103 |
-
api_key = "bbbbde6b56c0fde1e2a61c914eb22d14"
|
| 1104 |
-
scraperapi_params = {
|
| 1105 |
-
'api_key': api_key,
|
| 1106 |
-
'url': full_url
|
| 1107 |
-
}
|
| 1108 |
|
| 1109 |
-
|
| 1110 |
-
|
| 1111 |
-
|
| 1112 |
-
|
| 1113 |
-
|
| 1114 |
-
|
| 1115 |
-
|
| 1116 |
-
|
| 1117 |
-
|
| 1118 |
-
|
| 1119 |
-
|
| 1120 |
-
|
| 1121 |
-
|
| 1122 |
-
|
| 1123 |
-
|
| 1124 |
-
|
| 1125 |
-
|
| 1126 |
-
|
| 1127 |
-
|
| 1128 |
-
|
| 1129 |
-
|
| 1130 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1131 |
if records:
|
| 1132 |
collection.insert_many(records)
|
| 1133 |
-
print(f"Inserted {len(records)}
|
| 1134 |
else:
|
| 1135 |
-
print("No
|
| 1136 |
|
| 1137 |
-
|
|
|
|
| 1138 |
|
| 1139 |
-
|
| 1140 |
-
print(f"Failed to fetch data with status code: {response.status_code}")
|
| 1141 |
-
return None
|
| 1142 |
|
| 1143 |
-
|
| 1144 |
def fetch_and_store_data_market():
|
|
|
|
| 1145 |
latest_doc = market_price_data.find_one(sort=[("Reported Date", -1)])
|
| 1146 |
-
|
| 1147 |
-
|
| 1148 |
-
|
| 1149 |
-
|
| 1150 |
-
|
| 1151 |
-
|
| 1152 |
-
|
| 1153 |
-
|
| 1154 |
-
|
| 1155 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1156 |
|
| 1157 |
-
|
| 1158 |
-
|
| 1159 |
-
|
| 1160 |
-
|
| 1161 |
-
if to_date_obj <= from_date_obj:
|
| 1162 |
-
st.write("Data already scraped")
|
| 1163 |
-
return None
|
| 1164 |
-
base_url = "https://agmarknet.gov.in/SearchCmmMkt.aspx"
|
| 1165 |
-
params = {
|
| 1166 |
-
"Tx_Commodity": "11",
|
| 1167 |
-
"Tx_State": "0",
|
| 1168 |
-
"Tx_District": "0",
|
| 1169 |
-
"Tx_Market": "0",
|
| 1170 |
-
"DateFrom": from_date,
|
| 1171 |
-
"DateTo": to_date,
|
| 1172 |
-
"Fr_Date": from_date,
|
| 1173 |
-
"To_Date": to_date,
|
| 1174 |
-
"Tx_Trend": "0",
|
| 1175 |
-
"Tx_CommodityHead": "Sesamum(Sesame,Gingelly,Til)",
|
| 1176 |
-
"Tx_StateHead": "--Select--",
|
| 1177 |
-
"Tx_DistrictHead": "--Select--",
|
| 1178 |
-
"Tx_MarketHead": "--Select--"
|
| 1179 |
}
|
| 1180 |
-
|
| 1181 |
-
full_url = f"{base_url}?{'&'.join(f'{k}={v}' for k, v in params.items())}"
|
| 1182 |
-
api_url = "https://api.scraperapi.com"
|
| 1183 |
-
api_key = "8842750a88db7513a1d19325745437cc"
|
| 1184 |
-
scraperapi_params = {
|
| 1185 |
-
'api_key': api_key,
|
| 1186 |
-
'url': full_url
|
| 1187 |
-
}
|
| 1188 |
|
| 1189 |
-
|
| 1190 |
-
|
| 1191 |
-
|
| 1192 |
-
|
| 1193 |
-
|
| 1194 |
-
|
| 1195 |
-
|
| 1196 |
-
|
| 1197 |
-
|
| 1198 |
-
|
| 1199 |
-
|
| 1200 |
-
|
| 1201 |
-
|
| 1202 |
-
|
| 1203 |
-
|
| 1204 |
-
|
| 1205 |
-
|
| 1206 |
-
|
| 1207 |
-
|
| 1208 |
-
|
| 1209 |
-
|
| 1210 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1211 |
if records:
|
| 1212 |
market_price_data.insert_many(records)
|
| 1213 |
-
print(f"Inserted {len(records)}
|
| 1214 |
else:
|
| 1215 |
-
print("No
|
| 1216 |
-
return df
|
| 1217 |
-
else:
|
| 1218 |
-
st.write("No table found")
|
| 1219 |
|
| 1220 |
-
|
| 1221 |
-
|
| 1222 |
-
|
|
|
|
| 1223 |
|
| 1224 |
|
| 1225 |
|
|
@@ -1312,208 +1333,165 @@ st.markdown("""
|
|
| 1312 |
if 'authenticated' not in st.session_state:
|
| 1313 |
st.session_state.authenticated = False
|
| 1314 |
|
| 1315 |
-
if st.session_state.authenticated:
|
| 1316 |
st.title("🌾 AgriPredict Dashboard")
|
|
|
|
| 1317 |
if st.button("Get Live Data Feed"):
|
|
|
|
| 1318 |
fetch_and_store_data()
|
| 1319 |
fetch_and_store_data_market()
|
|
|
|
| 1320 |
view_mode = st.radio("", ["Statistics", "Plots", "Predictions", "Exim"], horizontal=True)
|
| 1321 |
|
| 1322 |
if view_mode == "Plots":
|
| 1323 |
st.sidebar.header("Filters")
|
|
|
|
| 1324 |
selected_period = st.sidebar.selectbox(
|
| 1325 |
"Select Time Period",
|
| 1326 |
-
["2 Weeks", "1 Month", "
|
| 1327 |
index=1
|
| 1328 |
)
|
| 1329 |
period_mapping = {
|
| 1330 |
"2 Weeks": 14,
|
| 1331 |
"1 Month": 30,
|
| 1332 |
-
"2 Months": 60,
|
| 1333 |
"3 Months": 90,
|
| 1334 |
-
"6 Months": 180,
|
| 1335 |
"1 Year": 365,
|
| 1336 |
"2 Years": 730,
|
| 1337 |
"5 Years": 1825
|
| 1338 |
}
|
| 1339 |
-
st.session_state
|
| 1340 |
-
|
| 1341 |
-
# Add 'India' option to the list of states
|
| 1342 |
state_options = list(state_market_dict.keys()) + ['India']
|
| 1343 |
-
selected_state = st.sidebar.selectbox("Select", state_options)
|
| 1344 |
-
|
| 1345 |
market_wise = False
|
|
|
|
|
|
|
| 1346 |
if selected_state != 'India':
|
| 1347 |
market_wise = st.sidebar.checkbox("Market Wise Analysis")
|
| 1348 |
if market_wise:
|
| 1349 |
markets = state_market_dict.get(selected_state, [])
|
|
|
|
| 1350 |
selected_market = st.sidebar.selectbox("Select Market", markets)
|
| 1351 |
query_filter = {"Market Name": selected_market}
|
| 1352 |
else:
|
| 1353 |
-
query_filter = {"
|
| 1354 |
else:
|
| 1355 |
-
query_filter = {
|
| 1356 |
-
|
| 1357 |
-
# Dropdown for data type
|
| 1358 |
-
data_type = st.sidebar.radio(
|
| 1359 |
-
"Select Data Type",
|
| 1360 |
-
["Price", "Volume", "Both"]
|
| 1361 |
-
)
|
| 1362 |
-
|
| 1363 |
-
# Add date filtering based on selected period
|
| 1364 |
query_filter["Reported Date"] = {
|
| 1365 |
-
"$gte": datetime.now() - timedelta(days=st.session_state
|
| 1366 |
}
|
| 1367 |
-
|
| 1368 |
-
|
|
|
|
|
|
|
|
|
|
| 1369 |
if st.sidebar.button("✨ Let's go!"):
|
| 1370 |
try:
|
| 1371 |
-
df_market_grouped =
|
|
|
|
|
|
|
|
|
|
| 1372 |
if "Market Name" in query_filter:
|
|
|
|
| 1373 |
market_cursor = market_price_data.find(query_filter)
|
| 1374 |
market_data = list(market_cursor)
|
| 1375 |
-
|
| 1376 |
-
|
| 1377 |
-
|
| 1378 |
-
|
| 1379 |
-
|
| 1380 |
-
|
| 1381 |
-
|
| 1382 |
-
|
| 1383 |
-
|
| 1384 |
-
|
| 1385 |
-
|
| 1386 |
-
|
|
|
|
|
|
|
|
|
|
| 1387 |
cursor = collection.find(query_filter)
|
| 1388 |
data = list(cursor)
|
| 1389 |
-
|
|
|
|
| 1390 |
if data:
|
| 1391 |
-
# Convert MongoDB data to a DataFrame
|
| 1392 |
df = pd.DataFrame(data)
|
| 1393 |
-
df['Reported Date'] = pd.to_datetime(df['Reported Date'])
|
| 1394 |
-
|
| 1395 |
-
|
| 1396 |
-
|
| 1397 |
-
|
| 1398 |
-
|
| 1399 |
-
|
| 1400 |
-
|
| 1401 |
-
|
| 1402 |
-
|
| 1403 |
-
df_grouped = df.groupby('Reported Date', as_index=False).agg({
|
| 1404 |
-
'Arrivals (Tonnes)': 'sum',
|
| 1405 |
-
'Modal Price (Rs./Quintal)': 'mean'
|
| 1406 |
-
})
|
| 1407 |
-
|
| 1408 |
-
# Create a complete date range
|
| 1409 |
-
date_range = pd.date_range(
|
| 1410 |
-
start=df_grouped['Reported Date'].min(),
|
| 1411 |
-
end=df_grouped['Reported Date'].max()
|
| 1412 |
-
)
|
| 1413 |
df_grouped = df_grouped.set_index('Reported Date').reindex(date_range).rename_axis('Reported Date').reset_index()
|
| 1414 |
-
|
| 1415 |
-
# Fill missing values
|
| 1416 |
df_grouped['Arrivals (Tonnes)'] = df_grouped['Arrivals (Tonnes)'].fillna(method='ffill').fillna(method='bfill')
|
| 1417 |
df_grouped['Modal Price (Rs./Quintal)'] = df_grouped['Modal Price (Rs./Quintal)'].fillna(method='ffill').fillna(method='bfill')
|
| 1418 |
-
|
| 1419 |
-
st.subheader(f"📈 Trends for {selected_state} ({'Market: ' + selected_market if market_wise else 'State'})")
|
| 1420 |
-
|
|
|
|
|
|
|
| 1421 |
if data_type == "Both":
|
| 1422 |
-
# Min-Max Scaling
|
| 1423 |
scaler = MinMaxScaler()
|
| 1424 |
df_grouped[['Scaled Price', 'Scaled Arrivals']] = scaler.fit_transform(
|
| 1425 |
df_grouped[['Modal Price (Rs./Quintal)', 'Arrivals (Tonnes)']]
|
| 1426 |
)
|
| 1427 |
-
|
| 1428 |
-
|
| 1429 |
-
|
| 1430 |
-
|
| 1431 |
-
|
| 1432 |
-
|
| 1433 |
-
|
| 1434 |
-
|
| 1435 |
-
|
| 1436 |
-
y=df_market_grouped['Scaled Price'],
|
| 1437 |
-
mode='lines',
|
| 1438 |
-
name='Scaled Price',
|
| 1439 |
-
line=dict(width=1, color='green'),
|
| 1440 |
-
text=df_market_grouped['Modal Price (Rs./Quintal)'],
|
| 1441 |
-
hovertemplate='Date: %{x}<br>Scaled Price: %{y:.2f}<br>Actual Price: %{text:.2f}<extra></extra>'
|
| 1442 |
-
))
|
| 1443 |
-
else:
|
| 1444 |
-
fig = go.Figure()
|
| 1445 |
-
|
| 1446 |
-
fig.add_trace(go.Scatter(
|
| 1447 |
-
x=df_grouped['Reported Date'],
|
| 1448 |
-
y=df_grouped['Scaled Price'],
|
| 1449 |
-
mode='lines',
|
| 1450 |
-
name='Scaled Price',
|
| 1451 |
-
line=dict(width=1, color='green'),
|
| 1452 |
-
text=df_grouped['Modal Price (Rs./Quintal)'],
|
| 1453 |
-
hovertemplate='Date: %{x}<br>Scaled Price: %{y:.2f}<br>Actual Price: %{text:.2f}<extra></extra>'
|
| 1454 |
-
))
|
| 1455 |
-
|
| 1456 |
fig.add_trace(go.Scatter(
|
| 1457 |
x=df_grouped['Reported Date'],
|
| 1458 |
y=df_grouped['Scaled Arrivals'],
|
| 1459 |
mode='lines',
|
| 1460 |
name='Scaled Arrivals',
|
| 1461 |
-
line=dict(
|
| 1462 |
-
text=df_grouped['Arrivals (Tonnes)'],
|
| 1463 |
-
hovertemplate='Date: %{x}<br>Scaled Arrivals: %{y:.2f}<br>Actual Arrivals: %{text:.2f}<extra></extra>'
|
| 1464 |
))
|
| 1465 |
-
|
| 1466 |
-
fig.update_layout(
|
| 1467 |
-
title="Price and Arrivals Trend",
|
| 1468 |
-
xaxis_title='Date',
|
| 1469 |
-
yaxis_title='Scaled Values',
|
| 1470 |
-
template='plotly_white'
|
| 1471 |
-
)
|
| 1472 |
-
st.plotly_chart(fig, use_container_width=True)
|
| 1473 |
-
|
| 1474 |
elif data_type == "Price":
|
| 1475 |
-
|
| 1476 |
-
|
| 1477 |
-
|
| 1478 |
-
|
| 1479 |
-
|
| 1480 |
-
|
| 1481 |
-
|
| 1482 |
-
|
| 1483 |
-
|
| 1484 |
-
))
|
| 1485 |
-
fig.update_layout(title="Modal Price Trend", xaxis_title='Date', yaxis_title='Price (/Quintall)', template='plotly_white')
|
| 1486 |
-
st.plotly_chart(fig, use_container_width=True)
|
| 1487 |
-
else:
|
| 1488 |
-
fig = go.Figure()
|
| 1489 |
-
fig.add_trace(go.Scatter(
|
| 1490 |
-
x=df_grouped['Reported Date'],
|
| 1491 |
-
y=df_grouped['Modal Price (Rs./Quintal)'],
|
| 1492 |
-
mode='lines',
|
| 1493 |
-
name='Modal Price',
|
| 1494 |
-
line=dict(width=1, color='green')
|
| 1495 |
-
))
|
| 1496 |
-
fig.update_layout(title="Modal Price Trend", xaxis_title='Date', yaxis_title='Price (/Quintall)', template='plotly_white')
|
| 1497 |
-
st.plotly_chart(fig, use_container_width=True)
|
| 1498 |
-
|
| 1499 |
elif data_type == "Volume":
|
| 1500 |
-
# Plot Arrivals (Tonnes)
|
| 1501 |
-
fig = go.Figure()
|
| 1502 |
fig.add_trace(go.Scatter(
|
| 1503 |
x=df_grouped['Reported Date'],
|
| 1504 |
y=df_grouped['Arrivals (Tonnes)'],
|
| 1505 |
mode='lines',
|
| 1506 |
name='Arrivals',
|
| 1507 |
-
line=dict(
|
| 1508 |
))
|
| 1509 |
-
|
| 1510 |
-
|
| 1511 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1512 |
else:
|
| 1513 |
-
st.warning("⚠️ No data found for the selected
|
| 1514 |
-
|
| 1515 |
except Exception as e:
|
| 1516 |
st.error(f"❌ Error fetching data 2: {e}")
|
|
|
|
|
|
|
| 1517 |
elif view_mode == "Predictions":
|
| 1518 |
st.subheader("📊 Model Analysis")
|
| 1519 |
sub_option = st.radio("Select one of the following", ["India", "States", "Market"], horizontal=True)
|
|
@@ -1521,10 +1499,10 @@ if st.session_state.authenticated:
|
|
| 1521 |
if sub_option == "States":
|
| 1522 |
states = ["Karnataka", "Madhya Pradesh", "Gujarat", "Uttar Pradesh", "Telangana"]
|
| 1523 |
selected_state = st.selectbox("Select State for Model Training", states)
|
| 1524 |
-
filter_key = f"state_{selected_state}"
|
| 1525 |
|
| 1526 |
if st.button("Forecast"):
|
| 1527 |
-
query_filter = {"
|
| 1528 |
df = fetch_and_process_data(query_filter, collection)
|
| 1529 |
if sub_timeline == "14 days":
|
| 1530 |
forecast(df, filter_key, 14)
|
|
@@ -1535,11 +1513,12 @@ if st.session_state.authenticated:
|
|
| 1535 |
elif sub_option == "Market":
|
| 1536 |
market_options = ["Rajkot", "Neemuch", "Kalburgi", "Warangal"]
|
| 1537 |
selected_market = st.selectbox("Select Market for Model Training", market_options)
|
| 1538 |
-
filter_key = f"market_{selected_market}"
|
| 1539 |
if st.button("Forecast"):
|
| 1540 |
query_filter = {"Market Name": selected_market}
|
| 1541 |
comparison_date = pd.to_datetime("18 Feb 2025")
|
| 1542 |
df = fetch_and_process_data(query_filter, market_price_data)
|
|
|
|
| 1543 |
if sub_timeline == "14 days":
|
| 1544 |
forecast(df, filter_key, 14)
|
| 1545 |
elif sub_timeline == "1 month":
|
|
@@ -1566,24 +1545,19 @@ if st.session_state.authenticated:
|
|
| 1566 |
display_statistics(df)
|
| 1567 |
elif view_mode == "Exim":
|
| 1568 |
df = collection_to_dataframe(impExp)
|
| 1569 |
-
|
| 1570 |
-
# Add radio buttons for user selection
|
| 1571 |
plot_option = st.radio(
|
| 1572 |
"Select the data to visualize:",
|
| 1573 |
["Import Price", "Import Quantity", "Export Price", "Export Quantity"],
|
| 1574 |
horizontal=True
|
| 1575 |
)
|
| 1576 |
-
|
| 1577 |
-
# Dropdown for time period selection
|
| 1578 |
time_period = st.selectbox(
|
| 1579 |
"Select time period:",
|
| 1580 |
["1 Month", "6 Months", "1 Year", "2 Years"]
|
| 1581 |
)
|
| 1582 |
|
| 1583 |
-
# Convert Reported Date to datetime
|
| 1584 |
df["Reported Date"] = pd.to_datetime(df["Reported Date"], format="%Y-%m-%d")
|
| 1585 |
-
|
| 1586 |
-
# Filter data based on the time period
|
| 1587 |
if time_period == "1 Month":
|
| 1588 |
start_date = pd.Timestamp.now() - pd.DateOffset(months=1)
|
| 1589 |
elif time_period == "6 Months":
|
|
@@ -1594,8 +1568,6 @@ if st.session_state.authenticated:
|
|
| 1594 |
start_date = pd.Timestamp.now() - pd.DateOffset(years=2)
|
| 1595 |
|
| 1596 |
filtered_df = df[df["Reported Date"] >= start_date]
|
| 1597 |
-
|
| 1598 |
-
# Process data based on the selected option
|
| 1599 |
if plot_option == "Import Price":
|
| 1600 |
grouped_df = (
|
| 1601 |
filtered_df.groupby("Reported Date", as_index=False)["VALUE_IMPORT"]
|
|
@@ -1624,12 +1596,11 @@ if st.session_state.authenticated:
|
|
| 1624 |
.rename(columns={"QUANTITY_IMPORT": "Total Export Quantity"})
|
| 1625 |
)
|
| 1626 |
y_axis_label = "Total Export Quantity (Tonnes)"
|
| 1627 |
-
|
| 1628 |
-
# Plot using Plotly
|
| 1629 |
fig = px.line(
|
| 1630 |
grouped_df,
|
| 1631 |
x="Reported Date",
|
| 1632 |
-
y=grouped_df.columns[1],
|
| 1633 |
title=f"{plot_option} Over Time",
|
| 1634 |
labels={"Reported Date": "Date", grouped_df.columns[1]: y_axis_label},
|
| 1635 |
)
|
|
@@ -1646,9 +1617,9 @@ else:
|
|
| 1646 |
|
| 1647 |
if login_button:
|
| 1648 |
if authenticate_user(username, password):
|
| 1649 |
-
st.session_state.authenticated = True
|
| 1650 |
-
st.session_state['username'] = username
|
| 1651 |
st.write("Login successful!")
|
| 1652 |
-
st.rerun()
|
| 1653 |
else:
|
| 1654 |
st.error("Invalid username or password")
|
|
|
|
| 20 |
from tqdm import tqdm
|
| 21 |
import io
|
| 22 |
from statsmodels.tsa.statespace.sarimax import SARIMAX
|
| 23 |
+
from datetime import datetime, timedelta
|
| 24 |
+
|
| 25 |
+
def generate_date_ranges(start_date: str, end_date: str):
    """Yield a ``(day, day)`` pair of date strings for every day in a range.

    Both bounds are parsed with the ``"%d %b %Y"`` format (for example
    ``"18 Feb 2025"``) and are inclusive. Each yielded tuple repeats the
    same formatted day twice, i.e. it describes a one-day range. Nothing
    is yielded when ``start_date`` falls after ``end_date``.

    Args:
        start_date: First day of the range, formatted as ``"%d %b %Y"``.
        end_date: Last day of the range, formatted as ``"%d %b %Y"``.

    Yields:
        tuple[str, str]: ``(day, day)`` with both items in ``"%d %b %Y"``.

    Raises:
        ValueError: If either argument does not match the expected format
            (raised by ``datetime.strptime`` when iteration starts).
    """
    date_format = "%d %b %Y"
    first_day = datetime.strptime(start_date, date_format)
    last_day = datetime.strptime(end_date, date_format)

    # Both bounds parse to midnight, so their difference is a whole number
    # of days; a negative count simply produces an empty range.
    day_count = (last_day - first_day).days + 1
    for offset in range(day_count):
        label = (first_day + timedelta(days=offset)).strftime(date_format)
        yield (label, label)
|
| 32 |
|
| 33 |
mongo_uri = "mongodb+srv://Agripredict:TjXSvMhOis49qH8E@cluster0.gek7n.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0"
|
| 34 |
if not mongo_uri:
|
| 35 |
st.error("MongoDB URI is not set!")
|
| 36 |
st.stop()
|
| 37 |
else:
|
|
|
|
| 38 |
client = MongoClient(mongo_uri, tlsCAFile=certifi.where())
|
| 39 |
db = client["AgriPredict"]
|
| 40 |
collection = db["WhiteSesame"]
|
|
|
|
| 299 |
|
| 300 |
|
| 301 |
def preprocess_data(df):
    """Build a gap-free daily modal-price series from raw price records.

    Steps, in order:
      1. keep only the ``'Reported Date'`` and ``'Modal Price (Rs./Quintal)'``
         columns;
      2. convert ``'Reported Date'`` to datetime;
      3. average the price over all rows sharing the same date;
      4. reindex onto every calendar day between the earliest and latest
         date;
      5. fill the days that had no record by forward-fill, then back-fill.

    Args:
        df: DataFrame containing at least the two columns named above.
            The caller's frame is not modified.

    Returns:
        pd.DataFrame: One row per calendar day with columns
        ``'Reported Date'`` and ``'Modal Price (Rs./Quintal)'``.
    """
    date_col = 'Reported Date'
    price_col = 'Modal Price (Rs./Quintal)'

    # Explicit copy: the column assignment below must not target a slice of
    # the caller's frame (avoids pandas' SettingWithCopyWarning).
    df = df[[date_col, price_col]].copy()
    df[date_col] = pd.to_datetime(df[date_col])
    df = df.groupby(date_col, as_index=False).mean()

    full_date_range = pd.date_range(df[date_col].min(), df[date_col].max())
    df = df.set_index(date_col).reindex(full_date_range).rename_axis(date_col).reset_index()

    # .ffill()/.bfill() replace fillna(method=...), which is deprecated in
    # pandas 2.x; the filled values are the same.
    df[price_col] = df[price_col].ffill().bfill()
    return df
|
| 312 |
|
| 313 |
+
|
| 314 |
def train_and_evaluate(df):
|
| 315 |
import streamlit as st
|
|
|
|
|
|
|
| 316 |
progress_bar = st.progress(0)
|
|
|
|
|
|
|
| 317 |
def update_tuning_progress(current, total):
|
| 318 |
progress = int((current / total) * 100)
|
| 319 |
progress_bar.progress(progress)
|
| 320 |
|
| 321 |
df = create_forecasting_features(df)
|
| 322 |
|
|
|
|
| 323 |
train_df = df[df['Reported Date'] < '2024-01-01']
|
| 324 |
test_df = df[df['Reported Date'] >= '2024-01-01']
|
| 325 |
|
|
|
|
| 327 |
y_train = train_df['Modal Price (Rs./Quintal)']
|
| 328 |
X_test = test_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
|
| 329 |
y_test = test_df['Modal Price (Rs./Quintal)']
|
|
|
|
|
|
|
| 330 |
st.write("Performing hyperparameter tuning...")
|
| 331 |
param_grid = {
|
| 332 |
'learning_rate': [0.01, 0.1, 0.2],
|
|
|
|
| 339 |
param_combinations = len(param_grid['learning_rate']) * len(param_grid['max_depth']) * \
|
| 340 |
len(param_grid['n_estimators']) * len(param_grid['booster'])
|
| 341 |
|
| 342 |
+
current_combination = 0
|
| 343 |
|
| 344 |
def custom_grid_search():
|
| 345 |
nonlocal current_combination
|
|
|
|
| 365 |
'n_estimators': n_estimators,
|
| 366 |
'booster': booster
|
| 367 |
}
|
|
|
|
| 368 |
current_combination += 1
|
| 369 |
update_tuning_progress(current_combination, param_combinations)
|
| 370 |
return best_params
|
| 371 |
|
| 372 |
best_params = custom_grid_search()
|
| 373 |
+
|
|
|
|
| 374 |
st.write("Training the best model and making predictions...")
|
| 375 |
best_model = XGBRegressor(**best_params)
|
| 376 |
best_model.fit(X_train, y_train)
|
| 377 |
y_pred = best_model.predict(X_test)
|
| 378 |
|
|
|
|
| 379 |
rmse = mean_squared_error(y_test, y_pred, squared=False)
|
| 380 |
mae = mean_absolute_error(y_test, y_pred)
|
| 381 |
st.write(f"RMSE: {rmse}")
|
| 382 |
st.write(f"MAE: {mae}")
|
| 383 |
+
|
|
|
|
| 384 |
train_plot_df = train_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
|
| 385 |
train_plot_df['Type'] = 'Train'
|
| 386 |
|
|
|
|
| 415 |
|
| 416 |
st.plotly_chart(fig, use_container_width=True)
|
| 417 |
|
|
|
|
| 418 |
return best_params
|
| 419 |
|
| 420 |
def train_and_evaluate_1m(df):
|
|
|
|
| 424 |
from xgboost import XGBRegressor
|
| 425 |
from sklearn.metrics import mean_squared_error, mean_absolute_error
|
| 426 |
|
|
|
|
| 427 |
progress_bar = st.progress(0)
|
| 428 |
|
|
|
|
| 429 |
def update_tuning_progress(current, total):
|
| 430 |
progress = int((current / total) * 100)
|
| 431 |
progress_bar.progress(progress)
|
| 432 |
|
| 433 |
df = create_forecasting_features_1m(df)
|
| 434 |
+
|
|
|
|
| 435 |
split_date = pd.to_datetime("2024-01-01")
|
| 436 |
+
test_horizon = pd.DateOffset(days=30)
|
| 437 |
|
| 438 |
train_df = df[df['Reported Date'] < split_date]
|
| 439 |
test_df = df[(df['Reported Date'] >= split_date) & (df['Reported Date'] < split_date + test_horizon)]
|
|
|
|
| 443 |
X_test = test_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
|
| 444 |
y_test = test_df['Modal Price (Rs./Quintal)']
|
| 445 |
|
|
|
|
| 446 |
st.write("Performing hyperparameter tuning...")
|
| 447 |
param_grid = {
|
| 448 |
'learning_rate': [0.01, 0.1, 0.2],
|
|
|
|
| 455 |
param_combinations = len(param_grid['learning_rate']) * len(param_grid['max_depth']) * \
|
| 456 |
len(param_grid['n_estimators']) * len(param_grid['booster'])
|
| 457 |
|
| 458 |
+
current_combination = 0
|
| 459 |
|
| 460 |
def custom_grid_search():
|
| 461 |
nonlocal current_combination
|
|
|
|
| 481 |
'n_estimators': n_estimators,
|
| 482 |
'booster': booster
|
| 483 |
}
|
|
|
|
| 484 |
current_combination += 1
|
| 485 |
update_tuning_progress(current_combination, param_combinations)
|
| 486 |
return best_params
|
| 487 |
|
| 488 |
best_params = custom_grid_search()
|
|
|
|
|
|
|
| 489 |
st.write("Training the best model and making predictions...")
|
| 490 |
best_model = XGBRegressor(**best_params)
|
| 491 |
best_model.fit(X_train, y_train)
|
| 492 |
y_pred = best_model.predict(X_test)
|
| 493 |
|
|
|
|
| 494 |
rmse = mean_squared_error(y_test, y_pred, squared=False)
|
| 495 |
mae = mean_absolute_error(y_test, y_pred)
|
| 496 |
st.write(f"RMSE: {rmse}")
|
| 497 |
st.write(f"MAE: {mae}")
|
| 498 |
|
|
|
|
| 499 |
train_plot_df = train_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
|
| 500 |
train_plot_df['Type'] = 'Train'
|
| 501 |
|
|
|
|
| 530 |
|
| 531 |
st.plotly_chart(fig, use_container_width=True)
|
| 532 |
|
|
|
|
| 533 |
return best_params
|
| 534 |
|
| 535 |
def train_and_evaluate_3m(df):
|
| 536 |
import streamlit as st
|
|
|
|
|
|
|
| 537 |
progress_bar = st.progress(0)
|
|
|
|
|
|
|
| 538 |
def update_tuning_progress(current, total):
|
| 539 |
progress = int((current / total) * 100)
|
| 540 |
progress_bar.progress(progress)
|
|
|
|
| 548 |
X_test = test_df.drop(columns=['Modal Price (Rs./Quintal)', 'Reported Date'])
|
| 549 |
y_test = test_df['Modal Price (Rs./Quintal)']
|
| 550 |
|
|
|
|
| 551 |
st.write("Performing hyperparameter tuning...")
|
| 552 |
param_grid = {
|
| 553 |
'learning_rate': [0.01, 0.1, 0.2],
|
|
|
|
| 560 |
param_combinations = len(param_grid['learning_rate']) * len(param_grid['max_depth']) * \
|
| 561 |
len(param_grid['n_estimators']) * len(param_grid['booster'])
|
| 562 |
|
| 563 |
+
current_combination = 0
|
| 564 |
|
| 565 |
def custom_grid_search():
|
| 566 |
nonlocal current_combination
|
|
|
|
| 586 |
'n_estimators': n_estimators,
|
| 587 |
'booster': booster
|
| 588 |
}
|
|
|
|
| 589 |
current_combination += 1
|
| 590 |
update_tuning_progress(current_combination, param_combinations)
|
| 591 |
return best_params
|
| 592 |
|
| 593 |
best_params = custom_grid_search()
|
|
|
|
|
|
|
| 594 |
st.write("Training the best model and making predictions...")
|
| 595 |
best_model = XGBRegressor(**best_params)
|
| 596 |
best_model.fit(X_train, y_train)
|
| 597 |
y_pred = best_model.predict(X_test)
|
| 598 |
|
|
|
|
| 599 |
rmse = mean_squared_error(y_test, y_pred, squared=False)
|
| 600 |
mae = mean_absolute_error(y_test, y_pred)
|
| 601 |
st.write(f"RMSE: {rmse}")
|
| 602 |
st.write(f"MAE: {mae}")
|
| 603 |
|
|
|
|
| 604 |
train_plot_df = train_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
|
| 605 |
train_plot_df['Type'] = 'Train'
|
| 606 |
|
|
|
|
| 635 |
|
| 636 |
st.plotly_chart(fig, use_container_width=True)
|
| 637 |
|
|
|
|
| 638 |
return best_params
|
| 639 |
|
| 640 |
def forecast_next_14_days(df, _best_params, key):
|
| 641 |
last_date = df['Reported Date'].max()
|
| 642 |
future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=14)
|
| 643 |
future_df = pd.DataFrame({'Reported Date': future_dates})
|
|
|
|
|
|
|
| 644 |
full_df = pd.concat([df, future_df], ignore_index=True)
|
| 645 |
full_df = create_forecasting_features(full_df)
|
| 646 |
|
|
|
|
| 656 |
|
| 657 |
future_predictions = model.predict(X_future)
|
| 658 |
future_df['Modal Price (Rs./Quintal)'] = future_predictions
|
|
|
|
|
|
|
| 659 |
plot_data(original_df, future_df, last_date, 14)
|
| 660 |
download_button(future_df, key)
|
| 661 |
|
|
|
|
| 663 |
last_date = df['Reported Date'].max()
|
| 664 |
future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=30)
|
| 665 |
future_df = pd.DataFrame({'Reported Date': future_dates})
|
|
|
|
|
|
|
| 666 |
full_df = pd.concat([df, future_df], ignore_index=True)
|
| 667 |
full_df = create_forecasting_features_1m(full_df)
|
| 668 |
|
|
|
|
| 678 |
|
| 679 |
future_predictions = model.predict(X_future)
|
| 680 |
future_df['Modal Price (Rs./Quintal)'] = future_predictions
|
|
|
|
|
|
|
| 681 |
plot_data(original_df, future_df, last_date, 30)
|
| 682 |
download_button(future_df, key)
|
| 683 |
|
|
|
|
| 685 |
last_date = df['Reported Date'].max()
|
| 686 |
future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=90)
|
| 687 |
future_df = pd.DataFrame({'Reported Date': future_dates})
|
|
|
|
|
|
|
| 688 |
full_df = pd.concat([df, future_df], ignore_index=True)
|
| 689 |
full_df = create_forecasting_features_3m(full_df)
|
| 690 |
|
|
|
|
| 700 |
|
| 701 |
future_predictions = model.predict(X_future)
|
| 702 |
future_df['Modal Price (Rs./Quintal)'] = future_predictions
|
|
|
|
|
|
|
| 703 |
plot_data(original_df, future_df, last_date, 90)
|
| 704 |
download_button(future_df, key)
|
| 705 |
|
| 706 |
def plot_data(original_df, future_df, last_date, days):
|
|
|
|
| 707 |
actual_df = original_df[original_df['Reported Date'] >= (last_date - pd.Timedelta(days=days))].copy()
|
| 708 |
actual_df['Type'] = 'Actual'
|
| 709 |
+
|
|
|
|
| 710 |
future_plot_df = future_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
|
| 711 |
future_plot_df['Type'] = 'Forecasted'
|
|
|
|
|
|
|
|
|
|
| 712 |
last_actual_point = actual_df.sort_values('Reported Date').iloc[[-1]].copy()
|
| 713 |
future_plot_df = pd.concat([last_actual_point, future_plot_df])
|
| 714 |
+
|
|
|
|
| 715 |
plot_df = pd.concat([actual_df, future_plot_df])
|
|
|
|
|
|
|
| 716 |
fig = go.Figure()
|
| 717 |
for plot_type, color, dash in [('Actual', 'blue', 'solid'), ('Forecasted', 'red', 'dash')]:
|
| 718 |
data = plot_df[plot_df['Type'] == plot_type]
|
|
|
|
| 735 |
|
| 736 |
|
| 737 |
def download_button(future_df, key):
|
|
|
|
| 738 |
download_df = future_df[['Reported Date', 'Modal Price (Rs./Quintal)']].copy()
|
|
|
|
|
|
|
| 739 |
download_df['Reported Date'] = download_df['Reported Date'].dt.strftime('%Y-%m-%d')
|
|
|
|
|
|
|
| 740 |
towrite = io.BytesIO()
|
| 741 |
+
download_df.to_excel(towrite, index=False, engine='xlsxwriter')
|
| 742 |
towrite.seek(0)
|
|
|
|
|
|
|
| 743 |
st.download_button(label="Download Forecasted Values",
|
| 744 |
data=towrite,
|
| 745 |
file_name=f'forecasted_prices_{key}.xlsx',
|
|
|
|
| 772 |
collection.replace_one({"filter_key": filter_key}, best_params)
|
| 773 |
else:
|
| 774 |
collection.insert_one(best_params)
|
| 775 |
+
|
|
|
|
| 776 |
def get_best_params(filter_key, collection):
    """Look up the stored document whose ``filter_key`` field matches.

    Args:
        filter_key: Value to match against the ``"filter_key"`` field.
        collection: Object exposing ``find_one(query)``; presumably a
            pymongo collection (verify against callers).

    Returns:
        Whatever ``collection.find_one`` returns for the query
        ``{"filter_key": filter_key}``.
    """
    lookup = {"filter_key": filter_key}
    return collection.find_one(lookup)
|
| 779 |
+
|
| 780 |
def train_and_forecast(df, filter_key, days):
|
| 781 |
if df is not None:
|
|
|
|
| 782 |
if days==14:
|
| 783 |
best_params = train_and_evaluate(df)
|
| 784 |
save_best_params(filter_key, best_params, best_params_collection)
|
|
|
|
| 791 |
best_params = train_and_evaluate_3m(df)
|
| 792 |
save_best_params(filter_key, best_params, best_params_collection_3m)
|
| 793 |
forecast_next_90_days(df, best_params, filter_key)
|
| 794 |
+
failed_dates_data = []
|
| 795 |
+
failed_dates_market = []
|
| 796 |
|
| 797 |
def forecast(df, filter_key, days):
|
| 798 |
if days==14:
|
|
|
|
| 828 |
Returns:
|
| 829 |
pd.DataFrame: DataFrame containing the collection data.
|
| 830 |
"""
|
|
|
|
| 831 |
documents = list(collection.find())
|
| 832 |
|
|
|
|
| 833 |
df = pd.DataFrame(documents)
|
|
|
|
|
|
|
| 834 |
if drop_id and '_id' in df.columns:
|
| 835 |
df = df.drop(columns=['_id'])
|
| 836 |
|
|
|
|
| 840 |
|
| 841 |
def editable_spreadsheet():
|
| 842 |
st.title("Sowing Report Prediction Model")
|
|
|
|
|
|
|
| 843 |
uploaded_file = st.file_uploader("Upload your Excel file", type=['xlsx'])
|
| 844 |
+
|
|
|
|
| 845 |
if uploaded_file is not None:
|
|
|
|
| 846 |
df_excel = pd.read_excel(uploaded_file)
|
|
|
|
|
|
|
| 847 |
st.write("Excel data loaded:", df_excel)
|
| 848 |
|
|
|
|
| 849 |
with st.form("input_form"):
|
| 850 |
input_region = st.text_input("Enter Region to Filter By", placeholder="Region Name")
|
| 851 |
input_season = st.text_input("Enter Season to Filter By", placeholder="Season (e.g., Winter)")
|
|
|
|
| 854 |
|
| 855 |
if submit_button:
|
| 856 |
if input_region and input_season and input_area > 0:
|
|
|
|
| 857 |
filtered_df = df_excel[
|
| 858 |
(df_excel['Region'].str.lower() == input_region.lower()) &
|
| 859 |
(df_excel['Season'].str.lower() == input_season.lower())
|
|
|
|
| 904 |
}
|
| 905 |
</style>
|
| 906 |
""", unsafe_allow_html=True)
|
|
|
|
|
|
|
| 907 |
df['Reported Date'] = pd.to_datetime(df['Reported Date'])
|
| 908 |
national_data = df.groupby('Reported Date').agg({
|
| 909 |
'Modal Price (Rs./Quintal)': 'mean',
|
|
|
|
| 913 |
st.subheader("🗓️ Key Statistics")
|
| 914 |
latest_date = national_data['Reported Date'].max()
|
| 915 |
latest_price = national_data[national_data['Reported Date'] == latest_date]['Modal Price (Rs./Quintal)'].mean()
|
| 916 |
+
national_data['Arrivals (Tonnes)'] = pd.to_numeric(national_data['Arrivals (Tonnes)'], errors='coerce')
|
| 917 |
latest_arrivals = national_data[national_data['Reported Date'] == latest_date]['Arrivals (Tonnes)'].sum()
|
| 918 |
|
| 919 |
st.markdown("<p class='highlight'>This section provides the most recent statistics for the market. It includes the latest available date, the average price of commodities, and the total quantity of goods arriving at the market. These metrics offer an up-to-date snapshot of market conditions.</p>", unsafe_allow_html=True)
|
| 920 |
st.write(f"**Latest Date**: {latest_date.strftime('%Y-%m-%d')}")
|
| 921 |
st.write(f"**Latest Modal Price**: {latest_price:.2f} Rs./Quintal")
|
| 922 |
+
st.write(f"**Latest Arrivals**: {float(latest_arrivals):.2f} Tonnes")
|
| 923 |
|
| 924 |
st.subheader("📆 This Day in Previous Years")
|
| 925 |
st.markdown("<p class='highlight'>This table shows the modal price and total arrivals for this exact day across previous years. It provides a historical perspective to compare against current market conditions. This section examines historical data for the same day in previous years. By analyzing trends for this specific day, you can identify seasonal patterns, supply-demand changes, or any deviations that might warrant closer attention.</p>", unsafe_allow_html=True)
|
|
|
|
| 988 |
editable_spreadsheet()
|
| 989 |
|
| 990 |
|
| 991 |
+
def parse_table_with_rowspan(table):
    """Flatten an HTML table into rows of text, repeating row-spanned cells.

    Every ``<tr>`` becomes one list of stripped cell texts. A cell carrying
    ``rowspan=N`` (N > 1) has its text copied into the same logical column
    of the following ``N - 1`` rows, so all rows line up column-wise.
    ``colspan`` is not interpreted.

    Args:
        table: Parsed table element exposing ``find_all("tr")``; each row
            must expose ``find_all(["td", "th"])`` and each cell
            ``get_text(strip=True)`` and ``get("rowspan", default)``
            (e.g. a BeautifulSoup ``Tag``).

    Returns:
        list[list[str]]: One list of cell texts per table row, header rows
        included.
    """
    data = []
    # logical column index -> {"value": text, "rows_left": rows still to fill}
    rowspan_map = {}

    for tr in table.find_all("tr"):
        cells = tr.find_all(["td", "th"])
        row_data = []
        col_index = 0   # position within this row's own cells
        cell_index = 0  # logical column in the flattened output row

        while col_index < len(cells) or cell_index in rowspan_map:
            if cell_index in rowspan_map:
                # Column is covered by a cell spanning down from an earlier row.
                cell_info = rowspan_map[cell_index]
                row_data.append(cell_info["value"])
                cell_info["rows_left"] -= 1
                if cell_info["rows_left"] == 0:
                    del rowspan_map[cell_index]
            else:
                # The loop condition guarantees an own cell is still available.
                cell = cells[col_index]
                value = cell.get_text(strip=True)
                row_data.append(value)

                # A malformed rowspan ("" or non-numeric) must not abort the
                # whole parse; treat it as an ordinary single-row cell.
                try:
                    rowspan = int(cell.get("rowspan", 1))
                except (TypeError, ValueError):
                    rowspan = 1

                if rowspan > 1:
                    rowspan_map[cell_index] = {"value": value, "rows_left": rowspan - 1}

                col_index += 1
            cell_index += 1

        data.append(row_data)

    return data
|
| 1026 |
+
|
| 1027 |
|
| 1028 |
def fetch_and_store_data():
    """Scrape new agmarknet rows month by month and insert them into ``collection``.

    The start date is the day after the newest ``"Reported Date"`` found in
    the module-level ``collection`` (or 1 Jan 2019 when it is empty); the
    end date is yesterday. For each calendar month in that window the
    function:

      1. requests the agmarknet search page through the ScraperAPI proxy;
      2. parses the ``tableagmark_new`` table with
         ``parse_table_with_rowspan``;
      3. drops rows whose state/district/market is ``"-"`` and keeps only
         ``Variety == "white"`` / ``Grade == "FAQ"`` rows;
      4. parses dates and converts price/arrivals to numeric types;
      5. writes the cleaned month to a CSV file in the working directory
         and inserts the rows into ``collection``.

    Any exception raised while processing a month is printed and the loop
    moves on to the next month. Returns ``None``.
    """
    import os

    # SECURITY NOTE(review): this key is committed in source. An environment
    # variable of the same name now takes precedence; the literal remains only
    # as a backward-compatible fallback and should be rotated and removed.
    SCRAPER_API_KEY = os.environ.get("SCRAPER_API_KEY") or "8842750a88db7513a1d19325745437cc"
    latest_doc = collection.find_one(sort=[("Reported Date", -1)])
    from_date = (latest_doc["Reported Date"] + timedelta(days=1)) if latest_doc else datetime(2019, 1, 1)
    to_date = datetime.now() - timedelta(days=1)

    print(f"📦 Modal Data → From: {from_date.strftime('%d-%b-%Y')} To: {to_date.strftime('%d-%b-%Y')}")

    # When the newest stored day is already yesterday, from_date falls after
    # to_date. Without this guard the loop below would still run once and
    # issue a request with an inverted date range.
    if from_date.date() > to_date.date():
        print("✅ Modal data is already up to date; nothing to fetch.")
        return

    current = from_date.replace(day=1)
    while current <= to_date:
        start_of_range = max(current, from_date)
        # Last day of current's month: day 28 + 4 days always lands in the
        # next month; snap to its first day and step back one day.
        end_of_range = (current.replace(day=28) + timedelta(days=4)).replace(day=1) - timedelta(days=1)
        if end_of_range > to_date:
            end_of_range = to_date

        date_from_str = start_of_range.strftime('%d-%b-%Y')
        date_to_str = end_of_range.strftime('%d-%b-%Y')

        print(f"\n📅 Fetching data from {date_from_str} to {date_to_str}")

        target_url = (
            "https://agmarknet.gov.in/SearchCmmMkt.aspx"
            "?Tx_Commodity=11&Tx_State=0&Tx_District=0&Tx_Market=0"
            f"&DateFrom={date_from_str}&DateTo={date_to_str}"
            f"&Fr_Date={date_from_str}&To_Date={date_to_str}"
            "&Tx_Trend=2"
            "&Tx_CommodityHead=Sesamum(Sesame,Gingelly,Til)"
            "&Tx_StateHead=--Select--"
            "&Tx_DistrictHead=--Select--"
            "&Tx_MarketHead=--Select--"
        )

        payload = {
            "api_key": SCRAPER_API_KEY,
            "url": target_url
        }

        try:
            # A timeout keeps a stalled proxy request from hanging the app;
            # raise_for_status reports HTTP failures through the except
            # branch instead of parsing an error page as if it were data.
            response = requests.get("https://api.scraperapi.com/", params=payload, timeout=70)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")
            table = soup.find("table", {"class": "tableagmark_new"})

            if not table or not table.find_all("tr"):
                print("❌ No table found.")
                current = (current + timedelta(days=32)).replace(day=1)
                continue

            all_rows = parse_table_with_rowspan(table)
            headers = all_rows[0]
            rows = all_rows[1:]

            df_raw = pd.DataFrame(rows, columns=headers)
            print(f"🔍 Raw rows fetched: {len(df_raw)}")

            # Clean invalid state/district/market names
            required_columns = ["State Name", "District Name", "Market Name"]
            if all(col in df_raw.columns for col in required_columns):
                df_raw = df_raw[
                    (df_raw["State Name"].str.strip() != "-") &
                    (df_raw["District Name"].str.strip() != "-") &
                    (df_raw["Market Name"].str.strip() != "-")
                ]
                print(f"✅ Rows after filtering: {len(df_raw)}")
            else:
                print("⚠️ One or more expected columns are missing. Skipping filter.")

            # Filter by variety + grade
            df_raw = df_raw[
                (df_raw["Variety"].str.strip().str.lower() == "white") &
                (df_raw["Grade"].str.strip().str.upper() == "FAQ")
            ]
            print(f"✅ Filtered rows with 'White' variety and 'FAQ' grade: {len(df_raw)}")

            # Parse and clean dates; rows whose date cannot be parsed are dropped
            df_raw["Reported Date Parsed"] = pd.to_datetime(
                df_raw["Reported Date"].str.strip(), format='%d %b %Y', errors='coerce'
            )
            df_raw = df_raw[df_raw["Reported Date Parsed"].notna()].copy()
            df_raw["Reported Date"] = df_raw["Reported Date Parsed"]
            df_raw.drop(columns=["Reported Date Parsed"], inplace=True)

            # Type conversions (unparseable numbers become missing values)
            df_raw["Modal Price (Rs./Quintal)"] = pd.to_numeric(
                df_raw["Modal Price (Rs./Quintal)"], errors='coerce'
            ).round().astype("Int64")
            df_raw["Arrivals (Tonnes)"] = pd.to_numeric(
                df_raw["Arrivals (Tonnes)"], errors='coerce'
            ).astype("float64")
            df_raw["State Name"] = df_raw["State Name"].astype("string")
            df_raw["Market Name"] = df_raw["Market Name"].astype("string")

            # Write cleaned CSV
            raw_csv_filename = f"clean_raw_modal_data_{start_of_range.strftime('%b_%Y')}.csv"
            df_raw.to_csv(raw_csv_filename, index=False)
            print(f"📄 Cleaned raw data CSV written to: {raw_csv_filename}")

            # Insert to DB
            records = df_raw.to_dict(orient="records")
            if records:
                collection.insert_many(records)
                print(f"✅ Inserted {len(records)} records for {current.strftime('%b %Y')}")
            else:
                print("⚠️ No valid records after final filtering.")

        except Exception as e:
            # Best-effort per month: report the failure and continue.
            print(f"🔥 Exception during {current.strftime('%b %Y')} fetch: {e}")

        # current is always the 1st, so +32 days lands in the next month.
        current = (current + timedelta(days=32)).replace(day=1)
|
|
|
|
|
|
|
| 1136 |
|
|
|
|
| 1137 |
def fetch_and_store_data_market():
    """Fetch new market-level sesame prices from Agmarknet and store them.

    The newest ``Reported Date`` in ``market_price_data`` decides where to
    resume (1 Jan 2019 when the collection is empty). Data is requested one
    calendar month at a time through ScraperAPI, up to and including
    yesterday. Each month is cleaned, written to a CSV file in the working
    directory for auditing, and inserted into ``market_price_data``.

    A failure while processing one month is printed and the loop moves on to
    the next month, so a single bad response does not abort the whole run.

    Returns:
        None. Progress and errors are reported with ``print``.
    """
    # Local import so this function does not depend on the file's import block.
    import os

    # NOTE(review): the literal fallback keeps existing deployments working,
    # but a credential committed to source should be rotated and supplied only
    # through the SCRAPER_API_KEY environment variable.
    SCRAPER_API_KEY = os.environ.get(
        "SCRAPER_API_KEY", "8842750a88db7513a1d19325745437cc"
    )
    # Seconds to wait for the proxy before giving up on a month; without a
    # timeout a stalled request would block the app indefinitely.
    request_timeout = 70

    latest_doc = market_price_data.find_one(sort=[("Reported Date", -1)])
    from_date = (latest_doc["Reported Date"] + timedelta(days=1)) if latest_doc else datetime(2019, 1, 1)
    to_date = datetime.now() - timedelta(days=1)

    print(f"📦 Market Data → From: {from_date.strftime('%d-%b-%Y')} To: {to_date.strftime('%d-%b-%Y')}")

    # When the collection already holds yesterday's data the range is
    # inverted; fetching it would only waste a proxy request.
    if from_date > to_date:
        print("✅ Market data is already up to date; nothing to fetch.")
        return

    current = from_date.replace(day=1)
    while current <= to_date:
        start_of_range = max(current, from_date)
        # Day 28 + 4 days always lands in the next month; stepping back one
        # day from its first day gives the last day of the current month.
        month_end = (current.replace(day=28) + timedelta(days=4)).replace(day=1) - timedelta(days=1)
        end_of_range = min(month_end, to_date)

        date_from_str = start_of_range.strftime('%d-%b-%Y')
        date_to_str = end_of_range.strftime('%d-%b-%Y')

        print(f"\n📅 Fetching data from {date_from_str} to {date_to_str}")

        target_url = (
            "https://agmarknet.gov.in/SearchCmmMkt.aspx"
            f"?Tx_Commodity=11&Tx_State=0&Tx_District=0&Tx_Market=0"
            f"&DateFrom={date_from_str}&DateTo={date_to_str}"
            f"&Fr_Date={date_from_str}&To_Date={date_to_str}"
            "&Tx_Trend=0"
            "&Tx_CommodityHead=Sesamum(Sesame,Gingelly,Til)"
            "&Tx_StateHead=--Select--"
            "&Tx_DistrictHead=--Select--"
            "&Tx_MarketHead=--Select--"
        )

        payload = {
            "api_key": SCRAPER_API_KEY,
            "url": target_url
        }

        try:
            response = requests.get(
                "https://api.scraperapi.com/", params=payload, timeout=request_timeout
            )
            # Surface HTTP errors (bad key, quota, upstream failure) instead of
            # silently parsing an error page.
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")
            table = soup.find("table", {"class": "tableagmark_new"})

            if not table or not table.find_all("tr"):
                print("❌ No table found.")
            else:
                all_rows = parse_table_with_rowspan(table)
                headers = all_rows[0]
                rows = all_rows[1:]
                df_raw = _clean_market_frame(pd.DataFrame(rows, columns=headers))

                # ✅ Save CSV for audit
                raw_csv_filename = f"clean_raw_market_data_{start_of_range.strftime('%b_%Y')}.csv"
                df_raw.to_csv(raw_csv_filename, index=False)
                print(f"📄 CSV saved: {raw_csv_filename}")

                # ✅ Insert into MongoDB
                records = _to_mongo_records(df_raw)
                if records:
                    market_price_data.insert_many(records)
                    print(f"✅ Inserted {len(records)} records for {current.strftime('%b %Y')}")
                else:
                    print("⚠️ No valid records after final filtering.")

        except Exception as e:
            # Per-month boundary: report the failure and keep going.
            print(f"🔥 Exception during {current.strftime('%b %Y')} fetch: {e}")

        # Single advance point: +32 days always reaches the next month.
        current = (current + timedelta(days=32)).replace(day=1)


def _clean_market_frame(df_raw):
    """Filter and type-convert one month of scraped Agmarknet rows.

    Keeps rows with a real district and market, variety ``white`` and grade
    ``FAQ`` (case-insensitive, whitespace-stripped), parses ``Price Date``
    into a datetime ``Reported Date`` column, and converts the three price
    columns to nullable integers.

    Raises:
        KeyError: if the scraped table lacks a column this function reads.
    """
    needed = (
        "District Name", "Market Name", "Variety", "Grade", "Price Date",
        "Modal Price (Rs./Quintal)", "Min Price (Rs./Quintal)", "Max Price (Rs./Quintal)",
    )
    missing = [column for column in needed if column not in df_raw.columns]
    if missing:
        raise KeyError(f"Agmarknet table is missing expected column(s): {missing}")

    # Remove rows with invalid or missing location data
    df_raw = df_raw[
        (df_raw["District Name"].str.strip() != "-") &
        (df_raw["Market Name"].str.strip() != "-")
    ]
    print(f"✅ Rows after filtering invalid locations: {len(df_raw)}")

    # ✅ Filter for variety and grade. Copy so the column assignments below
    # act on an independent frame rather than a view of the unfiltered one.
    df_raw = df_raw[
        (df_raw["Variety"].str.strip().str.lower() == "white") &
        (df_raw["Grade"].str.strip().str.upper() == "FAQ")
    ].copy()
    print(f"✅ Filtered rows with 'White' variety and 'FAQ' grade: {len(df_raw)}")

    # ✅ Parse 'Price Date' as 'Reported Date'; unparsable dates become NaT
    # and those rows are dropped.
    df_raw["Reported Date"] = pd.to_datetime(
        df_raw["Price Date"].str.strip(), format='%d %b %Y', errors='coerce'
    )
    df_raw = df_raw[df_raw["Reported Date"].notna()].copy()

    # ✅ Type conversions
    for price_column in (
        "Modal Price (Rs./Quintal)",
        "Min Price (Rs./Quintal)",
        "Max Price (Rs./Quintal)",
    ):
        df_raw[price_column] = pd.to_numeric(
            df_raw[price_column], errors='coerce'
        ).round().astype("Int64")
    df_raw["District Name"] = df_raw["District Name"].astype("string")
    df_raw["Market Name"] = df_raw["Market Name"].astype("string")
    return df_raw


def _to_mongo_records(df):
    """Convert a DataFrame to a list of dicts with missing values as ``None``.

    The nullable ``Int64``/``string`` columns represent missing values as
    ``pd.NA``; replacing them with ``None`` stores them as BSON null instead
    of handing the encoder a pandas-specific object.
    """
    return [
        {key: (None if pd.isna(value) else value) for key, value in row.items()}
        for row in df.to_dict(orient="records")
    ]
|
| 1244 |
|
| 1245 |
|
| 1246 |
|
|
|
|
| 1333 |
if 'authenticated' not in st.session_state:
|
| 1334 |
st.session_state.authenticated = False
|
| 1335 |
|
| 1336 |
+
if st.session_state.get("authenticated", False):
|
| 1337 |
st.title("🌾 AgriPredict Dashboard")
|
| 1338 |
+
|
| 1339 |
if st.button("Get Live Data Feed"):
|
| 1340 |
+
st.write("🔄 Fetching fresh data from Modal + Agmarknet...")
|
| 1341 |
fetch_and_store_data()
|
| 1342 |
fetch_and_store_data_market()
|
| 1343 |
+
|
| 1344 |
view_mode = st.radio("", ["Statistics", "Plots", "Predictions", "Exim"], horizontal=True)
|
| 1345 |
|
| 1346 |
if view_mode == "Plots":
|
| 1347 |
st.sidebar.header("Filters")
|
| 1348 |
+
|
| 1349 |
selected_period = st.sidebar.selectbox(
|
| 1350 |
"Select Time Period",
|
| 1351 |
+
["2 Weeks", "1 Month", "3 Months", "1 Year", "5 Years"],
|
| 1352 |
index=1
|
| 1353 |
)
|
| 1354 |
period_mapping = {
|
| 1355 |
"2 Weeks": 14,
|
| 1356 |
"1 Month": 30,
|
|
|
|
| 1357 |
"3 Months": 90,
|
|
|
|
| 1358 |
"1 Year": 365,
|
| 1359 |
"2 Years": 730,
|
| 1360 |
"5 Years": 1825
|
| 1361 |
}
|
| 1362 |
+
st.session_state["selected_period"] = period_mapping[selected_period]
|
| 1363 |
+
|
|
|
|
| 1364 |
state_options = list(state_market_dict.keys()) + ['India']
|
| 1365 |
+
selected_state = st.sidebar.selectbox("Select State", state_options)
|
| 1366 |
+
|
| 1367 |
market_wise = False
|
| 1368 |
+
query_filter = {}
|
| 1369 |
+
|
| 1370 |
if selected_state != 'India':
|
| 1371 |
market_wise = st.sidebar.checkbox("Market Wise Analysis")
|
| 1372 |
if market_wise:
|
| 1373 |
markets = state_market_dict.get(selected_state, [])
|
| 1374 |
+
st.write(f"✅ Available markets for {selected_state}: {markets}")
|
| 1375 |
selected_market = st.sidebar.selectbox("Select Market", markets)
|
| 1376 |
query_filter = {"Market Name": selected_market}
|
| 1377 |
else:
|
| 1378 |
+
query_filter = {"State Name": selected_state}
|
| 1379 |
else:
|
| 1380 |
+
query_filter = {"State Name": {"$exists": True}}
|
| 1381 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1382 |
query_filter["Reported Date"] = {
|
| 1383 |
+
"$gte": datetime.now() - timedelta(days=st.session_state["selected_period"])
|
| 1384 |
}
|
| 1385 |
+
|
| 1386 |
+
data_type = st.sidebar.radio("Select Data Type", ["Price", "Volume", "Both"])
|
| 1387 |
+
|
| 1388 |
+
st.write(f"🧪 Final Mongo Query Filter: `{query_filter}`")
|
| 1389 |
+
|
| 1390 |
if st.sidebar.button("✨ Let's go!"):
|
| 1391 |
try:
|
| 1392 |
+
df_market_grouped = pd.DataFrame()
|
| 1393 |
+
df_grouped = pd.DataFrame()
|
| 1394 |
+
|
| 1395 |
+
# MARKET-WISE
|
| 1396 |
if "Market Name" in query_filter:
|
| 1397 |
+
st.info("📊 Market-level data mode enabled")
|
| 1398 |
market_cursor = market_price_data.find(query_filter)
|
| 1399 |
market_data = list(market_cursor)
|
| 1400 |
+
st.write(f"📄 Market rows fetched: {len(market_data)}")
|
| 1401 |
+
|
| 1402 |
+
if market_data:
|
| 1403 |
+
df_market = pd.DataFrame(market_data)
|
| 1404 |
+
df_market['Reported Date'] = pd.to_datetime(df_market['Reported Date'], errors='coerce')
|
| 1405 |
+
df_market["Modal Price (Rs./Quintal)"] = pd.to_numeric(df_market["Modal Price (Rs./Quintal)"], errors='coerce')
|
| 1406 |
+
df_market_grouped = df_market.groupby('Reported Date', as_index=False).agg({
|
| 1407 |
+
'Modal Price (Rs./Quintal)': 'mean'
|
| 1408 |
+
}).dropna()
|
| 1409 |
+
date_range = pd.date_range(df_market_grouped['Reported Date'].min(), df_market_grouped['Reported Date'].max())
|
| 1410 |
+
df_market_grouped = df_market_grouped.set_index('Reported Date').reindex(date_range).rename_axis('Reported Date').reset_index()
|
| 1411 |
+
df_market_grouped['Modal Price (Rs./Quintal)'] = df_market_grouped['Modal Price (Rs./Quintal)'].fillna(method='ffill').fillna(method='bfill')
|
| 1412 |
+
|
| 1413 |
+
# STATE/NATIONAL-WISE
|
| 1414 |
+
st.info("📥 Fetching state-level or national data...")
|
| 1415 |
cursor = collection.find(query_filter)
|
| 1416 |
data = list(cursor)
|
| 1417 |
+
st.write(f"📄 Total rows fetched from collection: {len(data)}")
|
| 1418 |
+
|
| 1419 |
if data:
|
|
|
|
| 1420 |
df = pd.DataFrame(data)
|
| 1421 |
+
df['Reported Date'] = pd.to_datetime(df['Reported Date'], errors='coerce')
|
| 1422 |
+
df['Arrivals (Tonnes)'] = pd.to_numeric(df['Arrivals (Tonnes)'], errors='coerce')
|
| 1423 |
+
df['Modal Price (Rs./Quintal)'] = pd.to_numeric(df['Modal Price (Rs./Quintal)'], errors='coerce')
|
| 1424 |
+
|
| 1425 |
+
df_grouped = df.groupby('Reported Date', as_index=False).agg({
|
| 1426 |
+
'Arrivals (Tonnes)': 'sum',
|
| 1427 |
+
'Modal Price (Rs./Quintal)': 'mean'
|
| 1428 |
+
}).dropna()
|
| 1429 |
+
|
| 1430 |
+
date_range = pd.date_range(df_grouped['Reported Date'].min(), df_grouped['Reported Date'].max())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1431 |
df_grouped = df_grouped.set_index('Reported Date').reindex(date_range).rename_axis('Reported Date').reset_index()
|
|
|
|
|
|
|
| 1432 |
df_grouped['Arrivals (Tonnes)'] = df_grouped['Arrivals (Tonnes)'].fillna(method='ffill').fillna(method='bfill')
|
| 1433 |
df_grouped['Modal Price (Rs./Quintal)'] = df_grouped['Modal Price (Rs./Quintal)'].fillna(method='ffill').fillna(method='bfill')
|
| 1434 |
+
|
| 1435 |
+
st.subheader(f"📈 Trends for {selected_state} ({'Market: ' + selected_market if market_wise else 'State-wide'})")
|
| 1436 |
+
|
| 1437 |
+
fig = go.Figure()
|
| 1438 |
+
|
| 1439 |
if data_type == "Both":
|
|
|
|
| 1440 |
scaler = MinMaxScaler()
|
| 1441 |
df_grouped[['Scaled Price', 'Scaled Arrivals']] = scaler.fit_transform(
|
| 1442 |
df_grouped[['Modal Price (Rs./Quintal)', 'Arrivals (Tonnes)']]
|
| 1443 |
)
|
| 1444 |
+
|
| 1445 |
+
fig.add_trace(go.Scatter(
|
| 1446 |
+
x=df_grouped['Reported Date'],
|
| 1447 |
+
y=df_grouped['Scaled Price'],
|
| 1448 |
+
mode='lines',
|
| 1449 |
+
name='Scaled Modal Price',
|
| 1450 |
+
line=dict(color='green'),
|
| 1451 |
+
))
|
| 1452 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1453 |
fig.add_trace(go.Scatter(
|
| 1454 |
x=df_grouped['Reported Date'],
|
| 1455 |
y=df_grouped['Scaled Arrivals'],
|
| 1456 |
mode='lines',
|
| 1457 |
name='Scaled Arrivals',
|
| 1458 |
+
line=dict(color='blue'),
|
|
|
|
|
|
|
| 1459 |
))
|
| 1460 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1461 |
elif data_type == "Price":
|
| 1462 |
+
price_df = df_market_grouped if not df_market_grouped.empty else df_grouped
|
| 1463 |
+
fig.add_trace(go.Scatter(
|
| 1464 |
+
x=price_df['Reported Date'],
|
| 1465 |
+
y=price_df["Modal Price (Rs./Quintal)"],
|
| 1466 |
+
mode='lines',
|
| 1467 |
+
name='Modal Price',
|
| 1468 |
+
line=dict(color='green'),
|
| 1469 |
+
))
|
| 1470 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1471 |
elif data_type == "Volume":
|
|
|
|
|
|
|
| 1472 |
fig.add_trace(go.Scatter(
|
| 1473 |
x=df_grouped['Reported Date'],
|
| 1474 |
y=df_grouped['Arrivals (Tonnes)'],
|
| 1475 |
mode='lines',
|
| 1476 |
name='Arrivals',
|
| 1477 |
+
line=dict(color='blue'),
|
| 1478 |
))
|
| 1479 |
+
|
| 1480 |
+
fig.update_layout(
|
| 1481 |
+
title="📊 Agricultural Trends",
|
| 1482 |
+
xaxis_title="Date",
|
| 1483 |
+
yaxis_title="Value (Scaled if Both)",
|
| 1484 |
+
template="plotly_white"
|
| 1485 |
+
)
|
| 1486 |
+
st.plotly_chart(fig, use_container_width=True)
|
| 1487 |
+
|
| 1488 |
else:
|
| 1489 |
+
st.warning("⚠️ No data found for the selected filter range and region.")
|
| 1490 |
+
|
| 1491 |
except Exception as e:
|
| 1492 |
st.error(f"❌ Error fetching data 2: {e}")
|
| 1493 |
+
st.exception(e)
|
| 1494 |
+
|
| 1495 |
elif view_mode == "Predictions":
|
| 1496 |
st.subheader("📊 Model Analysis")
|
| 1497 |
sub_option = st.radio("Select one of the following", ["India", "States", "Market"], horizontal=True)
|
|
|
|
| 1499 |
if sub_option == "States":
|
| 1500 |
states = ["Karnataka", "Madhya Pradesh", "Gujarat", "Uttar Pradesh", "Telangana"]
|
| 1501 |
selected_state = st.selectbox("Select State for Model Training", states)
|
| 1502 |
+
filter_key = f"state_{selected_state}"
|
| 1503 |
|
| 1504 |
if st.button("Forecast"):
|
| 1505 |
+
query_filter = {"State Name": selected_state}
|
| 1506 |
df = fetch_and_process_data(query_filter, collection)
|
| 1507 |
if sub_timeline == "14 days":
|
| 1508 |
forecast(df, filter_key, 14)
|
|
|
|
| 1513 |
elif sub_option == "Market":
|
| 1514 |
market_options = ["Rajkot", "Neemuch", "Kalburgi", "Warangal"]
|
| 1515 |
selected_market = st.selectbox("Select Market for Model Training", market_options)
|
| 1516 |
+
filter_key = f"market_{selected_market}"
|
| 1517 |
if st.button("Forecast"):
|
| 1518 |
query_filter = {"Market Name": selected_market}
|
| 1519 |
comparison_date = pd.to_datetime("18 Feb 2025")
|
| 1520 |
df = fetch_and_process_data(query_filter, market_price_data)
|
| 1521 |
+
st.write(df[df["Reported Date"]>comparison_date])
|
| 1522 |
if sub_timeline == "14 days":
|
| 1523 |
forecast(df, filter_key, 14)
|
| 1524 |
elif sub_timeline == "1 month":
|
|
|
|
| 1545 |
display_statistics(df)
|
| 1546 |
elif view_mode == "Exim":
|
| 1547 |
df = collection_to_dataframe(impExp)
|
| 1548 |
+
|
|
|
|
| 1549 |
plot_option = st.radio(
|
| 1550 |
"Select the data to visualize:",
|
| 1551 |
["Import Price", "Import Quantity", "Export Price", "Export Quantity"],
|
| 1552 |
horizontal=True
|
| 1553 |
)
|
| 1554 |
+
|
|
|
|
| 1555 |
time_period = st.selectbox(
|
| 1556 |
"Select time period:",
|
| 1557 |
["1 Month", "6 Months", "1 Year", "2 Years"]
|
| 1558 |
)
|
| 1559 |
|
|
|
|
| 1560 |
df["Reported Date"] = pd.to_datetime(df["Reported Date"], format="%Y-%m-%d")
|
|
|
|
|
|
|
| 1561 |
if time_period == "1 Month":
|
| 1562 |
start_date = pd.Timestamp.now() - pd.DateOffset(months=1)
|
| 1563 |
elif time_period == "6 Months":
|
|
|
|
| 1568 |
start_date = pd.Timestamp.now() - pd.DateOffset(years=2)
|
| 1569 |
|
| 1570 |
filtered_df = df[df["Reported Date"] >= start_date]
|
|
|
|
|
|
|
| 1571 |
if plot_option == "Import Price":
|
| 1572 |
grouped_df = (
|
| 1573 |
filtered_df.groupby("Reported Date", as_index=False)["VALUE_IMPORT"]
|
|
|
|
| 1596 |
.rename(columns={"QUANTITY_IMPORT": "Total Export Quantity"})
|
| 1597 |
)
|
| 1598 |
y_axis_label = "Total Export Quantity (Tonnes)"
|
| 1599 |
+
|
|
|
|
| 1600 |
fig = px.line(
|
| 1601 |
grouped_df,
|
| 1602 |
x="Reported Date",
|
| 1603 |
+
y=grouped_df.columns[1],
|
| 1604 |
title=f"{plot_option} Over Time",
|
| 1605 |
labels={"Reported Date": "Date", grouped_df.columns[1]: y_axis_label},
|
| 1606 |
)
|
|
|
|
| 1617 |
|
| 1618 |
if login_button:
|
| 1619 |
if authenticate_user(username, password):
|
| 1620 |
+
st.session_state.authenticated = True
|
| 1621 |
+
st.session_state['username'] = username
|
| 1622 |
st.write("Login successful!")
|
| 1623 |
+
st.rerun()
|
| 1624 |
else:
|
| 1625 |
st.error("Invalid username or password")
|