"""XGBoost training, evaluation and forecasting helpers for the Streamlit UI."""
from itertools import product

import pandas as pd
import streamlit as st
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from xgboost import XGBRegressor

from .config import get_collections
from .features import (
    create_forecasting_features,
    create_forecasting_features_1m,
    create_forecasting_features_3m,
)
from .plotting import download_button, plot_data

_DATE_COL = 'Reported Date'
_TARGET_COL = 'Modal Price (Rs./Quintal)'

# Hyperparameters explored by the grid search, in iteration order
# (outermost loop first).
_GRID_PARAM_NAMES = ('learning_rate', 'max_depth', 'n_estimators', 'booster')

# Bookkeeping fields stored next to the hyperparameters in the database
# record; they must never be forwarded to XGBRegressor.
_RECORD_METADATA_KEYS = frozenset({'_id', 'filter_key', 'last_updated'})


def _model_params(params):
    """Return *params* without the stored-record bookkeeping fields."""
    return {
        name: value
        for name, value in params.items()
        if name not in _RECORD_METADATA_KEYS
    }


def _custom_grid_search(X_train, y_train, X_test, y_test, param_grid, progress_bar):
    """Exhaustively search *param_grid* and return the best parameter dict.

    Every combination of ``learning_rate``, ``max_depth``, ``n_estimators``
    and ``booster`` is fitted on the training data and scored with
    ``model.score`` on the test data. The first combination with the
    strictly highest score wins. *progress_bar* is updated with an integer
    percentage after each combination.

    Returns:
        The winning parameter dict, or ``None`` when no combination produced
        a score greater than negative infinity (e.g. an empty grid or only
        NaN scores).
    """
    # NOTE(review): candidates are scored on the same split that is later
    # used to report error metrics, so those metrics are optimistic. A
    # separate validation split would avoid this.
    combinations = list(product(*(param_grid[name] for name in _GRID_PARAM_NAMES)))
    total = len(combinations)

    model = XGBRegressor()
    best_score = float('-inf')
    best_params = None

    for done, values in enumerate(combinations, start=1):
        candidate = dict(zip(_GRID_PARAM_NAMES, values))
        model.set_params(**candidate)
        model.fit(X_train, y_train)
        score = model.score(X_test, y_test)
        if score > best_score:
            best_score = score
            best_params = candidate
        progress_bar.progress(int((done / total) * 100))

    return best_params


def _train_and_evaluate_generic(df, feature_fn, split_date, progress_bar):
    """Tune, train and evaluate a model, then plot train/test/predicted data.

    Args:
        df: Input frame containing the date and target price columns.
        feature_fn: Callable that adds the forecasting features to *df*.
        split_date: Rows dated before this go to the training set, the
            remaining rows to the test set.
        progress_bar: Streamlit progress bar updated during tuning.

    Returns:
        The best hyperparameter dict found by the grid search.

    Raises:
        ValueError: If either side of the split is empty or the grid search
            could not rank any candidate.
    """
    df = feature_fn(df)

    train_df = df[df[_DATE_COL] < split_date]
    test_df = df[df[_DATE_COL] >= split_date]
    if train_df.empty or test_df.empty:
        raise ValueError(
            f"Cannot train/evaluate: splitting at {split_date} leaves "
            f"{len(train_df)} training rows and {len(test_df)} test rows."
        )

    feature_drop = [_TARGET_COL, _DATE_COL]
    X_train = train_df.drop(columns=feature_drop)
    y_train = train_df[_TARGET_COL]
    X_test = test_df.drop(columns=feature_drop)
    y_test = test_df[_TARGET_COL]

    param_grid = {
        'learning_rate': [0.01, 0.1, 0.2],
        'max_depth': [3, 5, 7],
        'n_estimators': [50, 100, 150],
        'booster': ['gbtree', 'dart'],
    }

    st.write("Performing hyperparameter tuning...")
    best_params = _custom_grid_search(
        X_train, y_train, X_test, y_test, param_grid, progress_bar
    )
    if best_params is None:
        raise ValueError(
            "Hyperparameter search produced no usable score; "
            "the test split may be too small."
        )

    st.write("Training the best model and making predictions...")
    best_model = XGBRegressor(**best_params)
    best_model.fit(X_train, y_train)
    y_pred = best_model.predict(X_test)

    # mean_squared_error returns the MSE; take the square root so the value
    # shown under the "RMSE" label really is the root mean squared error.
    rmse = float(mean_squared_error(y_test, y_pred)) ** 0.5
    mae = mean_absolute_error(y_test, y_pred)
    st.write(f"RMSE: {rmse}")
    st.write(f"MAE: {mae}")

    # Prepare plot data
    predicted_plot_df = test_df[[_DATE_COL]].copy()
    predicted_plot_df[_TARGET_COL] = y_pred
    series = [
        ('Train', train_df, 'blue', None),
        ('Test', test_df, 'orange', None),
        ('Predicted', predicted_plot_df, 'green', 'dot'),
    ]

    import plotly.graph_objects as go

    fig = go.Figure()
    for plot_type, data, color, dash in series:
        fig.add_trace(go.Scatter(
            x=data[_DATE_COL],
            y=data[_TARGET_COL],
            mode='lines',
            name=f"{plot_type} Data",
            line=dict(color=color, dash=dash),
        ))
    fig.update_layout(
        title="Train, Test, and Predicted Data",
        xaxis_title="Date",
        yaxis_title="Modal Price (Rs./Quintal)",
        template="plotly_white",
    )
    st.plotly_chart(fig, use_container_width=True)

    return best_params


def train_and_evaluate(df):
    """Tune and evaluate the 14-day model; return its best hyperparameters."""
    progress_bar = st.progress(0)
    return _train_and_evaluate_generic(
        df, create_forecasting_features, pd.to_datetime('2024-01-01'), progress_bar
    )


def train_and_evaluate_1m(df):
    """Tune and evaluate the 30-day model; return its best hyperparameters."""
    progress_bar = st.progress(0)
    return _train_and_evaluate_generic(
        df, create_forecasting_features_1m, pd.to_datetime('2023-01-01'), progress_bar
    )


def train_and_evaluate_3m(df):
    """Tune and evaluate the 90-day model; return its best hyperparameters."""
    progress_bar = st.progress(0)
    return _train_and_evaluate_generic(
        df, create_forecasting_features_3m, pd.to_datetime('2023-01-01'), progress_bar
    )


def _forecast_next_days(df, best_params, key, feature_fn, horizon):
    """Fit on all of *df* and forecast *horizon* daily steps past its last date.

    The future dates are appended to *df* before *feature_fn* runs so that
    the features are computed over history and future rows together. The
    result is handed to ``plot_data`` and ``download_button``.
    """
    last_date = df[_DATE_COL].max()
    future_dates = pd.date_range(
        start=last_date + pd.Timedelta(days=1), periods=horizon
    )
    placeholder_df = pd.DataFrame({_DATE_COL: future_dates})

    full_df = pd.concat([df, placeholder_df], ignore_index=True)
    full_df = feature_fn(full_df)

    original_df = full_df[full_df[_DATE_COL] <= last_date].copy()
    future_df = full_df[full_df[_DATE_COL] > last_date].copy()

    non_features = [_TARGET_COL, _DATE_COL]
    X_train = original_df.drop(columns=non_features, errors='ignore')
    y_train = original_df[_TARGET_COL]
    X_future = future_df.drop(columns=non_features, errors='ignore')

    # best_params may be a raw stored record; strip its bookkeeping fields
    # so only hyperparameters reach the estimator.
    model = XGBRegressor(**_model_params(best_params))
    model.fit(X_train, y_train)
    future_df[_TARGET_COL] = model.predict(X_future)

    plot_data(original_df, future_df, last_date, model, horizon)
    download_button(future_df, key)


def forecast_next_14_days(df, _best_params, key):
    """Forecast, plot and offer for download the next 14 days of prices."""
    _forecast_next_days(df, _best_params, key, create_forecasting_features, 14)


def forecast_next_30_days(df, _best_params, key):
    """Forecast, plot and offer for download the next 30 days of prices."""
    _forecast_next_days(df, _best_params, key, create_forecasting_features_1m, 30)


def forecast_next_90_days(df, _best_params, key):
    """Forecast, plot and offer for download the next 90 days of prices."""
    _forecast_next_days(df, _best_params, key, create_forecasting_features_3m, 90)


# Supported horizon (days) -> (trainer, forecaster, best-params collection key).
_HORIZONS = {
    14: (train_and_evaluate, forecast_next_14_days, 'best_params_collection'),
    30: (train_and_evaluate_1m, forecast_next_30_days, 'best_params_collection_1m'),
    90: (train_and_evaluate_3m, forecast_next_90_days, 'best_params_collection_3m'),
}


def train_and_forecast(df, filter_key, days):
    """Train for the *days* horizon, store the best params, then forecast.

    Does nothing when *df* is ``None`` or *days* is not 14, 30 or 90.
    """
    cols = get_collections()
    if df is None or days not in _HORIZONS:
        return

    train_fn, forecast_fn, collection_name = _HORIZONS[days]
    best_params = train_fn(df)
    cols[collection_name].replace_one(
        {'filter_key': filter_key},
        {
            **best_params,
            'filter_key': filter_key,
            'last_updated': pd.Timestamp.now().isoformat(),
        },
        upsert=True,
    )
    forecast_fn(df, best_params, filter_key)


def get_best_params(filter_key, collection):
    """Return the record stored for *filter_key* in *collection* (falsy if absent)."""
    record = collection.find_one({"filter_key": filter_key})
    return record


def forecast(df, filter_key, days):
    """Forecast the *days* horizon using previously stored hyperparameters.

    Shows a warning instead of forecasting when no record exists for
    *filter_key*. Does nothing when *days* is not 14, 30 or 90.
    """
    cols = get_collections()
    if days not in _HORIZONS:
        return

    _, forecast_fn, collection_name = _HORIZONS[days]
    record = get_best_params(filter_key, cols[collection_name])
    if not record:
        st.warning("⚠️ Model is not trained yet. Please train the model first.")
        return

    st.info(f"ℹ️ The model was trained on {record['last_updated']}.")
    forecast_fn(df, record, filter_key)