Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| from xgboost import XGBRegressor | |
| from sklearn.metrics import mean_squared_error, mean_absolute_error | |
| from sklearn.preprocessing import MinMaxScaler | |
| from .features import ( | |
| create_forecasting_features, | |
| create_forecasting_features_1m, | |
| create_forecasting_features_3m, | |
| ) | |
| from .plotting import plot_data, download_button | |
| from .config import get_collections | |
def _custom_grid_search(X_train, y_train, X_test, y_test, param_grid, progress_bar):
    """Try every hyperparameter combination and return the best-scoring one.

    One ``XGBRegressor`` instance is reconfigured and refitted on
    ``(X_train, y_train)`` for each combination of the ``learning_rate``,
    ``max_depth``, ``n_estimators`` and ``booster`` entries of *param_grid*.
    Each fit is ranked by ``model.score(X_test, y_test)`` (presumably R^2,
    as for scikit-learn style regressors -- confirm against the xgboost
    version in use). On ties the earliest combination wins.

    Args:
        X_train, y_train: Data each candidate is fitted on.
        X_test, y_test: Data each candidate is scored on.
        param_grid: Mapping with the four keys named above, each holding a
            sized iterable of values to try.
        progress_bar: Object exposing ``progress(int)``; it receives the
            completed percentage after every fit.

    Returns:
        A dict with the four winning settings, or ``None`` when no
        combination produced a score greater than negative infinity
        (for example when one of the grid entries is empty).
    """
    estimator = XGBRegressor()

    # Enumerate the grid up front, in the same order nested loops would
    # visit it (the last key varies fastest).
    candidates = [
        {
            'learning_rate': rate,
            'max_depth': depth,
            'n_estimators': tree_count,
            'booster': booster_name,
        }
        for rate in param_grid['learning_rate']
        for depth in param_grid['max_depth']
        for tree_count in param_grid['n_estimators']
        for booster_name in param_grid['booster']
    ]
    total = len(candidates)

    top_score = float('-inf')
    winner = None
    for done, candidate in enumerate(candidates, start=1):
        estimator.set_params(**candidate)
        estimator.fit(X_train, y_train)
        candidate_score = estimator.score(X_test, y_test)
        # Strict comparison: an equal score never displaces an earlier winner.
        if candidate_score > top_score:
            top_score, winner = candidate_score, candidate
        progress_bar.progress(int((done / total) * 100))
    return winner
def _train_and_evaluate_generic(df, feature_fn, split_date, progress_bar):
    """Tune, fit and evaluate an XGBoost price model, then chart the result.

    Steps:
      1. Build features with ``feature_fn(df)``.
      2. Split by ``'Reported Date'``: rows before *split_date* are the
         training set, rows on or after it are the test set.
      3. Grid-search a fixed hyperparameter grid via ``_custom_grid_search``.
      4. Refit a fresh model with the winning settings, predict the test
         set, and show RMSE and MAE through ``st.write``.
      5. Draw train / test / predicted series with Plotly.

    NOTE(review): the grid search scores candidates on the same test split
    that the RMSE/MAE below are computed on, so those metrics are not an
    independent hold-out estimate.

    Args:
        df: Frame containing ``'Reported Date'`` and
            ``'Modal Price (Rs./Quintal)'`` columns.
        feature_fn: Callable taking and returning a DataFrame.
        split_date: Boundary compared against ``'Reported Date'``.
        progress_bar: Progress widget forwarded to the grid search.

    Returns:
        The best hyperparameter dict found by the grid search.
    """
    target_col = 'Modal Price (Rs./Quintal)'
    date_col = 'Reported Date'

    df = feature_fn(df)
    train_df = df[df[date_col] < split_date]
    test_df = df[df[date_col] >= split_date]

    X_train = train_df.drop(columns=[target_col, date_col])
    y_train = train_df[target_col]
    X_test = test_df.drop(columns=[target_col, date_col])
    y_test = test_df[target_col]

    param_grid = {
        'learning_rate': [0.01, 0.1, 0.2],
        'max_depth': [3, 5, 7],
        'n_estimators': [50, 100, 150],
        'booster': ['gbtree', 'dart'],
    }

    st.write("Performing hyperparameter tuning...")
    best_params = _custom_grid_search(X_train, y_train, X_test, y_test, param_grid, progress_bar)

    st.write("Training the best model and making predictions...")
    best_model = XGBRegressor(**best_params)
    best_model.fit(X_train, y_train)
    y_pred = best_model.predict(X_test)

    # mean_squared_error yields the MSE; take the square root so the value
    # shown under the "RMSE" label really is a root-mean-squared error.
    # (Done by hand rather than via a keyword/helper so it does not depend
    # on the installed scikit-learn version.)
    rmse = mean_squared_error(y_test, y_pred) ** 0.5
    mae = mean_absolute_error(y_test, y_pred)
    st.write(f"RMSE: {rmse}")
    st.write(f"MAE: {mae}")

    # Prepare plot data: one long frame tagged by series type.
    train_plot_df = train_df[[date_col, target_col]].copy()
    train_plot_df['Type'] = 'Train'
    test_plot_df = test_df[[date_col, target_col]].copy()
    test_plot_df['Type'] = 'Test'
    predicted_plot_df = test_df[[date_col]].copy()
    predicted_plot_df[target_col] = y_pred
    predicted_plot_df['Type'] = 'Predicted'
    plot_df = pd.concat([train_plot_df, test_plot_df, predicted_plot_df])

    import plotly.graph_objects as go

    fig = go.Figure()
    series_styles = [
        ('Train', 'blue', None),
        ('Test', 'orange', None),
        ('Predicted', 'green', 'dot'),
    ]
    for plot_type, color, dash in series_styles:
        data = plot_df[plot_df['Type'] == plot_type]
        fig.add_trace(go.Scatter(
            x=data[date_col],
            y=data[target_col],
            mode='lines',
            name=f"{plot_type} Data",
            line=dict(color=color, dash=dash),
        ))
    fig.update_layout(
        title="Train, Test, and Predicted Data",
        xaxis_title="Date",
        yaxis_title="Modal Price (Rs./Quintal)",
        template="plotly_white",
    )
    st.plotly_chart(fig, use_container_width=True)
    return best_params
def train_and_evaluate(df):
    """Tune and evaluate the model built on ``create_forecasting_features``.

    Creates a fresh Streamlit progress bar, uses ``'2024-01-01'`` as the
    train/test boundary, and returns whatever
    ``_train_and_evaluate_generic`` returns (the best hyperparameters).
    """
    bar = st.progress(0)
    return _train_and_evaluate_generic(
        df,
        feature_fn=create_forecasting_features,
        split_date='2024-01-01',
        progress_bar=bar,
    )
def train_and_evaluate_1m(df):
    """Tune and evaluate the model built on ``create_forecasting_features_1m``.

    Creates a fresh Streamlit progress bar, uses 2023-01-01 (as a pandas
    timestamp) as the train/test boundary, and returns whatever
    ``_train_and_evaluate_generic`` returns (the best hyperparameters).
    """
    bar = st.progress(0)
    return _train_and_evaluate_generic(
        df,
        feature_fn=create_forecasting_features_1m,
        split_date=pd.to_datetime('2023-01-01'),
        progress_bar=bar,
    )
def train_and_evaluate_3m(df):
    """Tune and evaluate the model built on ``create_forecasting_features_3m``.

    Creates a fresh Streamlit progress bar, uses 2023-01-01 (as a pandas
    timestamp) as the train/test boundary, and returns whatever
    ``_train_and_evaluate_generic`` returns (the best hyperparameters).
    """
    bar = st.progress(0)
    return _train_and_evaluate_generic(
        df,
        feature_fn=create_forecasting_features_3m,
        split_date=pd.to_datetime('2023-01-01'),
        progress_bar=bar,
    )
def _forecast_horizon(df, best_params, key, feature_fn, horizon_days):
    """Fit on all of *df* and predict the following *horizon_days* days.

    The frame is extended with one placeholder row per future day (daily
    frequency, starting the day after the latest ``'Reported Date'``),
    features are built over history and placeholders together with
    *feature_fn*, and the frame is then split back at the last observed
    date. A model configured with *best_params* is fitted on the
    historical rows and its predictions fill the target column of the
    future rows. The result is handed to ``plot_data`` and
    ``download_button``.

    Args:
        df: Historical frame with ``'Reported Date'`` and
            ``'Modal Price (Rs./Quintal)'`` columns.
        best_params: Keyword arguments for ``XGBRegressor``.
        key: Passed through to ``download_button``.
        feature_fn: Callable taking and returning a DataFrame.
        horizon_days: Number of future days to forecast.
    """
    target_col = 'Modal Price (Rs./Quintal)'
    date_col = 'Reported Date'
    non_feature_cols = [target_col, date_col]

    last_date = df[date_col].max()
    future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=horizon_days)
    placeholder_df = pd.DataFrame({date_col: future_dates})

    # Features are computed on history + placeholders in one pass so the
    # future rows get feature values from the same builder.
    full_df = pd.concat([df, placeholder_df], ignore_index=True)
    full_df = feature_fn(full_df)

    original_df = full_df[full_df[date_col] <= last_date].copy()
    future_df = full_df[full_df[date_col] > last_date].copy()

    # errors='ignore': tolerate a feature builder that already dropped
    # one of these columns.
    X_train = original_df.drop(columns=non_feature_cols, errors='ignore')
    y_train = original_df[target_col]
    X_future = future_df.drop(columns=non_feature_cols, errors='ignore')

    model = XGBRegressor(**best_params)
    model.fit(X_train, y_train)
    future_df[target_col] = model.predict(X_future)

    plot_data(original_df, future_df, last_date, model, horizon_days)
    download_button(future_df, key)


def forecast_next_14_days(df, _best_params, key):
    """Forecast 14 days ahead using ``create_forecasting_features``.

    Args:
        df: Historical price frame.
        _best_params: Keyword arguments for ``XGBRegressor``.
        key: Passed through to ``download_button``.
    """
    _forecast_horizon(df, _best_params, key, create_forecasting_features, 14)


def forecast_next_30_days(df, _best_params, key):
    """Forecast 30 days ahead using ``create_forecasting_features_1m``.

    Args:
        df: Historical price frame.
        _best_params: Keyword arguments for ``XGBRegressor``.
        key: Passed through to ``download_button``.
    """
    _forecast_horizon(df, _best_params, key, create_forecasting_features_1m, 30)


def forecast_next_90_days(df, _best_params, key):
    """Forecast 90 days ahead using ``create_forecasting_features_3m``.

    Args:
        df: Historical price frame.
        _best_params: Keyword arguments for ``XGBRegressor``.
        key: Passed through to ``download_button``.
    """
    _forecast_horizon(df, _best_params, key, create_forecasting_features_3m, 90)
def train_and_forecast(df, filter_key, days):
    """Train for the requested horizon, persist the result, then forecast.

    For ``days`` equal to 14, 30 or 90 the matching trainer is run, its
    best hyperparameters are upserted into the matching collection
    (together with ``filter_key`` and an ISO ``last_updated`` timestamp),
    and the matching forecaster is invoked. Nothing happens when *df* is
    ``None`` or *days* is any other value.

    Args:
        df: Historical price frame, or ``None``.
        filter_key: Identifies the stored parameter document; also passed
            to the forecaster.
        days: Forecast horizon (14, 30 or 90).
    """
    cols = get_collections()
    if df is None:
        return

    # Pick trainer / collection name / forecaster for this horizon.
    if days == 14:
        trainer, store_name, forecaster = (
            train_and_evaluate, 'best_params_collection', forecast_next_14_days)
    elif days == 30:
        trainer, store_name, forecaster = (
            train_and_evaluate_1m, 'best_params_collection_1m', forecast_next_30_days)
    elif days == 90:
        trainer, store_name, forecaster = (
            train_and_evaluate_3m, 'best_params_collection_3m', forecast_next_90_days)
    else:
        return

    best_params = trainer(df)
    collection = cols[store_name]
    document = {
        **best_params,
        'filter_key': filter_key,
        'last_updated': pd.Timestamp.now().isoformat(),
    }
    collection.replace_one({'filter_key': filter_key}, document, upsert=True)
    forecaster(df, best_params, filter_key)
def get_best_params(filter_key, collection):
    """Look up the stored parameter document for *filter_key*.

    Args:
        filter_key: Value matched against the ``filter_key`` field.
        collection: Object exposing ``find_one(query_dict)``.

    Returns:
        Whatever ``collection.find_one`` returns for the query
        ``{"filter_key": filter_key}``.
    """
    return collection.find_one({"filter_key": filter_key})
def forecast(df, filter_key, days):
    """Forecast with previously stored hyperparameters, if any exist.

    For ``days`` equal to 14, 30 or 90 the matching collection is queried
    for *filter_key*. When a record exists its training time is shown and
    the matching forecaster is run; otherwise a warning asks the user to
    train first. Any other *days* value does nothing.

    The stored record is not passed to the forecaster as-is: the
    bookkeeping fields written next to the hyperparameters
    (``filter_key``, ``last_updated``) and the ``_id`` field are removed
    first, so only model settings reach ``XGBRegressor``.
    NOTE(review): ``_id`` assumes a MongoDB-style store -- confirm.

    Args:
        df: Historical price frame.
        filter_key: Identifies the stored parameter document; also passed
            to the forecaster.
        days: Forecast horizon (14, 30 or 90).
    """
    # (horizon, collection name, forecaster)
    plans = (
        (14, 'best_params_collection', forecast_next_14_days),
        (30, 'best_params_collection_1m', forecast_next_30_days),
        (90, 'best_params_collection_3m', forecast_next_90_days),
    )
    # Fields of the stored document that are not model hyperparameters.
    metadata_keys = ('_id', 'filter_key', 'last_updated')

    cols = get_collections()
    for horizon, collection_name, forecaster in plans:
        if days != horizon:
            continue

        record = get_best_params(filter_key, cols[collection_name])
        if not record:
            st.warning("⚠️ Model is not trained yet. Please train the model first.")
            return

        st.info(f"ℹ️ The model was trained on {record['last_updated']}.")
        best_params = {
            name: value
            for name, value in record.items()
            if name not in metadata_keys
        }
        forecaster(df, best_params, filter_key)
        return