| import plotly.express as px |
| import numpy as np |
| import plotly.graph_objects as go |
| import streamlit as st |
| import pandas as pd |
| import statsmodels.api as sm |
| from sklearn.metrics import mean_absolute_percentage_error |
| import sys |
| import os |
| from utilities import set_header, load_local_css |
| import seaborn as sns |
| import matplotlib.pyplot as plt |
| import tempfile |
| from sklearn.preprocessing import MinMaxScaler |
| |
| |
| |
| import sys |
| import re |
| import pickle |
| from sklearn.metrics import r2_score, mean_absolute_percentage_error |
| from Data_prep_functions import plot_actual_vs_predicted |
| import sqlite3 |
| from utilities import set_header, load_local_css,update_db,project_selection |
# Raise the interpreter recursion limit to 10**6.
| sys.setrecursionlimit(10**6) |
|
|
# Point sys.stdout at "temp_stdout.txt", close that file straight away and
# restore the original stream.
# NOTE(review): nothing is printed in between, so the visible effect is only
# creating/truncating temp_stdout.txt -- confirm this is still needed.
| original_stdout = sys.stdout |
| sys.stdout = open("temp_stdout.txt", "w") |
| sys.stdout.close() |
| sys.stdout = original_stdout |
|
|
# Streamlit page configuration, local stylesheet and shared header.
| st.set_page_config(layout="wide") |
| load_local_css("styles.css") |
| set_header() |
|
|
| |
|
|
|
|
# Seed session-state keys read later on the page.
| if 'username' not in st.session_state: |
| st.session_state['username']=None |
|
|
| if "project_name" not in st.session_state: |
| st.session_state["project_name"] = None |
|
|
# Without a loaded project, call project_selection() and stop this run.
| if "project_dct" not in st.session_state: |
| project_selection() |
| st.stop() |
|
|
# Load bin_dict from the project's data_import.pkl.
# NOTE(review): any exception (missing file, missing "project_path", bad
# pickle) ends the run with a warning that talks about tuned models, although
# the file read here is data_import.pkl -- confirm the intended message.
# NOTE(review): pickle.load runs arbitrary code from the file; the project
# path must be trusted.
| try: |
| with open(os.path.join(st.session_state["project_path"], "data_import.pkl"), "rb") as f: |
| data = pickle.load(f) |
| st.session_state["bin_dict"] = data["bin_dict"] |
| except Exception as e: |
| st.warning('Save atleast one tuned model to proceed') |
| st.stop() |
|
|
# Guarantee the 'Panel Level 1' key exists (empty list when absent).
| st.session_state["bin_dict"]['Panel Level 1']=st.session_state["bin_dict"].get('Panel Level 1',[]) |
|
|
# Default for the model-metrics grid shown further down the page.
| if 'gd_table' not in st.session_state: |
| st.session_state['gd_table']=pd.DataFrame() |
|
|
# Page body guard: only continue for a logged-in user.
# NOTE(review): indentation was lost in this copy of the file, so how far the
# body of this `if` extends cannot be determined from here.
| if 'username' in st.session_state and st.session_state['username'] is not None: |
|
|
# Open the user database and create a cursor.
# NOTE(review): neither `conn` nor `c` is used or closed in the visible code.
| conn = sqlite3.connect( |
| r"DB/User.db", check_same_thread=False |
| ) |
| c = conn.cursor() |
|
|
# The page needs tuned_model.pkl in the project folder; otherwise stop.
| if not os.path.exists( |
| os.path.join(st.session_state["project_path"], "tuned_model.pkl") |
| ): |
| st.error("Please save a tuned model") |
| st.stop() |
|
|
# Restore selected keys from the state saved by the model-tuning page, but
# only those not already present in the current session.
| if ( |
| "session_state_saved" in st.session_state["project_dct"]["model_tuning"].keys() |
| and st.session_state["project_dct"]["model_tuning"]["session_state_saved"] != [] |
| ): |
| for key in ["used_response_metrics", "media_data", "bin_dict"]: |
| if key not in st.session_state: |
| st.session_state[key] = st.session_state["project_dct"]["model_tuning"][ |
| "session_state_saved" |
| ][key] |
| |
| |
| |
|
|
| media_data = st.session_state["media_data"] |
|
|
| |
|
|
| |
# Panel mode is on when bin_dict lists at least one 'Panel Level 1' column.
| is_panel = True if len(st.session_state["bin_dict"]['Panel Level 1']) > 0 else False |
| |
| if is_panel: |
|
|
# Normalise the panel column names (lower-case, separators replaced or
# removed) and keep the first one as the panel column.
| panel_col = [ |
| col.lower() |
| .replace(".", "_") |
| .replace("@", "_") |
| .replace(" ", "_") |
| .replace("-", "") |
| .replace(":", "") |
| .replace("__", "_") |
| for col in st.session_state["bin_dict"]["Panel Level 1"] |
| ][ |
| 0 |
| ] |
|
|
|
|
|
# Name of the date column used when plotting actual vs predicted.
| date_col = "date" |
|
|
def plot_residual_predicted(actual, predicted, df_):
    """Plot standardized residuals against predicted values.

    Adds two columns to ``df_`` in place: ``Residuals`` (actual minus
    predicted) and ``StdResidual`` (residuals centred on their mean and
    divided by their sample standard deviation). Returns a Plotly scatter of
    ``StdResidual`` over ``predicted`` with reference lines at 0 and +/-2.

    Args:
        actual: Observed target values, one per row of ``df_``.
        predicted: Predictions in the same row order as ``actual``.
        df_: DataFrame that receives the residual columns (mutated).

    Returns:
        The Plotly figure.
    """
    # Subtract by position rather than by index label: the predictions may
    # carry a different index than ``actual`` (for example after a merge),
    # and label alignment would then produce NaN or mismatched residuals.
    residuals = np.asarray(actual, dtype=float) - np.asarray(predicted, dtype=float)
    df_["Residuals"] = residuals
    df_["StdResidual"] = (df_["Residuals"] - df_["Residuals"].mean()) / df_[
        "Residuals"
    ].std()

    fig = px.scatter(
        df_,
        x=predicted,
        y="StdResidual",
        opacity=0.5,
        color_discrete_sequence=["#11B6BD"],
    )

    # Reference lines: zero and the +/-2 standardized-residual band.
    fig.add_hline(y=0, line_dash="dash", line_color="darkorange")
    fig.add_hline(y=2, line_color="red")
    fig.add_hline(y=-2, line_color="red")

    fig.update_xaxes(title="Predicted")
    fig.update_yaxes(title="Standardized Residuals (Actual - Predicted)")

    fig.update_layout(
        title="Residuals over Predicted Values",
        autosize=False,
        width=600,
        height=400,
    )

    return fig
|
|
def residual_distribution(actual, predicted):
    """Draw a histogram (with KDE overlay) of the residuals.

    Args:
        actual: Observed target values.
        predicted: Predictions in the same row order as ``actual``.

    Returns:
        The ``matplotlib.pyplot`` module, with the new figure as its current
        figure (the caller passes it to ``st.pyplot``).
    """
    # Positional subtraction: the two inputs may not share index labels, and
    # label alignment would silently introduce NaN residuals.
    residuals = np.asarray(actual, dtype=float) - np.asarray(predicted, dtype=float)

    sns.set(style="whitegrid")
    plt.figure(figsize=(6, 4))
    sns.histplot(residuals, kde=True, color="#11B6BD")

    plt.title(" Distribution of Residuals")
    plt.xlabel("Residuals")
    plt.ylabel("Probability Density")

    return plt
|
|
def qqplot(actual, predicted):
    """Build a QQ plot of the standardized residuals.

    Args:
        actual: Observed target values.
        predicted: Predictions in the same row order as ``actual``.

    Returns:
        A Plotly figure with the sample quantiles plotted against the
        theoretical quantiles, plus a red reference line from (-2, -2) to
        (2, 2).
    """
    # Positional subtraction avoids NaN residuals when the two inputs carry
    # different index labels.
    residuals = pd.Series(
        np.asarray(actual, dtype=float) - np.asarray(predicted, dtype=float)
    )
    standardized = (residuals - residuals.mean()) / residuals.std()

    # Build the probability plot once and read both quantile sets from it.
    prob_plot = sm.ProbPlot(standardized)

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=prob_plot.theoretical_quantiles,
            y=prob_plot.sample_quantiles,
            mode="markers",
            marker=dict(size=5, color="#11B6BD"),
            name="QQ Plot",
        )
    )

    # Diagonal reference line.
    fig.add_trace(
        go.Scatter(
            x=[-2, 2],
            y=[-2, 2],
            mode="lines",
            line=dict(color="red"),
            name=" ",
        )
    )

    fig.update_layout(
        title="QQ Plot of Residuals",
        title_x=0.5,
        autosize=False,
        width=600,
        height=400,
        xaxis_title="Theoretical Quantiles",
        yaxis_title="Sample Quantiles",
    )

    return fig
|
|
def get_random_effects(media_data, panel_col, mdf):
    """Collect one random effect per panel value into a DataFrame.

    Args:
        media_data: DataFrame containing the ``panel_col`` column.
        panel_col: Name of the panel (group) column.
        mdf: Fitted model exposing ``random_effects``, a mapping from panel
            value to an object whose ``.values[0]`` is taken as that panel's
            effect (presumably the random intercept -- confirm against the
            model specification).

    Returns:
        DataFrame with columns ``[panel_col, "random_effect"]``, one row per
        unique panel value in order of first appearance. ``random_effect``
        is float typed.
    """
    markets = media_data[panel_col].unique()
    effects = [mdf.random_effects[market].values[0] for market in markets]
    # Build the frame in one go: no per-row enlargement, no debug printing,
    # and a numeric effects column instead of an object-typed one.
    return pd.DataFrame(
        {
            panel_col: markets,
            "random_effect": np.array(effects, dtype=float),
        },
        columns=[panel_col, "random_effect"],
    )
|
|
def mdf_predict(X_df, mdf, random_eff_df):
    """Predict with a mixed model: fixed-effect prediction plus random effect.

    Left-joins each row's random effect from ``random_eff_df`` on the
    module-level ``panel_col``, asks ``mdf`` for its prediction on the joined
    frame, and stores the sum of the two in a ``pred`` column.

    Args:
        X_df: Feature frame containing the panel column (not modified).
        mdf: Fitted model with a ``predict`` method.
        random_eff_df: Frame with the panel column and ``random_effect``.

    Returns:
        The joined frame with a ``pred`` column and without the helper
        columns ``pred_fixed_effect`` and ``random_effect``.
    """
    effects = random_eff_df[[panel_col, "random_effect"]]
    scored = X_df.merge(effects, how="left", on=panel_col)

    scored["pred_fixed_effect"] = mdf.predict(scored)
    scored["pred"] = scored["pred_fixed_effect"] + scored["random_effect"]

    return scored.drop(columns=["pred_fixed_effect", "random_effect"])
|
|
def metrics_df_panel(model_dict, is_panel):
    """Build the per-model metrics table.

    Args:
        model_dict: Mapping whose keys contain ``"__"`` (the part after the
            first ``"__"`` is the target name) and whose values are dicts with
            ``"Model_object"``, ``"feature_set"``, ``"X_train_tuned"`` and
            ``"X_test_tuned"``.
        is_panel: When true, predictions go through ``get_random_effects`` /
            ``mdf_predict`` (using the module-level ``media_data`` and
            ``panel_col``); otherwise ``Model_object.predict`` is called on
            the feature-set columns.

    Returns:
        DataFrame with columns Model, R2, ADJR2, Train Mape, Test Mape,
        Summary, Model_object -- one row per model, numeric metrics rounded to
        two decimals. ADJR2 is NaN when there are not enough observations for
        the adjustment (``len(y) - n_features - 1 <= 0``).
    """
    columns = [
        "Model",
        "R2",
        "ADJR2",
        "Train Mape",
        "Test Mape",
        "Summary",
        "Model_object",
    ]
    rows = []

    for key, entry in model_dict.items():
        target = key.split("__")[1]
        model = entry["Model_object"]
        feature_set = entry["feature_set"]
        train_df = entry["X_train_tuned"]
        test_df = entry["X_test_tuned"]

        if is_panel:
            # Random effects are derived once and reused for train and test.
            random_df = get_random_effects(media_data, panel_col, model)
            pred = mdf_predict(train_df, model, random_df)["pred"]
            predtest = mdf_predict(test_df, model, random_df)["pred"]
        else:
            pred = model.predict(train_df[feature_set])
            predtest = model.predict(test_df[feature_set])

        y = train_df[target]
        ytest = test_df[target]

        r2 = r2_score(y, pred)
        # Adjusted R2 is undefined without positive residual degrees of
        # freedom; report NaN instead of dividing by zero.
        dof = len(y) - len(feature_set) - 1
        adj_r2 = 1 - (1 - r2) * (len(y) - 1) / dof if dof > 0 else float("nan")

        # Round explicitly so the result does not depend on how DataFrame
        # rounding treats object-typed columns.
        rows.append(
            {
                "Model": target,
                "R2": round(float(r2), 2),
                "ADJR2": round(float(adj_r2), 2),
                "Train Mape": round(
                    float(mean_absolute_percentage_error(y, pred)), 2
                ),
                "Test Mape": round(
                    float(mean_absolute_percentage_error(ytest, predtest)), 2
                ),
                "Summary": model.summary(),
                "Model_object": model,
            }
        )

    return pd.DataFrame(rows, columns=columns)
|
|
# Load the transformed modelling data from the project folder.
# NOTE(review): `transformed_data` is not referenced again in the visible
# code -- confirm whether it is still needed.
# NOTE(review): all three pickle.load calls below trust the files in
# project_path; pickle executes arbitrary code on load.
| with open( |
| os.path.join(st.session_state["project_path"], "final_df_transformed.pkl"), |
| "rb", |
| ) as f: |
| data = pickle.load(f) |
| transformed_data = data["final_df_transformed"] |
# Re-read bin_dict from data_import.pkl (the same file is also read near the
# top of the page); this overwrites the session's bin_dict.
| with open( |
| os.path.join(st.session_state["project_path"], "data_import.pkl"), "rb" |
| ) as f: |
| data = pickle.load(f) |
| st.session_state["bin_dict"] = data["bin_dict"] |
# Load the tuned models and index their feature sets by target name (the part
# of each key after the first "__").
| with open( |
| os.path.join(st.session_state["project_path"], "tuned_model.pkl"), "rb" |
| ) as file: |
| tuned_model_dict = pickle.load(file) |
| feature_set_dct = { |
| key.split("__")[1]: key_dict["feature_set"] |
| for key, key_dict in tuned_model_dict.items() |
| } |
|
|
| |
|
|
|
|
|
|
# Default for the contribution table computed further down.
| if "contribution_df" not in st.session_state: |
| st.session_state["contribution_df"] = None |
|
|
def map_channel(transformed_var, channel_dict):
    """Map a transformed variable name to its channel.

    A channel matches when one of its raw variable names occurs as a
    substring of ``transformed_var``. When several raw names match, the
    longest one wins, so that e.g. ``"ctv_spend_adstock"`` resolves to the
    channel owning ``"ctv_spend"`` rather than the one owning ``"tv_spend"``.
    Ties keep the first match in dictionary order. Empty raw names are
    ignored, since they would match every variable.

    Args:
        transformed_var: Name of the transformed/model variable.
        channel_dict: Mapping of channel name to a list of raw variable names.

    Returns:
        The matching channel name, or ``transformed_var`` unchanged when no
        channel matches.
    """
    best_channel = None
    best_length = 0
    for channel, raw_vars in channel_dict.items():
        for raw_var in raw_vars:
            if raw_var and len(raw_var) > best_length and raw_var in transformed_var:
                best_channel = channel
                best_length = len(raw_var)
    return transformed_var if best_channel is None else best_channel
|
|
|
|
def contributions_nonpanel(model_dict):
    """Compute percentage contributions per channel for non-panel models.

    For every model, each coefficient is multiplied by its feature column to
    get a ``<feature>_contr`` column. The constant, flag features (``_flag``
    in the name), the tuning features (week number, trend, sine/cosine wave)
    and exogenous features are pooled into a single ``base`` contribution.
    The remaining contributions are summed over the training rows, mapped to
    channels via ``map_channel`` and expressed as a percentage of the total.

    Args:
        model_dict: Mapping whose keys contain ``"__"`` (target name after
            the first ``"__"``) and whose values hold ``"Model_object"``,
            ``"feature_set"`` and ``"X_train_tuned"``.

    Returns:
        DataFrame with a ``Channel`` column and one percentage column per
        target, outer-merged on ``Channel``.

    Side effects:
        Reads ``channel_groups.pkl`` from the project folder and writes the
        last model's contribution frame to
        ``Test/smr_x_train_contribution.csv``.
    """
    # NOTE(review): pickle.load executes arbitrary code from the file; the
    # project folder must be trusted.
    with open(
        os.path.join(st.session_state["project_path"], "channel_groups.pkl"), "rb"
    ) as f:
        channels = pickle.load(f)

    tuning_names = {
        "Week_number_contr",
        "Trend_contr",
        "sine_wave_contr",
        "cosine_wave_contr",
    }
    # Exogenous variable names, normalised the same way as column names.
    exog_vars = [
        var.lower()
        .replace(".", "_")
        .replace("@", "_")
        .replace(" ", "_")
        .replace("-", "")
        .replace(":", "")
        .replace("__", "_")
        for var in st.session_state["bin_dict"].get("Exogenous", [])
    ]

    contribution_df = pd.DataFrame(columns=["Channel"])

    for key, entry in model_dict.items():
        model = entry["Model_object"]
        target = key.split("__")[1]
        X_train = entry["X_train_tuned"]

        x_train_contribution = X_train.copy()
        x_train_contribution["pred"] = model.predict(X_train[entry["feature_set"]])

        # Track the contribution columns explicitly instead of selecting them
        # with a "contr" regex, which would also pick up raw feature columns
        # whose names merely contain "contr".
        contr_cols = []
        has_const = False
        for feature, coef in pd.Series(model.params).items():
            if feature == "const":
                x_train_contribution["const"] = coef
                has_const = True
            else:
                contr_name = str(feature) + "_contr"
                x_train_contribution[contr_name] = (
                    coef * x_train_contribution[feature]
                )
                contr_cols.append(contr_name)

        tuning_cols = [c for c in contr_cols if c in tuning_names]
        flag_cols = [c for c in contr_cols if "_flag" in c]
        exog_cols = [
            c for c in contr_cols if any(exog_var in c for exog_var in exog_vars)
        ]

        # A model fitted without a constant has no "const" column, and one
        # column may qualify for several base groups; de-duplicate so it is
        # counted once.
        base_cols = list(
            dict.fromkeys(
                (["const"] if has_const else []) + flag_cols + tuning_cols + exog_cols
            )
        )
        x_train_contribution["base_contr"] = x_train_contribution[base_cols].sum(
            axis=1
        )
        x_train_contribution.drop(columns=base_cols, inplace=True)

        # Make sure the target folder exists so the dump cannot fail the page.
        os.makedirs("Test", exist_ok=True)
        x_train_contribution.to_csv("Test/smr_x_train_contribution.csv", index=False)

        pooled = set(base_cols)
        media_cols = [c for c in contr_cols if c not in pooled]
        totals = x_train_contribution[media_cols + ["base_contr"]].sum(axis=0)

        contri_df = totals.rename_axis("Channel").reset_index(name=target)
        contri_df["Channel"] = contri_df["Channel"].apply(
            lambda name: map_channel(name, channels)
        )
        # Several variables can map to one channel; combine them so each
        # channel appears once and the outer merge below stays one-to-one.
        contri_df = contri_df.groupby("Channel", as_index=False, sort=False)[
            target
        ].sum()
        contri_df[target] = 100 * contri_df[target] / contri_df[target].sum()
        # Assign the result back: an in-place replace on a selected column is
        # not guaranteed to modify the parent frame.
        contri_df["Channel"] = contri_df["Channel"].replace("base_contr", "base")

        contribution_df = pd.merge(
            contribution_df, contri_df, on="Channel", how="outer"
        )

    return contribution_df
|
|
def contributions_panel(model_dict):
    """Compute percentage contributions per channel for panel models.

    For every model, the per-panel effect (fixed intercept plus random
    effect) and the tuning features (week number, trend, sine/cosine wave)
    are pooled into a single ``base`` contribution. Every other fixed-effect
    coefficient is multiplied by its feature column, summed over the training
    rows, and expressed as a percentage of the total. Channel names are the
    feature names cut at the first ``_impres`` or ``_clicks``.

    Args:
        model_dict: Mapping whose keys contain ``"__"`` (target name after
            the first ``"__"``) and whose values hold ``"Model_object"`` and
            ``"X_train_tuned"``.

    Returns:
        DataFrame with a ``Channel`` column and one percentage column per
        target, outer-merged on ``Channel``.
    """
    media_data = st.session_state["media_data"]
    tuning_names = {
        "Week_number_contr",
        "Trend_contr",
        "sine_wave_contr",
        "cosine_wave_contr",
    }
    contribution_df = pd.DataFrame(columns=["Channel"])

    for key, entry in model_dict.items():
        model = entry["Model_object"]
        target = key.split("__")[1]

        # Per-panel intercept = fixed intercept + that panel's random effect.
        random_eff_df = get_random_effects(media_data, panel_col, model)
        random_eff_df["fixed_effect"] = model.fe_params["Intercept"]
        random_eff_df["panel_effect"] = (
            random_eff_df["random_effect"] + random_eff_df["fixed_effect"]
        )

        x_train_contribution = mdf_predict(
            entry["X_train_tuned"], model, random_eff_df
        )
        x_train_contribution = pd.merge(
            x_train_contribution,
            random_eff_df[[panel_col, "panel_effect"]],
            on=panel_col,
            how="left",
        )

        # Skip the intercept by name (it is already inside panel_effect)
        # rather than assuming it is the first parameter, and track the
        # created columns instead of selecting them with a "contr" regex.
        contr_cols = []
        for feature, coef in pd.Series(model.fe_params).items():
            if feature == "Intercept":
                continue
            contr_name = str(feature) + "_contr"
            x_train_contribution[contr_name] = coef * x_train_contribution[feature]
            contr_cols.append(contr_name)

        base_cols = ["panel_effect"] + [c for c in contr_cols if c in tuning_names]
        x_train_contribution["base_contr"] = x_train_contribution[base_cols].sum(
            axis=1
        )
        x_train_contribution.drop(columns=base_cols, inplace=True)

        pooled = set(base_cols)
        media_cols = [c for c in contr_cols if c not in pooled]
        totals = x_train_contribution[media_cols + ["base_contr"]].sum(axis=0)

        contri_df = totals.rename_axis("Channel").reset_index(name=target)
        contri_df["Channel"] = (
            contri_df["Channel"]
            .str.split("(_impres|_clicks)")
            .apply(lambda parts: parts[0])
        )
        # Impression and click variables of one channel collapse to the same
        # name; combine them so each channel appears once.
        contri_df = contri_df.groupby("Channel", as_index=False, sort=False)[
            target
        ].sum()
        contri_df[target] = 100 * contri_df[target] / contri_df[target].sum()
        # Assign the result back: an in-place replace on a selected column is
        # not guaranteed to modify the parent frame.
        contri_df["Channel"] = contri_df["Channel"].replace("base_contr", "base")

        contribution_df = pd.merge(
            contribution_df, contri_df, on="Channel", how="outer"
        )

    return contribution_df
|
|
# Metrics for every tuned model (one row per model).
| metrics_table = metrics_df_panel(tuned_model_dict,is_panel) |
|
|
# Header row: user on the left, current project on the right.
| cols1 = st.columns([2, 1]) |
| with cols1[0]: |
| st.markdown(f"**Welcome {st.session_state['username']}**") |
| with cols1[1]: |
| st.markdown( |
| f"**Current Project: {st.session_state['project_name']}**" |
| ) |
|
|
| st.title("AI Model Results") |
|
|
| st.header('Contribution Overview') |
|
|
| options = st.session_state["used_response_metrics"] |
|
|
|
|
# Normalise the response-metric names.
# NOTE(review): unlike the panel/exogenous column normalisation elsewhere in
# this file, "." and "@" are not replaced here -- confirm that is intended.
| options = [ |
| opt.lower() |
| .replace(" ", "_") |
| .replace("-", "") |
| .replace(":", "") |
| .replace("__", "_") |
| for opt in options |
| ] |
|
|
# Previously saved selection, falling back to the last option.
| default_options = ( |
| st.session_state["project_dct"]["saved_model_results"].get("selected_options") |
| if st.session_state["project_dct"]["saved_model_results"].get( |
| "selected_options" |
| ) |
| is not None |
| else [options[-1]] |
| ) |
# Drop saved selections that are no longer valid options.
# NOTE(review): this removes items from the list while iterating over it,
# which can skip elements, and it mutates the list stored in project_dct.
# `default_options` is also not used by the multiselect in the visible code
# (it passes default=options[0]) -- confirm the intended default.
| for i in default_options: |
| if i not in options: |
| |
| default_options.remove(i) |
|
|
def format_display(inp):
    """Turn an internal metric name into a display label.

    Underscores become spaces, every word is title-cased, and surrounding
    whitespace is removed (``"total_sales"`` -> ``"Total Sales"``).
    """
    spaced = inp.replace("_", " ")
    label = spaced.title()
    return label.strip()
|
|
# Response metrics whose contributions are compared in the charts below;
# the first option is pre-selected.
| contribution_selections = st.multiselect( |
| "Select the Response Metrics to compare contributions", |
| options, |
| default=options[0], |
| format_func=format_display, |
| ) |
| trace_data = [] |
|
|
# Recompute the contribution table on every run, using the panel or
# non-panel routine depending on the data.
| if is_panel: |
| st.session_state["contribution_df"] = contributions_panel(tuned_model_dict) |
|
|
| else: |
| st.session_state["contribution_df"] = contributions_nonpanel(tuned_model_dict) |
|
|
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
|
|
def create_grouped_bar_plot(contribution_df, contribution_selections):
    """Build a grouped bar chart of contributions per channel.

    One bar group per channel, one bar per selected response metric. The
    ``base`` row is shown first as "Base Sales"; the remaining channels are
    ordered by descending mean contribution across the selections. Rows
    named ``const`` are ignored.

    Args:
        contribution_df: Frame with a ``Channel`` column and one numeric
            column per response metric.
        contribution_selections: Names of the metric columns to plot.

    Returns:
        A Plotly figure.
    """
    channel_names = contribution_df["Channel"].tolist()

    # Per-channel list of contributions, one entry per selection.
    all_contributions = {
        name: [] for name in channel_names if name not in ["const", "base"]
    }
    base_sales_dict = {}

    for selection in contribution_selections:
        raw = contribution_df[selection].to_numpy(dtype=float)
        # A channel missing from a model shows up as NaN after the outer
        # merge; treat it as a zero contribution so labels and the axis
        # range stay well defined.
        contributions = np.where(np.isnan(raw), 0.0, raw)

        base_sales = 0
        # Sum per channel first so repeated channel rows still yield exactly
        # one value per selection.
        totals = dict.fromkeys(all_contributions, 0.0)
        for channel_name, contribution in zip(channel_names, contributions):
            if channel_name in totals:
                totals[channel_name] += contribution
            elif channel_name == "base":
                base_sales = contribution
        for channel_name, total in totals.items():
            all_contributions[channel_name].append(total)

        base_sales_dict[selection] = base_sales

    # Largest average contribution first; base always leads.
    sorted_channels = sorted(
        all_contributions.items(),
        key=lambda item: -np.mean(item[1]) if item[1] else 0.0,
    )
    sorted_channel_names = ["Base Sales"] + [name for name, _ in sorted_channels]

    trace_data = []
    max_value = 0

    for i, selection in enumerate(contribution_selections):
        display_contribution = [base_sales_dict[selection]] + [
            all_contributions[name][i] for name in sorted_channel_names[1:]
        ]

        # Bar labels: whole-number percentages.
        text_values = [
            f"{val}%" for val in np.round(display_contribution, 0).astype(int)
        ]

        max_value = max(max_value, max(display_contribution))

        trace_data.append(
            go.Bar(
                x=sorted_channel_names,
                y=display_contribution,
                name=selection,
                text=text_values,
                textposition="outside",
            )
        )

    layout = go.Layout(
        title="Metrics Contribution by Channel",
        xaxis=dict(title="Channel Name"),
        # Head-room above the tallest bar for the outside labels.
        yaxis=dict(title="Metrics Contribution", range=[0, max_value * 1.2]),
        barmode="group",
        plot_bgcolor="white",
    )

    return go.Figure(data=trace_data, layout=layout)
|
|
| |
# Render the grouped bar chart for the selected response metrics.
| st.plotly_chart( |
| create_grouped_bar_plot( |
| st.session_state["contribution_df"], contribution_selections |
| ), |
| use_container_width=True, |
| ) |
|
|
| |
|
|
| import plotly.graph_objects as go |
|
|
| |
| |
|
|
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
|
|
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
|
|
| |
| |
|
|
def preprocess_and_plot(contribution_df, contribution_selections):
    """Build a waterfall chart of contributions per channel.

    One waterfall trace per selected response metric. The ``base`` row is
    shown first as "Base Sales"; the remaining channels are ordered by
    descending mean contribution across the selections. Rows named ``const``
    are ignored.

    Args:
        contribution_df: Frame with a ``Channel`` column and one numeric
            column per response metric.
        contribution_selections: Names of the metric columns to plot.

    Returns:
        A Plotly figure.
    """
    channel_names = contribution_df["Channel"].tolist()

    # Per-channel list of contributions, one entry per selection.
    all_contributions = {
        name: [] for name in channel_names if name not in ["const", "base"]
    }
    base_sales_dict = {}

    for selection in contribution_selections:
        raw = contribution_df[selection].to_numpy(dtype=float)
        # A channel missing from a model shows up as NaN after the outer
        # merge; treat it as a zero contribution so the labels stay valid.
        contributions = np.where(np.isnan(raw), 0.0, raw)

        base_sales = 0
        # Sum per channel first so repeated channel rows still yield exactly
        # one value per selection.
        totals = dict.fromkeys(all_contributions, 0.0)
        for channel_name, contribution in zip(channel_names, contributions):
            if channel_name in totals:
                totals[channel_name] += contribution
            elif channel_name == "base":
                base_sales = contribution
        for channel_name, total in totals.items():
            all_contributions[channel_name].append(total)

        base_sales_dict[selection] = base_sales

    # Largest average contribution first; base always leads.
    sorted_channels = sorted(
        all_contributions.items(),
        key=lambda item: -np.mean(item[1]) if item[1] else 0.0,
    )
    sorted_channel_names = ["Base Sales"] + [name for name, _ in sorted_channels]

    fig = go.Figure()

    for i, selection in enumerate(contribution_selections):
        display_contribution = [base_sales_dict[selection]] + [
            all_contributions[name][i] for name in sorted_channel_names[1:]
        ]

        # Step labels: whole-number percentages.
        text_values = [
            f"{val}%" for val in np.round(display_contribution, 0).astype(int)
        ]

        fig.add_trace(
            go.Waterfall(
                orientation="v",
                measure=["relative"] * len(display_contribution),
                x=sorted_channel_names,
                text=text_values,
                textposition="outside",
                y=display_contribution,
                increasing={"marker": {"color": "green"}},
                decreasing={"marker": {"color": "red"}},
                totals={"marker": {"color": "blue"}},
                name=selection,
            )
        )

    fig.update_layout(
        title="Metrics Contribution by Channel",
        xaxis={"title": "Channel Name"},
        # Contributions are percentages; leave head-room above 100.
        yaxis=dict(title="Metrics Contribution", range=[0, 100 * 1.2]),
    )

    return fig
|
|
| |
# Render the waterfall chart for the selected response metrics.
| st.plotly_chart( |
| preprocess_and_plot( |
| st.session_state["contribution_df"], contribution_selections |
| ), |
| use_container_width=True, |
| ) |
|
|
| |
| st.header("Analysis of Models Result") |
| |
| |
| |
| |
# Grid shown to the user: the metrics table without its last two columns,
# plus an unticked "selected" column.
# NOTE(review): `gd_table` is a slice of `metrics_table`; adding a column to
# it may trigger pandas' SettingWithCopyWarning -- consider an explicit copy.
| gd_table = metrics_table.iloc[:, :-2] |
| gd_table['selected']=list([False]*(len(gd_table))) |
# Default target: the model in row 0 (requires at least one model row).
| target_column = gd_table.at[0, "Model"] |
| if "selected_row_index_gd_table" not in st.session_state: |
| st.session_state["selected_row_index_gd_table"] = None |
|
|
| |
# The grid in session state is reset from the freshly computed table on
# every run.
| st.session_state['gd_table']=gd_table |
|
|
def selection_change():
    """Handle an edit in the model grid (``st.data_editor`` callback).

    Reads the editor's ``edited_rows`` from the ``project_selection`` widget
    state, records the first edited row index as the selected row, clears the
    ``selected`` column of ``gd_table`` and then applies the edited values on
    top of it.
    """
    edited_rows: dict = st.session_state.project_selection["edited_rows"]
    # Nothing edited: keep the current selection instead of raising
    # StopIteration on the empty dict.
    if not edited_rows:
        return

    # NOTE(review): this takes the first key of edited_rows; if the editor
    # accumulates edits across interactions this may not be the most recently
    # ticked row -- confirm against Streamlit's data_editor state semantics.
    st.session_state["selected_row_index_gd_table"] = next(iter(edited_rows))
    st.session_state["gd_table"] = st.session_state["gd_table"].assign(
        selected=False
    )

    # Overlay the edited cells (row index -> {column: value}) on the grid.
    st.session_state["gd_table"].update(
        pd.DataFrame.from_dict(edited_rows, orient="index")
    )
|
|
|
|
# Editable model grid; edits invoke selection_change, and the widget state is
# stored under the "project_selection" key.
| with st.container(): |
| table = st.data_editor( |
| st.session_state["gd_table"], |
| hide_index=True, |
| on_change=selection_change, |
| key="project_selection", |
| use_container_width=True, |
| ) |
|
|
|
|
|
|
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
# Detail section for the model picked in the grid.
# NOTE(review): indentation was lost in this copy of the file, so which of the
# following statements sit inside this `if` cannot be determined from here.
| if st.session_state["selected_row_index_gd_table"] is not None: |
| |
| |
| |
| |
| |
| target_column= st.session_state["gd_table"].at[ |
| st.session_state["selected_row_index_gd_table"], "Model" |
| ] |
| |
# NOTE(review): this feature_set is overwritten below from sel_dict.
| feature_set = feature_set_dct[target_column] |
|
|
|
|
|
# Look up the fitted model object and target name for the chosen row.
| model = metrics_table[metrics_table["Model"] == target_column]["Model_object"].iloc[ |
| 0 |
| ] |
| target = metrics_table[metrics_table["Model"] == target_column]["Model"].iloc[0] |
| st.header("Model Summary") |
| st.write(model.summary()) |
|
|
# Entry of the tuned-model dict whose key ends in this target name.
| sel_dict = tuned_model_dict[ |
| [k for k in tuned_model_dict.keys() if k.split("__")[1] == target][0] |
| ] |
|
|
| feature_set=sel_dict['feature_set'] |
| X_train = sel_dict["X_train_tuned"] |
| y_train = X_train[target] |
|
|
# Train predictions: mixed-model path for panel data, plain predict otherwise.
| if is_panel: |
| random_effects = get_random_effects(media_data, panel_col, model) |
| pred = mdf_predict(X_train, model, random_effects)["pred"] |
| else: |
| pred=model.predict(X_train[feature_set]) |
|
|
# Test predictions, same two paths.
| X_test = sel_dict["X_test_tuned"] |
| y_test = X_test[target] |
| if is_panel: |
| predtest = mdf_predict(X_test, model, random_effects)["pred"] |
| else: |
| predtest=model.predict(X_test[feature_set]) |
|
|
# plot_actual_vs_predicted returns three values; the first (a metrics table)
# and the third (a figure) are used here.
| metrics_table_train, _, fig_train = plot_actual_vs_predicted( |
| X_train[date_col], |
| y_train, |
| pred, |
| model, |
| target_column=target_column, |
| flag=None, |
| repeat_all_years=False, |
| is_panel=is_panel, |
| ) |
|
|
| metrics_table_test, _, fig_test = plot_actual_vs_predicted( |
| X_test[date_col], |
| y_test, |
| predtest, |
| model, |
| target_column=target_column, |
| flag=None, |
| repeat_all_years=False, |
| is_panel=is_panel, |
| ) |
|
|
# Reshape both metric tables to a single row each ("Train" / "Test").
# NOTE(review): the assignment below rebinds `metrics_table`, replacing the
# per-model table built earlier for the rest of this run.
| metrics_table_train = metrics_table_train.set_index("Metric").transpose() |
| metrics_table_train.index = ["Train"] |
| metrics_table_test = metrics_table_test.set_index("Metric").transpose() |
| metrics_table_test.index = ["Test"] |
| metrics_table = np.round(pd.concat([metrics_table_train, metrics_table_test]), 2) |
|
|
| st.markdown("Result Overview") |
| st.dataframe(np.round(metrics_table, 2), use_container_width=True) |
|
|
| st.subheader("Actual vs Predicted Plot Train") |
|
|
| st.plotly_chart(fig_train, use_container_width=True) |
| st.subheader("Actual vs Predicted Plot Test") |
| st.plotly_chart(fig_test, use_container_width=True) |
|
|
# Residual diagnostics on the training data, laid out in two columns.
| st.markdown("## Residual Analysis") |
| columns = st.columns(2) |
|
|
# Work on a copy because plot_residual_predicted adds columns to its frame.
| Xtrain1 = X_train.copy() |
| with columns[0]: |
| fig = plot_residual_predicted(y_train, pred, Xtrain1) |
| st.plotly_chart(fig) |
|
|
| with columns[1]: |
| st.empty() |
| fig = qqplot(y_train, pred) |
| st.plotly_chart(fig) |
|
|
| with columns[0]: |
| fig = residual_distribution(y_train, pred) |
| st.pyplot(fig) |
|
|
# Record this page in the project database via the shared helper.
| update_db("6_AI_Model_Result.py") |
|
|