Spaces:
Build error
Build error
| import streamlit as st | |
| import pandas as pd | |
| from sklearn.preprocessing import MinMaxScaler | |
| import pickle | |
| import os | |
| from utilities_with_panel import load_local_css, set_header | |
| import yaml | |
| from yaml import SafeLoader | |
| import sqlite3 | |
| from datetime import timedelta | |
| from utilities import ( | |
| set_header, | |
| load_local_css, | |
| update_db, | |
| project_selection, | |
| retrieve_pkl_object, | |
| ) | |
| from utilities_with_panel import ( | |
| overview_test_data_prep_panel, | |
| overview_test_data_prep_nonpanel, | |
| initialize_data_cmp, | |
| create_channel_summary, | |
| create_contribution_pie, | |
| create_contribuion_stacked_plot, | |
| create_channel_spends_sales_plot, | |
| format_numbers, | |
| channel_name_formating, | |
| ) | |
| from log_application import log_message | |
| import sys, traceback | |
| from post_gres_cred import db_cred | |
| st.set_page_config(layout="wide") | |
| load_local_css("styles.css") | |
| set_header() | |
| schema = db_cred["schema"] | |
| if "username" not in st.session_state: | |
| st.session_state["username"] = None | |
| if "project_name" not in st.session_state: | |
| st.session_state["project_name"] = None | |
| if "project_dct" not in st.session_state: | |
| project_selection() | |
| st.stop() | |
| tuned_model = retrieve_pkl_object( | |
| st.session_state["project_number"], "Model_Tuning", "tuned_model", schema | |
| ) | |
| if tuned_model is None: | |
| st.error("Please save a tuned model") | |
| st.stop() | |
| if ( | |
| "session_state_saved" in st.session_state["project_dct"]["model_tuning"].keys() | |
| and st.session_state["project_dct"]["model_tuning"]["session_state_saved"] != [] | |
| ): | |
| for key in ["used_response_metrics", "media_data", "bin_dict"]: | |
| if key not in st.session_state: | |
| st.session_state[key] = st.session_state["project_dct"]["model_tuning"][ | |
| "session_state_saved" | |
| ][key] | |
| ## DEFINE ALL FUNCTIONS | |
| def get_random_effects(media_data, panel_col, mdf): | |
| random_eff_df = pd.DataFrame(columns=[panel_col, "random_effect"]) | |
| for i, market in enumerate(media_data[panel_col].unique()): | |
| print(i, end="\r") | |
| intercept = mdf.random_effects[market].values[0] | |
| random_eff_df.loc[i, "random_effect"] = intercept | |
| random_eff_df.loc[i, panel_col] = market | |
| return random_eff_df | |
| def process_train_and_test(train, test, features, panel_col, target_col): | |
| X1 = train[features] | |
| ss = MinMaxScaler() | |
| X1 = pd.DataFrame(ss.fit_transform(X1), columns=X1.columns) | |
| X1[panel_col] = train[panel_col] | |
| X1[target_col] = train[target_col] | |
| if test is not None: | |
| X2 = test[features] | |
| X2 = pd.DataFrame(ss.transform(X2), columns=X2.columns) | |
| X2[panel_col] = test[panel_col] | |
| X2[target_col] = test[target_col] | |
| return X1, X2 | |
| return X1 | |
| def mdf_predict(X_df, mdf, random_eff_df): | |
| X = X_df.copy() | |
| X = pd.merge( | |
| X, | |
| random_eff_df[[panel_col, "random_effect"]], | |
| on=panel_col, | |
| how="left", | |
| ) | |
| X["pred_fixed_effect"] = mdf.predict(X) | |
| X["pred"] = X["pred_fixed_effect"] + X["random_effect"] | |
| X.drop(columns=["pred_fixed_effect", "random_effect"], inplace=True) | |
| return X | |
| try: | |
| if "username" in st.session_state and st.session_state["username"] is not None: | |
| # conn = sqlite3.connect( | |
| # r"DB/User.db", check_same_thread=False | |
| # ) # connection with sql db | |
| # c = conn.cursor() | |
| tuned_model = retrieve_pkl_object( | |
| st.session_state["project_number"], "Model_Tuning", "tuned_model", schema | |
| ) | |
| if tuned_model is None: | |
| st.error("Please save a tuned model") | |
| st.stop() | |
| if ( | |
| "session_state_saved" | |
| in st.session_state["project_dct"]["model_tuning"].keys() | |
| and st.session_state["project_dct"]["model_tuning"]["session_state_saved"] | |
| != [] | |
| ): | |
| for key in [ | |
| "used_response_metrics", | |
| "is_tuned_model", | |
| "media_data", | |
| "X_test_spends", | |
| "spends_data", | |
| ]: | |
| st.session_state[key] = st.session_state["project_dct"]["model_tuning"][ | |
| "session_state_saved" | |
| ][key] | |
| elif ( | |
| "session_state_saved" | |
| in st.session_state["project_dct"]["model_build"].keys() | |
| and st.session_state["project_dct"]["model_build"]["session_state_saved"] | |
| != [] | |
| ): | |
| for key in [ | |
| "used_response_metrics", | |
| "date", | |
| "saved_model_names", | |
| "media_data", | |
| "X_test_spends", | |
| ]: | |
| st.session_state[key] = st.session_state["project_dct"]["model_build"][ | |
| "session_state_saved" | |
| ][key] | |
| else: | |
| st.error("Please tune a model first") | |
| st.session_state["bin_dict"] = st.session_state["project_dct"]["model_build"][ | |
| "session_state_saved" | |
| ]["bin_dict"] | |
| st.session_state["media_data"].columns = [ | |
| c.lower() for c in st.session_state["media_data"].columns | |
| ] | |
| # with open( | |
| # os.path.join(st.session_state["project_path"], "data_import.pkl"), | |
| # "rb", | |
| # ) as f: | |
| # data = pickle.load(f) | |
| # # Accessing the loaded objects | |
| # st.session_state["orig_media_data"] = data["final_df"] | |
| st.session_state["orig_media_data"] = st.session_state["project_dct"][ | |
| "data_import" | |
| ][ | |
| "imputed_tool_df" | |
| ].copy() # db | |
| st.session_state["channels"] = st.session_state["project_dct"]["data_import"][ | |
| "group_dict" | |
| ].copy() | |
| # target='Revenue' | |
| # set the panel column | |
| panel_col = "panel" | |
| is_panel = ( | |
| True if st.session_state["media_data"][panel_col].nunique() > 1 else False | |
| ) | |
| date_col = "date" | |
| cols1 = st.columns([2, 1]) | |
| with cols1[0]: | |
| st.markdown(f"**Welcome {st.session_state['username']}**") | |
| with cols1[1]: | |
| st.markdown(f"**Current Project: {st.session_state['project_name']}**") | |
| st.title("AI Model Media Performance") | |
| def remove_response_metric(name): | |
| # Convert the name to a lowercase string and remove any leading or trailing spaces | |
| name_str = str(name).lower().strip() | |
| # Check if the name starts with "response metric" or "response_metric" | |
| if name_str.startswith("response metric"): | |
| return name[len("response metric") :].replace("_", " ").strip().title() | |
| elif name_str.startswith("response_metric"): | |
| return name[len("response_metric") :].replace("_", " ").strip().title() | |
| else: | |
| return name | |
| sel_target_col = st.selectbox( | |
| "Select the response metric", | |
| st.session_state["used_response_metrics"], | |
| format_func=remove_response_metric, | |
| ) | |
| sel_target_col_frmttd = sel_target_col.replace("_", " ").replace("-", " ") | |
| sel_target_col_frmttd = sel_target_col_frmttd.title() | |
| target_col = ( | |
| sel_target_col.lower() | |
| .replace(" ", "_") | |
| .replace("-", "") | |
| .replace(":", "") | |
| .replace("__", "_") | |
| ) | |
| target = sel_target_col | |
| # Contribution | |
| if is_panel: | |
| # read tuned mixedLM model | |
| if st.session_state["is_tuned_model"][target_col] == True: | |
| model_dict = retrieve_pkl_object( | |
| st.session_state["project_number"], | |
| "Model_Tuning", | |
| "tuned_model", | |
| schema, | |
| ) # db | |
| saved_models = list(model_dict.keys()) | |
| required_saved_models = [ | |
| m.split("__")[0] | |
| for m in saved_models | |
| if m.split("__")[1] == target_col | |
| ] | |
| sel_model = required_saved_models[ | |
| 0 | |
| ] # only 1 tuned model available per resp metric | |
| sel_model_dict = model_dict[sel_model + "__" + target_col] | |
| model = sel_model_dict["Model_object"] | |
| X_train = sel_model_dict["X_train_tuned"] | |
| X_test = sel_model_dict["X_test_tuned"] | |
| best_feature_set = sel_model_dict["feature_set"] | |
| # Calculate contributions | |
| st.session_state["orig_media_data"].columns = [ | |
| col.lower() | |
| .replace(".", "_") | |
| .replace("@", "_") | |
| .replace(" ", "_") | |
| .replace("-", "") | |
| .replace(":", "") | |
| .replace("__", "_") | |
| for col in st.session_state["orig_media_data"].columns | |
| ] | |
| media_data = st.session_state["media_data"] | |
| contri_df = pd.DataFrame() | |
| y = [] | |
| y_pred = [] | |
| random_eff_df = get_random_effects(media_data, panel_col, model) | |
| random_eff_df["fixed_effect"] = model.fe_params["Intercept"] | |
| random_eff_df["panel_effect"] = ( | |
| random_eff_df["random_effect"] + random_eff_df["fixed_effect"] | |
| ) | |
| coef_df = pd.DataFrame(model.fe_params) | |
| coef_df.reset_index(inplace=True) | |
| coef_df.columns = ["feature", "coef"] | |
| x_train_contribution = X_train.copy() | |
| x_test_contribution = X_test.copy() | |
| # preprocessing not needed since X_train is already preprocessed | |
| # X1, X2 = process_train_and_test(x_train_contribution, x_test_contribution, best_feature_set, panel_col, target_col) | |
| # x_train_contribution[best_feature_set] = X1[best_feature_set] | |
| # x_test_contribution[best_feature_set] = X2[best_feature_set] | |
| x_train_contribution = mdf_predict( | |
| x_train_contribution, model, random_eff_df | |
| ) | |
| x_test_contribution = mdf_predict(x_test_contribution, model, random_eff_df) | |
| x_train_contribution = pd.merge( | |
| x_train_contribution, | |
| random_eff_df[[panel_col, "panel_effect"]], | |
| on=panel_col, | |
| how="left", | |
| ) | |
| x_test_contribution = pd.merge( | |
| x_test_contribution, | |
| random_eff_df[[panel_col, "panel_effect"]], | |
| on=panel_col, | |
| how="left", | |
| ) | |
| for i in range(len(coef_df))[1:]: | |
| coef = coef_df.loc[i, "coef"] | |
| col = coef_df.loc[i, "feature"] | |
| if col.lower() != "intercept": | |
| x_train_contribution[str(col) + "_contr"] = ( | |
| coef * x_train_contribution[col] | |
| ) | |
| x_test_contribution[str(col) + "_contr"] = ( | |
| coef * x_train_contribution[col] | |
| ) | |
| tuning_cols = [ | |
| c | |
| for c in x_train_contribution.filter(regex="contr").columns | |
| if c | |
| in [ | |
| "day_of_week_contr", | |
| "Trend_contr", | |
| "sine_wave_contr", | |
| "cosine_wave_contr", | |
| ] | |
| ] | |
| flag_cols = [ | |
| c | |
| for c in x_train_contribution.filter(regex="contr").columns | |
| if "_flag" in c | |
| ] | |
| # add exogenous contribution to base | |
| all_exog_vars = st.session_state["bin_dict"]["Exogenous"] | |
| all_exog_vars = [ | |
| var.lower() | |
| .replace(".", "_") | |
| .replace("@", "_") | |
| .replace(" ", "_") | |
| .replace("-", "") | |
| .replace(":", "") | |
| .replace("__", "_") | |
| for var in all_exog_vars | |
| ] | |
| exog_cols = [] | |
| if len(all_exog_vars) > 0: | |
| for col in x_train_contribution.filter(regex="contr").columns: | |
| if ( | |
| len([exog_var for exog_var in all_exog_vars if exog_var in col]) | |
| > 0 | |
| ): | |
| exog_cols.append(col) | |
| base_cols = ["panel_effect"] + flag_cols + tuning_cols + exog_cols | |
| x_train_contribution["base_contr"] = x_train_contribution[base_cols].sum( | |
| axis=1 | |
| ) | |
| x_train_contribution.drop(columns=base_cols, inplace=True) | |
| x_test_contribution["base_contr"] = x_test_contribution[base_cols].sum( | |
| axis=1 | |
| ) | |
| x_test_contribution.drop(columns=base_cols, inplace=True) | |
| overall_contributions = pd.concat( | |
| [x_train_contribution, x_test_contribution] | |
| ).reset_index(drop=True) | |
| overview_test_data_prep_panel( | |
| overall_contributions, | |
| st.session_state["orig_media_data"], | |
| st.session_state["spends_data"], | |
| date_col, | |
| panel_col, | |
| target_col, | |
| ) | |
| else: # NON PANEL | |
| if st.session_state["is_tuned_model"][target_col] == True: # Sprint4 | |
| # with open( | |
| # os.path.join(st.session_state["project_path"], "tuned_model.pkl"), | |
| # "rb", | |
| # ) as file: | |
| # model_dict = pickle.load(file) | |
| model_dict = retrieve_pkl_object( | |
| st.session_state["project_number"], | |
| "Model_Tuning", | |
| "tuned_model", | |
| schema, | |
| ) # db | |
| saved_models = list(model_dict.keys()) | |
| required_saved_models = [ | |
| m.split("__")[0] | |
| for m in saved_models | |
| if m.split("__")[1] == target_col | |
| ] | |
| sel_model = required_saved_models[ | |
| 0 | |
| ] # only 1 tuned model available per resp metric | |
| sel_model_dict = model_dict[sel_model + "__" + target_col] | |
| model = sel_model_dict["Model_object"] | |
| X_train = sel_model_dict["X_train_tuned"] | |
| X_test = sel_model_dict["X_test_tuned"] | |
| best_feature_set = sel_model_dict["feature_set"] | |
| x_train_contribution = X_train.copy() | |
| x_test_contribution = X_test.copy() | |
| x_train_contribution["pred"] = model.predict( | |
| x_train_contribution[best_feature_set] | |
| ) | |
| x_test_contribution["pred"] = model.predict( | |
| x_test_contribution[best_feature_set] | |
| ) | |
| coef_df = pd.DataFrame(model.params) | |
| coef_df.reset_index(inplace=True) | |
| coef_df.columns = ["feature", "coef"] | |
| # st.write(coef_df) | |
| for i in range(len(coef_df)): | |
| coef = coef_df.loc[i, "coef"] | |
| col = coef_df.loc[i, "feature"] | |
| if col != "const": | |
| x_train_contribution[str(col) + "_contr"] = ( | |
| coef * x_train_contribution[col] | |
| ) | |
| x_test_contribution[str(col) + "_contr"] = ( | |
| coef * x_test_contribution[col] | |
| ) | |
| else: | |
| x_train_contribution["const"] = coef | |
| x_test_contribution["const"] = coef | |
| tuning_cols = [ | |
| c | |
| for c in x_train_contribution.filter(regex="contr").columns | |
| if c | |
| in [ | |
| "day_of_week_contr", | |
| "Trend_contr", | |
| "sine_wave_contr", | |
| "cosine_wave_contr", | |
| ] | |
| ] | |
| flag_cols = [ | |
| c | |
| for c in x_train_contribution.filter(regex="contr").columns | |
| if "_flag" in c | |
| ] | |
| # add exogenous contribution to base | |
| all_exog_vars = st.session_state["bin_dict"]["Exogenous"] | |
| all_exog_vars = [ | |
| var.lower() | |
| .replace(".", "_") | |
| .replace("@", "_") | |
| .replace(" ", "_") | |
| .replace("-", "") | |
| .replace(":", "") | |
| .replace("__", "_") | |
| for var in all_exog_vars | |
| ] | |
| exog_cols = [] | |
| if len(all_exog_vars) > 0: | |
| for col in x_train_contribution.filter(regex="contr").columns: | |
| if ( | |
| len([exog_var for exog_var in all_exog_vars if exog_var in col]) | |
| > 0 | |
| ): | |
| exog_cols.append(col) | |
| base_cols = ["const"] + flag_cols + tuning_cols + exog_cols | |
| # st.write(base_cols) | |
| x_train_contribution["base_contr"] = x_train_contribution[base_cols].sum( | |
| axis=1 | |
| ) | |
| x_train_contribution.drop(columns=base_cols, inplace=True) | |
| x_test_contribution["base_contr"] = x_test_contribution[base_cols].sum( | |
| axis=1 | |
| ) | |
| x_test_contribution.drop(columns=base_cols, inplace=True) | |
| # x_test_contribution.to_csv("Test/test_contr.csv", index=False) | |
| overall_contributions = pd.concat( | |
| [x_train_contribution, x_test_contribution] | |
| ).reset_index(drop=True) | |
| # overall_contributions.to_csv("Test/overall_contributions.csv", index=False) | |
| overview_test_data_prep_nonpanel( | |
| overall_contributions, | |
| st.session_state["orig_media_data"].copy(), | |
| st.session_state["spends_data"].copy(), | |
| date_col, | |
| target_col, | |
| ) | |
| # for k, v in st.session_sta | |
| # te.items(): | |
| # if k not in ['logout', 'login','config'] and not k.startswith('FormSubmitter'): | |
| # st.session_state[k] = v | |
| # authenticator = st.session_state.get('authenticator') | |
| # if authenticator is None: | |
| # authenticator = load_authenticator() | |
| # name, authentication_status, username = authenticator.login('Login', 'main') | |
| # auth_status = st.session_state['authentication_status'] | |
| # if auth_status: | |
| # authenticator.logout('Logout', 'main') | |
| # is_state_initiaized = st.session_state.get('initialized',False) | |
| # if not is_state_initiaized: | |
| min_date = X_train[date_col].min().date() | |
| max_date = X_test[date_col].max().date() | |
| if "media_performance" not in st.session_state["project_dct"]: | |
| st.session_state["project_dct"]["media_performance"] = { | |
| "start_date": None, | |
| "end_date": None, | |
| } | |
| start_default = st.session_state["project_dct"]["media_performance"].get( | |
| "start_date", None | |
| ) | |
| start_default = start_default if start_default is not None else min_date | |
| start_default = start_default if start_default > min_date else min_date | |
| start_default = start_default if start_default < max_date else min_date | |
| end_default = st.session_state["project_dct"]["media_performance"].get( | |
| "end_date", None | |
| ) | |
| end_default = end_default if end_default is not None else max_date | |
| end_default = end_default if end_default > min_date else max_date | |
| end_default = end_default if end_default < max_date else max_date | |
| st.write("Select a timeline for analysis") | |
| date_columns = st.columns(2) | |
| with date_columns[0]: | |
| start_date = st.date_input( | |
| "Select Start Date", | |
| start_default, | |
| min_value=min_date, | |
| max_value=max_date, | |
| ) | |
| if (start_date < min_date) or (start_date > max_date): | |
| st.error("Please select dates in the range of the dates in the data") | |
| st.stop() | |
| end_default = ( | |
| end_default if end_default > start_date + timedelta(days=1) else max_date | |
| ) | |
| with date_columns[1]: | |
| end_default = ( | |
| end_default | |
| if pd.Timestamp(end_default) >= pd.Timestamp(start_date) | |
| else start_date | |
| ) | |
| end_date = st.date_input( | |
| "Select End Date", | |
| end_default, | |
| min_value=start_date + timedelta(days=1), | |
| max_value=max_date, | |
| ) | |
| if ( | |
| (start_date < min_date) | |
| or (end_date < min_date) | |
| or (start_date > max_date) | |
| or (end_date > max_date) | |
| ): | |
| st.error("Please select dates in the range of the dates in the data") | |
| st.stop() | |
| if end_date < start_date + timedelta(days=1): | |
| st.error("Please select end date after start date") | |
| st.stop() | |
| st.session_state["project_dct"]["media_performance"]["start_date"] = start_date | |
| st.session_state["project_dct"]["media_performance"]["end_date"] = end_date | |
| st.header("Overview of Previous Media Spend") | |
| initialize_data_cmp(target_col, is_panel, panel_col, start_date, end_date) | |
| scenario = st.session_state["scenario"] | |
| raw_df = st.session_state["raw_df"] | |
| columns = st.columns(2) | |
| with columns[0]: | |
| st.metric( | |
| label="Media Spend", | |
| value=format_numbers(float(scenario.actual_total_spends)), | |
| ) | |
| ###print(f"##################### {scenario.actual_total_sales} ##################") | |
| with columns[1]: | |
| st.metric( | |
| label=sel_target_col_frmttd, | |
| value=format_numbers( | |
| float(scenario.actual_total_sales), include_indicator=False | |
| ), | |
| ) | |
| actual_summary_df = create_channel_summary(scenario, sel_target_col_frmttd) | |
| actual_summary_df["Channel"] = actual_summary_df["Channel"].apply( | |
| channel_name_formating | |
| ) | |
| columns = st.columns((3, 1)) | |
| with columns[0]: | |
| with st.expander("Channel wise overview"): | |
| st.markdown( | |
| actual_summary_df.style.set_table_styles( | |
| [ | |
| { | |
| "selector": "th", | |
| "props": [("background-color", "#f6dcc7")], | |
| }, | |
| { | |
| "selector": "tr:nth-child(even)", | |
| "props": [("background-color", "#f6dcc7")], | |
| }, | |
| ] | |
| ).to_html(), | |
| unsafe_allow_html=True, | |
| ) | |
| st.markdown("<hr>", unsafe_allow_html=True) | |
| ############################## | |
| st.plotly_chart( | |
| create_contribution_pie(scenario, sel_target_col_frmttd), | |
| use_container_width=True, | |
| ) | |
| st.markdown("<hr>", unsafe_allow_html=True) | |
| ################################3 | |
| st.plotly_chart( | |
| create_contribuion_stacked_plot(scenario, sel_target_col_frmttd), | |
| use_container_width=True, | |
| ) | |
| st.markdown("<hr>", unsafe_allow_html=True) | |
| ####################################### | |
| selected_channel_name = st.selectbox( | |
| "Channel", | |
| st.session_state["channels_list"] + ["non media"], | |
| format_func=channel_name_formating, | |
| ) | |
| selected_channel = scenario.channels.get(selected_channel_name, None) | |
| st.plotly_chart( | |
| create_channel_spends_sales_plot(selected_channel, sel_target_col_frmttd), | |
| use_container_width=True, | |
| ) | |
| st.markdown("<hr>", unsafe_allow_html=True) | |
| if st.button("Save this session", use_container_width=True): | |
| project_dct_pkl = pickle.dumps(st.session_state["project_dct"]) | |
| update_db( | |
| st.session_state["project_number"], | |
| "Current_Media_Performance", | |
| "project_dct", | |
| project_dct_pkl, | |
| schema, | |
| # resp_mtrc=None, | |
| ) # db | |
| st.success("Session Saved!") | |
| # Remove "response_metric_" from the start and "_total" from the end | |
| if str(target_col).startswith("response_metric_"): | |
| target_col = target_col.replace("response_metric_", "", 1) | |
| # Remove the last 6 characters (length of "_total") | |
| if str(target_col).endswith("_total"): | |
| target_col = target_col[:-6] | |
| if ( | |
| st.session_state["project_dct"]["current_media_performance"][ | |
| "model_outputs" | |
| ][target_col] | |
| is not None | |
| ): | |
| if ( | |
| len( | |
| st.session_state["project_dct"]["current_media_performance"][ | |
| "model_outputs" | |
| ][target_col]["contribution_data"] | |
| ) | |
| > 0 | |
| ): | |
| st.download_button( | |
| label="Download Contribution File", | |
| data=st.session_state["project_dct"]["current_media_performance"][ | |
| "model_outputs" | |
| ][target_col]["contribution_data"].to_csv(), | |
| file_name="contributions.csv", | |
| key="dwnld_contr", | |
| ) | |
| except: | |
| exc_type, exc_value, exc_traceback = sys.exc_info() | |
| error_message = "".join( | |
| traceback.format_exception(exc_type, exc_value, exc_traceback) | |
| ) | |
| log_message("error", f"Error: {error_message}", "Current Media Performance") | |
| st.warning("An error occured, please try again", icon="⚠️") | |