| | import streamlit as st |
| | import pandas as pd |
| | import plotly.express as px |
| | import plotly.graph_objects as go |
| | import numpy as np |
| | import plotly.express as px |
| | import plotly.graph_objects as go |
| | import pandas as pd |
| | import seaborn as sns |
| | import matplotlib.pyplot as plt |
| | import datetime |
| | from utilities import set_header,initialize_data,load_local_css |
| | from scipy.optimize import curve_fit |
| | import statsmodels.api as sm |
| | from plotly.subplots import make_subplots |
| |
|
# Page-level Streamlit settings, then the stylesheet and header helpers
# imported from `utilities`.
st.set_page_config(
    page_title="Data Validation",
    page_icon=":shark:",
    layout="wide",
    initial_sidebar_state='collapsed',
)
load_local_css('styles.css')
set_header()
| |
|
def format_numbers(x):
    """Return ``x`` as a compact string with one decimal and thousands separators.

    Values whose magnitude is at least one million are expressed in millions
    with an ``M`` suffix; values of at least one thousand are expressed in
    thousands with a ``K`` suffix; everything else is printed unscaled.
    """
    magnitude = abs(x)
    if magnitude >= 1e6:
        scaled, suffix = x / 1e6, 'M'
    elif magnitude >= 1e3:
        scaled, suffix = x / 1e3, 'K'
    else:
        scaled, suffix = x, ''
    return f'{scaled:,.1f}{suffix}'
| |
|
def format_axis(x):
    """Return ``x`` as a short axis label with no decimal places.

    A tuple argument is reduced to its first element. Magnitudes of at least
    one million get an ``M`` suffix, magnitudes of at least one thousand a
    lowercase ``k`` suffix, and smaller values are printed unscaled.
    """
    value = x[0] if isinstance(x, tuple) else x
    size = abs(value)
    if size >= 1e6:
        return f'{value / 1e6:.0f}M'
    if size >= 1e3:
        return f'{value / 1e3:.0f}k'
    return f'{value:.0f}'
| |
|
| |
|
# Load the attributed-install tables and the channel-level workbook.
attributred_app_installs = pd.read_csv("attributed_app_installs.csv")
attributred_app_installs_tactic = pd.read_excel('attributed_app_installs_tactic.xlsx')
data = pd.read_excel('Channel_wise_imp_click_spends.xlsx')
data['Date'] = pd.to_datetime(data['Date'])
st.header('Saturation Curves')

st.markdown('Data QC')

st.markdown('Channel wise summary')
# Monthly totals keyed by "Month YYYY" labels. numeric_only=True keeps the
# datetime `Date` column (and any other non-numeric column) out of the sum;
# recent pandas versions raise TypeError instead of silently dropping it.
summary_df = data.groupby(data['Date'].dt.strftime('%B %Y')).sum(numeric_only=True)
# The labels are month names, so sort by the date they parse to rather than
# alphabetically.
summary_df = summary_df.sort_index(key=lambda labels: pd.to_datetime(labels, format='%B %Y'))
# Element-wise formatting via Series.map, which behaves the same on old and new
# pandas (DataFrame.applymap is deprecated in recent releases).
st.dataframe(summary_df.apply(lambda column: column.map(format_numbers)))
| |
|
| |
|
| |
|
def line_plot_target(df, target, title):
    """Plot ``df[target]`` against ``df['Date']`` together with a linear trendline.

    The figure holds the raw series, a least-squares straight-line trend, and
    a dashed grey vertical line at 1 January of every year after the earliest
    year found in ``df['Date']``. ``df`` itself is not modified.

    Args:
        df: DataFrame with a datetime ``Date`` column and a numeric ``target``
            column.
        target: Name of the column to plot.
        title: Title of the figure.

    Returns:
        plotly.graph_objects.Figure: the assembled figure.
    """
    dates = df['Date']
    series = df[target]

    # astype('int64') gives the same epoch integers as the deprecated
    # Series.view('int64'). They are centred before fitting: raw epoch values
    # are huge compared with their spread, which makes the fit poorly
    # conditioned, while centring leaves the fitted line itself unchanged.
    epoch = dates.astype('int64').to_numpy(dtype='float64')
    centred = epoch - epoch.mean()
    slope, intercept = np.polyfit(centred, series, 1)
    trend_values = slope * centred + intercept

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=dates, y=series, mode='lines', name=target, line=dict(color='#11B6BD')))
    fig.add_trace(go.Scatter(x=dates, y=trend_values, mode='lines', name='Trendline', line=dict(color='#739FAE')))
    fig.update_layout(
        title=title,
        xaxis=dict(type='date'),
    )

    # Year separators: skip the earliest year so no line is drawn before the data starts.
    for year in sorted(dates.dt.year.unique())[1:]:
        january_1 = pd.Timestamp(year=int(year), month=1, day=1)
        fig.add_shape(
            go.layout.Shape(
                type="line",
                x0=january_1,
                x1=january_1,
                y0=0,
                y1=1,
                xref="x",
                yref="paper",  # span the full plot height regardless of the y range
                line=dict(color="grey", width=1.5, dash="dash"),
            )
        )

    return fig
# Channel names are derived by stripping the metric suffixes from the column names.
# NOTE(review): only the first 28 columns are considered — confirm this still
# matches the workbook layout.
channels_d = data.columns[:28]
# sorted() gives the selectbox a stable option order (and default selection);
# iterating a bare set of strings can change order between interpreter runs.
channels = sorted({
    name.replace('_impressions', '').replace('_clicks', '').replace('_spend', '')
    for name in channels_d
    if name.lower() != 'date'
})
channel = st.selectbox('Select Channel_name', channels)
# Second picker: every column whose name starts with the chosen channel.
target_column = st.selectbox('Select Channel)', [name for name in data.columns if name.startswith(channel)])
fig = line_plot_target(data, target=str(target_column), title=f'{str(target_column)} Over Time')
st.plotly_chart(fig, use_container_width=True)
| |
|
| | |
| |
|
| |
|
st.header('Build saturation curve')

col = st.columns(2)
with col[0]:
    cap_outliers = st.checkbox('Cap Outliers')
    # Standardise the selected metric to z-scores. The column keeps its own
    # index so that later positional/label access behaves the same whether or
    # not capping is enabled.
    x = data[target_column]
    x_mean = np.mean(x)
    x_std = np.std(x)
    x_scaled = (x - x_mean) / x_std
    if cap_outliers:
        # Clamp anything further than two standard deviations from the mean.
        lower_threshold = -2.0
        upper_threshold = 2.0
        x_scaled = np.clip(x_scaled, lower_threshold, upper_threshold)
with col[1]:
    use_attributed = st.checkbox('Attributed')
    # Attributed-install columns whose name is contained in the selected column name.
    matches = (
        [name for name in attributred_app_installs.columns if name in target_column]
        if use_attributed
        else []
    )
    if use_attributed and len(matches) == 1:
        # Align on data's index (what a column assignment would do) without
        # overwriting data['app_installs_appsflyer'], which later sections read.
        y = attributred_app_installs[matches[0]].reindex(data.index)
        title = 'Attributed-App_installs_appsflyer'
    else:
        if use_attributed:
            st.warning(
                f"Expected exactly one attributed-installs column matching "
                f"'{target_column}', found {len(matches)}; showing unattributed installs instead."
            )
        y = data["app_installs_appsflyer"]
        title = 'App_installs_appsflyer'
| | |
| | |
def sigmoid(x, K, a, x0):
    """Evaluate the logistic curve ``K / (1 + exp(-a * (x - x0)))``.

    Args:
        x: Scalar or array-like input.
        K: Upper asymptote (amplitude).
        a: Slope at the midpoint.
        x0: Midpoint, where the curve equals ``K / 2``.

    Returns:
        The curve value(s), with the same shape as ``x``.
    """
    # 1 / (1 + exp(-z)) == 0.5 * (1 + tanh(z / 2)). The tanh form never
    # overflows, so extreme parameter values tried by an optimiser do not
    # trigger floating-point overflow warnings.
    return K * 0.5 * (1.0 + np.tanh(0.5 * a * (x - x0)))
| |
|
# Starting values for the logistic fit; the inputs below let the user adjust them.
initial_K = np.max(y)
initial_a = 1
initial_x0 = 0
scaled_min = float(np.min(x_scaled))
scaled_max = float(np.max(x_scaled))
columns = st.columns(3)

with columns[0]:
    K = st.number_input('K (Amplitude)', min_value=0.01, max_value=float(2.0 * np.max(y)), value=float(initial_K), step=5.0)
with columns[1]:
    a = st.number_input('a (Slope)', min_value=0.01, max_value=5.0, value=float(initial_a), step=0.5)
with columns[2]:
    x0 = st.number_input('x0 (Center)', min_value=scaled_min, max_value=scaled_max, value=float(initial_x0), step=2.0)

try:
    params, _ = curve_fit(sigmoid, x_scaled, y, p0=[K, a, x0], maxfev=20000)
except RuntimeError as fit_error:
    # curve_fit raises RuntimeError when the optimisation does not converge;
    # show a readable message instead of a traceback and end this run.
    st.error(f'Saturation curve fit failed: {fit_error}')
    st.stop()

# The slider works in original units; convert to z-score before evaluating the fit.
x_slider = st.slider('X Value', min_value=float(min(x)), max_value=float(max(x))+1, value=float(x_mean), step=1.)
x_slider_scaled = (x_slider - x_mean) / x_std
y_slider_fit = sigmoid(x_slider_scaled, *params)

st.write(f'{target_column}: {format_numbers(x_slider)}')
st.write(f'Corresponding App_installs: {format_numbers(y_slider_fit)}')

fig = px.scatter(data_frame=data, x=x_scaled, y=y, labels={'x': f'{target_column}', 'y': 'App Installs'}, title=title)

# Fitted curve over the observed (scaled) range, drawn as the second trace.
x_fit = np.linspace(scaled_min, scaled_max, 100)
y_fit = sigmoid(x_fit, *params)
fig.add_trace(px.line(x=x_fit, y=y_fit).data[0])
fig.data[1].update(line=dict(color='orange'))
fig.add_vline(x=x_slider_scaled, line_dash='dash', line_color='red', annotation_text=f'{format_numbers(x_slider)}')

# The x axis is plotted in z-scores but labelled in original units: ticks are
# evenly spaced in scaled space and each label is the exact inverse transform
# x_mean + z * x_std, so a label always corresponds to its tick position.
num_ticks = 7
tick_positions = np.linspace(scaled_min, scaled_max, num_ticks)
tick_labels = [format_axis(x_mean + z * x_std) for z in tick_positions]
fig.update_xaxes(tickvals=tick_positions.tolist(), ticktext=tick_labels)

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.update_layout(
    width=600,
    height=600,
)
st.plotly_chart(fig)
| |
|
| |
|
| |
|
| |
|
st.markdown('Tactic level')
# 'paid_social' has its own sheet; every other channel reads the display sheet.
tactic_sheet = 'paid_social_impressions' if channel == 'paid_social' else 'digital_app_display_impressions'
tactic_data = pd.read_excel("Tatcic_paid.xlsx", sheet_name=tactic_sheet)

# Offer every tactic column except the date and the install counts.
non_selectable = ('Date', 'app_installs_appsflyer')
tactic_options = [name for name in tactic_data.columns if name not in non_selectable]
target_column = st.selectbox('Select Channel)', tactic_options)

trend_title = f'{str(target_column)} Over Time'
fig = line_plot_target(tactic_data, target=str(target_column), title=trend_title)
st.plotly_chart(fig, use_container_width=True)
| |
|
cap_tactic_outliers = st.checkbox('Cap Outliers', key='tactic1')
# Standardise the selected tactic metric to z-scores.
x = tactic_data[target_column]
x_mean = np.mean(x)
x_std = np.std(x)
x_scaled = (x - x_mean) / x_std
if cap_tactic_outliers:
    # Clamp anything further than two standard deviations from the mean.
    lower_threshold = -2.0
    upper_threshold = 2.0
    x_scaled = np.clip(x_scaled, lower_threshold, upper_threshold)

use_attributed_tactic = st.checkbox('Attributed', key='tactic2')
# Attributed-install columns whose name is contained in the selected column name.
tactic_matches = (
    [name for name in attributred_app_installs_tactic.columns if name in target_column]
    if use_attributed_tactic
    else []
)
if use_attributed_tactic and len(tactic_matches) == 1:
    # Same index alignment a column assignment would perform.
    y = attributred_app_installs_tactic[tactic_matches[0]].reindex(tactic_data.index)
    title = 'Attributed-App_installs_appsflyer'
else:
    if use_attributed_tactic:
        st.warning(
            f"Expected exactly one attributed-installs column matching "
            f"'{target_column}', found {len(tactic_matches)}; showing unattributed installs instead."
        )
    # Pair the tactic metric with installs from the same sheet so both series
    # share rows; fall back to the channel-level frame only if the sheet has
    # no install column.
    if 'app_installs_appsflyer' in tactic_data.columns:
        y = tactic_data["app_installs_appsflyer"]
    else:
        y = data["app_installs_appsflyer"]
    title = 'App_installs_appsflyer'
| | |
| | |
def sigmoid(x, K, a, x0):
    """Evaluate the logistic curve ``K / (1 + exp(-a * (x - x0)))``.

    Args:
        x: Scalar or array-like input.
        K: Upper asymptote (amplitude).
        a: Slope at the midpoint.
        x0: Midpoint, where the curve equals ``K / 2``.

    Returns:
        The curve value(s), with the same shape as ``x``.
    """
    # 1 / (1 + exp(-z)) == 0.5 * (1 + tanh(z / 2)). The tanh form never
    # overflows, so extreme parameter values tried by an optimiser do not
    # trigger floating-point overflow warnings.
    return K * 0.5 * (1.0 + np.tanh(0.5 * a * (x - x0)))
| |
|
| | |
| | |
| | |
| | |
# Starting values for the logistic fit; the inputs below let the user adjust them.
initial_K = np.max(y)
initial_a = 1
initial_x0 = 0
scaled_min = float(np.min(x_scaled))
scaled_max = float(np.max(x_scaled))
K = st.number_input('K (Amplitude)', min_value=0.01, max_value=float(2.0 * np.max(y)), value=float(initial_K), step=5.0, key='tactic3')
a = st.number_input('a (Slope)', min_value=0.01, max_value=5.0, value=float(initial_a), step=2.0, key='tactic41')
x0 = st.number_input('x0 (Center)', min_value=scaled_min, max_value=scaled_max, value=float(initial_x0), step=2.0, key='tactic4')

try:
    params, _ = curve_fit(sigmoid, x_scaled, y, p0=[K, a, x0], maxfev=20000)
except RuntimeError as fit_error:
    # curve_fit raises RuntimeError when the optimisation does not converge;
    # show a readable message instead of a traceback and end this run.
    st.error(f'Saturation curve fit failed: {fit_error}')
    st.stop()

# The slider works in original units; convert to z-score before evaluating the fit.
x_slider = st.slider('X Value', min_value=float(min(x)), max_value=float(max(x)), value=float(x_mean), step=1., key='tactic7')
x_slider_scaled = (x_slider - x_mean) / x_std
y_slider_fit = sigmoid(x_slider_scaled, *params)

st.write(f'{target_column}: {format_axis(x_slider)}')
st.write(f'Corresponding App_installs: {format_axis(y_slider_fit)}')

# Use the tactic frame here: x and y for this chart come from the tactic
# section, not from the channel-level table.
fig = px.scatter(data_frame=tactic_data, x=x_scaled, y=y, labels={'x': f'{target_column}', 'y': 'App Installs'}, title=title)

# Fitted curve over the observed (scaled) range, drawn as the second trace.
x_fit = np.linspace(scaled_min, scaled_max, 100)
y_fit = sigmoid(x_fit, *params)
fig.add_trace(px.line(x=x_fit, y=y_fit).data[0])
fig.data[1].update(line=dict(color='orange'))
fig.add_vline(x=x_slider_scaled, line_dash='dash', line_color='red', annotation_text=f'{format_numbers(x_slider)}')

# The x axis is plotted in z-scores but labelled in original units: ticks are
# evenly spaced in scaled space and each label is the exact inverse transform
# x_mean + z * x_std, so a label always corresponds to its tick position.
num_ticks = 7
tick_positions = np.linspace(scaled_min, scaled_max, num_ticks)
tick_labels = [format_axis(x_mean + z * x_std) for z in tick_positions]
fig.update_xaxes(tickvals=tick_positions.tolist(), ticktext=tick_labels)

# Fixed-point y ticks with no decimals. ".0f" is the valid d3-format spelling;
# a precision must contain digits. No tickformat is set on x because its tick
# labels are supplied explicitly above.
fig.update_yaxes(tickformat=".0f")

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.update_layout(
    width=600,
    height=600,
)
st.plotly_chart(fig)