# Streamlit page: explore whether sex / smoker status relate to tipping in the
# seaborn "tips" dataset. The user filters the bills in the sidebar, picks the
# tip measure to inspect, and the page shows a KPI, a scatter plot and a short
# text summary for the filtered bills.

# Import
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st

tips = sns.load_dataset("tips")

# Clean data
# Tip expressed as a percentage of the bill, so bills of different sizes can
# be compared on the same scale.
tips["tip_percentage"] = tips["tip"] / tips["total_bill"] * 100
tips_cleaned = tips.drop_duplicates(keep="first")

# Titles
st.title('What characteristics make a good tipper?')
st.subheader(
    "The aim of this page is to enable users to explore whether any of the "
    "given characteristics make a good tipper based on the available data"
)

# Sidebar
# NOTE(review): the original file was collapsed onto one line, so the exact
# extent of the sidebar block is inferred -- every widget up to the filtering
# step is assumed to live in the sidebar. Confirm against the intended layout.
with st.sidebar:
    st.subheader('filters')

    both_genders = sorted(tips_cleaned["sex"].unique().tolist())
    smoker_all = sorted(tips_cleaned["smoker"].unique().tolist())

    # Both filters start with every option selected, i.e. no filtering.
    selected_gender = st.multiselect(
        'Genders to show',
        options=both_genders,
        default=both_genders,
    )
    selected_smoker = st.multiselect(
        'Smoker status',
        options=smoker_all,
        default=smoker_all,
    )

    # Maps the label shown to the user onto the dataframe column to plot.
    feature_options = {
        "tip percentage": "tip_percentage",
        "tip amount": "tip",
    }
    feature_label = st.selectbox("Feature (y-axis)", list(feature_options))
    y_col = feature_options[feature_label]

# Filtered data
matches_filters = (
    tips_cleaned["sex"].isin(selected_gender)
    & tips_cleaned["smoker"].isin(selected_smoker)
)
data = tips_cleaned[matches_filters].dropna(subset=[y_col])

# Clearing every option of a filter leaves nothing to summarise: the mean and
# median would be NaN and the plot would have no points. Tell the user and
# stop this script run instead of rendering "nan" values.
if data.empty:
    st.warning(
        "No bills match the current filters. "
        "Select at least one option in each filter to see results."
    )
    st.stop()

# KPI
mean_val = data[y_col].mean()
st.metric(
    label=f'Average {feature_label}',
    value=f'{mean_val:.2f}',
)

# Plot
p = sns.relplot(data=data, x="total_bill", y=y_col, hue="sex")
# Keep the title in step with the selected y-axis; the original wording is
# retained for the tip-amount view.
if y_col == "tip_percentage":
    plot_title = "Relationship between total bill and the tip percentage"
else:
    plot_title = "Relationship between total bill and the amount tipped"
p.figure.suptitle(plot_title)
st.pyplot(p.figure)
# pyplot keeps a reference to every figure it creates; close this one so that
# repeated Streamlit reruns do not accumulate open figures in memory.
plt.close(p.figure)

# More stats
median_val = data[y_col].median()
count_val = len(data)

# Dynamic text
if y_col == "tip_percentage":
    st.markdown(
        f"Based on the current filters, the average tip percentage is "
        f"{mean_val:.2f}%, with a median of {median_val:.2f}% "
        f"across {count_val} bills."
    )
else:
    st.markdown(
        f"Based on the current filters, the average tip amount is "
        f"{mean_val:.2f} dollars, with a median of {median_val:.2f} dollars "
        f"across {count_val} bills."
    )