# my_app.py
"""Streamlit dashboard exploring tip percentages in seaborn's "tips" dataset.

The script loads the dataset, derives a ``tip_pct`` column (tip as a
percentage of the total bill, rounded to two decimals), lets the user filter
by day, gender, and smoker status from the sidebar, and then renders summary
metrics plus three charts.

Streamlit re-executes this script from top to bottom on every widget
interaction, so every figure created here is closed right after rendering.
"""
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


def _format_pct(value):
    """Return *value* as a two-decimal percentage string, or "N/A" if missing.

    ``mean()``/``median()`` of an empty selection yield NaN, which would
    otherwise be shown to the user as "nan%".
    """
    if pd.isna(value):
        return "N/A"
    return f"{value:.2f}%"


def _show_figure(fig):
    """Render *fig* in the app, then close it.

    Figures created through ``plt.subplots`` stay registered with pyplot until
    closed; without this they pile up across script reruns.
    """
    st.pyplot(fig)
    plt.close(fig)


# Load dataset
df = sns.load_dataset("tips")
df["tip_pct"] = ((df["tip"] / df["total_bill"]) * 100).round(2)

# App title
st.title("Restaurant Tip Insights")
st.markdown("Explore tip percentages by day, gender, and smoker status.")

# Sidebar filters (plain lists keep the widget options simple Python values)
day_options = df["day"].unique().tolist()
sex_options = df["sex"].unique().tolist()
smoker_options = df["smoker"].unique().tolist()

day = st.sidebar.selectbox("Select a day", day_options)
gender = st.sidebar.multiselect(
    "Select gender(s)", sex_options, default=sex_options
)
smoker = st.sidebar.multiselect(
    "Select smoker status", smoker_options, default=smoker_options
)

# Filter data
filtered = df[
    (df["day"] == day)
    & (df["sex"].isin(gender))
    & (df["smoker"].isin(smoker))
]
has_rows = not filtered.empty

if not has_rows:
    st.warning("No rows match the current filters. Adjust the sidebar selections.")

# KPI
avg_tip = filtered["tip_pct"].mean()
st.metric(label=f"Average Tip % on {day}", value=_format_pct(avg_tip))

# Either median is NaN when that smoker group is filtered out or absent.
median_smokers = filtered.loc[filtered["smoker"] == "Yes", "tip_pct"].median()
median_non_smokers = filtered.loc[filtered["smoker"] == "No", "tip_pct"].median()
st.write(f"Median tip % for smokers: {_format_pct(median_smokers)}")
st.write(f"Median tip % for non-smokers: {_format_pct(median_non_smokers)}")

# Plot 1 — Average Tip Percentage by Day
# Uses the full dataset (not the filtered view) so all days can be compared.
# observed=False keeps the historical grouping behaviour for the categorical
# "day" column and avoids relying on a changing pandas default.
avg_by_day = df.groupby("day", observed=False)["tip_pct"].mean()
fig1, ax1 = plt.subplots(figsize=(6, 4))
avg_by_day.plot(kind="bar", color="skyblue", ax=ax1)
ax1.set_title("Average Tip Percentage by Day")
ax1.set_xlabel("Day of Week")
ax1.set_ylabel("Average Tip Percentage (%)")
ax1.grid(axis="y", linestyle="--", alpha=0.7)
_show_figure(fig1)
st.markdown(
    "**Interpretation:** Friday has the highest average tip percentage, "
    "Saturday the lowest."
)

# Plot 2 — Tip Percentage vs Total Bill by Sex
if has_rows:
    fig2, ax2 = plt.subplots(figsize=(8, 6))
    sns.scatterplot(
        x="total_bill", y="tip_pct", hue="sex", data=filtered, alpha=0.6, ax=ax2
    )
    ax2.set_title("Tip Percentage vs Total Bill by Sex")
    ax2.set_xlabel("Total Bill")
    ax2.set_ylabel("Tip Percentage (%)")
    ax2.grid(True, linestyle="--", alpha=0.5)
    _show_figure(fig2)
    st.markdown(
        "**Interpretation:** Tip % tends to decrease as total bill increases. "
        "Some small bills have very high tip percentages."
    )
else:
    st.info("Tip Percentage vs Total Bill by Sex: no data for the current filters.")

# Plot 3 — Tip Percentage by Smoking Status
if has_rows:
    fig3, ax3 = plt.subplots()
    sns.boxplot(x="smoker", y="tip_pct", data=filtered, palette="Set2", ax=ax3)
    ax3.set_title("Tip Percentage by Smoking Status")
    ax3.set_xlabel("Smoker Status")
    ax3.set_ylabel("Tip Percentage (%)")
    ax3.grid(axis="y", linestyle="--", alpha=0.7)
    _show_figure(fig3)
    st.markdown(
        "**Interpretation:** Smoking status does not largely impact tip %, "
        "but some smokers tip higher."
    )
else:
    st.info("Tip Percentage by Smoking Status: no data for the current filters.")