# my_app.py
"""Streamlit dashboard exploring tip percentages in ``tips.csv``.

The sidebar selects one day plus any combination of gender and smoker
status.  The page then shows:

* a KPI with the mean ``tip_pct`` of the filtered rows,
* a bar chart of mean ``tip_pct`` per day (always computed on the full data),
* a scatter plot of ``tip_pct`` against ``total_bill`` coloured by sex,
* a box plot of ``tip_pct`` by smoker status,

each followed by a short textual interpretation.
"""
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st

# Columns this script reads from the CSV.
REQUIRED_COLUMNS = ("day", "sex", "smoker", "total_bill", "tip_pct")


def _format_pct(value: float) -> str:
    """Return *value* as ``"12.34%"``, or ``"N/A"`` when it is missing.

    The mean/median of an empty selection is NaN; showing "nan%" to the
    user is confusing, so that case is rendered as "N/A" instead.
    """
    return "N/A" if pd.isna(value) else f"{value:.2f}%"


# Load dataset from CSV
df = pd.read_csv("tips.csv")

# Fail with a readable message instead of a KeyError traceback further down.
missing_columns = [col for col in REQUIRED_COLUMNS if col not in df.columns]
if missing_columns:
    st.error(f"tips.csv is missing required column(s): {', '.join(missing_columns)}")
    st.stop()

# App title
st.title("Restaurant Tip Insights")
st.markdown("How do day, gender, and smoking status affect tip percentages in a restaurant?.")

# Sidebar filters
day = st.sidebar.selectbox("Select a day", df['day'].unique())
gender = st.sidebar.multiselect("Select gender(s)", df['sex'].unique(), default=df['sex'].unique())
smoker = st.sidebar.multiselect("Select smoker status", df['smoker'].unique(), default=df['smoker'].unique())

# Filter data
filtered = df[(df['day'] == day) & (df['sex'].isin(gender)) & (df['smoker'].isin(smoker))]

# KPI
avg_tip = filtered['tip_pct'].mean()
st.metric(label=f"Average Tip % on {day}", value=_format_pct(avg_tip))

# Plot 1 — Average Tip Percentage by Day (uses the full dataset, not the filters)
avg_by_day = df.groupby("day")["tip_pct"].mean()
fig1, ax1 = plt.subplots(figsize=(6, 4))
avg_by_day.plot(kind="bar", color="skyblue", ax=ax1)
ax1.set_title("Average Tip Percentage by Day")
ax1.set_xlabel("Day of Week")
ax1.set_ylabel("Average Tip Percentage (%)")
ax1.grid(axis="y", linestyle="--", alpha=0.7)
st.pyplot(fig1)
# pyplot keeps every figure alive until it is closed; Streamlit reruns this
# script on each interaction, so close explicitly to avoid accumulating them.
plt.close(fig1)

st.markdown(
    "**Interpretation:** This bar chart shows the average tip percentage for each day of the week present in the dataset (Thursday, Friday, Saturday, and Sunday)."
    " Friday has the highest average tip percentage, while Saturday has the lowest."
    " Thursday and Sunday have similar average tip percentages, falling between Friday and Saturday."
)

# The remaining charts depend on the filtered rows; with nothing selected
# there is nothing to plot and every statistic would be NaN.
if filtered.empty:
    st.info("No records match the selected filters. Adjust the sidebar selections to see the filtered charts.")
    st.stop()

# Plot 2 — Tip Percentage vs Total Bill by Sex
fig2, ax2 = plt.subplots(figsize=(8, 6))
sns.scatterplot(x="total_bill", y="tip_pct", hue="sex", data=filtered, alpha=0.6, ax=ax2)
ax2.set_title("Tip Percentage vs Total Bill by Sex")
ax2.set_xlabel("Total Bill")
ax2.set_ylabel("Tip Percentage (%)")
ax2.grid(True, linestyle="--", alpha=0.5)
st.pyplot(fig2)
plt.close(fig2)

# --- Calculate dynamic interpretation ---
# Either value is NaN when that sex is deselected; _format_pct shows "N/A".
avg_tip_male = filtered[filtered["sex"] == "Male"]["tip_pct"].mean()
avg_tip_female = filtered[filtered["sex"] == "Female"]["tip_pct"].mean()

st.markdown(
    f"**Interpretation:** For the selected filters, the average tip percentage for males is {_format_pct(avg_tip_male)} "
    f"and for females is {_format_pct(avg_tip_female)}. "
    "The scatter plot shows that as the total bill increases, tip percentage tends to decrease, "
    "although there are some cases where smaller bills have higher tip percentages."
)

# Plot 3 — Tip Percentage by Smoking Status
# Axes style is captured when the axes are created, so the figure must be
# built inside the style context.  Scoping it here (instead of a global
# sns.set_style) keeps plots 1 and 2 looking the same on every rerun.
with sns.axes_style("whitegrid"):  # match Colab style
    fig3, ax3 = plt.subplots(figsize=(6, 4))  # match Colab size

# Use filtered data from sidebar.  `palette` without `hue` is deprecated in
# seaborn 0.13, so colour by the x variable; dodge=False keeps one centred
# box per category on older seaborn releases.
sns.boxplot(
    x="smoker",
    y="tip_pct",
    hue="smoker",
    data=filtered,
    palette="Set2",
    dodge=False,
    ax=ax3,
)
# The hue legend only repeats the x tick labels; drop it if one was drawn.
box_legend = ax3.get_legend()
if box_legend is not None:
    box_legend.remove()

# Titles and labels
ax3.set_title("Tip Percentage by Smoking Status")
ax3.set_xlabel("Smoker Status")
ax3.set_ylabel("Tip Percentage (%)")
ax3.grid(axis="y", linestyle="--", alpha=0.7)

# Display plot in Streamlit
st.pyplot(fig3)
plt.close(fig3)

# --- Calculate medians dynamically ---
# NaN (rendered as "N/A") when the corresponding smoker group is filtered out.
median_smokers = filtered[filtered["smoker"] == "Yes"]["tip_pct"].median()
median_non_smokers = filtered[filtered["smoker"] == "No"]["tip_pct"].median()

# --- Display interpretation dynamically ---
st.markdown(
    f"**Interpretation:** The median tip percentage for smokers is {_format_pct(median_smokers)}, "
    f"and for non-smokers it is {_format_pct(median_non_smokers)}. "
    "The box plot suggests that smoking status does not have a large impact on the average tip percentage. "
    "However, there are some instances where smokers gave significantly higher tips."
)