Spaces:
Sleeping
Sleeping
# my_app.py
"""Streamlit dashboard: how day, gender, and smoking status relate to tips.

Reads ``tips.csv`` from the working directory. The script uses the columns
``day``, ``sex``, ``smoker``, ``total_bill`` and ``tip_pct``.
"""
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st


def format_pct(value):
    """Return ``value`` formatted as ``"12.34%"``, or ``"N/A"`` if missing.

    The mean/median of an empty selection is NaN, which would otherwise be
    shown to the user as ``nan%``.
    """
    if pd.isna(value):
        return "N/A"
    return f"{value:.2f}%"


def show_figure(fig):
    """Render ``fig`` in the app, then close it.

    Streamlit re-executes this script on every widget interaction and pyplot
    keeps each figure registered until it is closed, so figures that are
    never closed accumulate across reruns.
    """
    st.pyplot(fig)
    plt.close(fig)


# Apply the seaborn theme before any figure is created. The theme only
# affects figures created after the call, so setting it next to the last
# plot left the first run unstyled while later reruns were styled.
sns.set_style("whitegrid")

# Load dataset from CSV
df = pd.read_csv("tips.csv")

# App title
st.title("Restaurant Tip Insights")
st.markdown("How do day, gender, and smoking status affect tip percentages in a restaurant?")

# Sidebar filters
day = st.sidebar.selectbox("Select a day", df["day"].unique())
gender = st.sidebar.multiselect(
    "Select gender(s)", df["sex"].unique(), default=df["sex"].unique()
)
smoker = st.sidebar.multiselect(
    "Select smoker status", df["smoker"].unique(), default=df["smoker"].unique()
)

# Filter data: one day, any of the chosen genders, any of the chosen statuses
filtered = df[
    (df["day"] == day)
    & (df["sex"].isin(gender))
    & (df["smoker"].isin(smoker))
]

# KPI
avg_tip = filtered["tip_pct"].mean()
st.metric(label=f"Average Tip % on {day}", value=format_pct(avg_tip))

# Plot 1 — Average Tip Percentage by Day (uses the full dataset, not the filters)
avg_by_day = df.groupby("day")["tip_pct"].mean()
fig1, ax1 = plt.subplots(figsize=(6, 4))
avg_by_day.plot(kind="bar", color="skyblue", ax=ax1)
ax1.set_title("Average Tip Percentage by Day")
ax1.set_xlabel("Day of Week")
ax1.set_ylabel("Average Tip Percentage (%)")
ax1.grid(axis="y", linestyle="--", alpha=0.7)
show_figure(fig1)
# NOTE(review): this interpretation is hard-coded text, not derived from
# avg_by_day; it must be revisited if tips.csv changes.
st.markdown(
    "**Interpretation:** This bar chart shows the average tip percentage for each day of the week present in the dataset (Thursday, Friday, Saturday, and Sunday)."
    " Friday has the highest average tip percentage, while Saturday has the lowest."
    " Thursday and Sunday have similar average tip percentages, falling between Friday and Saturday."
)

# Everything below depends on the sidebar filters. With nothing selected
# there is no data to plot, so explain that and end this run here.
if filtered.empty:
    st.warning("No records match the selected filters.")
    st.stop()

# Plot 2 — Tip Percentage vs Total Bill by Sex
fig2, ax2 = plt.subplots(figsize=(8, 6))
sns.scatterplot(
    x="total_bill", y="tip_pct", hue="sex", data=filtered, alpha=0.6, ax=ax2
)
ax2.set_title("Tip Percentage vs Total Bill by Sex")
ax2.set_xlabel("Total Bill")
ax2.set_ylabel("Tip Percentage (%)")
ax2.grid(True, linestyle="--", alpha=0.5)
show_figure(fig2)

# --- Calculate dynamic interpretation ---
# A group that was filtered out has no rows; format_pct renders it as "N/A".
avg_tip_male = filtered[filtered["sex"] == "Male"]["tip_pct"].mean()
avg_tip_female = filtered[filtered["sex"] == "Female"]["tip_pct"].mean()
st.markdown(
    f"**Interpretation:** For the selected filters, the average tip percentage for males is {format_pct(avg_tip_male)} "
    f"and for females is {format_pct(avg_tip_female)}. "
    "The scatter plot shows that as the total bill increases, tip percentage tends to decrease, "
    "although there are some cases where smaller bills have higher tip percentages."
)

# Plot 3 — Tip Percentage by Smoking Status
fig3, ax3 = plt.subplots(figsize=(6, 4))  # match Colab size
# Use filtered data from sidebar
# NOTE(review): recent seaborn releases warn when `palette` is passed without
# `hue`; confirm the pinned seaborn version before changing this call.
sns.boxplot(x="smoker", y="tip_pct", data=filtered, palette="Set2", ax=ax3)
# Titles and labels
ax3.set_title("Tip Percentage by Smoking Status")
ax3.set_xlabel("Smoker Status")
ax3.set_ylabel("Tip Percentage (%)")
ax3.grid(axis="y", linestyle="--", alpha=0.7)
# Display plot in Streamlit
show_figure(fig3)

# --- Calculate medians dynamically ---
median_smokers = filtered[filtered["smoker"] == "Yes"]["tip_pct"].median()
median_non_smokers = filtered[filtered["smoker"] == "No"]["tip_pct"].median()
# --- Display interpretation dynamically ---
st.markdown(
    f"**Interpretation:** The median tip percentage for smokers is {format_pct(median_smokers)}, "
    f"and for non-smokers it is {format_pct(median_non_smokers)}. "
    "The box plot suggests that smoking status does not have a large impact on the average tip percentage. "
    "However, there are some instances where smokers gave significantly higher tips."
)