File size: 3,969 Bytes
7c2134f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
baae375
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import streamlit as st
import pandas as pd
import seaborn as sns
import duckdb # For in-memory SQL Quering
import matplotlib.pyplot as plt

# Set the browser tab title and use the wide page layout.
st.set_page_config(page_title="Tips EDA & Insights", layout="wide")

# --- App Header ---
# The title emoji had been stored as mojibake (UTF-8 bytes decoded with a
# legacy code page); it is restored to the intended character here.
st.title("💸 Tipping Behavior Analyzer")
st.markdown("""
Welcome to the interactive explorer for the Seaborn Tips dataset!  
Analyze tipping behavior by gender, day, and party size.  
""")

# --- Data Loading & Caching ---
@st.cache_data(show_spinner=True)
def load_data():
    """Load the seaborn "tips" dataset and derive the tip percentage.

    Adds a ``tip_pct`` column (the tip expressed as a percentage of the
    total bill) and writes the resulting frame to ``tips.parquet`` in the
    current working directory.

    Returns:
        The tips DataFrame, including the derived ``tip_pct`` column.
    """
    dataset = sns.load_dataset("tips")
    tip_ratio = dataset['tip'].div(dataset['total_bill'])
    dataset['tip_pct'] = tip_ratio.mul(100)
    # The parquet copy is what the DuckDB query function reads from.
    dataset.to_parquet("tips.parquet")
    return dataset

# Module-level frame used for the sidebar option lists and the charts below.
tips = load_data()

# --- DuckDB Query Function ---
@st.cache_data(show_spinner=True)
def query_duckdb(gender, day):
    """Return the rows of ``tips.parquet`` matching a gender and a day.

    Args:
        gender: Value compared against the ``sex`` column.
        day: Value compared against the ``day`` column.

    Returns:
        A pandas DataFrame holding the matching rows.
    """
    # Bind the filter values as prepared-statement parameters instead of
    # interpolating them into the SQL text, so a quote or other special
    # character in a value can neither break nor alter the query.
    query = """
        SELECT * FROM 'tips.parquet'
        WHERE sex = ? AND day = ?
    """
    # str() mirrors the string formatting the interpolated version applied.
    return duckdb.execute(query, [str(gender), str(day)]).df()

# --- Sidebar Controls ---
# The header emoji had been stored as mojibake; restored to the intended one.
st.sidebar.header("🔎 Filter Data")
gender = st.sidebar.selectbox("Select Gender", options=tips['sex'].unique())
day = st.sidebar.selectbox("Select Day", options=tips['day'].unique())
# The slider spans the party sizes present in the data and starts at the
# smallest one; the bounds are computed once instead of three times.
min_party, max_party = int(tips['size'].min()), int(tips['size'].max())
party_size = st.sidebar.slider("Party Size", min_party, max_party, min_party)
hue_option = st.sidebar.selectbox("Color by (hue)", options=['smoker', 'time', 'day', 'sex'])

# Gender and day are filtered in SQL; party size is applied to the result.
filtered = query_duckdb(gender, day)
filtered = filtered[filtered['size'] == party_size]

# --- KPI Section ---
st.subheader("📊 Key Performance Indicator")
# The mean of an empty selection is NaN; show "N/A" instead of "nan%".
# `mean_tip` keeps its value either way because later sections read it.
mean_tip = filtered['tip_pct'].mean()
if pd.isna(mean_tip):
    st.metric("Average Tip Percentage", "N/A")
else:
    st.metric("Average Tip Percentage", f"{mean_tip:.2f}%")

# --- Data Table ---
# Shows the rows left after the gender, day and party-size filters.
# The subheader emoji had been stored as mojibake; restored here.
st.subheader("🗃️ Filtered Data")
st.dataframe(filtered)

# --- Visualizations ---
# Both charts are drawn from the full `tips` frame, not the filtered subset.
st.subheader("📈 Visualizations")

col1, col2 = st.columns(2)

with col1:
    st.markdown("#### Tip Percentage Distribution by Gender")
    fig1, ax1 = plt.subplots()
    sns.boxplot(data=tips, x="sex", y="tip_pct", hue=hue_option, ax=ax1)
    ax1.set_title(f"Tip Percentage Distribution by Gender (Hue: {hue_option})")
    st.pyplot(fig1)
    # pyplot keeps every figure it creates registered until it is closed;
    # close it after rendering so script reruns do not accumulate figures.
    plt.close(fig1)

with col2:
    st.markdown("#### Tip Percentage vs. Party Size")
    fig2, ax2 = plt.subplots()
    sns.scatterplot(data=tips, x="size", y="tip_pct", hue=hue_option, ax=ax2)
    ax2.set_title(f"Tip Percentage vs. Party Size (Hue: {hue_option})")
    st.pyplot(fig2)
    plt.close(fig2)

st.markdown("#### Average Tip Percentage by Day")
# `day` is presumably a categorical column in the seaborn dataset (confirm).
# Passing `observed=False` pins the grouping behaviour this code has always
# had instead of relying on the deprecated implicit default.
mean_tip_by_day = tips.groupby('day', observed=False)['tip_pct'].mean()
st.bar_chart(mean_tip_by_day)

# --- Dynamic Insight ---
st.subheader("💡 Insight")
# `mean_tip` is NaN when no rows match the filters; say so explicitly
# rather than reporting an average of "nan%".
if pd.isna(mean_tip):
    st.write(
        f"No records found for **{gender}** customers on **{day}s** "
        f"in a party of size **{party_size}**."
    )
else:
    st.write(
        f"On **{day}s**, for **{gender}** customers in a party of size **{party_size}**, "
        f"the average tip percentage is **{mean_tip:.2f}%**."
    )

# --- Cache Invalidation Patterns ---
# ...existing code...

# --- Authors & Plot Explanations ---
# Static footer: contributor names, a description of each chart, and a short
# project summary. The emoji in the header and in the "Plot Explanations"
# heading had been stored as mojibake; they are restored here.
st.markdown("---")
st.header("👨‍💻 Project Contributors")
st.markdown("""
**Muhammad Ibrahim**  
**Asalun Hye Arnob**

---

### 📊 Plot Explanations

- **Tip Percentage Distribution by Gender (Box Plot):**  
  This plot shows how tip percentages vary between male and female customers. The box represents the middle 50% of values, the line inside is the median, and dots outside the box are outliers. It helps us compare tipping habits by gender.

- **Tip Percentage vs. Party Size (Scatter Plot):**  
  This plot displays individual tip percentages for each party size. Each dot is a meal. It helps us see if larger groups tend to tip more or less, and spot any patterns or clusters.

- **Average Tip Percentage by Day (Bar Chart):**  
  This chart shows the average tip percentage for each day of the week. It helps us identify which days have higher or lower tipping rates.

---

**Project Summary:**  
We loaded and cleaned the tips dataset, created a tip percentage variable, and built interactive visualizations to explore how tipping behavior varies by gender, day, and party size.  
Our app uses DuckDB for fast queries and Streamlit for a user-friendly interface.
""")