Muhammad Ibrahim committed on
Commit
7c2134f
·
verified ·
1 Parent(s): baae375

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -111
app.py CHANGED
@@ -1,112 +1,112 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import seaborn as sns
4
- import duckdb # For in-memory SQL Querying
5
- import matplotlib.pyplot as plt
6
-
7
- st.set_page_config(page_title="Tips EDA & Insights", layout="wide")
8
-
9
- # --- Using App Header ---
10
- st.title("💸 Tipping Behavior Analyzer")
11
- st.markdown("""
12
- Welcome to the interactive explorer for the Seaborn Tips dataset!
13
- Analyze tipping behavior by gender, day, and party size.
14
- """)
15
-
16
- # --- Data Loading & Caching ---
17
- @st.cache_data(show_spinner=True)
18
- def load_data():
19
- tips = sns.load_dataset("tips")
20
- tips['tip_pct'] = (tips['tip'] / tips['total_bill']) * 100
21
- tips.to_parquet("tips.parquet")
22
- return tips
23
-
24
- tips = load_data()
25
-
26
- # --- DuckDB Query Function ---
27
- @st.cache_data(show_spinner=True)
28
- def query_duckdb(gender, day):
29
- query = f"""
30
- SELECT * FROM 'tips.parquet'
31
- WHERE sex = '{gender}' AND day = '{day}'
32
- """
33
- return duckdb.query(query).to_df()
34
-
35
- # --- Sidebar Controls ---
36
- st.sidebar.header("🔎 Filter Data")
37
- gender = st.sidebar.selectbox("Select Gender", options=tips['sex'].unique())
38
- day = st.sidebar.selectbox("Select Day", options=tips['day'].unique())
39
- party_size = st.sidebar.slider("Party Size", int(tips['size'].min()), int(tips['size'].max()), int(tips['size'].min()))
40
- hue_option = st.sidebar.selectbox("Color by (hue)", options=['smoker', 'time', 'day', 'sex'])
41
-
42
- filtered = query_duckdb(gender, day)
43
- filtered = filtered[filtered['size'] == party_size]
44
-
45
- # --- KPI Section ---
46
- st.subheader("📊 Key Performance Indicator")
47
- mean_tip = filtered['tip_pct'].mean()
48
- st.metric("Average Tip Percentage", f"{mean_tip:.2f}%")
49
-
50
- # --- Data Table ---
51
- st.subheader("🗃️ Filtered Data")
52
- st.dataframe(filtered)
53
-
54
- # --- Visualizations ---
55
- st.subheader("📈 Visualizations")
56
-
57
- col1, col2 = st.columns(2)
58
-
59
- with col1:
60
- st.markdown(f"#### Tip Percentage Distribution by Gender")
61
- fig1, ax1 = plt.subplots()
62
- sns.boxplot(data=tips, x="sex", y="tip_pct", hue=hue_option, ax=ax1)
63
- ax1.set_title(f"Tip Percentage Distribution by Gender (Hue: {hue_option})")
64
- st.pyplot(fig1)
65
-
66
- with col2:
67
- st.markdown(f"#### Tip Percentage vs. Party Size")
68
- fig2, ax2 = plt.subplots()
69
- sns.scatterplot(data=tips, x="size", y="tip_pct", hue=hue_option, ax=ax2)
70
- ax2.set_title(f"Tip Percentage vs. Party Size (Hue: {hue_option})")
71
- st.pyplot(fig2)
72
-
73
- st.markdown("#### Average Tip Percentage by Day")
74
- mean_tip_by_day = tips.groupby('day')['tip_pct'].mean()
75
- st.bar_chart(mean_tip_by_day)
76
-
77
- # --- Dynamic Insight ---
78
- st.subheader("💡 Insight")
79
- st.write(
80
- f"On **{day}s**, for **{gender}** customers in a party of size **{party_size}**, "
81
- f"the average tip percentage is **{mean_tip:.2f}%**."
82
- )
83
-
84
- # --- Cache Invalidation Patterns ---
85
- # ...existing code...
86
-
87
- # --- Authors & Plot Explanations ---
88
- st.markdown("---")
89
- st.header("👨‍💻 Project Contributors")
90
- st.markdown("""
91
- **Muhammad Ibrahim**
92
- **Asalun Hye Arnob**
93
-
94
- ---
95
-
96
- ### 📊 Plot Explanations
97
-
98
- - **Tip Percentage Distribution by Gender (Box Plot):**
99
- This plot shows how tip percentages vary between male and female customers. The box represents the middle 50% of values, the line inside is the median, and dots outside the box are outliers. It helps us compare tipping habits by gender.
100
-
101
- - **Tip Percentage vs. Party Size (Scatter Plot):**
102
- This plot displays individual tip percentages for each party size. Each dot is a meal. It helps us see if larger groups tend to tip more or less, and spot any patterns or clusters.
103
-
104
- - **Average Tip Percentage by Day (Bar Chart):**
105
- This chart shows the average tip percentage for each day of the week. It helps us identify which days have higher or lower tipping rates.
106
-
107
- ---
108
-
109
- **Summary:**
110
- We loaded and cleaned the tips dataset, created a tip percentage variable, and built interactive visualizations to explore how tipping behavior varies by gender, day, and party size.
111
- Our app uses DuckDB for fast queries and Streamlit for a user-friendly interface.
112
  """)
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import seaborn as sns
4
+ import duckdb # For in-memory SQL Querying
5
+ import matplotlib.pyplot as plt
6
+
7
+ st.set_page_config(page_title="Tips EDA & Insights", layout="wide")
8
+
9
+ # --- Using App Header ---
10
+ st.title("💸 Tipping Behavior Analyzer")
11
+ st.markdown("""
12
+ Welcome to the interactive explorer for the Seaborn Tips dataset!
13
+ Analyze tipping behavior by gender, day, and party size.
14
+ """)
15
+
16
+ # --- Data Loading & Caching ---
17
+ @st.cache_data(show_spinner=True)
18
+ def load_data():
19
+ tips = sns.load_dataset("tips")
20
+ tips['tip_pct'] = (tips['tip'] / tips['total_bill']) * 100
21
+ tips.to_parquet("tips.parquet")
22
+ return tips
23
+
24
+ tips = load_data()
25
+
26
+ # --- DuckDB Query Function ---
27
+ @st.cache_data(show_spinner=True)
28
+ def query_duckdb(gender, day):
29
+ query = f"""
30
+ SELECT * FROM 'tips.parquet'
31
+ WHERE sex = '{gender}' AND day = '{day}'
32
+ """
33
+ return duckdb.query(query).to_df()
34
+
35
+ # --- Sidebar Controls ---
36
+ st.sidebar.header("🔎 Filter Data")
37
+ gender = st.sidebar.selectbox("Select Gender", options=tips['sex'].unique())
38
+ day = st.sidebar.selectbox("Select Day", options=tips['day'].unique())
39
+ party_size = st.sidebar.slider("Party Size", int(tips['size'].min()), int(tips['size'].max()), int(tips['size'].min()))
40
+ hue_option = st.sidebar.selectbox("Color by (hue)", options=['smoker', 'time', 'day', 'sex'])
41
+
42
+ filtered = query_duckdb(gender, day)
43
+ filtered = filtered[filtered['size'] == party_size]
44
+
45
+ # --- KPI Section ---
46
+ st.subheader("📊 Key Performance Indicator")
47
+ mean_tip = filtered['tip_pct'].mean()
48
+ st.metric("Average Tip Percentage", f"{mean_tip:.2f}%")
49
+
50
+ # --- Data Table ---
51
+ st.subheader("🗃️ Filtered Data")
52
+ st.dataframe(filtered)
53
+
54
+ # --- Visualizations ---
55
+ st.subheader("📈 Visualizations")
56
+
57
+ col1, col2 = st.columns(2)
58
+
59
+ with col1:
60
+ st.markdown(f"#### Tip Percentage Distribution by Gender")
61
+ fig1, ax1 = plt.subplots()
62
+ sns.boxplot(data=tips, x="sex", y="tip_pct", hue=hue_option, ax=ax1)
63
+ ax1.set_title(f"Tip Percentage Distribution by Gender (Hue: {hue_option})")
64
+ st.pyplot(fig1)
65
+
66
+ with col2:
67
+ st.markdown(f"#### Tip Percentage vs. Party Size")
68
+ fig2, ax2 = plt.subplots()
69
+ sns.scatterplot(data=tips, x="size", y="tip_pct", hue=hue_option, ax=ax2)
70
+ ax2.set_title(f"Tip Percentage vs. Party Size (Hue: {hue_option})")
71
+ st.pyplot(fig2)
72
+
73
+ st.markdown("#### Average Tip Percentage by Day")
74
+ mean_tip_by_day = tips.groupby('day')['tip_pct'].mean()
75
+ st.bar_chart(mean_tip_by_day)
76
+
77
+ # --- Dynamic Insight ---
78
+ st.subheader("💡 Insight")
79
+ st.write(
80
+ f"On **{day}s**, for **{gender}** customers in a party of size **{party_size}**, "
81
+ f"the average tip percentage is **{mean_tip:.2f}%**."
82
+ )
83
+
84
+ # --- Cache Invalidation Patterns ---
85
+ # ...existing code...
86
+
87
+ # --- Authors & Plot Explanations ---
88
+ st.markdown("---")
89
+ st.header("👨‍💻 Project Contributors")
90
+ st.markdown("""
91
+ **Muhammad Ibrahim**
92
+ **Asalun Hye Arnob**
93
+
94
+ ---
95
+
96
+ ### 📊 Plot Explanations
97
+
98
+ - **Tip Percentage Distribution by Gender (Box Plot):**
99
+ This plot shows how tip percentages vary between male and female customers. The box represents the middle 50% of values, the line inside is the median, and dots outside the box are outliers. It helps us compare tipping habits by gender.
100
+
101
+ - **Tip Percentage vs. Party Size (Scatter Plot):**
102
+ This plot displays individual tip percentages for each party size. Each dot is a meal. It helps us see if larger groups tend to tip more or less, and spot any patterns or clusters.
103
+
104
+ - **Average Tip Percentage by Day (Bar Chart):**
105
+ This chart shows the average tip percentage for each day of the week. It helps us identify which days have higher or lower tipping rates.
106
+
107
+ ---
108
+
109
+ **Project Summary:**
110
+ We loaded and cleaned the tips dataset, created a tip percentage variable, and built interactive visualizations to explore how tipping behavior varies by gender, day, and party size.
111
+ Our app uses DuckDB for fast queries and Streamlit for a user-friendly interface.
112
  """)