# xiaotongwu05's picture
# Update app.py
# 010e3f7 verified
import streamlit as st
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
# Load data
def load_data(path="processed_data.csv"):
    """Read the sales dataset from a CSV file.

    Args:
        path: Location of the CSV file to read. Defaults to
            ``"processed_data.csv"``, which pandas resolves relative to
            the current working directory; pass another path to use a
            different dataset.

    Returns:
        The parsed file contents as a ``pandas.DataFrame``.
    """
    return pd.read_csv(path)
# Create Streamlit app
def _show_key_metrics(df):
    """Compute the headline sales figures and display them in the sidebar.

    Reads the ``order_id``, ``total_price``, ``pizza_size``,
    ``pizza_category`` and ``quantity`` columns of *df*.
    """
    total_orders = df["order_id"].nunique()
    total_revenue = df["total_price"].sum()
    # idxmax() on the value counts picks the most frequent value.
    most_popular_size = df["pizza_size"].value_counts().idxmax()
    most_frequent_category = df["pizza_category"].value_counts().idxmax()
    total_pizzas_sold = df["quantity"].sum()

    st.sidebar.header("Key Metrics")
    st.sidebar.metric("Total Orders", f"{total_orders:,}")
    st.sidebar.metric("Total Revenue", f"${total_revenue:,.2f}")
    st.sidebar.metric("Most Popular Size", most_popular_size)
    st.sidebar.metric("Most Popular Category", most_frequent_category)
    st.sidebar.metric("Total Pizzas Sold", f"{int(total_pizzas_sold):,}")


def _draw_top_counts(ax, df, plot):
    """Draw a bar chart of the most frequent values of column ``plot["x"]``.

    The ranking uses ``value_counts()``, i.e. the number of rows per
    value, and keeps the first ``plot["top"]`` entries.
    """
    counts = df[plot["x"]].value_counts().head(plot["top"])
    bars = ax.bar(counts.index.tolist(), counts.tolist())
    ax.tick_params(axis="x", rotation=70, labelsize=10)
    ax.set_ylabel("Number of order items")
    # Same labelling mechanism as the count plots, with thousands separators.
    ax.bar_label(
        bars,
        labels=[f"{int(count):,}" for count in counts],
        label_type="edge",
        fontsize=9,
    )


def _draw_counts_by_hue(ax, df, plot):
    """Draw a grouped count plot of ``plot["x"]`` split by ``plot["hue"]``."""
    sns.countplot(data=df, x=plot["x"], hue=plot["hue"], ax=ax)
    ax.set_ylabel("Number of order items")
    for container in ax.containers:
        # A bar may carry a NaN height when a group has no rows; int(NaN)
        # would raise, so such bars get an empty label instead.
        labels = [
            f"{int(value):,}" if pd.notna(value) else ""
            for value in container.datavalues
        ]
        ax.bar_label(container, labels=labels, label_type="edge", fontsize=9)


def _draw_monthly_revenue(ax, df, plot):
    """Draw one revenue line per ``plot["hue"]`` level across months."""
    sns.lineplot(
        data=df,
        x=plot["x"],
        y=plot["y"],
        hue=plot["hue"],
        estimator=plot["estimator"],
        errorbar=None,
        marker=plot["marker"],
        ax=ax,
    )
    ax.set_ylabel("Total Revenue ($)")
    month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    # Set ticks on this Axes directly rather than through pyplot's global
    # "current axes" state.
    # NOTE(review): assumes the month column holds numbers 1-12 - confirm.
    ax.set_xticks(list(range(1, 13)))
    ax.set_xticklabels(month_names)
    ax.set_xlabel("Order Month")


def app():
    """Render the pizza sales dashboard.

    Loads the dataset with ``load_data()``, shows the key metrics in the
    sidebar, and then renders four charts: the top 10 pizza names, pizza
    categories and sizes by time of day, and monthly revenue per category.
    If the dataset has no rows, a warning is shown and nothing else is
    rendered.
    """
    st.title("Pizza Sales Data Analysis Dashboard")

    df = load_data()
    if df.empty:
        # The metrics below call idxmax(), which raises on empty data.
        st.warning("No data available to display.")
        return

    _show_key_metrics(df)

    plots = [
        {"title": "Top 10 Most Popular Pizza Names", "x": "pizza_name", "y": "quantity", "top": 10},
        {"title": "Pizza Categories Ordered by Time of Day", "x": "time_of_day", "hue": "pizza_category"},
        {"title": "Pizza Sizes Ordered by Time of Day", "x": "time_of_day", "hue": "pizza_size"},
        {"title": "Monthly Revenue Trends by Pizza Category", "x": "order_month", "y": "total_price", "hue": "pizza_category", "estimator": "sum", "marker": "o"},
    ]

    for plot in plots:
        title = plot["title"]
        st.header(title)
        fig, ax = plt.subplots(figsize=(8, 6))

        if "Monthly Revenue" in title:
            _draw_monthly_revenue(ax, df, plot)
        else:
            if "Top 10 Most Popular" in title:
                _draw_top_counts(ax, df, plot)
            elif "by Time of Day" in title:
                _draw_counts_by_hue(ax, df, plot)
            # Derive a readable axis label from the column name,
            # e.g. "time_of_day" -> "Time of day".
            ax.set_xlabel(plot["x"].replace("_", " ").capitalize())

        # Only draw a legend when something is labelled; the plain bar
        # chart has no labelled artists and would get an empty legend.
        handles, _ = ax.get_legend_handles_labels()
        if handles:
            ax.legend(bbox_to_anchor=(1, 1))

        st.pyplot(fig)
        # Figures made by plt.subplots stay registered with pyplot until
        # closed; close each one so reruns do not accumulate them.
        plt.close(fig)
# Build the dashboard only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    app()