# docster99's picture
# Upload folder using huggingface_hub
# f018b9d verified
import streamlit as st
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Load data
@st.cache_data  # Cache the loaded frame so Streamlit reruns skip the CSV read
def load_data() -> pd.DataFrame:
    """Load the pre-processed pizza sales data and normalise column dtypes.

    Reads ``processed_data.csv`` (a relative path), casts ``total_price`` to
    float and ``quantity`` to int, and parses ``order_time`` as datetimes when
    that column is present.

    Returns:
        The loaded DataFrame. A metric column whose cast fails keeps the
        dtype inferred by ``read_csv``; the failure is printed, not raised.
    """
    df = pd.read_csv("processed_data.csv")  # replace with your dataset

    # Cast each metric column in its own try block so that a failure in one
    # column does not silently skip the conversion of the other.
    for column, dtype in (("total_price", float), ("quantity", int)):
        try:
            df[column] = df[column].astype(dtype)
        except (KeyError, TypeError, ValueError, OverflowError) as e:
            # Best-effort: report the problem and keep the column as read.
            print(f"Error during type casting in app.py: {e}")

    # A CSV round-trip loses the datetime dtype, so restore it for plotting.
    if "order_time" in df.columns:
        df["order_time"] = pd.to_datetime(df["order_time"])
    return df
# Code to create Streamlit app
# Declarative description of every chart on the dashboard. Each entry names
# the chart kind ("type"), the DataFrame columns it reads, and plot-specific
# options. An optional "ylabel" overrides the y-axis label that would
# otherwise be derived from the "y" column name.
_PLOT_SPECS = [
    {"title": "Top Selling Pizzas (by Quantity)", "type": "bar_sorted", "x": "pizza_name", "y": "quantity", "top": 5},
    {"title": "Quantity of Pizzas Sold by Hour of the Day", "type": "bar", "x": "order_hour", "y": "quantity"},
    {"title": "Quantity of Pizzas Sold by Category and Time of the Day", "type": "count", "x": "pizza_category", "hue": "time_of_day"},
    {"title": "Quantity of Pizzas Sold by Size and Time of the Day", "type": "count", "x": "pizza_size", "hue": "time_of_day"},
    {"title": "Monthly Revenue Trends by Pizza Category", "type": "line", "x": "order_month", "y": "total_price", "hue": "pizza_category", "estimator": "sum", "marker": "o", "ylabel": "Total Revenue (USD)"},
]


def _title_case(column):
    """Turn a snake_case column name into a Title Case axis label."""
    return column.replace("_", " ").title()


def _compute_key_metrics(df):
    """Return the sidebar metrics as an ordered ``{label: value}`` mapping.

    Reads the ``order_id``, ``total_price``, ``pizza_size``,
    ``pizza_category``, ``pizza_name`` and ``quantity`` columns of ``df``.
    Raises ``ValueError`` (from ``idxmax``) if ``df`` has no rows.
    """
    total_revenue = df["total_price"].sum()
    return {
        "Total Orders": df["order_id"].nunique(),
        "Total Revenue": f"${total_revenue:,.2f}",
        "Most Popular Pizza Size": df["pizza_size"].value_counts().idxmax(),
        "Most Popular Pizza Category": df["pizza_category"].value_counts().idxmax(),
        "Most Popular Pizza Name": df["pizza_name"].value_counts().idxmax(),
        "Total Pizzas Sold": df["quantity"].sum(),
    }


def _y_axis_label(plot):
    """Return the y-axis label for a plot spec, or None to keep the default."""
    if "ylabel" in plot:
        return plot["ylabel"]
    if "y" in plot:
        if plot["y"] == "quantity":
            return "Quantity Sold"
        return _title_case(plot["y"])
    if plot["type"] == "count":
        return "Order Count"
    return None


def _draw_plot(df, plot, fig, ax):
    """Draw the chart described by ``plot`` onto ``ax`` (owned by ``fig``)."""
    kind = plot["type"]
    if kind == "bar_sorted":
        # Sum the metric per x value and keep only the largest `top` groups.
        top_items = (
            df.groupby(plot["x"])[plot["y"]]
            .sum()
            .reset_index()
            .sort_values(by=plot["y"], ascending=False)
            .head(plot["top"])
        )
        sns.barplot(data=top_items, x=plot["x"], y=plot["y"], ax=ax, palette="plasma")
        # Restyle the existing tick labels in place rather than re-setting
        # them, which matplotlib warns about when the ticks are not fixed.
        plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
        # Use the figure explicitly instead of pyplot's implicit current one.
        fig.tight_layout()
    elif kind == "bar":
        # Totals per hour, shown in clock order rather than sorted by value.
        hourly = df.groupby(plot["x"])[plot["y"]].sum().reset_index()
        sns.barplot(data=hourly, x=plot["x"], y=plot["y"], ax=ax, palette="rocket", order=np.arange(24))
        ax.set_xticks(np.arange(0, 24, 2))  # Every other hour, for less clutter
        ax.tick_params(axis="x", rotation=0)
    elif kind == "count":
        # NOTE(review): countplot tallies rows of df per group, not the sum of
        # `quantity`, while the chart titles say "Quantity of Pizzas Sold" --
        # confirm that a row count is what is intended here.
        sns.countplot(data=df, x=plot["x"], hue=plot["hue"], ax=ax)
        ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0)  # Legend outside the axes
    elif kind == "line":
        sns.lineplot(
            data=df,
            x=plot["x"],
            y=plot["y"],
            hue=plot["hue"],
            estimator=plot["estimator"],
            errorbar=None,
            marker=plot["marker"],
            ax=ax,
        )
        ax.set_xticks(df[plot["x"]].unique())  # Ticks only on values present in the data


def _render_plot(df, plot):
    """Render one plot spec as a Streamlit section: header plus figure."""
    st.header(plot["title"])
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        _draw_plot(df, plot, fig, ax)
        ax.set_xlabel(_title_case(plot["x"]))
        y_label = _y_axis_label(plot)
        if y_label is not None:
            ax.set_ylabel(y_label)
        st.pyplot(fig)
    finally:
        # Always release the figure, even if drawing or rendering raised, so
        # figures do not accumulate in pyplot's registry across reruns.
        plt.close(fig)


def app():
    """Build the Pizza Sales dashboard: title, sidebar metrics and charts.

    Loads the data through ``load_data()``, shows the key metrics in the
    sidebar, then renders every chart listed in ``_PLOT_SPECS``. If the
    dataset has no rows, a warning is shown instead of metrics and charts.
    """
    st.title("Pizza Sales Data Analysis Dashboard")

    df = load_data()
    if df.empty:
        # The metrics below call idxmax(), which raises on empty data.
        st.warning("No pizza sales data available to display.")
        return

    st.sidebar.header("Key Metrics")
    for label, value in _compute_key_metrics(df).items():
        st.sidebar.metric(label, value)

    for plot in _PLOT_SPECS:
        _render_plot(df, plot)
# Build the dashboard only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    app()