File size: 5,986 Bytes
04d791c f018b9d 04d791c 2cb69f2 04d791c 2cb69f2 e2d2837 2cb69f2 75582a8 2cb69f2 04d791c 9cf3b68 04d791c e2d2837 2cb69f2 04d791c e2d2837 2cb69f2 e2d2837 75582a8 e2d2837 04d791c e2d2837 2cb69f2 04d791c e2d2837 75582a8 e2d2837 2cb69f2 04d791c 4aa8a1c 2cb69f2 04d791c 2cb69f2 04d791c 2cb69f2 4aa8a1c 3395ccd 4aa8a1c 3395ccd 4aa8a1c 2cb69f2 75582a8 2cb69f2 04d791c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
import streamlit as st
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Load data
@st.cache_data  # Cache the data loading to speed up app performance
def load_data(path: str = "processed_data.csv") -> pd.DataFrame:
    """Load the pre-processed pizza sales CSV and normalise its column types.

    CSV round-trips lose dtype information, so the metric columns are
    converted back to numeric types and ``order_time`` back to datetime.

    Args:
        path: Location of the CSV file to read. Defaults to the
            ``processed_data.csv`` file the dashboard has always used.

    Returns:
        The loaded DataFrame. When present, ``total_price`` is float
        (unparseable cells become NaN), ``quantity`` is int (unparseable or
        missing cells become 0) and ``order_time`` is datetime.
    """
    import logging  # local import so the block does not depend on file-level changes

    log = logging.getLogger(__name__)
    df = pd.read_csv(path)

    # Convert each metric column independently: a problem in one column must
    # not prevent the other from being converted, and a single bad cell must
    # not leave the whole column as strings.
    for column, as_integer in (("total_price", False), ("quantity", True)):
        if column not in df.columns:
            continue
        numeric = pd.to_numeric(df[column], errors="coerce")
        # Cells that were present but could not be parsed as numbers.
        coerced = int(numeric.isna().sum() - df[column].isna().sum())
        if coerced:
            log.warning(
                "%d non-numeric value(s) in column %r were coerced", coerced, column
            )
        if as_integer:
            # NaN cannot be stored in an int column; count such rows as 0 sold.
            df[column] = numeric.fillna(0).astype(int)
        else:
            df[column] = numeric.astype(float)

    # Ensure order_time is treated as datetime for plots (the dtype is lost
    # during CSV save/load).
    if "order_time" in df.columns:
        df["order_time"] = pd.to_datetime(df["order_time"])
    return df
# Code to create Streamlit app
# Declarative description of every chart on the dashboard. "type" selects the
# renderer, "x"/"y"/"hue" name DataFrame columns, and the optional "ylabel"
# overrides the automatically derived y-axis label.
_PLOT_SPECS = [
    {"title": "Top Selling Pizzas (by Quantity)", "type": "bar_sorted", "x": "pizza_name", "y": "quantity", "top": 5},
    {"title": "Quantity of Pizzas Sold by Hour of the Day", "type": "bar", "x": "order_hour", "y": "quantity"},
    {"title": "Quantity of Pizzas Sold by Category and Time of the Day", "type": "count", "x": "pizza_category", "hue": "time_of_day"},
    {"title": "Quantity of Pizzas Sold by Size and Time of the Day", "type": "count", "x": "pizza_size", "hue": "time_of_day"},
    {
        "title": "Monthly Revenue Trends by Pizza Category",
        "type": "line",
        "x": "order_month",
        "y": "total_price",
        "hue": "pizza_category",
        "estimator": "sum",
        "marker": "o",
        "ylabel": "Total Revenue (USD)",
    },
]


def _humanize(column):
    """Turn a snake_case column name into a Title Case axis label."""
    return " ".join(column.split("_")).title()


def _y_axis_label(spec):
    """Return the y-axis label for a plot spec, or None to keep the default."""
    if "ylabel" in spec:
        return spec["ylabel"]
    if "y" in spec:
        return "Quantity Sold" if spec["y"] == "quantity" else _humanize(spec["y"])
    if spec["type"] == "count":
        return "Order Count"
    return None


def _draw_top_sellers(df, spec, ax):
    """Draw a bar chart of the ``spec["top"]`` largest summed y-values per x."""
    totals = df.groupby(spec["x"])[spec["y"]].sum().reset_index()
    leaders = totals.sort_values(by=spec["y"], ascending=False).head(spec["top"])
    # NOTE(review): seaborn >= 0.13 deprecates `palette` without `hue`;
    # revisit this call when the project pins a seaborn version.
    sns.barplot(data=leaders, x=spec["x"], y=spec["y"], ax=ax, palette="plasma")
    # Restyle the existing tick labels in place; re-assigning them through
    # set_xticklabels() triggers matplotlib's fixed-locator warning.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
    ax.figure.tight_layout()


def _draw_hourly(df, spec, ax):
    """Draw summed y-values for each hour 0-23, in clock order."""
    hourly = df.groupby(spec["x"])[spec["y"]].sum().reset_index()
    hours = np.arange(24)
    sns.barplot(data=hourly, x=spec["x"], y=spec["y"], ax=ax, palette="rocket", order=hours)
    # Show every other tick for less clutter. Because `order` is 0..23, bar
    # position i is hour i. The labels are set explicitly: on a categorical
    # axis labels can be assigned by tick index rather than tick position, so
    # thinning the ticks alone would label position 2 as hour "1".
    shown = hours[::2]
    ax.set_xticks(shown)
    ax.set_xticklabels([str(hour) for hour in shown])
    ax.tick_params(axis='x', rotation=0)


def _draw_count(df, spec, ax):
    """Draw a count plot of rows per x category, split by hue."""
    sns.countplot(data=df, x=spec["x"], hue=spec["hue"], ax=ax)
    ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)  # Move legend outside


def _draw_trend(df, spec, ax):
    """Draw one aggregated line per hue level across the x values."""
    sns.lineplot(
        data=df,
        x=spec["x"],
        y=spec["y"],
        hue=spec["hue"],
        estimator=spec["estimator"],
        errorbar=None,
        marker=spec["marker"],
        ax=ax,
    )
    # Put ticks on the x values that actually occur; NaN is not a valid tick.
    ax.set_xticks(df[spec["x"]].dropna().unique())


# Maps a plot spec's "type" to the function that draws it.
_RENDERERS = {
    "bar_sorted": _draw_top_sellers,
    "bar": _draw_hourly,
    "count": _draw_count,
    "line": _draw_trend,
}


def app():
    """Render the pizza sales dashboard: title, sidebar metrics and charts.

    Loads the data through ``load_data()``, shows the key metrics in the
    sidebar and then draws one chart per entry of ``_PLOT_SPECS``. If the
    dataset has no rows a warning is shown instead, because the
    "most popular" metrics are undefined for empty data.
    """
    # Title for the app
    st.title("Pizza Sales Data Analysis Dashboard")

    # Load and cache the data
    df = load_data()
    if df.empty:
        # idxmax() on an empty value_counts() raises, so stop early.
        st.warning("No pizza sales data available to display.")
        return

    # Calculate key metrics from dataset
    total_orders = df['order_id'].nunique()
    total_revenue = df['total_price'].sum()
    most_popular_pizza_size = df['pizza_size'].value_counts().idxmax()
    most_frequent_pizza_category = df['pizza_category'].value_counts().idxmax()
    most_popular_pizza_name = df['pizza_name'].value_counts().idxmax()
    total_pizzas_sold = df['quantity'].sum()

    # Generate Sidebar on dashboard with key metrics
    st.sidebar.header("Key Metrics")
    st.sidebar.metric("Total Orders", total_orders)
    st.sidebar.metric("Total Revenue", f"${total_revenue:,.2f}")
    st.sidebar.metric("Most Popular Pizza Size", most_popular_pizza_size)
    st.sidebar.metric("Most Popular Pizza Category", most_frequent_pizza_category)
    st.sidebar.metric("Most Popular Pizza Name", most_popular_pizza_name)
    st.sidebar.metric("Total Pizzas Sold", total_pizzas_sold)

    # Draw every configured chart
    for spec in _PLOT_SPECS:
        st.header(spec["title"])
        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            renderer = _RENDERERS.get(spec["type"])
            if renderer is not None:  # unknown types leave the axes empty
                renderer(df, spec, ax)

            # Axis and Final Display Cleanup
            ax.set_xlabel(_humanize(spec["x"]))
            y_label = _y_axis_label(spec)
            if y_label is not None:
                ax.set_ylabel(y_label)
            st.pyplot(fig)
        finally:
            # Close the figure to free up memory, even if drawing failed
            plt.close(fig)
# Build the dashboard only when this file is run as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    app()
|