Spaces:

docster99
/

Project_1_Pizza_Sales_Analysis

Running

App Files Files Community

Project_1_Pizza_Sales_Analysis / app.py

docster99

Upload folder using huggingface_hub

f018b9d verified 3 months ago

raw

history blame contribute delete

5.99 kB

	import streamlit as st
	import seaborn as sns
	import matplotlib.pyplot as plt
	import pandas as pd
	import numpy as np

	# Load data
	@st.cache_data  # Cache the data loading to speed up app performance
	def load_data():
	    """Read ``processed_data.csv`` and restore the dtypes the dashboard uses.

	    Dtype information is not preserved by a CSV save/load, so the metric
	    columns are cast back to numeric types and ``order_time`` (when the
	    column exists) is parsed back into datetimes.

	    Each metric column is cast on its own, so a failure in one column
	    (missing column, unparsable value, NaN in the integer column) does not
	    skip the cast of the other. Casting stays best-effort: a failure is
	    reported on stdout and the column is left exactly as it was read.

	    Returns:
	        pandas.DataFrame: The loaded data.
	    """
	    # Load pre-processed data from csv file
	    df = pd.read_csv("processed_data.csv")  # replace with your dataset

	    # Explicitly cast metric columns to numeric types after reading from CSV.
	    for column, dtype in (("total_price", float), ("quantity", int)):
	        try:
	            df[column] = df[column].astype(dtype)
	        except (KeyError, TypeError, ValueError) as e:
	            # Best-effort: log and keep the column as read rather than abort.
	            print(f"Error during type casting of '{column}' in app.py: {e}")

	    # Ensure order_time is treated as datetime for plots (it loses its dtype
	    # during CSV save/load).
	    if "order_time" in df.columns:
	        df["order_time"] = pd.to_datetime(df["order_time"])

	    return df

	# Code to create Streamlit app
	def _plot_y_label(plot):
	    """Return the y-axis label for a plot config, or None to keep the default."""
	    if "y" in plot:
	        # Customize y-label for revenue plots
	        if plot["title"] == "Monthly Revenue Trends by Pizza Category":
	            return "Total Revenue (USD)"
	        if plot["y"] != "quantity":
	            return " ".join(plot["y"].split("_")).title()
	        return "Quantity Sold"
	    if plot["type"] == "count":
	        return "Order Count"
	    return None


	def _draw_plot(df, plot, ax):
	    """Draw the chart described by the ``plot`` config dict onto ``ax``."""
	    kind = plot["type"]

	    if kind == "bar_sorted":
	        # Bar Plot: Top Selling Pizzas (custom sorting, keep the top N only)
	        totals = df.groupby(plot["x"])[plot["y"]].sum().reset_index()
	        top_rows = totals.sort_values(by=plot["y"], ascending=False).head(plot["top"])
	        # NOTE(review): newer seaborn releases deprecate `palette` without
	        # `hue`; confirm against the seaborn version this app is pinned to.
	        sns.barplot(data=top_rows, x=plot["x"], y=plot["y"], ax=ax, palette="plasma")
	        ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
	        # Lay out this figure explicitly instead of pyplot's "current" one.
	        ax.figure.tight_layout()

	    elif kind == "bar":
	        # Bar Plot: Pizzas sold per hour of day (no sorting)
	        hourly = df.groupby(plot["x"])[plot["y"]].sum().reset_index()
	        sns.barplot(data=hourly, x=plot["x"], y=plot["y"], ax=ax, palette="rocket", order=np.arange(24))

	        # Show every other tick between 0 and 23 for less clutter
	        ax.set_xticks(np.arange(0, 24, 2))
	        ax.tick_params(axis='x', rotation=0)

	    elif kind == "count":
	        # Count Plot: Category or Size breakdown.
	        # NOTE(review): countplot counts rows of df, not the summed
	        # `quantity` column, although the titles say "Quantity of Pizzas
	        # Sold" — confirm this is the intended measure.
	        sns.countplot(data=df, x=plot["x"], hue=plot["hue"], ax=ax)
	        ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)  # Move legend outside

	    elif kind == "line":
	        # Line Plot: Monthly Revenue Trends
	        sns.lineplot(data=df, x=plot["x"], y=plot["y"], hue=plot["hue"], estimator=plot["estimator"], errorbar=None, marker=plot["marker"], ax=ax)
	        ax.set_xticks(df[plot["x"]].unique())  # Ensure ticks are on actual month values


	def app():
	    """Render the pizza sales dashboard: sidebar key metrics plus report plots.

	    Loads the data through ``load_data()``, shows the key metrics in the
	    sidebar and then renders one matplotlib figure per entry of the local
	    ``plots`` configuration list. When the loaded DataFrame is empty a
	    warning is shown instead, because the "most popular" metrics cannot be
	    computed from zero rows.
	    """
	    # Title for the app
	    st.title("Pizza Sales Data Analysis Dashboard")

	    # Load and cache the data
	    df = load_data()

	    # value_counts().idxmax() raises on an empty frame, so stop early.
	    if df.empty:
	        st.warning("No pizza sales data available to display.")
	        return

	    # Calculate key metrics from dataset: unique orders, total revenue, most
	    # popular size / category / pizza name, and total pizzas sold.
	    total_orders = df['order_id'].nunique()
	    total_revenue = df['total_price'].sum()
	    most_popular_pizza_size = df['pizza_size'].value_counts().idxmax()
	    most_frequent_pizza_category = df['pizza_category'].value_counts().idxmax()
	    most_popular_pizza_name = df['pizza_name'].value_counts().idxmax()
	    total_pizzas_sold = df['quantity'].sum()

	    # Generate Sidebar on dashboard with key metrics
	    st.sidebar.header("Key Metrics")
	    st.sidebar.metric("Total Orders", total_orders)
	    st.sidebar.metric("Total Revenue", f"${total_revenue:,.2f}")
	    st.sidebar.metric("Most Popular Pizza Size", most_popular_pizza_size)
	    st.sidebar.metric("Most Popular Pizza Category", most_frequent_pizza_category)
	    st.sidebar.metric("Most Popular Pizza Name", most_popular_pizza_name)
	    st.sidebar.metric("Total Pizzas Sold", total_pizzas_sold)

	    # Plot Configurations for the Reports Dashboard
	    plots = [
	        {"title": "Top Selling Pizzas (by Quantity)", "type": "bar_sorted", "x": "pizza_name", "y": "quantity", "top": 5},
	        {"title": "Quantity of Pizzas Sold by Hour of the Day", "type": "bar", "x": "order_hour", "y": "quantity"},
	        {"title": "Quantity of Pizzas Sold by Category and Time of the Day", "type": "count", "x": "pizza_category", "hue": "time_of_day"},
	        {"title": "Quantity of Pizzas Sold by Size and Time of the Day", "type": "count", "x": "pizza_size", "hue": "time_of_day"},
	        {"title": "Monthly Revenue Trends by Pizza Category", "type": "line", "x": "order_month", "y": "total_price", "hue": "pizza_category", "estimator": "sum", "marker": "o"},
	    ]

	    # Iterate through the plot configurations above to display them
	    for plot in plots:
	        st.header(plot["title"])
	        fig, ax = plt.subplots(figsize=(10, 6))
	        try:
	            _draw_plot(df, plot, ax)

	            # Axis and Final Display Cleanup
	            ax.set_xlabel(" ".join(plot["x"].split("_")).title())
	            y_label = _plot_y_label(plot)
	            if y_label is not None:
	                ax.set_ylabel(y_label)

	            st.pyplot(fig)
	        finally:
	            # Always close the figure, even when plotting fails, so figures
	            # do not pile up in pyplot's registry across reruns.
	            plt.close(fig)


	# Script entry point: build the dashboard only when this file is executed
	# directly, not when it is imported as a module.
	if __name__ == "__main__":
	    app()