"""Streamlit dashboard that summarises and visualises pizza sales data."""

import logging

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st

logger = logging.getLogger(__name__)

# Axis labels for metric columns whose title-cased column name is not descriptive enough.
_Y_LABEL_OVERRIDES = {
    "quantity": "Quantity Sold",
    "total_price": "Total Revenue (USD)",
}


@st.cache_data  # Cache the loaded frame so Streamlit reruns do not re-read the CSV
def load_data():
    """Load the pre-processed sales data from ``processed_data.csv``.

    ``total_price`` is cast to ``float`` and ``quantity`` to ``int``. Each cast
    is attempted independently, so a failure on one column is logged and does
    not prevent the other column from being converted. If an ``order_time``
    column is present it is parsed to datetime, because the dtype is lost when
    the data is round-tripped through CSV.

    Returns:
        The loaded ``pandas.DataFrame``.
    """
    df = pd.read_csv("processed_data.csv")  # replace with your dataset

    for column, dtype in (("total_price", float), ("quantity", int)):
        try:
            df[column] = df[column].astype(dtype)
        except (KeyError, TypeError, ValueError, OverflowError) as exc:
            # Best effort: keep the column as loaded and report the problem.
            logger.warning(
                "Error during type casting of %r in app.py: %s", column, exc
            )

    if "order_time" in df.columns:
        df["order_time"] = pd.to_datetime(df["order_time"])

    return df


def _humanize(column_name):
    """Turn a snake_case column name into a title-cased label."""
    return " ".join(column_name.split("_")).title()


def _to_python_scalar(value):
    """Convert a NumPy scalar to the equivalent built-in Python scalar."""
    return value.item() if isinstance(value, np.generic) else value


def _y_axis_label(plot):
    """Return the y-axis label for a plot configuration, or None to keep the default."""
    if "y" in plot:
        return _Y_LABEL_OVERRIDES.get(plot["y"], _humanize(plot["y"]))
    if plot["type"] == "count":
        return "Order Count"
    return None


def _draw_plot(df, plot, ax):
    """Draw the chart described by ``plot`` onto ``ax`` using the rows of ``df``."""
    kind = plot["type"]

    if kind == "bar_sorted":
        # Total the metric per category and keep only the largest `top` entries.
        top_items = (
            df.groupby(plot["x"])[plot["y"]]
            .sum()
            .reset_index()
            .sort_values(by=plot["y"], ascending=False)
            .head(plot["top"])
        )
        sns.barplot(data=top_items, x=plot["x"], y=plot["y"], ax=ax, palette="plasma")
        plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
        ax.figure.tight_layout()

    elif kind == "bar":
        # Total the metric per hour; `order` fixes one bar slot per hour 0-23.
        hourly_totals = df.groupby(plot["x"])[plot["y"]].sum().reset_index()
        sns.barplot(
            data=hourly_totals,
            x=plot["x"],
            y=plot["y"],
            ax=ax,
            palette="rocket",
            order=np.arange(24),
        )
        # Show every other hour to reduce clutter. The labels are set
        # explicitly: if the axis carries a fixed label list, moving the ticks
        # alone would pair the labels with the wrong bars.
        hour_ticks = np.arange(0, 24, 2)
        ax.set_xticks(hour_ticks)
        ax.set_xticklabels(hour_ticks)
        ax.tick_params(axis="x", rotation=0)

    elif kind == "count":
        # Count rows per category, split by the hue column.
        sns.countplot(data=df, x=plot["x"], hue=plot["hue"], ax=ax)
        # Move the legend outside the plotting area.
        ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)

    elif kind == "line":
        sns.lineplot(
            data=df,
            x=plot["x"],
            y=plot["y"],
            hue=plot["hue"],
            estimator=plot["estimator"],
            errorbar=None,
            marker=plot["marker"],
            ax=ax,
        )
        # Put ticks on the x values that occur in the data, ignoring missing ones.
        ax.set_xticks(df[plot["x"]].dropna().unique())


def app():
    """Render the pizza sales dashboard: sidebar key metrics plus report charts."""
    st.title("Pizza Sales Data Analysis Dashboard")

    df = load_data()
    if df.empty:
        # idxmax() raises on an empty frame, so stop before computing metrics.
        st.warning("No pizza sales data available to display.")
        return

    # Key metrics computed from the dataset.
    total_orders = df["order_id"].nunique()
    total_revenue = df["total_price"].sum()
    most_popular_pizza_size = df["pizza_size"].value_counts().idxmax()
    most_frequent_pizza_category = df["pizza_category"].value_counts().idxmax()
    most_popular_pizza_name = df["pizza_name"].value_counts().idxmax()
    total_pizzas_sold = _to_python_scalar(df["quantity"].sum())

    # Sidebar with key metrics.
    st.sidebar.header("Key Metrics")
    st.sidebar.metric("Total Orders", total_orders)
    st.sidebar.metric("Total Revenue", f"${total_revenue:,.2f}")
    st.sidebar.metric("Most Popular Pizza Size", most_popular_pizza_size)
    st.sidebar.metric("Most Popular Pizza Category", most_frequent_pizza_category)
    st.sidebar.metric("Most Popular Pizza Name", most_popular_pizza_name)
    st.sidebar.metric("Total Pizzas Sold", total_pizzas_sold)

    # Plot configurations for the reports dashboard.
    plots = [
        {
            "title": "Top Selling Pizzas (by Quantity)",
            "type": "bar_sorted",
            "x": "pizza_name",
            "y": "quantity",
            "top": 5,
        },
        {
            "title": "Quantity of Pizzas Sold by Hour of the Day",
            "type": "bar",
            "x": "order_hour",
            "y": "quantity",
        },
        {
            "title": "Quantity of Pizzas Sold by Category and Time of the Day",
            "type": "count",
            "x": "pizza_category",
            "hue": "time_of_day",
        },
        {
            "title": "Quantity of Pizzas Sold by Size and Time of the Day",
            "type": "count",
            "x": "pizza_size",
            "hue": "time_of_day",
        },
        {
            "title": "Monthly Revenue Trends by Pizza Category",
            "type": "line",
            "x": "order_month",
            "y": "total_price",
            "hue": "pizza_category",
            "estimator": "sum",
            "marker": "o",
        },
    ]

    for plot in plots:
        st.header(plot["title"])
        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            _draw_plot(df, plot, ax)

            ax.set_xlabel(_humanize(plot["x"]))
            y_label = _y_axis_label(plot)
            if y_label is not None:
                ax.set_ylabel(y_label)

            st.pyplot(fig)
        finally:
            # Close the figure even if drawing or rendering failed, to free memory.
            plt.close(fig)


if __name__ == "__main__":
    app()