# Source: Hugging Face Space "docster99/Project_1_Pizza_Sales_Analysis" (status: Running).
# File size: 5,986 bytes.
# (Per-line commit-hash gutter from the web file viewer omitted; it is not part of the program.)

import streamlit as st
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Load data
@st.cache_data  # Cache the data loading to speed up app performance
def load_data() -> pd.DataFrame:
    """Load the pre-processed pizza sales data from ``processed_data.csv``.

    After reading the CSV, ``total_price`` is cast to ``float`` and
    ``quantity`` to ``int``. Each cast is attempted independently: if one
    column is missing or cannot be converted, the problem is printed and the
    other column is still cast. When an ``order_time`` column is present it is
    parsed with ``pd.to_datetime``.

    Returns:
        The loaded DataFrame with the conversions above applied where possible.
    """
    # Load pre-processed data from csv file
    df = pd.read_csv("processed_data.csv")

    # Explicitly cast metric columns to numeric types after reading from CSV.
    # One try per column, so a failure on the first does not skip the second.
    for column, dtype in (("total_price", float), ("quantity", int)):
        try:
            df[column] = df[column].astype(dtype)
        except (KeyError, TypeError, ValueError) as e:
            # Best-effort: report the problem and keep the column as pandas read it.
            print(f"Error during type casting in app.py: {e}")

    # Ensure order_time is treated as datetime for plots (the dtype is lost
    # when the data is saved to and reloaded from CSV).
    if 'order_time' in df.columns:
        df['order_time'] = pd.to_datetime(df['order_time'])

    return df

# Code to create Streamlit app
def app():
    """Render the Pizza Sales Data Analysis dashboard.

    Loads the dataset through ``load_data()``, shows headline metrics in the
    Streamlit sidebar, and then draws one seaborn/matplotlib chart for each
    entry of the local ``plots`` configuration list. If the loaded DataFrame
    is empty, a warning is shown and nothing else is rendered.
    """
    # Title for the app
    st.title("Pizza Sales Data Analysis Dashboard")

    # Load and cache the data
    df = load_data()

    # With no rows, value_counts().idxmax() below would raise ValueError.
    if df.empty:
        st.warning("No pizza sales data available to display.")
        return

    # Calculate key metrics from dataset
    total_orders = df['order_id'].nunique()
    total_revenue = df['total_price'].sum()
    most_popular_pizza_size = df['pizza_size'].value_counts().idxmax()
    most_frequent_pizza_category = df['pizza_category'].value_counts().idxmax()
    # NOTE(review): this ranks pizzas by number of rows, while the
    # "Top Selling Pizzas" chart ranks by summed quantity - the two can differ.
    most_popular_pizza_name = df['pizza_name'].value_counts().idxmax()
    total_pizzas_sold = df['quantity'].sum()

    # Generate Sidebar on dashboard with key metrics
    st.sidebar.header("Key Metrics")
    st.sidebar.metric("Total Orders", total_orders)
    st.sidebar.metric("Total Revenue", f"${total_revenue:,.2f}")
    st.sidebar.metric("Most Popular Pizza Size", most_popular_pizza_size)
    st.sidebar.metric("Most Popular Pizza Category", most_frequent_pizza_category)
    st.sidebar.metric("Most Popular Pizza Name", most_popular_pizza_name)
    st.sidebar.metric("Total Pizzas Sold", total_pizzas_sold)

    # Plot configurations for the reports dashboard. "type" selects the
    # drawing branch below; the remaining keys are read by that branch.
    plots = [
        {"title": "Top Selling Pizzas (by Quantity)", "type": "bar_sorted", "x": "pizza_name", "y": "quantity", "top": 5},
        {"title": "Quantity of Pizzas Sold by Hour of the Day", "type": "bar", "x": "order_hour", "y": "quantity"},
        {"title": "Quantity of Pizzas Sold by Category and Time of the Day", "type": "count", "x": "pizza_category", "hue": "time_of_day"},
        {"title": "Quantity of Pizzas Sold by Size and Time of the Day", "type": "count", "x": "pizza_size", "hue": "time_of_day"},
        {"title": "Monthly Revenue Trends by Pizza Category", "type": "line", "x": "order_month", "y": "total_price", "hue": "pizza_category", "estimator": "sum", "marker": "o"},
    ]

    # Iterate through the plot configurations above to display them
    for plot in plots:
        st.header(plot["title"])
        fig, ax = plt.subplots(figsize=(10, 6))

        # try/finally so the figure is released even if drawing raises.
        try:
            if plot["type"] == "bar_sorted":
                # Bar Plot: Top Selling Pizzas (sorted by summed y, top N kept)
                plot_var = (
                    df.groupby(plot["x"])[plot["y"]]
                    .sum()
                    .reset_index()
                    .sort_values(by=plot["y"], ascending=False)
                    .head(plot["top"])
                )
                sns.barplot(data=plot_var, x=plot["x"], y=plot["y"], ax=ax, palette="plasma")
                # Rotate the existing labels in place instead of re-setting them.
                plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
                fig.tight_layout()

            elif plot["type"] == "bar":
                # Bar Plot: Pizzas sold per hour of day (No sorting)
                hourly_var = df.groupby(plot["x"])[plot["y"]].sum().reset_index()
                sns.barplot(data=hourly_var, x=plot["x"], y=plot["y"], ax=ax, palette="rocket", order=np.arange(24))

                # Show every other hour for less clutter. order=np.arange(24)
                # puts hour h at bar position h, so the labels are set
                # explicitly to the tick positions; moving the ticks alone can
                # leave stale per-bar labels attached to the wrong hours.
                hour_ticks = np.arange(0, 24, 2)
                ax.set_xticks(hour_ticks)
                ax.set_xticklabels([str(hour) for hour in hour_ticks])
                ax.tick_params(axis='x', rotation=0)

            elif plot["type"] == "count":
                # Count Plot: Category or Size breakdown
                # NOTE(review): countplot tallies rows rather than summing the
                # quantity column - confirm that matches the "Quantity" titles.
                sns.countplot(data=df, x=plot["x"], hue=plot["hue"], ax=ax)
                ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)  # Move legend outside

            elif plot["type"] == "line":
                # Line Plot: Monthly Revenue Trends
                sns.lineplot(data=df, x=plot["x"], y=plot["y"], hue=plot["hue"], estimator=plot["estimator"], errorbar=None, marker=plot["marker"], ax=ax)
                ax.set_xticks(df[plot["x"]].unique())  # Ensure ticks are on actual month values

            # Axis and Final Display Cleanup
            ax.set_xlabel(plot["x"].replace("_", " ").title())

            if "y" in plot:
                # Customize y-label for revenue plots
                if plot["title"] == "Monthly Revenue Trends by Pizza Category":
                    ax.set_ylabel("Total Revenue (USD)")
                elif plot["y"] != "quantity":
                    ax.set_ylabel(plot["y"].replace("_", " ").title())
                else:
                    ax.set_ylabel("Quantity Sold")
            elif plot["type"] == "count":
                ax.set_ylabel("Order Count")

            st.pyplot(fig)
        finally:
            plt.close(fig)  # Close the figure to free up memory


# Build the dashboard when this file is run as a script rather than imported.
if __name__ == "__main__":
    app()