Upload folder using huggingface_hub
Browse files
app.py
CHANGED
|
@@ -4,29 +4,36 @@ import matplotlib.pyplot as plt
|
|
| 4 |
import pandas as pd
|
| 5 |
|
| 6 |
# Load data
|
|
|
|
| 7 |
def load_data(path="processed_data.csv"):
    """Load the pre-processed pizza sales data into a DataFrame.

    Args:
        path: Location of the CSV file (anything ``pandas.read_csv``
            accepts). Defaults to ``processed_data.csv`` next to the app.

    Returns:
        pandas.DataFrame: The parsed CSV contents.
    """
    return pd.read_csv(path)
|
| 10 |
|
| 11 |
# Code to create Streamlit app
|
| 12 |
def app():
    """Render the pizza sales dashboard: title, sidebar key metrics and plots.

    Data comes from ``load_data()``. The columns read here are ``order_id``,
    ``total_price``, ``pizza_size``, ``pizza_category``, ``pizza_name``,
    ``quantity``, ``time_of_day`` and ``order_month``.
    """
    # Title for the app
    st.title("Pizza Sales Data Analysis Dashboard")
    df = load_data()

    # value_counts().idxmax() raises ValueError on an empty frame, so stop early.
    if df.empty:
        st.warning("No data available to display.")
        return

    # Calculate key metrics
    total_orders = df["order_id"].nunique()
    total_revenue = df["total_price"].sum()
    most_popular_pizza_size = df["pizza_size"].value_counts().idxmax()
    most_frequent_pizza_category = df["pizza_category"].value_counts().idxmax()
    most_popular_pizza_name = df["pizza_name"].value_counts().idxmax()
    # Series.sum() typically yields a NumPy scalar; hand st.metric a built-in int.
    total_pizzas_sold = int(df["quantity"].sum())

    # Display the key metrics in the sidebar
    st.sidebar.header("Key Metrics")
    st.sidebar.metric("Total Orders", total_orders)
    st.sidebar.metric("Total Revenue", f"${total_revenue:,.2f}")
    st.sidebar.metric("Most Popular Pizza Size", most_popular_pizza_size)
    st.sidebar.metric("Most Popular Pizza Category", most_frequent_pizza_category)
    st.sidebar.metric("Most Popular Pizza Name", most_popular_pizza_name)
    st.sidebar.metric("Total Pizzas Sold", total_pizzas_sold)

    # Plot configurations; the plot kind is selected from the title below.
    plots = [
        {"title": "Top Selling Pizzas (by Quantity)", "x": "pizza_name", "y": "quantity", "top": 5},
        {"title": "Quantity of Pizzas Sold by Category and Time of the Day", "x": "pizza_category", "hue": "time_of_day"},
        {"title": "Quantity of Pizzas Sold by Size and Time of the Day", "x": "pizza_size", "hue": "time_of_day"},
        {"title": "Monthly Revenue Trends by Pizza Category", "x": "order_month", "y": "total_price", "hue": "pizza_category", "estimator": "sum", "marker": "o"},
    ]

    for plot in plots:
        st.header(plot["title"])
        fig, ax = plt.subplots()

        if "Top Selling Pizzas" in plot["title"]:
            # Sum the y column per x value and keep the `top` largest groups.
            data_aux = (
                df.groupby(plot["x"])[plot["y"]]
                .sum()
                .reset_index()
                .sort_values(by=plot["y"], ascending=False)
                .head(plot["top"])
            )
            ax.bar(data_aux[plot["x"]].values.tolist(), data_aux[plot["y"]].values.tolist())
        elif "Quantity of Pizzas" in plot["title"]:
            # NOTE(review): countplot counts rows, not the sum of `quantity`.
            sns.countplot(data=df, x=plot["x"], hue=plot["hue"], ax=ax)
        elif "Monthly Revenue" in plot["title"]:
            sns.lineplot(
                data=df,
                x=plot["x"],
                y=plot["y"],
                hue=plot["hue"],
                estimator=plot["estimator"],
                errorbar=None,
                marker=plot["marker"],
                ax=ax,
            )

        # Axis labels are derived from the column names, e.g. "pizza_name" -> "Pizza name".
        ax.set_xlabel(" ".join(plot["x"].split("_")).capitalize())
        if "y" in plot:
            ax.set_ylabel(" ".join(plot["y"].split("_")).capitalize())
        else:
            ax.set_ylabel("Quantity")

        # Only draw a legend when something is labelled; the bar plot has no
        # labelled artists and would otherwise trigger a matplotlib warning.
        handles, _ = ax.get_legend_handles_labels()
        if handles:
            ax.legend(bbox_to_anchor=(1, 1))

        st.pyplot(fig)
        # Close the figure so figures do not pile up as the loop creates new ones.
        plt.close(fig)
|
| 89 |
-
|
| 90 |
|
| 91 |
# Build the dashboard only when this file is run as the main script.
if __name__ == "__main__":
    app()
|
|
|
|
| 4 |
import pandas as pd
|
| 5 |
|
| 6 |
# Load data
|
| 7 |
+
@st.cache_data  # Cache the data loading to speed up app performance
def load_data(path="processed_data.csv"):
    """Load the pre-processed pizza sales data into a DataFrame.

    Args:
        path: Location of the CSV file. Defaults to ``processed_data.csv``.

    Returns:
        pandas.DataFrame: The parsed CSV contents; ``order_time`` is
        converted with ``pandas.to_datetime`` when that column exists.
    """
    df = pd.read_csv(path)

    # A CSV round trip stores datetimes as text, so restore the dtype for plotting.
    if "order_time" in df.columns:
        df["order_time"] = pd.to_datetime(df["order_time"])

    return df
|
| 17 |
|
| 18 |
# Code to create Streamlit app
|
| 19 |
def app():
    """Render the pizza sales dashboard: title, sidebar key metrics and plots.

    Data comes from ``load_data()``. The columns read here are ``order_id``,
    ``total_price``, ``pizza_size``, ``pizza_category``, ``pizza_name``,
    ``quantity``, ``time_of_day`` and ``order_month``.
    """
    # Title for the app
    st.title("Pizza Sales Data Analysis Dashboard")

    # Load the data
    df = load_data()

    # value_counts().idxmax() raises ValueError on an empty frame, so stop early.
    if df.empty:
        st.warning("No data available to display.")
        return

    # Calculate key metrics from dataset
    total_orders = df["order_id"].nunique()
    total_revenue = df["total_price"].sum()
    most_popular_pizza_size = df["pizza_size"].value_counts().idxmax()
    most_frequent_pizza_category = df["pizza_category"].value_counts().idxmax()
    # Series.sum() typically yields a NumPy scalar; hand st.metric a built-in int.
    total_pizzas_sold = int(df["quantity"].sum())

    # Generate Sidebar on dashboard with key metrics
    st.sidebar.header("Key Metrics")
    st.sidebar.metric("Total Orders", total_orders)
    st.sidebar.metric("Total Revenue", f"${total_revenue:,.2f}")
    st.sidebar.metric("Most Popular Pizza Size", most_popular_pizza_size)
    st.sidebar.metric("Most Popular Pizza Category", most_frequent_pizza_category)
    st.sidebar.metric("Total Pizzas Sold", total_pizzas_sold)

    # Plot Configurations for the Reports Dashboard
    plots = [
        {"title": "Top Selling Pizzas (by Quantity)", "type": "bar_sorted", "x": "pizza_name", "y": "quantity", "top": 5},
        {"title": "Quantity of Pizzas Sold by Category and Time of the Day", "type": "count", "x": "pizza_category", "hue": "time_of_day"},
        {"title": "Quantity of Pizzas Sold by Size and Time of the Day", "type": "count", "x": "pizza_size", "hue": "time_of_day"},
        {"title": "Monthly Revenue Trends by Pizza Category", "type": "line", "x": "order_month", "y": "total_price", "hue": "pizza_category", "estimator": "sum", "marker": "o"},
    ]

    # Iterate through the plot configurations above to display them
    for plot in plots:
        st.header(plot["title"])
        fig, ax = plt.subplots(figsize=(10, 6))

        if plot["type"] == "bar_sorted":
            # Bar Plot: sum the y column per x value and keep the `top` largest groups.
            plot_var = (
                df.groupby(plot["x"])[plot["y"]]
                .sum()
                .reset_index()
                .sort_values(by=plot["y"], ascending=False)
                .head(plot["top"])
            )
            # NOTE(review): seaborn 0.13 deprecates `palette` without `hue`;
            # revisit this call when the pinned seaborn version is known.
            sns.barplot(data=plot_var, x=plot["x"], y=plot["y"], ax=ax, palette="plasma")
            ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
            # Lay out this figure explicitly rather than pyplot's "current" figure.
            fig.tight_layout()

        elif plot["type"] == "count":
            # Count Plot: Category or Size breakdown
            # NOTE(review): countplot counts rows, not the sum of `quantity`.
            sns.countplot(data=df, x=plot["x"], hue=plot["hue"], ax=ax)
            ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)  # Move legend outside

        elif plot["type"] == "line":
            # Line Plot: Monthly Revenue Trends
            sns.lineplot(
                data=df,
                x=plot["x"],
                y=plot["y"],
                hue=plot["hue"],
                estimator=plot["estimator"],
                errorbar=None,
                marker=plot["marker"],
                ax=ax,
            )
            # Put ticks on the month values present in the data.
            # Assumes order_month is numeric - TODO confirm against the dataset.
            ax.set_xticks(df[plot["x"]].unique())

        # Axis and Final Display Cleanup
        ax.set_xlabel(" ".join(plot["x"].split("_")).title())

        if "y" in plot and plot["y"] != "quantity":
            ax.set_ylabel(" ".join(plot["y"].split("_")).title())
        elif plot["type"] != "count":
            # Count plots keep the y label seaborn assigned.
            ax.set_ylabel("Quantity")

        st.pyplot(fig)
        plt.close(fig)  # Close the figure to free up memory
|
| 98 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
|
| 100 |
# Build the dashboard only when this file is run as the main script.
if __name__ == "__main__":
    app()
|