docster99 committed on
Commit
e2d2837
·
verified ·
1 Parent(s): 3b42908

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. Dockerfile +1 -1
  2. app.py +60 -36
Dockerfile CHANGED
@@ -22,7 +22,7 @@ RUN apt-get update && apt-get install -y \
22
 
23
  curl \
24
 
25
- software-properties-common \
26
 
27
  git \
28
 
 
22
 
23
  curl \
24
 
25
+ # software-properties-common \
26
 
27
  git \
28
 
app.py CHANGED
@@ -5,63 +5,87 @@ import pandas as pd
5
 
6
  # Load data
7
  def load_data():
8
- df = pd.read_csv("processed_data.csv") # Replace with your dataset
9
  return df
10
 
11
  # Create Streamlit app
12
  def app():
13
  # Title for the app
14
- st.title("Retail Sales Data Insights Dashboard-Nov 09")
15
-
16
- # Load data
17
  df = load_data()
18
 
19
- # Key Metrics from the data
20
- total_orders = df['Transaction ID'].nunique()
21
- total_products_sold = df['Quantity'].sum()
22
- total_revenue = df['Total Amount'].sum()
23
- most_popular_product_cat = df['Product Category'].value_counts().idxmax()
24
- most_frequent_age_cat = df['Age Category'].value_counts().idxmax()
 
 
 
 
 
 
 
 
 
25
 
26
- # Display metrics in the sidebar
 
 
 
 
 
 
27
  st.sidebar.header("Key Metrics")
28
  st.sidebar.metric("Total Orders", total_orders)
29
- st.sidebar.metric("Total Products Sold", total_products_sold)
30
  st.sidebar.metric("Total Revenue", f"${total_revenue:,.2f}")
31
- st.sidebar.metric("Most Popular Product Category", most_popular_product_cat)
32
- st.sidebar.metric("Most Frequent Age Category", most_frequent_age_cat)
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
-
35
  plots = [
36
- {"title": "Total Products Sold by Product and Age Categories", "x": "Product Category", "hue": "Age Category"},
37
- {"title": "Monthly Revenue Trends by Product Category", "x": "month", "y": "Total Amount", "hue": "Product Category", "estimator": "sum", "marker": "o"},
38
- {"title": "Monthly Revenue Trends by Age Category", "x": "month", "y": "Total Amount", "hue": "Age Category", "estimator": "sum", "marker": "o"},
39
- {"title": "Revenue by Product Category", "x": "Product Category", "y": "Total Amount", "estimator": "sum"},
40
  ]
41
 
42
  for plot in plots:
43
  st.header(plot["title"])
44
 
45
  fig, ax = plt.subplots()
 
 
 
 
 
 
 
 
 
 
46
 
47
- if "Total Products" in plot["title"]:
48
- sns.countplot(data=df, x=plot["x"], hue=plot["hue"], ax=ax)
49
-
50
- if "Monthly Revenue" in plot["title"]:
51
- sns.lineplot(data=df, x=plot["x"], y=plot["y"], hue=plot["hue"], estimator=plot["estimator"], errorbar=None, marker=plot["marker"], ax=ax)
52
-
53
- if "Revenue by Product" in plot["title"]:
54
- sns.barplot(data=df, x=plot["x"], y=plot["y"], estimator=plot["estimator"], errorbar=None, ax=ax)
55
-
56
- ax.set_xlabel(" ".join(plot["x"].split("_")).capitalize())
57
- if "y" in plot.keys():
58
- ax.set_ylabel(" ".join(plot["y"].split("_")).capitalize())
59
- else:
60
- ax.set_ylabel("Quantity")
61
- ax.legend(bbox_to_anchor=(1,1))
62
 
63
- st.pyplot(fig)
64
- plt.show()
65
 
66
 
67
  if __name__ == "__main__":
 
5
 
6
  # Load data
7
  def load_data():
8
+ df = pd.read_csv("processed_data.csv") # replace with your dataset
9
  return df
10
 
11
  # Create Streamlit app
12
  def app():
13
  # Title for the app
14
+ st.title("Pizza Sales Data Analysis Dashboard")
 
 
15
  df = load_data()
16
 
17
+ df = pd.DataFrame(df) # redundant re-wrap: load_data() already returns the DataFrame produced by pd.read_csv
18
+
19
+ # Calculate key metrics
20
+ # Write a code snippet to calculate key metrics from the pizza orders dataframe, including the
21
+ # total number of unique orders, total revenue generated, the most popular pizza size, the most
22
+ # frequent pizza category, total pizzas sold
23
+
24
+ # Code to pull key metrics from dataset
25
+ total_orders = df['order_id'].nunique()
26
+ total_revenue = df['total_price'].sum()
27
+ most_popular_pizza_size = df['pizza_size'].value_counts().idxmax()
28
+ most_frequent_pizza_category = df['pizza_category'].value_counts().idxmax()
29
+ most_popular_pizza_name = df['pizza_name'].value_counts().idxmax()
30
+ total_pizzas_sold = df['quantity'].sum()
31
+
32
 
33
+ # Sidebar with key metrics
34
+ # Write a code snippet to display key metrics in the sidebar of a Streamlit application.
35
+ # Show the total number of orders, total revenue (formatted as currency), the most popular
36
+ # pizza size, the most popular pizza category, and the total number of pizzas sold
37
+ # using the st.sidebar.metric function.
38
+
39
+ # Code to display metrics in sidebar of the report
40
  st.sidebar.header("Key Metrics")
41
  st.sidebar.metric("Total Orders", total_orders)
 
42
  st.sidebar.metric("Total Revenue", f"${total_revenue:,.2f}")
43
+ st.sidebar.metric("Most Popular Pizza Size", most_popular_pizza_size )
44
+ st.sidebar.metric("Most Popular Pizza Category", most_frequent_pizza_category)
45
+ st.sidebar.metric("Most Popilar Pizza Name", most_popular_pizza_name)
46
+ st.sidebar.metric("Total Pizzas Sold", total_pizzas_sold)
47
+
48
+ #"""""""""""""
49
+
50
+ # Provide the details of the plots here **** need to review and update these
51
+ #plots = [
52
+ # {"title": "Top Selling Pizzas (Quantity)", "x": "_________", "y": "___________"},
53
+ #]
54
+ # ---------- **** this needs to be reviewed from low code
55
+
56
+ # Provide the details of the plots here
57
+ # """"""""""""" this is from low code notebook
58
 
 
59
  plots = [
60
+ {"title": "Top Selling Pizzas (by Quantity)", "x": "pizza_name", "y": "quantity", "top": 5}, #Write the appropriate column as per the title given
61
+ {"title": "Quantity of Pizzas Sold by Category and Time of the Day", "x": "pizza_category", "hue": "time_of_day"}, #Write the appropriate column as per the title given
62
+ {"title": "Quantity of Pizzas Sold by Size and Time of the Day", "x": "quantity", "hue": "time_of_day"}, #Write the appropriate column as per the title given -- NOTE(review): title says "by Size" but x is "quantity"; presumably this should be "pizza_size"
63
+ {"title": "Monthly Revenue Trends by Pizza Category", "x": "order_month", "y": "total_price", "hue": "pizza_category", "estimator": "sum", "marker": "o"}, #Write the appropriate column as per the title given
64
  ]
65
 
66
  for plot in plots:
67
  st.header(plot["title"])
68
 
69
  fig, ax = plt.subplots()
70
+
71
+ if "Top Selling Pizzas" in plot["title"]:
72
+ data_aux = df.groupby(plot["x"])[plot["y"]].sum().reset_index().sort_values(by=plot["y"], ascending=False).head(plot["top"])
73
+ ax.bar(data_aux[plot["x"]].values.tolist(), data_aux[plot["y"]].values.tolist())
74
+
75
+ if "Quantity of Pizzas" in plot["title"]:
76
+ sns.countplot(data=df, x=plot["x"], hue=plot["hue"], ax=ax)
77
+
78
+ if "Monthly Revenue" in plot["title"]:
79
+ sns.lineplot(data=df, x=plot["x"], y=plot["y"], hue=plot["hue"], estimator=plot["estimator"], errorbar=None, marker=plot["marker"], ax=ax)
80
 
81
+ ax.set_xlabel(" ".join(plot["x"].split("_")).capitalize())
82
+ if "y" in plot.keys():
83
+ ax.set_ylabel(" ".join(plot["y"].split("_")).capitalize())
84
+ else:
85
+ ax.set_ylabel("Quantity")
86
+ ax.legend(bbox_to_anchor=(1,1))
 
 
 
 
 
 
 
 
 
87
 
88
+ st.pyplot(fig)
 
89
 
90
 
91
  if __name__ == "__main__":