iBrokeTheCode committed on
Commit
d97a914
·
1 Parent(s): a80e545

chore: Add Plot section

Browse files
Files changed (3) hide show
  1. app.py +152 -10
  2. requirements.txt +3 -0
  3. src/plots.py +243 -0
app.py CHANGED
@@ -127,7 +127,7 @@ def _(mo):
127
  def _(QueryEnum, query_results: "dict[str, DataFrame]"):
128
  revenue_by_month_year = query_results[QueryEnum.REVENUE_BY_MONTH_YEAR.value]
129
  revenue_by_month_year
130
- return
131
 
132
 
133
  @app.cell
@@ -142,7 +142,7 @@ def _(QueryEnum, query_results: "dict[str, DataFrame]"):
142
  QueryEnum.TOP_10_REVENUE_CATEGORIES.value
143
  ]
144
  top_10_revenue_categories
145
- return
146
 
147
 
148
  @app.cell
@@ -157,7 +157,7 @@ def _(QueryEnum, query_results: "dict[str, DataFrame]"):
157
  QueryEnum.TOP_10_LEAST_REVENUE_CATEGORIES.value
158
  ]
159
  top_10_least_revenue_categories
160
- return
161
 
162
 
163
  @app.cell
@@ -170,7 +170,7 @@ def _(mo):
170
  def _(QueryEnum, query_results: "dict[str, DataFrame]"):
171
  revenue_per_state = query_results[QueryEnum.REVENUE_PER_STATE.value]
172
  revenue_per_state
173
- return
174
 
175
 
176
  @app.cell
@@ -185,7 +185,7 @@ def _(QueryEnum, query_results: "dict[str, DataFrame]"):
185
  QueryEnum.DELIVERY_DATE_DIFFERENCE.value
186
  ]
187
  delivery_date_difference
188
- return
189
 
190
 
191
  @app.cell
@@ -200,7 +200,7 @@ def _(QueryEnum, query_results: "dict[str, DataFrame]"):
200
  QueryEnum.REAL_VS_ESTIMATED_DELIVERED_TIME.value
201
  ]
202
  real_vs_estimated_delivery_time
203
- return
204
 
205
 
206
  @app.cell
@@ -215,7 +215,7 @@ def _(QueryEnum, query_results: "dict[str, DataFrame]"):
215
  QueryEnum.GLOBAL_AMOUNT_ORDER_STATUS.value
216
  ]
217
  global_amount_order_status
218
- return
219
 
220
 
221
  @app.cell
@@ -230,7 +230,7 @@ def _(QueryEnum, query_results: "dict[str, DataFrame]"):
230
  QueryEnum.ORDERS_PER_DAY_AND_HOLIDAYS_2017.value
231
  ]
232
  orders_per_day_and_holidays
233
- return
234
 
235
 
236
  @app.cell
@@ -245,7 +245,7 @@ def _(QueryEnum, query_results: "dict[str, DataFrame]"):
245
  QueryEnum.GET_FREIGHT_VALUE_WEIGHT_RELATIONSHIP.value
246
  ]
247
  freight_value_weight_relationship
248
- return
249
 
250
 
251
  @app.cell
@@ -254,6 +254,34 @@ def _(mo):
254
  return
255
 
256
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
  @app.cell
258
  def _(mo):
259
  mo.md(r"""**A. Revenue by Month in 2017**""")
@@ -261,7 +289,121 @@ def _(mo):
261
 
262
 
263
  @app.cell
264
- def _():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
  return
266
 
267
 
 
127
  def _(QueryEnum, query_results: "dict[str, DataFrame]"):
128
  revenue_by_month_year = query_results[QueryEnum.REVENUE_BY_MONTH_YEAR.value]
129
  revenue_by_month_year
130
+ return (revenue_by_month_year,)
131
 
132
 
133
  @app.cell
 
142
  QueryEnum.TOP_10_REVENUE_CATEGORIES.value
143
  ]
144
  top_10_revenue_categories
145
+ return (top_10_revenue_categories,)
146
 
147
 
148
  @app.cell
 
157
  QueryEnum.TOP_10_LEAST_REVENUE_CATEGORIES.value
158
  ]
159
  top_10_least_revenue_categories
160
+ return (top_10_least_revenue_categories,)
161
 
162
 
163
  @app.cell
 
170
  def _(QueryEnum, query_results: "dict[str, DataFrame]"):
171
  revenue_per_state = query_results[QueryEnum.REVENUE_PER_STATE.value]
172
  revenue_per_state
173
+ return (revenue_per_state,)
174
 
175
 
176
  @app.cell
 
185
  QueryEnum.DELIVERY_DATE_DIFFERENCE.value
186
  ]
187
  delivery_date_difference
188
+ return (delivery_date_difference,)
189
 
190
 
191
  @app.cell
 
200
  QueryEnum.REAL_VS_ESTIMATED_DELIVERED_TIME.value
201
  ]
202
  real_vs_estimated_delivery_time
203
+ return (real_vs_estimated_delivery_time,)
204
 
205
 
206
  @app.cell
 
215
  QueryEnum.GLOBAL_AMOUNT_ORDER_STATUS.value
216
  ]
217
  global_amount_order_status
218
+ return (global_amount_order_status,)
219
 
220
 
221
  @app.cell
 
230
  QueryEnum.ORDERS_PER_DAY_AND_HOLIDAYS_2017.value
231
  ]
232
  orders_per_day_and_holidays
233
+ return (orders_per_day_and_holidays,)
234
 
235
 
236
  @app.cell
 
245
  QueryEnum.GET_FREIGHT_VALUE_WEIGHT_RELATIONSHIP.value
246
  ]
247
  freight_value_weight_relationship
248
+ return (freight_value_weight_relationship,)
249
 
250
 
251
  @app.cell
 
254
  return
255
 
256
 
257
+ @app.cell
258
+ def _():
259
+ from src.plots import (
260
+ plot_revenue_by_month_year,
261
+ plot_real_vs_predicted_delivered_time,
262
+ plot_global_amount_order_status,
263
+ plot_revenue_per_state,
264
+ plot_top_10_least_revenue_categories,
265
+ plot_top_10_revenue_categories_amount,
266
+ plot_top_10_revenue_categories,
267
+ plot_freight_value_weight_relationship,
268
+ plot_delivery_date_difference,
269
+ plot_order_amount_per_day_with_holidays,
270
+ )
271
+ return (
272
+ plot_delivery_date_difference,
273
+ plot_freight_value_weight_relationship,
274
+ plot_global_amount_order_status,
275
+ plot_order_amount_per_day_with_holidays,
276
+ plot_real_vs_predicted_delivered_time,
277
+ plot_revenue_by_month_year,
278
+ plot_revenue_per_state,
279
+ plot_top_10_least_revenue_categories,
280
+ plot_top_10_revenue_categories,
281
+ plot_top_10_revenue_categories_amount,
282
+ )
283
+
284
+
285
  @app.cell
286
  def _(mo):
287
  mo.md(r"""**A. Revenue by Month in 2017**""")
 
289
 
290
 
291
  @app.cell
292
+ def _(plot_revenue_by_month_year, revenue_by_month_year):
293
+ plot_revenue_by_month_year(df=revenue_by_month_year, year=2017)
294
+ return
295
+
296
+
297
+ @app.cell
298
+ def _(mo):
299
+ mo.md(r"""**B. Real vs. Predicted Delivered Time**""")
300
+ return
301
+
302
+
303
+ @app.cell
304
+ def _(plot_real_vs_predicted_delivered_time, real_vs_estimated_delivery_time):
305
+ plot_real_vs_predicted_delivered_time(
306
+ df=real_vs_estimated_delivery_time, year=2017
307
+ )
308
+ return
309
+
310
+
311
+ @app.cell
312
+ def _(mo):
313
+ mo.md(r"""**C. Global Amount of Order Status**""")
314
+ return
315
+
316
+
317
+ @app.cell
318
+ def _(global_amount_order_status, plot_global_amount_order_status):
319
+ plot_global_amount_order_status(df=global_amount_order_status)
320
+ return
321
+
322
+
323
+ @app.cell
324
+ def _(mo):
325
+ mo.md(r"""**D. Revenue per State**""")
326
+ return
327
+
328
+
329
+ @app.cell
330
+ def _(plot_revenue_per_state, revenue_per_state):
331
+ plot_revenue_per_state(df=revenue_per_state)
332
+ return
333
+
334
+
335
+ @app.cell
336
+ def _(mo):
337
+ mo.md(r"""**E. Top 10 Least Revenue by Categories**""")
338
+ return
339
+
340
+
341
+ @app.cell
342
+ def _(plot_top_10_least_revenue_categories, top_10_least_revenue_categories):
343
+ plot_top_10_least_revenue_categories(df=top_10_least_revenue_categories)
344
+ return
345
+
346
+
347
+ @app.cell
348
+ def _(mo):
349
+ mo.md(r"""**F. Top 10 Revenue Categories Amount**""")
350
+ return
351
+
352
+
353
+ @app.cell
354
+ def _(plot_top_10_revenue_categories_amount, top_10_revenue_categories):
355
+ plot_top_10_revenue_categories_amount(df=top_10_revenue_categories)
356
+ return
357
+
358
+
359
+ @app.cell
360
+ def _(mo):
361
+ mo.md(r"""**G. Top 10 Revenue by Categories**""")
362
+ return
363
+
364
+
365
+ @app.cell
366
+ def _(plot_top_10_revenue_categories, top_10_revenue_categories):
367
+ plot_top_10_revenue_categories(df=top_10_revenue_categories)
368
+ return
369
+
370
+
371
+ @app.cell
372
+ def _(mo):
373
+ mo.md(r"""**H. Freight Value vs. Product Weight**""")
374
+ return
375
+
376
+
377
+ @app.cell
378
+ def _(
379
+ freight_value_weight_relationship,
380
+ plot_freight_value_weight_relationship,
381
+ ):
382
+ plot_freight_value_weight_relationship(df=freight_value_weight_relationship)
383
+ return
384
+
385
+
386
@app.cell
def _(mo):
    # Section heading rendered above the delivery-date-difference plot.
    mo.md(r"""**I. Difference Between Delivery Estimate Date and Delivery Date**""")
    return
390
+
391
+
392
+ @app.cell
393
+ def _(delivery_date_difference, plot_delivery_date_difference):
394
+ plot_delivery_date_difference(df=delivery_date_difference)
395
+ return
396
+
397
+
398
+ @app.cell
399
+ def _(mo):
400
+ mo.md(r"""**J. Order Amount per Day with Holidays**""")
401
+ return
402
+
403
+
404
+ @app.cell
405
+ def _(orders_per_day_and_holidays, plot_order_amount_per_day_with_holidays):
406
+ plot_order_amount_per_day_with_holidays(df=orders_per_day_and_holidays)
407
  return
408
 
409
 
requirements.txt CHANGED
@@ -1,7 +1,10 @@
1
  marimo==0.14.16
 
2
  pandas==2.3.1
 
3
  pyarrow==21.0.0
4
  pytest==8.4.1
5
  requests==2.32.4
6
  ruff==0.12.7
 
7
  sqlalchemy==2.0.42
 
1
  marimo==0.14.16
2
+ matplotlib==3.10.5
3
  pandas==2.3.1
4
+ plotly==6.2.0
5
  pyarrow==21.0.0
6
  pytest==8.4.1
7
  requests==2.32.4
8
  ruff==0.12.7
9
+ seaborn==0.13.2
10
  sqlalchemy==2.0.42
src/plots.py ADDED
@@ -0,0 +1,243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import matplotlib.pyplot as plt
2
+ import plotly.express as px
3
+ import seaborn as sns
4
+ from matplotlib import rc_file_defaults
5
+ from pandas import DataFrame, to_datetime
6
+
7
+
8
def plot_revenue_by_month_year(df: DataFrame, year: int) -> None:
    """
    Draw the monthly revenue of one year as a line with a bar overlay.

    The line goes on the primary axis; the semi-transparent bars go on a twin
    axis that shares the same x-axis.

    Args:
        df (DataFrame): Data holding a ``month`` column and a ``Year<year>``
            column (for example ``Year2017``) with the values to plot.
        year (int): Year used to select the ``Year<year>`` column and to build
            the chart title.
    """
    revenue_column = f"Year{year}"

    # Reset matplotlib to its rc-file defaults before applying the seaborn style.
    rc_file_defaults()
    sns.set_style(style="darkgrid", rc=None)

    _, line_axis = plt.subplots(figsize=(12, 6))
    sns.lineplot(data=df[revenue_column], marker="o", sort=False, ax=line_axis)

    # Bars live on a second y-axis layered over the same x-axis.
    bar_axis = line_axis.twinx()
    sns.barplot(data=df, x="month", y=revenue_column, alpha=0.5, ax=bar_axis)

    line_axis.set_title(f"Revenue by month in {year}")
    plt.show()
28
+
29
+
30
def plot_real_vs_predicted_delivered_time(df: DataFrame, year: int) -> None:
    """
    Plot the real and the estimated delivery time per month on a single axis.

    Args:
        df (DataFrame): Data holding a ``month`` column plus the
            ``Year<year>_real_time`` and ``Year<year>_estimated_time`` columns.
            NOTE(review): tick positions are ``range(len(df))``, which assumes
            the frame has a default 0..n-1 index - confirm against the query
            result.
        year (int): Year used to select the two columns and to build the title.
    """
    # Reset matplotlib to its rc-file defaults, then apply seaborn's style call.
    rc_file_defaults()
    sns.set_style(style=None, rc=None)

    _, ax = plt.subplots(figsize=(12, 6))

    # Both series are drawn on the same axis (no secondary axis is needed).
    # Labels are attached to the lines themselves so the legend does not
    # depend on the order in which artists were added to the axis.
    sns.lineplot(
        data=df[f"Year{year}_real_time"],
        marker="o",
        sort=False,
        ax=ax,
        label="Real time",
    )
    sns.lineplot(
        data=df[f"Year{year}_estimated_time"],
        marker="o",
        sort=False,
        ax=ax,
        label="Estimated time",
    )

    # One tick per row, labelled with the month value of that row.
    ax.set_xticks(range(len(df)))
    ax.set_xticklabels(df["month"].values)
    ax.set(xlabel="month", ylabel="Average days delivery time")
    ax.set_title(f"Average days delivery time by month in {year}")
    ax.legend()

    plt.show()
55
+
56
+
57
def plot_global_amount_order_status(df: DataFrame) -> None:
    """
    Draw a donut chart of the amount recorded for each order status.

    Args:
        df (DataFrame): Data holding an ``order_status`` column of strings and
            an ``Amount`` column that provides the wedge sizes.
    """
    _, ax = plt.subplots(figsize=(8, 3), subplot_kw={"aspect": "equal"})

    # Legend entries use only the last whitespace-separated word of each status.
    status_labels = [status.split()[-1] for status in df["order_status"]]

    # Without ``autopct``, ``pie`` returns the wedges and their label texts.
    wedges, texts = ax.pie(df["Amount"], textprops={"color": "w"})

    ax.legend(
        wedges,
        status_labels,
        title="Order Status",
        loc="center left",
        bbox_to_anchor=(1, 0, 0.5, 1),
    )
    plt.setp(texts, size=8, weight="bold")
    ax.set_title("Order Status Total")

    # A white disc over the centre turns the pie into a donut.
    hole = plt.Circle((0, 0), 0.7, color="white")
    ax.add_artist(hole)

    plt.show()
87
+
88
+
89
def plot_revenue_per_state(df: DataFrame) -> None:
    """
    Show a treemap with one tile per customer state, sized by revenue.

    Args:
        df (DataFrame): Data holding the ``customer_state`` and ``Revenue``
            columns.
    """
    treemap = px.treemap(
        df,
        path=["customer_state"],
        values="Revenue",
        width=800,
        height=300,
    )
    treemap.update_layout(margin={"t": 50, "l": 25, "r": 25, "b": 25})
    treemap.show()
101
+
102
+
103
def plot_top_10_least_revenue_categories(df: DataFrame) -> None:
    """
    Draw a donut chart of the revenue of the least-earning categories.

    Args:
        df (DataFrame): Data holding a ``Category`` column of strings and a
            ``Revenue`` column that provides the wedge sizes.
    """
    _, ax = plt.subplots(figsize=(6, 3), subplot_kw={"aspect": "equal"})

    # Legend entries use only the last whitespace-separated word of each name.
    category_labels = [category.split()[-1] for category in df["Category"]]

    # Without ``autopct``, ``pie`` returns the wedges and their label texts.
    wedges, texts = ax.pie(df["Revenue"], textprops={"color": "w"})

    ax.legend(
        wedges,
        category_labels,
        # This chart shows the *least* revenue categories, so the legend title
        # must say so (it previously read "Top 10 Revenue Categories").
        title="Top 10 Least Revenue Categories",
        loc="center left",
        bbox_to_anchor=(1, 0, 0.5, 1),
    )
    plt.setp(texts, size=8, weight="bold")

    # A white disc over the centre turns the pie into a donut.
    ax.add_artist(plt.Circle((0, 0), 0.7, color="white"))

    ax.set_title("Top 10 Least Revenue Categories Amount")

    plt.show()
133
+
134
+
135
def plot_top_10_revenue_categories_amount(df: DataFrame) -> None:
    """Draw a donut chart of the revenue of the top revenue categories.

    Args:
        df (DataFrame): Top 10 revenue categories query result; the
            ``Category`` (strings) and ``Revenue`` columns are read.
    """
    _, ax = plt.subplots(figsize=(6, 3), subplot_kw={"aspect": "equal"})

    # Legend entries use only the last whitespace-separated word of each name.
    category_labels = [category.split()[-1] for category in df["Category"]]

    # Without ``autopct``, ``pie`` returns the wedges and their label texts.
    wedges, texts = ax.pie(df["Revenue"], textprops={"color": "w"})

    ax.legend(
        wedges,
        category_labels,
        title="Top 10 Revenue Categories",
        loc="center left",
        bbox_to_anchor=(1, 0, 0.5, 1),
    )
    plt.setp(texts, size=8, weight="bold")

    # A white disc over the centre turns the pie into a donut.
    hole = plt.Circle((0, 0), 0.7, color="white")
    ax.add_artist(hole)

    ax.set_title("Top 10 Revenue Categories Amount")

    plt.show()
165
+
166
+
167
def plot_top_10_revenue_categories(df: DataFrame) -> None:
    """Show a treemap with one tile per category, sized by ``Num_order``.

    Args:
        df (DataFrame): Top 10 revenue categories query result; the
            ``Category`` and ``Num_order`` columns are read.
    """
    treemap = px.treemap(
        df,
        path=["Category"],
        values="Num_order",
        width=800,
        height=400,
    )
    treemap.update_layout(margin={"t": 50, "l": 25, "r": 25, "b": 25})
    treemap.show()
176
+
177
+
178
def plot_freight_value_weight_relationship(df: DataFrame) -> None:
    """Scatter the freight value against the product weight.

    Args:
        df (DataFrame): Freight value / weight relationship query result; the
            ``product_weight_g`` (x) and ``freight_value`` (y) columns are read.
    """
    plt.figure(figsize=(8, 4))

    # With no ``ax`` given, seaborn draws on the figure that was just created.
    scatter_axis = sns.scatterplot(
        data=df,
        x="product_weight_g",
        y="freight_value",
        edgecolor="white",
    )

    scatter_axis.set_title("Freight Value vs Product Weight")
    scatter_axis.set_xlabel("Product Weight (g)")
    scatter_axis.set_ylabel("Freight Value ($)")

    plt.tight_layout()
    plt.show()
201
+
202
+
203
def plot_delivery_date_difference(df: DataFrame) -> None:
    """Draw horizontal bars of the delivery difference for each state.

    Args:
        df (DataFrame): Delivery date difference query result; the
            ``Delivery_Difference`` (x) and ``State`` (y) columns are read.
    """
    plt.figure(figsize=(12, 6))

    bar_axis = sns.barplot(data=df, x="Delivery_Difference", y="State")
    bar_axis.set(title="Difference Between Delivery Estimate Date and Delivery Date")

    plt.show()
214
+
215
+
216
def plot_order_amount_per_day_with_holidays(df: DataFrame) -> None:
    """Plot the daily order count and mark holidays with vertical lines.

    The input frame is not modified: the date conversion and the sorting are
    applied to a copy.

    Args:
        df (DataFrame): Order amount per day with holidays query result. The
            ``date`` column is interpreted as a timestamp in milliseconds;
            ``order_count`` gives the line values and ``holiday`` flags the
            rows that get a vertical marker.
    """
    # ``assign`` returns a new frame, so the caller's ``date`` column keeps its
    # original values and the function can safely be called again.
    daily = df.assign(date=to_datetime(df["date"], unit="ms")).sort_values("date")

    # Line chart of the order count over time
    plt.figure(figsize=(9, 4))
    plt.plot(daily["date"], daily["order_count"], color="green")

    # One dotted vertical line per holiday. ``eq(True)`` keeps the original
    # comparison semantics of ``== True`` for the ``holiday`` column.
    holiday_dates = daily.loc[daily["holiday"].eq(True), "date"]
    for holiday_date in holiday_dates:
        plt.axvline(holiday_date, color="blue", linestyle="dotted", alpha=0.6)

    # Titles and layout
    plt.title("Order Amount per Day with Holidays")
    plt.xlabel("Date")
    plt.ylabel("Order Count")
    plt.tight_layout()
    plt.show()