iBrokeTheCode committed on
Commit
4523429
·
1 Parent(s): c1893f5

refactor: Add theme and improve plots

Browse files
Files changed (4) hide show
  1. app.py +81 -88
  2. src/plots.py +241 -131
  3. src/utils/__init__.py +0 -0
  4. src/utils/theme.py +21 -0
app.py CHANGED
@@ -79,27 +79,26 @@ def _(mo):
79
  def _():
80
  # 📌 IMPORT LIBRARIES AND PACKAGES
81
 
82
- from pandas import DataFrame
83
  from pathlib import Path
 
 
84
  from sqlalchemy import create_engine
85
 
86
  from src import config
87
  from src.extract import extract
88
  from src.load import load
89
- from src.transform import QueryEnum, run_queries
90
-
91
  from src.plots import (
92
- plot_revenue_by_month_year,
93
- plot_real_vs_predicted_delivered_time,
94
  plot_global_amount_order_status,
 
 
 
95
  plot_revenue_per_state,
96
  plot_top_10_least_revenue_categories,
97
- plot_top_10_revenue_categories_amount,
98
  plot_top_10_revenue_categories,
99
- plot_freight_value_weight_relationship,
100
- plot_delivery_date_difference,
101
- plot_order_amount_per_day_with_holidays,
102
  )
 
103
  return (
104
  DataFrame,
105
  Path,
@@ -311,7 +310,7 @@ def _(mo):
311
 
312
  @app.cell
313
  def _(mo):
314
- mo.md(r"""# 📋 Tables""")
315
  return
316
 
317
 
@@ -321,63 +320,86 @@ def _(
321
  global_amount_order_status,
322
  mo,
323
  orders_per_day_and_holidays,
 
 
 
 
 
 
 
 
 
324
  real_vs_estimated_delivery_time,
325
  revenue_by_month_year,
326
  revenue_per_state,
327
  top_10_least_revenue_categories,
328
  top_10_revenue_categories,
329
  ):
330
- overview_table_tab = mo.vstack(
331
  align="center",
332
  justify="center",
333
  gap=2,
334
  items=[
335
  mo.center(mo.md("## Global Order Status Overview")),
336
- global_amount_order_status,
337
  ],
338
  )
339
- revenue_table_tab = mo.vstack(
 
340
  align="center",
341
  justify="center",
342
  gap=2,
343
  items=[
344
  mo.center(mo.md("## Revenue by Month and Year")),
345
- revenue_by_month_year,
346
  mo.center(mo.md("## Revenue by State")),
347
- revenue_per_state,
348
  ],
349
  )
350
- categories_table_tab = mo.vstack(
 
351
  align="center",
352
  justify="center",
353
  gap=2,
354
  items=[
355
  mo.center(mo.md("## Top 10 Revenue Categories")),
356
- top_10_revenue_categories,
 
 
357
  mo.center(mo.md("## Bottom 10 Revenue Categories")),
358
- top_10_least_revenue_categories,
359
  ],
360
  )
361
- delivery_table_tab = mo.vstack(
362
- align="center",
363
- justify="center",
364
  gap=2,
 
 
 
365
  items=[
366
- mo.center(mo.md("## Freight Value vs Product Weight")),
367
- freight_value_weight_relationship,
368
  mo.center(mo.md("## Real vs Estimated Delivery Time")),
369
- real_vs_estimated_delivery_time,
 
 
 
 
 
 
370
  mo.center(mo.md("## Orders and Holidays")),
371
- orders_per_day_and_holidays,
372
  ],
373
  )
 
 
374
 
 
 
375
  mo.ui.tabs(
376
  {
377
- "📊 Overview": overview_table_tab,
378
- "💰 Revenue": revenue_table_tab,
379
- "📦 Categories": categories_table_tab,
380
- "🚚 Freight & Delivery": delivery_table_tab,
381
  }
382
  )
383
  return
@@ -391,7 +413,17 @@ def _(mo):
391
 
392
  @app.cell
393
  def _(mo):
394
- mo.md(r"""# 📊 Charts""")
 
 
 
 
 
 
 
 
 
 
395
  return
396
 
397
 
@@ -401,106 +433,67 @@ def _(
401
  global_amount_order_status,
402
  mo,
403
  orders_per_day_and_holidays,
404
- plot_freight_value_weight_relationship,
405
- plot_global_amount_order_status,
406
- plot_order_amount_per_day_with_holidays,
407
- plot_real_vs_predicted_delivered_time,
408
- plot_revenue_by_month_year,
409
- plot_revenue_per_state,
410
- plot_top_10_least_revenue_categories,
411
- plot_top_10_revenue_categories,
412
- plot_top_10_revenue_categories_amount,
413
  real_vs_estimated_delivery_time,
414
  revenue_by_month_year,
415
  revenue_per_state,
416
  top_10_least_revenue_categories,
417
  top_10_revenue_categories,
418
  ):
419
- overview_tab = mo.vstack(
420
  align="center",
421
  justify="center",
422
  gap=2,
423
  items=[
424
  mo.center(mo.md("## Global Order Status Overview")),
425
- plot_global_amount_order_status(df=global_amount_order_status),
426
  ],
427
  )
428
-
429
- revenue_tab = mo.vstack(
430
  align="center",
431
  justify="center",
432
  gap=2,
433
  items=[
434
  mo.center(mo.md("## Revenue by Month and Year")),
435
- plot_revenue_by_month_year(df=revenue_by_month_year, year=2017),
436
  mo.center(mo.md("## Revenue by State")),
437
- plot_revenue_per_state(revenue_per_state),
438
  ],
439
  )
440
-
441
- categories_tab = mo.vstack(
442
  align="center",
443
  justify="center",
444
  gap=2,
445
  items=[
446
  mo.center(mo.md("## Top 10 Revenue Categories")),
447
- plot_top_10_revenue_categories(top_10_revenue_categories),
448
- mo.center(mo.md("## Top 10 Revenue Categories by Amount")),
449
- plot_top_10_revenue_categories_amount(top_10_revenue_categories),
450
  mo.center(mo.md("## Bottom 10 Revenue Categories")),
451
- plot_top_10_least_revenue_categories(top_10_least_revenue_categories),
452
  ],
453
  )
454
-
455
- delivery_tab = mo.vstack(
456
- gap=2,
457
- justify="center",
458
  align="center",
459
- heights="equal",
 
460
  items=[
461
- mo.center(mo.md("## Freight Value vs Product Weight")),
462
- plot_freight_value_weight_relationship(
463
- freight_value_weight_relationship
464
- ),
465
  mo.center(mo.md("## Real vs Estimated Delivery Time")),
466
- plot_real_vs_predicted_delivered_time(
467
- df=real_vs_estimated_delivery_time, year=2017
468
- ),
469
  mo.center(mo.md("## Orders and Holidays")),
470
- plot_order_amount_per_day_with_holidays(orders_per_day_and_holidays),
471
  ],
472
  )
473
- return categories_tab, delivery_tab, overview_tab, revenue_tab
474
-
475
 
476
- @app.cell
477
- def _(categories_tab, delivery_tab, mo, overview_tab, revenue_tab):
478
  mo.ui.tabs(
479
  {
480
- "📊 Overview": overview_tab,
481
- "💰 Revenue": revenue_tab,
482
- "📦 Categories": categories_tab,
483
- "🚚 Freight & Delivery": delivery_tab,
484
  }
485
  )
486
  return
487
 
488
 
489
- @app.cell
490
- def _(mo):
491
- mo.Html("<br><hr><br>")
492
- return
493
-
494
-
495
- @app.cell
496
- def _(mo):
497
- mo.center(
498
- mo.md(
499
- "**Connect with me:** 💼 [Linkedin](https://www.linkedin.com/in/alex-turpo/) • 🐱 [GitHub](https://github.com/iBrokeTheCode) • 🤗 [Hugging Face](https://huggingface.co/iBrokeTheCode)"
500
- )
501
- )
502
- return
503
-
504
-
505
  if __name__ == "__main__":
506
  app.run()
 
79
  def _():
80
  # 📌 IMPORT LIBRARIES AND PACKAGES
81
 
 
82
  from pathlib import Path
83
+
84
+ from pandas import DataFrame
85
  from sqlalchemy import create_engine
86
 
87
  from src import config
88
  from src.extract import extract
89
  from src.load import load
 
 
90
  from src.plots import (
91
+ plot_freight_value_weight_relationship,
 
92
  plot_global_amount_order_status,
93
+ plot_order_amount_per_day_with_holidays,
94
+ plot_real_vs_predicted_delivered_time,
95
+ plot_revenue_by_month_year,
96
  plot_revenue_per_state,
97
  plot_top_10_least_revenue_categories,
 
98
  plot_top_10_revenue_categories,
99
+ plot_top_10_revenue_categories_amount,
 
 
100
  )
101
+ from src.transform import QueryEnum, run_queries
102
  return (
103
  DataFrame,
104
  Path,
 
310
 
311
  @app.cell
312
  def _(mo):
313
+ mo.md(r"""# 📊 Charts""")
314
  return
315
 
316
 
 
320
  global_amount_order_status,
321
  mo,
322
  orders_per_day_and_holidays,
323
+ plot_freight_value_weight_relationship,
324
+ plot_global_amount_order_status,
325
+ plot_order_amount_per_day_with_holidays,
326
+ plot_real_vs_predicted_delivered_time,
327
+ plot_revenue_by_month_year,
328
+ plot_revenue_per_state,
329
+ plot_top_10_least_revenue_categories,
330
+ plot_top_10_revenue_categories,
331
+ plot_top_10_revenue_categories_amount,
332
  real_vs_estimated_delivery_time,
333
  revenue_by_month_year,
334
  revenue_per_state,
335
  top_10_least_revenue_categories,
336
  top_10_revenue_categories,
337
  ):
338
+ overview_tab = mo.vstack(
339
  align="center",
340
  justify="center",
341
  gap=2,
342
  items=[
343
  mo.center(mo.md("## Global Order Status Overview")),
344
+ plot_global_amount_order_status(df=global_amount_order_status),
345
  ],
346
  )
347
+
348
+ revenue_tab = mo.vstack(
349
  align="center",
350
  justify="center",
351
  gap=2,
352
  items=[
353
  mo.center(mo.md("## Revenue by Month and Year")),
354
+ plot_revenue_by_month_year(df=revenue_by_month_year, year=2017),
355
  mo.center(mo.md("## Revenue by State")),
356
+ plot_revenue_per_state(revenue_per_state),
357
  ],
358
  )
359
+
360
+ categories_tab = mo.vstack(
361
  align="center",
362
  justify="center",
363
  gap=2,
364
  items=[
365
  mo.center(mo.md("## Top 10 Revenue Categories")),
366
+ plot_top_10_revenue_categories(top_10_revenue_categories),
367
+ mo.center(mo.md("## Top 10 Revenue Categories by Amount")),
368
+ plot_top_10_revenue_categories_amount(top_10_revenue_categories),
369
  mo.center(mo.md("## Bottom 10 Revenue Categories")),
370
+ plot_top_10_least_revenue_categories(top_10_least_revenue_categories),
371
  ],
372
  )
373
+
374
+ delivery_tab = mo.vstack(
 
375
  gap=2,
376
+ justify="center",
377
+ align="center",
378
+ heights="equal",
379
  items=[
 
 
380
  mo.center(mo.md("## Real vs Estimated Delivery Time")),
381
+ plot_real_vs_predicted_delivered_time(
382
+ df=real_vs_estimated_delivery_time, year=2017
383
+ ),
384
+ mo.center(mo.md("## Freight Value vs Product Weight")),
385
+ plot_freight_value_weight_relationship(
386
+ freight_value_weight_relationship
387
+ ),
388
  mo.center(mo.md("## Orders and Holidays")),
389
+ plot_order_amount_per_day_with_holidays(orders_per_day_and_holidays),
390
  ],
391
  )
392
+ return categories_tab, delivery_tab, overview_tab, revenue_tab
393
+
394
 
395
+ @app.cell
396
+ def _(categories_tab, delivery_tab, mo, overview_tab, revenue_tab):
397
  mo.ui.tabs(
398
  {
399
+ "📊 Overview": overview_tab,
400
+ "💰 Revenue": revenue_tab,
401
+ "📦 Categories": categories_tab,
402
+ "🚚 Freight & Delivery": delivery_tab,
403
  }
404
  )
405
  return
 
413
 
414
  @app.cell
415
  def _(mo):
416
+ mo.center(
417
+ mo.md(
418
+ "**Connect with me:** 💼 [Linkedin](https://www.linkedin.com/in/alex-turpo/) • 🐱 [GitHub](https://github.com/iBrokeTheCode) • 🤗 [Hugging Face](https://huggingface.co/iBrokeTheCode)"
419
+ )
420
+ )
421
+ return
422
+
423
+
424
+ @app.cell
425
+ def _(mo):
426
+ mo.md(r"""# 📋 Tables""")
427
  return
428
 
429
 
 
433
  global_amount_order_status,
434
  mo,
435
  orders_per_day_and_holidays,
 
 
 
 
 
 
 
 
 
436
  real_vs_estimated_delivery_time,
437
  revenue_by_month_year,
438
  revenue_per_state,
439
  top_10_least_revenue_categories,
440
  top_10_revenue_categories,
441
  ):
442
+ overview_table_tab = mo.vstack(
443
  align="center",
444
  justify="center",
445
  gap=2,
446
  items=[
447
  mo.center(mo.md("## Global Order Status Overview")),
448
+ global_amount_order_status,
449
  ],
450
  )
451
+ revenue_table_tab = mo.vstack(
 
452
  align="center",
453
  justify="center",
454
  gap=2,
455
  items=[
456
  mo.center(mo.md("## Revenue by Month and Year")),
457
+ revenue_by_month_year,
458
  mo.center(mo.md("## Revenue by State")),
459
+ revenue_per_state,
460
  ],
461
  )
462
+ categories_table_tab = mo.vstack(
 
463
  align="center",
464
  justify="center",
465
  gap=2,
466
  items=[
467
  mo.center(mo.md("## Top 10 Revenue Categories")),
468
+ top_10_revenue_categories,
 
 
469
  mo.center(mo.md("## Bottom 10 Revenue Categories")),
470
+ top_10_least_revenue_categories,
471
  ],
472
  )
473
+ delivery_table_tab = mo.vstack(
 
 
 
474
  align="center",
475
+ justify="center",
476
+ gap=2,
477
  items=[
 
 
 
 
478
  mo.center(mo.md("## Real vs Estimated Delivery Time")),
479
+ real_vs_estimated_delivery_time,
480
+ mo.center(mo.md("## Freight Value vs Product Weight")),
481
+ freight_value_weight_relationship,
482
  mo.center(mo.md("## Orders and Holidays")),
483
+ orders_per_day_and_holidays,
484
  ],
485
  )
 
 
486
 
 
 
487
  mo.ui.tabs(
488
  {
489
+ "📊 Overview": overview_table_tab,
490
+ "💰 Revenue": revenue_table_tab,
491
+ "📦 Categories": categories_table_tab,
492
+ "🚚 Freight & Delivery": delivery_table_tab,
493
  }
494
  )
495
  return
496
 
497
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
498
  if __name__ == "__main__":
499
  app.run()
src/plots.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import matplotlib.pyplot as plt
2
  import plotly.express as px
3
  import plotly.graph_objects as go
@@ -6,135 +7,190 @@ from matplotlib import rc_file_defaults
6
  from matplotlib.figure import Figure
7
  from pandas import DataFrame, to_datetime
8
 
 
 
9
 
10
  def plot_revenue_by_month_year(df: DataFrame, year: int) -> Figure:
11
  """
12
- Generate and return a matplotlib figure showing monthly revenue for a given year.
 
 
13
 
14
- Designed to be used in interactive environments like Marimo, where the figure
15
- will be rendered automatically when returned from a code cell.
16
 
17
- Args:
18
- df (DataFrame): DataFrame containing revenue data, with a column 'month'
19
- and a column named 'Year{year}' for the selected year.
20
- year (int): The year to visualize (e.g., 2018).
21
-
22
- Returns:
23
- Figure: A matplotlib figure object with a line and bar chart overlay.
24
- """
25
- rc_file_defaults()
26
- sns.set_style(style=None, rc=None)
27
 
28
  fig, ax1 = plt.subplots(figsize=(12, 4))
29
 
30
- sns.lineplot(data=df[f"Year{year}"], marker="o", sort=False, ax=ax1)
 
 
 
 
 
 
 
 
 
 
31
  ax2 = ax1.twinx()
32
- sns.barplot(data=df, x="month", y=f"Year{year}", alpha=0.5, ax=ax2)
 
 
 
 
 
 
 
33
 
34
- ax1.set_title(f"Revenue by month in {year}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  return fig
37
 
38
 
39
  def plot_real_vs_predicted_delivered_time(df: DataFrame, year: int) -> Figure:
40
  """
41
- Generate and return a matplotlib figure comparing real vs. estimated delivery time
42
- by month for a specific year.
43
-
44
- Intended for interactive environments like Marimo where returning the figure
45
- automatically renders the plot.
46
-
47
- Args:
48
- df (DataFrame): DataFrame with columns:
49
- - 'month': Month names or numbers.
50
- - f'Year{year}_real_time': Real average delivery time.
51
- - f'Year{year}_estimated_time': Estimated average delivery time.
52
- year (int): The year to visualize (e.g., 2018).
53
-
54
- Returns:
55
- Figure: A matplotlib figure with two overlaid line plots.
56
  """
57
  rc_file_defaults()
58
- sns.set_style(style=None, rc=None)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
- fig, ax1 = plt.subplots(figsize=(12, 4))
 
 
 
 
61
 
62
- sns.lineplot(data=df[f"Year{year}_real_time"], marker="o", sort=False, ax=ax1)
63
- sns.lineplot(data=df[f"Year{year}_estimated_time"], marker="o", sort=False, ax=ax1)
64
 
65
- ax1.set_xticks(range(len(df)))
66
- ax1.set_xticklabels(df["month"].values)
67
- ax1.set_xlabel("Month")
68
- ax1.set_ylabel("Average Days to Deliver")
69
- ax1.set_title(f"Average Delivery Time (Real vs Estimated) in {year}")
70
- ax1.legend(["Real Time", "Estimated Time"])
71
 
72
  return fig
73
 
74
 
75
- from matplotlib.figure import Figure
76
- from pandas import DataFrame
77
-
78
-
79
  def plot_global_amount_order_status(df: DataFrame) -> Figure:
80
  """
81
- Create and return a donut pie chart showing the global amount per order status.
82
 
83
  Args:
84
- df (DataFrame): DataFrame containing:
85
- - 'order_status': Status labels (e.g., 'order delivered').
86
- - 'Amount': Corresponding counts or totals per status.
87
 
88
  Returns:
89
- Figure: A matplotlib figure containing a pie (donut) chart with legend.
90
  """
91
- fig, ax = plt.subplots(figsize=(12, 4), subplot_kw=dict(aspect="equal"))
 
92
 
93
- # Extract last word of each status for cleaner labels
94
- elements = [x.split()[-1] for x in df["order_status"]]
 
95
 
96
- wedges, autotexts = ax.pie(df["Amount"], textprops=dict(color="w"))
97
 
98
- ax.legend(
99
- wedges,
100
- elements,
101
- title="Order Status",
102
- loc="center left",
103
- bbox_to_anchor=(1, 0, 0.5, 1),
104
  )
105
 
106
- plt.setp(autotexts, size=8, weight="bold")
107
- ax.set_title("Order Status Total")
108
-
109
- # Add donut center
110
- center_circle = plt.Circle((0, 0), 0.7, color="white")
111
- ax.add_artist(center_circle)
112
-
 
 
 
 
 
 
 
 
 
113
  return fig
114
 
115
 
116
  def plot_revenue_per_state(df: DataFrame) -> go.Figure:
117
  """
118
- Create a Plotly treemap to visualize revenue per customer state.
119
-
120
- Args:
121
- df (DataFrame): DataFrame with columns:
122
- - 'customer_state': State or region
123
- - 'Revenue': Revenue value per state
124
-
125
- Returns:
126
- go.Figure: A Plotly treemap figure object.
127
  """
128
  fig = px.treemap(
129
- df, path=["customer_state"], values="Revenue", width=800, height=300
 
 
 
 
 
 
130
  )
131
- fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  return fig
133
 
134
 
135
  def plot_top_10_least_revenue_categories(df: DataFrame) -> Figure:
136
  """
137
- Create a donut pie chart showing the top 10 least revenue categories.
138
 
139
  Args:
140
  df (DataFrame): DataFrame with columns:
@@ -142,35 +198,41 @@ def plot_top_10_least_revenue_categories(df: DataFrame) -> Figure:
142
  - 'Revenue': Corresponding revenue values
143
 
144
  Returns:
145
- Figure: A matplotlib figure with a donut chart and legend.
146
  """
147
- fig, ax = plt.subplots(figsize=(12, 4), subplot_kw=dict(aspect="equal"))
148
-
149
- elements = [x.split()[-1] for x in df["Category"]]
150
- revenue = df["Revenue"]
151
 
152
- wedges, autotexts = ax.pie(revenue, textprops=dict(color="w"))
 
 
153
 
154
- ax.legend(
155
- wedges,
156
- elements,
157
- title="Top 10 Revenue Categories",
158
- loc="center left",
159
- bbox_to_anchor=(1, 0, 0.5, 1),
160
  )
161
 
162
- plt.setp(autotexts, size=8, weight="bold")
163
- ax.set_title("Top 10 Least Revenue Categories Amount")
164
-
165
- center_circle = plt.Circle((0, 0), 0.7, color="white")
166
- ax.add_artist(center_circle)
167
-
 
 
 
 
 
 
 
 
 
 
168
  return fig
169
 
170
 
171
  def plot_top_10_revenue_categories_amount(df: DataFrame) -> Figure:
172
  """
173
- Create a donut pie chart showing the revenue distribution of the top 10 categories.
174
 
175
  Args:
176
  df (DataFrame): DataFrame with columns:
@@ -180,28 +242,32 @@ def plot_top_10_revenue_categories_amount(df: DataFrame) -> Figure:
180
  Returns:
181
  Figure: A matplotlib figure object.
182
  """
183
- fig, ax = plt.subplots(figsize=(12, 4), subplot_kw=dict(aspect="equal"))
184
-
185
- elements = [x.split()[-1] for x in df["Category"]]
186
- revenue = df["Revenue"]
187
 
188
- wedges, autotexts = ax.pie(revenue, textprops=dict(color="w"))
 
189
 
190
- ax.legend(
191
- wedges,
192
- elements,
193
- title="Top 10 Revenue Categories",
194
- loc="center left",
195
- bbox_to_anchor=(1, 0, 0.5, 1),
196
  )
197
 
198
- plt.setp(autotexts, size=8, weight="bold")
199
-
200
- ax.set_title("Top 10 Revenue Categories Amount")
201
-
202
- center_circle = plt.Circle((0, 0), 0.7, color="white")
203
- ax.add_artist(center_circle)
204
-
 
 
 
 
 
 
 
 
 
205
  return fig
206
 
207
 
@@ -217,8 +283,20 @@ def plot_top_10_revenue_categories(df: DataFrame) -> go.Figure:
217
  Returns:
218
  go.Figure: A Plotly treemap figure object.
219
  """
220
- fig = px.treemap(df, path=["Category"], values="Num_order", width=800, height=400)
221
- fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))
 
 
 
 
 
 
 
 
 
 
 
 
222
  return fig
223
 
224
 
@@ -234,17 +312,25 @@ def plot_freight_value_weight_relationship(df: DataFrame) -> Figure:
234
  Returns:
235
  Figure: A matplotlib figure object.
236
  """
237
- fig, ax = plt.subplots(figsize=(8, 4))
 
238
 
239
  sns.scatterplot(
240
- data=df, x="product_weight_g", y="freight_value", edgecolor="white", ax=ax
 
 
 
 
 
 
 
241
  )
242
 
243
- ax.set_title("Freight Value vs Product Weight")
244
- ax.set_xlabel("Product Weight (g)")
245
  ax.set_ylabel("Freight Value ($)")
246
- fig.tight_layout()
247
 
 
248
  return fig
249
 
250
 
@@ -260,12 +346,21 @@ def plot_delivery_date_difference(df: DataFrame) -> Figure:
260
  Returns:
261
  Figure: A matplotlib figure object.
262
  """
 
263
  fig, ax = plt.subplots(figsize=(10, 6))
264
 
265
- sns.barplot(data=df, x="Delivery_Difference", y="State", ax=ax)
266
- ax.set_title("Difference Between Delivery Estimate Date and Delivery Date")
267
- ax.set_xlabel("Delivery Difference (days)")
 
 
 
 
 
 
 
268
  ax.set_ylabel("State")
 
269
 
270
  fig.tight_layout()
271
  return fig
@@ -284,19 +379,34 @@ def plot_order_amount_per_day_with_holidays(df: DataFrame) -> Figure:
284
  Returns:
285
  Figure: A matplotlib figure object.
286
  """
 
287
  df = df.copy()
288
  df["date"] = to_datetime(df["date"], unit="ms")
289
  df = df.sort_values("date")
290
 
291
- fig, ax = plt.subplots(figsize=(9, 4))
292
- ax.plot(df["date"], df["order_count"], color="green")
293
 
294
  for holiday_date in df[df["holiday"]]["date"]:
295
- ax.axvline(holiday_date, color="blue", linestyle="dotted", alpha=0.6)
 
 
 
 
 
 
296
 
297
- ax.set_title("Order Amount per Day with Holidays")
298
  ax.set_xlabel("Date")
299
  ax.set_ylabel("Order Count")
300
- fig.tight_layout()
301
 
 
 
 
 
 
 
 
 
 
 
302
  return fig
 
1
+ import matplotlib.dates as mdates
2
  import matplotlib.pyplot as plt
3
  import plotly.express as px
4
  import plotly.graph_objects as go
 
7
  from matplotlib.figure import Figure
8
  from pandas import DataFrame, to_datetime
9
 
10
+ from src.utils.theme import apply_custom_palette, custom_palette
11
+
12
 
13
  def plot_revenue_by_month_year(df: DataFrame, year: int) -> Figure:
14
  """
15
+ Generate a matplotlib figure showing monthly revenue for a given year,
16
+ using consistent color styling.
17
+ """
18
 
19
+ # Set the theme
20
+ apply_custom_palette()
21
 
22
+ # Set the seaborn style (light background with grid lines)
23
+ sns.set_style("whitegrid")
 
 
 
 
 
 
 
 
24
 
25
  fig, ax1 = plt.subplots(figsize=(12, 4))
26
 
27
+ # Line plot for revenue trend
28
+ sns.lineplot(
29
+ data=df[f"Year{year}"],
30
+ marker="o",
31
+ sort=False,
32
+ linewidth=2,
33
+ ax=ax1,
34
+ label=f"Line: Revenue {year}",
35
+ )
36
+
37
+ # Bar plot with light transparency
38
  ax2 = ax1.twinx()
39
+ sns.barplot(
40
+ data=df,
41
+ x="month",
42
+ y=f"Year{year}",
43
+ alpha=0.4,
44
+ ax=ax2,
45
+ label=f"Bar: Revenue {year}",
46
+ )
47
 
48
+ # Beautify axes
49
+ ax1.set_ylabel("Revenue")
50
+ ax1.set_xlabel("Month")
51
+ ax1.grid(True, linestyle="--", alpha=0.5)
52
+
53
+ # Optional: display value annotations on bars
54
+ for i, value in enumerate(df[f"Year{year}"]):
55
+ ax2.text(
56
+ i,
57
+ value + value * 0.02, # small offset above bar
58
+ f"{int(value):,}",
59
+ ha="center",
60
+ va="bottom",
61
+ fontsize=8,
62
+ color="black",
63
+ )
64
+
65
+ # Leave the plot title empty; section headings are rendered by the Marimo app
66
+ ax1.set_title("")
67
+ fig.tight_layout()
68
 
69
  return fig
70
 
71
 
72
  def plot_real_vs_predicted_delivered_time(df: DataFrame, year: int) -> Figure:
73
  """
74
+ Create a line plot comparing real vs. estimated delivery time
75
+ by month for a given year.
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  """
77
  rc_file_defaults()
78
+ sns.set_style("whitegrid") # Use light grid for clarity
79
+
80
+ fig, ax = plt.subplots(figsize=(12, 4))
81
+
82
+ # Plot each line with explicit color and label
83
+ sns.lineplot(
84
+ x=df["month"],
85
+ y=df[f"Year{year}_real_time"],
86
+ marker="o",
87
+ label="Real Time",
88
+ color=custom_palette[0],
89
+ ax=ax,
90
+ )
91
+ sns.lineplot(
92
+ x=df["month"],
93
+ y=df[f"Year{year}_estimated_time"],
94
+ marker="s",
95
+ label="Estimated Time",
96
+ color=custom_palette[1],
97
+ ax=ax,
98
+ )
99
 
100
+ # Axis labeling and ticks
101
+ ax.set_xlabel("Month")
102
+ ax.set_ylabel("Average Days to Deliver")
103
+ ax.set_xticks(range(len(df)))
104
+ ax.set_xticklabels(df["month"].values, rotation=45)
105
 
106
+ # Legend configuration
107
+ ax.legend(title="", loc="upper right")
108
 
109
+ # Improve spacing
110
+ fig.tight_layout()
 
 
 
 
111
 
112
  return fig
113
 
114
 
 
 
 
 
115
  def plot_global_amount_order_status(df: DataFrame) -> Figure:
116
  """
117
+ Create a horizontal bar chart showing the global amount per order status.
118
 
119
  Args:
120
+ df (DataFrame): DataFrame with:
121
+ - 'order_status': Status labels (e.g., 'order delivered')
122
+ - 'Amount': Count or value per status
123
 
124
  Returns:
125
+ Figure: A matplotlib bar chart figure.
126
  """
127
+ rc_file_defaults()
128
+ fig, ax = plt.subplots(figsize=(10, 5))
129
 
130
+ df = df.copy()
131
+ df["short_status"] = df["order_status"].apply(lambda x: x.split()[-1].capitalize())
132
+ sorted_df = df.sort_values("Amount", ascending=True)
133
 
134
+ colors = custom_palette[: len(sorted_df)]
135
 
136
+ bars = ax.barh(
137
+ sorted_df["short_status"], sorted_df["Amount"], color=colors, edgecolor="black"
 
 
 
 
138
  )
139
 
140
+ # Add value labels
141
+ for bar in bars:
142
+ width = bar.get_width()
143
+ ax.text(
144
+ width + 50,
145
+ bar.get_y() + bar.get_height() / 2,
146
+ f"{int(width):,}",
147
+ va="center",
148
+ fontsize=9,
149
+ color="black",
150
+ )
151
+
152
+ ax.set_xlabel("Amount")
153
+ ax.set_ylabel("Order Status")
154
+ ax.grid(axis="x", linestyle="--", alpha=0.4)
155
+ fig.tight_layout()
156
  return fig
157
 
158
 
159
  def plot_revenue_per_state(df: DataFrame) -> go.Figure:
160
  """
161
+ Create a Plotly treemap to visualize revenue per customer state,
162
+ using a consistent custom color palette.
 
 
 
 
 
 
 
163
  """
164
  fig = px.treemap(
165
+ df,
166
+ path=["customer_state"],
167
+ values="Revenue",
168
+ color="customer_state", # Important to trigger color mapping
169
+ color_discrete_sequence=custom_palette,
170
+ width=800,
171
+ height=300,
172
  )
173
+
174
+ # Add label customization
175
+ fig.update_traces(
176
+ textinfo="label+percent entry+value", # show label, percentage, and raw value
177
+ textfont_size=14,
178
+ marker=dict(
179
+ line=dict(color="#FFFFFF", width=1)
180
+ ), # white borders between blocks
181
+ )
182
+
183
+ fig.update_layout(
184
+ margin=dict(t=20, l=20, r=20, b=20),
185
+ uniformtext=dict(minsize=12, mode="hide"),
186
+ )
187
+
188
  return fig
189
 
190
 
191
  def plot_top_10_least_revenue_categories(df: DataFrame) -> Figure:
192
  """
193
+ Create a horizontal bar chart showing the 10 categories with the least revenue.
194
 
195
  Args:
196
  df (DataFrame): DataFrame with columns:
 
198
  - 'Revenue': Corresponding revenue values
199
 
200
  Returns:
201
+ Figure: A matplotlib figure with a horizontal bar chart.
202
  """
203
+ rc_file_defaults()
204
+ fig, ax = plt.subplots(figsize=(10, 6))
 
 
205
 
206
+ # Sort and plot
207
+ sorted_df = df.sort_values("Revenue", ascending=True)
208
+ colors = custom_palette[: len(sorted_df)]
209
 
210
+ bars = ax.barh(
211
+ sorted_df["Category"], sorted_df["Revenue"], color=colors, edgecolor="black"
 
 
 
 
212
  )
213
 
214
+ # Add value labels
215
+ for bar in bars:
216
+ width = bar.get_width()
217
+ ax.text(
218
+ width + 100, # shift label to the right of the bar
219
+ bar.get_y() + bar.get_height() / 2,
220
+ f"${int(width):,}",
221
+ va="center",
222
+ fontsize=9,
223
+ color="black",
224
+ )
225
+
226
+ ax.set_xlabel("Revenue")
227
+ ax.set_ylabel("Category")
228
+ ax.grid(axis="x", linestyle="--", alpha=0.4)
229
+ fig.tight_layout()
230
  return fig
231
 
232
 
233
  def plot_top_10_revenue_categories_amount(df: DataFrame) -> Figure:
234
  """
235
+ Create a horizontal bar chart showing the revenue of the top 10 categories.
236
 
237
  Args:
238
  df (DataFrame): DataFrame with columns:
 
242
  Returns:
243
  Figure: A matplotlib figure object.
244
  """
245
+ rc_file_defaults()
246
+ fig, ax = plt.subplots(figsize=(10, 6))
 
 
247
 
248
+ sorted_df = df.sort_values("Revenue", ascending=True)
249
+ colors = custom_palette[: len(sorted_df)]
250
 
251
+ bars = ax.barh(
252
+ sorted_df["Category"], sorted_df["Revenue"], color=colors, edgecolor="black"
 
 
 
 
253
  )
254
 
255
+ # Add value labels on the right
256
+ for bar in bars:
257
+ width = bar.get_width()
258
+ ax.text(
259
+ width + 100,
260
+ bar.get_y() + bar.get_height() / 2,
261
+ f"${int(width):,}",
262
+ va="center",
263
+ fontsize=9,
264
+ color="black",
265
+ )
266
+
267
+ ax.set_xlabel("Revenue")
268
+ ax.set_ylabel("Category")
269
+ ax.grid(axis="x", linestyle="--", alpha=0.4)
270
+ fig.tight_layout()
271
  return fig
272
 
273
 
 
283
  Returns:
284
  go.Figure: A Plotly treemap figure object.
285
  """
286
+ fig = px.treemap(
287
+ df,
288
+ path=["Category"],
289
+ values="Num_order",
290
+ color="Num_order",
291
+ color_continuous_scale=custom_palette, # Optional for consistency
292
+ hover_data={"Num_order": ":,"}, # Adds commas to values
293
+ width=800,
294
+ height=400,
295
+ )
296
+ fig.update_layout(
297
+ margin=dict(t=40, l=30, r=30, b=30),
298
+ coloraxis_showscale=False, # Optional: hides legend bar
299
+ )
300
  return fig
301
 
302
 
 
312
  Returns:
313
  Figure: A matplotlib figure object.
314
  """
315
+ rc_file_defaults()
316
+ fig, ax = plt.subplots(figsize=(10, 5))
317
 
318
  sns.scatterplot(
319
+ data=df,
320
+ x="product_weight_g",
321
+ y="freight_value",
322
+ color=custom_palette[2],
323
+ edgecolor="white",
324
+ alpha=0.7,
325
+ s=50,
326
+ ax=ax,
327
  )
328
 
329
+ ax.set_xlabel("Product Weight (grams)")
 
330
  ax.set_ylabel("Freight Value ($)")
331
+ ax.grid(True, linestyle="--", alpha=0.5)
332
 
333
+ fig.tight_layout()
334
  return fig
335
 
336
 
 
346
  Returns:
347
  Figure: A matplotlib figure object.
348
  """
349
+ rc_file_defaults()
350
  fig, ax = plt.subplots(figsize=(10, 6))
351
 
352
+ sns.barplot(
353
+ data=df, x="Delivery_Difference", y="State", color=custom_palette[0], ax=ax
354
+ )
355
+
356
+ ax.set_title(
357
+ "Difference Between Estimated and Actual Delivery Dates by State",
358
+ fontsize=12,
359
+ weight="bold",
360
+ )
361
+ ax.set_xlabel("Delivery Difference (Days)")
362
  ax.set_ylabel("State")
363
+ ax.grid(True, linestyle="--", alpha=0.4, axis="x")
364
 
365
  fig.tight_layout()
366
  return fig
 
379
  Returns:
380
  Figure: A matplotlib figure object.
381
  """
382
+ rc_file_defaults()
383
  df = df.copy()
384
  df["date"] = to_datetime(df["date"], unit="ms")
385
  df = df.sort_values("date")
386
 
387
+ fig, ax = plt.subplots(figsize=(12, 4))
388
+ ax.plot(df["date"], df["order_count"], color=custom_palette[2], label="Order Count")
389
 
390
  for holiday_date in df[df["holiday"]]["date"]:
391
+ ax.axvline(
392
+ holiday_date,
393
+ color=custom_palette[3],
394
+ linestyle="--",
395
+ alpha=0.4,
396
+ label="Holiday",
397
+ )
398
 
 
399
  ax.set_xlabel("Date")
400
  ax.set_ylabel("Order Count")
 
401
 
402
+ ax.xaxis.set_major_locator(mdates.MonthLocator())
403
+ ax.xaxis.set_major_formatter(mdates.DateFormatter("%b %Y"))
404
+ ax.tick_params(axis="x", rotation=45)
405
+
406
+ ax.grid(True, linestyle="--", alpha=0.5)
407
+ handles, labels = ax.get_legend_handles_labels()
408
+ by_label = dict(zip(labels, handles)) # avoid duplicate "Holiday" entries
409
+ ax.legend(by_label.values(), by_label.keys(), loc="upper left")
410
+
411
+ fig.tight_layout()
412
  return fig
src/utils/__init__.py ADDED
File without changes
src/utils/theme.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ custom_palette = [
2
+ "#636EFA", # Blue
3
+ "#EF553B", # Red
4
+ "#00CC96", # Green
5
+ "#AB63FA", # Purple
6
+ "#FFA15A", # Orange
7
+ "#19D3F3", # Cyan
8
+ "#FF6692", # Pink
9
+ "#B6E880", # Light Green
10
+ "#FF97FF", # Magenta
11
+ "#FECB52", # Yellow
12
+ ]
13
+
14
+
15
+ def apply_custom_palette():
16
+ import matplotlib.pyplot as plt
17
+ import seaborn as sns
18
+
19
+ # Set globally for matplotlib and seaborn
20
+ plt.rcParams["axes.prop_cycle"] = plt.cycler(color=custom_palette)
21
+ sns.set_palette(custom_palette)