iBrokeTheCode committed on
Commit
9894e45
·
1 Parent(s): 1412774

refactor: Return figure instead of using plt.plot()

Browse files
Files changed (4) hide show
  1. app.py +2 -24
  2. src/plots.py +15 -7
  3. tutorial_app.ipynb +78 -114
  4. tutorial_app.py +0 -419
app.py CHANGED
@@ -20,31 +20,9 @@ def _(mo):
20
  def _(mo):
21
  mo.md(
22
  r"""
23
- 📘 Want a step-by-step walkthrough instead?
24
 
25
- You can check the tutorial version of this app here: 👉 [Tutorial app](https://huggingface.co/spaces/iBrokeTheCode/E-Commerce_ELT/blob/main/tutorial_app.py)
26
-
27
- Or the Jupyter notebook version here: 👉 [Jupyter version](https://huggingface.co/spaces/iBrokeTheCode/E-Commerce_ELT/blob/main/tutorial_app.ipynb)
28
- """
29
- )
30
- return
31
-
32
-
33
- @app.cell
34
- def _(mo):
35
- mo.md(r"""## Table of Contents""")
36
- return
37
-
38
-
39
- @app.cell
40
- def _(mo):
41
- mo.md(
42
- r"""
43
- - [Description](#1-description)
44
- - [ETL](#2-etl)
45
- - [Extract & Load](#21-extract-and-load)
46
- - [Transform](#22-transform)
47
- - [Plots](#3-plots)
48
  """
49
  )
50
  return
 
20
  def _(mo):
21
  mo.md(
22
  r"""
23
+ 💡 Want a step-by-step walkthrough instead?
24
 
25
+ You can check the Jupyter notebook version here: 👉 [Jupyter version](https://huggingface.co/spaces/iBrokeTheCode/E-Commerce_ELT/blob/main/tutorial_app.ipynb)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  """
27
  )
28
  return
src/plots.py CHANGED
@@ -2,29 +2,37 @@ import matplotlib.pyplot as plt
2
  import plotly.express as px
3
  import seaborn as sns
4
  from matplotlib import rc_file_defaults
 
5
  from pandas import DataFrame, to_datetime
6
 
7
 
8
- def plot_revenue_by_month_year(df: DataFrame, year: int) -> None:
9
  """
10
- Plot the revenue by month and year
 
 
 
11
 
12
  Args:
13
- df (DataFrame): The dataframe
14
- year (int): The year
 
 
 
 
15
  """
16
  rc_file_defaults()
17
  sns.set_style(style="darkgrid", rc=None)
18
 
19
- _, ax1 = plt.subplots(figsize=(12, 6))
20
 
21
  sns.lineplot(data=df[f"Year{year}"], marker="o", sort=False, ax=ax1)
22
  ax2 = ax1.twinx()
23
-
24
  sns.barplot(data=df, x="month", y=f"Year{year}", alpha=0.5, ax=ax2)
 
25
  ax1.set_title(f"Revenue by month in {year}")
26
 
27
- plt.show()
28
 
29
 
30
  def plot_real_vs_predicted_delivered_time(df: DataFrame, year: int) -> None:
 
2
  import plotly.express as px
3
  import seaborn as sns
4
  from matplotlib import rc_file_defaults
5
+ from matplotlib.figure import Figure
6
  from pandas import DataFrame, to_datetime
7
 
8
 
9
+ def plot_revenue_by_month_year(df: DataFrame, year: int) -> Figure:
10
  """
11
+ Generate and return a matplotlib figure showing monthly revenue for a given year.
12
+
13
+ Designed to be used in interactive environments like Marimo, where the figure
14
+ will be rendered automatically when returned from a code cell.
15
 
16
  Args:
17
+ df (DataFrame): DataFrame containing revenue data, with a column 'month'
18
+ and a column named 'Year{year}' for the selected year.
19
+ year (int): The year to visualize (e.g., 2018).
20
+
21
+ Returns:
22
+ Figure: A matplotlib figure object with a line and bar chart overlay.
23
  """
24
  rc_file_defaults()
25
  sns.set_style(style="darkgrid", rc=None)
26
 
27
+ fig, ax1 = plt.subplots(figsize=(12, 6))
28
 
29
  sns.lineplot(data=df[f"Year{year}"], marker="o", sort=False, ax=ax1)
30
  ax2 = ax1.twinx()
 
31
  sns.barplot(data=df, x="month", y=f"Year{year}", alpha=0.5, ax=ax2)
32
+
33
  ax1.set_title(f"Revenue by month in {year}")
34
 
35
+ return fig
36
 
37
 
38
  def plot_real_vs_predicted_delivered_time(df: DataFrame, year: int) -> None:
tutorial_app.ipynb CHANGED
@@ -13,44 +13,22 @@
13
  "id": "vblA",
14
  "metadata": {},
15
  "source": [
16
- "📘 Want a step-by-step walkthrough instead?\n",
17
  "\n",
18
- "You can check the tutorial version of this app here: 👉 [Tutorial app](https://huggingface.co/spaces/iBrokeTheCode/E-Commerce_ELT/blob/main/tutorial_app.py)\n",
19
- "\n",
20
- "Or the Jupyter notebook version here: 👉 [Jupyter version](https://huggingface.co/spaces/iBrokeTheCode/E-Commerce_ELT/blob/main/tutorial_app.ipynb)\n"
21
  ]
22
  },
23
  {
24
  "cell_type": "markdown",
25
  "id": "bkHC",
26
  "metadata": {},
27
- "source": [
28
- "## Table of Contents\n"
29
- ]
30
- },
31
- {
32
- "cell_type": "markdown",
33
- "id": "lEQa",
34
- "metadata": {},
35
- "source": [
36
- "- [Description](#1-description)\n",
37
- "- [ETL](#2-etl)\n",
38
- " - [Extract & Load](#21-extract-and-load)\n",
39
- " - [Transform](#22-transform)\n",
40
- "- [Plots](#3-plots)\n"
41
- ]
42
- },
43
- {
44
- "cell_type": "markdown",
45
- "id": "PKri",
46
- "metadata": {},
47
  "source": [
48
  "## 1. Description\n"
49
  ]
50
  },
51
  {
52
  "cell_type": "markdown",
53
- "id": "Xref",
54
  "metadata": {},
55
  "source": [
56
  "This project analyzes e-commerce data from a Brazilian marketplace to explore key business metrics related to **revenue** and **delivery performance**. Using an interactive Marimo application, the analysis provides insights into:\n",
@@ -63,7 +41,7 @@
63
  },
64
  {
65
  "cell_type": "markdown",
66
- "id": "SFPL",
67
  "metadata": {},
68
  "source": [
69
  "## 2. ETL\n"
@@ -71,8 +49,8 @@
71
  },
72
  {
73
  "cell_type": "code",
74
- "execution_count": 3,
75
- "id": "BYtC",
76
  "metadata": {},
77
  "outputs": [],
78
  "source": [
@@ -88,7 +66,7 @@
88
  },
89
  {
90
  "cell_type": "markdown",
91
- "id": "RGSE",
92
  "metadata": {},
93
  "source": [
94
  "### 2.1 Extract and Load\n"
@@ -96,8 +74,8 @@
96
  },
97
  {
98
  "cell_type": "code",
99
- "execution_count": 4,
100
- "id": "Kclp",
101
  "metadata": {},
102
  "outputs": [
103
  {
@@ -130,7 +108,7 @@
130
  },
131
  {
132
  "cell_type": "markdown",
133
- "id": "emfo",
134
  "metadata": {},
135
  "source": [
136
  "### 2.2 Transform\n"
@@ -138,8 +116,8 @@
138
  },
139
  {
140
  "cell_type": "code",
141
- "execution_count": 5,
142
- "id": "Hstk",
143
  "metadata": {},
144
  "outputs": [],
145
  "source": [
@@ -148,7 +126,7 @@
148
  },
149
  {
150
  "cell_type": "markdown",
151
- "id": "nWHF",
152
  "metadata": {},
153
  "source": [
154
  "**A. Revenue by Month and Year**\n"
@@ -156,8 +134,8 @@
156
  },
157
  {
158
  "cell_type": "code",
159
- "execution_count": 6,
160
- "id": "iLit",
161
  "metadata": {},
162
  "outputs": [
163
  {
@@ -305,7 +283,7 @@
305
  "11 12 Dec 960.85 1082600.69 0.00"
306
  ]
307
  },
308
- "execution_count": 6,
309
  "metadata": {},
310
  "output_type": "execute_result"
311
  }
@@ -317,7 +295,7 @@
317
  },
318
  {
319
  "cell_type": "markdown",
320
- "id": "ZHCJ",
321
  "metadata": {},
322
  "source": [
323
  "**B. Top 10 Revenue by categories**\n"
@@ -326,7 +304,7 @@
326
  {
327
  "cell_type": "code",
328
  "execution_count": null,
329
- "id": "ROlb",
330
  "metadata": {},
331
  "outputs": [
332
  {
@@ -434,7 +412,7 @@
434
  "9 cool_stuff 3559 744649.32"
435
  ]
436
  },
437
- "execution_count": 7,
438
  "metadata": {},
439
  "output_type": "execute_result"
440
  }
@@ -446,7 +424,7 @@
446
  },
447
  {
448
  "cell_type": "markdown",
449
- "id": "qnkX",
450
  "metadata": {},
451
  "source": [
452
  "**C. Top 10 Least Revenue by Categories**\n"
@@ -454,8 +432,8 @@
454
  },
455
  {
456
  "cell_type": "code",
457
- "execution_count": 8,
458
- "id": "TqIu",
459
  "metadata": {},
460
  "outputs": [
461
  {
@@ -563,7 +541,7 @@
563
  "9 fashio_female_clothing 36 4902.67"
564
  ]
565
  },
566
- "execution_count": 8,
567
  "metadata": {},
568
  "output_type": "execute_result"
569
  }
@@ -577,7 +555,7 @@
577
  },
578
  {
579
  "cell_type": "markdown",
580
- "id": "Vxnm",
581
  "metadata": {},
582
  "source": [
583
  "**D. Revenue per State**\n"
@@ -585,8 +563,8 @@
585
  },
586
  {
587
  "cell_type": "code",
588
- "execution_count": 9,
589
- "id": "DnEU",
590
  "metadata": {},
591
  "outputs": [
592
  {
@@ -683,7 +661,7 @@
683
  "9 ES 317682.65"
684
  ]
685
  },
686
- "execution_count": 9,
687
  "metadata": {},
688
  "output_type": "execute_result"
689
  }
@@ -695,7 +673,7 @@
695
  },
696
  {
697
  "cell_type": "markdown",
698
- "id": "ulZA",
699
  "metadata": {},
700
  "source": [
701
  "**E. Delivery Date Difference**\n"
@@ -704,7 +682,7 @@
704
  {
705
  "cell_type": "code",
706
  "execution_count": null,
707
- "id": "ecfG",
708
  "metadata": {},
709
  "outputs": [
710
  {
@@ -903,7 +881,7 @@
903
  "26 RO 20"
904
  ]
905
  },
906
- "execution_count": 10,
907
  "metadata": {},
908
  "output_type": "execute_result"
909
  }
@@ -915,7 +893,7 @@
915
  },
916
  {
917
  "cell_type": "markdown",
918
- "id": "Pvdt",
919
  "metadata": {},
920
  "source": [
921
  "**F. Real vs. Predicted Delivered Time**\n"
@@ -923,8 +901,8 @@
923
  },
924
  {
925
  "cell_type": "code",
926
- "execution_count": 11,
927
- "id": "ZBYS",
928
  "metadata": {},
929
  "outputs": [
930
  {
@@ -1125,7 +1103,7 @@
1125
  "11 26.030012 27.681340 NaN "
1126
  ]
1127
  },
1128
- "execution_count": 11,
1129
  "metadata": {},
1130
  "output_type": "execute_result"
1131
  }
@@ -1139,7 +1117,7 @@
1139
  },
1140
  {
1141
  "cell_type": "markdown",
1142
- "id": "aLJB",
1143
  "metadata": {},
1144
  "source": [
1145
  "**G. Global Amount of Order Status**\n"
@@ -1148,7 +1126,7 @@
1148
  {
1149
  "cell_type": "code",
1150
  "execution_count": null,
1151
- "id": "nHfw",
1152
  "metadata": {},
1153
  "outputs": [
1154
  {
@@ -1233,7 +1211,7 @@
1233
  "7 unavailable 609"
1234
  ]
1235
  },
1236
- "execution_count": 12,
1237
  "metadata": {},
1238
  "output_type": "execute_result"
1239
  }
@@ -1245,7 +1223,7 @@
1245
  },
1246
  {
1247
  "cell_type": "markdown",
1248
- "id": "xXTn",
1249
  "metadata": {},
1250
  "source": [
1251
  "**H. Orders per Day and Holidays in 2017**\n"
@@ -1253,8 +1231,8 @@
1253
  },
1254
  {
1255
  "cell_type": "code",
1256
- "execution_count": 13,
1257
- "id": "AjVT",
1258
  "metadata": {},
1259
  "outputs": [
1260
  {
@@ -1372,7 +1350,7 @@
1372
  "[361 rows x 3 columns]"
1373
  ]
1374
  },
1375
- "execution_count": 13,
1376
  "metadata": {},
1377
  "output_type": "execute_result"
1378
  }
@@ -1386,7 +1364,7 @@
1386
  },
1387
  {
1388
  "cell_type": "markdown",
1389
- "id": "pHFh",
1390
  "metadata": {},
1391
  "source": [
1392
  "**I. Freight Value Weight Relationship**\n"
@@ -1394,8 +1372,8 @@
1394
  },
1395
  {
1396
  "cell_type": "code",
1397
- "execution_count": 14,
1398
- "id": "NCOB",
1399
  "metadata": {},
1400
  "outputs": [
1401
  {
@@ -1513,7 +1491,7 @@
1513
  "[96478 rows x 3 columns]"
1514
  ]
1515
  },
1516
- "execution_count": 14,
1517
  "metadata": {},
1518
  "output_type": "execute_result"
1519
  }
@@ -1527,7 +1505,7 @@
1527
  },
1528
  {
1529
  "cell_type": "markdown",
1530
- "id": "aqbW",
1531
  "metadata": {},
1532
  "source": [
1533
  "## 3. Plots\n"
@@ -1535,8 +1513,8 @@
1535
  },
1536
  {
1537
  "cell_type": "code",
1538
- "execution_count": 15,
1539
- "id": "TRpd",
1540
  "metadata": {},
1541
  "outputs": [],
1542
  "source": [
@@ -1556,7 +1534,7 @@
1556
  },
1557
  {
1558
  "cell_type": "markdown",
1559
- "id": "TXez",
1560
  "metadata": {},
1561
  "source": [
1562
  "**A. Revenue by Month in 2017**\n"
@@ -1564,8 +1542,8 @@
1564
  },
1565
  {
1566
  "cell_type": "code",
1567
- "execution_count": 16,
1568
- "id": "dNNg",
1569
  "metadata": {},
1570
  "outputs": [
1571
  {
@@ -1585,7 +1563,7 @@
1585
  },
1586
  {
1587
  "cell_type": "markdown",
1588
- "id": "yCnT",
1589
  "metadata": {},
1590
  "source": [
1591
  "**B. Real vs. Predicted Delivered Time**\n"
@@ -1594,7 +1572,7 @@
1594
  {
1595
  "cell_type": "code",
1596
  "execution_count": null,
1597
- "id": "wlCL",
1598
  "metadata": {},
1599
  "outputs": [
1600
  {
@@ -1614,7 +1592,7 @@
1614
  },
1615
  {
1616
  "cell_type": "markdown",
1617
- "id": "kqZH",
1618
  "metadata": {},
1619
  "source": [
1620
  "**C. Global Amount of Order Status**\n"
@@ -1622,8 +1600,8 @@
1622
  },
1623
  {
1624
  "cell_type": "code",
1625
- "execution_count": 18,
1626
- "id": "wAgl",
1627
  "metadata": {},
1628
  "outputs": [
1629
  {
@@ -1643,7 +1621,7 @@
1643
  },
1644
  {
1645
  "cell_type": "markdown",
1646
- "id": "rEll",
1647
  "metadata": {},
1648
  "source": [
1649
  "**D. Revenue per State**\n"
@@ -1651,8 +1629,8 @@
1651
  },
1652
  {
1653
  "cell_type": "code",
1654
- "execution_count": 19,
1655
- "id": "dGlV",
1656
  "metadata": {},
1657
  "outputs": [
1658
  {
@@ -2520,7 +2498,7 @@
2520
  },
2521
  {
2522
  "cell_type": "markdown",
2523
- "id": "SdmI",
2524
  "metadata": {},
2525
  "source": [
2526
  "**E. Top 10 Least Revenue by Categories**\n"
@@ -2528,8 +2506,8 @@
2528
  },
2529
  {
2530
  "cell_type": "code",
2531
- "execution_count": 20,
2532
- "id": "lgWD",
2533
  "metadata": {},
2534
  "outputs": [
2535
  {
@@ -2549,7 +2527,7 @@
2549
  },
2550
  {
2551
  "cell_type": "markdown",
2552
- "id": "yOPj",
2553
  "metadata": {},
2554
  "source": [
2555
  "**F. Top 10 Revenue Categories Amount**\n"
@@ -2557,8 +2535,8 @@
2557
  },
2558
  {
2559
  "cell_type": "code",
2560
- "execution_count": 21,
2561
- "id": "fwwy",
2562
  "metadata": {},
2563
  "outputs": [
2564
  {
@@ -2578,7 +2556,7 @@
2578
  },
2579
  {
2580
  "cell_type": "markdown",
2581
- "id": "LJZf",
2582
  "metadata": {},
2583
  "source": [
2584
  "**G. Top 10 Revenue by Categories**\n"
@@ -2586,8 +2564,8 @@
2586
  },
2587
  {
2588
  "cell_type": "code",
2589
- "execution_count": 22,
2590
- "id": "urSm",
2591
  "metadata": {},
2592
  "outputs": [
2593
  {
@@ -3455,7 +3433,7 @@
3455
  },
3456
  {
3457
  "cell_type": "markdown",
3458
- "id": "jxvo",
3459
  "metadata": {},
3460
  "source": [
3461
  "**H. Freight Value vs. Product Weight**\n"
@@ -3463,8 +3441,8 @@
3463
  },
3464
  {
3465
  "cell_type": "code",
3466
- "execution_count": 23,
3467
- "id": "mWxS",
3468
  "metadata": {},
3469
  "outputs": [
3470
  {
@@ -3484,7 +3462,7 @@
3484
  },
3485
  {
3486
  "cell_type": "markdown",
3487
- "id": "CcZR",
3488
  "metadata": {},
3489
  "source": [
3490
  "**I. Diffrence Between Deliver Estimated Date and Delivery Date**\n"
@@ -3492,8 +3470,8 @@
3492
  },
3493
  {
3494
  "cell_type": "code",
3495
- "execution_count": 24,
3496
- "id": "YWSi",
3497
  "metadata": {},
3498
  "outputs": [
3499
  {
@@ -3513,7 +3491,7 @@
3513
  },
3514
  {
3515
  "cell_type": "markdown",
3516
- "id": "zlud",
3517
  "metadata": {},
3518
  "source": [
3519
  "**J. Order Amount per Day with Holidays**\n"
@@ -3521,8 +3499,8 @@
3521
  },
3522
  {
3523
  "cell_type": "code",
3524
- "execution_count": 25,
3525
- "id": "tZnO",
3526
  "metadata": {},
3527
  "outputs": [
3528
  {
@@ -3542,22 +3520,8 @@
3542
  }
3543
  ],
3544
  "metadata": {
3545
- "kernelspec": {
3546
- "display_name": "E-Commerce_ELT",
3547
- "language": "python",
3548
- "name": "python3"
3549
- },
3550
  "language_info": {
3551
- "codemirror_mode": {
3552
- "name": "ipython",
3553
- "version": 3
3554
- },
3555
- "file_extension": ".py",
3556
- "mimetype": "text/x-python",
3557
- "name": "python",
3558
- "nbconvert_exporter": "python",
3559
- "pygments_lexer": "ipython3",
3560
- "version": "3.12.3"
3561
  }
3562
  },
3563
  "nbformat": 4,
 
13
  "id": "vblA",
14
  "metadata": {},
15
  "source": [
16
+ "💡 Want a step-by-step walkthrough instead?\n",
17
  "\n",
18
+ "You can check the Jupyter notebook version here: 👉 [Jupyter version](https://huggingface.co/spaces/iBrokeTheCode/E-Commerce_ELT/blob/main/tutorial_app.ipynb)\n"
 
 
19
  ]
20
  },
21
  {
22
  "cell_type": "markdown",
23
  "id": "bkHC",
24
  "metadata": {},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  "source": [
26
  "## 1. Description\n"
27
  ]
28
  },
29
  {
30
  "cell_type": "markdown",
31
+ "id": "lEQa",
32
  "metadata": {},
33
  "source": [
34
  "This project analyzes e-commerce data from a Brazilian marketplace to explore key business metrics related to **revenue** and **delivery performance**. Using an interactive Marimo application, the analysis provides insights into:\n",
 
41
  },
42
  {
43
  "cell_type": "markdown",
44
+ "id": "PKri",
45
  "metadata": {},
46
  "source": [
47
  "## 2. ETL\n"
 
49
  },
50
  {
51
  "cell_type": "code",
52
+ "execution_count": 26,
53
+ "id": "Xref",
54
  "metadata": {},
55
  "outputs": [],
56
  "source": [
 
66
  },
67
  {
68
  "cell_type": "markdown",
69
+ "id": "SFPL",
70
  "metadata": {},
71
  "source": [
72
  "### 2.1 Extract and Load\n"
 
74
  },
75
  {
76
  "cell_type": "code",
77
+ "execution_count": 27,
78
+ "id": "BYtC",
79
  "metadata": {},
80
  "outputs": [
81
  {
 
108
  },
109
  {
110
  "cell_type": "markdown",
111
+ "id": "RGSE",
112
  "metadata": {},
113
  "source": [
114
  "### 2.2 Transform\n"
 
116
  },
117
  {
118
  "cell_type": "code",
119
+ "execution_count": 28,
120
+ "id": "Kclp",
121
  "metadata": {},
122
  "outputs": [],
123
  "source": [
 
126
  },
127
  {
128
  "cell_type": "markdown",
129
+ "id": "emfo",
130
  "metadata": {},
131
  "source": [
132
  "**A. Revenue by Month and Year**\n"
 
134
  },
135
  {
136
  "cell_type": "code",
137
+ "execution_count": 29,
138
+ "id": "Hstk",
139
  "metadata": {},
140
  "outputs": [
141
  {
 
283
  "11 12 Dec 960.85 1082600.69 0.00"
284
  ]
285
  },
286
+ "execution_count": 29,
287
  "metadata": {},
288
  "output_type": "execute_result"
289
  }
 
295
  },
296
  {
297
  "cell_type": "markdown",
298
+ "id": "nWHF",
299
  "metadata": {},
300
  "source": [
301
  "**B. Top 10 Revenue by categories**\n"
 
304
  {
305
  "cell_type": "code",
306
  "execution_count": null,
307
+ "id": "iLit",
308
  "metadata": {},
309
  "outputs": [
310
  {
 
412
  "9 cool_stuff 3559 744649.32"
413
  ]
414
  },
415
+ "execution_count": 30,
416
  "metadata": {},
417
  "output_type": "execute_result"
418
  }
 
424
  },
425
  {
426
  "cell_type": "markdown",
427
+ "id": "ZHCJ",
428
  "metadata": {},
429
  "source": [
430
  "**C. Top 10 Least Revenue by Categories**\n"
 
432
  },
433
  {
434
  "cell_type": "code",
435
+ "execution_count": 31,
436
+ "id": "ROlb",
437
  "metadata": {},
438
  "outputs": [
439
  {
 
541
  "9 fashio_female_clothing 36 4902.67"
542
  ]
543
  },
544
+ "execution_count": 31,
545
  "metadata": {},
546
  "output_type": "execute_result"
547
  }
 
555
  },
556
  {
557
  "cell_type": "markdown",
558
+ "id": "qnkX",
559
  "metadata": {},
560
  "source": [
561
  "**D. Revenue per State**\n"
 
563
  },
564
  {
565
  "cell_type": "code",
566
+ "execution_count": 32,
567
+ "id": "TqIu",
568
  "metadata": {},
569
  "outputs": [
570
  {
 
661
  "9 ES 317682.65"
662
  ]
663
  },
664
+ "execution_count": 32,
665
  "metadata": {},
666
  "output_type": "execute_result"
667
  }
 
673
  },
674
  {
675
  "cell_type": "markdown",
676
+ "id": "Vxnm",
677
  "metadata": {},
678
  "source": [
679
  "**E. Delivery Date Difference**\n"
 
682
  {
683
  "cell_type": "code",
684
  "execution_count": null,
685
+ "id": "DnEU",
686
  "metadata": {},
687
  "outputs": [
688
  {
 
881
  "26 RO 20"
882
  ]
883
  },
884
+ "execution_count": 33,
885
  "metadata": {},
886
  "output_type": "execute_result"
887
  }
 
893
  },
894
  {
895
  "cell_type": "markdown",
896
+ "id": "ulZA",
897
  "metadata": {},
898
  "source": [
899
  "**F. Real vs. Predicted Delivered Time**\n"
 
901
  },
902
  {
903
  "cell_type": "code",
904
+ "execution_count": 34,
905
+ "id": "ecfG",
906
  "metadata": {},
907
  "outputs": [
908
  {
 
1103
  "11 26.030012 27.681340 NaN "
1104
  ]
1105
  },
1106
+ "execution_count": 34,
1107
  "metadata": {},
1108
  "output_type": "execute_result"
1109
  }
 
1117
  },
1118
  {
1119
  "cell_type": "markdown",
1120
+ "id": "Pvdt",
1121
  "metadata": {},
1122
  "source": [
1123
  "**G. Global Amount of Order Status**\n"
 
1126
  {
1127
  "cell_type": "code",
1128
  "execution_count": null,
1129
+ "id": "ZBYS",
1130
  "metadata": {},
1131
  "outputs": [
1132
  {
 
1211
  "7 unavailable 609"
1212
  ]
1213
  },
1214
+ "execution_count": 35,
1215
  "metadata": {},
1216
  "output_type": "execute_result"
1217
  }
 
1223
  },
1224
  {
1225
  "cell_type": "markdown",
1226
+ "id": "aLJB",
1227
  "metadata": {},
1228
  "source": [
1229
  "**H. Orders per Day and Holidays in 2017**\n"
 
1231
  },
1232
  {
1233
  "cell_type": "code",
1234
+ "execution_count": 36,
1235
+ "id": "nHfw",
1236
  "metadata": {},
1237
  "outputs": [
1238
  {
 
1350
  "[361 rows x 3 columns]"
1351
  ]
1352
  },
1353
+ "execution_count": 36,
1354
  "metadata": {},
1355
  "output_type": "execute_result"
1356
  }
 
1364
  },
1365
  {
1366
  "cell_type": "markdown",
1367
+ "id": "xXTn",
1368
  "metadata": {},
1369
  "source": [
1370
  "**I. Freight Value Weight Relationship**\n"
 
1372
  },
1373
  {
1374
  "cell_type": "code",
1375
+ "execution_count": 37,
1376
+ "id": "AjVT",
1377
  "metadata": {},
1378
  "outputs": [
1379
  {
 
1491
  "[96478 rows x 3 columns]"
1492
  ]
1493
  },
1494
+ "execution_count": 37,
1495
  "metadata": {},
1496
  "output_type": "execute_result"
1497
  }
 
1505
  },
1506
  {
1507
  "cell_type": "markdown",
1508
+ "id": "pHFh",
1509
  "metadata": {},
1510
  "source": [
1511
  "## 3. Plots\n"
 
1513
  },
1514
  {
1515
  "cell_type": "code",
1516
+ "execution_count": 38,
1517
+ "id": "NCOB",
1518
  "metadata": {},
1519
  "outputs": [],
1520
  "source": [
 
1534
  },
1535
  {
1536
  "cell_type": "markdown",
1537
+ "id": "aqbW",
1538
  "metadata": {},
1539
  "source": [
1540
  "**A. Revenue by Month in 2017**\n"
 
1542
  },
1543
  {
1544
  "cell_type": "code",
1545
+ "execution_count": 39,
1546
+ "id": "TRpd",
1547
  "metadata": {},
1548
  "outputs": [
1549
  {
 
1563
  },
1564
  {
1565
  "cell_type": "markdown",
1566
+ "id": "TXez",
1567
  "metadata": {},
1568
  "source": [
1569
  "**B. Real vs. Predicted Delivered Time**\n"
 
1572
  {
1573
  "cell_type": "code",
1574
  "execution_count": null,
1575
+ "id": "dNNg",
1576
  "metadata": {},
1577
  "outputs": [
1578
  {
 
1592
  },
1593
  {
1594
  "cell_type": "markdown",
1595
+ "id": "yCnT",
1596
  "metadata": {},
1597
  "source": [
1598
  "**C. Global Amount of Order Status**\n"
 
1600
  },
1601
  {
1602
  "cell_type": "code",
1603
+ "execution_count": 41,
1604
+ "id": "wlCL",
1605
  "metadata": {},
1606
  "outputs": [
1607
  {
 
1621
  },
1622
  {
1623
  "cell_type": "markdown",
1624
+ "id": "kqZH",
1625
  "metadata": {},
1626
  "source": [
1627
  "**D. Revenue per State**\n"
 
1629
  },
1630
  {
1631
  "cell_type": "code",
1632
+ "execution_count": 42,
1633
+ "id": "wAgl",
1634
  "metadata": {},
1635
  "outputs": [
1636
  {
 
2498
  },
2499
  {
2500
  "cell_type": "markdown",
2501
+ "id": "rEll",
2502
  "metadata": {},
2503
  "source": [
2504
  "**E. Top 10 Least Revenue by Categories**\n"
 
2506
  },
2507
  {
2508
  "cell_type": "code",
2509
+ "execution_count": 43,
2510
+ "id": "dGlV",
2511
  "metadata": {},
2512
  "outputs": [
2513
  {
 
2527
  },
2528
  {
2529
  "cell_type": "markdown",
2530
+ "id": "SdmI",
2531
  "metadata": {},
2532
  "source": [
2533
  "**F. Top 10 Revenue Categories Amount**\n"
 
2535
  },
2536
  {
2537
  "cell_type": "code",
2538
+ "execution_count": 44,
2539
+ "id": "lgWD",
2540
  "metadata": {},
2541
  "outputs": [
2542
  {
 
2556
  },
2557
  {
2558
  "cell_type": "markdown",
2559
+ "id": "yOPj",
2560
  "metadata": {},
2561
  "source": [
2562
  "**G. Top 10 Revenue by Categories**\n"
 
2564
  },
2565
  {
2566
  "cell_type": "code",
2567
+ "execution_count": 45,
2568
+ "id": "fwwy",
2569
  "metadata": {},
2570
  "outputs": [
2571
  {
 
3433
  },
3434
  {
3435
  "cell_type": "markdown",
3436
+ "id": "LJZf",
3437
  "metadata": {},
3438
  "source": [
3439
  "**H. Freight Value vs. Product Weight**\n"
 
3441
  },
3442
  {
3443
  "cell_type": "code",
3444
+ "execution_count": 46,
3445
+ "id": "urSm",
3446
  "metadata": {},
3447
  "outputs": [
3448
  {
 
3462
  },
3463
  {
3464
  "cell_type": "markdown",
3465
+ "id": "jxvo",
3466
  "metadata": {},
3467
  "source": [
3468
  "**I. Diffrence Between Deliver Estimated Date and Delivery Date**\n"
 
3470
  },
3471
  {
3472
  "cell_type": "code",
3473
+ "execution_count": 47,
3474
+ "id": "mWxS",
3475
  "metadata": {},
3476
  "outputs": [
3477
  {
 
3491
  },
3492
  {
3493
  "cell_type": "markdown",
3494
+ "id": "CcZR",
3495
  "metadata": {},
3496
  "source": [
3497
  "**J. Order Amount per Day with Holidays**\n"
 
3499
  },
3500
  {
3501
  "cell_type": "code",
3502
+ "execution_count": 48,
3503
+ "id": "YWSi",
3504
  "metadata": {},
3505
  "outputs": [
3506
  {
 
3520
  }
3521
  ],
3522
  "metadata": {
 
 
 
 
 
3523
  "language_info": {
3524
+ "name": "python"
 
 
 
 
 
 
 
 
 
3525
  }
3526
  },
3527
  "nbformat": 4,
tutorial_app.py DELETED
@@ -1,419 +0,0 @@
1
- import marimo
2
-
3
- __generated_with = "0.14.16"
4
- app = marimo.App(width="medium")
5
-
6
-
7
- @app.cell
8
- def _():
9
- import marimo as mo
10
- return (mo,)
11
-
12
-
13
- @app.cell
14
- def _(mo):
15
- mo.md(r"""# E-Commerce ELT Pipeline""")
16
- return
17
-
18
-
19
- @app.cell
20
- def _(mo):
21
- mo.md(r"""## Table of Contents""")
22
- return
23
-
24
-
25
- @app.cell
26
- def _(mo):
27
- mo.md(
28
- r"""
29
- - [Description](#1-description)
30
- - [ETL](#2-etl)
31
- - [Extract & Load](#21-extract-and-load)
32
- - [Transform](#22-transform)
33
- - [Plots](#3-plots)
34
- """
35
- )
36
- return
37
-
38
-
39
- @app.cell
40
- def _(mo):
41
- mo.md(r"""## 1. Description""")
42
- return
43
-
44
-
45
- @app.cell
46
- def _(mo):
47
- mo.md(
48
- r"""
49
- This project analyzes e-commerce data from a Brazilian marketplace to explore key business metrics related to **revenue** and **delivery performance**. Using an interactive Marimo application, the analysis provides insights into:
50
-
51
- * **Revenue:** Annual revenue, popular product categories, and sales by state.
52
- * **Delivery:** Delivery performance, including time-to-delivery and its correlation with public holidays.
53
-
54
- The data pipeline processes information from multiple CSV files and a public API, storing and analyzing the results using Python. The final interactive report is presented as a Hugging Face Space built with Marimo.
55
- """
56
- )
57
- return
58
-
59
-
60
- @app.cell
61
- def _(mo):
62
- mo.md(r"""## 2. ETL""")
63
- return
64
-
65
-
66
- @app.cell
67
- def _():
68
- from pandas import DataFrame
69
- from pathlib import Path
70
- from sqlalchemy import create_engine
71
-
72
- from src import config
73
- from src.extract import extract
74
- from src.load import load
75
- from src.transform import QueryEnum, run_queries
76
- return (
77
- DataFrame,
78
- Path,
79
- QueryEnum,
80
- config,
81
- create_engine,
82
- extract,
83
- load,
84
- run_queries,
85
- )
86
-
87
-
88
- @app.cell
89
- def _(mo):
90
- mo.md(r"""### 2.1 Extract and Load""")
91
- return
92
-
93
-
94
- @app.cell
95
- def _(Path, config, create_engine, extract, load):
96
- DB_PATH = Path(config.SQLITE_DB_ABSOLUTE_PATH)
97
-
98
- if DB_PATH.exists() and DB_PATH.stat().st_size > 0:
99
- print("Database found. Skipping ETL process.")
100
- ENGINE = create_engine(f"sqlite:///{DB_PATH}", echo=False)
101
- else:
102
- print("Database not found or empty. Starting ETL process...")
103
- ENGINE = create_engine(f"sqlite:///{DB_PATH}", echo=False)
104
-
105
- csv_dataframes = extract(
106
- csv_folder=config.DATASET_ROOT_PATH,
107
- csv_table_mapping=config.get_csv_to_table_mapping(),
108
- public_holidays_url=config.PUBLIC_HOLIDAYS_URL,
109
- )
110
-
111
- load(dataframes=csv_dataframes, database=ENGINE)
112
- print("ETL process complete.")
113
- return (ENGINE,)
114
-
115
-
116
- @app.cell
117
- def _(mo):
118
- mo.md(r"""### 2.2 Transform""")
119
- return
120
-
121
-
122
- @app.cell
123
- def _(DataFrame, ENGINE, run_queries):
124
- query_results: dict[str, DataFrame] = run_queries(database=ENGINE)
125
- return (query_results,)
126
-
127
-
128
- @app.cell
129
- def _(mo):
130
- mo.md(r"""**A. Revenue by Month and Year**""")
131
- return
132
-
133
-
134
- @app.cell
135
- def _(QueryEnum, query_results: "dict[str, DataFrame]"):
136
- revenue_by_month_year = query_results[QueryEnum.REVENUE_BY_MONTH_YEAR.value]
137
- revenue_by_month_year
138
- return (revenue_by_month_year,)
139
-
140
-
141
- @app.cell
142
- def _(mo):
143
- mo.md(r"""**B. Top 10 Revenue by categories**""")
144
- return
145
-
146
-
147
- @app.cell
148
- def _(QueryEnum, query_results: "dict[str, DataFrame]"):
149
- top_10_revenue_categories = query_results[
150
- QueryEnum.TOP_10_REVENUE_CATEGORIES.value
151
- ]
152
- top_10_revenue_categories
153
- return (top_10_revenue_categories,)
154
-
155
-
156
- @app.cell
157
- def _(mo):
158
- mo.md(r"""**C. Top 10 Least Revenue by Categories**""")
159
- return
160
-
161
-
162
- @app.cell
163
- def _(QueryEnum, query_results: "dict[str, DataFrame]"):
164
- top_10_least_revenue_categories = query_results[
165
- QueryEnum.TOP_10_LEAST_REVENUE_CATEGORIES.value
166
- ]
167
- top_10_least_revenue_categories
168
- return (top_10_least_revenue_categories,)
169
-
170
-
171
- @app.cell
172
- def _(mo):
173
- mo.md(r"""**D. Revenue per State**""")
174
- return
175
-
176
-
177
- @app.cell
178
- def _(QueryEnum, query_results: "dict[str, DataFrame]"):
179
- revenue_per_state = query_results[QueryEnum.REVENUE_PER_STATE.value]
180
- revenue_per_state
181
- return (revenue_per_state,)
182
-
183
-
184
- @app.cell
185
- def _(mo):
186
- mo.md(r"""**E. Delivery Date Difference**""")
187
- return
188
-
189
-
190
- @app.cell
191
- def _(QueryEnum, query_results: "dict[str, DataFrame]"):
192
- delivery_date_difference = query_results[
193
- QueryEnum.DELIVERY_DATE_DIFFERENCE.value
194
- ]
195
- delivery_date_difference
196
- return (delivery_date_difference,)
197
-
198
-
199
- @app.cell
200
- def _(mo):
201
- mo.md(r"""**F. Real vs. Predicted Delivered Time**""")
202
- return
203
-
204
-
205
- @app.cell
206
- def _(QueryEnum, query_results: "dict[str, DataFrame]"):
207
- real_vs_estimated_delivery_time = query_results[
208
- QueryEnum.REAL_VS_ESTIMATED_DELIVERED_TIME.value
209
- ]
210
- real_vs_estimated_delivery_time
211
- return (real_vs_estimated_delivery_time,)
212
-
213
-
214
- @app.cell
215
- def _(mo):
216
- mo.md(r"""**G. Global Amount of Order Status**""")
217
- return
218
-
219
-
220
- @app.cell
221
- def _(QueryEnum, query_results: "dict[str, DataFrame]"):
222
- global_amount_order_status = query_results[
223
- QueryEnum.GLOBAL_AMOUNT_ORDER_STATUS.value
224
- ]
225
- global_amount_order_status
226
- return (global_amount_order_status,)
227
-
228
-
229
- @app.cell
230
- def _(mo):
231
- mo.md(r"""**H. Orders per Day and Holidays in 2017**""")
232
- return
233
-
234
-
235
- @app.cell
236
- def _(QueryEnum, query_results: "dict[str, DataFrame]"):
237
- orders_per_day_and_holidays = query_results[
238
- QueryEnum.ORDERS_PER_DAY_AND_HOLIDAYS_2017.value
239
- ]
240
- orders_per_day_and_holidays
241
- return (orders_per_day_and_holidays,)
242
-
243
-
244
- @app.cell
245
- def _(mo):
246
- mo.md(r"""**I. Freight Value Weight Relationship**""")
247
- return
248
-
249
-
250
- @app.cell
251
- def _(QueryEnum, query_results: "dict[str, DataFrame]"):
252
- freight_value_weight_relationship = query_results[
253
- QueryEnum.GET_FREIGHT_VALUE_WEIGHT_RELATIONSHIP.value
254
- ]
255
- freight_value_weight_relationship
256
- return (freight_value_weight_relationship,)
257
-
258
-
259
- @app.cell
260
- def _(mo):
261
- mo.md(r"""## 3. Plots""")
262
- return
263
-
264
-
265
- @app.cell
266
- def _():
267
- from src.plots import (
268
- plot_revenue_by_month_year,
269
- plot_real_vs_predicted_delivered_time,
270
- plot_global_amount_order_status,
271
- plot_revenue_per_state,
272
- plot_top_10_least_revenue_categories,
273
- plot_top_10_revenue_categories_amount,
274
- plot_top_10_revenue_categories,
275
- plot_freight_value_weight_relationship,
276
- plot_delivery_date_difference,
277
- plot_order_amount_per_day_with_holidays,
278
- )
279
- return (
280
- plot_delivery_date_difference,
281
- plot_freight_value_weight_relationship,
282
- plot_global_amount_order_status,
283
- plot_order_amount_per_day_with_holidays,
284
- plot_real_vs_predicted_delivered_time,
285
- plot_revenue_by_month_year,
286
- plot_revenue_per_state,
287
- plot_top_10_least_revenue_categories,
288
- plot_top_10_revenue_categories,
289
- plot_top_10_revenue_categories_amount,
290
- )
291
-
292
-
293
- @app.cell
294
- def _(mo):
295
- mo.md(r"""**A. Revenue by Month in 2017**""")
296
- return
297
-
298
-
299
- @app.cell
300
- def _(plot_revenue_by_month_year, revenue_by_month_year):
301
- plot_revenue_by_month_year(df=revenue_by_month_year, year=2017)
302
- return
303
-
304
-
305
- @app.cell
306
- def _(mo):
307
- mo.md(r"""**B. Real vs. Predicted Delivered Time**""")
308
- return
309
-
310
-
311
- @app.cell
312
- def _(plot_real_vs_predicted_delivered_time, real_vs_estimated_delivery_time):
313
- plot_real_vs_predicted_delivered_time(
314
- df=real_vs_estimated_delivery_time, year=2017
315
- )
316
- return
317
-
318
-
319
- @app.cell
320
- def _(mo):
321
- mo.md(r"""**C. Global Amount of Order Status**""")
322
- return
323
-
324
-
325
- @app.cell
326
- def _(global_amount_order_status, plot_global_amount_order_status):
327
- plot_global_amount_order_status(df=global_amount_order_status)
328
- return
329
-
330
-
331
- @app.cell
332
- def _(mo):
333
- mo.md(r"""**D. Revenue per State**""")
334
- return
335
-
336
-
337
- @app.cell
338
- def _(plot_revenue_per_state, revenue_per_state):
339
- plot_revenue_per_state(df=revenue_per_state)
340
- return
341
-
342
-
343
- @app.cell
344
- def _(mo):
345
- mo.md(r"""**E. Top 10 Least Revenue by Categories**""")
346
- return
347
-
348
-
349
- @app.cell
350
- def _(plot_top_10_least_revenue_categories, top_10_least_revenue_categories):
351
- plot_top_10_least_revenue_categories(df=top_10_least_revenue_categories)
352
- return
353
-
354
-
355
- @app.cell
356
- def _(mo):
357
- mo.md(r"""**F. Top 10 Revenue Categories Amount**""")
358
- return
359
-
360
-
361
- @app.cell
362
- def _(plot_top_10_revenue_categories_amount, top_10_revenue_categories):
363
- plot_top_10_revenue_categories_amount(df=top_10_revenue_categories)
364
- return
365
-
366
-
367
- @app.cell
368
- def _(mo):
369
- mo.md(r"""**G. Top 10 Revenue by Categories**""")
370
- return
371
-
372
-
373
- @app.cell
374
- def _(plot_top_10_revenue_categories, top_10_revenue_categories):
375
- plot_top_10_revenue_categories(df=top_10_revenue_categories)
376
- return
377
-
378
-
379
- @app.cell
380
- def _(mo):
381
- mo.md(r"""**H. Freight Value vs. Product Weight**""")
382
- return
383
-
384
-
385
- @app.cell
386
- def _(
387
- freight_value_weight_relationship,
388
- plot_freight_value_weight_relationship,
389
- ):
390
- plot_freight_value_weight_relationship(df=freight_value_weight_relationship)
391
- return
392
-
393
-
394
- @app.cell
395
- def _(mo):
396
- mo.md(r"""**I. Diffrence Between Deliver Estimated Date and Delivery Date**""")
397
- return
398
-
399
-
400
- @app.cell
401
- def _(delivery_date_difference, plot_delivery_date_difference):
402
- plot_delivery_date_difference(df=delivery_date_difference)
403
- return
404
-
405
-
406
- @app.cell
407
- def _(mo):
408
- mo.md(r"""**J. Order Amount per Day with Holidays**""")
409
- return
410
-
411
-
412
- @app.cell
413
- def _(orders_per_day_and_holidays, plot_order_amount_per_day_with_holidays):
414
- plot_order_amount_per_day_with_holidays(df=orders_per_day_and_holidays)
415
- return
416
-
417
-
418
- if __name__ == "__main__":
419
- app.run()