iBrokeTheCode committed on
Commit
e5af7f7
·
1 Parent(s): 397b56d

feature: Add first version of marimo interactive app

Browse files
Files changed (2) hide show
  1. app.py +177 -52
  2. src/plots.py +3 -3
app.py CHANGED
@@ -26,19 +26,15 @@ def _(mo):
26
  mo.md(
27
  r"""
28
  This interactive dashboard explores insights from the [Brazilian e-commerce dataset](https://www.kaggle.com/datasets/olistbr/brazilian-ecommerce) and the [Public Holiday API](https://date.nager.at/Api) :
 
29
  - Sales performance by category and state
30
  - Delivery efficiency
31
  - Seasonal trends and holidays impact
32
 
33
- Use the tabs above to explore different insights!
34
-
35
- _Built with Marimo._
36
-
37
- ---
38
 
39
- 💡 **Want a step-by-step walkthrough instead?**
40
-
41
- You can check the Jupyter notebook version here: 👉 [Jupyter notebook](https://huggingface.co/spaces/iBrokeTheCode/E-Commerce_ELT/blob/main/tutorial_app.ipynb)
42
  """
43
  )
44
  return
@@ -46,6 +42,8 @@ def _(mo):
46
 
47
  @app.cell
48
  def _():
 
 
49
  from pandas import DataFrame
50
  from pathlib import Path
51
  from sqlalchemy import create_engine
@@ -90,6 +88,8 @@ def _():
90
 
91
  @app.cell
92
  def _(DataFrame, Path, config, create_engine, extract, load, run_queries):
 
 
93
  DB_PATH = Path(config.SQLITE_DB_ABSOLUTE_PATH)
94
 
95
  if DB_PATH.exists() and DB_PATH.stat().st_size > 0:
@@ -114,43 +114,36 @@ def _(DataFrame, Path, config, create_engine, extract, load, run_queries):
114
 
115
  @app.cell
116
  def _(QueryEnum, query_results: "dict[str, DataFrame]"):
117
- # **A. Revenue by Month and Year**
 
118
  revenue_by_month_year = query_results[QueryEnum.REVENUE_BY_MONTH_YEAR.value]
119
 
120
- # **B. Top 10 Revenue by categories**
121
  top_10_revenue_categories = query_results[
122
  QueryEnum.TOP_10_REVENUE_CATEGORIES.value
123
  ]
124
 
125
- # **C. Top 10 Least Revenue by Categories**
126
  top_10_least_revenue_categories = query_results[
127
  QueryEnum.TOP_10_LEAST_REVENUE_CATEGORIES.value
128
  ]
129
 
130
- # **D. Revenue per State**
131
  revenue_per_state = query_results[QueryEnum.REVENUE_PER_STATE.value]
132
 
133
- # **E. Delivery Date Difference**
134
  delivery_date_difference = query_results[
135
  QueryEnum.DELIVERY_DATE_DIFFERENCE.value
136
  ]
137
 
138
- # **F. Real vs. Predicted Delivered Time**
139
  real_vs_estimated_delivery_time = query_results[
140
  QueryEnum.REAL_VS_ESTIMATED_DELIVERED_TIME.value
141
  ]
142
 
143
- # **G. Global Amount of Order Status**
144
  global_amount_order_status = query_results[
145
  QueryEnum.GLOBAL_AMOUNT_ORDER_STATUS.value
146
  ]
147
 
148
- # **H. Orders per Day and Holidays in 2017**
149
  orders_per_day_and_holidays = query_results[
150
  QueryEnum.ORDERS_PER_DAY_AND_HOLIDAYS_2017.value
151
  ]
152
 
153
- # **I. Freight Value Weight Relationship**
154
  freight_value_weight_relationship = query_results[
155
  QueryEnum.GET_FREIGHT_VALUE_WEIGHT_RELATIONSHIP.value
156
  ]
@@ -168,7 +161,142 @@ def _(QueryEnum, query_results: "dict[str, DataFrame]"):
168
 
169
  @app.cell
170
  def _(mo):
171
- mo.md(r"""## Insights""")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  return
173
 
174
 
@@ -194,56 +322,58 @@ def _(
194
  top_10_revenue_categories,
195
  ):
196
  overview_tab = mo.vstack(
197
- [
198
- mo.md("### Global Order Status Overview"),
199
- mo.hstack(
200
- [
201
- global_amount_order_status,
202
- plot_global_amount_order_status(df=global_amount_order_status),
203
- ]
204
- ),
205
- ]
206
  )
207
 
208
  revenue_tab = mo.vstack(
209
- [
210
- mo.md("### Revenue by Month and Year"),
211
- mo.ui.table(revenue_by_month_year),
 
 
212
  plot_revenue_by_month_year(df=revenue_by_month_year, year=2017),
213
- mo.md("### Revenue by State"),
214
- mo.ui.table(revenue_per_state),
215
  plot_revenue_per_state(revenue_per_state),
216
- ]
217
  )
218
 
219
  categories_tab = mo.vstack(
220
- [
221
- mo.md("### Top 10 Revenue Categories"),
222
- mo.ui.table(top_10_revenue_categories),
 
 
223
  plot_top_10_revenue_categories(top_10_revenue_categories),
 
224
  plot_top_10_revenue_categories_amount(top_10_revenue_categories),
225
- mo.md("### Bottom 10 Revenue Categories"),
226
- mo.ui.table(top_10_least_revenue_categories),
227
  plot_top_10_least_revenue_categories(top_10_least_revenue_categories),
228
- ]
229
  )
230
 
231
  delivery_tab = mo.vstack(
232
- [
233
- mo.md("### Freight Value vs Product Weight"),
234
- mo.ui.table(freight_value_weight_relationship),
 
 
 
235
  plot_freight_value_weight_relationship(
236
  freight_value_weight_relationship
237
  ),
238
- mo.md("### Real vs Estimated Delivery Time"),
239
- mo.ui.table(real_vs_estimated_delivery_time),
240
  plot_real_vs_predicted_delivered_time(
241
  df=real_vs_estimated_delivery_time, year=2017
242
  ),
243
- mo.md("### Orders and Holidays"),
244
- mo.ui.table(orders_per_day_and_holidays),
245
  plot_order_amount_per_day_with_holidays(orders_per_day_and_holidays),
246
- ]
247
  )
248
  return categories_tab, delivery_tab, overview_tab, revenue_tab
249
 
@@ -261,10 +391,5 @@ def _(categories_tab, delivery_tab, mo, overview_tab, revenue_tab):
261
  return
262
 
263
 
264
- @app.cell
265
- def _():
266
- return
267
-
268
-
269
  if __name__ == "__main__":
270
  app.run()
 
26
  mo.md(
27
  r"""
28
  This interactive dashboard explores insights from the [Brazilian e-commerce dataset](https://www.kaggle.com/datasets/olistbr/brazilian-ecommerce) and the [Public Holiday API](https://date.nager.at/Api) :
29
+
30
  - Sales performance by category and state
31
  - Delivery efficiency
32
  - Seasonal trends and holidays impact
33
 
34
+ _Built with [Marimo](https://marimo.io)._
 
 
 
 
35
 
36
+ > 💡 **Want a step-by-step walkthrough instead?**
37
+ > Check the Jupyter notebook version here: 👉 [Jupyter notebook](https://huggingface.co/spaces/iBrokeTheCode/E-Commerce_ELT/blob/main/tutorial_app.ipynb)
 
38
  """
39
  )
40
  return
 
42
 
43
  @app.cell
44
  def _():
45
+ # 📌 IMPORT LIBRARIES AND PACKAGES
46
+
47
  from pandas import DataFrame
48
  from pathlib import Path
49
  from sqlalchemy import create_engine
 
88
 
89
  @app.cell
90
  def _(DataFrame, Path, config, create_engine, extract, load, run_queries):
91
+ # 📌 LOAD SQLITE DATABASE
92
+
93
  DB_PATH = Path(config.SQLITE_DB_ABSOLUTE_PATH)
94
 
95
  if DB_PATH.exists() and DB_PATH.stat().st_size > 0:
 
114
 
115
  @app.cell
116
  def _(QueryEnum, query_results: "dict[str, DataFrame]"):
117
+ # 📌 RETRIEVE RELEVANT DATA FROM DATABASE
118
+
119
  revenue_by_month_year = query_results[QueryEnum.REVENUE_BY_MONTH_YEAR.value]
120
 
 
121
  top_10_revenue_categories = query_results[
122
  QueryEnum.TOP_10_REVENUE_CATEGORIES.value
123
  ]
124
 
 
125
  top_10_least_revenue_categories = query_results[
126
  QueryEnum.TOP_10_LEAST_REVENUE_CATEGORIES.value
127
  ]
128
 
 
129
  revenue_per_state = query_results[QueryEnum.REVENUE_PER_STATE.value]
130
 
 
131
  delivery_date_difference = query_results[
132
  QueryEnum.DELIVERY_DATE_DIFFERENCE.value
133
  ]
134
 
 
135
  real_vs_estimated_delivery_time = query_results[
136
  QueryEnum.REAL_VS_ESTIMATED_DELIVERED_TIME.value
137
  ]
138
 
 
139
  global_amount_order_status = query_results[
140
  QueryEnum.GLOBAL_AMOUNT_ORDER_STATUS.value
141
  ]
142
 
 
143
  orders_per_day_and_holidays = query_results[
144
  QueryEnum.ORDERS_PER_DAY_AND_HOLIDAYS_2017.value
145
  ]
146
 
 
147
  freight_value_weight_relationship = query_results[
148
  QueryEnum.GET_FREIGHT_VALUE_WEIGHT_RELATIONSHIP.value
149
  ]
 
161
 
162
  @app.cell
163
  def _(mo):
164
+ mo.Html("<br><hr><br>")
165
+ return
166
+
167
+
168
+ @app.cell
169
+ def _(mo):
170
+ mo.md(r"""# 📈 Insights""")
171
+ return
172
+
173
+
174
+ @app.cell
175
+ def _(mo):
176
+ # 📌 TODO: Retrieve real data
177
+
178
+ st1 = mo.stat(
179
+ label="Total Revenue 2017",
180
+ bordered=True,
181
+ value=f"${2_000_000:,}",
182
+ caption=f"Previous year: ${1_500_000:,}",
183
+ direction="increase",
184
+ )
185
+ st2 = mo.stat(
186
+ label="Successful Deliveries",
187
+ bordered=True,
188
+ value=f"{1_280_700:,}",
189
+ caption="Review chart for more details",
190
+ direction="increase",
191
+ )
192
+ st3 = mo.stat(
193
+ label="Uncompleted Orders",
194
+ bordered=True,
195
+ value=f"{80_700:,}",
196
+ caption="Review chart for more details",
197
+ direction="decrease",
198
+ )
199
+ st4 = mo.stat(
200
+ label="Category with greater revenue",
201
+ bordered=True,
202
+ value=f"{'bed_bath_table'.replace('_', ' ').title()}",
203
+ caption=f"${884_220:,}",
204
+ direction="increase",
205
+ )
206
+
207
+ mo.hstack([st1, st2, st3, st4], widths="equal", gap=1)
208
+ return
209
+
210
+
211
+ @app.cell
212
+ def _(mo):
213
+ mo.Html("<br><hr><br>")
214
+ return
215
+
216
+
217
+ @app.cell
218
+ def _(mo):
219
+ mo.md(r"""# 📋 Tables""")
220
+ return
221
+
222
+
223
+ @app.cell
224
+ def _(
225
+ freight_value_weight_relationship,
226
+ global_amount_order_status,
227
+ mo,
228
+ orders_per_day_and_holidays,
229
+ real_vs_estimated_delivery_time,
230
+ revenue_by_month_year,
231
+ revenue_per_state,
232
+ top_10_least_revenue_categories,
233
+ top_10_revenue_categories,
234
+ ):
235
+ overview_table_tab = mo.vstack(
236
+ align="center",
237
+ justify="center",
238
+ gap=2,
239
+ items=[
240
+ mo.center(mo.md("## Global Order Status Overview")),
241
+ global_amount_order_status,
242
+ ],
243
+ )
244
+ revenue_table_tab = mo.vstack(
245
+ align="center",
246
+ justify="center",
247
+ gap=2,
248
+ items=[
249
+ mo.center(mo.md("## Revenue by Month and Year")),
250
+ revenue_by_month_year,
251
+ mo.center(mo.md("## Revenue by State")),
252
+ revenue_per_state,
253
+ ],
254
+ )
255
+ categories_table_tab = mo.vstack(
256
+ align="center",
257
+ justify="center",
258
+ gap=2,
259
+ items=[
260
+ mo.center(mo.md("## Top 10 Revenue Categories")),
261
+ top_10_revenue_categories,
262
+ mo.center(mo.md("## Bottom 10 Revenue Categories")),
263
+ top_10_least_revenue_categories,
264
+ ],
265
+ )
266
+ delivery_table_tab = mo.vstack(
267
+ align="center",
268
+ justify="center",
269
+ gap=2,
270
+ items=[
271
+ mo.center(mo.md("## Freight Value vs Product Weight")),
272
+ freight_value_weight_relationship,
273
+ mo.center(mo.md("## Real vs Estimated Delivery Time")),
274
+ real_vs_estimated_delivery_time,
275
+ mo.center(mo.md("## Orders and Holidays")),
276
+ orders_per_day_and_holidays,
277
+ ],
278
+ )
279
+
280
+ mo.ui.tabs(
281
+ {
282
+ "📊 Overview": overview_table_tab,
283
+ "💰 Revenue": revenue_table_tab,
284
+ "📦 Categories": categories_table_tab,
285
+ "🚚 Freight & Delivery": delivery_table_tab,
286
+ }
287
+ )
288
+ return
289
+
290
+
291
+ @app.cell
292
+ def _(mo):
293
+ mo.Html("<br><hr><br>")
294
+ return
295
+
296
+
297
+ @app.cell
298
+ def _(mo):
299
+ mo.md(r"""# 📊 Charts""")
300
  return
301
 
302
 
 
322
  top_10_revenue_categories,
323
  ):
324
  overview_tab = mo.vstack(
325
+ align="center",
326
+ justify="center",
327
+ gap=2,
328
+ items=[
329
+ mo.center(mo.md("## Global Order Status Overview")),
330
+ plot_global_amount_order_status(df=global_amount_order_status),
331
+ ],
 
 
332
  )
333
 
334
  revenue_tab = mo.vstack(
335
+ align="center",
336
+ justify="center",
337
+ gap=2,
338
+ items=[
339
+ mo.center(mo.md("## Revenue by Month and Year")),
340
  plot_revenue_by_month_year(df=revenue_by_month_year, year=2017),
341
+ mo.center(mo.md("## Revenue by State")),
 
342
  plot_revenue_per_state(revenue_per_state),
343
+ ],
344
  )
345
 
346
  categories_tab = mo.vstack(
347
+ align="center",
348
+ justify="center",
349
+ gap=2,
350
+ items=[
351
+ mo.center(mo.md("## Top 10 Revenue Categories")),
352
  plot_top_10_revenue_categories(top_10_revenue_categories),
353
+ mo.center(mo.md("## Top 10 Revenue Categories by Amount")),
354
  plot_top_10_revenue_categories_amount(top_10_revenue_categories),
355
+ mo.center(mo.md("## Bottom 10 Revenue Categories")),
 
356
  plot_top_10_least_revenue_categories(top_10_least_revenue_categories),
357
+ ],
358
  )
359
 
360
  delivery_tab = mo.vstack(
361
+ gap=2,
362
+ justify="center",
363
+ align="center",
364
+ heights="equal",
365
+ items=[
366
+ mo.center(mo.md("## Freight Value vs Product Weight")),
367
  plot_freight_value_weight_relationship(
368
  freight_value_weight_relationship
369
  ),
370
+ mo.center(mo.md("## Real vs Estimated Delivery Time")),
 
371
  plot_real_vs_predicted_delivered_time(
372
  df=real_vs_estimated_delivery_time, year=2017
373
  ),
374
+ mo.center(mo.md("## Orders and Holidays")),
 
375
  plot_order_amount_per_day_with_holidays(orders_per_day_and_holidays),
376
+ ],
377
  )
378
  return categories_tab, delivery_tab, overview_tab, revenue_tab
379
 
 
391
  return
392
 
393
 
 
 
 
 
 
394
  if __name__ == "__main__":
395
  app.run()
src/plots.py CHANGED
@@ -23,9 +23,9 @@ def plot_revenue_by_month_year(df: DataFrame, year: int) -> Figure:
23
  Figure: A matplotlib figure object with a line and bar chart overlay.
24
  """
25
  rc_file_defaults()
26
- sns.set_style(style="darkgrid", rc=None)
27
 
28
- fig, ax1 = plt.subplots(figsize=(12, 6))
29
 
30
  sns.lineplot(data=df[f"Year{year}"], marker="o", sort=False, ax=ax1)
31
  ax2 = ax1.twinx()
@@ -57,7 +57,7 @@ def plot_real_vs_predicted_delivered_time(df: DataFrame, year: int) -> Figure:
57
  rc_file_defaults()
58
  sns.set_style(style=None, rc=None)
59
 
60
- fig, ax1 = plt.subplots(figsize=(12, 6))
61
 
62
  sns.lineplot(data=df[f"Year{year}_real_time"], marker="o", sort=False, ax=ax1)
63
  sns.lineplot(data=df[f"Year{year}_estimated_time"], marker="o", sort=False, ax=ax1)
 
23
  Figure: A matplotlib figure object with a line and bar chart overlay.
24
  """
25
  rc_file_defaults()
26
+ sns.set_style(style=None, rc=None)
27
 
28
+ fig, ax1 = plt.subplots(figsize=(12, 4))
29
 
30
  sns.lineplot(data=df[f"Year{year}"], marker="o", sort=False, ax=ax1)
31
  ax2 = ax1.twinx()
 
57
  rc_file_defaults()
58
  sns.set_style(style=None, rc=None)
59
 
60
+ fig, ax1 = plt.subplots(figsize=(12, 4))
61
 
62
  sns.lineplot(data=df[f"Year{year}_real_time"], marker="o", sort=False, ax=ax1)
63
  sns.lineplot(data=df[f"Year{year}_estimated_time"], marker="o", sort=False, ax=ax1)