iBrokeTheCode committed on
Commit
cdd27bb
·
1 Parent(s): a312419

chore: Generate database

Browse files
Files changed (2) hide show
  1. app.py +102 -0
  2. src/extract.py +1 -1
app.py CHANGED
@@ -49,5 +49,107 @@ def _(mo):
49
  return
50
 
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  if __name__ == "__main__":
53
  app.run()
 
49
  return
50
 
51
 
52
+ @app.cell
53
+ def _(mo):
54
+ mo.md(r"""## 2. ETL""")
55
+ return
56
+
57
+
58
+ @app.cell
59
+ def _():
60
+ from pandas import DataFrame
61
+ from pathlib import Path
62
+ from sqlalchemy import create_engine
63
+
64
+
65
+ from src import config
66
+ from src.extract import extract
67
+ from src.load import load
68
+ from src.transform import QueryEnum, run_queries
69
+ return (
70
+ DataFrame,
71
+ Path,
72
+ QueryEnum,
73
+ config,
74
+ create_engine,
75
+ extract,
76
+ load,
77
+ run_queries,
78
+ )
79
+
80
+
81
+ @app.cell
82
+ def _(Path, config, create_engine):
83
+ # Create the sqlite database
84
+ Path(config.SQLITE_DB_ABSOLUTE_PATH).touch()
85
+
86
+ # Create the database connection
87
+ ENGINE = create_engine(
88
+ r"sqlite:///{}".format(config.SQLITE_DB_ABSOLUTE_PATH), echo=False
89
+ )
90
+ return (ENGINE,)
91
+
92
+
93
+ @app.cell
94
+ def _(mo):
95
+ mo.md(r"""### 2.1 Extract""")
96
+ return
97
+
98
+
99
+ @app.cell
100
+ def _(config, extract):
101
+ csv_folder = config.DATASET_ROOT_PATH
102
+ public_holidays_url = config.PUBLIC_HOLIDAYS_URL
103
+
104
+ # Get the mapping of the csv files to the table names
105
+ csv_table_mapping = config.get_csv_to_table_mapping()
106
+
107
+ # Extract the data from the csv files, holidays and load them into the dataframes
108
+ csv_dataframes = extract(
109
+ csv_folder=csv_folder,
110
+ csv_table_mapping=csv_table_mapping,
111
+ public_holidays_url=public_holidays_url,
112
+ )
113
+ return (csv_dataframes,)
114
+
115
+
116
+ @app.cell
117
+ def _(mo):
118
+ mo.md(r"""### 2.2 Load""")
119
+ return
120
+
121
+
122
+ @app.cell
123
+ def _(ENGINE, csv_dataframes, load):
124
+ # Store dataframes in SQLite database (our Data Warehouse in this case)
125
+ load(dataframes=csv_dataframes, database=ENGINE)
126
+ return
127
+
128
+
129
+ @app.cell
130
+ def _(mo):
131
+ mo.md(r"""### 2.3 Transform""")
132
+ return
133
+
134
+
135
+ @app.cell
136
+ def _(DataFrame, ENGINE, run_queries):
137
+ query_results: dict[str, DataFrame] = run_queries(database=ENGINE)
138
+ return (query_results,)
139
+
140
+
141
+ @app.cell
142
+ def _(QueryEnum, query_results: "dict[str, DataFrame]"):
143
+ # Transforming the revenue_by_month_year query to a table
144
+ revenue_by_month_year = query_results[QueryEnum.REVENUE_BY_MONTH_YEAR.value]
145
+ revenue_by_month_year
146
+ return
147
+
148
+
149
+ @app.cell
150
+ def _():
151
+ return
152
+
153
+
154
  if __name__ == "__main__":
155
  app.run()
src/extract.py CHANGED
@@ -25,7 +25,7 @@ def get_public_holidays(url: str, year: str) -> DataFrame:
25
  data = DataFrame(response.json())
26
 
27
  # Drop the columns types and countries
28
- df = data.drop(["types", "countries"], axis=1)
29
  # Convert the date column to datetime
30
  df["date"] = to_datetime(df["date"])
31
 
 
25
  data = DataFrame(response.json())
26
 
27
  # Drop the columns types and countries
28
+ df = data.drop(["types", "counties"], axis=1) # Misspelling in the API
29
  # Convert the date column to datetime
30
  df["date"] = to_datetime(df["date"])
31