File size: 1,821 Bytes
a81b53c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cdd27bb
a81b53c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import requests
from pandas import DataFrame, read_csv, to_datetime


def get_public_holidays(url: str, year: str, timeout: float = 30.0) -> DataFrame:
    """
    Get public holidays for the given year for Brazil

    Args:
        url (str): The base url of the public holidays API; "/{year}/BR" is
            appended to it to build the request url
        year (str): The year to get the public holidays
        timeout (float): Seconds to wait for the server before giving up

    Raises:
        SystemExit: If the request fails (connection error, timeout or an
            HTTP error status)

    Returns:
        DataFrame: The public holidays, without the "types" and "counties"
            columns and with the "date" column converted to datetime
    """
    endpoint = f"{url}/{year}/BR"

    # Only the network call lives in the try block, so that errors raised while
    # parsing or reshaping the payload are not mistaken for a failed request.
    try:
        # Without a timeout, requests may wait forever on an unresponsive server.
        response = requests.get(endpoint, timeout=timeout)
        response.raise_for_status()  # Turn 4xx/5xx responses into HTTPError
    except requests.exceptions.RequestException as err:
        # RequestException also covers connection errors and timeouts, not
        # just HTTPError. Passing a message makes the process exit with a
        # non-zero status; a bare SystemExit would report success (status 0).
        raise SystemExit(
            f"Could not fetch public holidays from {endpoint}: {err}"
        ) from err

    holidays = DataFrame(response.json())

    # Drop the "types" and "counties" columns. Note the key really is
    # "counties" (not "countries") in the payload this code expects.
    holidays = holidays.drop(columns=["types", "counties"])
    # Convert the date column to datetime
    holidays["date"] = to_datetime(holidays["date"])

    return holidays


def extract(
    csv_folder: str,
    csv_table_mapping: dict[str, str],
    public_holidays_url: str,
    public_holidays_year: str = "2017",
) -> dict[str, DataFrame]:
    """
    Extract the data from the csv files and load them into a dictionary of dataframes

    Args:
      csv_folder (str): The folder where the csv files are
      csv_table_mapping (dict[str, str]): The mapping between the csv files and the table names
      public_holidays_url (str): The url to get the public holidays
      public_holidays_year (str): The year to get the public holidays for.
        Defaults to "2017", the year that used to be hard-coded here.

    Returns:
      dict[str, DataFrame]: A dictionary with keys as the table names and values as the dataframes.
        The public holidays are stored under the extra key "public_holidays".
    """
    # One dataframe per csv file, keyed by the table name it maps to
    dataframes = {
        table_name: read_csv(f"{csv_folder}/{csv_file}")
        for csv_file, table_name in csv_table_mapping.items()
    }

    # The holidays come from the remote API rather than from a csv file
    dataframes["public_holidays"] = get_public_holidays(
        url=public_holidays_url, year=public_holidays_year
    )

    return dataframes