Spaces:
Sleeping
Sleeping
| import json | |
| import math | |
| import pandas as pd | |
| from pytest import fixture | |
| from sqlalchemy import create_engine | |
| from sqlalchemy.engine.base import Engine | |
| from src.config import ( | |
| DATASET_ROOT_PATH, | |
| PUBLIC_HOLIDAYS_URL, | |
| QUERY_RESULTS_ROOT_PATH, | |
| get_csv_to_table_mapping, | |
| ) | |
| from src.extract import extract | |
| from src.load import load | |
| from src.transform import ( | |
| QueryResult, | |
| query_delivery_date_difference, | |
| query_freight_value_weight_relationship, | |
| query_global_amount_order_status, | |
| query_orders_per_day_and_holidays_2017, | |
| query_real_vs_estimated_delivered_time, | |
| query_revenue_by_month_year, | |
| query_revenue_per_state, | |
| query_top_10_least_revenue_categories, | |
| query_top_10_revenue_categories, | |
| ) | |
| TOLERANCE = 0.1 | |
| def to_float(objs, year_col): | |
| return list(map(lambda obj: float(obj[year_col]) if obj[year_col] else 0.0, objs)) | |
| def float_vectors_are_close(a: list, b: list, tolerance: float = TOLERANCE) -> bool: | |
| """Check if two vectors of floats are close. | |
| Args: | |
| a (list): The first vector. | |
| b (list): The second vector. | |
| tolerance (float): The tolerance. | |
| Returns: | |
| bool: True if the vectors are close, False otherwise. | |
| """ | |
| return all([math.isclose(a[i], b[i], abs_tol=tolerance) for i in range(len(a))]) | |
| def database() -> Engine: | |
| """Initialize the database for testing.""" | |
| engine = create_engine("sqlite://") | |
| csv_folder = DATASET_ROOT_PATH | |
| public_holidays_url = PUBLIC_HOLIDAYS_URL | |
| csv_table_mapping = get_csv_to_table_mapping() | |
| csv_dataframes = extract(csv_folder, csv_table_mapping, public_holidays_url) | |
| load(dataframes=csv_dataframes, database=engine) | |
| return engine | |
| def read_query_result(query_name: str) -> dict: | |
| """Read the query from the json file. | |
| Args: | |
| query_name (str): The name of the query. | |
| Returns: | |
| dict: The query as a dictionary. | |
| """ | |
| with open(f"{QUERY_RESULTS_ROOT_PATH}/{query_name}.json", "r") as f: | |
| query_result = json.load(f) | |
| return query_result | |
| def pandas_to_json_object(df: pd.DataFrame) -> dict: | |
| """Convert pandas dataframe to json object. | |
| Args: | |
| df (pd.DataFrame): The dataframe. | |
| Returns: | |
| dict: The dataframe as a json object. | |
| """ | |
| return json.loads(df.to_json(orient="records")) | |
| def test_query_revenue_by_month_year(database: Engine): | |
| query_name = "revenue_by_month_year" | |
| actual = pandas_to_json_object(query_revenue_by_month_year(database).result) | |
| expected = read_query_result(query_name) | |
| def to_float(objs, year_col): | |
| return list( | |
| map(lambda obj: float(obj[year_col]) if obj[year_col] else 0.0, objs) | |
| ) | |
| assert len(actual) == len(expected) | |
| assert [obj["month_no"] for obj in actual] == [obj["month_no"] for obj in expected] | |
| assert float_vectors_are_close( | |
| to_float(actual, "Year2016"), to_float(expected, "Year2016") | |
| ) | |
| assert float_vectors_are_close( | |
| to_float(actual, "Year2017"), to_float(expected, "Year2017") | |
| ) | |
| assert float_vectors_are_close( | |
| to_float(actual, "Year2018"), to_float(expected, "Year2018") | |
| ) | |
| assert list(actual[0].keys()) == list(expected[0].keys()) | |
| def test_query_delivery_date_difference(database: Engine): | |
| query_name = "delivery_date_difference" | |
| actual: QueryResult = query_delivery_date_difference(database) | |
| expected = read_query_result(query_name) | |
| assert pandas_to_json_object(actual.result) == expected | |
| def test_query_global_amount_order_status(database: Engine): | |
| query_name = "global_amount_order_status" | |
| actual: QueryResult = query_global_amount_order_status(database) | |
| expected = read_query_result(query_name) | |
| assert pandas_to_json_object(actual.result) == expected | |
| def test_query_revenue_per_state(database: Engine): | |
| query_name = "revenue_per_state" | |
| actual = pandas_to_json_object(query_revenue_per_state(database).result) | |
| expected = read_query_result(query_name) | |
| assert len(actual) == len(expected) | |
| assert list(actual[0].keys()) == list(expected[0].keys()) | |
| assert float_vectors_are_close( | |
| [obj["Revenue"] for obj in actual], [obj["Revenue"] for obj in expected] | |
| ) | |
| def test_query_top_10_least_revenue_categories(database: Engine): | |
| query_name = "top_10_least_revenue_categories" | |
| actual = pandas_to_json_object( | |
| query_top_10_least_revenue_categories(database).result | |
| ) | |
| expected = read_query_result(query_name) | |
| assert len(actual) == len(expected) | |
| assert list(actual[0].keys()) == list(expected[0].keys()) | |
| assert [obj["Category"] for obj in actual] == [obj["Category"] for obj in expected] | |
| assert [obj["Num_order"] for obj in actual] == [ | |
| obj["Num_order"] for obj in expected | |
| ] | |
| assert float_vectors_are_close( | |
| [obj["Revenue"] for obj in actual], [obj["Revenue"] for obj in expected] | |
| ) | |
| def test_query_top_10_revenue_categories(database: Engine): | |
| query_name = "top_10_revenue_categories" | |
| actual = pandas_to_json_object(query_top_10_revenue_categories(database).result) | |
| expected = read_query_result(query_name) | |
| assert len(actual) == len(expected) | |
| assert list(actual[0].keys()) == list(expected[0].keys()) | |
| assert [obj["Category"] for obj in actual] == [obj["Category"] for obj in expected] | |
| assert [obj["Num_order"] for obj in actual] == [ | |
| obj["Num_order"] for obj in expected | |
| ] | |
| assert float_vectors_are_close( | |
| [obj["Revenue"] for obj in actual], [obj["Revenue"] for obj in expected] | |
| ) | |
| def test_real_vs_estimated_delivered_time(database: Engine): | |
| query_name = "real_vs_estimated_delivered_time" | |
| actual = pandas_to_json_object( | |
| query_real_vs_estimated_delivered_time(database).result | |
| ) | |
| expected = read_query_result(query_name) | |
| def to_float(objs, year_col): | |
| return list( | |
| map(lambda obj: float(obj[year_col]) if obj[year_col] else 0.0, objs) | |
| ) | |
| assert len(actual) == len(expected) | |
| assert list(actual[0].keys()) == list(expected[0].keys()) | |
| assert [obj["month_no"] for obj in actual] == [obj["month_no"] for obj in expected] | |
| assert float_vectors_are_close( | |
| to_float(actual, "Year2016_real_time"), to_float(expected, "Year2016_real_time") | |
| ) | |
| assert float_vectors_are_close( | |
| to_float(actual, "Year2017_real_time"), to_float(expected, "Year2017_real_time") | |
| ) | |
| assert float_vectors_are_close( | |
| to_float(actual, "Year2018_real_time"), to_float(expected, "Year2018_real_time") | |
| ) | |
| assert float_vectors_are_close( | |
| to_float(actual, "Year2016_estimated_time"), | |
| to_float(expected, "Year2016_estimated_time"), | |
| ) | |
| assert float_vectors_are_close( | |
| to_float(actual, "Year2017_estimated_time"), | |
| to_float(expected, "Year2017_estimated_time"), | |
| ) | |
| assert float_vectors_are_close( | |
| to_float(actual, "Year2018_estimated_time"), | |
| to_float(expected, "Year2018_estimated_time"), | |
| ) | |
| def test_query_orders_per_day_and_holidays_2017(database: Engine): | |
| query_name = "orders_per_day_and_holidays_2017" | |
| actual: QueryResult = query_orders_per_day_and_holidays_2017(database) | |
| expected = read_query_result(query_name) | |
| assert pandas_to_json_object(actual.result) == expected | |
| def test_query_get_freight_value_weight_relationship(database: Engine): | |
| query_name = "get_freight_value_weight_relationship" | |
| actual: QueryResult = query_freight_value_weight_relationship(database) | |
| expected = read_query_result(query_name) | |
| assert pandas_to_json_object(actual.result) == expected | |