Spaces:
Sleeping
Sleeping
File size: 7,661 Bytes
d81b677 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 |
import json
import math
import pandas as pd
from pytest import fixture
from sqlalchemy import create_engine
from sqlalchemy.engine.base import Engine
from src.config import (
DATASET_ROOT_PATH,
PUBLIC_HOLIDAYS_URL,
QUERY_RESULTS_ROOT_PATH,
get_csv_to_table_mapping,
)
from src.extract import extract
from src.load import load
from src.transform import (
QueryResult,
query_delivery_date_difference,
query_freight_value_weight_relationship,
query_global_amount_order_status,
query_orders_per_day_and_holidays_2017,
query_real_vs_estimated_delivered_time,
query_revenue_by_month_year,
query_revenue_per_state,
query_top_10_least_revenue_categories,
query_top_10_revenue_categories,
)
TOLERANCE = 0.1
def to_float(objs, year_col):
return list(map(lambda obj: float(obj[year_col]) if obj[year_col] else 0.0, objs))
def float_vectors_are_close(a: list, b: list, tolerance: float = TOLERANCE) -> bool:
"""Check if two vectors of floats are close.
Args:
a (list): The first vector.
b (list): The second vector.
tolerance (float): The tolerance.
Returns:
bool: True if the vectors are close, False otherwise.
"""
return all([math.isclose(a[i], b[i], abs_tol=tolerance) for i in range(len(a))])
@fixture(scope="session", autouse=True)
def database() -> Engine:
"""Initialize the database for testing."""
engine = create_engine("sqlite://")
csv_folder = DATASET_ROOT_PATH
public_holidays_url = PUBLIC_HOLIDAYS_URL
csv_table_mapping = get_csv_to_table_mapping()
csv_dataframes = extract(csv_folder, csv_table_mapping, public_holidays_url)
load(dataframes=csv_dataframes, database=engine)
return engine
def read_query_result(query_name: str) -> dict:
"""Read the query from the json file.
Args:
query_name (str): The name of the query.
Returns:
dict: The query as a dictionary.
"""
with open(f"{QUERY_RESULTS_ROOT_PATH}/{query_name}.json", "r") as f:
query_result = json.load(f)
return query_result
def pandas_to_json_object(df: pd.DataFrame) -> dict:
"""Convert pandas dataframe to json object.
Args:
df (pd.DataFrame): The dataframe.
Returns:
dict: The dataframe as a json object.
"""
return json.loads(df.to_json(orient="records"))
def test_query_revenue_by_month_year(database: Engine):
query_name = "revenue_by_month_year"
actual = pandas_to_json_object(query_revenue_by_month_year(database).result)
expected = read_query_result(query_name)
def to_float(objs, year_col):
return list(
map(lambda obj: float(obj[year_col]) if obj[year_col] else 0.0, objs)
)
assert len(actual) == len(expected)
assert [obj["month_no"] for obj in actual] == [obj["month_no"] for obj in expected]
assert float_vectors_are_close(
to_float(actual, "Year2016"), to_float(expected, "Year2016")
)
assert float_vectors_are_close(
to_float(actual, "Year2017"), to_float(expected, "Year2017")
)
assert float_vectors_are_close(
to_float(actual, "Year2018"), to_float(expected, "Year2018")
)
assert list(actual[0].keys()) == list(expected[0].keys())
def test_query_delivery_date_difference(database: Engine):
query_name = "delivery_date_difference"
actual: QueryResult = query_delivery_date_difference(database)
expected = read_query_result(query_name)
assert pandas_to_json_object(actual.result) == expected
def test_query_global_amount_order_status(database: Engine):
query_name = "global_amount_order_status"
actual: QueryResult = query_global_amount_order_status(database)
expected = read_query_result(query_name)
assert pandas_to_json_object(actual.result) == expected
def test_query_revenue_per_state(database: Engine):
query_name = "revenue_per_state"
actual = pandas_to_json_object(query_revenue_per_state(database).result)
expected = read_query_result(query_name)
assert len(actual) == len(expected)
assert list(actual[0].keys()) == list(expected[0].keys())
assert float_vectors_are_close(
[obj["Revenue"] for obj in actual], [obj["Revenue"] for obj in expected]
)
def test_query_top_10_least_revenue_categories(database: Engine):
query_name = "top_10_least_revenue_categories"
actual = pandas_to_json_object(
query_top_10_least_revenue_categories(database).result
)
expected = read_query_result(query_name)
assert len(actual) == len(expected)
assert list(actual[0].keys()) == list(expected[0].keys())
assert [obj["Category"] for obj in actual] == [obj["Category"] for obj in expected]
assert [obj["Num_order"] for obj in actual] == [
obj["Num_order"] for obj in expected
]
assert float_vectors_are_close(
[obj["Revenue"] for obj in actual], [obj["Revenue"] for obj in expected]
)
def test_query_top_10_revenue_categories(database: Engine):
query_name = "top_10_revenue_categories"
actual = pandas_to_json_object(query_top_10_revenue_categories(database).result)
expected = read_query_result(query_name)
assert len(actual) == len(expected)
assert list(actual[0].keys()) == list(expected[0].keys())
assert [obj["Category"] for obj in actual] == [obj["Category"] for obj in expected]
assert [obj["Num_order"] for obj in actual] == [
obj["Num_order"] for obj in expected
]
assert float_vectors_are_close(
[obj["Revenue"] for obj in actual], [obj["Revenue"] for obj in expected]
)
def test_real_vs_estimated_delivered_time(database: Engine):
query_name = "real_vs_estimated_delivered_time"
actual = pandas_to_json_object(
query_real_vs_estimated_delivered_time(database).result
)
expected = read_query_result(query_name)
def to_float(objs, year_col):
return list(
map(lambda obj: float(obj[year_col]) if obj[year_col] else 0.0, objs)
)
assert len(actual) == len(expected)
assert list(actual[0].keys()) == list(expected[0].keys())
assert [obj["month_no"] for obj in actual] == [obj["month_no"] for obj in expected]
assert float_vectors_are_close(
to_float(actual, "Year2016_real_time"), to_float(expected, "Year2016_real_time")
)
assert float_vectors_are_close(
to_float(actual, "Year2017_real_time"), to_float(expected, "Year2017_real_time")
)
assert float_vectors_are_close(
to_float(actual, "Year2018_real_time"), to_float(expected, "Year2018_real_time")
)
assert float_vectors_are_close(
to_float(actual, "Year2016_estimated_time"),
to_float(expected, "Year2016_estimated_time"),
)
assert float_vectors_are_close(
to_float(actual, "Year2017_estimated_time"),
to_float(expected, "Year2017_estimated_time"),
)
assert float_vectors_are_close(
to_float(actual, "Year2018_estimated_time"),
to_float(expected, "Year2018_estimated_time"),
)
def test_query_orders_per_day_and_holidays_2017(database: Engine):
query_name = "orders_per_day_and_holidays_2017"
actual: QueryResult = query_orders_per_day_and_holidays_2017(database)
expected = read_query_result(query_name)
assert pandas_to_json_object(actual.result) == expected
def test_query_get_freight_value_weight_relationship(database: Engine):
query_name = "get_freight_value_weight_relationship"
actual: QueryResult = query_freight_value_weight_relationship(database)
expected = read_query_result(query_name)
assert pandas_to_json_object(actual.result) == expected
|