haileyhalimj@gmail.com
Rename src/etl to src/preprocess for better code organization
1131bea
raw
history blame
2.78 kB
import pandas as pd
import src.etl.extract as ex
def get_product_list():
    """Return the distinct material numbers found in the demand data.

    Loads the demand table through ``ex.read_demand_data()``, echoes the
    distinct ``"Material Number"`` values to stdout, and returns them.

    Returns:
        The result of ``.unique()`` on the ``"Material Number"`` column.
    """
    material_numbers = ex.read_demand_data()["Material Number"].unique()
    print(material_numbers)
    return material_numbers
def get_employee_list():
    """Return the distinct employee types from the employee data.

    Returns:
        The result of ``.unique()`` on the ``"Employee_Type"`` entry of the
        ``"Description"`` entry of whatever ``ex.read_employee_data()``
        returns.
    """
    # NOTE(review): this is a two-step lookup ("Description" first, then
    # "Employee_Type"). That only works if the first lookup yields something
    # that itself has an "Employee_Type" key/column -- confirm against the
    # actual return value of ex.read_employee_data().
    description = ex.read_employee_data()["Description"]
    employee_types = description["Employee_Type"]
    return employee_types.unique()
def get_released_product_list(start_date=None):
    """Return the distinct material numbers of the released orders.

    Args:
        start_date: Forwarded unchanged to ``ex.read_orders_data`` as its
            ``start_date`` keyword argument. Defaults to ``None``.

    Returns:
        list: Distinct ``"Material Number"`` values of the orders returned
        by ``ex.read_orders_data``.
    """
    orders = ex.read_orders_data(start_date=start_date)
    distinct_materials = orders["Material Number"].unique()
    product_list = distinct_materials.tolist()
    # Progress output: how many distinct products were found for this filter.
    print(f"Released products for date range {start_date}: {len(product_list)} products")
    return product_list
def get_available_dates():
    """Collect every calendar date present in the released orders data.

    Loads the released orders through ``ex.read_orders_data()`` without any
    date filter (per the original docstring, the underlying file is
    COOIS_Released_Prod_Orders.csv) and gathers the distinct calendar dates
    of the ``"Basic start date"`` and ``"Basic finish date"`` columns.

    Returns:
        tuple: ``(all_dates, start_dates, end_dates)`` where

        * ``all_dates`` is the sorted union of the start and finish dates,
        * ``start_dates`` is the sorted list of distinct start dates,
        * ``end_dates`` is the sorted list of distinct finish dates.

        The entries are the values produced by ``Series.dt.date``.
    """
    released_orders = ex.read_orders_data()

    # Convert into local Series instead of writing the converted columns back
    # into the frame obtained from the extract layer, so this read-only query
    # never modifies data it does not own. The conversion itself is kept as a
    # safeguard in case the columns do not arrive as datetimes.
    start_series = pd.to_datetime(released_orders["Basic start date"])
    finish_series = pd.to_datetime(released_orders["Basic finish date"])

    start_dates = sorted(start_series.dt.date.unique())
    end_dates = sorted(finish_series.dt.date.unique())

    # Union of both lists, de-duplicated and ordered.
    all_dates = sorted(set(start_dates + end_dates))
    return all_dates, start_dates, end_dates
def get_date_ranges():
    """Return the distinct (start, finish) date pairs of the released orders.

    Loads the released orders through ``ex.read_orders_data()`` (per the
    original docstring, the underlying file is
    COOIS_Released_Prod_Orders.csv) and pairs each order's
    ``"Basic start date"`` with its ``"Basic finish date"``, reduced to
    calendar dates.

    Returns:
        list: Sorted list of unique ``(start_date, end_date)`` tuples.
    """
    released_orders = ex.read_orders_data()

    # Work on local Series: nothing is written back into the frame obtained
    # from the extract layer.
    start_days = pd.to_datetime(released_orders["Basic start date"]).dt.date
    finish_days = pd.to_datetime(released_orders["Basic finish date"]).dt.date

    # zip pairs the two columns row by row without the per-row Series objects
    # that iterrows() would build; the set removes duplicate pairs.
    return sorted(set(zip(start_days, finish_days)))