HaLim
committed on
Commit
ยท
29608b7
1
Parent(s):
9befc00
Change fixed values into variables from the UI or the database
Browse files- src/config/optimization_config.py +50 -8
- src/etl/extract.py +34 -1
- src/etl/transform.py +63 -4
src/config/optimization_config.py
CHANGED
|
@@ -1,15 +1,57 @@
|
|
| 1 |
import pandas as pd
|
| 2 |
-
import etl.transform as transformed_data
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
-
# WH_Workforce_Hourly_payment_scale.csv -> This is default setting. User should be able to manipulate
|
| 12 |
-
SHIFT_LIST = [1, 2, 3] # WH_Workforce_Hourly_Pay_Scale.csv -> This is default setting. User should be able to manipulate
|
| 13 |
LINE_LIST = ["long", "short"] # WH_Workforce_Hourly_Pay_Scale.csv -> This is default setting. User should be able to manipulate
|
| 14 |
|
| 15 |
LINE_LIST_PER_TYPE = {
|
|
|
|
| 1 |
import pandas as pd
|
| 2 |
+
import src.etl.transform as transformed_data
|
| 3 |
+
import streamlit_page.page1 as dashboard
|
| 4 |
+
import datetime
|
| 5 |
+
from datetime import timedelta
|
| 6 |
+
import src.etl.extract as extract
|
| 7 |
|
| 8 |
|
| 9 |
+
def get_date_span():
    """Return the optimization date window, preferring the Streamlit dashboard.

    Returns:
        tuple: (date_span, start_date, end_date) where date_span is the list
        of day offsets 1..N covering the window.

    Falls back to a fixed 4-day window (2025-03-24 .. 2025-03-28) when the
    dashboard has not provided dates (e.g. when running outside Streamlit).
    """
    try:
        start_date = dashboard.start_date
        end_date = dashboard.end_date
        date_span = list(range(1, (end_date - start_date).days + 1))
        print(f"date from user input")
        return date_span, start_date, end_date
    except Exception as e:
        print(f"using default value for date span")
        # BUG FIX: this module does `import datetime`, so the bare name is the
        # *module*; the original `datetime(2025, 3, 24)` raised TypeError
        # inside this except branch.  Use the class explicitly.
        # (Default window is 4 days, not 7 as the old comment claimed.)
        return list(range(1, 5)), datetime.datetime(2025, 3, 24), datetime.datetime(2025, 3, 28)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
# Date window: from the Streamlit dashboard when available, otherwise the
# built-in default.  Both reference the demand data
# (COOIS_Planned_and_Released.csv).
DATE_SPAN, start_date, end_date = get_date_span()

# Products released within the window (COOIS_Released_Prod_Orders.csv).
PRODUCT_LIST = transformed_data.get_released_product_list(start_date, end_date)
print(PRODUCT_LIST)  # NOTE(review): consider logging instead of print at import time
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def get_employee_type_list():
    """Return employee types from the dashboard, else from the employee data file."""
    try:
        # Prefer whatever the user selected in the Streamlit UI.
        return dashboard.employee_type_list
    except Exception:
        print("using default value for employee type list")
        employees = extract.read_employee_data()
        return employees["employment_type"].unique()
|
| 38 |
+
|
| 39 |
+
# Employee types: dashboard selection when available, otherwise derived from
# the workforce data file (see get_employee_type_list).
EMPLOYEE_TYPE_LIST = get_employee_type_list()
print(EMPLOYEE_TYPE_LIST)
|
| 41 |
+
|
| 42 |
+
def get_shift_list():
    """Return shift identifiers from the dashboard, else from the shift data file."""
    try:
        # Prefer whatever the user selected in the Streamlit UI.
        return dashboard.shift_list
    except Exception:
        print("using default value for shift list")
        shifts = extract.read_shift_data()
        return shifts["shift"].unique()
|
| 51 |
+
# Shifts: dashboard selection when available, otherwise derived from the
# shift data file (see get_shift_list).
SHIFT_LIST = get_shift_list()
print(SHIFT_LIST)
|
| 53 |
+
|
| 54 |
|
|
|
|
|
|
|
| 55 |
LINE_LIST = ["long", "short"] # WH_Workforce_Hourly_Pay_Scale.csv -> This is default setting. User should be able to manipulate
|
| 56 |
|
| 57 |
LINE_LIST_PER_TYPE = {
|
src/etl/extract.py
CHANGED
|
@@ -24,7 +24,7 @@ def read_demand_data(
|
|
| 24 |
|
| 25 |
|
| 26 |
def read_employee_data(
|
| 27 |
-
path="data/real_data_excel/converted_csv/
|
| 28 |
) -> pd.DataFrame:
|
| 29 |
return pd.read_csv(path)
|
| 30 |
|
|
@@ -53,6 +53,39 @@ def read_material_master(
|
|
| 53 |
return pd.read_csv(path)
|
| 54 |
|
| 55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
if __name__ == "__main__":
|
| 57 |
demand_data = read_demand_data()
|
| 58 |
print(demand_data.head())
|
|
|
|
| 24 |
|
| 25 |
|
| 26 |
def read_employee_data(
    path="data/real_data_excel/converted_csv/WH_Workforce_Hourly_Pay_Scale_processed.csv",
) -> pd.DataFrame:
    """Load the processed workforce hourly-pay-scale table from *path*."""
    frame = pd.read_csv(path)
    return frame
|
| 30 |
|
|
|
|
| 53 |
return pd.read_csv(path)
|
| 54 |
|
| 55 |
|
| 56 |
+
def read_released_orders_data(
    path="data/real_data_excel/converted_csv/COOIS_Released_Prod_Orders.csv",
    start_date=None,
    end_date=None,
) -> pd.DataFrame:
    """Load released production orders (COOIS_Released_Prod_Orders.csv).

    Args:
        path: path to the csv file
        start_date: inclusive window start (anything pd.to_datetime accepts)
        end_date: inclusive window end (anything pd.to_datetime accepts)

    Returns:
        pd.DataFrame: orders whose basic start/finish dates fall inside the
        window, or all orders when no window is given.

    Raises:
        ValueError: if the file contains no data rows.
    """
    df = pd.read_csv(path)
    # Raise instead of assert: asserts are stripped under `python -O`.
    if df.empty:
        raise ValueError("No data found in the file")

    # Normalize the date columns so the comparisons below work on datetimes.
    df["Basic start date"] = pd.to_datetime(df["Basic start date"])
    df["Basic finish date"] = pd.to_datetime(df["Basic finish date"])

    if start_date is not None and end_date is not None:
        # BUG FIX: the original used `==`, keeping only orders that start
        # exactly on start_date AND finish exactly on end_date.  Callers ask
        # for products "in the date range", so filter inclusively instead.
        df = df[
            (df["Basic start date"] >= pd.to_datetime(start_date))
            & (df["Basic finish date"] <= pd.to_datetime(end_date))
        ]

    return df
|
| 87 |
+
|
| 88 |
+
|
| 89 |
if __name__ == "__main__":
    # Manual smoke test: load the demand data and show the first rows.
    demand_data = read_demand_data()
    print(demand_data.head())
|
src/etl/transform.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
import pandas as pd
|
| 2 |
-
import extract as ex
|
| 3 |
|
| 4 |
|
| 5 |
def get_product_list():
|
|
@@ -15,6 +15,65 @@ def get_employee_list():
|
|
| 15 |
return employee["Employee_Type"].unique()
|
| 16 |
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import pandas as pd
|
| 2 |
+
import src.etl.extract as ex
|
| 3 |
|
| 4 |
|
| 5 |
def get_product_list():
|
|
|
|
| 15 |
return employee["Employee_Type"].unique()
|
| 16 |
|
| 17 |
|
| 18 |
+
def get_released_product_list(start_date=None, end_date=None):
|
| 19 |
+
|
| 20 |
+
released_orders = ex.read_released_orders_data(
|
| 21 |
+
start_date=start_date,
|
| 22 |
+
end_date=end_date
|
| 23 |
+
)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
product_list = released_orders["Material Number"].unique().tolist()
|
| 27 |
+
print(f"Released products for date range {start_date} to {end_date}: {len(product_list)} products")
|
| 28 |
+
return product_list
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def get_available_dates():
    """Collect every date available in COOIS_Released_Prod_Orders.csv.

    Returns:
        tuple: (all_dates, start_dates, end_dates) — sorted lists of unique
        dates; all_dates is the union of the start and finish dates.
    """
    # Load every order (no date filtering).
    orders = ex.read_released_orders_data()

    # Re-coerce to datetime; extract.py already does this, but be explicit.
    orders["Basic start date"] = pd.to_datetime(orders["Basic start date"])
    orders["Basic finish date"] = pd.to_datetime(orders["Basic finish date"])

    # Unique start and finish dates, each sorted.
    starts = sorted(orders["Basic start date"].dt.date.unique())
    finishes = sorted(orders["Basic finish date"].dt.date.unique())

    # Union of both date sets.
    all_dates = sorted(set(starts + finishes))

    return all_dates, starts, finishes
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def get_date_ranges(released_orders=None):
    """Return the unique (start_date, end_date) combinations of released orders.

    Args:
        released_orders: optional pre-loaded DataFrame with "Basic start date"
            and "Basic finish date" columns; when None (the default, matching
            the original behavior) the data is read via
            ex.read_released_orders_data().

    Returns:
        list: sorted, de-duplicated (start_date, end_date) tuples of
        datetime.date values.
    """
    if released_orders is None:
        released_orders = ex.read_released_orders_data()
    # Work on a copy so a caller-supplied frame is never mutated.
    released_orders = released_orders.copy()

    # Normalize the date columns to datetime.
    released_orders["Basic start date"] = pd.to_datetime(released_orders["Basic start date"])
    released_orders["Basic finish date"] = pd.to_datetime(released_orders["Basic finish date"])

    # Unique (start, finish) pairs.
    pairs = released_orders[["Basic start date", "Basic finish date"]].drop_duplicates()

    # zip over the two columns instead of iterrows(): same pairs, without
    # constructing a Series per row.
    ranges = zip(pairs["Basic start date"].dt.date, pairs["Basic finish date"].dt.date)
    return sorted(set(ranges))