HaLim committed on
Commit ·
5afa2a4
1
Parent(s): 11f91a6
remove end date
Browse files- src/config/optimization_config.py +46 -14
- src/etl/extract.py +57 -14
src/config/optimization_config.py
CHANGED
|
@@ -17,12 +17,39 @@ def get_date_span():
|
|
| 17 |
try:
|
| 18 |
# Try to get from streamlit session state (from config page)
|
| 19 |
import streamlit as st
|
| 20 |
-
if hasattr(st, 'session_state') and 'start_date' in st.session_state
|
| 21 |
-
from datetime import datetime
|
| 22 |
start_date = datetime.combine(st.session_state.start_date, datetime.min.time())
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
print("date span", date_span)
|
| 27 |
return date_span, start_date, end_date
|
| 28 |
except Exception as e:
|
|
@@ -37,10 +64,12 @@ def get_date_span():
|
|
| 37 |
#fetch date from streamlit or default value. The streamlit and default references the demand data (COOIS_Planned_and_Released.csv)
|
| 38 |
DATE_SPAN, start_date, end_date = get_date_span()
|
| 39 |
|
|
|
|
|
|
|
| 40 |
|
| 41 |
print(f"\n📅 DATE RANGE: {start_date} to {end_date}")
|
| 42 |
print(f"π PRODUCT SOURCE: COOIS_Released_Prod_Orders.csv")
|
| 43 |
-
PRODUCT_LIST = transformed_data.get_released_product_list(start_date
|
| 44 |
print(f"📦 PRODUCTS FOUND: {len(PRODUCT_LIST)} products -> {PRODUCT_LIST}")
|
| 45 |
|
| 46 |
|
|
@@ -212,7 +241,7 @@ def get_demand_dictionary():
|
|
| 212 |
|
| 213 |
print(f"Loading default demand values from data files")
|
| 214 |
# Use released orders instead of planned orders for demand
|
| 215 |
-
demand_df = extract.
|
| 216 |
demand_dictionary = demand_df.groupby('Material Number')["Order quantity (GMEIN)"].sum().to_dict()
|
| 217 |
print(f"π DEMAND DATA: {len(demand_dictionary)} products with total demand {sum(demand_dictionary.values())}")
|
| 218 |
return demand_dictionary
|
|
@@ -271,9 +300,11 @@ def get_team_requirements(PRODUCT_LIST):
|
|
| 271 |
print(f"Using default value for team requirements, extracting from CSV: {e}")
|
| 272 |
|
| 273 |
# Read the kits calculation data directly
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
|
|
|
|
|
|
| 277 |
# Initialize the team requirements dictionary
|
| 278 |
team_req_dict = {
|
| 279 |
"UNICEF Fixed term": {},
|
|
@@ -282,17 +313,18 @@ def get_team_requirements(PRODUCT_LIST):
|
|
| 282 |
|
| 283 |
# Process each product in the product list
|
| 284 |
for product in PRODUCT_LIST:
|
|
|
|
| 285 |
print(f"Processing team requirements for product: {product}")
|
| 286 |
product_data = kits_df[kits_df['Kit'] == product]
|
| 287 |
-
|
| 288 |
if not product_data.empty:
|
| 289 |
# Extract Humanizer and UNICEF staff requirements
|
| 290 |
humanizer_req = product_data["Humanizer"].iloc[0]
|
| 291 |
unicef_req = product_data["UNICEF staff"].iloc[0]
|
| 292 |
|
| 293 |
-
# Convert to int
|
| 294 |
-
team_req_dict["Humanizer"][product] = int(humanizer_req)
|
| 295 |
-
team_req_dict["UNICEF Fixed term"][product] = int(unicef_req)
|
| 296 |
else:
|
| 297 |
print(f"Warning: Product {product} not found in Kits Calculation data, setting requirements to 0")
|
| 298 |
|
|
|
|
| 17 |
try:
|
| 18 |
# Try to get from streamlit session state (from config page)
|
| 19 |
import streamlit as st
|
| 20 |
+
if hasattr(st, 'session_state') and 'start_date' in st.session_state:
|
| 21 |
+
from datetime import datetime, timedelta
|
| 22 |
start_date = datetime.combine(st.session_state.start_date, datetime.min.time())
|
| 23 |
+
|
| 24 |
+
# Check if we have calculated planning_days, otherwise determine from data
|
| 25 |
+
if 'planning_days' in st.session_state and st.session_state.planning_days:
|
| 26 |
+
planning_days = st.session_state.planning_days
|
| 27 |
+
end_date = start_date + timedelta(days=planning_days - 1)
|
| 28 |
+
else:
|
| 29 |
+
# Determine date range from actual demand data for the exact start date
|
| 30 |
+
try:
|
| 31 |
+
demand_data = extract.read_orders_data(start_date=start_date)
|
| 32 |
+
if not demand_data.empty:
|
| 33 |
+
import pandas as pd
|
| 34 |
+
# Get unique finish dates for this exact start date
|
| 35 |
+
finish_dates = pd.to_datetime(demand_data["Basic finish date"]).dt.date.unique()
|
| 36 |
+
finish_dates = sorted(finish_dates)
|
| 37 |
+
if finish_dates:
|
| 38 |
+
end_date = datetime.combine(max(finish_dates), datetime.min.time())
|
| 39 |
+
planning_days = (end_date - start_date).days + 1
|
| 40 |
+
else:
|
| 41 |
+
end_date = start_date
|
| 42 |
+
planning_days = 1
|
| 43 |
+
else:
|
| 44 |
+
end_date = start_date + timedelta(days=4) # Default 5 days
|
| 45 |
+
planning_days = 5
|
| 46 |
+
except Exception as e:
|
| 47 |
+
print(f"Could not determine date range from data: {e}")
|
| 48 |
+
end_date = start_date + timedelta(days=4) # Default 5 days
|
| 49 |
+
planning_days = 5
|
| 50 |
+
|
| 51 |
+
date_span = list(range(1, planning_days + 1))
|
| 52 |
+
print(f"Using dates from config page: {start_date} to {end_date} ({planning_days} days)")
|
| 53 |
print("date span", date_span)
|
| 54 |
return date_span, start_date, end_date
|
| 55 |
except Exception as e:
|
|
|
|
| 64 |
#fetch date from streamlit or default value. The streamlit and default references the demand data (COOIS_Planned_and_Released.csv)
|
| 65 |
DATE_SPAN, start_date, end_date = get_date_span()
|
| 66 |
|
| 67 |
+
# Update global dates in extract module BEFORE any data loading
|
| 68 |
+
extract.set_global_dates(start_date, end_date)
|
| 69 |
|
| 70 |
print(f"\n📅 DATE RANGE: {start_date} to {end_date}")
|
| 71 |
print(f"π PRODUCT SOURCE: COOIS_Released_Prod_Orders.csv")
|
| 72 |
+
PRODUCT_LIST = transformed_data.get_released_product_list(start_date)
|
| 73 |
print(f"📦 PRODUCTS FOUND: {len(PRODUCT_LIST)} products -> {PRODUCT_LIST}")
|
| 74 |
|
| 75 |
|
|
|
|
| 241 |
|
| 242 |
print(f"Loading default demand values from data files")
|
| 243 |
# Use released orders instead of planned orders for demand
|
| 244 |
+
demand_df = extract.read_orders_data(start_date=start_date)
|
| 245 |
demand_dictionary = demand_df.groupby('Material Number')["Order quantity (GMEIN)"].sum().to_dict()
|
| 246 |
print(f"π DEMAND DATA: {len(demand_dictionary)} products with total demand {sum(demand_dictionary.values())}")
|
| 247 |
return demand_dictionary
|
|
|
|
| 300 |
print(f"Using default value for team requirements, extracting from CSV: {e}")
|
| 301 |
|
| 302 |
# Read the kits calculation data directly
|
| 303 |
+
kits_df = extract.read_personnel_requirement_data()
|
| 304 |
+
# kits_path = "data/real_data_excel/converted_csv/Kits__Calculation.csv"
|
| 305 |
+
# kits_df = pd.read_csv(kits_path)
|
| 306 |
+
print("kits_df columns:", kits_df.columns.tolist())
|
| 307 |
+
print("kits_df head:", kits_df.head())
|
| 308 |
# Initialize the team requirements dictionary
|
| 309 |
team_req_dict = {
|
| 310 |
"UNICEF Fixed term": {},
|
|
|
|
| 313 |
|
| 314 |
# Process each product in the product list
|
| 315 |
for product in PRODUCT_LIST:
|
| 316 |
+
print("product",product)
|
| 317 |
print(f"Processing team requirements for product: {product}")
|
| 318 |
product_data = kits_df[kits_df['Kit'] == product]
|
| 319 |
+
print("product_data",product_data)
|
| 320 |
if not product_data.empty:
|
| 321 |
# Extract Humanizer and UNICEF staff requirements
|
| 322 |
humanizer_req = product_data["Humanizer"].iloc[0]
|
| 323 |
unicef_req = product_data["UNICEF staff"].iloc[0]
|
| 324 |
|
| 325 |
+
# Convert to int (data is already cleaned in extract function)
|
| 326 |
+
team_req_dict["Humanizer"][product] = int(humanizer_req)
|
| 327 |
+
team_req_dict["UNICEF Fixed term"][product] = int(unicef_req)
|
| 328 |
else:
|
| 329 |
print(f"Warning: Product {product} not found in Kits Calculation data, setting requirements to 0")
|
| 330 |
|
src/etl/extract.py
CHANGED
|
@@ -3,9 +3,17 @@ import datetime
|
|
| 3 |
from datetime import date, timedelta
|
| 4 |
import json
|
| 5 |
import os
|
|
|
|
| 6 |
START_DATE = pd.Timestamp(2025, 7, 7)
|
| 7 |
END_DATE = pd.Timestamp(2025, 7, 11)
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
def read_excel(path: str) -> pd.DataFrame:
    """Load an Excel workbook into a DataFrame.

    The ``id`` column is requested as pandas' nullable ``Int64`` dtype.

    Args:
        path: location of the Excel file.

    Returns:
        pd.DataFrame: the result of ``pd.read_excel`` for ``path``.
    """
    column_types = {"id": "Int64"}
    return pd.read_excel(path, dtype=column_types)
|
|
@@ -13,13 +21,18 @@ def read_excel(path: str) -> pd.DataFrame:
|
|
| 13 |
|
| 14 |
def read_demand_data(
|
| 15 |
path="data/real_data_excel/converted_csv/COOIS_Planned_and_Released.csv",
|
|
|
|
|
|
|
| 16 |
) -> pd.DataFrame:
|
| 17 |
df = pd.read_csv(path)
|
| 18 |
df["Basic start date"] = pd.to_datetime(df["Basic start date"])
|
| 19 |
-
df["Basic finish date"] = pd.to_datetime(df["Basic finish date"])
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
return df
|
| 25 |
|
|
@@ -67,10 +80,10 @@ def read_packaging_line_data(
|
|
| 67 |
return df
|
| 68 |
|
| 69 |
|
| 70 |
-
def
|
| 71 |
-
path="data/real_data_excel/converted_csv/
|
| 72 |
start_date=None,
|
| 73 |
-
end_date=None,
|
| 74 |
) -> pd.DataFrame:
|
| 75 |
"""
|
| 76 |
COOIS_Released_Prod_Orders.csv
|
|
@@ -78,7 +91,7 @@ def read_released_orders_data(
|
|
| 78 |
Args:
|
| 79 |
path: path to the csv file
|
| 80 |
start_date: start date (pd.Timestamp or datetime)
|
| 81 |
-
|
| 82 |
|
| 83 |
Returns:
|
| 84 |
pd.DataFrame: filtered dataframe by date
|
|
@@ -87,15 +100,14 @@ def read_released_orders_data(
|
|
| 87 |
assert len(df) > 0, "No data found in the file"
|
| 88 |
# convert date column to datetime
|
| 89 |
df["Basic start date"] = pd.to_datetime(df["Basic start date"])
|
| 90 |
-
df["Basic finish date"] = pd.to_datetime(df["Basic finish date"])
|
| 91 |
|
| 92 |
|
| 93 |
# filter by date
|
| 94 |
-
if start_date is not None
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
]
|
| 99 |
|
| 100 |
return df
|
| 101 |
|
|
@@ -104,10 +116,41 @@ def read_package_speed_data(
|
|
| 104 |
path="data/real_data_excel/converted_csv/Kits__Calculation.csv",
|
| 105 |
):
|
| 106 |
df = pd.read_csv(path, usecols=["Kit", "Kit per day","Paid work hours per day"])
|
|
|
|
|
|
|
|
|
|
| 107 |
df['kits_per_hour'] = df['Kit per day']/df['Paid work hours per day']
|
| 108 |
speeds_per_hour = dict(zip(df["Kit"], df["kits_per_hour"]))
|
| 109 |
return speeds_per_hour
|
| 110 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
|
| 112 |
|
| 113 |
def get_production_order_data():
|
|
|
|
| 3 |
from datetime import date, timedelta
|
| 4 |
import json
|
| 5 |
import os
|
| 6 |
+
# Default dates - will be overridden by optimization_config.py
|
| 7 |
START_DATE = pd.Timestamp(2025, 7, 7)
|
| 8 |
END_DATE = pd.Timestamp(2025, 7, 11)
|
| 9 |
|
| 10 |
+
def set_global_dates(start_date, end_date):
    """Replace the module-level START_DATE and END_DATE values.

    Each argument is converted with ``pd.Timestamp`` before being stored,
    and the new range is printed afterwards.

    Args:
        start_date: new START_DATE; any value ``pd.Timestamp`` accepts.
        end_date: new END_DATE; any value ``pd.Timestamp`` accepts.
    """
    global START_DATE, END_DATE

    START_DATE = pd.Timestamp(start_date)
    END_DATE = pd.Timestamp(end_date)

    message = f"Updated global dates: {START_DATE} to {END_DATE}"
    print(message)
|
| 16 |
+
|
| 17 |
|
| 18 |
def read_excel(path: str) -> pd.DataFrame:
    """Load an Excel workbook into a DataFrame.

    The ``id`` column is requested as pandas' nullable ``Int64`` dtype.

    Args:
        path: location of the Excel file.

    Returns:
        pd.DataFrame: the result of ``pd.read_excel`` for ``path``.
    """
    column_types = {"id": "Int64"}
    return pd.read_excel(path, dtype=column_types)
|
|
|
|
| 21 |
|
| 22 |
def read_demand_data(
    path="data/real_data_excel/converted_csv/COOIS_Planned_and_Released.csv",
    start_date=None,
    end_date=None,
) -> pd.DataFrame:
    """Load the demand CSV and keep only rows starting on one exact date.

    Args:
        path: CSV file to read; it must contain a "Basic start date" column.
        start_date: date to match against "Basic start date" (string,
            ``datetime.date``, ``datetime.datetime`` or ``pd.Timestamp``).
            When None, the module-level START_DATE is used.
        end_date: accepted for backward compatibility only; it does not
            take part in the filtering.

    Returns:
        pd.DataFrame: the rows whose "Basic start date" equals the chosen
        date. "Basic start date" is parsed to datetime; every other column
        (including "Basic finish date") is left as read from the CSV.
    """
    df = pd.read_csv(path)
    df["Basic start date"] = pd.to_datetime(df["Basic start date"])

    # Normalize to a Timestamp before comparing: a plain datetime.date does
    # not compare equal to datetime64 values in recent pandas, which would
    # silently filter out every row.
    filter_start_date = pd.Timestamp(
        start_date if start_date is not None else START_DATE
    )

    return df[df["Basic start date"] == filter_start_date]
|
| 38 |
|
|
|
|
| 80 |
return df
|
| 81 |
|
| 82 |
|
| 83 |
+
def read_orders_data(
|
| 84 |
+
path="data/real_data_excel/converted_csv/COOIS_Planned_and_Released.csv",
|
| 85 |
start_date=None,
|
| 86 |
+
# end_date=None,
|
| 87 |
) -> pd.DataFrame:
|
| 88 |
"""
|
| 89 |
COOIS_Released_Prod_Orders.csv
|
|
|
|
| 91 |
Args:
|
| 92 |
path: path to the csv file
|
| 93 |
start_date: start date (pd.Timestamp or datetime)
|
| 94 |
+
|
| 95 |
|
| 96 |
Returns:
|
| 97 |
pd.DataFrame: filtered dataframe by date
|
|
|
|
| 100 |
assert len(df) > 0, "No data found in the file"
|
| 101 |
# convert date column to datetime
|
| 102 |
df["Basic start date"] = pd.to_datetime(df["Basic start date"])
|
| 103 |
+
# df["Basic finish date"] = pd.to_datetime(df["Basic finish date"])
|
| 104 |
|
| 105 |
|
| 106 |
# filter by date
|
| 107 |
+
if start_date is not None: # Filter for exact start date only
|
| 108 |
+
df = df[df["Basic start date"] == pd.to_datetime(start_date)]
|
| 109 |
+
else:
|
| 110 |
+
raise ValueError("start_date is required")
|
|
|
|
| 111 |
|
| 112 |
return df
|
| 113 |
|
|
|
|
| 116 |
path="data/real_data_excel/converted_csv/Kits__Calculation.csv",
|
| 117 |
):
|
| 118 |
df = pd.read_csv(path, usecols=["Kit", "Kit per day","Paid work hours per day"])
|
| 119 |
+
df["Kit per day"] = df["Kit per day"].astype(float)
|
| 120 |
+
df["Paid work hours per day"] = df["Paid work hours per day"].astype(float)
|
| 121 |
+
df["Kit"] = df["Kit"].astype(str)
|
| 122 |
df['kits_per_hour'] = df['Kit per day']/df['Paid work hours per day']
|
| 123 |
speeds_per_hour = dict(zip(df["Kit"], df["kits_per_hour"]))
|
| 124 |
return speeds_per_hour
|
| 125 |
|
| 126 |
+
def read_personnel_requirement_data(
    path="data/real_data_excel/converted_csv/Kits__Calculation.csv",
):
    """Load per-kit staffing requirements with numeric columns cleaned.

    Reads the "Kit", "Humanizer" and "UNICEF staff" columns. The two staff
    columns are converted to float; missing, blank, unparseable or
    non-finite entries become 0.0. "Kit" is converted to str.

    Args:
        path: CSV file to read.

    Returns:
        pd.DataFrame: the three columns above, cleaned as described.
    """
    # Local import: this function is the only user of math here.
    import math

    df = pd.read_csv(path, usecols=["Kit", "Humanizer", "UNICEF staff"])

    def clean_and_convert_to_float(value):
        """Return value as a finite float, or 0.0 when that is impossible."""
        if pd.isna(value):
            return 0.0

        # str.strip() with no argument also removes Unicode whitespace such
        # as the non-breaking space (U+00A0) that shows up in this export.
        clean_value = str(value).strip()
        if not clean_value:
            return 0.0

        try:
            number = float(clean_value)
        except ValueError as e:
            print(f"Warning: Could not convert '{repr(value)}' to float, setting to 0. Error: {e}")
            return 0.0

        # float() accepts "nan"/"NaN"/"inf" text in any letter case; such
        # values cannot be turned into int by callers, so treat them as 0.
        if not math.isfinite(number):
            return 0.0
        return number

    df["Humanizer"] = df["Humanizer"].apply(clean_and_convert_to_float)
    df["UNICEF staff"] = df["UNICEF staff"].apply(clean_and_convert_to_float)
    df["Kit"] = df["Kit"].astype(str)

    return df
|
| 154 |
|
| 155 |
|
| 156 |
def get_production_order_data():
|