HaLim committed on
Commit
5afa2a4
Β·
1 Parent(s): 11f91a6

remove end date

Browse files
src/config/optimization_config.py CHANGED
@@ -17,12 +17,39 @@ def get_date_span():
17
  try:
18
  # Try to get from streamlit session state (from config page)
19
  import streamlit as st
20
- if hasattr(st, 'session_state') and 'start_date' in st.session_state and 'end_date' in st.session_state:
21
- from datetime import datetime
22
  start_date = datetime.combine(st.session_state.start_date, datetime.min.time())
23
- end_date = datetime.combine(st.session_state.end_date, datetime.min.time())
24
- date_span = list(range(1, (end_date - start_date).days + 2))
25
- print(f"Using dates from config page: {start_date} to {end_date}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  print("date span", date_span)
27
  return date_span, start_date, end_date
28
  except Exception as e:
@@ -37,10 +64,12 @@ def get_date_span():
37
  #fetch date from streamlit or default value. The streamlit and default references the demand data (COOIS_Planned_and_Released.csv)
38
  DATE_SPAN, start_date, end_date = get_date_span()
39
 
 
 
40
 
41
  print(f"\nπŸ“… DATE RANGE: {start_date} to {end_date}")
42
  print(f"πŸ“ PRODUCT SOURCE: COOIS_Released_Prod_Orders.csv")
43
- PRODUCT_LIST = transformed_data.get_released_product_list(start_date, end_date)
44
  print(f"πŸ“¦ PRODUCTS FOUND: {len(PRODUCT_LIST)} products -> {PRODUCT_LIST}")
45
 
46
 
@@ -212,7 +241,7 @@ def get_demand_dictionary():
212
 
213
  print(f"Loading default demand values from data files")
214
  # Use released orders instead of planned orders for demand
215
- demand_df = extract.read_released_orders_data(start_date=start_date, end_date=end_date)
216
  demand_dictionary = demand_df.groupby('Material Number')["Order quantity (GMEIN)"].sum().to_dict()
217
  print(f"πŸ“ˆ DEMAND DATA: {len(demand_dictionary)} products with total demand {sum(demand_dictionary.values())}")
218
  return demand_dictionary
@@ -271,9 +300,11 @@ def get_team_requirements(PRODUCT_LIST):
271
  print(f"Using default value for team requirements, extracting from CSV: {e}")
272
 
273
  # Read the kits calculation data directly
274
- kits_path = "data/real_data_excel/converted_csv/Kits__Calculation.csv"
275
- kits_df = pd.read_csv(kits_path)
276
-
 
 
277
  # Initialize the team requirements dictionary
278
  team_req_dict = {
279
  "UNICEF Fixed term": {},
@@ -282,17 +313,18 @@ def get_team_requirements(PRODUCT_LIST):
282
 
283
  # Process each product in the product list
284
  for product in PRODUCT_LIST:
 
285
  print(f"Processing team requirements for product: {product}")
286
  product_data = kits_df[kits_df['Kit'] == product]
287
-
288
  if not product_data.empty:
289
  # Extract Humanizer and UNICEF staff requirements
290
  humanizer_req = product_data["Humanizer"].iloc[0]
291
  unicef_req = product_data["UNICEF staff"].iloc[0]
292
 
293
- # Convert to int, handle NaN/empty values
294
- team_req_dict["Humanizer"][product] = int(humanizer_req) if pd.notna(humanizer_req) else 0
295
- team_req_dict["UNICEF Fixed term"][product] = int(unicef_req) if pd.notna(unicef_req) else 0
296
  else:
297
  print(f"Warning: Product {product} not found in Kits Calculation data, setting requirements to 0")
298
 
 
17
  try:
18
  # Try to get from streamlit session state (from config page)
19
  import streamlit as st
20
+ if hasattr(st, 'session_state') and 'start_date' in st.session_state:
21
+ from datetime import datetime, timedelta
22
  start_date = datetime.combine(st.session_state.start_date, datetime.min.time())
23
+
24
+ # Check if we have calculated planning_days, otherwise determine from data
25
+ if 'planning_days' in st.session_state and st.session_state.planning_days:
26
+ planning_days = st.session_state.planning_days
27
+ end_date = start_date + timedelta(days=planning_days - 1)
28
+ else:
29
+ # Determine date range from actual demand data for the exact start date
30
+ try:
31
+ demand_data = extract.read_orders_data(start_date=start_date)
32
+ if not demand_data.empty:
33
+ import pandas as pd
34
+ # Get unique finish dates for this exact start date
35
+ finish_dates = pd.to_datetime(demand_data["Basic finish date"]).dt.date.unique()
36
+ finish_dates = sorted(finish_dates)
37
+ if finish_dates:
38
+ end_date = datetime.combine(max(finish_dates), datetime.min.time())
39
+ planning_days = (end_date - start_date).days + 1
40
+ else:
41
+ end_date = start_date
42
+ planning_days = 1
43
+ else:
44
+ end_date = start_date + timedelta(days=4) # Default 5 days
45
+ planning_days = 5
46
+ except Exception as e:
47
+ print(f"Could not determine date range from data: {e}")
48
+ end_date = start_date + timedelta(days=4) # Default 5 days
49
+ planning_days = 5
50
+
51
+ date_span = list(range(1, planning_days + 1))
52
+ print(f"Using dates from config page: {start_date} to {end_date} ({planning_days} days)")
53
  print("date span", date_span)
54
  return date_span, start_date, end_date
55
  except Exception as e:
 
64
  #fetch date from streamlit or default value. The streamlit and default references the demand data (COOIS_Planned_and_Released.csv)
65
  DATE_SPAN, start_date, end_date = get_date_span()
66
 
67
+ # Update global dates in extract module BEFORE any data loading
68
+ extract.set_global_dates(start_date, end_date)
69
 
70
  print(f"\nπŸ“… DATE RANGE: {start_date} to {end_date}")
71
  print(f"πŸ“ PRODUCT SOURCE: COOIS_Released_Prod_Orders.csv")
72
+ PRODUCT_LIST = transformed_data.get_released_product_list(start_date)
73
  print(f"πŸ“¦ PRODUCTS FOUND: {len(PRODUCT_LIST)} products -> {PRODUCT_LIST}")
74
 
75
 
 
241
 
242
  print(f"Loading default demand values from data files")
243
  # Use released orders instead of planned orders for demand
244
+ demand_df = extract.read_orders_data(start_date=start_date)
245
  demand_dictionary = demand_df.groupby('Material Number')["Order quantity (GMEIN)"].sum().to_dict()
246
  print(f"πŸ“ˆ DEMAND DATA: {len(demand_dictionary)} products with total demand {sum(demand_dictionary.values())}")
247
  return demand_dictionary
 
300
  print(f"Using default value for team requirements, extracting from CSV: {e}")
301
 
302
  # Read the kits calculation data directly
303
+ kits_df = extract.read_personnel_requirement_data()
304
+ # kits_path = "data/real_data_excel/converted_csv/Kits__Calculation.csv"
305
+ # kits_df = pd.read_csv(kits_path)
306
+ print("kits_df columns:", kits_df.columns.tolist())
307
+ print("kits_df head:", kits_df.head())
308
  # Initialize the team requirements dictionary
309
  team_req_dict = {
310
  "UNICEF Fixed term": {},
 
313
 
314
  # Process each product in the product list
315
  for product in PRODUCT_LIST:
316
+ print("product",product)
317
  print(f"Processing team requirements for product: {product}")
318
  product_data = kits_df[kits_df['Kit'] == product]
319
+ print("product_data",product_data)
320
  if not product_data.empty:
321
  # Extract Humanizer and UNICEF staff requirements
322
  humanizer_req = product_data["Humanizer"].iloc[0]
323
  unicef_req = product_data["UNICEF staff"].iloc[0]
324
 
325
+ # Convert to int (data is already cleaned in extract function)
326
+ team_req_dict["Humanizer"][product] = int(humanizer_req)
327
+ team_req_dict["UNICEF Fixed term"][product] = int(unicef_req)
328
  else:
329
  print(f"Warning: Product {product} not found in Kits Calculation data, setting requirements to 0")
330
 
src/etl/extract.py CHANGED
@@ -3,9 +3,17 @@ import datetime
3
  from datetime import date, timedelta
4
  import json
5
  import os
 
6
  START_DATE = pd.Timestamp(2025, 7, 7)
7
  END_DATE = pd.Timestamp(2025, 7, 11)
8
 
 
 
 
 
 
 
 
9
 
10
  def read_excel(path: str) -> pd.DataFrame:
11
  return pd.read_excel(path, dtype={"id": "Int64"})
@@ -13,13 +21,18 @@ def read_excel(path: str) -> pd.DataFrame:
13
 
14
  def read_demand_data(
15
  path="data/real_data_excel/converted_csv/COOIS_Planned_and_Released.csv",
 
 
16
  ) -> pd.DataFrame:
17
  df = pd.read_csv(path)
18
  df["Basic start date"] = pd.to_datetime(df["Basic start date"])
19
- df["Basic finish date"] = pd.to_datetime(df["Basic finish date"])
20
- df = df[
21
- (df["Basic start date"] >= START_DATE) & (df["Basic finish date"] <= END_DATE)
22
- ]
 
 
 
23
 
24
  return df
25
 
@@ -67,10 +80,10 @@ def read_packaging_line_data(
67
  return df
68
 
69
 
70
- def read_released_orders_data(
71
- path="data/real_data_excel/converted_csv/COOIS_Released_Prod_Orders.csv",
72
  start_date=None,
73
- end_date=None,
74
  ) -> pd.DataFrame:
75
  """
76
  COOIS_Released_Prod_Orders.csv
@@ -78,7 +91,7 @@ def read_released_orders_data(
78
  Args:
79
  path: path to the csv file
80
  start_date: start date (pd.Timestamp or datetime)
81
- end_date: end date (pd.Timestamp or datetime)
82
 
83
  Returns:
84
  pd.DataFrame: filtered dataframe by date
@@ -87,15 +100,14 @@ def read_released_orders_data(
87
  assert len(df) > 0, "No data found in the file"
88
  # convert date column to datetime
89
  df["Basic start date"] = pd.to_datetime(df["Basic start date"])
90
- df["Basic finish date"] = pd.to_datetime(df["Basic finish date"])
91
 
92
 
93
  # filter by date
94
- if start_date is not None and end_date is not None:
95
- # filter by date
96
- df = df[
97
- (df["Basic start date"] == pd.to_datetime(start_date)) & (df["Basic finish date"] == pd.to_datetime(end_date))
98
- ]
99
 
100
  return df
101
 
@@ -104,10 +116,41 @@ def read_package_speed_data(
104
  path="data/real_data_excel/converted_csv/Kits__Calculation.csv",
105
  ):
106
  df = pd.read_csv(path, usecols=["Kit", "Kit per day","Paid work hours per day"])
 
 
 
107
  df['kits_per_hour'] = df['Kit per day']/df['Paid work hours per day']
108
  speeds_per_hour = dict(zip(df["Kit"], df["kits_per_hour"]))
109
  return speeds_per_hour
110
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
 
113
  def get_production_order_data():
 
3
  from datetime import date, timedelta
4
  import json
5
  import os
6
+ # Default dates - will be overridden by optimization_config.py
7
  START_DATE = pd.Timestamp(2025, 7, 7)
8
  END_DATE = pd.Timestamp(2025, 7, 11)
9
 
10
def set_global_dates(start_date, end_date):
    """Override the module-level START_DATE / END_DATE filtering defaults.

    Args:
        start_date: anything ``pd.Timestamp`` accepts (datetime, date, str).
        end_date: same, for the end of the planning window.
    """
    global START_DATE, END_DATE
    START_DATE, END_DATE = pd.Timestamp(start_date), pd.Timestamp(end_date)
    # Echo the override so each run's effective window is visible in the log.
    print(f"Updated global dates: {START_DATE} to {END_DATE}")

17
 
18
  def read_excel(path: str) -> pd.DataFrame:
19
  return pd.read_excel(path, dtype={"id": "Int64"})
 
21
 
22
def read_demand_data(
    path="data/real_data_excel/converted_csv/COOIS_Planned_and_Released.csv",
    start_date=None,
    end_date=None,  # retained for backward compatibility; no longer used for filtering
) -> pd.DataFrame:
    """Load demand orders and keep only rows whose start date matches exactly.

    Args:
        path: CSV containing at least a "Basic start date" column.
        start_date: exact date to filter on; falls back to the module-level
            START_DATE when omitted.
        end_date: ignored — finish-date filtering was removed, but the
            parameter is kept so existing callers that still pass it do not break.

    Returns:
        pd.DataFrame: rows whose "Basic start date" equals the filter date.
    """
    df = pd.read_csv(path)
    df["Basic start date"] = pd.to_datetime(df["Basic start date"])

    # Use the provided date or fall back to the module default.
    # (Previously a filter_end_date was also computed here but never used;
    # that dead assignment has been removed.)
    filter_start_date = start_date if start_date is not None else START_DATE
    df = df[df["Basic start date"] == filter_start_date]

    return df
38
 
 
80
  return df
81
 
82
 
83
+ def read_orders_data(
84
+ path="data/real_data_excel/converted_csv/COOIS_Planned_and_Released.csv",
85
  start_date=None,
86
+ # end_date=None,
87
  ) -> pd.DataFrame:
88
  """
89
  COOIS_Released_Prod_Orders.csv
 
91
  Args:
92
  path: path to the csv file
93
  start_date: start date (pd.Timestamp or datetime)
94
+
95
 
96
  Returns:
97
  pd.DataFrame: filtered dataframe by date
 
100
  assert len(df) > 0, "No data found in the file"
101
  # convert date column to datetime
102
  df["Basic start date"] = pd.to_datetime(df["Basic start date"])
103
+ # df["Basic finish date"] = pd.to_datetime(df["Basic finish date"])
104
 
105
 
106
  # filter by date
107
+ if start_date is not None: # Filter for exact start date only
108
+ df = df[df["Basic start date"] == pd.to_datetime(start_date)]
109
+ else:
110
+ raise ValueError("start_date is required")
 
111
 
112
  return df
113
 
 
116
  path="data/real_data_excel/converted_csv/Kits__Calculation.csv",
117
  ):
118
  df = pd.read_csv(path, usecols=["Kit", "Kit per day","Paid work hours per day"])
119
+ df["Kit per day"] = df["Kit per day"].astype(float)
120
+ df["Paid work hours per day"] = df["Paid work hours per day"].astype(float)
121
+ df["Kit"] = df["Kit"].astype(str)
122
  df['kits_per_hour'] = df['Kit per day']/df['Paid work hours per day']
123
  speeds_per_hour = dict(zip(df["Kit"], df["kits_per_hour"]))
124
  return speeds_per_hour
125
 
126
def read_personnel_requirement_data(
    path="data/real_data_excel/converted_csv/Kits__Calculation.csv",
):
    """Load per-kit staffing requirements from the kits calculation CSV.

    Returns a DataFrame with "Kit" cast to string and the "Humanizer" /
    "UNICEF staff" columns coerced to floats; missing or unparseable
    cells become 0.0.
    """
    df = pd.read_csv(path, usecols=["Kit", "Humanizer", "UNICEF staff"])

    def _coerce(raw):
        # Missing cells count as zero staff.
        if pd.isna(raw):
            return 0.0

        # str.strip() removes all Unicode whitespace, including the
        # non-breaking space (\xa0) that appears in this spreadsheet export.
        text = str(raw).strip()
        if text in ('', 'nan'):
            return 0.0

        try:
            return float(text)
        except ValueError as e:
            print(f"Warning: Could not convert '{repr(raw)}' to float, setting to 0. Error: {e}")
            return 0.0

    for column in ("Humanizer", "UNICEF staff"):
        df[column] = df[column].apply(_coerce)
    df["Kit"] = df["Kit"].astype(str)

    return df
154
 
155
 
156
  def get_production_order_data():