import pandas as pd import datetime from datetime import date, timedelta import json import os # Default dates - will be overridden by optimization_config.py START_DATE = pd.Timestamp(2025, 7, 7) END_DATE = pd.Timestamp(2025, 7, 11) def set_global_dates(start_date, end_date): """Update global START_DATE and END_DATE variables""" global START_DATE, END_DATE START_DATE = pd.Timestamp(start_date) END_DATE = pd.Timestamp(end_date) print(f"Updated global dates: {START_DATE} to {END_DATE}") def read_excel(path: str) -> pd.DataFrame: return pd.read_excel(path, dtype={"id": "Int64"}) def read_demand_data( path="data/real_data_excel/converted_csv/COOIS_Planned_and_Released.csv", start_date=None, end_date=None, ) -> pd.DataFrame: df = pd.read_csv(path) df["Basic start date"] = pd.to_datetime(df["Basic start date"]) # df["Basic finish date"] = pd.to_datetime(df["Basic finish date"]) # Use provided dates or fall back to module defaults filter_start_date = start_date if start_date is not None else START_DATE filter_end_date = end_date if end_date is not None else END_DATE df = df[(df["Basic start date"] == filter_start_date)] return df def read_kit_line_match_data( path="data/real_data_excel/converted_csv/Kit_Composition_and_relation_cleaned_with_line_type.csv", ) -> pd.DataFrame: return pd.read_csv(path) def read_employee_data( path="data/real_data_excel/converted_csv/WH_Workforce_Hourly_Pay_Scale_processed.csv", ) -> pd.DataFrame: return pd.read_csv(path) def get_shift_info( path = "data/real_data_excel/converted_csv/work_shift.csv" ) -> pd.DataFrame: df = pd.read_csv(path) return df def read_shift_cost_data( path="data/real_data_excel/converted_csv/WH_Workforce_Hourly_Pay_Scale_processed.csv", ) -> pd.DataFrame: return pd.read_csv(path) def read_work_center_capacity( path="data/real_data_excel/converted_csv/Work_Centre_Capacity.csv", ) -> pd.DataFrame: return pd.read_csv(path) def read_material_master( path="data/real_data_excel/converted_csv/Material_Master_WMS.csv", ) -> pd.DataFrame: return pd.read_csv(path) def read_packaging_line_data( path="data/real_data_excel/converted_csv/Work_Centre_Capacity_processed.csv", ) -> pd.DataFrame: df = pd.read_csv(path) # Filter for packaging lines only df = df[df["line_for_packaging"] == True] return df def read_orders_data( path="data/real_data_excel/converted_csv/COOIS_Planned_and_Released.csv", start_date=None, # end_date=None, ) -> pd.DataFrame: """ COOIS_Released_Prod_Orders.csv Args: path: path to the csv file start_date: start date (pd.Timestamp or datetime) Returns: pd.DataFrame: filtered dataframe by date """ df = pd.read_csv(path) assert len(df) > 0, "No data found in the file" # convert date column to datetime df["Basic start date"] = pd.to_datetime(df["Basic start date"]) # df["Basic finish date"] = pd.to_datetime(df["Basic finish date"]) # filter by date if start_date is not None: # Filter for exact start date only df = df[df["Basic start date"] == pd.to_datetime(start_date)] else: raise ValueError("start_date is required") return df def read_package_speed_data( path="data/real_data_excel/converted_csv/Kits__Calculation.csv", ): df = pd.read_csv(path, usecols=["Kit", "Kit per day","Paid work hours per day"]) df["Kit per day"] = df["Kit per day"].astype(float) df["Paid work hours per day"] = df["Paid work hours per day"].astype(float) df["Kit"] = df["Kit"].astype(str) df['kits_per_hour'] = df['Kit per day']/df['Paid work hours per day'] speeds_per_hour = dict(zip(df["Kit"], df["kits_per_hour"])) return speeds_per_hour def read_personnel_requirement_data( path="data/real_data_excel/converted_csv/Kits__Calculation.csv", ): df = pd.read_csv(path, usecols=["Kit", "Humanizer", "UNICEF staff"]) # Clean the data by handling special whitespace characters like \xa0 (non-breaking space) def clean_and_convert_to_float(value): if pd.isna(value): return 0.0 # Convert to string and strip all kinds of whitespace (including \xa0) clean_value = str(value).strip() # If empty after stripping, return 0 if clean_value == '' or clean_value == 'nan': return 0.0 try: return float(clean_value) except ValueError as e: print(f"Warning: Could not convert '{repr(value)}' to float, setting to 0. Error: {e}") return 0.0 df["Humanizer"] = df["Humanizer"].apply(clean_and_convert_to_float) df["UNICEF staff"] = df["UNICEF staff"].apply(clean_and_convert_to_float) df["Kit"] = df["Kit"].astype(str) return df def get_production_order_data(): """ Extract production order information from hierarchy. Returns: - kit_levels: {kit_id: level} where level 0=prepack, 1=subkit, 2=master - dependencies: {kit_id: [dependency_list]} - priority_order: [kit_ids] sorted by production priority """ path = "data/hierarchy_exports/kit_hierarchy.json" with open(path, 'r', encoding='utf-8') as f: hierarchy = json.load(f) kit_levels = {} dependencies = {} # Process hierarchy to extract levels and dependencies for master_id, master_data in hierarchy.items(): # Master kits are level 2 kit_levels[master_id] = 2 dependencies[master_id] = master_data.get('dependencies', []) # Process subkits (level 1) for subkit_id, subkit_data in master_data.get('subkits', {}).items(): kit_levels[subkit_id] = 1 dependencies[subkit_id] = subkit_data.get('dependencies', []) # Process prepacks under subkits (level 0) for prepack_id in subkit_data.get('prepacks', []): if prepack_id not in kit_levels: # Avoid overwriting if already exists kit_levels[prepack_id] = 0 dependencies[prepack_id] = [] # Process direct prepacks under master (level 0) for prepack_id in master_data.get('direct_prepacks', []): if prepack_id not in kit_levels: # Avoid overwriting if already exists kit_levels[prepack_id] = 0 dependencies[prepack_id] = [] # Create priority order: prepacks first, then subkits, then masters priority_order = [] # Level 0: Prepacks (highest priority) prepacks = [kit for kit, level in kit_levels.items() if level == 0] priority_order.extend(sorted(prepacks)) # Level 1: Subkits (medium priority) subkits = [kit for kit, level in kit_levels.items() if level == 1] priority_order.extend(sorted(subkits)) # Level 2: Masters (lowest priority) masters = [kit for kit, level in kit_levels.items() if level == 2] priority_order.extend(sorted(masters)) return kit_levels, dependencies, priority_order if __name__ == "__main__": employee_data = read_employee_data() print("employee data") print(employee_data) print("line speed data",read_package_speed_data())