""" Demand Data Filtering Module This module filters demand data to include only products that are ready for optimization. Excludes products that: 1. Have no line type assignments (non-standalone masters) 2. Have zero staffing requirements (both Humanizer and UNICEF staff = 0) The filtered data is used by the optimization system. """ import pandas as pd from typing import Dict, List, Tuple from src.etl import extract class DemandFilter: """ Filters demand data to include only products ready for optimization """ def __init__(self): self.demand_data = None self.kit_levels = None self.kit_dependencies = None self.line_assignments = None self.team_requirements = None def load_data(self, force_reload=False): """Load all necessary data for filtering""" try: # Skip loading if data already exists and not forcing reload if not force_reload and self.demand_data is not None: print("šŸ“Š Using cached filter data (set force_reload=True to refresh)") return True print("šŸ”„ Loading fresh filtering data...") # Get start date for demand data from optimization config from src.config.optimization_config import get_date_span date_span, start_date, end_date = get_date_span() print(f"šŸ—“ļø DEMAND FILTERING DATE: Using {start_date.date() if start_date else 'None'} (same as optimization)") # Load demand data directly from extract demand_df = extract.read_orders_data(start_date=start_date) self.demand_data = demand_df.groupby('Material Number')["Order quantity (GMEIN)"].sum().to_dict() # Load kit hierarchy data kit_levels, dependencies, _ = extract.get_production_order_data() self.kit_levels = kit_levels self.kit_dependencies = dependencies # Load line assignments from kit line match data kit_line_match = extract.read_kit_line_match_data() kit_line_match_dict = kit_line_match.set_index("kit_name")["line_type"].to_dict() # Convert string line names to numeric IDs from src.config.constants import LineType line_name_to_id = { "long line": LineType.LONG_LINE, "mini load": LineType.MINI_LOAD, "miniload": LineType.MINI_LOAD, "Long_line": LineType.LONG_LINE, "Mini_load": LineType.MINI_LOAD, } self.line_assignments = {} for kit, line_name in kit_line_match_dict.items(): if isinstance(line_name, str) and line_name.strip(): line_id = line_name_to_id.get(line_name.strip()) if line_id is not None: self.line_assignments[kit] = line_id elif isinstance(line_name, (int, float)) and not pd.isna(line_name): self.line_assignments[kit] = int(line_name) # Load team requirements from Kits Calculation data kits_df = extract.read_personnel_requirement_data() self.team_requirements = { 'UNICEF Fixed term': kits_df.set_index('Kit')['UNICEF staff'].to_dict(), 'Humanizer': kits_df.set_index('Kit')['Humanizer'].to_dict() } return True except Exception as e: print(f"Error loading data for filtering: {str(e)}") return False def classify_product_type(self, product_id: str) -> Tuple[str, bool]: """ Classify product type and check if it's a standalone master. Returns: Tuple[str, bool]: (product_type, is_standalone_master) """ if product_id in self.kit_levels: level = self.kit_levels[product_id] if level == 0: return "prepack", False elif level == 1: return "subkit", False elif level == 2: # Check if this master is standalone (no subkits/prepacks) dependencies = self.kit_dependencies.get(product_id, []) is_standalone = len(dependencies) == 0 return "master", is_standalone else: return "unknown", False else: return "unclassified", False def is_product_ready_for_optimization(self, product_id: str) -> Tuple[bool, List[str]]: """ Check if a product is ready for optimization. Returns: Tuple[bool, List[str]]: (is_ready, exclusion_reasons) """ exclusion_reasons = [] # Classify product type product_type, is_standalone_master = self.classify_product_type(product_id) # Check line assignment logic has_line_assignment = product_id in self.line_assignments # For masters: standalone should have line assignment, non-standalone should NOT if product_type == "master": if is_standalone_master: # Standalone masters should have "long line" assignment if not has_line_assignment: exclusion_reasons.append("Standalone master missing line assignment") elif self.line_assignments.get(product_id) != 6: # 6 = LONG_LINE exclusion_reasons.append("Standalone master should have long line assignment") else: # Non-standalone masters should NOT have line assignment (excluded from production) exclusion_reasons.append("Non-standalone master (excluded from production)") else: # For subkits and prepacks, check normal line assignment if not has_line_assignment: exclusion_reasons.append("No line assignment") # Check staffing requirements unicef_staff = self.team_requirements.get('UNICEF Fixed term', {}).get(product_id, 0) humanizer_staff = self.team_requirements.get('Humanizer', {}).get(product_id, 0) total_staff = unicef_staff + humanizer_staff if total_staff == 0: exclusion_reasons.append("Zero staffing requirements") is_ready = len(exclusion_reasons) == 0 return is_ready, exclusion_reasons def filter_products(self) -> Tuple[List[str], Dict[str, int], List[str], Dict[str, int]]: """ Filter products into included and excluded lists. Returns: Tuple containing: - included_products: List of product IDs ready for optimization - included_demand: Dict of {product_id: demand} for included products - excluded_products: List of product IDs excluded from optimization - excluded_demand: Dict of {product_id: demand} for excluded products """ if not self.load_data(): raise Exception("Failed to load data for filtering") included_products = [] included_demand = {} excluded_products = [] excluded_demand = {} excluded_details = {} print("šŸ” FILTERING DEMAND DATA FOR OPTIMIZATION") for product_id, demand in self.demand_data.items(): if demand <= 0: # Skip products with no demand continue is_ready, exclusion_reasons = self.is_product_ready_for_optimization(product_id) if is_ready: included_products.append(product_id) included_demand[product_id] = demand else: excluded_products.append(product_id) excluded_demand[product_id] = demand excluded_details[product_id] = exclusion_reasons # Sort products for consistent output included_products.sort() excluded_products.sort() # Print summary total_demand = sum(self.demand_data.values()) included_total = sum(included_demand.values()) excluded_total = sum(excluded_demand.values()) print(f"āœ… INCLUDED in optimization: {len(included_products)} products ({included_total:,} units)") print(f"🚫 EXCLUDED from optimization: {len(excluded_products)} products ({excluded_total:,} units)") print(f"šŸ“Š Total demand: {total_demand:,} units") # Print exclusion breakdown if excluded_products: print(f"\nšŸ“‹ EXCLUSION BREAKDOWN:") reason_counts = {} for reasons in excluded_details.values(): for reason in reasons: reason_counts[reason] = reason_counts.get(reason, 0) + 1 for reason, count in reason_counts.items(): print(f" • {reason}: {count} products") # Print data quality warnings for included products (without recursion) # Load speed data for validation speed_data = None try: from src.config import optimization_config speed_data = optimization_config.PER_PRODUCT_SPEED except Exception as e: print(f"Warning: Could not load speed data for validation: {e}") if speed_data: included_without_speed = sum(1 for pid in included_products if pid not in speed_data) if included_without_speed > 0: print(f"\nāš ļø DATA QUALITY WARNING: {included_without_speed} included products missing speed data (will use default 106.7 units/hour)") included_without_hierarchy = sum(1 for pid in included_products if self.classify_product_type(pid)[0] == "unclassified") if included_without_hierarchy > 0: print(f"āš ļø DATA QUALITY WARNING: {included_without_hierarchy} included products missing hierarchy data") return included_products, included_demand, excluded_products, excluded_demand def get_filtered_product_list(self) -> List[str]: """Get list of products ready for optimization""" included_products, _, _, _ = self.filter_products() return included_products def get_filtered_demand_dictionary(self) -> Dict[str, int]: """Get demand dictionary for products ready for optimization""" _, included_demand, _, _ = self.filter_products() return included_demand def get_complete_product_analysis(self) -> Dict: """Get complete analysis of all products for visualization""" included_products, included_demand, excluded_products, excluded_demand = self.filter_products() all_products = {**included_demand, **excluded_demand} product_details = {} # Load speed data for additional validation speed_data = None try: from src.config import optimization_config speed_data = optimization_config.PER_PRODUCT_SPEED except Exception as e: print(f"Warning: Could not load speed data for analysis: {e}") for product_id, demand in all_products.items(): product_type, is_standalone_master = self.classify_product_type(product_id) is_ready, exclusion_reasons = self.is_product_ready_for_optimization(product_id) # Get staffing info unicef_staff = self.team_requirements.get('UNICEF Fixed term', {}).get(product_id, 0) humanizer_staff = self.team_requirements.get('Humanizer', {}).get(product_id, 0) # Get line assignment line_assignment = self.line_assignments.get(product_id) # Get production speed info has_speed_data = speed_data is not None and product_id in speed_data product_details[product_id] = { 'demand': demand, 'product_type': product_type, 'is_standalone_master': is_standalone_master, 'is_included_in_optimization': is_ready, 'exclusion_reasons': exclusion_reasons, 'unicef_staff': unicef_staff, 'humanizer_staff': humanizer_staff, 'total_staff': unicef_staff + humanizer_staff, 'line_assignment': line_assignment, 'has_line_assignment': line_assignment is not None, 'has_staffing': (unicef_staff + humanizer_staff) > 0, 'has_hierarchy': product_type != "unclassified", 'has_speed_data': has_speed_data } # Calculate data quality statistics for included products included_without_speed = sum(1 for pid in included_products if not product_details[pid]['has_speed_data']) included_without_hierarchy = sum(1 for pid in included_products if not product_details[pid]['has_hierarchy']) return { 'included_count': len(included_products), 'included_demand': sum(included_demand.values()), 'excluded_count': len(excluded_products), 'excluded_demand': sum(excluded_demand.values()), 'total_products': len(all_products), 'total_demand': sum(all_products.values()), 'product_details': product_details, 'standalone_masters_count': sum(1 for p in product_details.values() if p['is_standalone_master']), 'included_products': included_products, 'excluded_products': excluded_products, # Data quality metrics for included products 'included_missing_speed_count': included_without_speed, 'included_missing_hierarchy_count': included_without_hierarchy } def get_exclusion_summary(self) -> Dict: """Get summary of excluded products for reporting""" included_products, included_demand, excluded_products, excluded_demand = self.filter_products() excluded_details = {} for product_id in excluded_products: _, reasons = self.is_product_ready_for_optimization(product_id) excluded_details[product_id] = { 'demand': excluded_demand[product_id], 'reasons': reasons } return { 'included_count': len(included_products), 'included_demand': sum(included_demand.values()), 'excluded_count': len(excluded_products), 'excluded_demand': sum(excluded_demand.values()), 'excluded_details': excluded_details } # Convenience functions for easy import def get_filtered_product_list() -> List[str]: """Get list of products ready for optimization""" filter_instance = DemandFilter() return filter_instance.get_filtered_product_list() def get_filtered_demand_dictionary() -> Dict[str, int]: """Get demand dictionary for products ready for optimization""" filter_instance = DemandFilter() return filter_instance.get_filtered_demand_dictionary() def get_exclusion_summary() -> Dict: """Get summary of excluded products for reporting""" filter_instance = DemandFilter() return filter_instance.get_exclusion_summary() def get_complete_analysis() -> Dict: """Get complete product analysis including data quality metrics""" filter_instance = DemandFilter() return filter_instance.get_complete_product_analysis() # Singleton instance for consistency across modules _SHARED_FILTER_INSTANCE = None def get_shared_filter_instance(): """Returns a shared singleton instance of DemandFilter.""" global _SHARED_FILTER_INSTANCE if _SHARED_FILTER_INSTANCE is None: _SHARED_FILTER_INSTANCE = DemandFilter() return _SHARED_FILTER_INSTANCE if __name__ == "__main__": # Test the filtering filter_instance = DemandFilter() included_products, included_demand, excluded_products, excluded_demand = filter_instance.filter_products() print(f"\n=== FILTERING TEST RESULTS ===") print(f"Included products: {included_products[:5]}..." if len(included_products) > 5 else f"Included products: {included_products}") print(f"Excluded products: {excluded_products[:5]}..." if len(excluded_products) > 5 else f"Excluded products: {excluded_products}")