"""
Demand Data Filtering Module

This module filters demand data to include only products that are ready for optimization.
Excludes products that:
1. Have no usable line type assignment, or are non-standalone masters
2. Have zero staffing requirements (both Humanizer and UNICEF staff = 0)

The filtered data is used by the optimization system.
"""
| |
|
| | import pandas as pd |
| | from typing import Dict, List, Tuple |
| | from src.etl import extract |
| |
|
| |
|
class DemandFilter:
    """
    Filters demand data to include only products ready for optimization.

    A product with positive demand is kept only when it has a usable line
    assignment and a non-zero total staffing requirement. Master kits
    (hierarchy level 2) are kept only when they are standalone (no
    dependencies) and assigned to the line id in
    ``_STANDALONE_MASTER_LINE_ID``.

    Data is loaded lazily by ``load_data`` and cached on the instance.
    """

    # Line id a standalone master must be assigned to. The exclusion message
    # calls this the "long line"; presumably it equals LineType.LONG_LINE --
    # TODO confirm against src.config.constants.
    _STANDALONE_MASTER_LINE_ID = 6

    def __init__(self):
        # All attributes stay None until load_data() succeeds.
        self.demand_data = None        # {material number: summed order quantity}
        self.kit_levels = None         # {product id: hierarchy level}
        self.kit_dependencies = None   # {product id: dependencies}
        self.line_assignments = None   # {product id: line id}
        self.team_requirements = None  # {team name: {product id: staff count}}

    def load_data(self, force_reload=False):
        """Load all necessary data for filtering.

        Args:
            force_reload: When True, reload even if data is already cached.

        Returns:
            bool: True when data is available (freshly loaded or cached),
            False when loading failed. On failure the instance keeps whatever
            consistent state it had before the call.
        """
        if not force_reload and self.demand_data is not None:
            print("📊 Using cached filter data (set force_reload=True to refresh)")
            return True

        try:
            print("🔄 Loading fresh filtering data...")

            from src.config.optimization_config import get_date_span
            _, start_date, _ = get_date_span()
            print(f"🗓️ DEMAND FILTERING DATE: Using {start_date.date() if start_date else 'None'} (same as optimization)")

            # Everything is loaded into locals first so that a failure in any
            # later step cannot leave the instance half-populated (which the
            # cache check above would then mistake for a complete load).
            demand_df = extract.read_orders_data(start_date=start_date)
            demand_data = demand_df.groupby('Material Number')["Order quantity (GMEIN)"].sum().to_dict()

            kit_levels, dependencies, _ = extract.get_production_order_data()

            kit_line_match = extract.read_kit_line_match_data()
            kit_line_match_dict = kit_line_match.set_index("kit_name")["line_type"].to_dict()

            from src.config.constants import LineType
            line_name_to_id = {
                "long line": LineType.LONG_LINE,
                "mini load": LineType.MINI_LOAD,
                "miniload": LineType.MINI_LOAD,
                "Long_line": LineType.LONG_LINE,
                "Mini_load": LineType.MINI_LOAD,
            }

            line_assignments = {}
            for kit, line_name in kit_line_match_dict.items():
                if isinstance(line_name, str) and line_name.strip():
                    # Textual line names are translated; unknown names are skipped.
                    line_id = line_name_to_id.get(line_name.strip())
                    if line_id is not None:
                        line_assignments[kit] = line_id
                elif isinstance(line_name, (int, float)) and not pd.isna(line_name):
                    # Numeric values are taken as the line id directly.
                    line_assignments[kit] = int(line_name)

            kits_df = extract.read_personnel_requirement_data()
            staff_by_kit = kits_df.set_index('Kit')
            team_requirements = {
                'UNICEF Fixed term': staff_by_kit['UNICEF staff'].to_dict(),
                'Humanizer': staff_by_kit['Humanizer'].to_dict(),
            }
        except Exception as e:
            print(f"Error loading data for filtering: {str(e)}")
            return False

        # Commit only after every source loaded successfully.
        self.demand_data = demand_data
        self.kit_levels = kit_levels
        self.kit_dependencies = dependencies
        self.line_assignments = line_assignments
        self.team_requirements = team_requirements
        return True

    @staticmethod
    def _load_speed_data(purpose: str):
        """Return optimization_config.PER_PRODUCT_SPEED, or None if unavailable."""
        try:
            from src.config import optimization_config
            return optimization_config.PER_PRODUCT_SPEED
        except Exception as e:
            print(f"Warning: Could not load speed data for {purpose}: {e}")
            return None

    def _staff_requirements(self, product_id: str) -> Tuple[float, float]:
        """Return (unicef_staff, humanizer_staff); missing/NaN/None counts as 0."""
        def lookup(team):
            value = self.team_requirements.get(team, {}).get(product_id, 0)
            # NaN would make the total NaN, and NaN == 0 is False, so a product
            # with missing staffing data would wrongly pass the zero-staff check.
            return 0 if pd.isna(value) else value

        return lookup('UNICEF Fixed term'), lookup('Humanizer')

    def classify_product_type(self, product_id: str) -> Tuple[str, bool]:
        """
        Classify product type and check if it's a standalone master.

        Levels 0, 1 and 2 in ``kit_levels`` map to "prepack", "subkit" and
        "master"; any other level is "unknown", and a product absent from
        ``kit_levels`` is "unclassified". A master is standalone when it has
        no entries in ``kit_dependencies``.

        Returns:
            Tuple[str, bool]: (product_type, is_standalone_master)
        """
        if product_id not in self.kit_levels:
            return "unclassified", False

        level = self.kit_levels[product_id]
        if level == 0:
            return "prepack", False
        if level == 1:
            return "subkit", False
        if level == 2:
            dependencies = self.kit_dependencies.get(product_id, [])
            return "master", len(dependencies) == 0
        return "unknown", False

    def is_product_ready_for_optimization(self, product_id: str) -> Tuple[bool, List[str]]:
        """
        Check if a product is ready for optimization.

        Returns:
            Tuple[bool, List[str]]: (is_ready, exclusion_reasons); the product
            is ready exactly when the reasons list is empty.
        """
        exclusion_reasons = []

        product_type, is_standalone_master = self.classify_product_type(product_id)
        has_line_assignment = product_id in self.line_assignments

        if product_type != "master":
            if not has_line_assignment:
                exclusion_reasons.append("No line assignment")
        elif not is_standalone_master:
            exclusion_reasons.append("Non-standalone master (excluded from production)")
        elif not has_line_assignment:
            exclusion_reasons.append("Standalone master missing line assignment")
        elif self.line_assignments.get(product_id) != self._STANDALONE_MASTER_LINE_ID:
            exclusion_reasons.append("Standalone master should have long line assignment")

        unicef_staff, humanizer_staff = self._staff_requirements(product_id)
        if unicef_staff + humanizer_staff == 0:
            exclusion_reasons.append("Zero staffing requirements")

        return len(exclusion_reasons) == 0, exclusion_reasons

    def filter_products(self) -> Tuple[List[str], Dict[str, int], List[str], Dict[str, int]]:
        """
        Filter products into included and excluded lists.

        Products whose demand is <= 0 are skipped entirely (neither included
        nor excluded). A summary and data-quality warnings are printed.

        Returns:
            Tuple containing:
            - included_products: Sorted list of product IDs ready for optimization
            - included_demand: Dict of {product_id: demand} for included products
            - excluded_products: Sorted list of product IDs excluded from optimization
            - excluded_demand: Dict of {product_id: demand} for excluded products

        Raises:
            RuntimeError: If the underlying data could not be loaded.
        """
        if not self.load_data():
            raise RuntimeError("Failed to load data for filtering")

        included_demand = {}
        excluded_demand = {}
        excluded_details = {}

        print("🔍 FILTERING DEMAND DATA FOR OPTIMIZATION")

        for product_id, demand in self.demand_data.items():
            if demand <= 0:
                continue

            is_ready, exclusion_reasons = self.is_product_ready_for_optimization(product_id)
            if is_ready:
                included_demand[product_id] = demand
            else:
                excluded_demand[product_id] = demand
                excluded_details[product_id] = exclusion_reasons

        included_products = sorted(included_demand)
        excluded_products = sorted(excluded_demand)

        # Note: the grand total covers every loaded product, including those
        # skipped above for non-positive demand.
        total_demand = sum(self.demand_data.values())
        included_total = sum(included_demand.values())
        excluded_total = sum(excluded_demand.values())

        print(f"✅ INCLUDED in optimization: {len(included_products)} products ({included_total:,} units)")
        print(f"🚫 EXCLUDED from optimization: {len(excluded_products)} products ({excluded_total:,} units)")
        print(f"📊 Total demand: {total_demand:,} units")

        if excluded_products:
            print("\n📋 EXCLUSION BREAKDOWN:")
            reason_counts = {}
            for reasons in excluded_details.values():
                for reason in reasons:
                    reason_counts[reason] = reason_counts.get(reason, 0) + 1

            for reason, count in reason_counts.items():
                print(f" • {reason}: {count} products")

        speed_data = self._load_speed_data("validation")
        if speed_data:
            included_without_speed = sum(1 for pid in included_products if pid not in speed_data)
            if included_without_speed > 0:
                print(f"\n⚠️ DATA QUALITY WARNING: {included_without_speed} included products missing speed data (will use default 106.7 units/hour)")

        included_without_hierarchy = sum(
            1 for pid in included_products
            if self.classify_product_type(pid)[0] == "unclassified"
        )
        if included_without_hierarchy > 0:
            print(f"⚠️ DATA QUALITY WARNING: {included_without_hierarchy} included products missing hierarchy data")

        return included_products, included_demand, excluded_products, excluded_demand

    def get_filtered_product_list(self) -> List[str]:
        """Get list of products ready for optimization"""
        included_products, _, _, _ = self.filter_products()
        return included_products

    def get_filtered_demand_dictionary(self) -> Dict[str, int]:
        """Get demand dictionary for products ready for optimization"""
        _, included_demand, _, _ = self.filter_products()
        return included_demand

    def get_complete_product_analysis(self) -> Dict:
        """Get complete analysis of all products for visualization.

        Returns:
            Dict with aggregate counts/demand totals, the included/excluded
            product lists, data-quality counters, and ``product_details``: a
            per-product dict of classification, staffing, line assignment and
            exclusion reasons. Staffing values that are missing/NaN are
            reported as 0.
        """
        included_products, included_demand, excluded_products, excluded_demand = self.filter_products()

        all_products = {**included_demand, **excluded_demand}
        speed_data = self._load_speed_data("analysis")

        product_details = {}
        for product_id, demand in all_products.items():
            product_type, is_standalone_master = self.classify_product_type(product_id)
            is_ready, exclusion_reasons = self.is_product_ready_for_optimization(product_id)
            unicef_staff, humanizer_staff = self._staff_requirements(product_id)
            total_staff = unicef_staff + humanizer_staff
            line_assignment = self.line_assignments.get(product_id)

            product_details[product_id] = {
                'demand': demand,
                'product_type': product_type,
                'is_standalone_master': is_standalone_master,
                'is_included_in_optimization': is_ready,
                'exclusion_reasons': exclusion_reasons,
                'unicef_staff': unicef_staff,
                'humanizer_staff': humanizer_staff,
                'total_staff': total_staff,
                'line_assignment': line_assignment,
                'has_line_assignment': line_assignment is not None,
                'has_staffing': total_staff > 0,
                'has_hierarchy': product_type != "unclassified",
                'has_speed_data': speed_data is not None and product_id in speed_data,
            }

        included_without_speed = sum(
            1 for pid in included_products if not product_details[pid]['has_speed_data']
        )
        included_without_hierarchy = sum(
            1 for pid in included_products if not product_details[pid]['has_hierarchy']
        )

        return {
            'included_count': len(included_products),
            'included_demand': sum(included_demand.values()),
            'excluded_count': len(excluded_products),
            'excluded_demand': sum(excluded_demand.values()),
            'total_products': len(all_products),
            'total_demand': sum(all_products.values()),
            'product_details': product_details,
            'standalone_masters_count': sum(1 for p in product_details.values() if p['is_standalone_master']),
            'included_products': included_products,
            'excluded_products': excluded_products,
            'included_missing_speed_count': included_without_speed,
            'included_missing_hierarchy_count': included_without_hierarchy,
        }

    def get_exclusion_summary(self) -> Dict:
        """Get summary of excluded products for reporting.

        Returns:
            Dict with included/excluded counts and demand totals, plus
            ``excluded_details``: {product_id: {'demand': ..., 'reasons': [...]}}.
        """
        included_products, included_demand, excluded_products, excluded_demand = self.filter_products()

        excluded_details = {}
        for product_id in excluded_products:
            _, reasons = self.is_product_ready_for_optimization(product_id)
            excluded_details[product_id] = {
                'demand': excluded_demand[product_id],
                'reasons': reasons,
            }

        return {
            'included_count': len(included_products),
            'included_demand': sum(included_demand.values()),
            'excluded_count': len(excluded_products),
            'excluded_demand': sum(excluded_demand.values()),
            'excluded_details': excluded_details,
        }
| |
|
| |
|
| | |
def get_filtered_product_list() -> List[str]:
    """Return the product IDs ready for optimization, using a fresh DemandFilter."""
    return DemandFilter().get_filtered_product_list()
| |
|
| |
|
def get_filtered_demand_dictionary() -> Dict[str, int]:
    """Return {product_id: demand} for products ready for optimization, using a fresh DemandFilter."""
    return DemandFilter().get_filtered_demand_dictionary()
| |
|
| |
|
def get_exclusion_summary() -> Dict:
    """Return the excluded-products summary for reporting, using a fresh DemandFilter."""
    return DemandFilter().get_exclusion_summary()
| |
|
| |
|
def get_complete_analysis() -> Dict:
    """Return the complete product analysis (including data quality metrics), using a fresh DemandFilter."""
    return DemandFilter().get_complete_product_analysis()
| |
|
| |
|
| | |
# Module-wide DemandFilter, created on first request.
_SHARED_FILTER_INSTANCE = None


def get_shared_filter_instance():
    """Return the module-wide DemandFilter, creating it on the first call."""
    global _SHARED_FILTER_INSTANCE
    if _SHARED_FILTER_INSTANCE is not None:
        return _SHARED_FILTER_INSTANCE
    _SHARED_FILTER_INSTANCE = DemandFilter()
    return _SHARED_FILTER_INSTANCE
| |
|
if __name__ == "__main__":
    # Manual smoke test: run the filter once and print a short preview
    # (at most the first five IDs) of each resulting list.
    filter_instance = DemandFilter()
    included_products, included_demand, excluded_products, excluded_demand = filter_instance.filter_products()

    print("\n=== FILTERING TEST RESULTS ===")

    if len(included_products) > 5:
        print(f"Included products: {included_products[:5]}...")
    else:
        print(f"Included products: {included_products}")

    if len(excluded_products) > 5:
        print(f"Excluded products: {excluded_products[:5]}...")
    else:
        print(f"Excluded products: {excluded_products}")
| |
|