# Spaces: OOI-FrontierTech / supply-roster-optimization (Sleeping)
# File size: 16,525 Bytes

"""
Demand Data Filtering Module

This module filters demand data to include only products that are ready for optimization.
Excludes products that:
1. Have no line type assignments (non-standalone masters)
2. Have zero staffing requirements (both Humanizer and UNICEF staff = 0)

The filtered data is used by the optimization system.
"""

import pandas as pd
from typing import Dict, List, Tuple
from src.etl import extract


class DemandFilter:
    """
    Filters demand data to include only products ready for optimization.

    A product is excluded when any of the following holds:
    - it is a master (kit level 2) that has dependencies (non-standalone);
    - it is a standalone master without a long-line assignment;
    - it is any other product without a line assignment;
    - its combined 'UNICEF Fixed term' + 'Humanizer' staffing is zero
      (missing / NaN staffing values count as zero).

    Attributes (all ``None`` until ``load_data`` succeeds):
        demand_data: {material number: summed order quantity}
        kit_levels: {product_id: hierarchy level} (0=prepack, 1=subkit, 2=master)
        kit_dependencies: {product_id: collection of dependencies}
        line_assignments: {kit name: numeric line-type ID}
        team_requirements: {'UNICEF Fixed term' | 'Humanizer': {kit: staff count}}
    """

    # Numeric line-type ID a standalone master must be assigned to.
    # NOTE(review): the original inline comment states "6 = LONG_LINE"; this is
    # expected to equal src.config.constants.LineType.LONG_LINE - confirm.
    _LONG_LINE_ID = 6

    # Every attribute that must be populated before cached data may be reused.
    _STATE_ATTRIBUTES = (
        "demand_data",
        "kit_levels",
        "kit_dependencies",
        "line_assignments",
        "team_requirements",
    )

    def __init__(self):
        self.demand_data = None
        self.kit_levels = None
        self.kit_dependencies = None
        self.line_assignments = None
        self.team_requirements = None

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _is_fully_loaded(self) -> bool:
        """Return True only when every piece of filter state is populated."""
        return all(
            getattr(self, name, None) is not None
            for name in self._STATE_ATTRIBUTES
        )

    @staticmethod
    def _normalize_line_name(line_name: str) -> str:
        """Canonicalize a line name: lower-case, underscores -> spaces, collapsed whitespace."""
        return " ".join(line_name.replace("_", " ").lower().split())

    @classmethod
    def _build_line_assignments(cls, kit_line_match: Dict, line_name_to_id: Dict) -> Dict:
        """
        Convert raw {kit: line} values into {kit: numeric line ID}.

        String values are matched against ``line_name_to_id`` ignoring case,
        underscores and surrounding/duplicate whitespace; unrecognized or blank
        names are skipped. Numeric, non-NaN values are taken as IDs directly.
        """
        canonical_ids = {
            cls._normalize_line_name(name): line_id
            for name, line_id in line_name_to_id.items()
        }
        assignments = {}
        for kit, line_name in kit_line_match.items():
            if isinstance(line_name, str):
                line_id = canonical_ids.get(cls._normalize_line_name(line_name))
                if line_id is not None:
                    assignments[kit] = line_id
            elif isinstance(line_name, (int, float)) and not pd.isna(line_name):
                assignments[kit] = int(line_name)
        return assignments

    def _get_staffing(self, product_id) -> Tuple[float, float]:
        """
        Return (unicef_staff, humanizer_staff) for a product.

        Missing entries and NaN/None values are treated as 0 so that a blank
        staffing value cannot make the total NaN and slip past the
        zero-staffing check.
        """
        def lookup(team: str):
            value = self.team_requirements.get(team, {}).get(product_id, 0)
            return 0 if pd.isna(value) else value

        return lookup('UNICEF Fixed term'), lookup('Humanizer')

    @staticmethod
    def _load_speed_data(purpose: str):
        """Best-effort load of PER_PRODUCT_SPEED; returns None (with a warning) on failure."""
        try:
            from src.config import optimization_config
            return optimization_config.PER_PRODUCT_SPEED
        except Exception as e:
            print(f"Warning: Could not load speed data for {purpose}: {e}")
            return None

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def load_data(self, force_reload=False):
        """
        Load all necessary data for filtering.

        Args:
            force_reload: When True, reload even if cached data is present.

        Returns:
            bool: True if data is available (cached or freshly loaded),
            False if loading failed. On failure the previously held state is
            left untouched.
        """
        try:
            # Reuse cached data only when *all* state is present; checking a
            # single attribute would accept a partially populated instance.
            if not force_reload and self._is_fully_loaded():
                print("📊 Using cached filter data (set force_reload=True to refresh)")
                return True

            print("🔄 Loading fresh filtering data...")
            # Get start date for demand data from optimization config
            from src.config.optimization_config import get_date_span
            _, start_date, _ = get_date_span()
            print(f"🗓️ DEMAND FILTERING DATE: Using {start_date.date() if start_date else 'None'} (same as optimization)")

            # Load demand data directly from extract
            demand_df = extract.read_orders_data(start_date=start_date)
            demand_data = demand_df.groupby('Material Number')["Order quantity (GMEIN)"].sum().to_dict()

            # Load kit hierarchy data
            kit_levels, dependencies, _ = extract.get_production_order_data()

            # Load line assignments from kit line match data
            kit_line_match = extract.read_kit_line_match_data()
            kit_line_match_dict = kit_line_match.set_index("kit_name")["line_type"].to_dict()

            # Convert string line names to numeric IDs
            from src.config.constants import LineType
            line_name_to_id = {
                "long line": LineType.LONG_LINE,
                "mini load": LineType.MINI_LOAD,
                "miniload": LineType.MINI_LOAD,
                "Long_line": LineType.LONG_LINE,
                "Mini_load": LineType.MINI_LOAD,
            }
            line_assignments = self._build_line_assignments(kit_line_match_dict, line_name_to_id)

            # Load team requirements from Kits Calculation data
            kits_by_name = extract.read_personnel_requirement_data().set_index('Kit')
            team_requirements = {
                'UNICEF Fixed term': kits_by_name['UNICEF staff'].to_dict(),
                'Humanizer': kits_by_name['Humanizer'].to_dict()
            }

        except Exception as e:
            print(f"Error loading data for filtering: {str(e)}")
            return False

        # Publish everything at once so a failure above never leaves the
        # instance half-updated.
        self.demand_data = demand_data
        self.kit_levels = kit_levels
        self.kit_dependencies = dependencies
        self.line_assignments = line_assignments
        self.team_requirements = team_requirements
        return True

    def classify_product_type(self, product_id: str) -> Tuple[str, bool]:
        """
        Classify product type and check if it's a standalone master.

        Levels map as 0 -> "prepack", 1 -> "subkit", 2 -> "master"; any other
        level is "unknown" and a product absent from the hierarchy is
        "unclassified". A master is standalone when it has no dependencies.

        Returns:
            Tuple[str, bool]: (product_type, is_standalone_master)
        """
        if product_id not in self.kit_levels:
            return "unclassified", False

        level = self.kit_levels[product_id]
        if level == 0:
            return "prepack", False
        if level == 1:
            return "subkit", False
        if level == 2:
            # Check if this master is standalone (no subkits/prepacks)
            dependencies = self.kit_dependencies.get(product_id, [])
            return "master", len(dependencies) == 0
        return "unknown", False

    def is_product_ready_for_optimization(self, product_id: str) -> Tuple[bool, List[str]]:
        """
        Check if a product is ready for optimization.

        Returns:
            Tuple[bool, List[str]]: (is_ready, exclusion_reasons); the product
            is ready exactly when the reasons list is empty.
        """
        exclusion_reasons = []

        product_type, is_standalone_master = self.classify_product_type(product_id)
        has_line_assignment = product_id in self.line_assignments

        if product_type == "master":
            if not is_standalone_master:
                # Non-standalone masters are excluded from production outright
                exclusion_reasons.append("Non-standalone master (excluded from production)")
            elif not has_line_assignment:
                exclusion_reasons.append("Standalone master missing line assignment")
            elif self.line_assignments.get(product_id) != self._LONG_LINE_ID:
                exclusion_reasons.append("Standalone master should have long line assignment")
        elif not has_line_assignment:
            # Subkits, prepacks and unclassified products just need some line
            exclusion_reasons.append("No line assignment")

        # Check staffing requirements (NaN/missing counts as zero)
        unicef_staff, humanizer_staff = self._get_staffing(product_id)
        if unicef_staff + humanizer_staff == 0:
            exclusion_reasons.append("Zero staffing requirements")

        return len(exclusion_reasons) == 0, exclusion_reasons

    def filter_products(self) -> Tuple[List[str], Dict[str, int], List[str], Dict[str, int]]:
        """
        Filter products into included and excluded lists.

        Products with demand <= 0 are skipped entirely. A summary, an
        exclusion breakdown and data quality warnings are printed.

        Returns:
            Tuple containing:
            - included_products: Sorted list of product IDs ready for optimization
            - included_demand: Dict of {product_id: demand} for included products
            - excluded_products: Sorted list of product IDs excluded from optimization
            - excluded_demand: Dict of {product_id: demand} for excluded products

        Raises:
            RuntimeError: If the filtering data could not be loaded.
        """
        if not self.load_data():
            raise RuntimeError("Failed to load data for filtering")

        included_products = []
        included_demand = {}
        excluded_products = []
        excluded_demand = {}
        excluded_details = {}

        print("🔍 FILTERING DEMAND DATA FOR OPTIMIZATION")

        for product_id, demand in self.demand_data.items():
            if demand <= 0:  # Skip products with no demand
                continue

            is_ready, exclusion_reasons = self.is_product_ready_for_optimization(product_id)

            if is_ready:
                included_products.append(product_id)
                included_demand[product_id] = demand
            else:
                excluded_products.append(product_id)
                excluded_demand[product_id] = demand
                excluded_details[product_id] = exclusion_reasons

        # Sort products for consistent output
        included_products.sort()
        excluded_products.sort()

        # Print summary
        total_demand = sum(self.demand_data.values())
        included_total = sum(included_demand.values())
        excluded_total = sum(excluded_demand.values())

        print(f"✅ INCLUDED in optimization: {len(included_products)} products ({included_total:,} units)")
        print(f"🚫 EXCLUDED from optimization: {len(excluded_products)} products ({excluded_total:,} units)")
        print(f"📊 Total demand: {total_demand:,} units")

        # Print exclusion breakdown (one product may contribute several reasons)
        if excluded_products:
            print(f"\n📋 EXCLUSION BREAKDOWN:")
            reason_counts = {}
            for reasons in excluded_details.values():
                for reason in reasons:
                    reason_counts[reason] = reason_counts.get(reason, 0) + 1

            for reason, count in reason_counts.items():
                print(f"  • {reason}: {count} products")

        # Print data quality warnings for included products
        speed_data = self._load_speed_data("validation")
        if speed_data:
            included_without_speed = sum(1 for pid in included_products if pid not in speed_data)
            if included_without_speed > 0:
                print(f"\n⚠️  DATA QUALITY WARNING: {included_without_speed} included products missing speed data (will use default 106.7 units/hour)")

        included_without_hierarchy = sum(1 for pid in included_products if self.classify_product_type(pid)[0] == "unclassified")
        if included_without_hierarchy > 0:
            print(f"⚠️  DATA QUALITY WARNING: {included_without_hierarchy} included products missing hierarchy data")

        return included_products, included_demand, excluded_products, excluded_demand

    def get_filtered_product_list(self) -> List[str]:
        """Get list of products ready for optimization"""
        included_products, _, _, _ = self.filter_products()
        return included_products

    def get_filtered_demand_dictionary(self) -> Dict[str, int]:
        """Get demand dictionary for products ready for optimization"""
        _, included_demand, _, _ = self.filter_products()
        return included_demand

    def get_complete_product_analysis(self) -> Dict:
        """
        Get complete analysis of all products for visualization.

        Returns:
            Dict with aggregate counts/demand for included and excluded
            products, per-product details under 'product_details', and data
            quality counters for included products.
        """
        included_products, included_demand, excluded_products, excluded_demand = self.filter_products()

        all_products = {**included_demand, **excluded_demand}
        product_details = {}

        # Load speed data for additional validation
        speed_data = self._load_speed_data("analysis")

        for product_id, demand in all_products.items():
            product_type, is_standalone_master = self.classify_product_type(product_id)
            is_ready, exclusion_reasons = self.is_product_ready_for_optimization(product_id)

            unicef_staff, humanizer_staff = self._get_staffing(product_id)
            total_staff = unicef_staff + humanizer_staff
            line_assignment = self.line_assignments.get(product_id)
            has_speed_data = speed_data is not None and product_id in speed_data

            product_details[product_id] = {
                'demand': demand,
                'product_type': product_type,
                'is_standalone_master': is_standalone_master,
                'is_included_in_optimization': is_ready,
                'exclusion_reasons': exclusion_reasons,
                'unicef_staff': unicef_staff,
                'humanizer_staff': humanizer_staff,
                'total_staff': total_staff,
                'line_assignment': line_assignment,
                'has_line_assignment': line_assignment is not None,
                'has_staffing': total_staff > 0,
                'has_hierarchy': product_type != "unclassified",
                'has_speed_data': has_speed_data
            }

        # Calculate data quality statistics for included products
        included_without_speed = sum(1 for pid in included_products if not product_details[pid]['has_speed_data'])
        included_without_hierarchy = sum(1 for pid in included_products if not product_details[pid]['has_hierarchy'])

        return {
            'included_count': len(included_products),
            'included_demand': sum(included_demand.values()),
            'excluded_count': len(excluded_products),
            'excluded_demand': sum(excluded_demand.values()),
            'total_products': len(all_products),
            'total_demand': sum(all_products.values()),
            'product_details': product_details,
            'standalone_masters_count': sum(1 for p in product_details.values() if p['is_standalone_master']),
            'included_products': included_products,
            'excluded_products': excluded_products,
            # Data quality metrics for included products
            'included_missing_speed_count': included_without_speed,
            'included_missing_hierarchy_count': included_without_hierarchy
        }

    def get_exclusion_summary(self) -> Dict:
        """
        Get summary of excluded products for reporting.

        Returns:
            Dict with included/excluded counts and demand totals, plus
            'excluded_details' mapping each excluded product to its demand
            and exclusion reasons.
        """
        included_products, included_demand, excluded_products, excluded_demand = self.filter_products()

        excluded_details = {}
        for product_id in excluded_products:
            _, reasons = self.is_product_ready_for_optimization(product_id)
            excluded_details[product_id] = {
                'demand': excluded_demand[product_id],
                'reasons': reasons
            }

        return {
            'included_count': len(included_products),
            'included_demand': sum(included_demand.values()),
            'excluded_count': len(excluded_products),
            'excluded_demand': sum(excluded_demand.values()),
            'excluded_details': excluded_details
        }


# Convenience functions for easy import
def get_filtered_product_list() -> List[str]:
    """Build a fresh DemandFilter and return the product IDs it marks as ready for optimization."""
    return DemandFilter().get_filtered_product_list()


def get_filtered_demand_dictionary() -> Dict[str, int]:
    """Build a fresh DemandFilter and return {product_id: demand} for products ready for optimization."""
    return DemandFilter().get_filtered_demand_dictionary()


def get_exclusion_summary() -> Dict:
    """Build a fresh DemandFilter and return its exclusion summary for reporting."""
    return DemandFilter().get_exclusion_summary()


def get_complete_analysis() -> Dict:
    """Build a fresh DemandFilter and return its complete product analysis, including data quality metrics."""
    return DemandFilter().get_complete_product_analysis()


# Singleton instance for consistency across modules
# Module-level slot holding the shared DemandFilter; stays None until first requested.
_SHARED_FILTER_INSTANCE = None

def get_shared_filter_instance():
    """Return the module-wide DemandFilter, creating it on the first call."""
    global _SHARED_FILTER_INSTANCE
    if _SHARED_FILTER_INSTANCE is not None:
        return _SHARED_FILTER_INSTANCE
    _SHARED_FILTER_INSTANCE = DemandFilter()
    return _SHARED_FILTER_INSTANCE

if __name__ == "__main__":
    # Manual smoke test: run the filter once and show a short preview of each list.
    filter_instance = DemandFilter()
    included_products, included_demand, excluded_products, excluded_demand = filter_instance.filter_products()

    print("\n=== FILTERING TEST RESULTS ===")
    for label, products in (("Included", included_products), ("Excluded", excluded_products)):
        # Show at most the first five IDs, with an ellipsis when truncated
        if len(products) > 5:
            print(f"{label} products: {products[:5]}...")
        else:
            print(f"{label} products: {products}")