File size: 16,525 Bytes
e542954
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
709359a
e542954
 
709359a
 
 
 
 
 
 
 
 
 
e542954
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
709359a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e542954
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
709359a
 
 
 
 
 
 
 
e542954
 
 
 
 
 
 
 
 
 
 
709359a
 
 
e542954
 
 
 
 
 
 
 
 
 
 
 
709359a
 
e542954
 
709359a
 
 
 
e542954
 
 
 
 
 
 
 
 
 
709359a
 
 
 
e542954
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
709359a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e542954
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
"""
Demand Data Filtering Module

This module filters demand data to include only products that are ready for optimization.
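Excludes products that:
1. Are non-standalone masters (masters that still depend on subkits/prepacks)
2. Have no line type assignment (standalone masters must be assigned to the long line)
3. Have zero staffing requirements (both Humanizer and UNICEF staff = 0)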

The filtered data is used by the optimization system.
"""

import pandas as pd
from typing import Dict, List, Tuple
from src.etl import extract


class DemandFilter:
    """
    Filters demand data to include only products ready for optimization.

    A product is kept when it has positive demand, an acceptable line
    assignment and a non-zero staffing requirement; non-standalone masters
    are always excluded. Source data is loaded through ``load_data`` (called
    by ``filter_products``) and cached on the instance.
    """

    # Numeric line ID that standalone masters must be assigned to.
    # NOTE(review): expected to equal src.config.constants.LineType.LONG_LINE - confirm.
    _LONG_LINE_ID = 6

    def __init__(self):
        # All datasets start unloaded; load_data() fills them in together.
        self.demand_data = None        # {material number: summed order quantity}
        self.kit_levels = None         # {product_id: hierarchy level (0, 1 or 2)}
        self.kit_dependencies = None   # {product_id: collection of dependencies}
        self.line_assignments = None   # {kit: numeric line ID}
        self.team_requirements = None  # {team name: {kit: staff count}}

    def _is_fully_loaded(self) -> bool:
        """Return True when every dataset used by the filter has been loaded."""
        return all(
            dataset is not None
            for dataset in (
                self.demand_data,
                self.kit_levels,
                self.kit_dependencies,
                self.line_assignments,
                self.team_requirements,
            )
        )

    @staticmethod
    def _normalize_line_name(line_name: str) -> str:
        """Lower-case a line name and collapse underscores/whitespace to single spaces."""
        return " ".join(line_name.replace("_", " ").lower().split())

    @staticmethod
    def _coerce_staff_count(value):
        """Return a staffing value as-is, treating missing values (None/NaN) as 0."""
        if value is None or pd.isna(value):
            return 0
        return value

    @staticmethod
    def _load_speed_data(purpose: str):
        """Return the per-product speed table, or None (after a printed warning) if unavailable.

        Args:
            purpose: Word inserted into the warning message (e.g. "validation").
        """
        try:
            from src.config import optimization_config
            return optimization_config.PER_PRODUCT_SPEED
        except Exception as e:
            print(f"Warning: Could not load speed data for {purpose}: {e}")
            return None

    def _get_staffing(self, product_id: str) -> Tuple:
        """Return (unicef_staff, humanizer_staff) for a product, with missing values as 0."""
        unicef_staff = self.team_requirements.get('UNICEF Fixed term', {}).get(product_id, 0)
        humanizer_staff = self.team_requirements.get('Humanizer', {}).get(product_id, 0)
        return self._coerce_staff_count(unicef_staff), self._coerce_staff_count(humanizer_staff)

    def load_data(self, force_reload=False):
        """Load all data needed for filtering and cache it on the instance.

        Args:
            force_reload: When True, reload even if a complete cache exists.

        Returns:
            bool: True when data is available (cached or freshly loaded),
            False when loading failed. Failures are printed, not raised, and
            leave any previously cached data untouched.
        """
        try:
            # Only trust the cache when *every* dataset is present; a load that
            # failed halfway must not be mistaken for a usable cache.
            if not force_reload and self._is_fully_loaded():
                print("πŸ“Š Using cached filter data (set force_reload=True to refresh)")
                return True

            print("πŸ”„ Loading fresh filtering data...")
            # Get start date for demand data from optimization config
            from src.config.optimization_config import get_date_span
            _, start_date, _ = get_date_span()
            print(f"πŸ—“οΈ DEMAND FILTERING DATE: Using {start_date.date() if start_date else 'None'} (same as optimization)")

            # Demand: total ordered quantity per material number
            demand_df = extract.read_orders_data(start_date=start_date)
            demand_data = demand_df.groupby('Material Number')["Order quantity (GMEIN)"].sum().to_dict()

            # Kit hierarchy data
            kit_levels, kit_dependencies, _ = extract.get_production_order_data()

            # Line assignments from kit line match data
            kit_line_match = extract.read_kit_line_match_data()
            raw_line_types = kit_line_match.set_index("kit_name")["line_type"].to_dict()

            # Keys are in normalized form (see _normalize_line_name), so
            # "Long_line", "long line", "Mini_load", ... all resolve.
            from src.config.constants import LineType
            line_name_to_id = {
                "long line": LineType.LONG_LINE,
                "mini load": LineType.MINI_LOAD,
                "miniload": LineType.MINI_LOAD,
            }

            line_assignments = {}
            for kit, line_name in raw_line_types.items():
                if isinstance(line_name, str):
                    line_id = line_name_to_id.get(self._normalize_line_name(line_name))
                    if line_id is not None:
                        line_assignments[kit] = line_id
                elif isinstance(line_name, (int, float)) and not pd.isna(line_name):
                    # Already a numeric line ID
                    line_assignments[kit] = int(line_name)

            # Team requirements from Kits Calculation data
            kits_df = extract.read_personnel_requirement_data()
            staffing_by_kit = kits_df.set_index('Kit')
            team_requirements = {
                'UNICEF Fixed term': staffing_by_kit['UNICEF staff'].to_dict(),
                'Humanizer': staffing_by_kit['Humanizer'].to_dict(),
            }

            # Commit everything at once so the instance is never half-loaded.
            self.demand_data = demand_data
            self.kit_levels = kit_levels
            self.kit_dependencies = kit_dependencies
            self.line_assignments = line_assignments
            self.team_requirements = team_requirements
            return True

        except Exception as e:
            # Deliberate best-effort: report and signal failure via the return value.
            print(f"Error loading data for filtering: {str(e)}")
            return False

    def classify_product_type(self, product_id: str) -> Tuple[str, bool]:
        """
        Classify product type and check if it's a standalone master.

        Levels 0, 1 and 2 map to "prepack", "subkit" and "master"; any other
        level is "unknown" and a product without a level is "unclassified".

        Returns:
            Tuple[str, bool]: (product_type, is_standalone_master)
        """
        if product_id not in self.kit_levels:
            return "unclassified", False

        level = self.kit_levels[product_id]
        if level == 0:
            return "prepack", False
        if level == 1:
            return "subkit", False
        if level == 2:
            # A master is standalone when it has no recorded dependencies
            dependencies = self.kit_dependencies.get(product_id, [])
            return "master", len(dependencies) == 0
        return "unknown", False

    def is_product_ready_for_optimization(self, product_id: str) -> Tuple[bool, List[str]]:
        """
        Check if a product is ready for optimization.

        Missing staffing values (None/NaN) count as 0, so a product whose
        staffing data is absent is excluded just like one with explicit zeros.

        Returns:
            Tuple[bool, List[str]]: (is_ready, exclusion_reasons)
        """
        exclusion_reasons = []

        product_type, is_standalone_master = self.classify_product_type(product_id)
        has_line_assignment = product_id in self.line_assignments

        if product_type == "master":
            if not is_standalone_master:
                # Non-standalone masters are never produced directly
                exclusion_reasons.append("Non-standalone master (excluded from production)")
            elif not has_line_assignment:
                exclusion_reasons.append("Standalone master missing line assignment")
            elif self.line_assignments.get(product_id) != self._LONG_LINE_ID:
                exclusion_reasons.append("Standalone master should have long line assignment")
        elif not has_line_assignment:
            # Subkits, prepacks and anything else only need some line assignment
            exclusion_reasons.append("No line assignment")

        unicef_staff, humanizer_staff = self._get_staffing(product_id)
        if unicef_staff + humanizer_staff == 0:
            exclusion_reasons.append("Zero staffing requirements")

        return len(exclusion_reasons) == 0, exclusion_reasons

    def filter_products(self) -> Tuple[List[str], Dict[str, int], List[str], Dict[str, int]]:
        """
        Filter products into included and excluded lists and print a summary.

        Products with demand <= 0 are skipped entirely.

        Returns:
            Tuple containing:
            - included_products: Sorted list of product IDs ready for optimization
            - included_demand: Dict of {product_id: demand} for included products
            - excluded_products: Sorted list of product IDs excluded from optimization
            - excluded_demand: Dict of {product_id: demand} for excluded products

        Raises:
            RuntimeError: If the underlying data could not be loaded.
        """
        if not self.load_data():
            raise RuntimeError("Failed to load data for filtering")

        included_demand = {}
        excluded_demand = {}
        excluded_details = {}

        print("πŸ” FILTERING DEMAND DATA FOR OPTIMIZATION")

        for product_id, demand in self.demand_data.items():
            if demand <= 0:  # Skip products with no demand
                continue

            is_ready, exclusion_reasons = self.is_product_ready_for_optimization(product_id)
            if is_ready:
                included_demand[product_id] = demand
            else:
                excluded_demand[product_id] = demand
                excluded_details[product_id] = exclusion_reasons

        # Sorted for consistent output
        included_products = sorted(included_demand)
        excluded_products = sorted(excluded_demand)

        # Print summary
        total_demand = sum(self.demand_data.values())
        included_total = sum(included_demand.values())
        excluded_total = sum(excluded_demand.values())

        print(f"βœ… INCLUDED in optimization: {len(included_products)} products ({included_total:,} units)")
        print(f"🚫 EXCLUDED from optimization: {len(excluded_products)} products ({excluded_total:,} units)")
        print(f"πŸ“Š Total demand: {total_demand:,} units")

        # Print exclusion breakdown (a product may contribute to several reasons)
        if excluded_products:
            print(f"\nπŸ“‹ EXCLUSION BREAKDOWN:")
            reason_counts = {}
            for reasons in excluded_details.values():
                for reason in reasons:
                    reason_counts[reason] = reason_counts.get(reason, 0) + 1

            for reason, count in reason_counts.items():
                print(f"  β€’ {reason}: {count} products")

        # Data quality warnings for included products
        speed_data = self._load_speed_data("validation")
        if speed_data:
            included_without_speed = sum(1 for pid in included_products if pid not in speed_data)
            if included_without_speed > 0:
                print(f"\n⚠️  DATA QUALITY WARNING: {included_without_speed} included products missing speed data (will use default 106.7 units/hour)")

        included_without_hierarchy = sum(
            1 for pid in included_products if self.classify_product_type(pid)[0] == "unclassified"
        )
        if included_without_hierarchy > 0:
            print(f"⚠️  DATA QUALITY WARNING: {included_without_hierarchy} included products missing hierarchy data")

        return included_products, included_demand, excluded_products, excluded_demand

    def get_filtered_product_list(self) -> List[str]:
        """Get list of products ready for optimization"""
        return self.filter_products()[0]

    def get_filtered_demand_dictionary(self) -> Dict[str, int]:
        """Get demand dictionary for products ready for optimization"""
        return self.filter_products()[1]

    def get_complete_product_analysis(self) -> Dict:
        """Get complete analysis of all products for visualization.

        Returns:
            Dict with included/excluded counts and demand totals, the included
            and excluded product lists, a per-product ``product_details`` dict
            (type, readiness, exclusion reasons, staffing, line assignment,
            hierarchy and speed-data flags) and data quality counts for the
            included products.
        """
        included_products, included_demand, excluded_products, excluded_demand = self.filter_products()

        all_products = {**included_demand, **excluded_demand}
        speed_data = self._load_speed_data("analysis")

        product_details = {}
        for product_id, demand in all_products.items():
            product_type, is_standalone_master = self.classify_product_type(product_id)
            is_ready, exclusion_reasons = self.is_product_ready_for_optimization(product_id)
            unicef_staff, humanizer_staff = self._get_staffing(product_id)
            total_staff = unicef_staff + humanizer_staff
            line_assignment = self.line_assignments.get(product_id)

            product_details[product_id] = {
                'demand': demand,
                'product_type': product_type,
                'is_standalone_master': is_standalone_master,
                'is_included_in_optimization': is_ready,
                'exclusion_reasons': exclusion_reasons,
                'unicef_staff': unicef_staff,
                'humanizer_staff': humanizer_staff,
                'total_staff': total_staff,
                'line_assignment': line_assignment,
                'has_line_assignment': line_assignment is not None,
                'has_staffing': total_staff > 0,
                'has_hierarchy': product_type != "unclassified",
                'has_speed_data': speed_data is not None and product_id in speed_data,
            }

        # Data quality statistics for included products
        included_without_speed = sum(1 for pid in included_products if not product_details[pid]['has_speed_data'])
        included_without_hierarchy = sum(1 for pid in included_products if not product_details[pid]['has_hierarchy'])

        return {
            'included_count': len(included_products),
            'included_demand': sum(included_demand.values()),
            'excluded_count': len(excluded_products),
            'excluded_demand': sum(excluded_demand.values()),
            'total_products': len(all_products),
            'total_demand': sum(all_products.values()),
            'product_details': product_details,
            'standalone_masters_count': sum(1 for p in product_details.values() if p['is_standalone_master']),
            'included_products': included_products,
            'excluded_products': excluded_products,
            # Data quality metrics for included products
            'included_missing_speed_count': included_without_speed,
            'included_missing_hierarchy_count': included_without_hierarchy
        }

    def get_exclusion_summary(self) -> Dict:
        """Get summary of excluded products for reporting.

        Returns:
            Dict with included/excluded counts and demand totals plus
            ``excluded_details``: {product_id: {'demand': ..., 'reasons': [...]}}.
        """
        included_products, included_demand, excluded_products, excluded_demand = self.filter_products()

        excluded_details = {
            product_id: {
                'demand': excluded_demand[product_id],
                'reasons': self.is_product_ready_for_optimization(product_id)[1],
            }
            for product_id in excluded_products
        }

        return {
            'included_count': len(included_products),
            'included_demand': sum(included_demand.values()),
            'excluded_count': len(excluded_products),
            'excluded_demand': sum(excluded_demand.values()),
            'excluded_details': excluded_details
        }


# Convenience functions for easy import
def get_filtered_product_list() -> List[str]:
    """Return the optimization-ready product IDs computed by a newly created DemandFilter."""
    return DemandFilter().get_filtered_product_list()


def get_filtered_demand_dictionary() -> Dict[str, int]:
    """Return {product_id: demand} for optimization-ready products, using a newly created DemandFilter."""
    return DemandFilter().get_filtered_demand_dictionary()


def get_exclusion_summary() -> Dict:
    """Return the excluded-product report produced by a newly created DemandFilter."""
    return DemandFilter().get_exclusion_summary()


def get_complete_analysis() -> Dict:
    """Return the full per-product analysis (with data quality metrics) from a newly created DemandFilter."""
    return DemandFilter().get_complete_product_analysis()


# Module-wide DemandFilter, created on first request by get_shared_filter_instance()
_SHARED_FILTER_INSTANCE = None

def get_shared_filter_instance():
    """Return the module-wide DemandFilter, creating it on the first call."""
    global _SHARED_FILTER_INSTANCE
    if _SHARED_FILTER_INSTANCE is not None:
        return _SHARED_FILTER_INSTANCE
    _SHARED_FILTER_INSTANCE = DemandFilter()
    return _SHARED_FILTER_INSTANCE

if __name__ == "__main__":
    # Manual smoke test: run the filter once and show at most five IDs per group
    filter_instance = DemandFilter()
    included_products, included_demand, excluded_products, excluded_demand = filter_instance.filter_products()

    print(f"\n=== FILTERING TEST RESULTS ===")

    if len(included_products) > 5:
        print(f"Included products: {included_products[:5]}...")
    else:
        print(f"Included products: {included_products}")

    if len(excluded_products) > 5:
        print(f"Excluded products: {excluded_products[:5]}...")
    else:
        print(f"Excluded products: {excluded_products}")