HaLim committed on
Commit
e542954
Β·
1 Parent(s): 5afa2a4

data validation in progress and hierarchy parser fixed

Browse files
app.py CHANGED
@@ -61,7 +61,7 @@ elif page == "πŸ“Š Optimization Results":
61
  elif page == "πŸ“‹ Demand Validation":
62
  # Import and render the demand validation page
63
  try:
64
- from src.demand_validation import display_demand_validation
65
 
66
  st.title("πŸ“‹ Demand Data Validation")
67
  st.markdown("---")
 
61
  elif page == "πŸ“‹ Demand Validation":
62
  # Import and render the demand validation page
63
  try:
64
+ from src.demand_validation_viz import display_demand_validation
65
 
66
  st.title("πŸ“‹ Demand Data Validation")
67
  st.markdown("---")
config_page.py CHANGED
@@ -64,7 +64,7 @@ def render_config_page():
64
  st.markdown("Analyzing your demand data to identify potential optimization issues...")
65
 
66
  try:
67
- from src.demand_validation import display_demand_validation
68
  display_demand_validation()
69
 
70
  # Show validation reminder before optimization
@@ -995,10 +995,10 @@ def check_critical_data_issues():
995
  if src_path not in sys.path:
996
  sys.path.append(src_path)
997
 
998
- from src.demand_validation import DemandValidator
999
 
1000
  # Initialize validator and load data
1001
- validator = DemandValidator()
1002
  if not validator.load_data():
1003
  warnings.append("Failed to load validation data")
1004
  return warnings
 
64
  st.markdown("Analyzing your demand data to identify potential optimization issues...")
65
 
66
  try:
67
+ from src.demand_validation_viz import display_demand_validation
68
  display_demand_validation()
69
 
70
  # Show validation reminder before optimization
 
995
  if src_path not in sys.path:
996
  sys.path.append(src_path)
997
 
998
+ from src.demand_validation_viz import DemandValidationViz
999
 
1000
  # Initialize validator and load data
1001
+ validator = DemandValidationViz()
1002
  if not validator.load_data():
1003
  warnings.append("Failed to load validation data")
1004
  return warnings
optimization_results.py CHANGED
@@ -947,7 +947,7 @@ def display_demand_validation_tab():
947
  Display demand validation in the optimization results tab
948
  """
949
  try:
950
- from src.demand_validation import display_demand_validation
951
  display_demand_validation()
952
  except ImportError as e:
953
  st.error(f"❌ Error loading demand validation module: {str(e)}")
 
947
  Display demand validation in the optimization results tab
948
  """
949
  try:
950
+ from src.demand_validation_viz import display_demand_validation
951
  display_demand_validation()
952
  except ImportError as e:
953
  st.error(f"❌ Error loading demand validation module: {str(e)}")
src/config/optimization_config.py CHANGED
@@ -69,8 +69,17 @@ extract.set_global_dates(start_date, end_date)
69
 
70
  print(f"\nπŸ“… DATE RANGE: {start_date} to {end_date}")
71
  print(f"πŸ“ PRODUCT SOURCE: COOIS_Released_Prod_Orders.csv")
72
- PRODUCT_LIST = transformed_data.get_released_product_list(start_date)
73
- print(f"πŸ“¦ PRODUCTS FOUND: {len(PRODUCT_LIST)} products -> {PRODUCT_LIST}")
 
 
 
 
 
 
 
 
 
74
 
75
 
76
  def get_employee_type_list():
@@ -182,6 +191,7 @@ def get_kit_line_match():
182
  line_name_to_id = {
183
  "long line": LineType.LONG_LINE,
184
  "mini load": LineType.MINI_LOAD,
 
185
  "Long_line": LineType.LONG_LINE, # Alternative naming
186
  "Mini_load": LineType.MINI_LOAD, # Alternative naming
187
  }
@@ -202,8 +212,8 @@ def get_kit_line_match():
202
  # Already numeric
203
  converted_dict[kit] = int(line_name)
204
  else:
205
- # Missing or empty line type - default to long line
206
- converted_dict[kit] = LineType.LONG_LINE
207
 
208
  return converted_dict
209
 
@@ -239,12 +249,20 @@ def get_demand_dictionary():
239
  except Exception as e:
240
  print(f"Could not get demand dictionary from streamlit session: {e}")
241
 
242
- print(f"Loading default demand values from data files")
243
- # Use released orders instead of planned orders for demand
244
- demand_df = extract.read_orders_data(start_date=start_date)
245
- demand_dictionary = demand_df.groupby('Material Number')["Order quantity (GMEIN)"].sum().to_dict()
246
- print(f"πŸ“ˆ DEMAND DATA: {len(demand_dictionary)} products with total demand {sum(demand_dictionary.values())}")
247
- return demand_dictionary
 
 
 
 
 
 
 
 
248
 
249
  DEMAND_DICTIONARY = get_demand_dictionary()
250
  print(f"🎯 FINAL DEMAND: {DEMAND_DICTIONARY}")
 
69
 
70
  print(f"\nπŸ“… DATE RANGE: {start_date} to {end_date}")
71
  print(f"πŸ“ PRODUCT SOURCE: COOIS_Released_Prod_Orders.csv")
72
+
73
+ # Get filtered products ready for optimization
74
+ try:
75
+ from src.demand_filtering import get_filtered_product_list
76
+ PRODUCT_LIST = get_filtered_product_list()
77
+ print(f"πŸ“¦ FILTERED PRODUCTS: {len(PRODUCT_LIST)} products ready for optimization")
78
+ print(f"🎯 Products: {PRODUCT_LIST}")
79
+ except ImportError:
80
+ # Fallback to unfiltered list if demand_filtering not available
81
+ PRODUCT_LIST = transformed_data.get_released_product_list(start_date)
82
+ print(f"πŸ“¦ UNFILTERED PRODUCTS: {len(PRODUCT_LIST)} products -> {PRODUCT_LIST}")
83
 
84
 
85
  def get_employee_type_list():
 
191
  line_name_to_id = {
192
  "long line": LineType.LONG_LINE,
193
  "mini load": LineType.MINI_LOAD,
194
+ "miniload": LineType.MINI_LOAD, # Alternative naming (no space)
195
  "Long_line": LineType.LONG_LINE, # Alternative naming
196
  "Mini_load": LineType.MINI_LOAD, # Alternative naming
197
  }
 
212
  # Already numeric
213
  converted_dict[kit] = int(line_name)
214
  else:
215
+ # Missing or empty line type - skip (no production needed for non-standalone masters)
216
+ pass # Don't add to converted_dict - these kits won't have line assignments
217
 
218
  return converted_dict
219
 
 
249
  except Exception as e:
250
  print(f"Could not get demand dictionary from streamlit session: {e}")
251
 
252
+ print(f"Loading demand values from data files")
253
+
254
+ # Try to get filtered demand (only for products ready for optimization)
255
+ try:
256
+ from src.demand_filtering import get_filtered_demand_dictionary
257
+ demand_dictionary = get_filtered_demand_dictionary()
258
+ print(f"πŸ“ˆ FILTERED DEMAND: {len(demand_dictionary)} products with total demand {sum(demand_dictionary.values())}")
259
+ return demand_dictionary
260
+ except ImportError:
261
+ # Fallback to unfiltered demand if demand_filtering not available
262
+ demand_df = extract.read_orders_data(start_date=start_date)
263
+ demand_dictionary = demand_df.groupby('Material Number')["Order quantity (GMEIN)"].sum().to_dict()
264
+ print(f"πŸ“ˆ UNFILTERED DEMAND: {len(demand_dictionary)} products with total demand {sum(demand_dictionary.values())}")
265
+ return demand_dictionary
266
 
267
  DEMAND_DICTIONARY = get_demand_dictionary()
268
  print(f"🎯 FINAL DEMAND: {DEMAND_DICTIONARY}")
src/demand_filtering.py ADDED
@@ -0,0 +1,312 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Demand Data Filtering Module
3
+
4
+ This module filters demand data to include only products that are ready for optimization.
5
+ Excludes products that:
6
+ 1. Have no line type assignments (non-standalone masters)
7
+ 2. Have zero staffing requirements (both Humanizer and UNICEF staff = 0)
8
+
9
+ The filtered data is used by the optimization system.
10
+ """
11
+
12
+ import pandas as pd
13
+ from typing import Dict, List, Tuple
14
+ from src.etl import extract
15
+
16
+
17
class DemandFilter:
    """
    Filters demand data down to the products that are ready for optimization.

    A product is excluded when it has no production-line assignment (e.g. a
    master kit that is assembled from subkits rather than produced standalone)
    or when both staffing requirements (UNICEF staff and Humanizer) are zero.
    """

    # Numeric id of the "long line" from src.config.constants.LineType.
    # Named here so the standalone-master check does not use a bare magic 6.
    _LONG_LINE_ID = 6

    def __init__(self):
        # All attributes are populated by load_data(); None means "not loaded".
        self.demand_data = None        # {product_id: total demanded units}
        self.kit_levels = None         # {kit_id: hierarchy level 0/1/2}
        self.kit_dependencies = None   # {kit_id: [child kit ids]}
        self.line_assignments = None   # {kit_id: numeric line-type id}
        self.team_requirements = None  # {employee type: {kit_id: headcount}}

    def load_data(self, start_date=None):
        """Load demand, hierarchy, line-assignment and staffing data.

        Args:
            start_date: Optional datetime used to window the demand orders.
                Defaults to 2025-07-07 (the previous hard-coded value) so
                existing callers keep their behavior.

        Returns:
            bool: True when every data source loaded, False otherwise.
        """
        try:
            if start_date is None:
                from datetime import datetime
                start_date = datetime(2025, 7, 7)  # historical default window

            # Demand: total ordered quantity per material number.
            demand_df = extract.read_orders_data(start_date=start_date)
            self.demand_data = demand_df.groupby('Material Number')["Order quantity (GMEIN)"].sum().to_dict()

            # Kit hierarchy: levels plus parent->children dependencies.
            kit_levels, dependencies, _ = extract.get_production_order_data()
            self.kit_levels = kit_levels
            self.kit_dependencies = dependencies

            # Line assignments: normalize free-text line names to numeric ids.
            kit_line_match = extract.read_kit_line_match_data()
            kit_line_match_dict = kit_line_match.set_index("kit_name")["line_type"].to_dict()

            from src.config.constants import LineType
            line_name_to_id = {
                "long line": LineType.LONG_LINE,
                "mini load": LineType.MINI_LOAD,
                "miniload": LineType.MINI_LOAD,
                "Long_line": LineType.LONG_LINE,
                "Mini_load": LineType.MINI_LOAD,
            }

            self.line_assignments = {}
            for kit, line_name in kit_line_match_dict.items():
                if isinstance(line_name, str) and line_name.strip():
                    line_id = line_name_to_id.get(line_name.strip())
                    if line_id is not None:
                        self.line_assignments[kit] = line_id
                elif isinstance(line_name, (int, float)) and not pd.isna(line_name):
                    # Already numeric -- trust it as a line-type id.
                    self.line_assignments[kit] = int(line_name)

            # Per-kit staffing requirements by employee type.
            kits_df = extract.read_personnel_requirement_data()
            self.team_requirements = {
                'UNICEF Fixed term': kits_df.set_index('Kit')['UNICEF staff'].to_dict(),
                'Humanizer': kits_df.set_index('Kit')['Humanizer'].to_dict()
            }

            return True

        except Exception as e:
            print(f"Error loading data for filtering: {str(e)}")
            return False

    def classify_product_type(self, product_id: str) -> Tuple[str, bool]:
        """
        Classify product type and check if it's a standalone master.

        A level-2 kit with no dependencies is a "standalone" master: it is
        produced directly instead of being assembled from subkits/prepacks.

        Returns:
            Tuple[str, bool]: (product_type, is_standalone_master)
        """
        if product_id in self.kit_levels:
            level = self.kit_levels[product_id]

            if level == 0:
                return "prepack", False
            elif level == 1:
                return "subkit", False
            elif level == 2:
                dependencies = self.kit_dependencies.get(product_id, [])
                is_standalone = len(dependencies) == 0
                return "master", is_standalone
            else:
                return "unknown", False
        else:
            # Product appears in demand but not in the hierarchy data.
            return "unclassified", False

    def is_product_ready_for_optimization(self, product_id: str) -> Tuple[bool, List[str]]:
        """
        Check if a product is ready for optimization.

        Returns:
            Tuple[bool, List[str]]: (is_ready, exclusion_reasons) -- the
            reasons list is empty exactly when the product is ready.
        """
        exclusion_reasons = []

        product_type, is_standalone_master = self.classify_product_type(product_id)
        has_line_assignment = product_id in self.line_assignments

        # Masters: standalone ones must be produced on the long line;
        # non-standalone ones are assembled from children and never produced.
        if product_type == "master":
            if is_standalone_master:
                if not has_line_assignment:
                    exclusion_reasons.append("Standalone master missing line assignment")
                elif self.line_assignments.get(product_id) != self._LONG_LINE_ID:
                    exclusion_reasons.append("Standalone master should have long line assignment")
            else:
                exclusion_reasons.append("Non-standalone master (excluded from production)")
        else:
            # Subkits, prepacks and unclassified products need a line.
            if not has_line_assignment:
                exclusion_reasons.append("No line assignment")

        # A product nobody is staffed to build cannot be scheduled.
        unicef_staff = self.team_requirements.get('UNICEF Fixed term', {}).get(product_id, 0)
        humanizer_staff = self.team_requirements.get('Humanizer', {}).get(product_id, 0)
        total_staff = unicef_staff + humanizer_staff

        if total_staff == 0:
            exclusion_reasons.append("Zero staffing requirements")

        is_ready = len(exclusion_reasons) == 0
        return is_ready, exclusion_reasons

    def filter_products(self) -> Tuple[List[str], Dict[str, int], List[str], Dict[str, int]]:
        """
        Filter products into included and excluded lists.

        Returns:
            Tuple containing:
            - included_products: List of product IDs ready for optimization
            - included_demand: Dict of {product_id: demand} for included products
            - excluded_products: List of product IDs excluded from optimization
            - excluded_demand: Dict of {product_id: demand} for excluded products

        Raises:
            RuntimeError: if the underlying data could not be loaded.
        """
        # Load lazily: instances whose data was already loaded (or injected,
        # e.g. in tests) are not reloaded on every accessor call.
        if self.demand_data is None and not self.load_data():
            raise RuntimeError("Failed to load data for filtering")

        included_products = []
        included_demand = {}
        excluded_products = []
        excluded_demand = {}
        excluded_details = {}

        print("🔍 FILTERING DEMAND DATA FOR OPTIMIZATION")

        for product_id, demand in self.demand_data.items():
            if demand <= 0:  # Skip products with no demand
                continue

            is_ready, exclusion_reasons = self.is_product_ready_for_optimization(product_id)

            if is_ready:
                included_products.append(product_id)
                included_demand[product_id] = demand
            else:
                excluded_products.append(product_id)
                excluded_demand[product_id] = demand
                excluded_details[product_id] = exclusion_reasons

        # Sort products for consistent output
        included_products.sort()
        excluded_products.sort()

        # Print summary
        total_demand = sum(self.demand_data.values())
        included_total = sum(included_demand.values())
        excluded_total = sum(excluded_demand.values())

        print(f"✅ INCLUDED in optimization: {len(included_products)} products ({included_total:,} units)")
        print(f"🚫 EXCLUDED from optimization: {len(excluded_products)} products ({excluded_total:,} units)")
        print(f"📊 Total demand: {total_demand:,} units")

        # Print exclusion breakdown
        if excluded_products:
            print(f"\n📋 EXCLUSION BREAKDOWN:")
            reason_counts = {}
            for reasons in excluded_details.values():
                for reason in reasons:
                    reason_counts[reason] = reason_counts.get(reason, 0) + 1

            for reason, count in reason_counts.items():
                print(f"   • {reason}: {count} products")

        return included_products, included_demand, excluded_products, excluded_demand

    def get_filtered_product_list(self) -> List[str]:
        """Get list of products ready for optimization"""
        included_products, _, _, _ = self.filter_products()
        return included_products

    def get_filtered_demand_dictionary(self) -> Dict[str, int]:
        """Get demand dictionary for products ready for optimization"""
        _, included_demand, _, _ = self.filter_products()
        return included_demand

    def get_complete_product_analysis(self) -> Dict:
        """Get complete analysis of all products (included and excluded) for visualization"""
        included_products, included_demand, excluded_products, excluded_demand = self.filter_products()

        all_products = {**included_demand, **excluded_demand}
        product_details = {}

        for product_id, demand in all_products.items():
            product_type, is_standalone_master = self.classify_product_type(product_id)
            is_ready, exclusion_reasons = self.is_product_ready_for_optimization(product_id)

            # Staffing info per employee type.
            unicef_staff = self.team_requirements.get('UNICEF Fixed term', {}).get(product_id, 0)
            humanizer_staff = self.team_requirements.get('Humanizer', {}).get(product_id, 0)

            line_assignment = self.line_assignments.get(product_id)

            product_details[product_id] = {
                'demand': demand,
                'product_type': product_type,
                'is_standalone_master': is_standalone_master,
                'is_included_in_optimization': is_ready,
                'exclusion_reasons': exclusion_reasons,
                'unicef_staff': unicef_staff,
                'humanizer_staff': humanizer_staff,
                'total_staff': unicef_staff + humanizer_staff,
                'line_assignment': line_assignment,
                'has_line_assignment': line_assignment is not None,
                'has_staffing': (unicef_staff + humanizer_staff) > 0,
                'has_hierarchy': product_type != "unclassified"
            }

        return {
            'included_count': len(included_products),
            'included_demand': sum(included_demand.values()),
            'excluded_count': len(excluded_products),
            'excluded_demand': sum(excluded_demand.values()),
            'total_products': len(all_products),
            'total_demand': sum(all_products.values()),
            'product_details': product_details,
            'standalone_masters_count': sum(1 for p in product_details.values() if p['is_standalone_master']),
            'included_products': included_products,
            'excluded_products': excluded_products
        }

    def get_exclusion_summary(self) -> Dict:
        """Get summary of excluded products for reporting"""
        included_products, included_demand, excluded_products, excluded_demand = self.filter_products()

        excluded_details = {}
        for product_id in excluded_products:
            _, reasons = self.is_product_ready_for_optimization(product_id)
            excluded_details[product_id] = {
                'demand': excluded_demand[product_id],
                'reasons': reasons
            }

        return {
            'included_count': len(included_products),
            'included_demand': sum(included_demand.values()),
            'excluded_count': len(excluded_products),
            'excluded_demand': sum(excluded_demand.values()),
            'excluded_details': excluded_details
        }
284
+
285
+
286
+ # Convenience functions for easy import
287
def get_filtered_product_list() -> List[str]:
    """Return the product IDs that passed filtering and can be optimized."""
    return DemandFilter().get_filtered_product_list()
291
+
292
+
293
def get_filtered_demand_dictionary() -> Dict[str, int]:
    """Return {product_id: demand} restricted to optimization-ready products."""
    return DemandFilter().get_filtered_demand_dictionary()
297
+
298
+
299
def get_exclusion_summary() -> Dict:
    """Return a reporting summary of the products excluded from optimization."""
    return DemandFilter().get_exclusion_summary()
303
+
304
+
305
if __name__ == "__main__":
    # Command-line smoke test: run the filter and show a short preview of
    # both product lists (first five entries when there are more than five).
    included, inc_demand, excluded, exc_demand = DemandFilter().filter_products()

    print(f"\n=== FILTERING TEST RESULTS ===")
    for label, products in (("Included", included), ("Excluded", excluded)):
        if len(products) > 5:
            print(f"{label} products: {products[:5]}...")
        else:
            print(f"{label} products: {products}")
src/demand_validation_viz.py ADDED
@@ -0,0 +1,394 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Demand Data Validation Visualization Module
4
+
5
+ This module provides Streamlit visualization for demand data validation,
6
+ showing which products are included/excluded from optimization and why.
7
+ Uses the demand_filtering module for the actual filtering logic.
8
+ """
9
+
10
+ import pandas as pd
11
+ import streamlit as st
12
+ from typing import Dict, List, Tuple, Optional
13
+ import json
14
+ from src.config.constants import ShiftType, LineType, KitLevel
15
+ from src.demand_filtering import DemandFilter
16
+
17
+
18
+ class DemandValidationViz:
19
+ """
20
+ Provides visualization for demand data validation using the filtering module
21
+ """
22
+
23
+ def __init__(self):
24
+ self.filter_instance = DemandFilter()
25
+ self.speed_data = None
26
+
27
+ def load_data(self):
28
+ """Load data needed for visualization"""
29
+ try:
30
+ # Load speed data for visualization
31
+ from src.config import optimization_config
32
+ self.speed_data = optimization_config.PER_PRODUCT_SPEED
33
+
34
+ # Load data in the filter instance
35
+ return self.filter_instance.load_data()
36
+
37
+ except Exception as e:
38
+ error_msg = f"Error loading data: {str(e)}"
39
+ print(error_msg)
40
+ try:
41
+ st.error(error_msg)
42
+ except:
43
+ pass
44
+ return False
45
+
46
+ def classify_product_type(self, product_id: str) -> Tuple[str, str]:
47
+ """Get product classification from filter instance"""
48
+ if product_id in self.filter_instance.kit_levels:
49
+ level = self.filter_instance.kit_levels[product_id]
50
+ level_name = KitLevel.get_name(level)
51
+
52
+ if level == 0:
53
+ return "prepack", level_name
54
+ elif level == 1:
55
+ return "subkit", level_name
56
+ elif level == 2:
57
+ return "master", level_name
58
+ else:
59
+ return "unknown", f"level_{level}"
60
+ else:
61
+ return "unclassified", "no_hierarchy_data"
62
+
63
+ def get_line_assignment(self, product_id: str) -> Tuple[Optional[int], str]:
64
+ """Get line assignment from filter instance"""
65
+ if product_id in self.filter_instance.line_assignments:
66
+ line_type = self.filter_instance.line_assignments[product_id]
67
+ line_name = LineType.get_name(line_type)
68
+ return line_type, line_name
69
+ else:
70
+ return None, "no_assignment"
71
+
72
+ def get_staffing_requirements(self, product_id: str) -> Dict[str, int]:
73
+ """Get staffing requirements from filter instance"""
74
+ staffing = {}
75
+
76
+ for emp_type, products in self.filter_instance.team_requirements.items():
77
+ staffing[emp_type] = products.get(product_id, 0)
78
+
79
+ return staffing
80
+
81
+ def get_production_speed(self, product_id: str) -> Optional[float]:
82
+ """Get production speed for product"""
83
+ return self.speed_data.get(product_id, None)
84
+
85
+ def validate_all_products(self) -> pd.DataFrame:
86
+ """
87
+ Create visualization dataframe using complete analysis from filtering module
88
+ Returns: DataFrame with validation results for display
89
+ """
90
+ # Get complete analysis from filtering module
91
+ analysis = self.filter_instance.get_complete_product_analysis()
92
+ product_details = analysis['product_details']
93
+
94
+ results = []
95
+
96
+ for product_id, details in product_details.items():
97
+ # Get additional data for visualization
98
+ speed = self.get_production_speed(product_id)
99
+ production_hours_needed = None
100
+ if speed and speed > 0:
101
+ production_hours_needed = details['demand'] / speed
102
+
103
+ # Get line type name
104
+ line_type_id = details['line_assignment']
105
+ line_name = "no_assignment"
106
+ if line_type_id is not None:
107
+ from src.config.constants import LineType
108
+ line_name = LineType.get_name(line_type_id)
109
+
110
+ # Get level name
111
+ level_name = "no_hierarchy_data"
112
+ if details['product_type'] != "unclassified":
113
+ if details['product_type'] == "prepack":
114
+ level_name = "prepack"
115
+ elif details['product_type'] == "subkit":
116
+ level_name = "subkit"
117
+ elif details['product_type'] == "master":
118
+ if details['is_standalone_master']:
119
+ level_name = "standalone_master"
120
+ else:
121
+ level_name = "master_with_hierarchy"
122
+ else:
123
+ level_name = f"level_{details['product_type']}"
124
+
125
+ # Overall status
126
+ if not details['is_included_in_optimization']:
127
+ validation_status = f"🚫 Excluded: {', '.join(details['exclusion_reasons'])}"
128
+ else:
129
+ # Check for other issues that don't exclude from optimization
130
+ other_issues = []
131
+ if speed is None:
132
+ other_issues.append("no_speed_data")
133
+ if not details['has_hierarchy']:
134
+ other_issues.append("no_hierarchy_data")
135
+
136
+ if other_issues:
137
+ validation_status = f"⚠️ Issues: {', '.join(other_issues)}"
138
+ else:
139
+ validation_status = "βœ… Included in optimization"
140
+
141
+ results.append({
142
+ 'Product ID': product_id,
143
+ 'Demand': details['demand'],
144
+ 'Product Type': details['product_type'].title(),
145
+ 'Level': level_name,
146
+ 'Is Standalone Master': "Yes" if details['is_standalone_master'] else "No",
147
+ 'Line Type ID': line_type_id if line_type_id else "N/A",
148
+ 'Line Type': line_name,
149
+ 'UNICEF Staff': details['unicef_staff'],
150
+ 'Humanizer Staff': details['humanizer_staff'],
151
+ 'Total Staff': details['total_staff'],
152
+ 'Production Speed (units/hour)': f"{speed:.1f}" if speed else "N/A",
153
+ 'Production Hours Needed': f"{production_hours_needed:.1f}" if production_hours_needed else "N/A",
154
+ 'Has Line Assignment': "βœ…" if details['has_line_assignment'] else "❌",
155
+ 'Has Staffing Data': "βœ…" if details['has_staffing'] else "❌",
156
+ 'Has Speed Data': "βœ…" if speed is not None else "❌",
157
+ 'Has Hierarchy Data': "βœ…" if details['has_hierarchy'] else "❌",
158
+ 'Excluded from Optimization': not details['is_included_in_optimization'],
159
+ 'Exclusion Reasons': ', '.join(details['exclusion_reasons']) if details['exclusion_reasons'] else '',
160
+ 'Validation Status': validation_status
161
+ })
162
+
163
+ df = pd.DataFrame(results)
164
+
165
+ # Sort by exclusion status first, then by demand
166
+ df = df.sort_values(['Excluded from Optimization', 'Demand'], ascending=[False, False])
167
+
168
+ return df
169
+
170
+ def get_summary_statistics(self, df: pd.DataFrame) -> Dict:
171
+ """Generate summary statistics using filtering module analysis"""
172
+
173
+ # Get analysis from filtering module
174
+ analysis = self.filter_instance.get_complete_product_analysis()
175
+
176
+ # Calculate issues for included products only
177
+ included_df = df[df['Excluded from Optimization'] == False]
178
+
179
+ no_line_assignment = len(included_df[included_df['Has Line Assignment'] == "❌"])
180
+ no_staffing = len(included_df[included_df['Has Staffing Data'] == "❌"])
181
+ no_speed = len(included_df[included_df['Has Speed Data'] == "❌"])
182
+ no_hierarchy = len(included_df[included_df['Has Hierarchy Data'] == "❌"])
183
+
184
+ # Product type and line type distributions
185
+ type_counts = df['Product Type'].value_counts().to_dict()
186
+
187
+ # Staffing summary from analysis
188
+ total_unicef_needed = sum(p['unicef_staff'] for p in analysis['product_details'].values())
189
+ total_humanizer_needed = sum(p['humanizer_staff'] for p in analysis['product_details'].values())
190
+
191
+ return {
192
+ 'total_products': analysis['total_products'],
193
+ 'total_demand': analysis['total_demand'],
194
+ 'included_products': analysis['included_count'],
195
+ 'excluded_products': analysis['excluded_count'],
196
+ 'included_demand': analysis['included_demand'],
197
+ 'excluded_demand': analysis['excluded_demand'],
198
+ 'type_counts': type_counts,
199
+ 'no_line_assignment': no_line_assignment,
200
+ 'no_staffing': no_staffing,
201
+ 'no_speed': no_speed,
202
+ 'no_hierarchy': no_hierarchy,
203
+ 'standalone_masters': analysis['standalone_masters_count'],
204
+ 'total_unicef_needed': total_unicef_needed,
205
+ 'total_humanizer_needed': total_humanizer_needed
206
+ }
207
+
208
+
209
+ def display_demand_validation():
210
+ """
211
+ Display demand validation analysis in Streamlit
212
+ """
213
+ st.header("πŸ“‹ Demand Data Validation")
214
+ st.markdown("Comprehensive analysis of products with demand to identify potential optimization issues.")
215
+
216
+ # Initialize validator
217
+ validator = DemandValidationViz()
218
+
219
+ # Load data
220
+ with st.spinner("Loading data for validation..."):
221
+ if not validator.load_data():
222
+ st.error("Failed to load data for validation.")
223
+ return
224
+
225
+ # Perform validation
226
+ with st.spinner("Analyzing demand data..."):
227
+ validation_df = validator.validate_all_products()
228
+ summary_stats = validator.get_summary_statistics(validation_df)
229
+
230
+ # Display summary statistics
231
+ st.subheader("πŸ“Š Summary Statistics")
232
+
233
+ col1, col2, col3, col4 = st.columns(4)
234
+
235
+ with col1:
236
+ st.metric("Total Products", summary_stats['total_products'])
237
+ st.metric("Included in Optimization", summary_stats['included_products'], delta="Ready for optimization")
238
+
239
+ with col2:
240
+ st.metric("Total Demand", f"{summary_stats['total_demand']:,}")
241
+ st.metric("Excluded from Optimization", summary_stats['excluded_products'], delta="Omitted")
242
+
243
+ with col3:
244
+ st.metric("Included Demand", f"{summary_stats['included_demand']:,}", delta="Will be optimized")
245
+ st.metric("UNICEF Staff Needed", summary_stats['total_unicef_needed'])
246
+
247
+ with col4:
248
+ st.metric("Excluded Demand", f"{summary_stats['excluded_demand']:,}", delta="Omitted")
249
+ st.metric("Humanizer Staff Needed", summary_stats['total_humanizer_needed'])
250
+
251
+ # Product type distribution
252
+ st.subheader("πŸ“ˆ Product Type Distribution")
253
+ if summary_stats['type_counts']:
254
+ col1, col2 = st.columns(2)
255
+
256
+ with col1:
257
+ type_df = pd.DataFrame(list(summary_stats['type_counts'].items()),
258
+ columns=['Product Type', 'Count'])
259
+ st.bar_chart(type_df.set_index('Product Type'))
260
+
261
+ with col2:
262
+ for ptype, count in summary_stats['type_counts'].items():
263
+ percentage = (count / summary_stats['total_products']) * 100
264
+ st.write(f"**{ptype}:** {count} products ({percentage:.1f}%)")
265
+
266
+ # Validation issues summary for included products
267
+ st.subheader("⚠️ Data Quality Issues (Products Included in Optimization)")
268
+ st.write("Issues affecting products that **will be** included in optimization:")
269
+
270
+ col1, col2, col3, col4 = st.columns(4)
271
+
272
+ with col1:
273
+ st.metric("No Line Assignment", summary_stats['no_line_assignment'],
274
+ delta=None if summary_stats['no_line_assignment'] == 0 else "Issue")
275
+
276
+ with col2:
277
+ st.metric("No Staffing Data", summary_stats['no_staffing'],
278
+ delta=None if summary_stats['no_staffing'] == 0 else "Issue")
279
+
280
+ with col3:
281
+ st.metric("No Speed Data", summary_stats['no_speed'],
282
+ delta=None if summary_stats['no_speed'] == 0 else "Issue")
283
+
284
+ with col4:
285
+ st.metric("No Hierarchy Data", summary_stats['no_hierarchy'],
286
+ delta=None if summary_stats['no_hierarchy'] == 0 else "Issue")
287
+
288
+ # Separate the results into included and excluded
289
+ included_df = validation_df[validation_df['Excluded from Optimization'] == False].copy()
290
+ excluded_df = validation_df[validation_df['Excluded from Optimization'] == True].copy()
291
+
292
+ # Products Included in Optimization
293
+ st.subheader("βœ… Products Included in Optimization")
294
+ st.write(f"**{len(included_df)} products** will be included in the optimization with total demand of **{included_df['Demand'].sum():,} units**")
295
+
296
+ if len(included_df) > 0:
297
+ # Filter options for included products
298
+ col1, col2 = st.columns(2)
299
+
300
+ with col1:
301
+ included_type_filter = st.selectbox("Filter included by type",
302
+ options=["All"] + list(included_df['Product Type'].unique()),
303
+ key="included_filter")
304
+
305
+ with col2:
306
+ included_min_demand = st.number_input("Minimum demand (included)", min_value=0, value=0, key="included_demand")
307
+
308
+ # Apply filters to included
309
+ filtered_included = included_df.copy()
310
+ if included_type_filter != "All":
311
+ filtered_included = filtered_included[filtered_included['Product Type'] == included_type_filter]
312
+ if included_min_demand > 0:
313
+ filtered_included = filtered_included[filtered_included['Demand'] >= included_min_demand]
314
+
315
+ # Configure column display for included
316
+ included_columns = ['Product ID', 'Demand', 'Product Type', 'Line Type', 'UNICEF Staff', 'Humanizer Staff', 'Production Speed (units/hour)', 'Validation Status']
317
+
318
+ st.dataframe(
319
+ filtered_included[included_columns],
320
+ use_container_width=True,
321
+ height=300
322
+ )
323
+ else:
324
+ st.warning("No products are included in optimization!")
325
+
326
+ # Products Excluded from Optimization
327
+ st.subheader("🚫 Products Excluded from Optimization")
328
+ st.write(f"**{len(excluded_df)} products** are excluded from optimization with total demand of **{excluded_df['Demand'].sum():,} units**")
329
+ st.info("These products are omitted from optimization due to missing line assignments or zero staffing requirements.")
330
+
331
+ if len(excluded_df) > 0:
332
+ # Show exclusion breakdown
333
+ exclusion_reasons = excluded_df['Exclusion Reasons'].value_counts()
334
+ st.write("**Exclusion reasons:**")
335
+ for reason, count in exclusion_reasons.items():
336
+ st.write(f"β€’ {reason}: {count} products")
337
+
338
+ # Configure column display for excluded
339
+ excluded_columns = ['Product ID', 'Demand', 'Product Type', 'Exclusion Reasons', 'UNICEF Staff', 'Humanizer Staff', 'Line Type']
340
+
341
+ st.dataframe(
342
+ excluded_df[excluded_columns],
343
+ use_container_width=True,
344
+ height=200
345
+ )
346
+
347
+ # Export option
348
+ if st.button("πŸ“₯ Export Validation Results to CSV"):
349
+ csv = validation_df.to_csv(index=False)
350
+ st.download_button(
351
+ label="Download CSV",
352
+ data=csv,
353
+ file_name="demand_validation_results.csv",
354
+ mime="text/csv"
355
+ )
356
+ else:
357
+ st.info("No products match the selected filters.")
358
+
359
+ # Recommendations
360
+ st.subheader("πŸ’‘ Recommendations")
361
+
362
+ recommendations = []
363
+
364
+ # Focus on exclusion criteria first
365
+ if summary_stats['excluded_products'] > 0:
366
+ st.warning(f"**Optimization Scope**: {summary_stats['excluded_products']} products ({summary_stats['excluded_demand']:,} units demand) are excluded from optimization.")
367
+
368
+ # Data quality issues for INCLUDED products only
369
+ if summary_stats['no_line_assignment'] > 0:
370
+ recommendations.append(f"**Line Assignment**: {summary_stats['no_line_assignment']} products included in optimization are missing line assignments.")
371
+
372
+ if summary_stats['no_staffing'] > 0:
373
+ recommendations.append(f"**Staffing Data**: {summary_stats['no_staffing']} products included in optimization are missing staffing requirements.")
374
+
375
+ if summary_stats['no_speed'] > 0:
376
+ recommendations.append(f"**Speed Data**: {summary_stats['no_speed']} products included in optimization are missing production speed data (will use defaults).")
377
+
378
+ if summary_stats['no_hierarchy'] > 0:
379
+ recommendations.append(f"**Hierarchy Data**: {summary_stats['no_hierarchy']} products included in optimization are not in the kit hierarchy.")
380
+
381
+ if recommendations:
382
+ for rec in recommendations:
383
+ st.info(rec)
384
+
385
+ # Overall status
386
+ if summary_stats['included_products'] > 0:
387
+ st.success(f"βœ… **Ready for Optimization**: {summary_stats['included_products']} products with {summary_stats['included_demand']:,} units demand are ready for optimization!")
388
+ else:
389
+ st.error("❌ No products are ready for optimization. Please review the exclusion criteria above.")
390
+
391
+
392
+ if __name__ == "__main__":
393
+ # For testing
394
+ display_demand_validation()
src/etl/extract.py CHANGED
@@ -179,11 +179,17 @@ def get_production_order_data():
179
  kit_levels[subkit_id] = 1
180
  dependencies[subkit_id] = subkit_data.get('dependencies', [])
181
 
182
- # Process prepacks (level 0)
183
  for prepack_id in subkit_data.get('prepacks', []):
184
  if prepack_id not in kit_levels: # Avoid overwriting if already exists
185
  kit_levels[prepack_id] = 0
186
  dependencies[prepack_id] = []
 
 
 
 
 
 
187
 
188
  # Create priority order: prepacks first, then subkits, then masters
189
  priority_order = []
 
179
  kit_levels[subkit_id] = 1
180
  dependencies[subkit_id] = subkit_data.get('dependencies', [])
181
 
182
+ # Process prepacks under subkits (level 0)
183
  for prepack_id in subkit_data.get('prepacks', []):
184
  if prepack_id not in kit_levels: # Avoid overwriting if already exists
185
  kit_levels[prepack_id] = 0
186
  dependencies[prepack_id] = []
187
+
188
+ # Process direct prepacks under master (level 0)
189
+ for prepack_id in master_data.get('direct_prepacks', []):
190
+ if prepack_id not in kit_levels: # Avoid overwriting if already exists
191
+ kit_levels[prepack_id] = 0
192
+ dependencies[prepack_id] = []
193
 
194
  # Create priority order: prepacks first, then subkits, then masters
195
  priority_order = []
src/etl/hierarchy_parser.py CHANGED
@@ -86,6 +86,20 @@ class KitHierarchyParser:
86
  # Add prepack to subkit dependencies
87
  if prepack_id not in hierarchy[master_id]['subkits'][subkit_id]['dependencies']:
88
  hierarchy[master_id]['subkits'][subkit_id]['dependencies'].append(prepack_id)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
  # Convert defaultdict to regular dict for JSON serialization
91
  self.hierarchy_json = json.loads(json.dumps(hierarchy, default=dict))
 
86
  # Add prepack to subkit dependencies
87
  if prepack_id not in hierarchy[master_id]['subkits'][subkit_id]['dependencies']:
88
  hierarchy[master_id]['subkits'][subkit_id]['dependencies'].append(prepack_id)
89
+
90
+ elif pd.notna(prepack_id):
91
+ # Handle direct master-prepack relationship (no subkit)
92
+ # Add direct_prepacks list to hierarchy if it doesn't exist
93
+ if 'direct_prepacks' not in hierarchy[master_id]:
94
+ hierarchy[master_id]['direct_prepacks'] = []
95
+
96
+ # Add prepack directly to master
97
+ if prepack_id not in hierarchy[master_id]['direct_prepacks']:
98
+ hierarchy[master_id]['direct_prepacks'].append(prepack_id)
99
+
100
+ # Add prepack to master dependencies
101
+ if prepack_id not in hierarchy[master_id]['dependencies']:
102
+ hierarchy[master_id]['dependencies'].append(prepack_id)
103
 
104
  # Convert defaultdict to regular dict for JSON serialization
105
  self.hierarchy_json = json.loads(json.dumps(hierarchy, default=dict))
src/utils/excel_to_csv_converter.py CHANGED
@@ -35,7 +35,6 @@ def analyze_excel_structure(excel_path):
35
  print(f" - Rows: {len(df)}")
36
  print(f" - Columns: {len(df.columns)}")
37
  print(f" - Column names: {list(df.columns)}")
38
- print()
39
 
40
  return sheet_info
41
 
@@ -97,7 +96,7 @@ def main():
97
 
98
  # Define paths
99
  excel_path = "data/real_data_excel/AI Project document.xlsx"
100
- output_dir = "data/converted_csv"
101
 
102
  # Check if Excel file exists
103
  if not os.path.exists(excel_path):
 
35
  print(f" - Rows: {len(df)}")
36
  print(f" - Columns: {len(df.columns)}")
37
  print(f" - Column names: {list(df.columns)}")
 
38
 
39
  return sheet_info
40
 
 
96
 
97
  # Define paths
98
  excel_path = "data/real_data_excel/AI Project document.xlsx"
99
+ output_dir = "data/real_data_excel/converted_csv"
100
 
101
  # Check if Excel file exists
102
  if not os.path.exists(excel_path):
src/utils/kit_composition_cleaner.py CHANGED
@@ -35,17 +35,25 @@ def load_kit_composition_data(file_path: str) -> pd.DataFrame:
35
  def process_master_kits(df: pd.DataFrame) -> pd.DataFrame:
36
  """
37
  Process Master Kits according to business rules:
38
- - Standalone masters (appear only once): line_type = "long line"
39
- - Non-standalone masters: line_type = "" (empty)
40
  """
41
  print("Processing Master Kits...")
42
 
43
- # Get master kit counts to identify standalone masters
44
- master_counts = df['Master Kit'].value_counts()
45
- standalone_masters = set(master_counts[master_counts == 1].index)
 
46
 
47
- print(f"Total unique Master Kits: {len(master_counts)}")
48
- print(f"Standalone masters (appear only once): {len(standalone_masters)}")
 
 
 
 
 
 
 
49
 
50
  # Create master kit records
51
  master_data = []
 
35
  def process_master_kits(df: pd.DataFrame) -> pd.DataFrame:
36
  """
37
  Process Master Kits according to business rules:
38
+ - Standalone masters (no subkits/prepacks, only components): line_type = "long line"
39
+ - Non-standalone masters (have subkits/prepacks): line_type = "" (empty - no production needed)
40
  """
41
  print("Processing Master Kits...")
42
 
43
+ # Identify masters with hierarchy (subkits or prepacks)
44
+ masters_with_subkits = set(df[df['Sub kit'].notna()]['Master Kit'].unique())
45
+ masters_with_prepacks = set(df[df['Prepack'].notna()]['Master Kit'].unique())
46
+ masters_with_hierarchy = masters_with_subkits.union(masters_with_prepacks)
47
 
48
+ # All masters
49
+ all_masters = set(df['Master Kit'].unique())
50
+
51
+ # Standalone masters are those WITHOUT subkits/prepacks (only have components)
52
+ standalone_masters = all_masters - masters_with_hierarchy
53
+
54
+ print(f"Total unique Master Kits: {len(all_masters)}")
55
+ print(f"Masters with subkits/prepacks: {len(masters_with_hierarchy)}")
56
+ print(f"Standalone masters (only components): {len(standalone_masters)}")
57
 
58
  # Create master kit records
59
  master_data = []