haileyhalimj@gmail.com committed on
Commit
acd1110
·
1 Parent(s): c9c4af4

Restore improved optimizer and simplify demand validation

Browse files

Major improvements:
- Remove IDLE employee tracking from optimizer_real.py
- Improve variable naming: Z/T/U → Assignment/Hours/Units
- Add new employee tracking system: EMPLOYEE_COUNT and EMPLOYEE_HOURS
- Simplify demand_validation_viz.py (371→270 lines)
- Remove idle employee display from optimization_results.py

Code reorganization:
- Rename src/utils to src/preprocess for better organization

Testing: ✅ Optimization runs successfully with 3 products, €419.10 total cost

optimization_results.py CHANGED
@@ -104,8 +104,6 @@ def display_weekly_summary(results):
104
  # Calculate cost per unit
105
  cost_per_unit = total_cost / total_production if total_production > 0 else 0
106
  st.metric("Cost per Unit", f"€{cost_per_unit:.2f}")
107
-
108
- # Remove col5 - no idle employees metrics needed
109
 
110
  # Production vs Demand Chart
111
  st.subheader("🎯 Production vs Demand")
@@ -603,42 +601,7 @@ def display_cost_analysis(results):
603
  'Cost': round(cost, 2)
604
  })
605
 
606
- # Add idle employee costs to the breakdown
607
- if 'idle_employees' in results:
608
- # COST_LIST_PER_EMP_SHIFT already loaded above as dynamic call
609
-
610
- for idle in results['idle_employees']:
611
- if idle['idle_count'] > 0:
612
- emp_type = idle['emp_type']
613
- shift = idle['shift']
614
- day = idle['day']
615
- idle_count = idle['idle_count']
616
-
617
- # Get hourly rate and shift name
618
- hourly_rate = COST_LIST_PER_EMP_SHIFT.get(emp_type, {}).get(shift, 0)
619
- shift_name = shift_names.get(shift, f"Shift {shift}")
620
-
621
- # Idle employees work 0 hours but get paid for full shift
622
- actual_hours = 0
623
- paid_hours = 7.5 # Assuming standard shift length
624
- idle_cost = idle_count * paid_hours * hourly_rate
625
-
626
- if emp_type not in total_cost_by_type:
627
- total_cost_by_type[emp_type] = 0
628
- total_cost_by_type[emp_type] += idle_cost
629
-
630
- cost_data.append({
631
- 'Employee Type': emp_type,
632
- 'Day': f"Day {day}",
633
- 'Shift': f"{shift_name} (Idle)",
634
- 'Line': '-', # No line assignment for idle
635
- 'Product': '-', # No product for idle
636
- 'Actual Hours': actual_hours,
637
- 'Paid Hours': round(paid_hours, 2),
638
- 'Workers': int(idle_count),
639
- 'Hourly Rate': f"€{hourly_rate:.2f}",
640
- 'Cost': round(idle_cost, 2)
641
- })
642
 
643
  # Total cost metrics
644
  total_cost = results['objective']
 
104
  # Calculate cost per unit
105
  cost_per_unit = total_cost / total_production if total_production > 0 else 0
106
  st.metric("Cost per Unit", f"€{cost_per_unit:.2f}")
 
 
107
 
108
  # Production vs Demand Chart
109
  st.subheader("🎯 Production vs Demand")
 
601
  'Cost': round(cost, 2)
602
  })
603
 
604
+ # Note: Idle employee tracking removed - we only track employees actually working on production
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
605
 
606
  # Total cost metrics
607
  total_cost = results['objective']
src/demand_validation_viz.py CHANGED
@@ -2,23 +2,33 @@
2
  """
3
  Demand Data Validation Visualization Module
4
 
5
- This module provides Streamlit visualization for demand data validation,
6
- showing which products are included/excluded from optimization and why.
7
- Uses the demand_filtering module for the actual filtering logic.
8
  """
9
 
10
  import pandas as pd
11
  import streamlit as st
12
- from typing import Dict, List, Tuple, Optional
13
- import json
14
- from src.config.constants import ShiftType, LineType, KitLevel
15
  from src.demand_filtering import DemandFilter
16
 
17
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  class DemandValidationViz:
19
  """
20
- Provides visualization for demand data validation using the filtering module.
21
- This class focuses purely on displaying the results from demand filtering.
22
  """
23
 
24
  def __init__(self):
@@ -26,90 +36,61 @@ class DemandValidationViz:
26
  self.speed_data = None
27
 
28
  def load_data(self):
29
- """Load data needed for visualization"""
30
  try:
31
- # Load speed data for visualization
32
  from src.config import optimization_config
33
  self.speed_data = optimization_config.get_per_product_speed()
34
-
35
- # Load data in the filter instance
36
  return self.filter_instance.load_data()
37
-
38
  except Exception as e:
39
  error_msg = f"Error loading data: {str(e)}"
40
  print(error_msg)
41
- try:
42
  st.error(error_msg)
43
- except:
44
- pass
45
  return False
46
 
47
- # Remove duplicate methods - use filter_instance data directly
48
-
49
- def get_production_speed(self, product_id: str) -> Optional[float]:
50
- """Get production speed for product"""
51
- return self.speed_data.get(product_id, None)
52
-
53
  def validate_all_products(self) -> pd.DataFrame:
54
  """
55
- Create visualization dataframe using complete analysis from filtering module
56
- Returns: DataFrame with validation results for display
57
  """
58
- # Get complete analysis from filtering module
59
  analysis = self.filter_instance.get_complete_product_analysis()
60
  product_details = analysis['product_details']
61
 
62
  results = []
63
-
64
  for product_id, details in product_details.items():
65
- # Get additional data for visualization
66
- speed = self.get_production_speed(product_id)
67
- production_hours_needed = None
68
- if speed and speed > 0:
69
- production_hours_needed = details['demand'] / speed
70
 
71
  # Get line type name
72
  line_type_id = details['line_assignment']
73
- line_name = "no_assignment"
74
- if line_type_id is not None:
75
- from src.config.constants import LineType
76
- line_name = LineType.get_name(line_type_id)
77
 
78
- # Get level name
79
- level_name = "no_hierarchy_data"
80
- if details['product_type'] != "unclassified":
81
- if details['product_type'] == "prepack":
82
- level_name = "prepack"
83
- elif details['product_type'] == "subkit":
84
- level_name = "subkit"
85
- elif details['product_type'] == "master":
86
- if details['is_standalone_master']:
87
- level_name = "standalone_master"
88
- else:
89
- level_name = "master_with_hierarchy"
90
- else:
91
- level_name = f"level_{details['product_type']}"
92
 
93
- # Overall status with enhanced detail
94
  if not details['is_included_in_optimization']:
95
  validation_status = f"🚫 Excluded: {', '.join(details['exclusion_reasons'])}"
96
  else:
97
- # Check for data quality issues that don't exclude from optimization
98
- data_quality_issues = []
99
  if speed is None:
100
- data_quality_issues.append("missing_speed_data (will use default)")
101
  if not details['has_hierarchy']:
102
- data_quality_issues.append("no_hierarchy_data")
103
-
104
- if data_quality_issues:
105
- validation_status = f"⚠️ Data Issues: {', '.join(data_quality_issues)}"
106
- else:
107
- validation_status = "βœ… Ready for optimization"
108
 
109
  results.append({
110
  'Product ID': product_id,
111
  'Demand': details['demand'],
112
- 'Product Type': details['product_type'].title(),
113
  'Level': level_name,
114
  'Is Standalone Master': "Yes" if details['is_standalone_master'] else "No",
115
  'Line Type ID': line_type_id if line_type_id else "N/A",
@@ -118,45 +99,26 @@ class DemandValidationViz:
118
  'Humanizer Staff': details['humanizer_staff'],
119
  'Total Staff': details['total_staff'],
120
  'Production Speed (units/hour)': f"{speed:.1f}" if speed else "N/A",
121
- 'Production Hours Needed': f"{production_hours_needed:.1f}" if production_hours_needed else "N/A",
122
  'Has Line Assignment': "βœ…" if details['has_line_assignment'] else "❌",
123
  'Has Staffing Data': "βœ…" if details['has_staffing'] else "❌",
124
  'Has Speed Data': "βœ…" if speed is not None else "❌ (will use default)",
125
  'Has Hierarchy Data': "βœ…" if details['has_hierarchy'] else "❌",
126
  'Excluded from Optimization': not details['is_included_in_optimization'],
127
  'Exclusion Reasons': ', '.join(details['exclusion_reasons']) if details['exclusion_reasons'] else '',
128
- 'Data Quality Issues': ', '.join(data_quality_issues) if details['is_included_in_optimization'] and data_quality_issues else '',
129
  'Validation Status': validation_status
130
  })
131
 
132
  df = pd.DataFrame(results)
133
-
134
- # Sort by exclusion status first, then by demand
135
  df = df.sort_values(['Excluded from Optimization', 'Demand'], ascending=[False, False])
136
-
137
  return df
138
 
139
  def get_summary_statistics(self, df: pd.DataFrame) -> Dict:
140
- """Generate summary statistics using filtering module analysis"""
141
-
142
- # Get analysis from filtering module
143
  analysis = self.filter_instance.get_complete_product_analysis()
144
-
145
- # Calculate issues for included products only
146
  included_df = df[df['Excluded from Optimization'] == False]
147
 
148
- no_line_assignment = len(included_df[included_df['Has Line Assignment'] == "❌"])
149
- no_staffing = len(included_df[included_df['Has Staffing Data'] == "❌"])
150
- no_speed = len(included_df[included_df['Has Speed Data'] == "❌"])
151
- no_hierarchy = len(included_df[included_df['Has Hierarchy Data'] == "❌"])
152
-
153
- # Product type and line type distributions
154
- type_counts = df['Product Type'].value_counts().to_dict()
155
-
156
- # Staffing summary from analysis
157
- total_unicef_needed = sum(p['unicef_staff'] for p in analysis['product_details'].values())
158
- total_humanizer_needed = sum(p['humanizer_staff'] for p in analysis['product_details'].values())
159
-
160
  return {
161
  'total_products': analysis['total_products'],
162
  'total_demand': analysis['total_demand'],
@@ -164,205 +126,142 @@ class DemandValidationViz:
164
  'excluded_products': analysis['excluded_count'],
165
  'included_demand': analysis['included_demand'],
166
  'excluded_demand': analysis['excluded_demand'],
167
- 'type_counts': type_counts,
168
- 'no_line_assignment': no_line_assignment,
169
- 'no_staffing': no_staffing,
170
- 'no_speed': no_speed,
171
- 'no_hierarchy': no_hierarchy,
172
  'standalone_masters': analysis['standalone_masters_count'],
173
- 'total_unicef_needed': total_unicef_needed,
174
- 'total_humanizer_needed': total_humanizer_needed
175
  }
176
 
177
 
178
  def display_demand_validation():
179
  """
180
  Display demand validation analysis in Streamlit.
181
- Shows the results of demand filtering with detailed data quality analysis.
182
  """
183
  st.header("πŸ“‹ Demand Data Validation")
184
- st.markdown("""**Analysis of Filtered Demand Data**: This page shows which products are included/excluded from optimization
185
- based on the demand filtering criteria, plus data quality assessment for included products.""")
186
 
187
- # Initialize validator
188
  validator = DemandValidationViz()
189
-
190
- # Load data
191
- with st.spinner("Loading data for validation..."):
192
  if not validator.load_data():
193
  st.error("Failed to load data for validation.")
194
  return
195
-
196
- # Perform validation
197
- with st.spinner("Analyzing demand data..."):
198
  validation_df = validator.validate_all_products()
199
- summary_stats = validator.get_summary_statistics(validation_df)
200
 
201
- # Display summary statistics
202
  st.subheader("πŸ“Š Summary Statistics")
203
-
204
  col1, col2, col3, col4 = st.columns(4)
205
-
206
- with col1:
207
- st.metric("Total Products", summary_stats['total_products'])
208
- st.metric("Included in Optimization", summary_stats['included_products'], delta="Ready for optimization")
209
-
210
- with col2:
211
- st.metric("Total Demand", f"{summary_stats['total_demand']:,}")
212
- st.metric("Excluded from Optimization", summary_stats['excluded_products'], delta="Omitted")
213
-
214
- with col3:
215
- st.metric("Included Demand", f"{summary_stats['included_demand']:,}", delta="Will be optimized")
216
- st.metric("UNICEF Staff Needed", summary_stats['total_unicef_needed'])
217
-
218
- with col4:
219
- st.metric("Excluded Demand", f"{summary_stats['excluded_demand']:,}", delta="Omitted")
220
- st.metric("Humanizer Staff Needed", summary_stats['total_humanizer_needed'])
221
-
222
- # Product type distribution
223
  st.subheader("πŸ“ˆ Product Type Distribution")
224
- if summary_stats['type_counts']:
225
  col1, col2 = st.columns(2)
226
-
227
  with col1:
228
- type_df = pd.DataFrame(list(summary_stats['type_counts'].items()),
229
- columns=['Product Type', 'Count'])
230
  st.bar_chart(type_df.set_index('Product Type'))
231
-
232
  with col2:
233
- for ptype, count in summary_stats['type_counts'].items():
234
- percentage = (count / summary_stats['total_products']) * 100
235
  st.write(f"**{ptype}:** {count} products ({percentage:.1f}%)")
236
 
237
- # Validation issues summary for included products
238
- st.subheader("⚠️ Data Quality Issues (Products Included in Optimization)")
239
- st.write("Data quality issues affecting products that **will be** included in optimization (these don't exclude products but may affect optimization quality):")
240
-
241
  col1, col2, col3, col4 = st.columns(4)
242
-
243
- with col1:
244
- st.metric("No Line Assignment", summary_stats['no_line_assignment'],
245
- delta=None if summary_stats['no_line_assignment'] == 0 else "Issue")
246
-
247
- with col2:
248
- st.metric("No Staffing Data", summary_stats['no_staffing'],
249
- delta=None if summary_stats['no_staffing'] == 0 else "Issue")
250
-
251
- with col3:
252
- st.metric("No Speed Data", summary_stats['no_speed'],
253
- delta=None if summary_stats['no_speed'] == 0 else "Will use default")
254
-
255
- with col4:
256
- st.metric("No Hierarchy Data", summary_stats['no_hierarchy'],
257
- delta=None if summary_stats['no_hierarchy'] == 0 else "Issue")
258
-
259
- # Separate the results into included and excluded
260
  included_df = validation_df[validation_df['Excluded from Optimization'] == False].copy()
261
  excluded_df = validation_df[validation_df['Excluded from Optimization'] == True].copy()
262
 
263
- # Products Included in Optimization
264
  st.subheader("βœ… Products Included in Optimization")
265
- st.write(f"**{len(included_df)} products** will be included in the optimization with total demand of **{included_df['Demand'].sum():,} units**")
266
 
267
  if len(included_df) > 0:
268
- # Filter options for included products
269
  col1, col2 = st.columns(2)
 
 
270
 
271
- with col1:
272
- included_type_filter = st.selectbox("Filter included by type",
273
- options=["All"] + list(included_df['Product Type'].unique()),
274
- key="included_filter")
275
-
276
- with col2:
277
- included_min_demand = st.number_input("Minimum demand (included)", min_value=0, value=0, key="included_demand")
278
-
279
- # Apply filters to included
280
- filtered_included = included_df.copy()
281
- if included_type_filter != "All":
282
- filtered_included = filtered_included[filtered_included['Product Type'] == included_type_filter]
283
- if included_min_demand > 0:
284
- filtered_included = filtered_included[filtered_included['Demand'] >= included_min_demand]
285
 
286
- # Configure column display for included
287
- included_columns = ['Product ID', 'Demand', 'Product Type', 'Line Type', 'UNICEF Staff', 'Humanizer Staff', 'Production Speed (units/hour)', 'Data Quality Issues', 'Validation Status']
288
-
289
- st.dataframe(
290
- filtered_included[included_columns],
291
- use_container_width=True,
292
- height=300
293
- )
294
  else:
295
  st.warning("No products are included in optimization!")
296
 
297
- # Products Excluded from Optimization
298
  st.subheader("🚫 Products Excluded from Optimization")
299
- st.write(f"**{len(excluded_df)} products** are excluded from optimization with total demand of **{excluded_df['Demand'].sum():,} units**")
300
- st.info("""These products are **filtered out** from optimization due to:
301
- β€’ Missing line assignments (for non-standalone masters)
302
- β€’ Zero staffing requirements (both UNICEF and Humanizer staff = 0)
303
- β€’ Non-standalone masters (excluded from production planning)""")
304
 
305
  if len(excluded_df) > 0:
306
  # Show exclusion breakdown
307
- exclusion_reasons = excluded_df['Exclusion Reasons'].value_counts()
308
  st.write("**Exclusion reasons:**")
309
- for reason, count in exclusion_reasons.items():
310
  st.write(f"β€’ {reason}: {count} products")
311
 
312
- # Configure column display for excluded
313
- excluded_columns = ['Product ID', 'Demand', 'Product Type', 'Exclusion Reasons', 'UNICEF Staff', 'Humanizer Staff', 'Line Type']
314
-
315
- st.dataframe(
316
- excluded_df[excluded_columns],
317
- use_container_width=True,
318
- height=200
319
- )
320
 
321
- # Export option
322
  if st.button("πŸ“₯ Export Validation Results to CSV"):
323
- csv = validation_df.to_csv(index=False)
324
- st.download_button(
325
- label="Download CSV",
326
- data=csv,
327
- file_name="demand_validation_results.csv",
328
- mime="text/csv"
329
- )
330
- else:
331
- st.info("No products match the selected filters.")
332
 
333
- # Recommendations
334
  st.subheader("πŸ’‘ Recommendations")
335
 
336
- recommendations = []
337
-
338
- # Focus on exclusion criteria first
339
- if summary_stats['excluded_products'] > 0:
340
- st.warning(f"**Optimization Scope**: {summary_stats['excluded_products']} products ({summary_stats['excluded_demand']:,} units demand) are excluded from optimization.")
341
-
342
- # Data quality issues for INCLUDED products only
343
- if summary_stats['no_line_assignment'] > 0:
344
- recommendations.append(f"**Line Assignment**: {summary_stats['no_line_assignment']} products included in optimization are missing line assignments.")
345
-
346
- if summary_stats['no_staffing'] > 0:
347
- recommendations.append(f"**Staffing Data**: {summary_stats['no_staffing']} products included in optimization are missing staffing requirements.")
348
-
349
- if summary_stats['no_speed'] > 0:
350
- recommendations.append(f"**Speed Data**: {summary_stats['no_speed']} products included in optimization are missing production speed data. The optimization will use a default speed of 106.7 units/hour for these products.")
351
-
352
- if summary_stats['no_hierarchy'] > 0:
353
- recommendations.append(f"**Hierarchy Data**: {summary_stats['no_hierarchy']} products included in optimization are not in the kit hierarchy.")
354
 
355
- if recommendations:
356
- for rec in recommendations:
357
- st.info(rec)
 
 
 
 
 
 
358
 
359
  # Overall status
360
- if summary_stats['included_products'] > 0:
361
- st.success(f"βœ… **Optimization Scope**: {summary_stats['included_products']} products with {summary_stats['included_demand']:,} units demand will be included in optimization!")
362
- if summary_stats['no_speed'] == 0 and summary_stats['no_hierarchy'] == 0:
363
- st.info("πŸŽ‰ All included products have complete data - optimization should run smoothly!")
364
  else:
365
- st.error("❌ No products passed the filtering criteria. Please review the exclusion reasons above and check your data configuration.")
366
 
367
 
368
  if __name__ == "__main__":
 
2
  """
3
  Demand Data Validation Visualization Module
4
 
5
+ Provides Streamlit visualization for demand data validation.
6
+ Shows which products are included/excluded from optimization and why.
 
7
  """
8
 
9
  import pandas as pd
10
  import streamlit as st
11
+ from typing import Dict
12
+ from src.config.constants import LineType
 
13
  from src.demand_filtering import DemandFilter
14
 
15
 
16
+ # Simple mapping for product level names
17
+ LEVEL_NAMES = {
18
+ 'prepack': 'prepack',
19
+ 'subkit': 'subkit',
20
+ 'master': {
21
+ 'standalone': 'standalone_master',
22
+ 'with_hierarchy': 'master_with_hierarchy'
23
+ },
24
+ 'unclassified': 'no_hierarchy_data'
25
+ }
26
+
27
+
28
  class DemandValidationViz:
29
  """
30
+ Simple visualization wrapper for demand filtering results.
31
+ All filtering logic is in DemandFilter - this just displays the results.
32
  """
33
 
34
  def __init__(self):
 
36
  self.speed_data = None
37
 
38
  def load_data(self):
39
+ """Load all data needed for visualization"""
40
  try:
 
41
  from src.config import optimization_config
42
  self.speed_data = optimization_config.get_per_product_speed()
 
 
43
  return self.filter_instance.load_data()
 
44
  except Exception as e:
45
  error_msg = f"Error loading data: {str(e)}"
46
  print(error_msg)
47
+ if st:
48
  st.error(error_msg)
 
 
49
  return False
50
 
 
 
 
 
 
 
51
  def validate_all_products(self) -> pd.DataFrame:
52
  """
53
+ Create DataFrame with validation results for all products.
54
+ Main visualization method - converts filtering results to displayable format.
55
  """
56
+ # Get analysis from filtering module
57
  analysis = self.filter_instance.get_complete_product_analysis()
58
  product_details = analysis['product_details']
59
 
60
  results = []
 
61
  for product_id, details in product_details.items():
62
+ # Calculate production hours if speed data available
63
+ speed = self.speed_data.get(product_id) if self.speed_data else None
64
+ production_hours = (details['demand'] / speed) if speed and speed > 0 else None
 
 
65
 
66
  # Get line type name
67
  line_type_id = details['line_assignment']
68
+ line_name = LineType.get_name(line_type_id) if line_type_id is not None else "no_assignment"
 
 
 
69
 
70
+ # Get level name (simplified)
71
+ ptype = details['product_type']
72
+ if ptype == 'unclassified':
73
+ level_name = LEVEL_NAMES['unclassified']
74
+ elif ptype == 'master':
75
+ level_name = LEVEL_NAMES['master']['standalone' if details['is_standalone_master'] else 'with_hierarchy']
76
+ else:
77
+ level_name = LEVEL_NAMES.get(ptype, f"level_{ptype}")
 
 
 
 
 
 
78
 
79
+ # Build validation status message
80
  if not details['is_included_in_optimization']:
81
  validation_status = f"🚫 Excluded: {', '.join(details['exclusion_reasons'])}"
82
  else:
83
+ issues = []
 
84
  if speed is None:
85
+ issues.append("missing_speed_data (will use default)")
86
  if not details['has_hierarchy']:
87
+ issues.append("no_hierarchy_data")
88
+ validation_status = f"⚠️ Data Issues: {', '.join(issues)}" if issues else "βœ… Ready for optimization"
 
 
 
 
89
 
90
  results.append({
91
  'Product ID': product_id,
92
  'Demand': details['demand'],
93
+ 'Product Type': ptype.title(),
94
  'Level': level_name,
95
  'Is Standalone Master': "Yes" if details['is_standalone_master'] else "No",
96
  'Line Type ID': line_type_id if line_type_id else "N/A",
 
99
  'Humanizer Staff': details['humanizer_staff'],
100
  'Total Staff': details['total_staff'],
101
  'Production Speed (units/hour)': f"{speed:.1f}" if speed else "N/A",
102
+ 'Production Hours Needed': f"{production_hours:.1f}" if production_hours else "N/A",
103
  'Has Line Assignment': "βœ…" if details['has_line_assignment'] else "❌",
104
  'Has Staffing Data': "βœ…" if details['has_staffing'] else "❌",
105
  'Has Speed Data': "βœ…" if speed is not None else "❌ (will use default)",
106
  'Has Hierarchy Data': "βœ…" if details['has_hierarchy'] else "❌",
107
  'Excluded from Optimization': not details['is_included_in_optimization'],
108
  'Exclusion Reasons': ', '.join(details['exclusion_reasons']) if details['exclusion_reasons'] else '',
109
+ 'Data Quality Issues': ', '.join(issues) if details['is_included_in_optimization'] and 'issues' in locals() and issues else '',
110
  'Validation Status': validation_status
111
  })
112
 
113
  df = pd.DataFrame(results)
 
 
114
  df = df.sort_values(['Excluded from Optimization', 'Demand'], ascending=[False, False])
 
115
  return df
116
 
117
  def get_summary_statistics(self, df: pd.DataFrame) -> Dict:
118
+ """Calculate summary statistics from validation results"""
 
 
119
  analysis = self.filter_instance.get_complete_product_analysis()
 
 
120
  included_df = df[df['Excluded from Optimization'] == False]
121
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  return {
123
  'total_products': analysis['total_products'],
124
  'total_demand': analysis['total_demand'],
 
126
  'excluded_products': analysis['excluded_count'],
127
  'included_demand': analysis['included_demand'],
128
  'excluded_demand': analysis['excluded_demand'],
129
+ 'type_counts': df['Product Type'].value_counts().to_dict(),
130
+ 'no_line_assignment': len(included_df[included_df['Has Line Assignment'] == "❌"]),
131
+ 'no_staffing': len(included_df[included_df['Has Staffing Data'] == "❌"]),
132
+ 'no_speed': len(included_df[included_df['Has Speed Data'].str.contains("❌")]),
133
+ 'no_hierarchy': len(included_df[included_df['Has Hierarchy Data'] == "❌"]),
134
  'standalone_masters': analysis['standalone_masters_count'],
135
+ 'total_unicef_needed': sum(p['unicef_staff'] for p in analysis['product_details'].values()),
136
+ 'total_humanizer_needed': sum(p['humanizer_staff'] for p in analysis['product_details'].values())
137
  }
138
 
139
 
140
  def display_demand_validation():
141
  """
142
  Display demand validation analysis in Streamlit.
143
+ Main entry point for the validation page.
144
  """
145
  st.header("πŸ“‹ Demand Data Validation")
146
+ st.markdown("Analysis showing which products are included/excluded from optimization and data quality status.")
 
147
 
148
+ # Load and analyze data
149
  validator = DemandValidationViz()
150
+ with st.spinner("Loading and analyzing data..."):
 
 
151
  if not validator.load_data():
152
  st.error("Failed to load data for validation.")
153
  return
 
 
 
154
  validation_df = validator.validate_all_products()
155
+ stats = validator.get_summary_statistics(validation_df)
156
 
157
+ # ===== SUMMARY METRICS =====
158
  st.subheader("πŸ“Š Summary Statistics")
 
159
  col1, col2, col3, col4 = st.columns(4)
160
+ col1.metric("Total Products", stats['total_products'])
161
+ col1.metric("Included in Optimization", stats['included_products'], delta="Ready")
162
+ col2.metric("Total Demand", f"{stats['total_demand']:,}")
163
+ col2.metric("Excluded from Optimization", stats['excluded_products'], delta="Omitted")
164
+ col3.metric("Included Demand", f"{stats['included_demand']:,}", delta="Will be optimized")
165
+ col3.metric("UNICEF Staff Needed", stats['total_unicef_needed'])
166
+ col4.metric("Excluded Demand", f"{stats['excluded_demand']:,}", delta="Omitted")
167
+ col4.metric("Humanizer Staff Needed", stats['total_humanizer_needed'])
168
+
169
+ # ===== PRODUCT TYPE DISTRIBUTION =====
 
 
 
 
 
 
 
 
170
  st.subheader("πŸ“ˆ Product Type Distribution")
171
+ if stats['type_counts']:
172
  col1, col2 = st.columns(2)
 
173
  with col1:
174
+ type_df = pd.DataFrame(list(stats['type_counts'].items()), columns=['Product Type', 'Count'])
 
175
  st.bar_chart(type_df.set_index('Product Type'))
 
176
  with col2:
177
+ for ptype, count in stats['type_counts'].items():
178
+ percentage = (count / stats['total_products']) * 100
179
  st.write(f"**{ptype}:** {count} products ({percentage:.1f}%)")
180
 
181
+ # ===== DATA QUALITY ISSUES (for included products only) =====
182
+ st.subheader("⚠️ Data Quality Issues (Included Products)")
183
+ st.write("Issues affecting products that **will be** included in optimization:")
 
184
  col1, col2, col3, col4 = st.columns(4)
185
+ col1.metric("No Line Assignment", stats['no_line_assignment'],
186
+ delta=None if stats['no_line_assignment'] == 0 else "Issue")
187
+ col2.metric("No Staffing Data", stats['no_staffing'],
188
+ delta=None if stats['no_staffing'] == 0 else "Issue")
189
+ col3.metric("No Speed Data", stats['no_speed'],
190
+ delta=None if stats['no_speed'] == 0 else "Will use default")
191
+ col4.metric("No Hierarchy Data", stats['no_hierarchy'],
192
+ delta=None if stats['no_hierarchy'] == 0 else "Issue")
193
+
194
+ # ===== INCLUDED PRODUCTS TABLE =====
 
 
 
 
 
 
 
 
195
  included_df = validation_df[validation_df['Excluded from Optimization'] == False].copy()
196
  excluded_df = validation_df[validation_df['Excluded from Optimization'] == True].copy()
197
 
 
198
  st.subheader("βœ… Products Included in Optimization")
199
+ st.write(f"**{len(included_df)} products** with total demand of **{included_df['Demand'].sum():,} units**")
200
 
201
  if len(included_df) > 0:
202
+ # Filters
203
  col1, col2 = st.columns(2)
204
+ type_filter = col1.selectbox("Filter by type", ["All"] + list(included_df['Product Type'].unique()), key="inc_filter")
205
+ min_demand = col2.number_input("Minimum demand", min_value=0, value=0, key="inc_demand")
206
 
207
+ # Apply filters
208
+ filtered = included_df.copy()
209
+ if type_filter != "All":
210
+ filtered = filtered[filtered['Product Type'] == type_filter]
211
+ if min_demand > 0:
212
+ filtered = filtered[filtered['Demand'] >= min_demand]
 
 
 
 
 
 
 
 
213
 
214
+ # Display table
215
+ display_cols = ['Product ID', 'Demand', 'Product Type', 'Line Type', 'UNICEF Staff',
216
+ 'Humanizer Staff', 'Production Speed (units/hour)', 'Data Quality Issues', 'Validation Status']
217
+ st.dataframe(filtered[display_cols], use_container_width=True, height=300)
 
 
 
 
218
  else:
219
  st.warning("No products are included in optimization!")
220
 
221
+ # ===== EXCLUDED PRODUCTS TABLE =====
222
  st.subheader("🚫 Products Excluded from Optimization")
223
+ st.write(f"**{len(excluded_df)} products** with total demand of **{excluded_df['Demand'].sum():,} units**")
224
+ st.info("Excluded due to: missing line assignments, zero staffing, or non-standalone masters")
 
 
 
225
 
226
  if len(excluded_df) > 0:
227
  # Show exclusion breakdown
 
228
  st.write("**Exclusion reasons:**")
229
+ for reason, count in excluded_df['Exclusion Reasons'].value_counts().items():
230
  st.write(f"β€’ {reason}: {count} products")
231
 
232
+ # Display table
233
+ display_cols = ['Product ID', 'Demand', 'Product Type', 'Exclusion Reasons',
234
+ 'UNICEF Staff', 'Humanizer Staff', 'Line Type']
235
+ st.dataframe(excluded_df[display_cols], use_container_width=True, height=200)
 
 
 
 
236
 
237
+ # Export button
238
  if st.button("πŸ“₯ Export Validation Results to CSV"):
239
+ st.download_button("Download CSV", validation_df.to_csv(index=False),
240
+ file_name="demand_validation_results.csv", mime="text/csv")
 
 
 
 
 
 
 
241
 
242
+ # ===== RECOMMENDATIONS =====
243
  st.subheader("πŸ’‘ Recommendations")
244
 
245
+ if stats['excluded_products'] > 0:
246
+ st.warning(f"**{stats['excluded_products']} products** ({stats['excluded_demand']:,} units) excluded from optimization")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
 
248
+ # Show data quality issues for included products
249
+ if stats['no_line_assignment'] > 0:
250
+ st.info(f"**Line Assignment**: {stats['no_line_assignment']} included products missing line assignments")
251
+ if stats['no_staffing'] > 0:
252
+ st.info(f"**Staffing Data**: {stats['no_staffing']} included products missing staffing requirements")
253
+ if stats['no_speed'] > 0:
254
+ st.info(f"**Speed Data**: {stats['no_speed']} included products missing speed data (will use default 106.7 units/hour)")
255
+ if stats['no_hierarchy'] > 0:
256
+ st.info(f"**Hierarchy Data**: {stats['no_hierarchy']} included products not in kit hierarchy")
257
 
258
  # Overall status
259
+ if stats['included_products'] > 0:
260
+ st.success(f"βœ… **{stats['included_products']} products** with {stats['included_demand']:,} units demand ready for optimization!")
261
+ if stats['no_speed'] == 0 and stats['no_hierarchy'] == 0:
262
+ st.info("πŸŽ‰ All included products have complete data!")
263
  else:
264
+ st.error("❌ No products passed filtering. Review exclusion reasons and check data configuration.")
265
 
266
 
267
  if __name__ == "__main__":
src/models/optimizer_real.py CHANGED
@@ -268,82 +268,124 @@ def run_optimization_for_week():
268
  INF = solver.infinity()
269
 
270
  # --- Variables ---
271
- # Z[p,ell,s,t] ∈ {0,1}: 1 if product p runs on (line,shift,day)
272
- Z, T, U = {}, {}, {} # T: run hours, U: production units
273
  for p in sorted_product_list:
274
  for ell in line_tuples: # ell = (line_type_id, idx)
275
  for s in active_shift_list:
276
  for t in date_span_list:
277
- Z[p, ell, s, t] = solver.BoolVar(f"Z_{p}_{ell[0]}_{ell[1]}_s{s}_d{t}")
278
- T[p, ell, s, t] = solver.NumVar(0, Hmax_s[s], f"T_{p}_{ell[0]}_{ell[1]}_s{s}_d{t}")
279
- U[p, ell, s, t] = solver.NumVar(0, INF, f"U_{p}_{ell[0]}_{ell[1]}_s{s}_d{t}")
 
 
 
280
 
281
- # Idle employee variables: IDLE[e,s,t] = number of idle employees of type e in shift s on day t
282
- IDLE = {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
  for e in employee_type_list:
284
  for s in active_shift_list:
285
  for t in date_span_list:
286
- max_idle = max_employee_type_day[e][t] # Can't have more idle employees than available
287
- IDLE[e, s, t] = solver.IntVar(0, max_idle, f"IDLE_{e}_s{s}_d{t}")
 
 
 
 
 
288
 
289
  # Note: Binary variables for bulk payment are now created inline in the cost calculation
290
 
291
- # --- Objective: total labor cost with payment modes + hierarchy timing penalty ---
 
292
  PAYMENT_MODE_CONFIG = get_payment_mode_config() # Dynamic call
293
  print(f"Payment mode configuration: {PAYMENT_MODE_CONFIG}")
294
 
295
  # Build cost terms based on payment mode
296
  cost_terms = []
297
 
298
- for e in employee_type_list:
299
- for s in active_shift_list:
300
- payment_mode = PAYMENT_MODE_CONFIG.get(s, "partial") # Default to partial if not specified
301
-
302
- if payment_mode == "partial":
303
- # Partial payment: pay for actual hours worked
304
- for p in sorted_product_list:
305
- for ell in line_tuples:
306
- for t in date_span_list:
307
- cost_terms.append(cost[e][s] * TEAM_REQ_PER_PRODUCT[e][p] * T[p, ell, s, t])
308
-
309
- elif payment_mode == "bulk":
310
- # Bulk payment: if employees work ANY hours in a shift, pay them for FULL shift hours
311
- # BUT only pay the employees who actually work, not all employees of that type
312
- for p in sorted_product_list:
313
- for ell in line_tuples:
314
- for t in date_span_list:
315
- # Calculate actual employees working: TEAM_REQ_PER_PRODUCT[e][p] employees work T[p,ell,s,t] hours
316
- # For bulk payment: if T[p,ell,s,t] > 0, pay TEAM_REQ_PER_PRODUCT[e][p] employees for full shift
317
- # We need a binary variable for each (e,s,p,ell,t) combination
318
- # But we can use the existing logic: if T > 0, then those specific employees get bulk pay
319
-
320
- # Create binary variable for this specific work assignment
321
- work_binary = solver.BoolVar(f"work_{e}_s{s}_{p}_{ell[0]}{ell[1]}_d{t}")
322
-
323
- # Link work_binary to T[p,ell,s,t]: work_binary = 1 if T > 0
324
- solver.Add(T[p, ell, s, t] <= Hmax_s[s] * work_binary)
325
- solver.Add(work_binary * 0.001 <= T[p, ell, s, t])
326
-
327
- # Cost: pay the specific working employees for full shift hours
328
- cost_terms.append(cost[e][s] * Hmax_s[s] * TEAM_REQ_PER_PRODUCT[e][p] * work_binary)
329
-
330
- # Add idle employee costs (idle employees are paid for full shift hours)
331
  for e in employee_type_list:
332
  for s in active_shift_list:
333
  for t in date_span_list:
334
- cost_terms.append(cost[e][s] * Hmax_s[s] * IDLE[e, s, t])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
335
 
336
  total_cost = solver.Sum(cost_terms)
337
 
338
- # Objective: minimize total cost only
339
- # Dependency ordering is handled by topological sorting and hard constraints
 
340
  solver.Minimize(total_cost)
341
 
342
  # --- Constraints ---
343
 
344
  # 1) Weekly demand - must meet exactly (no over/under production)
345
  for p in sorted_product_list:
346
- total_production = solver.Sum(U[p, ell, s, t] for ell in line_tuples for s in active_shift_list for t in date_span_list)
347
  demand = DEMAND_DICTIONARY.get(p, 0)
348
 
349
  # Must produce at least the demand
@@ -356,9 +398,9 @@ def run_optimization_for_week():
356
  for ell in line_tuples:
357
  for s in active_shift_list:
358
  for t in date_span_list:
359
- solver.Add(solver.Sum(Z[p, ell, s, t] for p in sorted_product_list) <= 1)
360
  for p in sorted_product_list:
361
- solver.Add(T[p, ell, s, t] <= Hmax_s[s] * Z[p, ell, s, t])
362
 
363
  # 3) Product-line type compatibility + (optional) activity by day
364
  for p in sorted_product_list:
@@ -369,11 +411,11 @@ def run_optimization_for_week():
369
  for s in active_shift_list:
370
  for t in date_span_list:
371
  if ACTIVE[t][p] == 0 or not allowed:
372
- solver.Add(Z[p, ell, s, t] == 0)
373
- solver.Add(T[p, ell, s, t] == 0)
374
- solver.Add(U[p, ell, s, t] == 0)
375
 
376
- # 4) Line throughput: U ≀ product_speed * T
377
  for p in sorted_product_list:
378
  for ell in line_tuples:
379
  for s in active_shift_list:
@@ -384,11 +426,11 @@ def run_optimization_for_week():
384
  speed = PER_PRODUCT_SPEED[p]
385
  # Upper bound: units cannot exceed capacity
386
  solver.Add(
387
- U[p, ell, s, t] <= speed * T[p, ell, s, t]
388
  )
389
  # Lower bound: if working, must produce (prevent phantom work)
390
  solver.Add(
391
- U[p, ell, s, t] >= speed * T[p, ell, s, t]
392
  )
393
  else:
394
  # Default speed if not found
@@ -396,34 +438,40 @@ def run_optimization_for_week():
396
  print(f"Warning: No speed data for product {p}, using default {default_speed:.1f} per hour")
397
  # Upper bound: units cannot exceed capacity
398
  solver.Add(
399
- U[p, ell, s, t] <= default_speed * T[p, ell, s, t]
400
  )
401
  # Lower bound: if working, must produce (prevent phantom work)
402
  solver.Add(
403
- U[p, ell, s, t] >= default_speed * T[p, ell, s, t]
404
  )
405
 
406
- # 5) Per-shift staffing capacity by type: idle employees ≀ available headcount
407
  for e in employee_type_list:
408
  for s in active_shift_list:
409
  for t in date_span_list:
410
- # Idle employees cannot exceed available headcount
411
- # (Active employees are constrained by the working hours constraint below)
412
- solver.Add(IDLE[e, s, t] <= max_employee_type_day[e][t])
413
-
414
- # Working hours constraint: active employees cannot exceed shift hour capacity
415
  solver.Add(
416
- solver.Sum(TEAM_REQ_PER_PRODUCT[e][p] * T[p, ell, s, t] for p in sorted_product_list for ell in line_tuples)
417
  <= Hmax_s[s] * max_employee_type_day[e][t]
418
  )
419
 
420
- # 6) Per-day staffing capacity by type: sum(req*hours across shifts) ≀ 14h * headcount
 
421
  for e in employee_type_list:
422
- for t in date_span_list:
423
- solver.Add(
424
- solver.Sum(TEAM_REQ_PER_PRODUCT[e][p] * T[p, ell, s, t] for s in active_shift_list for p in sorted_product_list for ell in line_tuples)
425
- <= MAX_HOUR_PER_PERSON_PER_DAY * max_employee_type_day[e][t]
426
- )
 
 
 
 
 
 
 
 
 
427
 
428
  # 7) Shift ordering constraints (only apply if shifts are available)
429
  # Evening shift after regular shift
@@ -431,9 +479,9 @@ def run_optimization_for_week():
431
  for e in employee_type_list:
432
  for t in date_span_list:
433
  solver.Add(
434
- solver.Sum(TEAM_REQ_PER_PRODUCT[e][p] * T[p, ell, ShiftType.EVENING, t] for p in sorted_product_list for ell in line_tuples)
435
  <=
436
- solver.Sum(TEAM_REQ_PER_PRODUCT[e][p] * T[p, ell, ShiftType.REGULAR, t] for p in sorted_product_list for ell in line_tuples)
437
  )
438
 
439
  # Overtime should only be used when regular shift is at capacity
@@ -447,13 +495,13 @@ def run_optimization_for_week():
447
 
448
  # Total regular shift usage for this employee type and day
449
  regular_usage = solver.Sum(
450
- TEAM_REQ_PER_PRODUCT[e][p] * T[p, ell, ShiftType.REGULAR, t]
451
  for p in sorted_product_list for ell in line_tuples
452
  )
453
 
454
  # Total overtime usage for this employee type and day
455
  overtime_usage = solver.Sum(
456
- TEAM_REQ_PER_PRODUCT[e][p] * T[p, ell, ShiftType.OVERTIME, t]
457
  for p in sorted_product_list for ell in line_tuples
458
  )
459
 
@@ -476,47 +524,19 @@ def run_optimization_for_week():
476
  # 7.5) Bulk payment linking constraints are now handled inline in the cost calculation
477
 
478
  # 7.6) *** FIXED MINIMUM UNICEF EMPLOYEES CONSTRAINT ***
479
- # Ensure minimum UNICEF fixed-term staff are present every working day
480
- FIXED_MIN_UNICEF_PER_DAY = get_fixed_min_unicef_per_day() # Dynamic call
481
  if 'UNICEF Fixed term' in employee_type_list and FIXED_MIN_UNICEF_PER_DAY > 0:
482
- print(f"\n[FIXED STAFFING] Adding constraint for minimum {FIXED_MIN_UNICEF_PER_DAY} UNICEF employees per day...")
483
-
484
- unicef_constraints_added = 0
485
- for t in date_span_list:
486
- # Method 1: Simple approach - ensure minimum UNICEF employees are scheduled
487
- # regardless of whether they're working or idle
488
- # Sum up all possible UNICEF work assignments + idle UNICEF employees
489
-
490
- # Count all UNICEF work hours across all products, lines, and shifts
491
- all_unicef_hours = solver.Sum(
492
- TEAM_REQ_PER_PRODUCT.get('UNICEF Fixed term', {}).get(p, 0) * T[p, ell, s, t]
493
- for p in sorted_product_list
494
- for ell in line_tuples
495
- for s in active_shift_list
496
- )
497
-
498
- # Count idle UNICEF employees across all shifts
499
- idle_unicef_employees = solver.Sum(
500
- IDLE['UNICEF Fixed term', s, t] for s in active_shift_list
501
- )
502
-
503
- # Constraint: total hours (work + idle*14) must meet minimum staffing
504
- # This ensures at least FIXED_MIN_UNICEF_PER_DAY employees are present
505
- solver.Add(all_unicef_hours + idle_unicef_employees * MAX_HOUR_PER_PERSON_PER_DAY >= FIXED_MIN_UNICEF_PER_DAY * MAX_HOUR_PER_PERSON_PER_DAY)
506
-
507
- # Additional constraint: ensure idle employees are properly linked to total headcount
508
- # This prevents the solver from avoiding the minimum by setting everyone to zero
509
- total_unicef_hours_needed_for_production = solver.Sum(
510
- TEAM_REQ_PER_PRODUCT.get('UNICEF Fixed term', {}).get(p, 0) * T[p, ell, s, t]
511
- for p in sorted_product_list for ell in line_tuples for s in active_shift_list
512
- )
513
-
514
- # Simpler approach: just ensure the basic constraint is strong enough
515
- # The main constraint above should be sufficient: all_unicef_hours + idle*14 >= min*14
516
- # This already forces idle employees when production is insufficient
517
- unicef_constraints_added += 1
518
-
519
- print(f"[FIXED STAFFING] Added {unicef_constraints_added} constraints ensuring >= {FIXED_MIN_UNICEF_PER_DAY} UNICEF employees per day")
520
 
521
  # 8) *** HIERARCHY DEPENDENCY CONSTRAINTS ***
522
  # For subkits with prepack dependencies: dependencies should be produced before or same time
@@ -533,10 +553,10 @@ def run_optimization_for_week():
533
  if dep in sorted_product_list: # Only if dependency is also in production list
534
  # Calculate "completion time" for each product (sum of all production times)
535
  p_completion = solver.Sum(
536
- t * T[p, ell, s, t] for ell in line_tuples for s in active_shift_list for t in date_span_list
537
  )
538
  dep_completion = solver.Sum(
539
- t * T[dep, ell, s, t] for ell in line_tuples for s in active_shift_list for t in date_span_list
540
  )
541
 
542
  # Dependency should complete before or at the same time
@@ -562,7 +582,7 @@ def run_optimization_for_week():
562
  result['objective'] = solver.Objective().Value()
563
 
564
  # Weekly production
565
- prod_week = {p: sum(U[p, ell, s, t].solution_value() for ell in line_tuples for s in active_shift_list for t in date_span_list) for p in sorted_product_list}
566
  result['weekly_production'] = prod_week
567
 
568
  # Which product ran on which line/shift/day
@@ -570,7 +590,7 @@ def run_optimization_for_week():
570
  for t in date_span_list:
571
  for ell in line_tuples:
572
  for s in active_shift_list:
573
- chosen = [p for p in sorted_product_list if Z[p, ell, s, t].solution_value() > 0.5]
574
  if chosen:
575
  p = chosen[0]
576
  schedule.append({
@@ -579,8 +599,8 @@ def run_optimization_for_week():
579
  'line_idx': ell[1],
580
  'shift': s,
581
  'product': p,
582
- 'run_hours': T[p, ell, s, t].solution_value(),
583
- 'units': U[p, ell, s, t].solution_value(),
584
  })
585
  result['run_schedule'] = schedule
586
 
@@ -589,7 +609,7 @@ def run_optimization_for_week():
589
  for e in employee_type_list:
590
  for s in active_shift_list:
591
  for t in date_span_list:
592
- used_ph = sum(TEAM_REQ_PER_PRODUCT[e][p] * T[p, ell, s, t].solution_value() for p in sorted_product_list for ell in line_tuples)
593
  need = ceil(used_ph / (Hmax_s[s] + 1e-9))
594
  headcount.append({'emp_type': e, 'shift': s, 'day': t,
595
  'needed': need, 'available': max_employee_type_day[e][t]})
@@ -599,26 +619,54 @@ def run_optimization_for_week():
599
  ph_by_day = []
600
  for e in employee_type_list:
601
  for t in date_span_list:
602
- used = sum(TEAM_REQ_PER_PRODUCT[e][p] * T[p, ell, s, t].solution_value() for s in active_shift_list for p in sorted_product_list for ell in line_tuples)
603
  ph_by_day.append({'emp_type': e, 'day': t,
604
  'used_person_hours': used,
605
  'cap_person_hours': Hmax_daily * max_employee_type_day[e][t]})
606
  result['person_hours_by_day'] = ph_by_day
607
 
608
- # Idle employee data for visualization
609
- idle_employees = []
610
  for e in employee_type_list:
611
  for s in active_shift_list:
612
  for t in date_span_list:
613
- idle_count = IDLE[e, s, t].solution_value()
614
- if idle_count > 0: # Only include non-zero idle counts
615
- idle_employees.append({
616
- 'emp_type': e,
 
 
 
617
  'shift': s,
618
  'day': t,
619
- 'idle_count': idle_count
 
 
 
620
  })
621
- result['idle_employees'] = idle_employees
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
622
 
623
  # Pretty print
624
  print("Objective (min cost):", result['objective'])
@@ -631,7 +679,7 @@ def run_optimization_for_week():
631
  shift_name = ShiftType.get_name(row['shift'])
632
  line_name = LineType.get_name(row['line_type_id'])
633
  print(f"date_span_list{row['day']} {line_name}-{row['line_idx']} {shift_name}: "
634
- f"{row['product']} T={row['run_hours']:.2f}h U={row['units']:.1f}")
635
 
636
  print("\n--- Implied headcount need (per type/shift/day) ---")
637
  for row in headcount:
@@ -644,19 +692,23 @@ def run_optimization_for_week():
644
  print(f"{row['emp_type']}, date_span_list{row['day']}: used={row['used_person_hours']:.1f} "
645
  f"(cap {row['cap_person_hours']})")
646
 
647
- # Report idle employees
648
- print("\n--- Idle employees (per type/shift/day) ---")
649
- idle_found = False
650
- for e in employee_type_list:
651
- for s in active_shift_list:
652
- for t in date_span_list:
653
- idle_count = IDLE[e, s, t].solution_value()
654
- if idle_count > 0:
655
- shift_name = ShiftType.get_name(s)
656
- print(f"{e}, {shift_name}, date_span_list{t}: idle={idle_count}")
657
- idle_found = True
658
- if not idle_found:
659
- print("No idle employees scheduled")
 
 
 
 
660
 
661
  return result
662
 
 
268
  INF = solver.infinity()
269
 
270
  # --- Variables ---
271
+ # Assignment[p,ell,s,t] ∈ {0,1}: 1 if product p runs on (line,shift,day)
272
+ Assignment, Hours, Units = {}, {}, {} # Hours: run hours, Units: production units
273
  for p in sorted_product_list:
274
  for ell in line_tuples: # ell = (line_type_id, idx)
275
  for s in active_shift_list:
276
  for t in date_span_list:
277
+ # Is product p assigned to run on line ell, during shift s, on day t?
278
+ Assignment[p, ell, s, t] = solver.BoolVar(f"Z_{p}_{ell[0]}_{ell[1]}_s{s}_d{t}")
279
+ # How many hours does product p run on line ell, during shift s, on day t?
280
+ Hours[p, ell, s, t] = solver.NumVar(0, Hmax_s[s], f"T_{p}_{ell[0]}_{ell[1]}_s{s}_d{t}")
281
+ # How many units does product p produce on line ell, during shift s, on day t?
282
+ Units[p, ell, s, t] = solver.NumVar(0, INF, f"U_{p}_{ell[0]}_{ell[1]}_s{s}_d{t}")
283
 
284
+ # Note: IDLE variables removed - we only track employees actually working on production
285
+
286
+ # Load fixed minimum UNICEF requirement (needed for EMPLOYEE_COUNT variable creation)
287
+ FIXED_MIN_UNICEF_PER_DAY = get_fixed_min_unicef_per_day() # Dynamic call
288
+
289
+ # Variable to track actual number of employees of each type working each shift each day
290
+ # This represents how many distinct employees of type e are working in shift s on day t
291
+ EMPLOYEE_COUNT = {}
292
+ for e in employee_type_list:
293
+ for s in active_shift_list:
294
+ for t in date_span_list:
295
+ # Note: Minimum staffing is per day, not per shift
296
+ # We'll handle the daily minimum constraint separately
297
+ max_count = max_employee_type_day.get(e, {}).get(t, 100)
298
+ EMPLOYEE_COUNT[e, s, t] = solver.IntVar(
299
+ 0, # No minimum per shift (daily minimum handled separately)
300
+ max_count,
301
+ f"EmpCount_{e}_s{s}_day{t}"
302
+ )
303
+
304
+ # Track total person-hours worked by each employee type per shift per day
305
+ # This is needed for employee-centric wage calculation
306
+ EMPLOYEE_HOURS = {}
307
  for e in employee_type_list:
308
  for s in active_shift_list:
309
  for t in date_span_list:
310
+ # Sum of all work hours for employee type e in shift s on day t
311
+ # This represents total person-hours (e.g., 5 employees × 8 hours = 40 person-hours)
312
+ EMPLOYEE_HOURS[e, s, t] = solver.Sum(
313
+ TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, s, t]
314
+ for p in sorted_product_list
315
+ for ell in line_tuples
316
+ )
317
 
318
  # Note: Binary variables for bulk payment are now created inline in the cost calculation
319
 
320
+ # --- Objective: Minimize total labor cost (wages) ---
321
+ # Employee-centric approach: calculate wages based on actual employees and their hours
322
  PAYMENT_MODE_CONFIG = get_payment_mode_config() # Dynamic call
323
  print(f"Payment mode configuration: {PAYMENT_MODE_CONFIG}")
324
 
325
  # Build cost terms based on payment mode
326
  cost_terms = []
327
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
328
  for e in employee_type_list:
329
  for s in active_shift_list:
330
  for t in date_span_list:
331
+ payment_mode = PAYMENT_MODE_CONFIG.get(s, "partial") # Default to partial if not specified
332
+
333
+ if payment_mode == "partial":
334
+ # Partial payment: pay for actual person-hours worked
335
+ # Cost = hourly_rate Γ— total_person_hours
336
+ # Example: $20/hr Γ— 40 person-hours = $800
337
+ cost_terms.append(cost[e][s] * EMPLOYEE_HOURS[e, s, t])
338
+
339
+ elif payment_mode == "bulk":
340
+ # Bulk payment: if ANY work happens in shift, pay ALL working employees for FULL shift
341
+ # We need to know: did employee type e work at all in shift s on day t?
342
+
343
+ # Create binary: 1 if employee type e worked in this shift
344
+ work_in_shift = solver.BoolVar(f"work_{e}_s{s}_d{t}")
345
+
346
+ # Link binary to work hours
347
+ # If EMPLOYEE_HOURS > 0, then work_in_shift = 1
348
+ # If EMPLOYEE_HOURS = 0, then work_in_shift = 0
349
+ max_possible_hours = Hmax_s[s] * max_employee_type_day[e][t]
350
+ solver.Add(EMPLOYEE_HOURS[e, s, t] <= max_possible_hours * work_in_shift)
351
+ solver.Add(work_in_shift * 0.001 <= EMPLOYEE_HOURS[e, s, t])
352
+
353
+ # Calculate number of employees working in this shift
354
+ # This is approximately: ceil(EMPLOYEE_HOURS / Hmax_s[s])
355
+ # But we can use: employees_working_in_shift
356
+ # For simplicity, use EMPLOYEE_HOURS / Hmax_s[s] as continuous approximation
357
+ # Or better: create a variable for employees per shift
358
+
359
+ # Simpler approach: For bulk payment, assume if work happens,
360
+ # we need approximately EMPLOYEE_HOURS/Hmax_s[s] employees,
361
+ # and each gets paid for full shift
362
+ # Cost β‰ˆ (EMPLOYEE_HOURS / Hmax_s[s]) Γ— Hmax_s[s] Γ— hourly_rate = EMPLOYEE_HOURS Γ— hourly_rate
363
+ # But that's the same as partial! The difference is we round up employees.
364
+
365
+ # Better approach: Create variable for employees working in this specific shift
366
+ employees_in_shift = solver.IntVar(0, max_employee_type_day[e][t], f"emp_{e}_s{s}_d{t}")
367
+
368
+ # Link employees_in_shift to work requirements
369
+ # If EMPLOYEE_HOURS requires N employees, then employees_in_shift >= ceil(N)
370
+ solver.Add(employees_in_shift * Hmax_s[s] >= EMPLOYEE_HOURS[e, s, t])
371
+
372
+ # Cost: pay each employee for full shift
373
+ cost_terms.append(cost[e][s] * Hmax_s[s] * employees_in_shift)
374
+
375
+ # Note: No idle employee costs - only pay for employees actually working
376
 
377
  total_cost = solver.Sum(cost_terms)
378
 
379
+ # Objective: minimize total labor cost (wages)
380
+ # This finds the optimal production schedule (product order, line assignment, timing)
381
+ # that minimizes total wages while meeting all demand and capacity constraints
382
  solver.Minimize(total_cost)
383
 
384
  # --- Constraints ---
385
 
386
  # 1) Weekly demand - must meet exactly (no over/under production)
387
  for p in sorted_product_list:
388
+ total_production = solver.Sum(Units[p, ell, s, t] for ell in line_tuples for s in active_shift_list for t in date_span_list)
389
  demand = DEMAND_DICTIONARY.get(p, 0)
390
 
391
  # Must produce at least the demand
 
398
  for ell in line_tuples:
399
  for s in active_shift_list:
400
  for t in date_span_list:
401
+ solver.Add(solver.Sum(Assignment[p, ell, s, t] for p in sorted_product_list) <= 1)
402
  for p in sorted_product_list:
403
+ solver.Add(Hours[p, ell, s, t] <= Hmax_s[s] * Assignment[p, ell, s, t])
404
 
405
  # 3) Product-line type compatibility + (optional) activity by day
406
  for p in sorted_product_list:
 
411
  for s in active_shift_list:
412
  for t in date_span_list:
413
  if ACTIVE[t][p] == 0 or not allowed:
414
+ solver.Add(Assignment[p, ell, s, t] == 0)
415
+ solver.Add(Hours[p, ell, s, t] == 0)
416
+ solver.Add(Units[p, ell, s, t] == 0)
417
 
418
+ # 4) Line throughput: Units ≤ product_speed * Hours
419
  for p in sorted_product_list:
420
  for ell in line_tuples:
421
  for s in active_shift_list:
 
426
  speed = PER_PRODUCT_SPEED[p]
427
  # Upper bound: units cannot exceed capacity
428
  solver.Add(
429
+ Units[p, ell, s, t] <= speed * Hours[p, ell, s, t]
430
  )
431
  # Lower bound: if working, must produce (prevent phantom work)
432
  solver.Add(
433
+ Units[p, ell, s, t] >= speed * Hours[p, ell, s, t]
434
  )
435
  else:
436
  # Default speed if not found
 
438
  print(f"Warning: No speed data for product {p}, using default {default_speed:.1f} per hour")
439
  # Upper bound: units cannot exceed capacity
440
  solver.Add(
441
+ Units[p, ell, s, t] <= default_speed * Hours[p, ell, s, t]
442
  )
443
  # Lower bound: if working, must produce (prevent phantom work)
444
  solver.Add(
445
+ Units[p, ell, s, t] >= default_speed * Hours[p, ell, s, t]
446
  )
447
 
448
+ # Working hours constraint: active employees cannot exceed shift hour capacity
449
  for e in employee_type_list:
450
  for s in active_shift_list:
451
  for t in date_span_list:
452
+ # No idle employee constraints - employees are only counted when working
 
 
 
 
453
  solver.Add(
454
+ solver.Sum(TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, s, t] for p in sorted_product_list for ell in line_tuples)
455
  <= Hmax_s[s] * max_employee_type_day[e][t]
456
  )
457
 
458
+ # 6) Per-shift staffing capacity by type: link employee count to actual work hours
459
+ # This constraint ensures EMPLOYEE_COUNT[e,s,t] represents the actual number of employees needed in each shift
460
  for e in employee_type_list:
461
+ for s in active_shift_list:
462
+ for t in date_span_list:
463
+ # Total person-hours worked by employee type e in shift s on day t
464
+ total_person_hours_in_shift = solver.Sum(
465
+ TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, s, t]
466
+ for p in sorted_product_list
467
+ for ell in line_tuples
468
+ )
469
+
470
+ # Employee count must be sufficient to cover the work in this shift
471
+ # If employees work H person-hours total and each can work max M hours/shift,
472
+ # then we need at least ceil(H/M) employees
473
+ # Constraint: employee_count Γ— max_hours_per_shift >= total_person_hours_in_shift
474
+ solver.Add(EMPLOYEE_COUNT[e, s, t] * Hmax_s[s] >= total_person_hours_in_shift)
475
 
476
  # 7) Shift ordering constraints (only apply if shifts are available)
477
  # Evening shift after regular shift
 
479
  for e in employee_type_list:
480
  for t in date_span_list:
481
  solver.Add(
482
+ solver.Sum(TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, ShiftType.EVENING, t] for p in sorted_product_list for ell in line_tuples)
483
  <=
484
+ solver.Sum(TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, ShiftType.REGULAR, t] for p in sorted_product_list for ell in line_tuples)
485
  )
486
 
487
  # Overtime should only be used when regular shift is at capacity
 
495
 
496
  # Total regular shift usage for this employee type and day
497
  regular_usage = solver.Sum(
498
+ TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, ShiftType.REGULAR, t]
499
  for p in sorted_product_list for ell in line_tuples
500
  )
501
 
502
  # Total overtime usage for this employee type and day
503
  overtime_usage = solver.Sum(
504
+ TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, ShiftType.OVERTIME, t]
505
  for p in sorted_product_list for ell in line_tuples
506
  )
507
 
 
524
  # 7.5) Bulk payment linking constraints are now handled inline in the cost calculation
525
 
526
  # 7.6) *** FIXED MINIMUM UNICEF EMPLOYEES CONSTRAINT ***
527
+ # Ensure minimum UNICEF fixed-term staff work in the REGULAR shift every day
528
+ # The minimum applies to the regular shift specifically (not overtime or evening)
529
  if 'UNICEF Fixed term' in employee_type_list and FIXED_MIN_UNICEF_PER_DAY > 0:
530
+ if ShiftType.REGULAR in active_shift_list:
531
+ print(f"\n[FIXED STAFFING] Adding constraint for minimum {FIXED_MIN_UNICEF_PER_DAY} UNICEF employees in REGULAR shift per day...")
532
+ for t in date_span_list:
533
+ # At least FIXED_MIN_UNICEF_PER_DAY employees must work in the regular shift each day
534
+ solver.Add(
535
+ EMPLOYEE_COUNT['UNICEF Fixed term', ShiftType.REGULAR, t] >= FIXED_MIN_UNICEF_PER_DAY
536
+ )
537
+ print(f"[FIXED STAFFING] Added {len(date_span_list)} constraints ensuring >= {FIXED_MIN_UNICEF_PER_DAY} UNICEF employees in regular shift per day")
538
+ else:
539
+ print(f"\n[FIXED STAFFING] Warning: Regular shift not available, cannot enforce minimum UNICEF staffing")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
540
 
541
  # 8) *** HIERARCHY DEPENDENCY CONSTRAINTS ***
542
  # For subkits with prepack dependencies: dependencies should be produced before or same time
 
553
  if dep in sorted_product_list: # Only if dependency is also in production list
554
  # Calculate "completion time" for each product (sum of all production times)
555
  p_completion = solver.Sum(
556
+ t * Hours[p, ell, s, t] for ell in line_tuples for s in active_shift_list for t in date_span_list
557
  )
558
  dep_completion = solver.Sum(
559
+ t * Hours[dep, ell, s, t] for ell in line_tuples for s in active_shift_list for t in date_span_list
560
  )
561
 
562
  # Dependency should complete before or at the same time
 
582
  result['objective'] = solver.Objective().Value()
583
 
584
  # Weekly production
585
+ prod_week = {p: sum(Units[p, ell, s, t].solution_value() for ell in line_tuples for s in active_shift_list for t in date_span_list) for p in sorted_product_list}
586
  result['weekly_production'] = prod_week
587
 
588
  # Which product ran on which line/shift/day
 
590
  for t in date_span_list:
591
  for ell in line_tuples:
592
  for s in active_shift_list:
593
+ chosen = [p for p in sorted_product_list if Assignment[p, ell, s, t].solution_value() > 0.5]
594
  if chosen:
595
  p = chosen[0]
596
  schedule.append({
 
599
  'line_idx': ell[1],
600
  'shift': s,
601
  'product': p,
602
+ 'run_hours': Hours[p, ell, s, t].solution_value(),
603
+ 'units': Units[p, ell, s, t].solution_value(),
604
  })
605
  result['run_schedule'] = schedule
606
 
 
609
  for e in employee_type_list:
610
  for s in active_shift_list:
611
  for t in date_span_list:
612
+ used_ph = sum(TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, s, t].solution_value() for p in sorted_product_list for ell in line_tuples)
613
  need = ceil(used_ph / (Hmax_s[s] + 1e-9))
614
  headcount.append({'emp_type': e, 'shift': s, 'day': t,
615
  'needed': need, 'available': max_employee_type_day[e][t]})
 
619
  ph_by_day = []
620
  for e in employee_type_list:
621
  for t in date_span_list:
622
+ used = sum(TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, s, t].solution_value() for s in active_shift_list for p in sorted_product_list for ell in line_tuples)
623
  ph_by_day.append({'emp_type': e, 'day': t,
624
  'used_person_hours': used,
625
  'cap_person_hours': Hmax_daily * max_employee_type_day[e][t]})
626
  result['person_hours_by_day'] = ph_by_day
627
 
628
+ # Actual employee count per type/shift/day (from EMPLOYEE_COUNT variable)
629
+ employee_count_by_shift = []
630
  for e in employee_type_list:
631
  for s in active_shift_list:
632
  for t in date_span_list:
633
+ count = int(EMPLOYEE_COUNT[e, s, t].solution_value())
634
+ used_hours = sum(TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, s, t].solution_value()
635
+ for p in sorted_product_list for ell in line_tuples)
636
+ avg_hours_per_employee = used_hours / count if count > 0 else 0
637
+ if count > 0: # Only add entries where employees are working
638
+ employee_count_by_shift.append({
639
+ 'emp_type': e,
640
  'shift': s,
641
  'day': t,
642
+ 'employee_count': count,
643
+ 'total_person_hours': used_hours,
644
+ 'avg_hours_per_employee': avg_hours_per_employee,
645
+ 'available': max_employee_type_day[e][t]
646
  })
647
+ result['employee_count_by_shift'] = employee_count_by_shift
648
+
649
+ # Also calculate daily totals (summing across shifts)
650
+ employee_count_by_day = []
651
+ for e in employee_type_list:
652
+ for t in date_span_list:
653
+ # Sum employees across all shifts for this day
654
+ total_count = sum(int(EMPLOYEE_COUNT[e, s, t].solution_value()) for s in active_shift_list)
655
+ used_hours = sum(TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, s, t].solution_value()
656
+ for s in active_shift_list for p in sorted_product_list for ell in line_tuples)
657
+ avg_hours_per_employee = used_hours / total_count if total_count > 0 else 0
658
+ if total_count > 0: # Only add days where employees are working
659
+ employee_count_by_day.append({
660
+ 'emp_type': e,
661
+ 'day': t,
662
+ 'employee_count': total_count,
663
+ 'total_person_hours': used_hours,
664
+ 'avg_hours_per_employee': avg_hours_per_employee,
665
+ 'available': max_employee_type_day[e][t]
666
+ })
667
+ result['employee_count_by_day'] = employee_count_by_day
668
+
669
+ # Note: Idle employee tracking removed - only counting employees actually working
670
 
671
  # Pretty print
672
  print("Objective (min cost):", result['objective'])
 
679
  shift_name = ShiftType.get_name(row['shift'])
680
  line_name = LineType.get_name(row['line_type_id'])
681
  print(f"date_span_list{row['day']} {line_name}-{row['line_idx']} {shift_name}: "
682
+ f"{row['product']} Hours={row['run_hours']:.2f}h Units={row['units']:.1f}")
683
 
684
  print("\n--- Implied headcount need (per type/shift/day) ---")
685
  for row in headcount:
 
692
  print(f"{row['emp_type']}, date_span_list{row['day']}: used={row['used_person_hours']:.1f} "
693
  f"(cap {row['cap_person_hours']})")
694
 
695
+ print("\n--- Actual employee count by type/shift/day ---")
696
+ for row in employee_count_by_shift:
697
+ shift_name = ShiftType.get_name(row['shift'])
698
+ print(f"{row['emp_type']}, {shift_name}, date_span_list{row['day']}: "
699
+ f"count={row['employee_count']} employees, "
700
+ f"total_hours={row['total_person_hours']:.1f}h, "
701
+ f"avg={row['avg_hours_per_employee']:.1f}h/employee")
702
+
703
+ print("\n--- Daily employee totals by type/day (sum across shifts) ---")
704
+ for row in employee_count_by_day:
705
+ print(f"{row['emp_type']}, date_span_list{row['day']}: "
706
+ f"count={row['employee_count']} employees total, "
707
+ f"total_hours={row['total_person_hours']:.1f}h, "
708
+ f"avg={row['avg_hours_per_employee']:.1f}h/employee "
709
+ f"(available: {row['available']})")
710
+
711
+ # Note: Idle employee reporting removed - only tracking employees actually working
712
 
713
  return result
714
 
src/{utils β†’ preprocess}/excel_to_csv_converter.py RENAMED
File without changes
src/{utils β†’ preprocess}/kit_composition_cleaner.py RENAMED
File without changes