haileyhalimj@gmail.com committed on
Commit
acd1110
·
1 Parent(s): c9c4af4

Restore improved optimizer and simplify demand validation

Browse files

Major improvements:
- Remove IDLE employee tracking from optimizer_real.py
- Improve variable naming: Z/T/U → Assignment/Hours/Units
- Add new employee tracking system: EMPLOYEE_COUNT and EMPLOYEE_HOURS
- Simplify demand_validation_viz.py (371→270 lines)
- Remove idle employee display from optimization_results.py

Code reorganization:
- Rename src/utils to src/preprocess for better organization

Testing: ✅ Optimization runs successfully with 3 products, €419.10 total cost

optimization_results.py CHANGED
@@ -104,8 +104,6 @@ def display_weekly_summary(results):
104
  # Calculate cost per unit
105
  cost_per_unit = total_cost / total_production if total_production > 0 else 0
106
  st.metric("Cost per Unit", f"€{cost_per_unit:.2f}")
107
-
108
- # Remove col5 - no idle employees metrics needed
109
 
110
  # Production vs Demand Chart
111
  st.subheader("🎯 Production vs Demand")
@@ -603,42 +601,7 @@ def display_cost_analysis(results):
603
  'Cost': round(cost, 2)
604
  })
605
 
606
- # Add idle employee costs to the breakdown
607
- if 'idle_employees' in results:
608
- # COST_LIST_PER_EMP_SHIFT already loaded above as dynamic call
609
-
610
- for idle in results['idle_employees']:
611
- if idle['idle_count'] > 0:
612
- emp_type = idle['emp_type']
613
- shift = idle['shift']
614
- day = idle['day']
615
- idle_count = idle['idle_count']
616
-
617
- # Get hourly rate and shift name
618
- hourly_rate = COST_LIST_PER_EMP_SHIFT.get(emp_type, {}).get(shift, 0)
619
- shift_name = shift_names.get(shift, f"Shift {shift}")
620
-
621
- # Idle employees work 0 hours but get paid for full shift
622
- actual_hours = 0
623
- paid_hours = 7.5 # Assuming standard shift length
624
- idle_cost = idle_count * paid_hours * hourly_rate
625
-
626
- if emp_type not in total_cost_by_type:
627
- total_cost_by_type[emp_type] = 0
628
- total_cost_by_type[emp_type] += idle_cost
629
-
630
- cost_data.append({
631
- 'Employee Type': emp_type,
632
- 'Day': f"Day {day}",
633
- 'Shift': f"{shift_name} (Idle)",
634
- 'Line': '-', # No line assignment for idle
635
- 'Product': '-', # No product for idle
636
- 'Actual Hours': actual_hours,
637
- 'Paid Hours': round(paid_hours, 2),
638
- 'Workers': int(idle_count),
639
- 'Hourly Rate': f"€{hourly_rate:.2f}",
640
- 'Cost': round(idle_cost, 2)
641
- })
642
 
643
  # Total cost metrics
644
  total_cost = results['objective']
 
104
  # Calculate cost per unit
105
  cost_per_unit = total_cost / total_production if total_production > 0 else 0
106
  st.metric("Cost per Unit", f"€{cost_per_unit:.2f}")
 
 
107
 
108
  # Production vs Demand Chart
109
  st.subheader("🎯 Production vs Demand")
 
601
  'Cost': round(cost, 2)
602
  })
603
 
604
+ # Note: Idle employee tracking removed - we only track employees actually working on production
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
605
 
606
  # Total cost metrics
607
  total_cost = results['objective']
src/demand_validation_viz.py CHANGED
@@ -2,23 +2,33 @@
2
  """
3
  Demand Data Validation Visualization Module
4
 
5
- This module provides Streamlit visualization for demand data validation,
6
- showing which products are included/excluded from optimization and why.
7
- Uses the demand_filtering module for the actual filtering logic.
8
  """
9
 
10
  import pandas as pd
11
  import streamlit as st
12
- from typing import Dict, List, Tuple, Optional
13
- import json
14
- from src.config.constants import ShiftType, LineType, KitLevel
15
  from src.demand_filtering import DemandFilter
16
 
17
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  class DemandValidationViz:
19
  """
20
- Provides visualization for demand data validation using the filtering module.
21
- This class focuses purely on displaying the results from demand filtering.
22
  """
23
 
24
  def __init__(self):
@@ -26,90 +36,61 @@ class DemandValidationViz:
26
  self.speed_data = None
27
 
28
  def load_data(self):
29
- """Load data needed for visualization"""
30
  try:
31
- # Load speed data for visualization
32
  from src.config import optimization_config
33
  self.speed_data = optimization_config.get_per_product_speed()
34
-
35
- # Load data in the filter instance
36
  return self.filter_instance.load_data()
37
-
38
  except Exception as e:
39
  error_msg = f"Error loading data: {str(e)}"
40
  print(error_msg)
41
- try:
42
  st.error(error_msg)
43
- except:
44
- pass
45
  return False
46
 
47
- # Remove duplicate methods - use filter_instance data directly
48
-
49
- def get_production_speed(self, product_id: str) -> Optional[float]:
50
- """Get production speed for product"""
51
- return self.speed_data.get(product_id, None)
52
-
53
  def validate_all_products(self) -> pd.DataFrame:
54
  """
55
- Create visualization dataframe using complete analysis from filtering module
56
- Returns: DataFrame with validation results for display
57
  """
58
- # Get complete analysis from filtering module
59
  analysis = self.filter_instance.get_complete_product_analysis()
60
  product_details = analysis['product_details']
61
 
62
  results = []
63
-
64
  for product_id, details in product_details.items():
65
- # Get additional data for visualization
66
- speed = self.get_production_speed(product_id)
67
- production_hours_needed = None
68
- if speed and speed > 0:
69
- production_hours_needed = details['demand'] / speed
70
 
71
  # Get line type name
72
  line_type_id = details['line_assignment']
73
- line_name = "no_assignment"
74
- if line_type_id is not None:
75
- from src.config.constants import LineType
76
- line_name = LineType.get_name(line_type_id)
77
 
78
- # Get level name
79
- level_name = "no_hierarchy_data"
80
- if details['product_type'] != "unclassified":
81
- if details['product_type'] == "prepack":
82
- level_name = "prepack"
83
- elif details['product_type'] == "subkit":
84
- level_name = "subkit"
85
- elif details['product_type'] == "master":
86
- if details['is_standalone_master']:
87
- level_name = "standalone_master"
88
- else:
89
- level_name = "master_with_hierarchy"
90
- else:
91
- level_name = f"level_{details['product_type']}"
92
 
93
- # Overall status with enhanced detail
94
  if not details['is_included_in_optimization']:
95
  validation_status = f"🚫 Excluded: {', '.join(details['exclusion_reasons'])}"
96
  else:
97
- # Check for data quality issues that don't exclude from optimization
98
- data_quality_issues = []
99
  if speed is None:
100
- data_quality_issues.append("missing_speed_data (will use default)")
101
  if not details['has_hierarchy']:
102
- data_quality_issues.append("no_hierarchy_data")
103
-
104
- if data_quality_issues:
105
- validation_status = f"⚠️ Data Issues: {', '.join(data_quality_issues)}"
106
- else:
107
- validation_status = "βœ… Ready for optimization"
108
 
109
  results.append({
110
  'Product ID': product_id,
111
  'Demand': details['demand'],
112
- 'Product Type': details['product_type'].title(),
113
  'Level': level_name,
114
  'Is Standalone Master': "Yes" if details['is_standalone_master'] else "No",
115
  'Line Type ID': line_type_id if line_type_id else "N/A",
@@ -118,45 +99,26 @@ class DemandValidationViz:
118
  'Humanizer Staff': details['humanizer_staff'],
119
  'Total Staff': details['total_staff'],
120
  'Production Speed (units/hour)': f"{speed:.1f}" if speed else "N/A",
121
- 'Production Hours Needed': f"{production_hours_needed:.1f}" if production_hours_needed else "N/A",
122
  'Has Line Assignment': "βœ…" if details['has_line_assignment'] else "❌",
123
  'Has Staffing Data': "βœ…" if details['has_staffing'] else "❌",
124
  'Has Speed Data': "βœ…" if speed is not None else "❌ (will use default)",
125
  'Has Hierarchy Data': "βœ…" if details['has_hierarchy'] else "❌",
126
  'Excluded from Optimization': not details['is_included_in_optimization'],
127
  'Exclusion Reasons': ', '.join(details['exclusion_reasons']) if details['exclusion_reasons'] else '',
128
- 'Data Quality Issues': ', '.join(data_quality_issues) if details['is_included_in_optimization'] and data_quality_issues else '',
129
  'Validation Status': validation_status
130
  })
131
 
132
  df = pd.DataFrame(results)
133
-
134
- # Sort by exclusion status first, then by demand
135
  df = df.sort_values(['Excluded from Optimization', 'Demand'], ascending=[False, False])
136
-
137
  return df
138
 
139
  def get_summary_statistics(self, df: pd.DataFrame) -> Dict:
140
- """Generate summary statistics using filtering module analysis"""
141
-
142
- # Get analysis from filtering module
143
  analysis = self.filter_instance.get_complete_product_analysis()
144
-
145
- # Calculate issues for included products only
146
  included_df = df[df['Excluded from Optimization'] == False]
147
 
148
- no_line_assignment = len(included_df[included_df['Has Line Assignment'] == "❌"])
149
- no_staffing = len(included_df[included_df['Has Staffing Data'] == "❌"])
150
- no_speed = len(included_df[included_df['Has Speed Data'] == "❌"])
151
- no_hierarchy = len(included_df[included_df['Has Hierarchy Data'] == "❌"])
152
-
153
- # Product type and line type distributions
154
- type_counts = df['Product Type'].value_counts().to_dict()
155
-
156
- # Staffing summary from analysis
157
- total_unicef_needed = sum(p['unicef_staff'] for p in analysis['product_details'].values())
158
- total_humanizer_needed = sum(p['humanizer_staff'] for p in analysis['product_details'].values())
159
-
160
  return {
161
  'total_products': analysis['total_products'],
162
  'total_demand': analysis['total_demand'],
@@ -164,205 +126,142 @@ class DemandValidationViz:
164
  'excluded_products': analysis['excluded_count'],
165
  'included_demand': analysis['included_demand'],
166
  'excluded_demand': analysis['excluded_demand'],
167
- 'type_counts': type_counts,
168
- 'no_line_assignment': no_line_assignment,
169
- 'no_staffing': no_staffing,
170
- 'no_speed': no_speed,
171
- 'no_hierarchy': no_hierarchy,
172
  'standalone_masters': analysis['standalone_masters_count'],
173
- 'total_unicef_needed': total_unicef_needed,
174
- 'total_humanizer_needed': total_humanizer_needed
175
  }
176
 
177
 
178
  def display_demand_validation():
179
  """
180
  Display demand validation analysis in Streamlit.
181
- Shows the results of demand filtering with detailed data quality analysis.
182
  """
183
  st.header("πŸ“‹ Demand Data Validation")
184
- st.markdown("""**Analysis of Filtered Demand Data**: This page shows which products are included/excluded from optimization
185
- based on the demand filtering criteria, plus data quality assessment for included products.""")
186
 
187
- # Initialize validator
188
  validator = DemandValidationViz()
189
-
190
- # Load data
191
- with st.spinner("Loading data for validation..."):
192
  if not validator.load_data():
193
  st.error("Failed to load data for validation.")
194
  return
195
-
196
- # Perform validation
197
- with st.spinner("Analyzing demand data..."):
198
  validation_df = validator.validate_all_products()
199
- summary_stats = validator.get_summary_statistics(validation_df)
200
 
201
- # Display summary statistics
202
  st.subheader("πŸ“Š Summary Statistics")
203
-
204
  col1, col2, col3, col4 = st.columns(4)
205
-
206
- with col1:
207
- st.metric("Total Products", summary_stats['total_products'])
208
- st.metric("Included in Optimization", summary_stats['included_products'], delta="Ready for optimization")
209
-
210
- with col2:
211
- st.metric("Total Demand", f"{summary_stats['total_demand']:,}")
212
- st.metric("Excluded from Optimization", summary_stats['excluded_products'], delta="Omitted")
213
-
214
- with col3:
215
- st.metric("Included Demand", f"{summary_stats['included_demand']:,}", delta="Will be optimized")
216
- st.metric("UNICEF Staff Needed", summary_stats['total_unicef_needed'])
217
-
218
- with col4:
219
- st.metric("Excluded Demand", f"{summary_stats['excluded_demand']:,}", delta="Omitted")
220
- st.metric("Humanizer Staff Needed", summary_stats['total_humanizer_needed'])
221
-
222
- # Product type distribution
223
  st.subheader("πŸ“ˆ Product Type Distribution")
224
- if summary_stats['type_counts']:
225
  col1, col2 = st.columns(2)
226
-
227
  with col1:
228
- type_df = pd.DataFrame(list(summary_stats['type_counts'].items()),
229
- columns=['Product Type', 'Count'])
230
  st.bar_chart(type_df.set_index('Product Type'))
231
-
232
  with col2:
233
- for ptype, count in summary_stats['type_counts'].items():
234
- percentage = (count / summary_stats['total_products']) * 100
235
  st.write(f"**{ptype}:** {count} products ({percentage:.1f}%)")
236
 
237
- # Validation issues summary for included products
238
- st.subheader("⚠️ Data Quality Issues (Products Included in Optimization)")
239
- st.write("Data quality issues affecting products that **will be** included in optimization (these don't exclude products but may affect optimization quality):")
240
-
241
  col1, col2, col3, col4 = st.columns(4)
242
-
243
- with col1:
244
- st.metric("No Line Assignment", summary_stats['no_line_assignment'],
245
- delta=None if summary_stats['no_line_assignment'] == 0 else "Issue")
246
-
247
- with col2:
248
- st.metric("No Staffing Data", summary_stats['no_staffing'],
249
- delta=None if summary_stats['no_staffing'] == 0 else "Issue")
250
-
251
- with col3:
252
- st.metric("No Speed Data", summary_stats['no_speed'],
253
- delta=None if summary_stats['no_speed'] == 0 else "Will use default")
254
-
255
- with col4:
256
- st.metric("No Hierarchy Data", summary_stats['no_hierarchy'],
257
- delta=None if summary_stats['no_hierarchy'] == 0 else "Issue")
258
-
259
- # Separate the results into included and excluded
260
  included_df = validation_df[validation_df['Excluded from Optimization'] == False].copy()
261
  excluded_df = validation_df[validation_df['Excluded from Optimization'] == True].copy()
262
 
263
- # Products Included in Optimization
264
  st.subheader("βœ… Products Included in Optimization")
265
- st.write(f"**{len(included_df)} products** will be included in the optimization with total demand of **{included_df['Demand'].sum():,} units**")
266
 
267
  if len(included_df) > 0:
268
- # Filter options for included products
269
  col1, col2 = st.columns(2)
 
 
270
 
271
- with col1:
272
- included_type_filter = st.selectbox("Filter included by type",
273
- options=["All"] + list(included_df['Product Type'].unique()),
274
- key="included_filter")
275
-
276
- with col2:
277
- included_min_demand = st.number_input("Minimum demand (included)", min_value=0, value=0, key="included_demand")
278
-
279
- # Apply filters to included
280
- filtered_included = included_df.copy()
281
- if included_type_filter != "All":
282
- filtered_included = filtered_included[filtered_included['Product Type'] == included_type_filter]
283
- if included_min_demand > 0:
284
- filtered_included = filtered_included[filtered_included['Demand'] >= included_min_demand]
285
 
286
- # Configure column display for included
287
- included_columns = ['Product ID', 'Demand', 'Product Type', 'Line Type', 'UNICEF Staff', 'Humanizer Staff', 'Production Speed (units/hour)', 'Data Quality Issues', 'Validation Status']
288
-
289
- st.dataframe(
290
- filtered_included[included_columns],
291
- use_container_width=True,
292
- height=300
293
- )
294
  else:
295
  st.warning("No products are included in optimization!")
296
 
297
- # Products Excluded from Optimization
298
  st.subheader("🚫 Products Excluded from Optimization")
299
- st.write(f"**{len(excluded_df)} products** are excluded from optimization with total demand of **{excluded_df['Demand'].sum():,} units**")
300
- st.info("""These products are **filtered out** from optimization due to:
301
- β€’ Missing line assignments (for non-standalone masters)
302
- β€’ Zero staffing requirements (both UNICEF and Humanizer staff = 0)
303
- β€’ Non-standalone masters (excluded from production planning)""")
304
 
305
  if len(excluded_df) > 0:
306
  # Show exclusion breakdown
307
- exclusion_reasons = excluded_df['Exclusion Reasons'].value_counts()
308
  st.write("**Exclusion reasons:**")
309
- for reason, count in exclusion_reasons.items():
310
  st.write(f"β€’ {reason}: {count} products")
311
 
312
- # Configure column display for excluded
313
- excluded_columns = ['Product ID', 'Demand', 'Product Type', 'Exclusion Reasons', 'UNICEF Staff', 'Humanizer Staff', 'Line Type']
314
-
315
- st.dataframe(
316
- excluded_df[excluded_columns],
317
- use_container_width=True,
318
- height=200
319
- )
320
 
321
- # Export option
322
  if st.button("πŸ“₯ Export Validation Results to CSV"):
323
- csv = validation_df.to_csv(index=False)
324
- st.download_button(
325
- label="Download CSV",
326
- data=csv,
327
- file_name="demand_validation_results.csv",
328
- mime="text/csv"
329
- )
330
- else:
331
- st.info("No products match the selected filters.")
332
 
333
- # Recommendations
334
  st.subheader("πŸ’‘ Recommendations")
335
 
336
- recommendations = []
337
-
338
- # Focus on exclusion criteria first
339
- if summary_stats['excluded_products'] > 0:
340
- st.warning(f"**Optimization Scope**: {summary_stats['excluded_products']} products ({summary_stats['excluded_demand']:,} units demand) are excluded from optimization.")
341
-
342
- # Data quality issues for INCLUDED products only
343
- if summary_stats['no_line_assignment'] > 0:
344
- recommendations.append(f"**Line Assignment**: {summary_stats['no_line_assignment']} products included in optimization are missing line assignments.")
345
-
346
- if summary_stats['no_staffing'] > 0:
347
- recommendations.append(f"**Staffing Data**: {summary_stats['no_staffing']} products included in optimization are missing staffing requirements.")
348
-
349
- if summary_stats['no_speed'] > 0:
350
- recommendations.append(f"**Speed Data**: {summary_stats['no_speed']} products included in optimization are missing production speed data. The optimization will use a default speed of 106.7 units/hour for these products.")
351
-
352
- if summary_stats['no_hierarchy'] > 0:
353
- recommendations.append(f"**Hierarchy Data**: {summary_stats['no_hierarchy']} products included in optimization are not in the kit hierarchy.")
354
 
355
- if recommendations:
356
- for rec in recommendations:
357
- st.info(rec)
 
 
 
 
 
 
358
 
359
  # Overall status
360
- if summary_stats['included_products'] > 0:
361
- st.success(f"βœ… **Optimization Scope**: {summary_stats['included_products']} products with {summary_stats['included_demand']:,} units demand will be included in optimization!")
362
- if summary_stats['no_speed'] == 0 and summary_stats['no_hierarchy'] == 0:
363
- st.info("πŸŽ‰ All included products have complete data - optimization should run smoothly!")
364
  else:
365
- st.error("❌ No products passed the filtering criteria. Please review the exclusion reasons above and check your data configuration.")
366
 
367
 
368
  if __name__ == "__main__":
 
2
  """
3
  Demand Data Validation Visualization Module
4
 
5
+ Provides Streamlit visualization for demand data validation.
6
+ Shows which products are included/excluded from optimization and why.
 
7
  """
8
 
9
  import pandas as pd
10
  import streamlit as st
11
+ from typing import Dict
12
+ from src.config.constants import LineType
 
13
  from src.demand_filtering import DemandFilter
14
 
15
 
16
+ # Simple mapping for product level names
17
+ LEVEL_NAMES = {
18
+ 'prepack': 'prepack',
19
+ 'subkit': 'subkit',
20
+ 'master': {
21
+ 'standalone': 'standalone_master',
22
+ 'with_hierarchy': 'master_with_hierarchy'
23
+ },
24
+ 'unclassified': 'no_hierarchy_data'
25
+ }
26
+
27
+
28
  class DemandValidationViz:
29
  """
30
+ Simple visualization wrapper for demand filtering results.
31
+ All filtering logic is in DemandFilter - this just displays the results.
32
  """
33
 
34
  def __init__(self):
 
36
  self.speed_data = None
37
 
38
  def load_data(self):
39
+ """Load all data needed for visualization"""
40
  try:
 
41
  from src.config import optimization_config
42
  self.speed_data = optimization_config.get_per_product_speed()
 
 
43
  return self.filter_instance.load_data()
 
44
  except Exception as e:
45
  error_msg = f"Error loading data: {str(e)}"
46
  print(error_msg)
47
+ if st:
48
  st.error(error_msg)
 
 
49
  return False
50
 
 
 
 
 
 
 
51
  def validate_all_products(self) -> pd.DataFrame:
52
  """
53
+ Create DataFrame with validation results for all products.
54
+ Main visualization method - converts filtering results to displayable format.
55
  """
56
+ # Get analysis from filtering module
57
  analysis = self.filter_instance.get_complete_product_analysis()
58
  product_details = analysis['product_details']
59
 
60
  results = []
 
61
  for product_id, details in product_details.items():
62
+ # Calculate production hours if speed data available
63
+ speed = self.speed_data.get(product_id) if self.speed_data else None
64
+ production_hours = (details['demand'] / speed) if speed and speed > 0 else None
 
 
65
 
66
  # Get line type name
67
  line_type_id = details['line_assignment']
68
+ line_name = LineType.get_name(line_type_id) if line_type_id is not None else "no_assignment"
 
 
 
69
 
70
+ # Get level name (simplified)
71
+ ptype = details['product_type']
72
+ if ptype == 'unclassified':
73
+ level_name = LEVEL_NAMES['unclassified']
74
+ elif ptype == 'master':
75
+ level_name = LEVEL_NAMES['master']['standalone' if details['is_standalone_master'] else 'with_hierarchy']
76
+ else:
77
+ level_name = LEVEL_NAMES.get(ptype, f"level_{ptype}")
 
 
 
 
 
 
78
 
79
+ # Build validation status message
80
  if not details['is_included_in_optimization']:
81
  validation_status = f"🚫 Excluded: {', '.join(details['exclusion_reasons'])}"
82
  else:
83
+ issues = []
 
84
  if speed is None:
85
+ issues.append("missing_speed_data (will use default)")
86
  if not details['has_hierarchy']:
87
+ issues.append("no_hierarchy_data")
88
+ validation_status = f"⚠️ Data Issues: {', '.join(issues)}" if issues else "βœ… Ready for optimization"
 
 
 
 
89
 
90
  results.append({
91
  'Product ID': product_id,
92
  'Demand': details['demand'],
93
+ 'Product Type': ptype.title(),
94
  'Level': level_name,
95
  'Is Standalone Master': "Yes" if details['is_standalone_master'] else "No",
96
  'Line Type ID': line_type_id if line_type_id else "N/A",
 
99
  'Humanizer Staff': details['humanizer_staff'],
100
  'Total Staff': details['total_staff'],
101
  'Production Speed (units/hour)': f"{speed:.1f}" if speed else "N/A",
102
+ 'Production Hours Needed': f"{production_hours:.1f}" if production_hours else "N/A",
103
  'Has Line Assignment': "βœ…" if details['has_line_assignment'] else "❌",
104
  'Has Staffing Data': "βœ…" if details['has_staffing'] else "❌",
105
  'Has Speed Data': "βœ…" if speed is not None else "❌ (will use default)",
106
  'Has Hierarchy Data': "βœ…" if details['has_hierarchy'] else "❌",
107
  'Excluded from Optimization': not details['is_included_in_optimization'],
108
  'Exclusion Reasons': ', '.join(details['exclusion_reasons']) if details['exclusion_reasons'] else '',
109
+ 'Data Quality Issues': ', '.join(issues) if details['is_included_in_optimization'] and 'issues' in locals() and issues else '',
110
  'Validation Status': validation_status
111
  })
112
 
113
  df = pd.DataFrame(results)
 
 
114
  df = df.sort_values(['Excluded from Optimization', 'Demand'], ascending=[False, False])
 
115
  return df
116
 
117
  def get_summary_statistics(self, df: pd.DataFrame) -> Dict:
118
+ """Calculate summary statistics from validation results"""
 
 
119
  analysis = self.filter_instance.get_complete_product_analysis()
 
 
120
  included_df = df[df['Excluded from Optimization'] == False]
121
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  return {
123
  'total_products': analysis['total_products'],
124
  'total_demand': analysis['total_demand'],
 
126
  'excluded_products': analysis['excluded_count'],
127
  'included_demand': analysis['included_demand'],
128
  'excluded_demand': analysis['excluded_demand'],
129
+ 'type_counts': df['Product Type'].value_counts().to_dict(),
130
+ 'no_line_assignment': len(included_df[included_df['Has Line Assignment'] == "❌"]),
131
+ 'no_staffing': len(included_df[included_df['Has Staffing Data'] == "❌"]),
132
+ 'no_speed': len(included_df[included_df['Has Speed Data'].str.contains("❌")]),
133
+ 'no_hierarchy': len(included_df[included_df['Has Hierarchy Data'] == "❌"]),
134
  'standalone_masters': analysis['standalone_masters_count'],
135
+ 'total_unicef_needed': sum(p['unicef_staff'] for p in analysis['product_details'].values()),
136
+ 'total_humanizer_needed': sum(p['humanizer_staff'] for p in analysis['product_details'].values())
137
  }
138
 
139
 
140
  def display_demand_validation():
141
  """
142
  Display demand validation analysis in Streamlit.
143
+ Main entry point for the validation page.
144
  """
145
  st.header("πŸ“‹ Demand Data Validation")
146
+ st.markdown("Analysis showing which products are included/excluded from optimization and data quality status.")
 
147
 
148
+ # Load and analyze data
149
  validator = DemandValidationViz()
150
+ with st.spinner("Loading and analyzing data..."):
 
 
151
  if not validator.load_data():
152
  st.error("Failed to load data for validation.")
153
  return
 
 
 
154
  validation_df = validator.validate_all_products()
155
+ stats = validator.get_summary_statistics(validation_df)
156
 
157
+ # ===== SUMMARY METRICS =====
158
  st.subheader("πŸ“Š Summary Statistics")
 
159
  col1, col2, col3, col4 = st.columns(4)
160
+ col1.metric("Total Products", stats['total_products'])
161
+ col1.metric("Included in Optimization", stats['included_products'], delta="Ready")
162
+ col2.metric("Total Demand", f"{stats['total_demand']:,}")
163
+ col2.metric("Excluded from Optimization", stats['excluded_products'], delta="Omitted")
164
+ col3.metric("Included Demand", f"{stats['included_demand']:,}", delta="Will be optimized")
165
+ col3.metric("UNICEF Staff Needed", stats['total_unicef_needed'])
166
+ col4.metric("Excluded Demand", f"{stats['excluded_demand']:,}", delta="Omitted")
167
+ col4.metric("Humanizer Staff Needed", stats['total_humanizer_needed'])
168
+
169
+ # ===== PRODUCT TYPE DISTRIBUTION =====
 
 
 
 
 
 
 
 
170
  st.subheader("πŸ“ˆ Product Type Distribution")
171
+ if stats['type_counts']:
172
  col1, col2 = st.columns(2)
 
173
  with col1:
174
+ type_df = pd.DataFrame(list(stats['type_counts'].items()), columns=['Product Type', 'Count'])
 
175
  st.bar_chart(type_df.set_index('Product Type'))
 
176
  with col2:
177
+ for ptype, count in stats['type_counts'].items():
178
+ percentage = (count / stats['total_products']) * 100
179
  st.write(f"**{ptype}:** {count} products ({percentage:.1f}%)")
180
 
181
+ # ===== DATA QUALITY ISSUES (for included products only) =====
182
+ st.subheader("⚠️ Data Quality Issues (Included Products)")
183
+ st.write("Issues affecting products that **will be** included in optimization:")
 
184
  col1, col2, col3, col4 = st.columns(4)
185
+ col1.metric("No Line Assignment", stats['no_line_assignment'],
186
+ delta=None if stats['no_line_assignment'] == 0 else "Issue")
187
+ col2.metric("No Staffing Data", stats['no_staffing'],
188
+ delta=None if stats['no_staffing'] == 0 else "Issue")
189
+ col3.metric("No Speed Data", stats['no_speed'],
190
+ delta=None if stats['no_speed'] == 0 else "Will use default")
191
+ col4.metric("No Hierarchy Data", stats['no_hierarchy'],
192
+ delta=None if stats['no_hierarchy'] == 0 else "Issue")
193
+
194
+ # ===== INCLUDED PRODUCTS TABLE =====
 
 
 
 
 
 
 
 
195
  included_df = validation_df[validation_df['Excluded from Optimization'] == False].copy()
196
  excluded_df = validation_df[validation_df['Excluded from Optimization'] == True].copy()
197
 
 
198
  st.subheader("βœ… Products Included in Optimization")
199
+ st.write(f"**{len(included_df)} products** with total demand of **{included_df['Demand'].sum():,} units**")
200
 
201
  if len(included_df) > 0:
202
+ # Filters
203
  col1, col2 = st.columns(2)
204
+ type_filter = col1.selectbox("Filter by type", ["All"] + list(included_df['Product Type'].unique()), key="inc_filter")
205
+ min_demand = col2.number_input("Minimum demand", min_value=0, value=0, key="inc_demand")
206
 
207
+ # Apply filters
208
+ filtered = included_df.copy()
209
+ if type_filter != "All":
210
+ filtered = filtered[filtered['Product Type'] == type_filter]
211
+ if min_demand > 0:
212
+ filtered = filtered[filtered['Demand'] >= min_demand]
 
 
 
 
 
 
 
 
213
 
214
+ # Display table
215
+ display_cols = ['Product ID', 'Demand', 'Product Type', 'Line Type', 'UNICEF Staff',
216
+ 'Humanizer Staff', 'Production Speed (units/hour)', 'Data Quality Issues', 'Validation Status']
217
+ st.dataframe(filtered[display_cols], use_container_width=True, height=300)
 
 
 
 
218
  else:
219
  st.warning("No products are included in optimization!")
220
 
221
+ # ===== EXCLUDED PRODUCTS TABLE =====
222
  st.subheader("🚫 Products Excluded from Optimization")
223
+ st.write(f"**{len(excluded_df)} products** with total demand of **{excluded_df['Demand'].sum():,} units**")
224
+ st.info("Excluded due to: missing line assignments, zero staffing, or non-standalone masters")
 
 
 
225
 
226
  if len(excluded_df) > 0:
227
  # Show exclusion breakdown
 
228
  st.write("**Exclusion reasons:**")
229
+ for reason, count in excluded_df['Exclusion Reasons'].value_counts().items():
230
  st.write(f"β€’ {reason}: {count} products")
231
 
232
+ # Display table
233
+ display_cols = ['Product ID', 'Demand', 'Product Type', 'Exclusion Reasons',
234
+ 'UNICEF Staff', 'Humanizer Staff', 'Line Type']
235
+ st.dataframe(excluded_df[display_cols], use_container_width=True, height=200)
 
 
 
 
236
 
237
+ # Export button
238
  if st.button("πŸ“₯ Export Validation Results to CSV"):
239
+ st.download_button("Download CSV", validation_df.to_csv(index=False),
240
+ file_name="demand_validation_results.csv", mime="text/csv")
 
 
 
 
 
 
 
241
 
242
+ # ===== RECOMMENDATIONS =====
243
  st.subheader("πŸ’‘ Recommendations")
244
 
245
+ if stats['excluded_products'] > 0:
246
+ st.warning(f"**{stats['excluded_products']} products** ({stats['excluded_demand']:,} units) excluded from optimization")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
 
248
+ # Show data quality issues for included products
249
+ if stats['no_line_assignment'] > 0:
250
+ st.info(f"**Line Assignment**: {stats['no_line_assignment']} included products missing line assignments")
251
+ if stats['no_staffing'] > 0:
252
+ st.info(f"**Staffing Data**: {stats['no_staffing']} included products missing staffing requirements")
253
+ if stats['no_speed'] > 0:
254
+ st.info(f"**Speed Data**: {stats['no_speed']} included products missing speed data (will use default 106.7 units/hour)")
255
+ if stats['no_hierarchy'] > 0:
256
+ st.info(f"**Hierarchy Data**: {stats['no_hierarchy']} included products not in kit hierarchy")
257
 
258
  # Overall status
259
+ if stats['included_products'] > 0:
260
+ st.success(f"βœ… **{stats['included_products']} products** with {stats['included_demand']:,} units demand ready for optimization!")
261
+ if stats['no_speed'] == 0 and stats['no_hierarchy'] == 0:
262
+ st.info("πŸŽ‰ All included products have complete data!")
263
  else:
264
+ st.error("❌ No products passed filtering. Review exclusion reasons and check data configuration.")
265
 
266
 
267
  if __name__ == "__main__":
src/models/optimizer_real.py CHANGED
@@ -268,82 +268,124 @@ def run_optimization_for_week():
268
  INF = solver.infinity()
269
 
270
  # --- Variables ---
271
- # Z[p,ell,s,t] ∈ {0,1}: 1 if product p runs on (line,shift,day)
272
- Z, T, U = {}, {}, {} # T: run hours, U: production units
273
  for p in sorted_product_list:
274
  for ell in line_tuples: # ell = (line_type_id, idx)
275
  for s in active_shift_list:
276
  for t in date_span_list:
277
- Z[p, ell, s, t] = solver.BoolVar(f"Z_{p}_{ell[0]}_{ell[1]}_s{s}_d{t}")
278
- T[p, ell, s, t] = solver.NumVar(0, Hmax_s[s], f"T_{p}_{ell[0]}_{ell[1]}_s{s}_d{t}")
279
- U[p, ell, s, t] = solver.NumVar(0, INF, f"U_{p}_{ell[0]}_{ell[1]}_s{s}_d{t}")
 
 
 
280
 
281
- # Idle employee variables: IDLE[e,s,t] = number of idle employees of type e in shift s on day t
282
- IDLE = {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
  for e in employee_type_list:
284
  for s in active_shift_list:
285
  for t in date_span_list:
286
- max_idle = max_employee_type_day[e][t] # Can't have more idle employees than available
287
- IDLE[e, s, t] = solver.IntVar(0, max_idle, f"IDLE_{e}_s{s}_d{t}")
 
 
 
 
 
288
 
289
  # Note: Binary variables for bulk payment are now created inline in the cost calculation
290
 
291
- # --- Objective: total labor cost with payment modes + hierarchy timing penalty ---
 
292
  PAYMENT_MODE_CONFIG = get_payment_mode_config() # Dynamic call
293
  print(f"Payment mode configuration: {PAYMENT_MODE_CONFIG}")
294
 
295
  # Build cost terms based on payment mode
296
  cost_terms = []
297
 
298
- for e in employee_type_list:
299
- for s in active_shift_list:
300
- payment_mode = PAYMENT_MODE_CONFIG.get(s, "partial") # Default to partial if not specified
301
-
302
- if payment_mode == "partial":
303
- # Partial payment: pay for actual hours worked
304
- for p in sorted_product_list:
305
- for ell in line_tuples:
306
- for t in date_span_list:
307
- cost_terms.append(cost[e][s] * TEAM_REQ_PER_PRODUCT[e][p] * T[p, ell, s, t])
308
-
309
- elif payment_mode == "bulk":
310
- # Bulk payment: if employees work ANY hours in a shift, pay them for FULL shift hours
311
- # BUT only pay the employees who actually work, not all employees of that type
312
- for p in sorted_product_list:
313
- for ell in line_tuples:
314
- for t in date_span_list:
315
- # Calculate actual employees working: TEAM_REQ_PER_PRODUCT[e][p] employees work T[p,ell,s,t] hours
316
- # For bulk payment: if T[p,ell,s,t] > 0, pay TEAM_REQ_PER_PRODUCT[e][p] employees for full shift
317
- # We need a binary variable for each (e,s,p,ell,t) combination
318
- # But we can use the existing logic: if T > 0, then those specific employees get bulk pay
319
-
320
- # Create binary variable for this specific work assignment
321
- work_binary = solver.BoolVar(f"work_{e}_s{s}_{p}_{ell[0]}{ell[1]}_d{t}")
322
-
323
- # Link work_binary to T[p,ell,s,t]: work_binary = 1 if T > 0
324
- solver.Add(T[p, ell, s, t] <= Hmax_s[s] * work_binary)
325
- solver.Add(work_binary * 0.001 <= T[p, ell, s, t])
326
-
327
- # Cost: pay the specific working employees for full shift hours
328
- cost_terms.append(cost[e][s] * Hmax_s[s] * TEAM_REQ_PER_PRODUCT[e][p] * work_binary)
329
-
330
- # Add idle employee costs (idle employees are paid for full shift hours)
331
  for e in employee_type_list:
332
  for s in active_shift_list:
333
  for t in date_span_list:
334
- cost_terms.append(cost[e][s] * Hmax_s[s] * IDLE[e, s, t])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
335
 
336
  total_cost = solver.Sum(cost_terms)
337
 
338
- # Objective: minimize total cost only
339
- # Dependency ordering is handled by topological sorting and hard constraints
 
340
  solver.Minimize(total_cost)
341
 
342
  # --- Constraints ---
343
 
344
  # 1) Weekly demand - must meet exactly (no over/under production)
345
  for p in sorted_product_list:
346
- total_production = solver.Sum(U[p, ell, s, t] for ell in line_tuples for s in active_shift_list for t in date_span_list)
347
  demand = DEMAND_DICTIONARY.get(p, 0)
348
 
349
  # Must produce at least the demand
@@ -356,9 +398,9 @@ def run_optimization_for_week():
356
  for ell in line_tuples:
357
  for s in active_shift_list:
358
  for t in date_span_list:
359
- solver.Add(solver.Sum(Z[p, ell, s, t] for p in sorted_product_list) <= 1)
360
  for p in sorted_product_list:
361
- solver.Add(T[p, ell, s, t] <= Hmax_s[s] * Z[p, ell, s, t])
362
 
363
  # 3) Product-line type compatibility + (optional) activity by day
364
  for p in sorted_product_list:
@@ -369,11 +411,11 @@ def run_optimization_for_week():
369
  for s in active_shift_list:
370
  for t in date_span_list:
371
  if ACTIVE[t][p] == 0 or not allowed:
372
- solver.Add(Z[p, ell, s, t] == 0)
373
- solver.Add(T[p, ell, s, t] == 0)
374
- solver.Add(U[p, ell, s, t] == 0)
375
 
376
- # 4) Line throughput: U ≀ product_speed * T
377
  for p in sorted_product_list:
378
  for ell in line_tuples:
379
  for s in active_shift_list:
@@ -384,11 +426,11 @@ def run_optimization_for_week():
384
  speed = PER_PRODUCT_SPEED[p]
385
  # Upper bound: units cannot exceed capacity
386
  solver.Add(
387
- U[p, ell, s, t] <= speed * T[p, ell, s, t]
388
  )
389
  # Lower bound: if working, must produce (prevent phantom work)
390
  solver.Add(
391
- U[p, ell, s, t] >= speed * T[p, ell, s, t]
392
  )
393
  else:
394
  # Default speed if not found
@@ -396,34 +438,40 @@ def run_optimization_for_week():
396
  print(f"Warning: No speed data for product {p}, using default {default_speed:.1f} per hour")
397
  # Upper bound: units cannot exceed capacity
398
  solver.Add(
399
- U[p, ell, s, t] <= default_speed * T[p, ell, s, t]
400
  )
401
  # Lower bound: if working, must produce (prevent phantom work)
402
  solver.Add(
403
- U[p, ell, s, t] >= default_speed * T[p, ell, s, t]
404
  )
405
 
406
- # 5) Per-shift staffing capacity by type: idle employees ≀ available headcount
407
  for e in employee_type_list:
408
  for s in active_shift_list:
409
  for t in date_span_list:
410
- # Idle employees cannot exceed available headcount
411
- # (Active employees are constrained by the working hours constraint below)
412
- solver.Add(IDLE[e, s, t] <= max_employee_type_day[e][t])
413
-
414
- # Working hours constraint: active employees cannot exceed shift hour capacity
415
  solver.Add(
416
- solver.Sum(TEAM_REQ_PER_PRODUCT[e][p] * T[p, ell, s, t] for p in sorted_product_list for ell in line_tuples)
417
  <= Hmax_s[s] * max_employee_type_day[e][t]
418
  )
419
 
420
- # 6) Per-day staffing capacity by type: sum(req*hours across shifts) ≀ 14h * headcount
 
421
  for e in employee_type_list:
422
- for t in date_span_list:
423
- solver.Add(
424
- solver.Sum(TEAM_REQ_PER_PRODUCT[e][p] * T[p, ell, s, t] for s in active_shift_list for p in sorted_product_list for ell in line_tuples)
425
- <= MAX_HOUR_PER_PERSON_PER_DAY * max_employee_type_day[e][t]
426
- )
 
 
 
 
 
 
 
 
 
427
 
428
  # 7) Shift ordering constraints (only apply if shifts are available)
429
  # Evening shift after regular shift
@@ -431,9 +479,9 @@ def run_optimization_for_week():
431
  for e in employee_type_list:
432
  for t in date_span_list:
433
  solver.Add(
434
- solver.Sum(TEAM_REQ_PER_PRODUCT[e][p] * T[p, ell, ShiftType.EVENING, t] for p in sorted_product_list for ell in line_tuples)
435
  <=
436
- solver.Sum(TEAM_REQ_PER_PRODUCT[e][p] * T[p, ell, ShiftType.REGULAR, t] for p in sorted_product_list for ell in line_tuples)
437
  )
438
 
439
  # Overtime should only be used when regular shift is at capacity
@@ -447,13 +495,13 @@ def run_optimization_for_week():
447
 
448
  # Total regular shift usage for this employee type and day
449
  regular_usage = solver.Sum(
450
- TEAM_REQ_PER_PRODUCT[e][p] * T[p, ell, ShiftType.REGULAR, t]
451
  for p in sorted_product_list for ell in line_tuples
452
  )
453
 
454
  # Total overtime usage for this employee type and day
455
  overtime_usage = solver.Sum(
456
- TEAM_REQ_PER_PRODUCT[e][p] * T[p, ell, ShiftType.OVERTIME, t]
457
  for p in sorted_product_list for ell in line_tuples
458
  )
459
 
@@ -476,47 +524,19 @@ def run_optimization_for_week():
476
  # 7.5) Bulk payment linking constraints are now handled inline in the cost calculation
477
 
478
  # 7.6) *** FIXED MINIMUM UNICEF EMPLOYEES CONSTRAINT ***
479
- # Ensure minimum UNICEF fixed-term staff are present every working day
480
- FIXED_MIN_UNICEF_PER_DAY = get_fixed_min_unicef_per_day() # Dynamic call
481
  if 'UNICEF Fixed term' in employee_type_list and FIXED_MIN_UNICEF_PER_DAY > 0:
482
- print(f"\n[FIXED STAFFING] Adding constraint for minimum {FIXED_MIN_UNICEF_PER_DAY} UNICEF employees per day...")
483
-
484
- unicef_constraints_added = 0
485
- for t in date_span_list:
486
- # Method 1: Simple approach - ensure minimum UNICEF employees are scheduled
487
- # regardless of whether they're working or idle
488
- # Sum up all possible UNICEF work assignments + idle UNICEF employees
489
-
490
- # Count all UNICEF work hours across all products, lines, and shifts
491
- all_unicef_hours = solver.Sum(
492
- TEAM_REQ_PER_PRODUCT.get('UNICEF Fixed term', {}).get(p, 0) * T[p, ell, s, t]
493
- for p in sorted_product_list
494
- for ell in line_tuples
495
- for s in active_shift_list
496
- )
497
-
498
- # Count idle UNICEF employees across all shifts
499
- idle_unicef_employees = solver.Sum(
500
- IDLE['UNICEF Fixed term', s, t] for s in active_shift_list
501
- )
502
-
503
- # Constraint: total hours (work + idle*14) must meet minimum staffing
504
- # This ensures at least FIXED_MIN_UNICEF_PER_DAY employees are present
505
- solver.Add(all_unicef_hours + idle_unicef_employees * MAX_HOUR_PER_PERSON_PER_DAY >= FIXED_MIN_UNICEF_PER_DAY * MAX_HOUR_PER_PERSON_PER_DAY)
506
-
507
- # Additional constraint: ensure idle employees are properly linked to total headcount
508
- # This prevents the solver from avoiding the minimum by setting everyone to zero
509
- total_unicef_hours_needed_for_production = solver.Sum(
510
- TEAM_REQ_PER_PRODUCT.get('UNICEF Fixed term', {}).get(p, 0) * T[p, ell, s, t]
511
- for p in sorted_product_list for ell in line_tuples for s in active_shift_list
512
- )
513
-
514
- # Simpler approach: just ensure the basic constraint is strong enough
515
- # The main constraint above should be sufficient: all_unicef_hours + idle*14 >= min*14
516
- # This already forces idle employees when production is insufficient
517
- unicef_constraints_added += 1
518
-
519
- print(f"[FIXED STAFFING] Added {unicef_constraints_added} constraints ensuring >= {FIXED_MIN_UNICEF_PER_DAY} UNICEF employees per day")
520
 
521
  # 8) *** HIERARCHY DEPENDENCY CONSTRAINTS ***
522
  # For subkits with prepack dependencies: dependencies should be produced before or same time
@@ -533,10 +553,10 @@ def run_optimization_for_week():
533
  if dep in sorted_product_list: # Only if dependency is also in production list
534
  # Calculate "completion time" for each product (sum of all production times)
535
  p_completion = solver.Sum(
536
- t * T[p, ell, s, t] for ell in line_tuples for s in active_shift_list for t in date_span_list
537
  )
538
  dep_completion = solver.Sum(
539
- t * T[dep, ell, s, t] for ell in line_tuples for s in active_shift_list for t in date_span_list
540
  )
541
 
542
  # Dependency should complete before or at the same time
@@ -562,7 +582,7 @@ def run_optimization_for_week():
562
  result['objective'] = solver.Objective().Value()
563
 
564
  # Weekly production
565
- prod_week = {p: sum(U[p, ell, s, t].solution_value() for ell in line_tuples for s in active_shift_list for t in date_span_list) for p in sorted_product_list}
566
  result['weekly_production'] = prod_week
567
 
568
  # Which product ran on which line/shift/day
@@ -570,7 +590,7 @@ def run_optimization_for_week():
570
  for t in date_span_list:
571
  for ell in line_tuples:
572
  for s in active_shift_list:
573
- chosen = [p for p in sorted_product_list if Z[p, ell, s, t].solution_value() > 0.5]
574
  if chosen:
575
  p = chosen[0]
576
  schedule.append({
@@ -579,8 +599,8 @@ def run_optimization_for_week():
579
  'line_idx': ell[1],
580
  'shift': s,
581
  'product': p,
582
- 'run_hours': T[p, ell, s, t].solution_value(),
583
- 'units': U[p, ell, s, t].solution_value(),
584
  })
585
  result['run_schedule'] = schedule
586
 
@@ -589,7 +609,7 @@ def run_optimization_for_week():
589
  for e in employee_type_list:
590
  for s in active_shift_list:
591
  for t in date_span_list:
592
- used_ph = sum(TEAM_REQ_PER_PRODUCT[e][p] * T[p, ell, s, t].solution_value() for p in sorted_product_list for ell in line_tuples)
593
  need = ceil(used_ph / (Hmax_s[s] + 1e-9))
594
  headcount.append({'emp_type': e, 'shift': s, 'day': t,
595
  'needed': need, 'available': max_employee_type_day[e][t]})
@@ -599,26 +619,54 @@ def run_optimization_for_week():
599
  ph_by_day = []
600
  for e in employee_type_list:
601
  for t in date_span_list:
602
- used = sum(TEAM_REQ_PER_PRODUCT[e][p] * T[p, ell, s, t].solution_value() for s in active_shift_list for p in sorted_product_list for ell in line_tuples)
603
  ph_by_day.append({'emp_type': e, 'day': t,
604
  'used_person_hours': used,
605
  'cap_person_hours': Hmax_daily * max_employee_type_day[e][t]})
606
  result['person_hours_by_day'] = ph_by_day
607
 
608
- # Idle employee data for visualization
609
- idle_employees = []
610
  for e in employee_type_list:
611
  for s in active_shift_list:
612
  for t in date_span_list:
613
- idle_count = IDLE[e, s, t].solution_value()
614
- if idle_count > 0: # Only include non-zero idle counts
615
- idle_employees.append({
616
- 'emp_type': e,
 
 
 
617
  'shift': s,
618
  'day': t,
619
- 'idle_count': idle_count
 
 
 
620
  })
621
- result['idle_employees'] = idle_employees
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
622
 
623
  # Pretty print
624
  print("Objective (min cost):", result['objective'])
@@ -631,7 +679,7 @@ def run_optimization_for_week():
631
  shift_name = ShiftType.get_name(row['shift'])
632
  line_name = LineType.get_name(row['line_type_id'])
633
  print(f"date_span_list{row['day']} {line_name}-{row['line_idx']} {shift_name}: "
634
- f"{row['product']} T={row['run_hours']:.2f}h U={row['units']:.1f}")
635
 
636
  print("\n--- Implied headcount need (per type/shift/day) ---")
637
  for row in headcount:
@@ -644,19 +692,23 @@ def run_optimization_for_week():
644
  print(f"{row['emp_type']}, date_span_list{row['day']}: used={row['used_person_hours']:.1f} "
645
  f"(cap {row['cap_person_hours']})")
646
 
647
- # Report idle employees
648
- print("\n--- Idle employees (per type/shift/day) ---")
649
- idle_found = False
650
- for e in employee_type_list:
651
- for s in active_shift_list:
652
- for t in date_span_list:
653
- idle_count = IDLE[e, s, t].solution_value()
654
- if idle_count > 0:
655
- shift_name = ShiftType.get_name(s)
656
- print(f"{e}, {shift_name}, date_span_list{t}: idle={idle_count}")
657
- idle_found = True
658
- if not idle_found:
659
- print("No idle employees scheduled")
 
 
 
 
660
 
661
  return result
662
 
 
268
  INF = solver.infinity()
269
 
270
  # --- Variables ---
271
+ # Assignment[p,ell,s,t] ∈ {0,1}: 1 if product p runs on (line,shift,day)
272
+ Assignment, Hours, Units = {}, {}, {} # Hours: run hours, Units: production units
273
  for p in sorted_product_list:
274
  for ell in line_tuples: # ell = (line_type_id, idx)
275
  for s in active_shift_list:
276
  for t in date_span_list:
277
+ # Is product p assigned to run on line ell, during shift s, on day t?
278
+ Assignment[p, ell, s, t] = solver.BoolVar(f"Z_{p}_{ell[0]}_{ell[1]}_s{s}_d{t}")
279
+ # How many hours does product p run on line ell, during shift s, on day t?
280
+ Hours[p, ell, s, t] = solver.NumVar(0, Hmax_s[s], f"T_{p}_{ell[0]}_{ell[1]}_s{s}_d{t}")
281
+ # How many units does product p produce on line ell, during shift s, on day t?
282
+ Units[p, ell, s, t] = solver.NumVar(0, INF, f"U_{p}_{ell[0]}_{ell[1]}_s{s}_d{t}")
283
 
284
+ # Note: IDLE variables removed - we only track employees actually working on production
285
+
286
+ # Load fixed minimum UNICEF requirement (needed for EMPLOYEE_COUNT variable creation)
287
+ FIXED_MIN_UNICEF_PER_DAY = get_fixed_min_unicef_per_day() # Dynamic call
288
+
289
+ # Variable to track actual number of employees of each type working each shift each day
290
+ # This represents how many distinct employees of type e are working in shift s on day t
291
+ EMPLOYEE_COUNT = {}
292
+ for e in employee_type_list:
293
+ for s in active_shift_list:
294
+ for t in date_span_list:
295
+ # Note: Minimum staffing is per day, not per shift
296
+ # We'll handle the daily minimum constraint separately
297
+ max_count = max_employee_type_day.get(e, {}).get(t, 100)
298
+ EMPLOYEE_COUNT[e, s, t] = solver.IntVar(
299
+ 0, # No minimum per shift (daily minimum handled separately)
300
+ max_count,
301
+ f"EmpCount_{e}_s{s}_day{t}"
302
+ )
303
+
304
+ # Track total person-hours worked by each employee type per shift per day
305
+ # This is needed for employee-centric wage calculation
306
+ EMPLOYEE_HOURS = {}
307
  for e in employee_type_list:
308
  for s in active_shift_list:
309
  for t in date_span_list:
310
+ # Sum of all work hours for employee type e in shift s on day t
311
+ # This represents total person-hours (e.g., 5 employees × 8 hours = 40 person-hours)
312
+ EMPLOYEE_HOURS[e, s, t] = solver.Sum(
313
+ TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, s, t]
314
+ for p in sorted_product_list
315
+ for ell in line_tuples
316
+ )
317
 
318
  # Note: Binary variables for bulk payment are now created inline in the cost calculation
319
 
320
+ # --- Objective: Minimize total labor cost (wages) ---
321
+ # Employee-centric approach: calculate wages based on actual employees and their hours
322
  PAYMENT_MODE_CONFIG = get_payment_mode_config() # Dynamic call
323
  print(f"Payment mode configuration: {PAYMENT_MODE_CONFIG}")
324
 
325
  # Build cost terms based on payment mode
326
  cost_terms = []
327
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
328
  for e in employee_type_list:
329
  for s in active_shift_list:
330
  for t in date_span_list:
331
+ payment_mode = PAYMENT_MODE_CONFIG.get(s, "partial") # Default to partial if not specified
332
+
333
+ if payment_mode == "partial":
334
+ # Partial payment: pay for actual person-hours worked
335
+ # Cost = hourly_rate Γ— total_person_hours
336
+ # Example: $20/hr Γ— 40 person-hours = $800
337
+ cost_terms.append(cost[e][s] * EMPLOYEE_HOURS[e, s, t])
338
+
339
+ elif payment_mode == "bulk":
340
+ # Bulk payment: if ANY work happens in shift, pay ALL working employees for FULL shift
341
+ # We need to know: did employee type e work at all in shift s on day t?
342
+
343
+ # Create binary: 1 if employee type e worked in this shift
344
+ work_in_shift = solver.BoolVar(f"work_{e}_s{s}_d{t}")
345
+
346
+ # Link binary to work hours
347
+ # If EMPLOYEE_HOURS > 0, then work_in_shift = 1
348
+ # If EMPLOYEE_HOURS = 0, then work_in_shift = 0
349
+ max_possible_hours = Hmax_s[s] * max_employee_type_day[e][t]
350
+ solver.Add(EMPLOYEE_HOURS[e, s, t] <= max_possible_hours * work_in_shift)
351
+ solver.Add(work_in_shift * 0.001 <= EMPLOYEE_HOURS[e, s, t])
352
+
353
+ # Calculate number of employees working in this shift
354
+ # This is approximately: ceil(EMPLOYEE_HOURS / Hmax_s[s])
355
+ # But we can use: employees_working_in_shift
356
+ # For simplicity, use EMPLOYEE_HOURS / Hmax_s[s] as continuous approximation
357
+ # Or better: create a variable for employees per shift
358
+
359
+ # Simpler approach: For bulk payment, assume if work happens,
360
+ # we need approximately EMPLOYEE_HOURS/Hmax_s[s] employees,
361
+ # and each gets paid for full shift
362
+ # Cost β‰ˆ (EMPLOYEE_HOURS / Hmax_s[s]) Γ— Hmax_s[s] Γ— hourly_rate = EMPLOYEE_HOURS Γ— hourly_rate
363
+ # But that's the same as partial! The difference is we round up employees.
364
+
365
+ # Better approach: Create variable for employees working in this specific shift
366
+ employees_in_shift = solver.IntVar(0, max_employee_type_day[e][t], f"emp_{e}_s{s}_d{t}")
367
+
368
+ # Link employees_in_shift to work requirements
369
+ # If EMPLOYEE_HOURS requires N employees, then employees_in_shift >= ceil(N)
370
+ solver.Add(employees_in_shift * Hmax_s[s] >= EMPLOYEE_HOURS[e, s, t])
371
+
372
+ # Cost: pay each employee for full shift
373
+ cost_terms.append(cost[e][s] * Hmax_s[s] * employees_in_shift)
374
+
375
+ # Note: No idle employee costs - only pay for employees actually working
376
 
377
  total_cost = solver.Sum(cost_terms)
378
 
379
+ # Objective: minimize total labor cost (wages)
380
+ # This finds the optimal production schedule (product order, line assignment, timing)
381
+ # that minimizes total wages while meeting all demand and capacity constraints
382
  solver.Minimize(total_cost)
383
 
384
  # --- Constraints ---
385
 
386
  # 1) Weekly demand - must meet exactly (no over/under production)
387
  for p in sorted_product_list:
388
+ total_production = solver.Sum(Units[p, ell, s, t] for ell in line_tuples for s in active_shift_list for t in date_span_list)
389
  demand = DEMAND_DICTIONARY.get(p, 0)
390
 
391
  # Must produce at least the demand
 
398
  for ell in line_tuples:
399
  for s in active_shift_list:
400
  for t in date_span_list:
401
+ solver.Add(solver.Sum(Assignment[p, ell, s, t] for p in sorted_product_list) <= 1)
402
  for p in sorted_product_list:
403
+ solver.Add(Hours[p, ell, s, t] <= Hmax_s[s] * Assignment[p, ell, s, t])
404
 
405
  # 3) Product-line type compatibility + (optional) activity by day
406
  for p in sorted_product_list:
 
411
  for s in active_shift_list:
412
  for t in date_span_list:
413
  if ACTIVE[t][p] == 0 or not allowed:
414
+ solver.Add(Assignment[p, ell, s, t] == 0)
415
+ solver.Add(Hours[p, ell, s, t] == 0)
416
+ solver.Add(Units[p, ell, s, t] == 0)
417
 
418
+ # 4) Line throughput: Units ≤ product_speed * Hours
419
  for p in sorted_product_list:
420
  for ell in line_tuples:
421
  for s in active_shift_list:
 
426
  speed = PER_PRODUCT_SPEED[p]
427
  # Upper bound: units cannot exceed capacity
428
  solver.Add(
429
+ Units[p, ell, s, t] <= speed * Hours[p, ell, s, t]
430
  )
431
  # Lower bound: if working, must produce (prevent phantom work)
432
  solver.Add(
433
+ Units[p, ell, s, t] >= speed * Hours[p, ell, s, t]
434
  )
435
  else:
436
  # Default speed if not found
 
438
  print(f"Warning: No speed data for product {p}, using default {default_speed:.1f} per hour")
439
  # Upper bound: units cannot exceed capacity
440
  solver.Add(
441
+ Units[p, ell, s, t] <= default_speed * Hours[p, ell, s, t]
442
  )
443
  # Lower bound: if working, must produce (prevent phantom work)
444
  solver.Add(
445
+ Units[p, ell, s, t] >= default_speed * Hours[p, ell, s, t]
446
  )
447
 
448
+ # Working hours constraint: active employees cannot exceed shift hour capacity
449
  for e in employee_type_list:
450
  for s in active_shift_list:
451
  for t in date_span_list:
452
+ # No idle employee constraints - employees are only counted when working
 
 
 
 
453
  solver.Add(
454
+ solver.Sum(TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, s, t] for p in sorted_product_list for ell in line_tuples)
455
  <= Hmax_s[s] * max_employee_type_day[e][t]
456
  )
457
 
458
+ # 6) Per-shift staffing capacity by type: link employee count to actual work hours
459
+ # This constraint ensures EMPLOYEE_COUNT[e,s,t] represents the actual number of employees needed in each shift
460
  for e in employee_type_list:
461
+ for s in active_shift_list:
462
+ for t in date_span_list:
463
+ # Total person-hours worked by employee type e in shift s on day t
464
+ total_person_hours_in_shift = solver.Sum(
465
+ TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, s, t]
466
+ for p in sorted_product_list
467
+ for ell in line_tuples
468
+ )
469
+
470
+ # Employee count must be sufficient to cover the work in this shift
471
+ # If employees work H person-hours total and each can work max M hours/shift,
472
+ # then we need at least ceil(H/M) employees
473
+ # Constraint: employee_count Γ— max_hours_per_shift >= total_person_hours_in_shift
474
+ solver.Add(EMPLOYEE_COUNT[e, s, t] * Hmax_s[s] >= total_person_hours_in_shift)
475
 
476
  # 7) Shift ordering constraints (only apply if shifts are available)
477
  # Evening shift after regular shift
 
479
  for e in employee_type_list:
480
  for t in date_span_list:
481
  solver.Add(
482
+ solver.Sum(TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, ShiftType.EVENING, t] for p in sorted_product_list for ell in line_tuples)
483
  <=
484
+ solver.Sum(TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, ShiftType.REGULAR, t] for p in sorted_product_list for ell in line_tuples)
485
  )
486
 
487
  # Overtime should only be used when regular shift is at capacity
 
495
 
496
  # Total regular shift usage for this employee type and day
497
  regular_usage = solver.Sum(
498
+ TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, ShiftType.REGULAR, t]
499
  for p in sorted_product_list for ell in line_tuples
500
  )
501
 
502
  # Total overtime usage for this employee type and day
503
  overtime_usage = solver.Sum(
504
+ TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, ShiftType.OVERTIME, t]
505
  for p in sorted_product_list for ell in line_tuples
506
  )
507
 
 
524
  # 7.5) Bulk payment linking constraints are now handled inline in the cost calculation
525
 
526
  # 7.6) *** FIXED MINIMUM UNICEF EMPLOYEES CONSTRAINT ***
527
+ # Ensure minimum UNICEF fixed-term staff work in the REGULAR shift every day
528
+ # The minimum applies to the regular shift specifically (not overtime or evening)
529
  if 'UNICEF Fixed term' in employee_type_list and FIXED_MIN_UNICEF_PER_DAY > 0:
530
+ if ShiftType.REGULAR in active_shift_list:
531
+ print(f"\n[FIXED STAFFING] Adding constraint for minimum {FIXED_MIN_UNICEF_PER_DAY} UNICEF employees in REGULAR shift per day...")
532
+ for t in date_span_list:
533
+ # At least FIXED_MIN_UNICEF_PER_DAY employees must work in the regular shift each day
534
+ solver.Add(
535
+ EMPLOYEE_COUNT['UNICEF Fixed term', ShiftType.REGULAR, t] >= FIXED_MIN_UNICEF_PER_DAY
536
+ )
537
+ print(f"[FIXED STAFFING] Added {len(date_span_list)} constraints ensuring >= {FIXED_MIN_UNICEF_PER_DAY} UNICEF employees in regular shift per day")
538
+ else:
539
+ print(f"\n[FIXED STAFFING] Warning: Regular shift not available, cannot enforce minimum UNICEF staffing")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
540
 
541
  # 8) *** HIERARCHY DEPENDENCY CONSTRAINTS ***
542
  # For subkits with prepack dependencies: dependencies should be produced before or same time
 
553
  if dep in sorted_product_list: # Only if dependency is also in production list
554
  # Calculate "completion time" for each product (sum of all production times)
555
  p_completion = solver.Sum(
556
+ t * Hours[p, ell, s, t] for ell in line_tuples for s in active_shift_list for t in date_span_list
557
  )
558
  dep_completion = solver.Sum(
559
+ t * Hours[dep, ell, s, t] for ell in line_tuples for s in active_shift_list for t in date_span_list
560
  )
561
 
562
  # Dependency should complete before or at the same time
 
582
  result['objective'] = solver.Objective().Value()
583
 
584
  # Weekly production
585
+ prod_week = {p: sum(Units[p, ell, s, t].solution_value() for ell in line_tuples for s in active_shift_list for t in date_span_list) for p in sorted_product_list}
586
  result['weekly_production'] = prod_week
587
 
588
  # Which product ran on which line/shift/day
 
590
  for t in date_span_list:
591
  for ell in line_tuples:
592
  for s in active_shift_list:
593
+ chosen = [p for p in sorted_product_list if Assignment[p, ell, s, t].solution_value() > 0.5]
594
  if chosen:
595
  p = chosen[0]
596
  schedule.append({
 
599
  'line_idx': ell[1],
600
  'shift': s,
601
  'product': p,
602
+ 'run_hours': Hours[p, ell, s, t].solution_value(),
603
+ 'units': Units[p, ell, s, t].solution_value(),
604
  })
605
  result['run_schedule'] = schedule
606
 
 
609
  for e in employee_type_list:
610
  for s in active_shift_list:
611
  for t in date_span_list:
612
+ used_ph = sum(TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, s, t].solution_value() for p in sorted_product_list for ell in line_tuples)
613
  need = ceil(used_ph / (Hmax_s[s] + 1e-9))
614
  headcount.append({'emp_type': e, 'shift': s, 'day': t,
615
  'needed': need, 'available': max_employee_type_day[e][t]})
 
619
  ph_by_day = []
620
  for e in employee_type_list:
621
  for t in date_span_list:
622
+ used = sum(TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, s, t].solution_value() for s in active_shift_list for p in sorted_product_list for ell in line_tuples)
623
  ph_by_day.append({'emp_type': e, 'day': t,
624
  'used_person_hours': used,
625
  'cap_person_hours': Hmax_daily * max_employee_type_day[e][t]})
626
  result['person_hours_by_day'] = ph_by_day
627
 
628
+ # Actual employee count per type/shift/day (from EMPLOYEE_COUNT variable)
629
+ employee_count_by_shift = []
630
  for e in employee_type_list:
631
  for s in active_shift_list:
632
  for t in date_span_list:
633
+ count = int(EMPLOYEE_COUNT[e, s, t].solution_value())
634
+ used_hours = sum(TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, s, t].solution_value()
635
+ for p in sorted_product_list for ell in line_tuples)
636
+ avg_hours_per_employee = used_hours / count if count > 0 else 0
637
+ if count > 0: # Only add entries where employees are working
638
+ employee_count_by_shift.append({
639
+ 'emp_type': e,
640
  'shift': s,
641
  'day': t,
642
+ 'employee_count': count,
643
+ 'total_person_hours': used_hours,
644
+ 'avg_hours_per_employee': avg_hours_per_employee,
645
+ 'available': max_employee_type_day[e][t]
646
  })
647
+ result['employee_count_by_shift'] = employee_count_by_shift
648
+
649
+ # Also calculate daily totals (summing across shifts)
650
+ employee_count_by_day = []
651
+ for e in employee_type_list:
652
+ for t in date_span_list:
653
+ # Sum employees across all shifts for this day
654
+ total_count = sum(int(EMPLOYEE_COUNT[e, s, t].solution_value()) for s in active_shift_list)
655
+ used_hours = sum(TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, s, t].solution_value()
656
+ for s in active_shift_list for p in sorted_product_list for ell in line_tuples)
657
+ avg_hours_per_employee = used_hours / total_count if total_count > 0 else 0
658
+ if total_count > 0: # Only add days where employees are working
659
+ employee_count_by_day.append({
660
+ 'emp_type': e,
661
+ 'day': t,
662
+ 'employee_count': total_count,
663
+ 'total_person_hours': used_hours,
664
+ 'avg_hours_per_employee': avg_hours_per_employee,
665
+ 'available': max_employee_type_day[e][t]
666
+ })
667
+ result['employee_count_by_day'] = employee_count_by_day
668
+
669
+ # Note: Idle employee tracking removed - only counting employees actually working
670
 
671
  # Pretty print
672
  print("Objective (min cost):", result['objective'])
 
679
  shift_name = ShiftType.get_name(row['shift'])
680
  line_name = LineType.get_name(row['line_type_id'])
681
  print(f"date_span_list{row['day']} {line_name}-{row['line_idx']} {shift_name}: "
682
+ f"{row['product']} Hours={row['run_hours']:.2f}h Units={row['units']:.1f}")
683
 
684
  print("\n--- Implied headcount need (per type/shift/day) ---")
685
  for row in headcount:
 
692
  print(f"{row['emp_type']}, date_span_list{row['day']}: used={row['used_person_hours']:.1f} "
693
  f"(cap {row['cap_person_hours']})")
694
 
695
+ print("\n--- Actual employee count by type/shift/day ---")
696
+ for row in employee_count_by_shift:
697
+ shift_name = ShiftType.get_name(row['shift'])
698
+ print(f"{row['emp_type']}, {shift_name}, date_span_list{row['day']}: "
699
+ f"count={row['employee_count']} employees, "
700
+ f"total_hours={row['total_person_hours']:.1f}h, "
701
+ f"avg={row['avg_hours_per_employee']:.1f}h/employee")
702
+
703
+ print("\n--- Daily employee totals by type/day (sum across shifts) ---")
704
+ for row in employee_count_by_day:
705
+ print(f"{row['emp_type']}, date_span_list{row['day']}: "
706
+ f"count={row['employee_count']} employees total, "
707
+ f"total_hours={row['total_person_hours']:.1f}h, "
708
+ f"avg={row['avg_hours_per_employee']:.1f}h/employee "
709
+ f"(available: {row['available']})")
710
+
711
+ # Note: Idle employee reporting removed - only tracking employees actually working
712
 
713
  return result
714
 
src/{utils β†’ preprocess}/excel_to_csv_converter.py RENAMED
File without changes
src/{utils β†’ preprocess}/kit_composition_cleaner.py RENAMED
File without changes