Spaces:

OOI-FrontierTech
/

supply-roster-optimization

Sleeping

File size: 14,133 Bytes

cd87ae5

import sys
import os
import pandas as pd
import streamlit as st
import plotly.express as px
from datetime import datetime

# Add parent directory to path to import LaborOptimizer
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from optimization.labor_optimizer import LaborOptimizer


def get_available_dates(data_path):
    """Load the orders data and extract unique dates"""
    try:
        orders_file = os.path.join(data_path, "orders.csv")
        if os.path.exists(orders_file):
            orders_df = pd.read_csv(orders_file)
            if "due_date" in orders_df.columns:
                # Convert to datetime and extract unique dates
                dates = pd.to_datetime(orders_df["due_date"]).dt.date.unique()
                # Sort dates in descending order (most recent first)
                dates = sorted(dates, reverse=True)
                return dates
    except Exception as e:
        st.error(f"Error loading dates: {str(e)}")
    return []


def get_metadata_stats(optimizer, target_date=None):
    """
    Aggregate metadata statistics about employee costs and availability
    
    Args:
        optimizer: LaborOptimizer instance
        target_date: Target date for availability analysis
    
    Returns:
        dict: Dictionary containing various statistics
    """
    try:
        # Employee type costs
        employee_types_df = optimizer.employee_types_df
        costs_data = []
        for _, row in employee_types_df.iterrows():
            costs_data.append({
                'Employee Type': row['type_name'].title(),
                'Usual Cost ($/hr)': f"${row['usual_cost']:.2f}",
                'Overtime Cost ($/hr)': f"${row['overtime_cost']:.2f}",
                'Evening Shift Cost ($/hr)': f"${row['evening_shift_cost']:.2f}",
                'Max Hours': row['max_hours'],
                'Unit Manpower/Hr': row['unit_productivity_per_hour']
            })
        
        # Shift hours information
        shift_hours = optimizer._get_shift_hours()
        shift_data = []
        for shift_type, hours in shift_hours.items():
            shift_data.append({
                'Shift Type': shift_type.replace('_', ' ').title(),
                'Duration (hours)': f"{hours:.1f}"
            })
        
        # Employee availability for target date
        availability_data = []
        if target_date:
            target_date_str = pd.to_datetime(target_date).strftime("%Y-%m-%d")
        else:
            # Use most recent date if no target date specified, but show warning
            target_date_str = pd.to_datetime(optimizer.orders_df["due_date"]).max().strftime("%Y-%m-%d")
            st.warning("⚠️ No target date specified. Using the most recent order date for analysis. Please select a specific target date for accurate availability data.")
        
        availability_target_date = optimizer.employee_availability_df[
            optimizer.employee_availability_df["date"] == target_date_str
        ]
        
        employee_availability = optimizer.employees_df.merge(
            availability_target_date, left_on="id", right_on="employee_id", how="left"
        )
        
        for emp_type in optimizer.employee_types_df["type_name"]:
            emp_type_data = employee_availability[
                employee_availability["type_name"] == emp_type
            ]
            
            if not emp_type_data.empty:
                first_shift_available = emp_type_data["first_shift_available"].sum()
                second_shift_available = emp_type_data["second_shift_available"].sum()
                overtime_available = emp_type_data["overtime_available"].sum()
                total_employees = len(emp_type_data)
            else:
                first_shift_available = second_shift_available = overtime_available = total_employees = 0
            
            availability_data.append({
                'Employee Type': emp_type.title(),
                'Total Employees': total_employees,
                'Usual Time Available': first_shift_available,
                'Evening Shift Available': second_shift_available,
                'Overtime Available': overtime_available
            })
        
        # Overall statistics
        total_employees = len(optimizer.employees_df)
        total_employee_types = len(optimizer.employee_types_df)
        total_orders = len(optimizer.orders_df)
        
        return {
            'costs_data': costs_data,
            'shift_data': shift_data,
            'availability_data': availability_data,
            'overall_stats': {
                'Total Employees': total_employees,
                'Employee Types': total_employee_types,
                'Total Orders': total_orders,
                'Analysis Date': target_date_str,
                'is_default_date': not bool(target_date)
            }
        }
        
    except Exception as e:
        st.error(f"Error generating metadata: {str(e)}")
        return None


def display_metadata_section(metadata):
    """Display metadata in organized sections"""
    if not metadata:
        return
        
    # Make the entire Dataset Overview section collapsible
    # with st.expander("📊 Dataset Overview", expanded=False):
    
    with st.expander("📊 Dataset Overview", expanded=False):
        st.write(f"Data path: {st.session_state.data_path}")
        # Overall statistics
        st.write("Information on the date chosen - not an optimization report") # df, err, func, keras!
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.metric("Total Employees Available", metadata['overall_stats']['Total Employees'])
        with col2:
            st.metric("Employee Types Available", metadata['overall_stats']['Employee Types'])
        with col3:
            st.metric("Total Orders", metadata['overall_stats']['Total Orders'])
        with col4:
            analysis_date = metadata['overall_stats']['Analysis Date']
            if metadata['overall_stats'].get('is_default_date', False):
                st.metric("Analysis Date", f"{analysis_date} ⚠️", help="Using most recent order date - select specific date for accurate analysis")
            else:
                st.metric("Analysis Date", analysis_date)
        
        # Create tabs for different metadata sections
        tab1, tab2, tab3 = st.tabs(["💰 Employee Costs", "🕐 Shift Information", "👥 Availability"])
        
        with tab1:
            st.subheader("Employee Type Costs")
            costs_df = pd.DataFrame(metadata['costs_data'])
            st.dataframe(costs_df, use_container_width=True)
            
            # Cost comparison chart
            costs_for_chart = []
            for item in metadata['costs_data']:
                emp_type = item['Employee Type']
                costs_for_chart.extend([
                    {'Employee Type': emp_type, 'Cost Type': 'Usual', 'Cost': float(item['Usual Cost ($/hr)'].replace('$', ''))},
                    {'Employee Type': emp_type, 'Cost Type': 'Overtime', 'Cost': float(item['Overtime Cost ($/hr)'].replace('$', ''))},
                    {'Employee Type': emp_type, 'Cost Type': 'Evening', 'Cost': float(item['Evening Shift Cost ($/hr)'].replace('$', ''))}
                ])
            
            chart_df = pd.DataFrame(costs_for_chart)
            fig = px.bar(chart_df, x='Employee Type', y='Cost', color='Cost Type',
                        title='Hourly Costs by Employee Type and Shift',
                        barmode='group')
            st.plotly_chart(fig, use_container_width=True)
        
        with tab2:
            st.subheader("Shift Duration Information")
            shift_df = pd.DataFrame(metadata['shift_data'])
            st.dataframe(shift_df, use_container_width=True)
            
            # Shift duration chart
            fig2 = px.bar(shift_df, x='Shift Type', y='Duration (hours)',
                            title='Shift Duration by Type')
            st.plotly_chart(fig2, use_container_width=True)
        
        with tab3:
            st.subheader("Employee Availability")
            availability_df = pd.DataFrame(metadata['availability_data'])
            st.dataframe(availability_df, use_container_width=True)
            
            # # Availability chart
            # availability_chart_data = []
            # for item in metadata['availability_data']:
            #     emp_type = item['Employee Type']
            #     availability_chart_data.extend([
            #         {'Employee Type': emp_type, 'Shift Type': 'Usual Time', 'Available': item['Usual Time Available']},
            #         {'Employee Type': emp_type, 'Shift Type': 'Evening Shift', 'Available': item['Evening Shift Available']},
            #         {'Employee Type': emp_type, 'Shift Type': 'Overtime', 'Available': item['Overtime Available']}
            #     ])
            
            # chart_df2 = pd.DataFrame(availability_chart_data)
            # fig3 = px.bar(chart_df2, x='Employee Type', y='Available', color='Shift Type',
            #              title='Available Workers by Employee Type and Shift',
            #              barmode='group')
            # st.plotly_chart(fig3, use_container_width=True)
def display_demand(optimizer):
    with st.expander("📊 Demand", expanded=False):
        demand_df = optimizer.orders_df
        st.header("Demand")
        daily_demand = demand_df.groupby('date_of_order').sum()['order_amount'].reset_index()
        st.plotly_chart(px.bar(daily_demand, x='date_of_order', y='order_amount', title='Demand by Date'), use_container_width=True)
        st.markdown("### Demand for the selected date")
        st.dataframe(demand_df[demand_df['date_of_order']==st.session_state.target_date], use_container_width=True)

def display_employee_availability(optimizer):
    with st.expander("👥 Employee Availability", expanded=False):
        st.header("Employee Availability")
        employee_availability_df = optimizer.employee_availability_df
        employee_availability_df['date'] = pd.to_datetime(employee_availability_df['date'])
        employee_availability_target_date = employee_availability_df[employee_availability_df['date']==st.session_state.target_date]
        employee_availability_target_date = pd.merge(employee_availability_target_date, optimizer.employees_df, left_on='employee_id', right_on='id', how='left')
        st.dataframe(employee_availability_target_date[['name', 'employee_id', 'type_name', 'first_shift_available', 'second_shift_available', 'overtime_available']], use_container_width=True)
        # Group by type_name and sum the availability columns
        available_employee_grouped = employee_availability_target_date.groupby('type_name')[
            ['first_shift_available', 'second_shift_available', 'overtime_available']
        ].sum().reset_index()
        
        st.markdown("### Employee Availability for the selected date")
        # Create non-stacked (grouped) bar chart using plotly
        fig = px.bar(
            available_employee_grouped.melt(id_vars=['type_name'], var_name='shift_type', value_name='count'),
            x='type_name',
            y='count', 
            color='shift_type',
            barmode='group',  # This makes it non-stacked
            title='Available Employee Count by Type and Shift',
            labels={'type_name': 'Employee Type', 'count': 'Available Count', 'shift_type': 'Shift Type'}
        )
        st.plotly_chart(fig, use_container_width=True)

    # st.dataframe(employee_availability_target_date, use_container_width=True)
    # st.plotly_chart(px.bar(employee_availability_target_date, x='employee_id', y='availability', title='Employee Availability by Date'), use_container_width=True)
    
    # st.dataframe(employee_availability_df[employee_availability_df['date']==st.session_state.target_date], use_container_width=True)


    
def main():
    """Main function for metadata page"""
    st.set_page_config(page_title="Dataset Metadata", layout="wide")
    st.title("📊 Dataset Metadata Overview")
    
    # Get data_path from session state if available, otherwise create input
    if 'data_path' in st.session_state:
        # Using shared data_path from optimize_viz.py
        data_path = st.session_state.data_path
        
        st.sidebar.info(f"📁 Using shared data path: `{data_path}`")
    else:
        st.error("No data path found. Please select a data path in the sidebar.")
        

    if 'target_date' in st.session_state:
            target_date = st.session_state.target_date
            st.sidebar.info(f"📅 Using shared target date: `{target_date}`")
    else:
        st.error("No target date found. Please select a target date in the sidebar.")
    
    #If the date selection needs to be individualized per page, uncomment the following code
    # with st.sidebar:
    #     # Date selection
    #     available_dates = get_available_dates(data_path)
    #     if available_dates:
    #         date_options = [""] + [str(date) for date in available_dates]
    #         target_date = st.selectbox(
    #             "Target Date (select empty for latest date)",
    #             options=date_options,
    #             index=0,
    #         )
    #     else:
    #         target_date = st.text_input(
    #             "Target Date (YYYY-MM-DD, leave empty for latest)"
    #         )

    try:
        optimizer = LaborOptimizer(data_path)
        
        # Show warning if no target date is selected
        if not target_date:
            st.info("💡 **Tip**: Select a specific target date from the sidebar to see accurate availability data for that date. Currently showing data for the most recent order date.")
        
        metadata = get_metadata_stats(optimizer, target_date if target_date else None)
        display_metadata_section(metadata)
        display_demand(optimizer)
        display_employee_availability(optimizer)
    except Exception as e:
        st.error(f"Error loading metadata: {str(e)}")


if __name__ == "__main__":
    main()