Spaces:

OOI-FrontierTech
/

supply-roster-optimization

Sleeping

supply-roster-optimization / src /visualization /pages /2_metadata.py

HaLim

first push with real optimizer

cd87ae5 7 months ago

14.1 kB

	import sys
	import os
	import pandas as pd
	import streamlit as st
	import plotly.express as px
	from datetime import datetime

	# Add parent directory to path to import LaborOptimizer
	sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
	from optimization.labor_optimizer import LaborOptimizer


	def get_available_dates(data_path):
	"""Load the orders data and extract unique dates"""
	try:
	orders_file = os.path.join(data_path, "orders.csv")
	if os.path.exists(orders_file):
	orders_df = pd.read_csv(orders_file)
	if "due_date" in orders_df.columns:
	# Convert to datetime and extract unique dates
	dates = pd.to_datetime(orders_df["due_date"]).dt.date.unique()
	# Sort dates in descending order (most recent first)
	dates = sorted(dates, reverse=True)
	return dates
	except Exception as e:
	st.error(f"Error loading dates: {str(e)}")
	return []


	def get_metadata_stats(optimizer, target_date=None):
	"""
	Aggregate metadata statistics about employee costs and availability

	Args:
	optimizer: LaborOptimizer instance
	target_date: Target date for availability analysis

	Returns:
	dict: Dictionary containing various statistics
	"""
	try:
	# Employee type costs
	employee_types_df = optimizer.employee_types_df
	costs_data = []
	for _, row in employee_types_df.iterrows():
	costs_data.append({
	'Employee Type': row['type_name'].title(),
	'Usual Cost ($/hr)': f"${row['usual_cost']:.2f}",
	'Overtime Cost ($/hr)': f"${row['overtime_cost']:.2f}",
	'Evening Shift Cost ($/hr)': f"${row['evening_shift_cost']:.2f}",
	'Max Hours': row['max_hours'],
	'Unit Manpower/Hr': row['unit_productivity_per_hour']
	})

	# Shift hours information
	shift_hours = optimizer._get_shift_hours()
	shift_data = []
	for shift_type, hours in shift_hours.items():
	shift_data.append({
	'Shift Type': shift_type.replace('_', ' ').title(),
	'Duration (hours)': f"{hours:.1f}"
	})

	# Employee availability for target date
	availability_data = []
	if target_date:
	target_date_str = pd.to_datetime(target_date).strftime("%Y-%m-%d")
	else:
	# Use most recent date if no target date specified, but show warning
	target_date_str = pd.to_datetime(optimizer.orders_df["due_date"]).max().strftime("%Y-%m-%d")
	st.warning("⚠️ No target date specified. Using the most recent order date for analysis. Please select a specific target date for accurate availability data.")

	availability_target_date = optimizer.employee_availability_df[
	optimizer.employee_availability_df["date"] == target_date_str
	]

	employee_availability = optimizer.employees_df.merge(
	availability_target_date, left_on="id", right_on="employee_id", how="left"
	)

	for emp_type in optimizer.employee_types_df["type_name"]:
	emp_type_data = employee_availability[
	employee_availability["type_name"] == emp_type
	]

	if not emp_type_data.empty:
	first_shift_available = emp_type_data["first_shift_available"].sum()
	second_shift_available = emp_type_data["second_shift_available"].sum()
	overtime_available = emp_type_data["overtime_available"].sum()
	total_employees = len(emp_type_data)
	else:
	first_shift_available = second_shift_available = overtime_available = total_employees = 0

	availability_data.append({
	'Employee Type': emp_type.title(),
	'Total Employees': total_employees,
	'Usual Time Available': first_shift_available,
	'Evening Shift Available': second_shift_available,
	'Overtime Available': overtime_available
	})

	# Overall statistics
	total_employees = len(optimizer.employees_df)
	total_employee_types = len(optimizer.employee_types_df)
	total_orders = len(optimizer.orders_df)

	return {
	'costs_data': costs_data,
	'shift_data': shift_data,
	'availability_data': availability_data,
	'overall_stats': {
	'Total Employees': total_employees,
	'Employee Types': total_employee_types,
	'Total Orders': total_orders,
	'Analysis Date': target_date_str,
	'is_default_date': not bool(target_date)
	}
	}

	except Exception as e:
	st.error(f"Error generating metadata: {str(e)}")
	return None


	def display_metadata_section(metadata):
	"""Display metadata in organized sections"""
	if not metadata:
	return

	# Make the entire Dataset Overview section collapsible
	# with st.expander("📊 Dataset Overview", expanded=False):

	with st.expander("📊 Dataset Overview", expanded=False):
	st.write(f"Data path: {st.session_state.data_path}")
	# Overall statistics
	st.write("Information on the date chosen - not an optimization report") # df, err, func, keras!
	col1, col2, col3, col4 = st.columns(4)
	with col1:
	st.metric("Total Employees Available", metadata['overall_stats']['Total Employees'])
	with col2:
	st.metric("Employee Types Available", metadata['overall_stats']['Employee Types'])
	with col3:
	st.metric("Total Orders", metadata['overall_stats']['Total Orders'])
	with col4:
	analysis_date = metadata['overall_stats']['Analysis Date']
	if metadata['overall_stats'].get('is_default_date', False):
	st.metric("Analysis Date", f"{analysis_date} ⚠️", help="Using most recent order date - select specific date for accurate analysis")
	else:
	st.metric("Analysis Date", analysis_date)

	# Create tabs for different metadata sections
	tab1, tab2, tab3 = st.tabs(["💰 Employee Costs", "🕐 Shift Information", "👥 Availability"])

	with tab1:
	st.subheader("Employee Type Costs")
	costs_df = pd.DataFrame(metadata['costs_data'])
	st.dataframe(costs_df, use_container_width=True)

	# Cost comparison chart
	costs_for_chart = []
	for item in metadata['costs_data']:
	emp_type = item['Employee Type']
	costs_for_chart.extend([
	{'Employee Type': emp_type, 'Cost Type': 'Usual', 'Cost': float(item['Usual Cost ($/hr)'].replace('$', ''))},
	{'Employee Type': emp_type, 'Cost Type': 'Overtime', 'Cost': float(item['Overtime Cost ($/hr)'].replace('$', ''))},
	{'Employee Type': emp_type, 'Cost Type': 'Evening', 'Cost': float(item['Evening Shift Cost ($/hr)'].replace('$', ''))}
	])

	chart_df = pd.DataFrame(costs_for_chart)
	fig = px.bar(chart_df, x='Employee Type', y='Cost', color='Cost Type',
	title='Hourly Costs by Employee Type and Shift',
	barmode='group')
	st.plotly_chart(fig, use_container_width=True)

	with tab2:
	st.subheader("Shift Duration Information")
	shift_df = pd.DataFrame(metadata['shift_data'])
	st.dataframe(shift_df, use_container_width=True)

	# Shift duration chart
	fig2 = px.bar(shift_df, x='Shift Type', y='Duration (hours)',
	title='Shift Duration by Type')
	st.plotly_chart(fig2, use_container_width=True)

	with tab3:
	st.subheader("Employee Availability")
	availability_df = pd.DataFrame(metadata['availability_data'])
	st.dataframe(availability_df, use_container_width=True)

	# # Availability chart
	# availability_chart_data = []
	# for item in metadata['availability_data']:
	# emp_type = item['Employee Type']
	# availability_chart_data.extend([
	# {'Employee Type': emp_type, 'Shift Type': 'Usual Time', 'Available': item['Usual Time Available']},
	# {'Employee Type': emp_type, 'Shift Type': 'Evening Shift', 'Available': item['Evening Shift Available']},
	# {'Employee Type': emp_type, 'Shift Type': 'Overtime', 'Available': item['Overtime Available']}
	# ])

	# chart_df2 = pd.DataFrame(availability_chart_data)
	# fig3 = px.bar(chart_df2, x='Employee Type', y='Available', color='Shift Type',
	# title='Available Workers by Employee Type and Shift',
	# barmode='group')
	# st.plotly_chart(fig3, use_container_width=True)
	def display_demand(optimizer):
	with st.expander("📊 Demand", expanded=False):
	demand_df = optimizer.orders_df
	st.header("Demand")
	daily_demand = demand_df.groupby('date_of_order').sum()['order_amount'].reset_index()
	st.plotly_chart(px.bar(daily_demand, x='date_of_order', y='order_amount', title='Demand by Date'), use_container_width=True)
	st.markdown("### Demand for the selected date")
	st.dataframe(demand_df[demand_df['date_of_order']==st.session_state.target_date], use_container_width=True)

	def display_employee_availability(optimizer):
	with st.expander("👥 Employee Availability", expanded=False):
	st.header("Employee Availability")
	employee_availability_df = optimizer.employee_availability_df
	employee_availability_df['date'] = pd.to_datetime(employee_availability_df['date'])
	employee_availability_target_date = employee_availability_df[employee_availability_df['date']==st.session_state.target_date]
	employee_availability_target_date = pd.merge(employee_availability_target_date, optimizer.employees_df, left_on='employee_id', right_on='id', how='left')
	st.dataframe(employee_availability_target_date[['name', 'employee_id', 'type_name', 'first_shift_available', 'second_shift_available', 'overtime_available']], use_container_width=True)
	# Group by type_name and sum the availability columns
	available_employee_grouped = employee_availability_target_date.groupby('type_name')[
	['first_shift_available', 'second_shift_available', 'overtime_available']
	].sum().reset_index()

	st.markdown("### Employee Availability for the selected date")
	# Create non-stacked (grouped) bar chart using plotly
	fig = px.bar(
	available_employee_grouped.melt(id_vars=['type_name'], var_name='shift_type', value_name='count'),
	x='type_name',
	y='count',
	color='shift_type',
	barmode='group', # This makes it non-stacked
	title='Available Employee Count by Type and Shift',
	labels={'type_name': 'Employee Type', 'count': 'Available Count', 'shift_type': 'Shift Type'}
	)
	st.plotly_chart(fig, use_container_width=True)

	# st.dataframe(employee_availability_target_date, use_container_width=True)
	# st.plotly_chart(px.bar(employee_availability_target_date, x='employee_id', y='availability', title='Employee Availability by Date'), use_container_width=True)

	# st.dataframe(employee_availability_df[employee_availability_df['date']==st.session_state.target_date], use_container_width=True)



	def main():
	"""Main function for metadata page"""
	st.set_page_config(page_title="Dataset Metadata", layout="wide")
	st.title("📊 Dataset Metadata Overview")

	# Get data_path from session state if available, otherwise create input
	if 'data_path' in st.session_state:
	# Using shared data_path from optimize_viz.py
	data_path = st.session_state.data_path

	st.sidebar.info(f"📁 Using shared data path: `{data_path}`")
	else:
	st.error("No data path found. Please select a data path in the sidebar.")


	if 'target_date' in st.session_state:
	target_date = st.session_state.target_date
	st.sidebar.info(f"📅 Using shared target date: `{target_date}`")
	else:
	st.error("No target date found. Please select a target date in the sidebar.")

	#If the date selection needs to be individualized per page, uncomment the following code
	# with st.sidebar:
	# # Date selection
	# available_dates = get_available_dates(data_path)
	# if available_dates:
	# date_options = [""] + [str(date) for date in available_dates]
	# target_date = st.selectbox(
	# "Target Date (select empty for latest date)",
	# options=date_options,
	# index=0,
	# )
	# else:
	# target_date = st.text_input(
	# "Target Date (YYYY-MM-DD, leave empty for latest)"
	# )

	try:
	optimizer = LaborOptimizer(data_path)

	# Show warning if no target date is selected
	if not target_date:
	st.info("💡 Tip: Select a specific target date from the sidebar to see accurate availability data for that date. Currently showing data for the most recent order date.")

	metadata = get_metadata_stats(optimizer, target_date if target_date else None)
	display_metadata_section(metadata)
	display_demand(optimizer)
	display_employee_availability(optimizer)
	except Exception as e:
	st.error(f"Error loading metadata: {str(e)}")


	if __name__ == "__main__":
	main()