# auto_metrics.py
import pandas as pd
import numpy as np
from typing import Dict, List, Any, Tuple

# Returned instead of an empty string when no section produced any finding.
_NO_FINDINGS_MARKDOWN = (
    "### Healthcare Data Analysis Results\n\n"
    "No analyzable healthcare patterns found in the provided data. "
    "Consider uploading data with healthcare facility, service, or outcome metrics."
)


def _count_lines(counts: pd.Series) -> List[str]:
    """Render a ``value_counts()`` result as one sub-bullet per label."""
    return [f" - {label}: {count}" for label, count in counts.items()]


def _facility_findings(df: pd.DataFrame) -> List[str]:
    """Build the facility-distribution section (row count, type and city breakdowns)."""
    lines = [
        "### Facility Distribution Findings",
        f"- Total healthcare facilities: {len(df)}",
    ]
    if "facility_type" in df.columns:
        lines.append("- Facility type distribution:")
        lines.extend(_count_lines(df["facility_type"].value_counts()))
    if "city" in df.columns:
        lines.append("- Top 5 cities by facility count:")
        lines.extend(_count_lines(df["city"].value_counts().head(5)))
    return lines


def _zone_findings(df: pd.DataFrame) -> List[str]:
    """Build the per-zone bed lines; *df* must have zone, beds_current and beds_prev."""
    zone_summary = (
        df.groupby("zone")
        .agg({"beds_current": "sum", "beds_prev": "sum"})
        .reset_index()
    )
    if zone_summary.empty:
        # Nothing to list, and idxmin() on an empty Series would raise.
        return []

    change = zone_summary["beds_current"] - zone_summary["beds_prev"]
    # A zero baseline has no meaningful percentage; mask it to NaN so the
    # division yields NaN instead of +/-inf.
    baseline = zone_summary["beds_prev"].where(zone_summary["beds_prev"] != 0)
    zone_summary["percent_change"] = change / baseline * 100

    lines = ["- Zone-level bed capacity:"]
    for zone, beds, pct in zip(
        zone_summary["zone"],
        zone_summary["beds_current"],
        zone_summary["percent_change"],
    ):
        if pd.isna(pct):
            lines.append(f" - {zone}: {beds} beds (change n/a, no previous beds)")
        else:
            lines.append(f" - {zone}: {beds} beds ({pct:.1f}% change)")

    # Identify the worst-performing zone among zones with a defined change.
    # NOTE: this line is emitted even when the minimum change is positive
    # (i.e. no zone actually decreased); kept for output compatibility.
    valid_pct = zone_summary["percent_change"].dropna()
    if not valid_pct.empty:
        worst_zone = zone_summary.loc[valid_pct.idxmin()]
        lines.append(
            f"- Largest percentage decrease: {worst_zone['zone']} "
            f"({worst_zone['percent_change']:.1f}%)"
        )
    return lines


def _bed_capacity_findings(df: pd.DataFrame) -> List[str]:
    """Build the bed-capacity section (overall totals and, if possible, per-zone)."""
    lines = ["### Bed Capacity Findings"]
    has_current = "beds_current" in df.columns
    has_prev = "beds_prev" in df.columns

    if has_current:
        total_current = df["beds_current"].sum()
        lines.append(f"- Total staffed beds (current): {total_current}")
        # Previous/change figures need the baseline column; without this guard
        # a frame lacking 'beds_prev' raised KeyError.
        if has_prev:
            total_prev = df["beds_prev"].sum()
            total_change = total_current - total_prev
            total_pct = (total_change / total_prev) * 100 if total_prev > 0 else 0
            lines.append(f"- Total staffed beds (previous): {total_prev}")
            lines.append(f"- Overall change: {total_change} ({total_pct:.1f}%)")

    if has_current and has_prev and "zone" in df.columns:
        lines.extend(_zone_findings(df))
    return lines


def build_data_findings_markdown(data_registry, mapping) -> Tuple[str, List[str]]:
    """Build markdown summary of data findings with healthcare-specific metrics.

    Args:
        data_registry: Object with a dict-like ``get(name)`` that returns the
            DataFrame registered under ``name`` or ``None``.
        mapping: Object whose ``resolved`` attribute supports ``in`` and
            ``[]`` lookups from a logical key ("facility_distribution",
            "bed_capacity", "long_term_care") to a registry name.

    Returns:
        A ``(markdown, missing_keys)`` tuple. ``markdown`` is the findings
        joined with newlines, or a fixed "no analyzable patterns" message
        when no section produced output. ``missing_keys`` lists the logical
        keys absent from ``mapping.resolved``, in section order.
    """
    findings: List[str] = []
    missing_keys: List[str] = []
    resolved = mapping.resolved

    # Facility distribution findings
    if "facility_distribution" in resolved:
        facility_df = data_registry.get(resolved["facility_distribution"])
        if facility_df is not None:
            findings.extend(_facility_findings(facility_df))
    else:
        missing_keys.append("facility_distribution")

    # Bed capacity findings
    if "bed_capacity" in resolved:
        bed_df = data_registry.get(resolved["bed_capacity"])
        if bed_df is not None:
            findings.extend(_bed_capacity_findings(bed_df))
    else:
        missing_keys.append("bed_capacity")

    # Long-term care findings
    if "long_term_care" in resolved:
        findings.append("### Long-Term Care Findings")
        findings.append(
            "- Long-term care capacity analysis requires facility distribution data"
        )
    else:
        missing_keys.append("long_term_care")

    if not findings:
        return _NO_FINDINGS_MARKDOWN, missing_keys
    return "\n".join(findings), missing_keys