# auto_metrics.py
import pandas as pd
import numpy as np
from typing import Dict, List, Any, Tuple

# Returned instead of an empty string when no section produced any findings.
_NO_FINDINGS_MD = (
    "### Healthcare Data Analysis Results\n\n"
    "No analyzable healthcare patterns found in the provided data. "
    "Consider uploading data with healthcare facility, service, or outcome metrics."
)


def _facility_distribution_findings(df) -> List[str]:
    """Return markdown lines with facility counts overall, by type and by city."""
    lines = [
        "### Facility Distribution Findings",
        f"- Total healthcare facilities: {len(df)}",
    ]

    if 'facility_type' in df.columns:
        lines.append("- Facility type distribution:")
        lines.extend(
            f"  - {ftype}: {count}"
            for ftype, count in df['facility_type'].value_counts().items()
        )

    if 'city' in df.columns:
        lines.append("- Top 5 cities by facility count:")
        lines.extend(
            f"  - {city}: {count}"
            for city, count in df['city'].value_counts().head(5).items()
        )

    return lines


def _bed_capacity_findings(df) -> List[str]:
    """Return markdown lines with overall and per-zone bed capacity changes."""
    lines = ["### Bed Capacity Findings"]

    has_current = 'beds_current' in df.columns
    has_prev = 'beds_prev' in df.columns

    if has_current:
        total_current = df['beds_current'].sum()
        lines.append(f"- Total staffed beds (current): {total_current}")

        # The comparison lines need the previous-period column as well;
        # without it only the current total can be reported.
        if has_prev:
            total_prev = df['beds_prev'].sum()
            total_change = total_current - total_prev
            total_pct = (total_change / total_prev) * 100 if total_prev > 0 else 0
            lines.append(f"- Total staffed beds (previous): {total_prev}")
            lines.append(f"- Overall change: {total_change} ({total_pct:.1f}%)")

    if not ('zone' in df.columns and has_current and has_prev):
        return lines

    zone_summary = df.groupby('zone').agg({
        'beds_current': 'sum',
        'beds_prev': 'sum'
    }).reset_index()

    # No zones to report (e.g. empty frame); idxmin below would raise.
    if zone_summary.empty:
        return lines

    zone_summary['change'] = zone_summary['beds_current'] - zone_summary['beds_prev']

    # Same rule as the overall figure: a non-positive baseline yields 0%
    # rather than inf/NaN from a division by zero.
    positive_prev = zone_summary['beds_prev'].where(zone_summary['beds_prev'] > 0)
    zone_summary['percent_change'] = (
        (zone_summary['change'] / positive_prev) * 100
    ).fillna(0.0)

    lines.append("- Zone-level bed capacity:")
    # Iterate column-wise so each value keeps its own column dtype
    # (row-wise iteration can upcast integer bed counts to floats).
    for zone, beds, pct in zip(
        zone_summary['zone'],
        zone_summary['beds_current'],
        zone_summary['percent_change'],
    ):
        lines.append(f"  - {zone}: {beds} beds ({pct:.1f}% change)")

    # Zone with the smallest (most negative) percentage change.
    worst_idx = zone_summary['percent_change'].idxmin()
    worst_name = zone_summary.at[worst_idx, 'zone']
    worst_pct = zone_summary.at[worst_idx, 'percent_change']
    lines.append(f"- Largest percentage decrease: {worst_name} ({worst_pct:.1f}%)")

    return lines


def build_data_findings_markdown(data_registry, mapping) -> Tuple[str, List[str]]:
    """Build markdown summary of data findings with healthcare-specific metrics.

    Args:
        data_registry: Object whose ``get(name)`` returns the table registered
            under ``name`` (used here as a pandas DataFrame) or ``None``.
        mapping: Object whose ``resolved`` attribute maps logical dataset keys
            ("facility_distribution", "bed_capacity", "long_term_care") to
            the names used to look tables up in ``data_registry``.

    Returns:
        A ``(markdown, missing_keys)`` tuple. ``markdown`` is the findings
        joined by newlines, or a fixed "no analyzable patterns" message when
        no section produced any line. ``missing_keys`` lists, in order, the
        logical keys absent from ``mapping.resolved``.

    Note:
        A key that is resolved but whose registry lookup returns ``None``
        contributes neither findings nor a ``missing_keys`` entry.
    """
    findings: List[str] = []
    missing_keys: List[str] = []

    # Facility distribution findings
    if "facility_distribution" in mapping.resolved:
        df = data_registry.get(mapping.resolved["facility_distribution"])
        if df is not None:
            findings.extend(_facility_distribution_findings(df))
    else:
        missing_keys.append("facility_distribution")

    # Bed capacity findings
    if "bed_capacity" in mapping.resolved:
        df = data_registry.get(mapping.resolved["bed_capacity"])
        if df is not None:
            findings.extend(_bed_capacity_findings(df))
    else:
        missing_keys.append("bed_capacity")

    # Long-term care findings
    if "long_term_care" in mapping.resolved:
        findings.append("### Long-Term Care Findings")
        findings.append("- Long-term care capacity analysis requires facility distribution data")
    else:
        missing_keys.append("long_term_care")

    markdown = "\n".join(findings) if findings else _NO_FINDINGS_MD
    return markdown, missing_keys