Spaces:
Sleeping
Sleeping
| # auto_metrics.py | |
| import pandas as pd | |
| import numpy as np | |
| from typing import Dict, List, Any, Tuple | |
def _facility_findings(df: pd.DataFrame) -> List[str]:
    """Return the markdown lines describing a facility-distribution table."""
    lines = [
        "### Facility Distribution Findings",
        f"- Total healthcare facilities: {len(df)}",
    ]
    if 'facility_type' in df.columns:
        lines.append("- Facility type distribution:")
        lines.extend(
            f" - {ftype}: {count}"
            for ftype, count in df['facility_type'].value_counts().items()
        )
    if 'city' in df.columns:
        lines.append("- Top 5 cities by facility count:")
        lines.extend(
            f" - {city}: {count}"
            for city, count in df['city'].value_counts().head(5).items()
        )
    return lines


def _bed_capacity_findings(df: pd.DataFrame) -> List[str]:
    """Return the markdown lines describing a bed-capacity table."""
    lines = ["### Bed Capacity Findings"]

    # Every metric below compares current against previous counts, so both
    # columns are required. Checking only 'beds_current' would raise KeyError
    # on tables that lack 'beds_prev'.
    if not {'beds_current', 'beds_prev'}.issubset(df.columns):
        return lines

    total_current = df['beds_current'].sum()
    total_prev = df['beds_prev'].sum()
    total_change = total_current - total_prev
    total_pct = (total_change / total_prev) * 100 if total_prev > 0 else 0
    lines.append(f"- Total staffed beds (current): {total_current}")
    lines.append(f"- Total staffed beds (previous): {total_prev}")
    lines.append(f"- Overall change: {total_change} ({total_pct:.1f}%)")

    if 'zone' not in df.columns:
        return lines

    zone_summary = df.groupby('zone').agg({
        'beds_current': 'sum',
        'beds_prev': 'sum',
    }).reset_index()
    # No zones (e.g. an empty table): idxmin() below would raise on an empty
    # series, so skip the zone section entirely.
    if zone_summary.empty:
        return lines

    zone_summary['change'] = zone_summary['beds_current'] - zone_summary['beds_prev']
    # Same rule as the overall total: a zone without a positive previous
    # count reports 0% instead of dividing by zero (inf / NaN).
    baseline = zone_summary['beds_prev']
    zone_summary['percent_change'] = (
        zone_summary['change'] / baseline.where(baseline > 0) * 100
    ).fillna(0.0)

    lines.append("- Zone-level bed capacity:")
    lines.extend(
        f" - {zone}: {beds} beds ({pct:.1f}% change)"
        for zone, beds, pct in zip(
            zone_summary['zone'],
            zone_summary['beds_current'],
            zone_summary['percent_change'],
        )
    )

    # Zone with the smallest (most negative) percentage change.
    worst_zone = zone_summary.loc[zone_summary['percent_change'].idxmin()]
    lines.append(
        f"- Largest percentage decrease: {worst_zone['zone']} "
        f"({worst_zone['percent_change']:.1f}%)"
    )
    return lines


def build_data_findings_markdown(data_registry, mapping) -> Tuple[str, List[str]]:
    """Build markdown summary of data findings with healthcare-specific metrics.

    Args:
        data_registry: Object exposing ``get(name)``; expected to return a
            pandas DataFrame for a known file name, or ``None``.
        mapping: Object whose ``resolved`` attribute maps logical dataset
            keys ("facility_distribution", "bed_capacity", "long_term_care")
            to file names understood by ``data_registry``.

    Returns:
        A ``(markdown, missing_keys)`` tuple. ``markdown`` is the newline-joined
        findings, or a fixed "nothing found" message when no section produced
        output. ``missing_keys`` lists the logical keys absent from
        ``mapping.resolved``, in the order they were checked.
    """
    findings: List[str] = []
    missing_keys: List[str] = []

    table_sections = (
        ("facility_distribution", _facility_findings),
        ("bed_capacity", _bed_capacity_findings),
    )
    for key, build_section in table_sections:
        if key not in mapping.resolved:
            missing_keys.append(key)
            continue
        df = data_registry.get(mapping.resolved[key])
        # A resolved key whose table is unavailable is skipped silently; only
        # unresolved keys are reported as missing.
        if df is not None:
            findings.extend(build_section(df))

    # Long-term care findings (placeholder: no table is read for this key).
    if "long_term_care" in mapping.resolved:
        findings.append("### Long-Term Care Findings")
        findings.append("- Long-term care capacity analysis requires facility distribution data")
    else:
        missing_keys.append("long_term_care")

    if findings:
        return "\n".join(findings), missing_keys

    # NOTE(review): the source had this fallback under an `else:` with no
    # matching `if` and returned an undefined name; it is reconstructed here
    # as the "no findings at all" branch.
    md = "### Healthcare Data Analysis Results\n\nNo analyzable healthcare patterns found in the provided data. Consider uploading data with healthcare facility, service, or outcome metrics."
    return md, missing_keys