Spaces:

VEDAGI1
/

Medica_DecisionSupportAI

Sleeping

App Files Files Community

Rajan Sharma committed on Sep 19

Commit

ed780b7

verified ·

1 Parent(s): 49f10c8

Create auto_metrics.py

Browse files

Files changed (1) hide show

auto_metrics.py +145 -0

auto_metrics.py ADDED Viewed

	@@ -0,0 +1,145 @@

+from __future__ import annotations
+from typing import Dict, Any, Tuple, Optional, List
+import pandas as pd
+import numpy as np
+from data_registry import DataRegistry
+from schema_mapper import MappingResult
def _get(reg: DataRegistry, mapping: MappingResult, concept: str) -> Tuple[Optional[pd.DataFrame], Optional[str]]:
    """Resolve a concept to the table and column it was mapped to.

    Args:
        reg: Registry queried with ``reg.get(table_name)``.
        mapping: Object whose ``resolved`` attribute maps a concept name to a
            ``(table_name, column_name)`` pair.
        concept: Concept name to look up.

    Returns:
        ``(table, column)`` for a resolved concept, where ``table`` is
        whatever ``reg.get`` returns for the table name; ``(None, None)``
        when the concept is not present in ``mapping.resolved``.
    """
    resolved = mapping.resolved
    if concept in resolved:
        table_name, column = resolved[concept]
        return reg.get(table_name), column
    return None, None
def _fmt_tbl(df: Optional[pd.DataFrame], max_rows: int = 20) -> str:
    """Render the leading rows of a frame as a Markdown table.

    Args:
        df: Frame to render; ``None`` is accepted.
        max_rows: Row limit passed to ``DataFrame.head``.

    Returns:
        The Markdown table without the index column, or the placeholder
        ``"_<empty table>_"`` when ``df`` is ``None`` or empty.
    """
    has_rows = df is not None and not df.empty
    if not has_rows:
        return "_<empty table>_"
    # head() already yields the whole frame when it is no longer than max_rows.
    preview = df.head(max_rows)
    return preview.to_markdown(index=False)
def compute_facility_wait_ranks(reg: DataRegistry, mapping: MappingResult) -> Optional[pd.DataFrame]:
    """Rank facilities by their mean wait, longest first.

    The wait column is chosen by trying the concepts ``wait_median``,
    ``wait_days`` and ``wait_p90`` in that order and taking the first one
    that resolves to the same table object as the ``facility`` concept.

    Args:
        reg: Registry the mapped tables are fetched from.
        mapping: Concept-to-(table, column) resolution.

    Returns:
        A frame with the facility column, ``avg_wait`` (mean of the wait
        values after numeric coercion, non-numeric entries ignored) and a
        1-based ``rank`` where 1 is the longest average wait; ``None`` when
        the facility concept or a co-located wait concept is unavailable.
    """
    df_fac, col_fac = _get(reg, mapping, "facility")
    if df_fac is None or col_fac is None:
        return None
    wait_col = None
    for key in ("wait_median", "wait_days", "wait_p90"):
        dfw, colw = _get(reg, mapping, key)
        # Identity comparison: the wait column is only usable when it lives in
        # the very same table object as the facility column.
        # NOTE(review): this relies on reg.get returning the same object for
        # the same table name rather than a copy - confirm in DataRegistry.
        if dfw is not None and colw is not None and dfw is df_fac:
            wait_col = colw
            break
    if wait_col is None:
        return None
    # Coerce first, then aggregate per group. Chaining
    # GroupBy.apply(pd.to_numeric).mean() collapses the whole column to one
    # scalar, on which reset_index() raises AttributeError.
    waits = pd.to_numeric(df_fac[wait_col], errors="coerce")
    g = waits.groupby(df_fac[col_fac], dropna=True).mean().rename("avg_wait").reset_index()
    g = g.sort_values("avg_wait", ascending=False)
    g["rank"] = np.arange(1, len(g) + 1)
    return g[[col_fac, "avg_wait", "rank"]]
def compute_specialty_wait_ranks(reg: DataRegistry, mapping: MappingResult) -> Optional[pd.DataFrame]:
    """Rank specialties by their mean wait, longest first.

    The wait column is chosen by trying the concepts ``wait_median``,
    ``wait_days`` and ``wait_p90`` in that order and taking the first one
    that resolves to the same table object as the ``specialty`` concept.

    Args:
        reg: Registry the mapped tables are fetched from.
        mapping: Concept-to-(table, column) resolution.

    Returns:
        A frame with the specialty column, ``avg_wait`` (mean of the wait
        values after numeric coercion, non-numeric entries ignored) and a
        1-based ``rank`` where 1 is the longest average wait; ``None`` when
        the specialty concept or a co-located wait concept is unavailable.
    """
    df, col_spec = _get(reg, mapping, "specialty")
    if df is None or col_spec is None:
        return None
    wait_col = None
    for key in ("wait_median", "wait_days", "wait_p90"):
        dfw, colw = _get(reg, mapping, key)
        # Identity comparison: the wait column is only usable when it lives in
        # the very same table object as the specialty column.
        # NOTE(review): this relies on reg.get returning the same object for
        # the same table name rather than a copy - confirm in DataRegistry.
        if dfw is not None and colw is not None and dfw is df:
            wait_col = colw
            break
    if wait_col is None:
        return None
    # Coerce first, then aggregate per group. Chaining
    # GroupBy.apply(pd.to_numeric).mean() collapses the whole column to one
    # scalar, on which reset_index() raises AttributeError.
    waits = pd.to_numeric(df[wait_col], errors="coerce")
    g = waits.groupby(df[col_spec], dropna=True).mean().rename("avg_wait").reset_index()
    g = g.sort_values("avg_wait", ascending=False)
    g["rank"] = np.arange(1, len(g) + 1)
    return g[[col_spec, "avg_wait", "rank"]]
def compute_zone_comparison(reg: DataRegistry, mapping: MappingResult) -> Optional[pd.DataFrame]:
    """Compare zones by their mean wait, longest first.

    The wait column is chosen by trying the concepts ``wait_median``,
    ``wait_days`` and ``wait_p90`` in that order and taking the first one
    that resolves to the same table object as the ``zone`` concept.

    Args:
        reg: Registry the mapped tables are fetched from.
        mapping: Concept-to-(table, column) resolution.

    Returns:
        A frame with the zone column and ``avg_wait`` (mean of the wait
        values after numeric coercion, non-numeric entries ignored), sorted
        by ``avg_wait`` descending; ``None`` when the zone concept or a
        co-located wait concept is unavailable.
    """
    df, col_zone = _get(reg, mapping, "zone")
    if df is None or col_zone is None:
        return None
    wait_col = None
    for key in ("wait_median", "wait_days", "wait_p90"):
        dfw, colw = _get(reg, mapping, key)
        # Identity comparison: the wait column is only usable when it lives in
        # the very same table object as the zone column.
        # NOTE(review): this relies on reg.get returning the same object for
        # the same table name rather than a copy - confirm in DataRegistry.
        if dfw is not None and colw is not None and dfw is df:
            wait_col = colw
            break
    if wait_col is None:
        return None
    # Coerce first, then aggregate per group. Chaining
    # GroupBy.apply(pd.to_numeric).mean() collapses the whole column to one
    # scalar, on which reset_index() raises AttributeError.
    waits = pd.to_numeric(df[wait_col], errors="coerce")
    g = waits.groupby(df[col_zone], dropna=True).mean().rename("avg_wait").reset_index()
    g = g.sort_values("avg_wait", ascending=False)
    return g[[col_zone, "avg_wait"]]
def compute_capacity_snapshot(reg: DataRegistry, mapping: MappingResult) -> Optional[pd.DataFrame]:
    """Summarise the column mapped to the ``capacity_beds`` concept.

    Args:
        reg: Registry the mapped table is fetched from.
        mapping: Concept-to-(table, column) resolution.

    Returns:
        A two-row frame with ``metric``/``value`` columns holding
        ``staffed_beds_total`` (sum truncated to an integer) and
        ``staffed_beds_mean``; ``None`` when the concept or its table is
        unavailable. Entries that cannot be read as numbers are ignored.
    """
    df, col_beds = _get(reg, mapping, "capacity_beds")
    if df is None or col_beds is None:
        return None
    beds = pd.to_numeric(df[col_beds], errors="coerce")
    # pandas reductions skip NaN by default and stay silent on an empty or
    # all-NaN column, where np.nanmean would emit a RuntimeWarning. The values
    # are unchanged: total 0 and mean NaN in that case.
    total = int(beds.sum())
    mean = float(beds.mean())
    return pd.DataFrame({
        "metric": ["staffed_beds_total", "staffed_beds_mean"],
        "value": [total, mean],
    })
def compute_costs_example(reg: DataRegistry, mapping: MappingResult, n_clients: int = 1200) -> Optional[pd.DataFrame]:
    """Build an illustrative program cost for a given number of clients.

    The fixed component is the sum of the ``cost_fixed`` column and the
    variable component is the mean of the ``cost_variable`` column, both
    after numeric coercion. The program total is
    ``fixed + variable * n_clients``.

    Args:
        reg: Registry the mapped tables are fetched from.
        mapping: Concept-to-(table, column) resolution.
        n_clients: Client count the variable cost is multiplied by.

    Returns:
        A three-row ``component``/``value`` frame (``fixed_total``,
        ``variable_per_client``, ``program_total_for_<n_clients>``), or
        ``None`` when neither cost concept has both a table and a column.
        A missing fixed cost counts as 0.0; a missing or non-finite variable
        cost makes the program total NaN.
    """
    dfF, colF = _get(reg, mapping, "cost_fixed")
    dfV, colV = _get(reg, mapping, "cost_variable")
    # A concept is only usable when both its table and its column are present.
    # Checking the column names alone let a resolved-but-missing table through
    # and produced a misleading fixed_total of 0.0.
    has_fixed = dfF is not None and colF is not None
    has_var = dfV is not None and colV is not None
    if not (has_fixed or has_var):
        return None
    fixed = float(pd.to_numeric(dfF[colF], errors="coerce").sum()) if has_fixed else 0.0
    var = float(pd.to_numeric(dfV[colV], errors="coerce").mean()) if has_var else np.nan
    total = fixed + var * n_clients if np.isfinite(var) else np.nan
    return pd.DataFrame({
        "component": ["fixed_total", "variable_per_client", f"program_total_for_{n_clients}"],
        "value": [fixed, var, total],
    })
def build_data_findings_markdown(reg: DataRegistry, mapping: MappingResult, topn: int = 5, n_clients: int = 1200) -> Tuple[str, List[str]]:
    """Assemble the "Data-Derived Findings" Markdown report.

    Five sections are rendered in a fixed order: facility ranks, specialty
    ranks, zone comparison, capacity snapshot and cost illustration. A
    section whose metric is ``None`` or empty is replaced by a short
    "Not available" note and its key is recorded as missing.

    Args:
        reg: Registry the mapped tables are fetched from.
        mapping: Concept-to-(table, column) resolution.
        topn: Number of rows shown in the facility and specialty rankings.
        n_clients: Client count used for the cost illustration and shown in
            its heading. The default reproduces the previous fixed 1,200.

    Returns:
        ``(markdown, missing)`` where ``missing`` lists the keys of the
        sections that could not be computed, in report order.
    """
    # (heading, missing-key, computed frame, row limit or None, fallback note)
    sections = (
        (
            "**Top Facilities by Avg Wait**",
            "facility_wait_ranks",
            compute_facility_wait_ranks(reg, mapping),
            topn,
            "_Not available (need facility + wait columns in the same table)._",
        ),
        (
            "**Top Specialties by Avg Wait**",
            "specialty_wait_ranks",
            compute_specialty_wait_ranks(reg, mapping),
            topn,
            "_Not available (need specialty + wait columns in the same table)._",
        ),
        (
            "**Zone Comparison (Avg Wait)**",
            "zone_wait_comparison",
            compute_zone_comparison(reg, mapping),
            None,
            "_Not available (need zone + wait columns)._",
        ),
        (
            "**Capacity Snapshot**",
            "capacity_snapshot",
            compute_capacity_snapshot(reg, mapping),
            None,
            "_Not available (need staffed beds column)._",
        ),
        (
            # The heading follows n_clients so it can never disagree with the
            # figure actually used for the computation.
            f"**Cost Illustration (for {n_clients:,} clients)**",
            "costs",
            compute_costs_example(reg, mapping, n_clients=n_clients),
            None,
            "_Not available (need fixed/variable costs)._",
        ),
    )

    missing: List[str] = []
    rendered: List[str] = []
    for heading, key, frame, limit, fallback in sections:
        if frame is None or frame.empty:
            missing.append(key)
            body = fallback
        else:
            shown = frame if limit is None else frame.head(limit)
            body = shown.to_markdown(index=False)
        rendered.append(f"{heading}\n\n{body}")

    md = (
        "### Data-Derived Findings (computed in Python)\n\n"
        + "\n\n".join(rendered)
        + "\n"
    )
    return md, missing