Rajan Sharma committed on
Commit
ed780b7
·
verified ·
1 Parent(s): 49f10c8

Create auto_metrics.py

Browse files
Files changed (1) hide show
  1. auto_metrics.py +145 -0
auto_metrics.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ from typing import Dict, Any, Tuple, Optional, List
3
+ import pandas as pd
4
+ import numpy as np
5
+ from data_registry import DataRegistry
6
+ from schema_mapper import MappingResult
7
+
8
def _get(reg: DataRegistry, mapping: MappingResult, concept: str) -> Tuple[Optional[pd.DataFrame], Optional[str]]:
    """Look up the table and column that ``concept`` was mapped to.

    Returns ``(reg.get(table_name), column)`` when ``concept`` is a key of
    ``mapping.resolved``; otherwise returns ``(None, None)``.
    """
    resolved = mapping.resolved
    if concept in resolved:
        table_name, column = resolved[concept]
        return reg.get(table_name), column
    return None, None
13
+
14
def _fmt_tbl(df: pd.DataFrame, max_rows: int = 20) -> str:
    """Render the leading ``max_rows`` rows of ``df`` as a markdown table.

    The index is omitted from the output. A ``None`` or empty frame yields
    the placeholder string ``"_<empty table>_"`` instead of a table.
    """
    has_rows = df is not None and not df.empty
    if not has_rows:
        return "_<empty table>_"
    # head() hands back a new frame and already returns every row when the
    # frame is shorter than max_rows, so no separate length check is needed.
    return df.head(max_rows).to_markdown(index=False)
21
+
22
def compute_facility_wait_ranks(reg: DataRegistry, mapping: MappingResult) -> Optional[pd.DataFrame]:
    """Rank facilities by their mean wait value, longest first.

    The facility column comes from the "facility" concept. The wait column is
    the first of "wait_median", "wait_days", "wait_p90" that resolves to the
    very same DataFrame object as the facility column.

    Returns:
        A frame with the facility column, ``avg_wait`` and a 1-based ``rank``
        (1 = highest mean), or ``None`` when the facility concept or a wait
        column in the same table is not available.
    """
    df_fac, col_fac = _get(reg, mapping, "facility")
    if df_fac is None or col_fac is None:
        return None

    wait_col = None
    for key in ("wait_median", "wait_days", "wait_p90"):
        dfw, colw = _get(reg, mapping, key)
        # Identity check: both columns must live in the same table to group.
        if dfw is not None and colw is not None and dfw is df_fac:
            wait_col = colw
            break
    if wait_col is None:
        return None

    # Coerce to numeric BEFORE grouping. The earlier
    # groupby(...)[col].apply(pd.to_numeric, ...).mean() chain produced a
    # per-row Series whose .mean() is a single scalar, so .reset_index()
    # raised AttributeError and no per-group average was ever computed.
    waits = pd.to_numeric(df_fac[wait_col], errors="coerce")
    g = waits.groupby(df_fac[col_fac], dropna=True).mean().rename("avg_wait").reset_index()
    g = g.sort_values("avg_wait", ascending=False, ignore_index=True)
    g["rank"] = np.arange(1, len(g) + 1)
    return g[[col_fac, "avg_wait", "rank"]]
39
+
40
def compute_specialty_wait_ranks(reg: DataRegistry, mapping: MappingResult) -> Optional[pd.DataFrame]:
    """Rank specialties by their mean wait value, longest first.

    The specialty column comes from the "specialty" concept. The wait column
    is the first of "wait_median", "wait_days", "wait_p90" that resolves to
    the very same DataFrame object as the specialty column.

    Returns:
        A frame with the specialty column, ``avg_wait`` and a 1-based ``rank``
        (1 = highest mean), or ``None`` when the specialty concept or a wait
        column in the same table is not available.
    """
    df, col_spec = _get(reg, mapping, "specialty")
    if df is None or col_spec is None:
        return None

    wait_col = None
    for key in ("wait_median", "wait_days", "wait_p90"):
        dfw, colw = _get(reg, mapping, key)
        # Identity check: both columns must live in the same table to group.
        if dfw is not None and colw is not None and dfw is df:
            wait_col = colw
            break
    if wait_col is None:
        return None

    # Coerce to numeric BEFORE grouping. The earlier
    # groupby(...)[col].apply(pd.to_numeric, ...).mean() chain produced a
    # per-row Series whose .mean() is a single scalar, so .reset_index()
    # raised AttributeError and no per-group average was ever computed.
    waits = pd.to_numeric(df[wait_col], errors="coerce")
    g = waits.groupby(df[col_spec], dropna=True).mean().rename("avg_wait").reset_index()
    g = g.sort_values("avg_wait", ascending=False, ignore_index=True)
    g["rank"] = np.arange(1, len(g) + 1)
    return g[[col_spec, "avg_wait", "rank"]]
57
+
58
def compute_zone_comparison(reg: DataRegistry, mapping: MappingResult) -> Optional[pd.DataFrame]:
    """Compare zones by their mean wait value, longest first.

    The zone column comes from the "zone" concept. The wait column is the
    first of "wait_median", "wait_days", "wait_p90" that resolves to the very
    same DataFrame object as the zone column.

    Returns:
        A frame with the zone column and ``avg_wait``, sorted descending, or
        ``None`` when the zone concept or a wait column in the same table is
        not available.
    """
    df, col_zone = _get(reg, mapping, "zone")
    if df is None or col_zone is None:
        return None

    wait_col = None
    for key in ("wait_median", "wait_days", "wait_p90"):
        dfw, colw = _get(reg, mapping, key)
        # Identity check: both columns must live in the same table to group.
        if dfw is not None and colw is not None and dfw is df:
            wait_col = colw
            break
    if wait_col is None:
        return None

    # Coerce to numeric BEFORE grouping. The earlier
    # groupby(...)[col].apply(pd.to_numeric, ...).mean() chain produced a
    # per-row Series whose .mean() is a single scalar, so .reset_index()
    # raised AttributeError and no per-group average was ever computed.
    waits = pd.to_numeric(df[wait_col], errors="coerce")
    g = waits.groupby(df[col_zone], dropna=True).mean().rename("avg_wait").reset_index()
    g = g.sort_values("avg_wait", ascending=False, ignore_index=True)
    return g[[col_zone, "avg_wait"]]
74
+
75
def compute_capacity_snapshot(reg: DataRegistry, mapping: MappingResult) -> Optional[pd.DataFrame]:
    """Summarise the column mapped to "capacity_beds" as a total and a mean.

    Values are coerced to numeric (unparseable entries become NaN) and NaNs
    are ignored by both aggregates. Returns a two-row frame with ``metric``
    and ``value`` columns, or ``None`` when the concept is unavailable.
    """
    table, beds_col = _get(reg, mapping, "capacity_beds")
    if not (table is not None and beds_col is not None):
        return None

    beds = pd.to_numeric(table[beds_col], errors="coerce")
    total_beds = int(np.nansum(beds))
    mean_beds = float(np.nanmean(beds))

    summary = {
        "metric": ["staffed_beds_total", "staffed_beds_mean"],
        "value": [total_beds, mean_beds],
    }
    return pd.DataFrame(summary)
85
+
86
def compute_costs_example(reg: DataRegistry, mapping: MappingResult, n_clients: int = 1200) -> Optional[pd.DataFrame]:
    """Build an illustrative program cost table for ``n_clients`` clients.

    Fixed cost is the sum of the "cost_fixed" column (0.0 when that source is
    missing). Variable cost is the mean of the "cost_variable" column (NaN
    when missing). The program total is ``fixed + variable * n_clients`` and
    is NaN whenever the variable cost is not finite.

    Returns:
        A frame with ``component`` and ``value`` columns, or ``None`` when
        neither cost source is usable.
    """
    dfF, colF = _get(reg, mapping, "cost_fixed")
    dfV, colV = _get(reg, mapping, "cost_variable")

    # A source is usable only when BOTH its table and its column are present.
    # The old guard looked at the column names alone, so a mapping that
    # pointed at tables missing from the registry produced a placeholder
    # frame (0.0 / NaN / NaN) instead of None.
    has_fixed = dfF is not None and colF is not None
    has_variable = dfV is not None and colV is not None
    if not (has_fixed or has_variable):
        return None

    fixed = float(pd.to_numeric(dfF[colF], errors="coerce").sum()) if has_fixed else 0.0
    var = float(pd.to_numeric(dfV[colV], errors="coerce").mean()) if has_variable else np.nan
    total = (fixed + var * n_clients) if np.isfinite(var) else np.nan

    return pd.DataFrame({
        "component": ["fixed_total", "variable_per_client", f"program_total_for_{n_clients}"],
        "value": [fixed, var, total],
    })
98
+
99
def build_data_findings_markdown(
    reg: DataRegistry,
    mapping: MappingResult,
    topn: int = 5,
    n_clients: int = 1200,
) -> Tuple[str, List[str]]:
    """Assemble the "Data-Derived Findings" markdown section.

    Args:
        reg: Registry the metric functions read their tables from.
        mapping: Concept-to-column mapping passed to each metric function.
        topn: Number of leading rows shown for the facility and specialty
            rankings.
        n_clients: Client count used for the cost illustration and quoted in
            its heading. Defaults to 1200, the value that used to be
            hard-coded in both places.

    Returns:
        ``(markdown, missing)`` where ``missing`` lists the keys of the
        sections whose metric came back as ``None`` or empty.
    """
    missing: List[str] = []

    def render(table: Optional[pd.DataFrame], key: str, fallback: str, limit: Optional[int] = None) -> str:
        # Record unavailable metrics and substitute the explanatory note.
        if table is None or table.empty:
            missing.append(key)
            return fallback
        shown = table if limit is None else table.head(limit)
        return shown.to_markdown(index=False)

    fac_md = render(
        compute_facility_wait_ranks(reg, mapping),
        "facility_wait_ranks",
        "_Not available (need facility + wait columns in the same table)._",
        topn,
    )
    spec_md = render(
        compute_specialty_wait_ranks(reg, mapping),
        "specialty_wait_ranks",
        "_Not available (need specialty + wait columns in the same table)._",
        topn,
    )
    zone_md = render(
        compute_zone_comparison(reg, mapping),
        "zone_wait_comparison",
        "_Not available (need zone + wait columns)._",
    )
    cap_md = render(
        compute_capacity_snapshot(reg, mapping),
        "capacity_snapshot",
        "_Not available (need staffed beds column)._",
    )
    costs_md = render(
        compute_costs_example(reg, mapping, n_clients=n_clients),
        "costs",
        "_Not available (need fixed/variable costs)._",
    )

    md = (
        "### Data-Derived Findings (computed in Python)\n\n"
        "**Top Facilities by Avg Wait**\n\n" + fac_md + "\n\n"
        "**Top Specialties by Avg Wait**\n\n" + spec_md + "\n\n"
        "**Zone Comparison (Avg Wait)**\n\n" + zone_md + "\n\n"
        "**Capacity Snapshot**\n\n" + cap_md + "\n\n"
        # The heading follows n_clients so it can never disagree with the table.
        f"**Cost Illustration (for {n_clients:,} clients)**\n\n" + costs_md + "\n"
    )
    return md, missing