Spaces:

VEDAGI1
/

Medica_DecisionSupportAI

Sleeping

Medica_DecisionSupportAI / narrative_safetynet.py

Rajan Sharma

Update narrative_safetynet.py

5651d3e verified 3 months ago

4.3 kB

	# narrative_safetynet.py
	from __future__ import annotations
	from typing import Dict, Any, List, Optional, Tuple
	import re
	import math
	import numpy as np
	import pandas as pd

	# -------------------- helpers: dtype / formatting --------------------

	_DEF_MIN_SAMPLE = 5 # generic caution threshold for group sizes

	_HINT_METRICS_DEFAULT = [
	"surgery_median", "consult_median",
	"surgery_90th", "consult_90th",
	"surgery", "consult",
	"wait", "median", "p90", "90th"
	]

	_HINT_GROUPS_DEFAULT = [
	"facility", "specialty", "zone",
	"hospital", "city", "region"
	]

	_BAD_METRIC_NAMES = ["index", "id", "row", "unnamed"]

	def _nanlike_to_nan(df: pd.DataFrame) -> pd.DataFrame:
	dff = df.copy()
	for c in dff.columns:
	if dff[c].dtype == "object":
	dff[c] = dff[c].replace({r"^\s*$": np.nan, r"^[-–—]$": np.nan}, regex=True)
	return dff

	def _is_numeric_series(s: pd.Series) -> bool:
	try:
	return pd.api.types.is_numeric_dtype(s)
	except Exception:
	return False

	def _to_numeric(s: pd.Series) -> pd.Series:
	return pd.to_numeric(s, errors="coerce")

	def _fmt_num(x: Any, decimals: int = 1) -> str:
	try:
	if x is None or (isinstance(x, float) and math.isnan(x)):
	return "n/a"
	if isinstance(x, (int, np.integer)) or (isinstance(x, float) and float(x).is_integer()):
	return f"{int(round(float(x))):,}"
	return f"{float(x):,.{decimals}f}"
	except Exception:
	return str(x)

	# -------------------- metric & dataset selection (dynamic) --------------------

	def _score_metric_name(col: str, hints: List[str]) -> int:
	    """Score how well a column name matches the metric hints.

	    Args:
	        col: Column label. Non-string labels (e.g. integer positions) are
	            converted with ``str`` instead of raising.
	        hints: Substrings to look for in the lower-cased name.

	    Returns:
	        ``-10**6`` when one of the words in the name is listed in
	        ``_BAD_METRIC_NAMES``; otherwise 3 points for every hint contained in
	        the name.
	    """
	    raw = "" if col is None else str(col)
	    name = raw.lower()
	    # Compare whole words, not substrings: a plain ``"id" in name`` test also
	    # rejects names such as "provider_wait" or "valid_days". camelCase is split
	    # before lower-casing so "patientId" still yields the word "id".
	    spaced = re.sub(r"(?<=[a-z0-9])(?=[A-Z])", "_", raw)
	    words = set(re.findall(r"[a-z]+", spaced.lower()))
	    if any(bad in words for bad in _BAD_METRIC_NAMES):
	        return -10**6  # disqualify obvious counters/ids
	    return sum(3 for h in hints if h in name)

	def _choose_df_and_metric(
	    datasets: Dict[str, Any],
	    metric_hints: List[str]
	) -> Optional[Tuple[str, pd.DataFrame, str]]:
	    """
	    Sweep all dataframes & columns. Pick the (df, metric) with best score:
	    the name score from ``_score_metric_name`` plus 1 if the column has more
	    than one distinct numeric value.

	    Values that are not non-empty DataFrames are ignored. Columns that hold no
	    numeric value at all after coercion are skipped. On ties the first
	    candidate encountered wins.

	    Returns ``(dataset key, cleaned copy of the frame, column label)`` or
	    ``None`` when there is no candidate. The metric column in the returned
	    frame is not converted to numeric.
	    """
	    best: Optional[Tuple[int, str, pd.DataFrame, str]] = None
	    for key, v in datasets.items():
	        if not isinstance(v, pd.DataFrame) or v.empty:
	            continue
	        df = _nanlike_to_nan(v)
	        for col in df.columns:
	            col_num = _to_numeric(df[col])
	            if not _is_numeric_series(col_num):
	                continue
	            # errors="coerce" turns unparseable cells into NaN, so a pure-text
	            # column comes back as an all-NaN numeric series. Without this
	            # check a text column whose name matches a hint could be chosen.
	            if not col_num.notna().any():
	                continue
	            s = _score_metric_name(col, metric_hints)
	            if col_num.nunique(dropna=True) > 1:
	                s += 1
	            if best is None or s > best[0]:
	                best = (s, key, df, col)
	    if best is None:
	        return None
	    _, key, df, metric = best
	    return key, df, metric

	# -------------------- grouping detection (dynamic) --------------------

	def _find_group_col(df: pd.DataFrame, candidates: List[str], avoid: Optional[List[str]] = None) -> Optional[str]:
	avoid = [a.lower() for a in (avoid or [])]
	cols = list(df.columns)
	# prefer name matches
	for cand in candidates:
	for c in cols:
	cname = c.lower()
	if cand.lower() in cname and all(a not in cname for a in avoid):
	return c
	# fallback: a categorical with reasonable cardinality
	obj_cols = [c for c in cols if df[c].dtype == "object"]
	for c in obj_cols:
	nuniq = df[c].nunique(dropna=True)
	if 1 < nuniq < max(50, len(df)//10):
	return c
	return None

	# -------------------- labels & cautions --------------------

	def _label_vs_baseline(x: float, mu: float, band: float = 0.05) -> str:
	if pd.isna(x) or pd.isna(mu) or mu == 0:
	return "unknown"
	rel = (x - mu) / mu
	if rel > band:
	return "higher than average"
	if rel < -band:
	return "lower than average"
	return "about average"

	def _small_sample_note(n: int, min_n: int = _DEF_MIN_SAMPLE) -> Optional[str]:
	    """Return a caution sentence when a group has fewer than ``min_n`` records.

	    Args:
	        n: Number of records in the group.
	        min_n: Threshold below which the caution is produced.

	    Returns:
	        The caution text when ``n < min_n``, otherwise ``None``.
	    """
	    if n < min_n:
	        # Use the shared helper so a single record reads "1 record".
	        return f"Interpret averages cautiously (only {n} {_pluralize('record', n)})."
	    return None

	def _pluralize(word: str, n: int) -> str:
	return f"{word}{'' if n == 1 else 's'}"

	# -------------------- geo join (Top-5 only) --------------------

	def _canon(s: str) -> s_