Spaces:
Sleeping
Sleeping
Rajan Sharma
committed on
Update narrative_safetynet.py
Browse files- narrative_safetynet.py +93 -222
narrative_safetynet.py
CHANGED
|
@@ -1,262 +1,133 @@
|
|
| 1 |
# narrative_safetynet.py
|
| 2 |
from __future__ import annotations
|
| 3 |
-
from typing import Dict, Any, List, Optional
|
|
|
|
| 4 |
import math
|
| 5 |
import numpy as np
|
| 6 |
import pandas as pd
|
| 7 |
-
import re
|
| 8 |
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
def _fmt_num(x: Any, decimals: int = 1) -> str:
|
| 15 |
try:
|
| 16 |
if x is None or (isinstance(x, float) and math.isnan(x)):
|
| 17 |
return "n/a"
|
| 18 |
-
if isinstance(x, (int, np.integer)):
|
| 19 |
-
return f"{x:,}"
|
| 20 |
return f"{float(x):,.{decimals}f}"
|
| 21 |
except Exception:
|
| 22 |
return str(x)
|
| 23 |
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
for h in hints:
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
for c in cols:
|
| 32 |
-
if _is_numeric(df[c]):
|
| 33 |
-
return c
|
| 34 |
-
return None
|
| 35 |
|
| 36 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
cols = list(df.columns)
|
|
|
|
| 38 |
for cand in candidates:
|
| 39 |
for c in cols:
|
| 40 |
-
|
|
|
|
| 41 |
return c
|
| 42 |
-
# fallback:
|
| 43 |
obj_cols = [c for c in cols if df[c].dtype == "object"]
|
| 44 |
for c in obj_cols:
|
| 45 |
nuniq = df[c].nunique(dropna=True)
|
| 46 |
-
if 1 < nuniq < max(50, len(df)
|
| 47 |
return c
|
| 48 |
return None
|
| 49 |
|
| 50 |
-
|
| 51 |
-
dff = df.copy()
|
| 52 |
-
for c in dff.columns:
|
| 53 |
-
if dff[c].dtype == "object":
|
| 54 |
-
dff[c] = dff[c].replace({r"^\s*$": np.nan, r"^[-–—]$": np.nan}, regex=True)
|
| 55 |
-
return dff
|
| 56 |
-
|
| 57 |
-
def _small_sample_note(n: int, min_n: int = _DEF_MIN_SAMPLE) -> Optional[str]:
|
| 58 |
-
return f"Interpret averages cautiously (only {n} records)." if n < min_n else None
|
| 59 |
|
| 60 |
-
def
|
| 61 |
-
if
|
| 62 |
return "unknown"
|
| 63 |
rel = (x - mu) / mu
|
| 64 |
-
if rel >
|
| 65 |
return "higher than average"
|
| 66 |
-
if rel < -
|
| 67 |
return "lower than average"
|
| 68 |
-
if abs(rel) <= max(tol, 0.05):
|
| 69 |
-
return "about average"
|
| 70 |
return "about average"
|
| 71 |
|
| 72 |
-
def
|
| 73 |
-
return f"{
|
| 74 |
-
|
| 75 |
-
def build_narrative(
|
| 76 |
-
scenario_text: str,
|
| 77 |
-
datasets: Dict[str, Any],
|
| 78 |
-
structured_tables: Optional[Dict[str, pd.DataFrame]] = None,
|
| 79 |
-
metric_hints: Optional[List[str]] = None,
|
| 80 |
-
group_hints: Optional[List[str]] = None,
|
| 81 |
-
min_sample: int = _DEF_MIN_SAMPLE
|
| 82 |
-
) -> str:
|
| 83 |
-
"""
|
| 84 |
-
Scenario-agnostic narrative fallback:
|
| 85 |
-
- Picks numeric metric & groupings dynamically
|
| 86 |
-
- Computes overall baseline + deviations
|
| 87 |
-
- Warns on small samples
|
| 88 |
-
- Optional geographic notes if city/lat/lon exist
|
| 89 |
-
"""
|
| 90 |
-
metric_hints = metric_hints or ["surgery_median", "consult_median", "wait", "median", "p90", "90th"]
|
| 91 |
-
group_hints = group_hints or ["facility", "specialty", "zone", "hospital", "city", "region"]
|
| 92 |
-
|
| 93 |
-
# 1) choose first non-empty table-like dataset
|
| 94 |
-
df = None
|
| 95 |
-
df_key = None
|
| 96 |
-
for k, v in datasets.items():
|
| 97 |
-
if isinstance(v, pd.DataFrame) and not v.empty:
|
| 98 |
-
df = _nanlike_to_nan(v)
|
| 99 |
-
df_key = k
|
| 100 |
-
break
|
| 101 |
-
if df is None:
|
| 102 |
-
return "No tabular data available. Unable to generate a narrative."
|
| 103 |
-
|
| 104 |
-
# 2) metrics
|
| 105 |
-
primary_metric = _pick_numeric(df, metric_hints) # e.g., Surgery_Median
|
| 106 |
-
if not primary_metric:
|
| 107 |
-
return "No numeric metric found to summarize; please ensure at least one numeric wait-time column is present."
|
| 108 |
-
|
| 109 |
-
other_numeric = [c for c in df.columns if _is_numeric(df[c]) and c != primary_metric]
|
| 110 |
-
comparator_metric = next(
|
| 111 |
-
(c for c in other_numeric if any(h in c.lower() for h in ["consult", "wait", "median", "p90", "90th"])),
|
| 112 |
-
None
|
| 113 |
-
)
|
| 114 |
-
|
| 115 |
-
# 3) groups
|
| 116 |
-
group1 = _find_group_col(df, group_hints) # e.g., Facility
|
| 117 |
-
group2 = None
|
| 118 |
-
if group1:
|
| 119 |
-
alt_hints = [h for h in group_hints if h.lower() not in group1.lower()]
|
| 120 |
-
group2 = _find_group_col(df.drop(columns=[group1], errors="ignore"), alt_hints)
|
| 121 |
-
|
| 122 |
-
# 4) baseline + grouped
|
| 123 |
-
baseline = pd.to_numeric(df[primary_metric], errors="coerce").mean(skipna=True)
|
| 124 |
-
|
| 125 |
-
def _group_stats(col: str) -> Optional[pd.DataFrame]:
|
| 126 |
-
if not col:
|
| 127 |
-
return None
|
| 128 |
-
tmp = df.copy()
|
| 129 |
-
tmp[primary_metric] = pd.to_numeric(tmp[primary_metric], errors="coerce")
|
| 130 |
-
comp_col = comparator_metric or primary_metric
|
| 131 |
-
if comp_col in tmp.columns:
|
| 132 |
-
tmp[comp_col] = pd.to_numeric(tmp[comp_col], errors="coerce")
|
| 133 |
-
agg = (
|
| 134 |
-
tmp.groupby(col, dropna=False)
|
| 135 |
-
.agg(
|
| 136 |
-
metric=(primary_metric, "mean"),
|
| 137 |
-
count=(primary_metric, "count"),
|
| 138 |
-
comp=(comp_col, "mean") if comp_col in tmp.columns else (primary_metric, "mean"),
|
| 139 |
-
)
|
| 140 |
-
.reset_index()
|
| 141 |
-
)
|
| 142 |
-
return agg
|
| 143 |
-
|
| 144 |
-
g1 = _group_stats(group1)
|
| 145 |
-
g2 = _group_stats(group2)
|
| 146 |
-
|
| 147 |
-
# 5) Top groups (by primary metric) from group1
|
| 148 |
-
top_lines: List[str] = []
|
| 149 |
-
if isinstance(g1, pd.DataFrame) and not g1.empty:
|
| 150 |
-
g1 = g1.sort_values(by="metric", ascending=False)
|
| 151 |
-
k = min(5, len(g1))
|
| 152 |
-
for i, row in enumerate(g1.head(k).itertuples(index=False), 1):
|
| 153 |
-
label = getattr(row, group1)
|
| 154 |
-
metric = getattr(row, "metric")
|
| 155 |
-
comp = getattr(row, "comp")
|
| 156 |
-
cnt = getattr(row, "count")
|
| 157 |
-
devlab = _deviation_label(metric, baseline)
|
| 158 |
-
caution = _small_sample_note(int(cnt), min_sample)
|
| 159 |
-
msg = f"{i}. **{label}** — {primary_metric}: {_fmt_num(metric)}"
|
| 160 |
-
if comparator_metric:
|
| 161 |
-
msg += f"; {comparator_metric}: {_fmt_num(comp)}"
|
| 162 |
-
msg += f"; {_pluralize('record', int(cnt))}: {cnt}"
|
| 163 |
-
msg += f" → {devlab}"
|
| 164 |
-
if caution:
|
| 165 |
-
msg += f" ({caution})"
|
| 166 |
-
top_lines.append(msg)
|
| 167 |
-
|
| 168 |
-
# 6) Group2 overview
|
| 169 |
-
region_lines: List[str] = []
|
| 170 |
-
if isinstance(g2, pd.DataFrame) and not g2.empty:
|
| 171 |
-
g2 = g2.sort_values(by="metric", ascending=False)
|
| 172 |
-
for row in g2.itertuples(index=False):
|
| 173 |
-
label = getattr(row, group2)
|
| 174 |
-
metric = getattr(row, "metric")
|
| 175 |
-
comp = getattr(row, "comp")
|
| 176 |
-
cnt = getattr(row, "count")
|
| 177 |
-
devlab = _deviation_label(metric, baseline)
|
| 178 |
-
caution = _small_sample_note(int(cnt), min_sample)
|
| 179 |
-
line = f"- **{label}**: {_fmt_num(metric)} (vs. overall {_fmt_num(baseline)} → {devlab}); n={cnt}"
|
| 180 |
-
if comparator_metric:
|
| 181 |
-
line += f"; {comparator_metric}: {_fmt_num(comp)}"
|
| 182 |
-
if caution:
|
| 183 |
-
line += f" — {caution}"
|
| 184 |
-
region_lines.append(line)
|
| 185 |
-
|
| 186 |
-
# 7) Geographic notes (optional)
|
| 187 |
-
geo_notes: List[str] = []
|
| 188 |
-
city_col = next((c for c in df.columns if re.search(r"\bcity\b", c, re.I)), None)
|
| 189 |
-
lat_col = next((c for c in df.columns if re.search(r"\b(lat|latitude)\b", c, re.I)), None)
|
| 190 |
-
lon_col = next((c for c in df.columns if re.search(r"\b(lon|longitude)\b", c, re.I)), None)
|
| 191 |
-
if group1 and city_col and (lat_col and lon_col):
|
| 192 |
-
if isinstance(g1, pd.DataFrame) and not g1.empty and group1 in df.columns:
|
| 193 |
-
top_labels = g1[group1].astype(str).head(10).tolist()
|
| 194 |
-
sub = df[df[group1].astype(str).isin(top_labels)].copy()
|
| 195 |
-
if not sub.empty:
|
| 196 |
-
sub[primary_metric] = pd.to_numeric(sub[primary_metric], errors="coerce")
|
| 197 |
-
by_city = (
|
| 198 |
-
sub.groupby(city_col, dropna=False)[primary_metric]
|
| 199 |
-
.mean()
|
| 200 |
-
.reset_index()
|
| 201 |
-
.sort_values(by=primary_metric, ascending=False)
|
| 202 |
-
)
|
| 203 |
-
for r in by_city.head(3).to_dict(orient="records"):
|
| 204 |
-
cname = r.get(city_col)
|
| 205 |
-
val = r.get(primary_metric)
|
| 206 |
-
geo_notes.append(f"- **{cname}** shows higher average {primary_metric} among top groups ({_fmt_num(val)}).")
|
| 207 |
-
|
| 208 |
-
# 8) Methodology (auto)
|
| 209 |
-
methodology: List[str] = []
|
| 210 |
-
na_counts = df.isna().sum().sum()
|
| 211 |
-
if na_counts > 0:
|
| 212 |
-
methodology.append("Missing values (blank/dash) were treated as nulls and excluded from means.")
|
| 213 |
-
methodology.append(f"Primary metric: **{primary_metric}**; overall average: **{_fmt_num(baseline)}**.")
|
| 214 |
-
if comparator_metric:
|
| 215 |
-
methodology.append(f"Comparator metric detected: **{comparator_metric}** (means shown when available).")
|
| 216 |
-
if group1:
|
| 217 |
-
methodology.append(f"Primary grouping inferred: **{group1}**.")
|
| 218 |
-
if group2:
|
| 219 |
-
methodology.append(f"Secondary grouping inferred: **{group2}**.")
|
| 220 |
-
if min_sample != _DEF_MIN_SAMPLE:
|
| 221 |
-
methodology.append(f"Small-sample threshold set to {min_sample} records.")
|
| 222 |
-
|
| 223 |
-
# 9) Compose markdown
|
| 224 |
-
lines: List[str] = []
|
| 225 |
-
lines.append("## Methodology (Auto-generated)")
|
| 226 |
-
for m in methodology:
|
| 227 |
-
lines.append(f"- {m}")
|
| 228 |
-
lines.append("")
|
| 229 |
-
|
| 230 |
-
if top_lines:
|
| 231 |
-
lines.append("## Highest average values by group")
|
| 232 |
-
lines.extend(top_lines)
|
| 233 |
-
lines.append("")
|
| 234 |
-
|
| 235 |
-
if region_lines:
|
| 236 |
-
lines.append(f"## {group2 or 'Region/Category'} comparison vs overall")
|
| 237 |
-
lines.extend(region_lines)
|
| 238 |
-
lines.append("")
|
| 239 |
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
lines.extend(geo_notes)
|
| 243 |
-
lines.append("")
|
| 244 |
|
| 245 |
-
|
| 246 |
-
if top_lines:
|
| 247 |
-
recs.append("Prioritize resources to the highest-average groups (above overall baseline), especially those with sufficient volume.")
|
| 248 |
-
if comparator_metric:
|
| 249 |
-
recs.append(f"Cross-check {comparator_metric} trends to identify upstream bottlenecks (e.g., long consult waits pushing surgery waits).")
|
| 250 |
-
if isinstance(g2, pd.DataFrame) and not g2.empty:
|
| 251 |
-
high = g2[g2["metric"] > baseline]
|
| 252 |
-
if not high.empty:
|
| 253 |
-
recs.append(f"Address disparities where average **{primary_metric}** exceeds the overall baseline.")
|
| 254 |
-
recs.append("For very small groups, validate data quality and consider pooling across similar categories to stabilize estimates.")
|
| 255 |
-
recs.append("Validate coding differences (similar specialties or labels spelled differently) to ensure apples-to-apples comparison.")
|
| 256 |
|
| 257 |
-
|
| 258 |
-
for r in recs:
|
| 259 |
-
lines.append(f"- {r}")
|
| 260 |
|
| 261 |
-
return "\n".join(lines).strip()
|
| 262 |
|
|
|
|
| 1 |
# narrative_safetynet.py
|
| 2 |
from __future__ import annotations
|
| 3 |
+
from typing import Dict, Any, List, Optional, Tuple
|
| 4 |
+
import re
|
| 5 |
import math
|
| 6 |
import numpy as np
|
| 7 |
import pandas as pd
|
|
|
|
| 8 |
|
| 9 |
+
# -------------------- helpers: dtype / formatting --------------------

# Minimum group size before averages get a "small sample" caution note.
_DEF_MIN_SAMPLE = 5  # generic caution threshold for group sizes

# Lower-cased substrings that suggest a column holds a wait-time metric;
# listed roughly from most to least specific.
_HINT_METRICS_DEFAULT = [
    "surgery_median", "consult_median",
    "surgery_90th", "consult_90th",
    "surgery", "consult",
    "wait", "median", "p90", "90th"
]

# Lower-cased substrings that suggest a column is a grouping dimension.
_HINT_GROUPS_DEFAULT = [
    "facility", "specialty", "zone",
    "hospital", "city", "region"
]

# Column-name substrings that disqualify a column as a metric
# (row counters, identifiers, pandas "Unnamed: 0" artifacts).
_BAD_METRIC_NAMES = ["index", "id", "row", "unnamed"]
|
| 26 |
+
|
| 27 |
+
def _nanlike_to_nan(df: pd.DataFrame) -> pd.DataFrame:
|
| 28 |
+
dff = df.copy()
|
| 29 |
+
for c in dff.columns:
|
| 30 |
+
if dff[c].dtype == "object":
|
| 31 |
+
dff[c] = dff[c].replace({r"^\s*$": np.nan, r"^[-–—]$": np.nan}, regex=True)
|
| 32 |
+
return dff
|
| 33 |
+
|
| 34 |
+
def _is_numeric_series(s: pd.Series) -> bool:
|
| 35 |
+
try:
|
| 36 |
+
return pd.api.types.is_numeric_dtype(s)
|
| 37 |
+
except Exception:
|
| 38 |
+
return False
|
| 39 |
+
|
| 40 |
+
def _to_numeric(s: pd.Series) -> pd.Series:
|
| 41 |
+
return pd.to_numeric(s, errors="coerce")
|
| 42 |
|
| 43 |
def _fmt_num(x: Any, decimals: int = 1) -> str:
|
| 44 |
try:
|
| 45 |
if x is None or (isinstance(x, float) and math.isnan(x)):
|
| 46 |
return "n/a"
|
| 47 |
+
if isinstance(x, (int, np.integer)) or (isinstance(x, float) and float(x).is_integer()):
|
| 48 |
+
return f"{int(round(float(x))):,}"
|
| 49 |
return f"{float(x):,.{decimals}f}"
|
| 50 |
except Exception:
|
| 51 |
return str(x)
|
| 52 |
|
| 53 |
+
# -------------------- metric & dataset selection (dynamic) --------------------
|
| 54 |
+
|
| 55 |
+
def _score_metric_name(col: str, hints: List[str]) -> int:
    """Score a column name's suitability as a metric: +3 for each hint
    substring it contains; id-/counter-like names (see _BAD_METRIC_NAMES)
    are disqualified with a huge negative score."""
    lowered = (col or "").lower()
    for bad in _BAD_METRIC_NAMES:
        if bad in lowered:
            return -10**6  # disqualify obvious counters/ids
    return sum(3 for h in hints if h in lowered)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
+
def _choose_df_and_metric(
    datasets: Dict[str, Any],
    metric_hints: List[str]
) -> Optional[Tuple[str, pd.DataFrame, str]]:
    """
    Sweep every DataFrame in *datasets* and every coercible-numeric column,
    then pick the (dataset key, cleaned df, column) with the best score:
    +3 per hint-name match; +1 if the column is non-constant; id-like names
    are disqualified by _score_metric_name.

    Returns None when no dataset yields a usable numeric column.
    """
    best: Optional[Tuple[int, str, pd.DataFrame, str]] = None
    for key, value in datasets.items():
        if not isinstance(value, pd.DataFrame) or value.empty:
            continue
        df = _nanlike_to_nan(value)
        for col in df.columns:
            col_num = _to_numeric(df[col])
            # Bug fix: pd.to_numeric(errors="coerce") always returns a numeric
            # dtype, so the dtype check alone passed for pure-text columns too
            # (every value coerced to NaN). Require at least one real numeric
            # value so a text column can never be selected as the metric.
            if not _is_numeric_series(col_num) or not col_num.notna().any():
                continue
            score = _score_metric_name(col, metric_hints)
            if col_num.nunique(dropna=True) > 1:
                score += 1  # prefer columns that actually vary
            if best is None or score > best[0]:
                best = (score, key, df, col)
    if best is None:
        return None
    _, key, df, metric = best
    return key, df, metric
|
| 91 |
+
|
| 92 |
+
# -------------------- grouping detection (dynamic) --------------------
|
| 93 |
+
|
| 94 |
+
def _find_group_col(df: pd.DataFrame, candidates: List[str], avoid: Optional[List[str]] = None) -> Optional[str]:
|
| 95 |
+
avoid = [a.lower() for a in (avoid or [])]
|
| 96 |
cols = list(df.columns)
|
| 97 |
+
# prefer name matches
|
| 98 |
for cand in candidates:
|
| 99 |
for c in cols:
|
| 100 |
+
cname = c.lower()
|
| 101 |
+
if cand.lower() in cname and all(a not in cname for a in avoid):
|
| 102 |
return c
|
| 103 |
+
# fallback: a categorical with reasonable cardinality
|
| 104 |
obj_cols = [c for c in cols if df[c].dtype == "object"]
|
| 105 |
for c in obj_cols:
|
| 106 |
nuniq = df[c].nunique(dropna=True)
|
| 107 |
+
if 1 < nuniq < max(50, len(df)//10):
|
| 108 |
return c
|
| 109 |
return None
|
| 110 |
|
| 111 |
+
# -------------------- labels & cautions --------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
+
def _label_vs_baseline(x: float, mu: float, band: float = 0.05) -> str:
|
| 114 |
+
if pd.isna(x) or pd.isna(mu) or mu == 0:
|
| 115 |
return "unknown"
|
| 116 |
rel = (x - mu) / mu
|
| 117 |
+
if rel > band:
|
| 118 |
return "higher than average"
|
| 119 |
+
if rel < -band:
|
| 120 |
return "lower than average"
|
|
|
|
|
|
|
| 121 |
return "about average"
|
| 122 |
|
| 123 |
+
def _small_sample_note(n: int, min_n: int = _DEF_MIN_SAMPLE) -> Optional[str]:
    """Return a caution message when the group size *n* is below *min_n*,
    otherwise None."""
    if n >= min_n:
        return None
    return f"Interpret averages cautiously (only {n} records)."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
|
| 126 |
+
def _pluralize(word: str, n: int) -> str:
|
| 127 |
+
return f"{word}{'' if n == 1 else 's'}"
|
|
|
|
|
|
|
| 128 |
|
| 129 |
+
# -------------------- geo join (Top-5 only) --------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
|
| 131 |
+
def _canon(s: str) -> s_
|
|
|
|
|
|
|
| 132 |
|
|
|
|
| 133 |
|