Update app.py
Browse files
app.py
CHANGED
|
@@ -4,8 +4,8 @@ import numpy as np
|
|
| 4 |
import pandas as pd
|
| 5 |
import streamlit as st
|
| 6 |
import joblib
|
| 7 |
-
|
| 8 |
-
|
| 9 |
import os
|
| 10 |
from huggingface_hub import hf_hub_download, HfApi
|
| 11 |
import hmac
|
|
@@ -33,12 +33,13 @@ from sklearn.model_selection import train_test_split
|
|
| 33 |
|
| 34 |
#Figures setting block
|
| 35 |
import io
|
| 36 |
-
|
| 37 |
|
| 38 |
|
| 39 |
|
| 40 |
# REPLACE make_fig with this (or add this and stop using plt.plot directly)
|
| 41 |
def make_fig(figsize=(5.5, 3.6), dpi=120):
|
|
|
|
| 42 |
fig, ax = plt.subplots(figsize=figsize, dpi=dpi)
|
| 43 |
return fig, ax
|
| 44 |
|
|
@@ -582,8 +583,8 @@ def train_and_save(
|
|
| 582 |
use_feature_selection: bool,
|
| 583 |
l1_C: float,
|
| 584 |
use_dimred: bool,
|
| 585 |
-
svd_components: int,
|
| 586 |
-
|
| 587 |
X = df[feature_cols].copy()
|
| 588 |
y_raw = df[LABEL_COL].copy()
|
| 589 |
|
|
@@ -832,6 +833,7 @@ def train_and_save(
|
|
| 832 |
# SHAP
|
| 833 |
# ============================================================
|
| 834 |
def build_shap_explainer(pipe, X_bg, max_bg=200):
|
|
|
|
| 835 |
if len(X_bg) > max_bg:
|
| 836 |
X_bg = X_bg.sample(max_bg, random_state=42)
|
| 837 |
|
|
@@ -1259,7 +1261,7 @@ def options_for(col: str, df: pd.DataFrame | None):
|
|
| 1259 |
return [""] + out
|
| 1260 |
|
| 1261 |
import re
|
| 1262 |
-
|
| 1263 |
|
| 1264 |
# Canonical region labels you can use for analysis
|
| 1265 |
# (UN-style: Africa, Americas, Asia, Europe, Oceania; you can later refine into subregions)
|
|
@@ -1320,26 +1322,32 @@ def normalize_country_name(x: str) -> str | None:
|
|
| 1320 |
# country_converter can handle many variants; pass through as-is
|
| 1321 |
return s.strip()
|
| 1322 |
|
| 1323 |
-
|
|
|
|
|
|
|
| 1324 |
"""
|
| 1325 |
Map a country name to a broad region for analytics.
|
| 1326 |
Returns one of: Africa, Americas, Asia, Europe, Oceania, Unknown.
|
|
|
|
| 1327 |
"""
|
| 1328 |
-
if not country:
|
| 1329 |
return REGION_UNKNOWN
|
| 1330 |
|
| 1331 |
-
|
|
|
|
|
|
|
|
|
|
| 1332 |
r = coco.convert(names=country, to="continent")
|
|
|
|
| 1333 |
if not r or str(r).lower() in ("not found", "nan", "none"):
|
| 1334 |
return REGION_UNKNOWN
|
| 1335 |
|
| 1336 |
-
# Normalize labels to your preferred set
|
| 1337 |
-
# coco usually returns: Africa, America, Asia, Europe, Oceania
|
| 1338 |
-
# Convert "America" -> "Americas"
|
| 1339 |
if r == "America":
|
| 1340 |
return "Americas"
|
|
|
|
| 1341 |
return str(r)
|
| 1342 |
|
|
|
|
| 1343 |
def add_ethnicity_region(df: pd.DataFrame, eth_col: str = "Ethnicity", out_col: str = "Ethnicity_Region") -> pd.DataFrame:
|
| 1344 |
"""Adds an analytics-only region column derived from the Ethnicity/nationality column."""
|
| 1345 |
if eth_col not in df.columns:
|
|
|
|
| 4 |
import pandas as pd
|
| 5 |
import streamlit as st
|
| 6 |
import joblib
|
| 7 |
+
|
| 8 |
+
|
| 9 |
import os
|
| 10 |
from huggingface_hub import hf_hub_download, HfApi
|
| 11 |
import hmac
|
|
|
|
| 33 |
|
| 34 |
#Figures setting block
|
| 35 |
import io
|
| 36 |
+
|
| 37 |
|
| 38 |
|
| 39 |
|
| 40 |
# REPLACE make_fig with this (or add this and stop using plt.plot directly)
|
| 41 |
def make_fig(figsize=(5.5, 3.6), dpi=120):
|
| 42 |
+
import matplotlib.pyplot as plt
|
| 43 |
fig, ax = plt.subplots(figsize=figsize, dpi=dpi)
|
| 44 |
return fig, ax
|
| 45 |
|
|
|
|
| 583 |
use_feature_selection: bool,
|
| 584 |
l1_C: float,
|
| 585 |
use_dimred: bool,
|
| 586 |
+
svd_components: int,):
|
| 587 |
+
from lifelines import CoxPHFitter
|
| 588 |
X = df[feature_cols].copy()
|
| 589 |
y_raw = df[LABEL_COL].copy()
|
| 590 |
|
|
|
|
| 833 |
# SHAP
|
| 834 |
# ============================================================
|
| 835 |
def build_shap_explainer(pipe, X_bg, max_bg=200):
|
| 836 |
+
import shap
|
| 837 |
if len(X_bg) > max_bg:
|
| 838 |
X_bg = X_bg.sample(max_bg, random_state=42)
|
| 839 |
|
|
|
|
| 1261 |
return [""] + out
|
| 1262 |
|
| 1263 |
import re
|
| 1264 |
+
|
| 1265 |
|
| 1266 |
# Canonical region labels you can use for analysis
|
| 1267 |
# (UN-style: Africa, Americas, Asia, Europe, Oceania; you can later refine into subregions)
|
|
|
|
| 1322 |
# country_converter can handle many variants; pass through as-is
|
| 1323 |
return s.strip()
|
| 1324 |
|
| 1325 |
+
from typing import Optional
|
| 1326 |
+
|
| 1327 |
+
def country_to_region(country: Optional[str]) -> str:
|
| 1328 |
"""
|
| 1329 |
Map a country name to a broad region for analytics.
|
| 1330 |
Returns one of: Africa, Americas, Asia, Europe, Oceania, Unknown.
|
| 1331 |
+
Lazy-imports country_converter to reduce startup memory.
|
| 1332 |
"""
|
| 1333 |
+
if not country or pd.isna(country):
|
| 1334 |
return REGION_UNKNOWN
|
| 1335 |
|
| 1336 |
+
country = str(country).strip()
|
| 1337 |
+
|
| 1338 |
+
import country_converter as coco # lazy import
|
| 1339 |
+
|
| 1340 |
r = coco.convert(names=country, to="continent")
|
| 1341 |
+
|
| 1342 |
if not r or str(r).lower() in ("not found", "nan", "none"):
|
| 1343 |
return REGION_UNKNOWN
|
| 1344 |
|
|
|
|
|
|
|
|
|
|
| 1345 |
if r == "America":
|
| 1346 |
return "Americas"
|
| 1347 |
+
|
| 1348 |
return str(r)
|
| 1349 |
|
| 1350 |
+
|
| 1351 |
def add_ethnicity_region(df: pd.DataFrame, eth_col: str = "Ethnicity", out_col: str = "Ethnicity_Region") -> pd.DataFrame:
|
| 1352 |
"""Adds an analytics-only region column derived from the Ethnicity/nationality column."""
|
| 1353 |
if eth_col not in df.columns:
|