Synav committed on
Commit
6d61737
·
verified ·
1 Parent(s): c6babca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -12
app.py CHANGED
@@ -4,8 +4,8 @@ import numpy as np
4
  import pandas as pd
5
  import streamlit as st
6
  import joblib
7
- import shap
8
- import matplotlib.pyplot as plt
9
  import os
10
  from huggingface_hub import hf_hub_download, HfApi
11
  import hmac
@@ -33,12 +33,13 @@ from sklearn.model_selection import train_test_split
33
 
34
  #Figures setting block
35
  import io
36
- from lifelines import CoxPHFitter
37
 
38
 
39
 
40
  # REPLACE make_fig with this (or add this and stop using plt.plot directly)
41
  def make_fig(figsize=(5.5, 3.6), dpi=120):
 
42
  fig, ax = plt.subplots(figsize=figsize, dpi=dpi)
43
  return fig, ax
44
 
@@ -582,8 +583,8 @@ def train_and_save(
582
  use_feature_selection: bool,
583
  l1_C: float,
584
  use_dimred: bool,
585
- svd_components: int,
586
- ):
587
  X = df[feature_cols].copy()
588
  y_raw = df[LABEL_COL].copy()
589
 
@@ -832,6 +833,7 @@ def train_and_save(
832
  # SHAP
833
  # ============================================================
834
  def build_shap_explainer(pipe, X_bg, max_bg=200):
 
835
  if len(X_bg) > max_bg:
836
  X_bg = X_bg.sample(max_bg, random_state=42)
837
 
@@ -1259,7 +1261,7 @@ def options_for(col: str, df: pd.DataFrame | None):
1259
  return [""] + out
1260
 
1261
  import re
1262
- import country_converter as coco
1263
 
1264
  # Canonical region labels you can use for analysis
1265
  # (UN-style: Africa, Americas, Asia, Europe, Oceania; you can later refine into subregions)
@@ -1320,26 +1322,32 @@ def normalize_country_name(x: str) -> str | None:
1320
  # country_converter can handle many variants; pass through as-is
1321
  return s.strip()
1322
 
1323
- def country_to_region(country: str | None) -> str:
 
 
1324
  """
1325
  Map a country name to a broad region for analytics.
1326
  Returns one of: Africa, Americas, Asia, Europe, Oceania, Unknown.
 
1327
  """
1328
- if not country:
1329
  return REGION_UNKNOWN
1330
 
1331
- # coco returns 'not found' if it can't map; we convert to Unknown
 
 
 
1332
  r = coco.convert(names=country, to="continent")
 
1333
  if not r or str(r).lower() in ("not found", "nan", "none"):
1334
  return REGION_UNKNOWN
1335
 
1336
- # Normalize labels to your preferred set
1337
- # coco usually returns: Africa, America, Asia, Europe, Oceania
1338
- # Convert "America" -> "Americas"
1339
  if r == "America":
1340
  return "Americas"
 
1341
  return str(r)
1342
 
 
1343
  def add_ethnicity_region(df: pd.DataFrame, eth_col: str = "Ethnicity", out_col: str = "Ethnicity_Region") -> pd.DataFrame:
1344
  """Adds an analytics-only region column derived from the Ethnicity/nationality column."""
1345
  if eth_col not in df.columns:
 
4
  import pandas as pd
5
  import streamlit as st
6
  import joblib
7
+
8
+
9
  import os
10
  from huggingface_hub import hf_hub_download, HfApi
11
  import hmac
 
33
 
34
  #Figures setting block
35
  import io
36
+
37
 
38
 
39
 
40
  # REPLACE make_fig with this (or add this and stop using plt.plot directly)
41
  def make_fig(figsize=(5.5, 3.6), dpi=120):
42
+ import matplotlib.pyplot as plt
43
  fig, ax = plt.subplots(figsize=figsize, dpi=dpi)
44
  return fig, ax
45
 
 
583
  use_feature_selection: bool,
584
  l1_C: float,
585
  use_dimred: bool,
586
+ svd_components: int,):
587
+ from lifelines import CoxPHFitter
588
  X = df[feature_cols].copy()
589
  y_raw = df[LABEL_COL].copy()
590
 
 
833
  # SHAP
834
  # ============================================================
835
  def build_shap_explainer(pipe, X_bg, max_bg=200):
836
+ import shap
837
  if len(X_bg) > max_bg:
838
  X_bg = X_bg.sample(max_bg, random_state=42)
839
 
 
1261
  return [""] + out
1262
 
1263
  import re
1264
+
1265
 
1266
  # Canonical region labels you can use for analysis
1267
  # (UN-style: Africa, Americas, Asia, Europe, Oceania; you can later refine into subregions)
 
1322
  # country_converter can handle many variants; pass through as-is
1323
  return s.strip()
1324
 
1325
+ from typing import Optional
1326
+
1327
+ def country_to_region(country: Optional[str]) -> str:
1328
  """
1329
  Map a country name to a broad region for analytics.
1330
  Returns one of: Africa, Americas, Asia, Europe, Oceania, Unknown.
1331
+ Lazy-imports country_converter to reduce startup memory.
1332
  """
1333
+ if not country or pd.isna(country):
1334
  return REGION_UNKNOWN
1335
 
1336
+ country = str(country).strip()
1337
+
1338
+ import country_converter as coco # lazy import
1339
+
1340
  r = coco.convert(names=country, to="continent")
1341
+
1342
  if not r or str(r).lower() in ("not found", "nan", "none"):
1343
  return REGION_UNKNOWN
1344
 
 
 
 
1345
  if r == "America":
1346
  return "Americas"
1347
+
1348
  return str(r)
1349
 
1350
+
1351
  def add_ethnicity_region(df: pd.DataFrame, eth_col: str = "Ethnicity", out_col: str = "Ethnicity_Region") -> pd.DataFrame:
1352
  """Adds an analytics-only region column derived from the Ethnicity/nationality column."""
1353
  if eth_col not in df.columns: