Instructions to use getachewgetu/stunting-risk-model with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Scikit-learn
How to use getachewgetu/stunting-risk-model with Scikit-learn:
```python
from huggingface_hub import hf_hub_download
import joblib

# Only load pickle files from sources you trust.
# Read more about it here: https://skops.readthedocs.io/en/stable/persistence.html
model = joblib.load(
    hf_hub_download("getachewgetu/stunting-risk-model", "sklearn_model.joblib")
)
```
- Notebooks
- Google Colab
- Kaggle
| """ | |
| Feature engineering and preprocessing utilities for the stunting risk ML pipeline. | |
| Shared by the Jupyter notebook, training script, and RiskScorer inference class. | |
| """ | |
| import pandas as pd | |
| import numpy as np | |
| from sklearn.preprocessing import StandardScaler | |
| from sklearn.pipeline import Pipeline | |
# ---------------------------------------------------------------------------
# Ordinal encoding maps (higher value = higher risk)
# ---------------------------------------------------------------------------
# Water sources listed in ascending order of their ordinal code; each
# category's code is simply its position in this tuple (0 through 4).
WATER_MAP = {
    source: rank
    for rank, source in enumerate((
        'piped_into_dwelling',
        'public_tap',
        'protected_well',
        'unprotected_well',
        'surface_water',
    ))
}
# Sanitation tiers in ascending order of their ordinal code (0 through 3);
# the code assigned to each tier is its position in this tuple.
SANITATION_MAP = {
    tier: rank
    for rank, tier in enumerate((
        'improved',
        'basic',
        'unimproved',
        'none',
    ))
}
# Income bands in ascending order of their ordinal code (0 through 2);
# the code assigned to each band is its position in this tuple.
INCOME_MAP = {
    band: rank
    for rank, band in enumerate(('high', 'medium', 'low'))
}
# Ordered feature list used by the model.
# The order is significant: it fixes the column order of the feature matrix.
FEATURE_NAMES = [
    # Input columns and their ordinal encodings
    'avg_meal_count',
    'water_source_enc',
    'sanitation_tier_enc',
    'income_band_enc',
    'children_under5',
] + [
    # Interaction / composite columns
    'meal_x_water',
    'deprivation_index',
]
def encode_categoricals(df: pd.DataFrame) -> pd.DataFrame:
    """
    Apply ordinal encoding to water_source, sanitation_tier, income_band.

    Each column is validated against its mapping before it is encoded, so an
    unexpected category fails loudly instead of silently turning into NaN.
    Missing values (NaN/None) are excluded from that validation: they are
    accepted and remain NaN in the corresponding *_enc column.

    Raises ValueError for any unrecognised (non-null) categorical value.
    Raises KeyError if one of the three source columns is absent from df.
    Returns a copy of df with three new *_enc columns added; the input frame
    is left untouched.
    """
    df = df.copy()
    for col, mapping in [
        ('water_source', WATER_MAP),
        ('sanitation_tier', SANITATION_MAP),
        ('income_band', INCOME_MAP),
    ]:
        # Nulls are dropped first so that only real, non-missing categories
        # are compared against the mapping's keys.
        unknown = set(df[col].dropna().unique()) - set(mapping.keys())
        if unknown:
            raise ValueError(
                f"Unrecognised value(s) in '{col}': {unknown}. "
                f"Expected one of {set(mapping.keys())}"
            )
        # The encoded column is always the source column name plus '_enc'.
        df[f'{col}_enc'] = df[col].map(mapping)
    return df
def engineer_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add interaction and composite features.

    Expects avg_meal_count, water_source_enc, sanitation_tier_enc and
    income_band_enc to already exist in df.

    Adds two columns:
      - meal_x_water: avg_meal_count multiplied by water_source_enc
      - deprivation_index: sum of the three *_enc columns

    Raises KeyError naming every required column that is missing, before any
    work is done.
    Returns a copy with meal_x_water and deprivation_index added; the input
    frame is left untouched.
    """
    required = (
        'avg_meal_count',
        'water_source_enc',
        'sanitation_tier_enc',
        'income_band_enc',
    )
    # Check the precondition up front so the caller sees all missing columns
    # at once rather than a bare KeyError for whichever is accessed first.
    missing = [name for name in required if name not in df.columns]
    if missing:
        raise KeyError(
            f"engineer_features is missing required column(s): {missing}"
        )

    df = df.copy()
    df['meal_x_water'] = df['avg_meal_count'] * df['water_source_enc']
    df['deprivation_index'] = (
        df['water_source_enc'] +
        df['sanitation_tier_enc'] +
        df['income_band_enc']
    )
    return df
def build_preprocessor() -> StandardScaler:
    """
    Create the scaler applied to the feature matrix.
    The StandardScaler is returned unfitted; fitting it on the (already
    numerically encoded) feature matrix is left to the caller.
    """
    scaler = StandardScaler()
    return scaler
def prepare_feature_matrix(df: pd.DataFrame) -> pd.DataFrame:
    """
    Run the full preprocessing pipeline on a raw input frame:
    1. encode_categoricals  - add the ordinal *_enc columns
    2. engineer_features    - add meal_x_water and deprivation_index
    3. keep only the FEATURE_NAMES columns, in that order
    Returns a DataFrame with exactly FEATURE_NAMES columns.
    """
    encoded = encode_categoricals(df)
    enriched = engineer_features(encoded)
    feature_matrix = enriched[FEATURE_NAMES]
    return feature_matrix