# Heart-Attack-Risk-Rate / test_predict.py
# kbssrikar7
# Ready for Render deployment - Heart Attack Risk Predictor Ensemble Model
# 55fb1d9
import os
import json
import joblib
import pandas as pd
import numpy as np
# Filesystem layout inside the container image.
APP_DIR = "/app"
ASSETS_DIR = os.path.join(APP_DIR, "model_assets")

# Candidate artifact file names, probed in order; the first one that exists
# under ASSETS_DIR is used (see find_first).
XGB_CANDIDATES = [
    "XGB_spw.joblib",
    "XGBoost_best_5cv.joblib",
    "XGBoost_best.joblib",
    "XGBoost.joblib",
    "xgb_model.joblib",
    "xgb_full.joblib",
]
CAT_CANDIDATES = [
    "CAT_cw.joblib",
    "CatBoost_best_5cv.joblib",
    "CatBoost_best.joblib",
    "CatBoost.joblib",
    "catboost.joblib",
    "cat_model.joblib",
    "cat_full.joblib",
]
def find_first(path_list, assets_dir=None):
    """Return the path of the first candidate file that exists, else ``None``.

    Args:
        path_list: File names to probe, in priority order.
        assets_dir: Directory the names are joined onto. When omitted (or
            ``None``) the module-level ``ASSETS_DIR`` is used, which is the
            original behavior.

    Returns:
        ``os.path.join(assets_dir, name)`` for the first ``name`` whose joined
        path exists, or ``None`` when none exists (including an empty list).
    """
    if assets_dir is None:
        # Resolved at call time so the module constant stays the default.
        assets_dir = ASSETS_DIR
    candidates = (os.path.join(assets_dir, name) for name in path_list)
    return next((path for path in candidates if os.path.exists(path)), None)
def build_sample_input():
    """Build the single-row, all-float feature frame used by this smoke test.

    The raw inputs are fixed constants (per the original note, values close to
    the UI defaults). Derived features are computed from them, the four
    categorical features are one-hot encoded against fixed level lists, and
    everything is cast to float.

    Returns:
        A one-row ``pandas.DataFrame``: the numeric features in feature order,
        followed by the one-hot columns named ``"<feature>_<level>"``.
    """
    # --- Raw inputs -------------------------------------------------------
    sex = 1
    height_cm = 170
    weight_kg = 70.0
    systolic = 120
    diastolic = 80
    chol = 1
    glucose = 1
    smokes = 0
    drinks = 0
    is_active = 1
    years = 50
    days = years * 365

    # --- Derived numeric features -----------------------------------------
    body_mass_index = weight_kg / ((height_cm / 100) ** 2)
    pulse_pressure = systolic - diastolic
    mean_arterial = diastolic + (pulse_pressure / 3)
    # Guard against a zero systolic value before dividing.
    pp_ratio = pulse_pressure / systolic if systolic > 0 else 0
    obese = int(body_mass_index >= 30)
    hypertensive = int(systolic >= 140 or diastolic >= 90)
    lifestyle = int(smokes == 1) + int(drinks == 1) + int(is_active == 0)
    health_risk = lifestyle + obese + hypertensive
    smokes_or_drinks = int(smokes == 1 or drinks == 1)
    adjusted_age = years + (health_risk * 5)

    # --- Categorical labels -----------------------------------------------
    age_label = "50-59"

    if body_mass_index < 18.5:
        bmi_label = "Underweight"
    elif body_mass_index < 25:
        bmi_label = "Normal"
    elif body_mass_index < 30:
        bmi_label = "Overweight"
    else:
        bmi_label = "Obese"

    bp_label = (
        "Normal" if (systolic < 120 and diastolic < 80)
        else "Elevated" if (systolic < 130 and diastolic < 80)
        else "Stage 1" if (systolic < 140 or diastolic < 90)
        else "Stage 2"
    )

    if health_risk <= 2:
        risk_label = "Low"
    elif health_risk <= 4:
        risk_label = "Medium"
    else:
        risk_label = "High"

    # Insertion order of this dict IS the feature order of the frame.
    record = {
        'age': days,
        'gender': sex,
        'height': height_cm,
        'weight': weight_kg,
        'ap_hi': systolic,
        'ap_lo': diastolic,
        'cholesterol': chol,
        'gluc': glucose,
        'smoke': smokes,
        'alco': drinks,
        'active': is_active,
        'BMI': body_mass_index,
        'BP_diff': pulse_pressure,
        'Systolic_Pressure': systolic,
        'age_years': years,
        'Age_Group': age_label,
        'Lifestyle_Score': lifestyle,
        'Obesity_Flag': obese,
        'Hypertension_Flag': hypertensive,
        'Health_Risk_Score': health_risk,
        'Pulse_Pressure_Ratio': pp_ratio,
        'MAP': mean_arterial,
        'BMI_Category': bmi_label,
        'Smoker_Alcoholic': smokes_or_drinks,
        'BP_Category': bp_label,
        'Risk_Age': adjusted_age,
        'Risk_Level': risk_label,
        'Protein_Level': 14.0,
        'Ejection_Fraction': 60.0,
    }
    frame = pd.DataFrame([record])

    # Fixed level lists for the one-hot encoding (the original comment says
    # these are the same fallback values the app uses).
    levels_by_feature = {
        'Age_Group': ['20-29', '30-39', '40-49', '50-59', '60+'],
        'BMI_Category': ['Underweight', 'Normal', 'Overweight', 'Obese'],
        'BP_Category': ['Normal', 'Elevated', 'Stage 1', 'Stage 2'],
        'Risk_Level': ['Low', 'Medium', 'High'],
    }
    categorical = ['Age_Group', 'BMI_Category', 'BP_Category', 'Risk_Level']

    numeric_part = frame.drop(columns=categorical)

    # One indicator column per (feature, level), 1 where the row's label matches.
    indicators = {}
    for feature in categorical:
        if feature not in frame.columns:
            continue
        chosen = frame[feature].iloc[0]
        for level in levels_by_feature[feature]:
            indicators[f"{feature}_{level}"] = [int(chosen == level)]
    onehot_part = pd.DataFrame(indicators, index=frame.index)

    return pd.concat([numeric_part, onehot_part], axis=1).astype(float)
def _reindex_to_expected(Xp, expected):
    """Return ``Xp`` with exactly the ``expected`` columns, in that order.

    Columns of ``Xp`` not listed in ``expected`` are dropped; expected columns
    missing from ``Xp`` are added and filled with ``0.0``.
    """
    return Xp.reindex(columns=expected, fill_value=0.0)


def align_for_model(model, Xp):
    """Align the columns of ``Xp`` to what ``model`` expects.

    Strategies are tried in this order:

    1. ``model.feature_names_in_``: align by column name.
    2. ``model.get_booster().feature_names`` (xgboost booster names): align
       by column name.
    3. ``model.n_features_in_``: keep the first ``n`` columns, or append
       zero-filled ``pad_<i>`` columns until there are ``n``.
    4. Otherwise return ``Xp`` unchanged.

    Alignment stays best-effort. A failing booster lookup is reported with a
    ``RuntimeWarning`` and then falls through to strategy 3, instead of
    silently abandoning every remaining fallback.

    Args:
        model: Fitted estimator; only the attributes named above are probed.
        Xp: ``pandas.DataFrame`` of preprocessed features.

    Returns:
        A ``pandas.DataFrame`` aligned to the model, or ``Xp`` itself when no
        expectation could be determined.
    """
    import warnings  # local import so this block adds no file-level dependency

    if hasattr(model, 'feature_names_in_'):
        return _reindex_to_expected(Xp, list(model.feature_names_in_))

    # xgboost booster feature names
    booster_names = None
    get_booster = getattr(model, 'get_booster', None)
    if callable(get_booster):
        try:
            booster_names = getattr(get_booster(), 'feature_names', None)
        except Exception as exc:  # booster access is optional; keep going
            warnings.warn(
                f"align_for_model: could not read booster feature names ({exc!r}); "
                "falling back to n_features_in_",
                RuntimeWarning,
                stacklevel=2,
            )
    if booster_names:
        return _reindex_to_expected(Xp, list(booster_names))

    if hasattr(model, 'n_features_in_'):
        try:
            n = int(model.n_features_in_)
        except (TypeError, ValueError) as exc:
            warnings.warn(
                f"align_for_model: unusable n_features_in_ ({exc!r}); input left unaligned",
                RuntimeWarning,
                stacklevel=2,
            )
            return Xp
        width = Xp.shape[1]
        # Fallback: trim or pad to match expected number of features
        if width >= n:
            return Xp.iloc[:, :n].copy()
        pad = pd.DataFrame(
            0.0,
            index=Xp.index,
            columns=[f"pad_{i}" for i in range(n - width)],
        )
        return pd.concat([Xp, pad], axis=1)

    return Xp
def _positive_probability(model, X):
    """Return the first row's score as a float.

    Uses column 1 of ``predict_proba`` when the model has that method,
    otherwise the first element of ``predict``.
    """
    if hasattr(model, 'predict_proba'):
        return float(model.predict_proba(X)[0, 1])
    return float(model.predict(X)[0])


def main():
    """Load both model artifacts, score the sample input and print the result.

    Prints two ``DBG:`` shape lines followed by a JSON object with the keys
    ``xgb_prob``, ``cat_prob``, ``ensemble_prob`` (equal-weight average) and
    ``ensemble_risk_percent``.

    Raises:
        FileNotFoundError: if no XGBoost or no CatBoost artifact exists in
            ``ASSETS_DIR``.
    """
    xgb_path = find_first(XGB_CANDIDATES)
    cat_path = find_first(CAT_CANDIDATES)
    # Explicit raises rather than `assert`: assertions are stripped under
    # `python -O`, which would let a missing artifact reach joblib.load(None).
    if not (xgb_path and os.path.exists(xgb_path)):
        raise FileNotFoundError(f"XGBoost artifact not found in {ASSETS_DIR}")
    if not (cat_path and os.path.exists(cat_path)):
        raise FileNotFoundError(f"CatBoost artifact not found in {ASSETS_DIR}")

    # NOTE(review): joblib.load unpickles its input; only load artifacts that
    # were baked into the image from a trusted source.
    xgb = joblib.load(xgb_path)
    cat = joblib.load(cat_path)

    Xp = build_sample_input()

    # Force shape match for XGBoost using n_features_in_
    n_xgb = int(getattr(xgb, 'n_features_in_', Xp.shape[1]))
    X_xgb = Xp.iloc[:, :n_xgb].values
    print(f"DBG: n_xgb={n_xgb}, Xp.shape={Xp.shape}, X_xgb.shape={X_xgb.shape}")

    # Align for CatBoost (by names if available), otherwise force shape
    if hasattr(cat, 'feature_names_in_'):
        X_cat = align_for_model(cat, Xp)
    else:
        # CatBoost models often don't expose names; pass full matrix
        X_cat = Xp.values
    print(f"DBG: X_cat.shape={X_cat.shape}")

    px = _positive_probability(xgb, X_xgb)
    pc = _positive_probability(cat, X_cat)
    # Equal-weight ensemble of the two model scores.
    pe = 0.5 * px + 0.5 * pc

    out = {
        'xgb_prob': px,
        'cat_prob': pc,
        'ensemble_prob': pe,
        'ensemble_risk_percent': pe * 100.0,
    }
    print(json.dumps(out, indent=2))


if __name__ == "__main__":
    main()