|
|
import numpy as np |
|
|
import pandas as pd |
|
|
import joblib |
|
|
import streamlit as st |
|
|
from catboost import CatBoostClassifier |
|
|
from xgboost import XGBClassifier |
|
|
|
|
|
# Directory prefix used when building the paths of the serialized model
# artifacts (stage-1 CatBoost, stage-2 XGBoost, stage-2 encoder).
MODEL_DIR = "models"

# Minimum stage-1 positive-class probability a row must reach to be passed
# on to stage 2.
THRESHOLD_S1 = 0.463
|
|
|
|
|
|
|
|
@st.cache_resource
def load_models():
    """Load the stage-1 model, the stage-2 model and the stage-2 encoder.

    All three artifacts are read from ``MODEL_DIR``. The function is wrapped
    in ``st.cache_resource`` so the loaded objects are reused instead of
    being read from disk again on every call.

    Returns:
        A ``(model_s1, model_s2, encoder)`` tuple: the ``CatBoostClassifier``
        loaded from ``stage1_catboost.cbm``, the ``XGBClassifier`` loaded from
        ``stage2_xgb.json``, and whatever object ``encoder_s2.pkl`` contains.
    """
    model_s1 = CatBoostClassifier()
    model_s1.load_model(f"{MODEL_DIR}/stage1_catboost.cbm")

    model_s2 = XGBClassifier()
    model_s2.load_model(f"{MODEL_DIR}/stage2_xgb.json")

    # NOTE: joblib.load unpickles the file, which can execute arbitrary code;
    # only ship encoder artifacts that come from a trusted source.
    encoder = joblib.load(f"{MODEL_DIR}/encoder_s2.pkl")

    return model_s1, model_s2, encoder
|
|
|
|
|
|
|
|
def run_inference(X_emb, meta, threshold):
    """Run the two-stage cascade and return the rows that pass stage 1.

    Stage 1 scores every row of ``X_emb``; rows whose positive-class
    probability is at least ``THRESHOLD_S1`` are scored again by stage 2.
    The final score is a fixed weighted blend of both probabilities.

    Args:
        X_emb: Feature matrix accepted by both models' ``predict_proba`` and
            indexable with a boolean mask (assumed to be a NumPy array with
            one row per entry of ``meta`` -- TODO confirm against callers).
        meta: Anything ``pd.DataFrame`` accepts; row ``i`` describes row ``i``
            of ``X_emb``.
        threshold: Cut-off applied to the blended final score.

    Returns:
        A copy of the metadata rows that passed stage 1, with two added
        columns: ``Final_Score`` (blended probability) and
        ``Final_Prediction`` (1 when the score is >= ``threshold``, else 0).
        When no row passes stage 1 the frame is empty but still carries both
        columns.
    """
    # NOTE(review): the encoder is loaded together with the models but is not
    # applied anywhere in this function -- confirm whether stage 2 expects
    # encoded inputs.
    model_s1, model_s2, _encoder = load_models()

    df_meta = pd.DataFrame(meta)

    # Column 1 of predict_proba is the positive-class probability.
    probs_s1 = model_s1.predict_proba(X_emb)[:, 1]
    mask = probs_s1 >= THRESHOLD_S1

    df_pass = df_meta.loc[mask].copy()

    if not mask.any():
        # Nothing survived stage 1: skip the stage-2 call (predicting on a
        # zero-row input is not guaranteed to be supported) and return an
        # empty frame with the same columns as the normal path.
        df_pass["Final_Score"] = np.empty(0, dtype=float)
        df_pass["Final_Prediction"] = np.empty(0, dtype=int)
        return df_pass

    X_pass = X_emb[mask]
    probs_s2 = model_s2.predict_proba(X_pass)[:, 1]

    # Blend weights: stage 2 counts slightly more than stage 1.
    weight_s1, weight_s2 = 0.4, 0.6
    final_score = weight_s1 * probs_s1[mask] + weight_s2 * probs_s2

    df_pass["Final_Score"] = final_score
    df_pass["Final_Prediction"] = (final_score >= threshold).astype(int)

    return df_pass
|
|
|