File size: 1,039 Bytes
e783348 4b88fce e783348 4b88fce e783348 4b88fce e783348 4b88fce e783348 4b88fce e783348 4b88fce | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 | import numpy as np
import pandas as pd
import joblib
import streamlit as st
from catboost import CatBoostClassifier
from xgboost import XGBClassifier
# Directory (relative to the working directory) holding the serialized
# stage-1 / stage-2 models and the stage-2 encoder read by load_models().
MODEL_DIR = "models"
# Stage-1 gate: rows whose stage-1 positive-class probability is below this
# value are dropped in run_inference() before stage 2 is evaluated.
THRESHOLD_S1 = 0.463
@st.cache_resource
def load_models():
    """Load the two classifiers and the stage-2 encoder from ``MODEL_DIR``.

    The function is wrapped in ``st.cache_resource``, so Streamlit keeps the
    returned objects cached instead of re-reading the files on every call.

    Returns:
        A 3-tuple ``(stage-1 CatBoostClassifier, stage-2 XGBClassifier,
        encoder)``. The encoder is whatever object was stored in
        ``encoder_s2.pkl``.
    """
    stage1_path = f"{MODEL_DIR}/stage1_catboost.cbm"
    stage2_path = f"{MODEL_DIR}/stage2_xgb.json"
    encoder_path = f"{MODEL_DIR}/encoder_s2.pkl"

    # Stage 1: CatBoost model restored from its native .cbm file.
    stage1_clf = CatBoostClassifier()
    stage1_clf.load_model(stage1_path)

    # Stage 2: XGBoost model restored from its JSON dump.
    stage2_clf = XGBClassifier()
    stage2_clf.load_model(stage2_path)

    # NOTE: joblib.load unpickles the file, which can execute arbitrary code;
    # only load encoder files that come from a trusted source.
    stage2_encoder = joblib.load(encoder_path)

    return stage1_clf, stage2_clf, stage2_encoder
def run_inference(X_emb, meta, threshold):
    """Score rows with the two-stage cascade and return those passing stage 1.

    Stage 1 produces a positive-class probability for every row of ``X_emb``.
    Rows at or above ``THRESHOLD_S1`` go on to stage 2, and the final score is
    the blend ``0.4 * stage-1 probability + 0.6 * stage-2 probability``.

    Args:
        X_emb: Feature matrix accepted by both models' ``predict_proba`` and
            supporting boolean-mask row selection (e.g. a NumPy array).
            Presumably embeddings, judging by the name; confirm with caller.
        meta: Anything ``pd.DataFrame`` accepts, with one row per row of
            ``X_emb``.
        threshold: Cut-off applied to the blended final score.

    Returns:
        A copy of the metadata rows that passed stage 1, with two added
        columns: ``Final_Score`` (blended probability) and
        ``Final_Prediction`` (1 if ``Final_Score >= threshold`` else 0).
        If no row passes stage 1 the frame is empty but still has both columns.
    """
    # load_models() also returns an encoder; it is not needed for inference.
    model_s1, model_s2, _ = load_models()
    df_meta = pd.DataFrame(meta)

    probs_s1 = model_s1.predict_proba(X_emb)[:, 1]
    mask = probs_s1 >= THRESHOLD_S1
    df_pass = df_meta.loc[mask].copy()

    if mask.any():
        probs_s2 = model_s2.predict_proba(X_emb[mask])[:, 1]
        final_score = 0.4 * probs_s1[mask] + 0.6 * probs_s2
    else:
        # Nothing survived stage 1: skip stage 2 rather than hand the model a
        # zero-row input, which is not guaranteed to be supported.
        final_score = np.zeros(0, dtype=float)

    df_pass["Final_Score"] = final_score
    df_pass["Final_Prediction"] = (final_score >= threshold).astype(int)
    return df_pass
|