File size: 1,039 Bytes
e783348
 
 
4b88fce
e783348
 
 
 
4b88fce
e783348
 
4b88fce
 
 
 
e783348
4b88fce
 
 
 
 
 
e783348
 
 
4b88fce
 
e783348
 
 
 
 
 
 
 
 
 
 
 
 
 
4b88fce
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import numpy as np
import pandas as pd
import joblib
import streamlit as st
from catboost import CatBoostClassifier
from xgboost import XGBClassifier

# Directory prefix for the model artifacts read by load_models()
# (stage1_catboost.cbm, stage2_xgb.json, encoder_s2.pkl). It is a relative
# path, so it is resolved against the process's working directory.
MODEL_DIR: str = "models"
# Stage-1 gate used by run_inference(): rows whose stage-1 probability
# (column 1 of predict_proba) is >= this value are forwarded to stage 2.
# NOTE(review): how 0.463 was chosen is not visible in this file -- confirm
# against the training/validation code before changing it.
THRESHOLD_S1: float = 0.463


@st.cache_resource
def load_models():
    """Load the stage-1 model, the stage-2 model and the stage-2 encoder.

    All three artifacts are read from ``MODEL_DIR``. The function is wrapped
    in ``st.cache_resource``, so Streamlit is expected to hand back the same
    loaded objects on later calls instead of re-reading the files.

    Returns:
        A 3-tuple ``(stage-1 CatBoostClassifier, stage-2 XGBClassifier,
        encoder)``, in that order. The encoder is whatever object was
        serialized into ``encoder_s2.pkl``.
    """
    stage1_path = f"{MODEL_DIR}/stage1_catboost.cbm"
    stage2_path = f"{MODEL_DIR}/stage2_xgb.json"
    encoder_path = f"{MODEL_DIR}/encoder_s2.pkl"

    # Stage 1: CatBoost model stored in CatBoost's own binary format.
    catboost_clf = CatBoostClassifier()
    catboost_clf.load_model(stage1_path)

    # Stage 2: XGBoost model stored as JSON.
    xgb_clf = XGBClassifier()
    xgb_clf.load_model(stage2_path)

    # NOTE: joblib.load unpickles the file, which can execute arbitrary code;
    # only ship encoder files that come from a trusted source.
    stage2_encoder = joblib.load(encoder_path)

    return catboost_clf, xgb_clf, stage2_encoder


def run_inference(X_emb, meta, threshold):
    """Run the two-stage cascade and return the rows that pass stage 1.

    Stage 1 scores every row of ``X_emb``; only rows whose stage-1
    probability is at least ``THRESHOLD_S1`` are scored by stage 2. The
    final score of a surviving row is ``0.4 * stage-1 + 0.6 * stage-2``.
    Rows rejected by stage 1 are dropped from the result entirely (they are
    not returned with a zero prediction).

    Args:
        X_emb: Feature matrix passed to both models' ``predict_proba``. It
            is indexed with a boolean mask, so it must support that (e.g. a
            NumPy array). NOTE(review): presumably embedding vectors, one
            row per sample -- confirm against the caller.
        meta: Per-row metadata, in any form ``pd.DataFrame`` accepts. The
            stage-1 mask is applied to it row-wise, so it must have exactly
            one row per row of ``X_emb``, in the same order.
        threshold: Cutoff applied to the blended final score; rows with
            ``Final_Score >= threshold`` get ``Final_Prediction == 1``.

    Returns:
        A new DataFrame holding the metadata rows that passed stage 1, with
        two added columns: ``Final_Score`` (blended probability) and
        ``Final_Prediction`` (0 or 1). It has zero rows when nothing passes
        stage 1.
    """
    # The encoder returned by load_models() is not needed for scoring.
    model_s1, model_s2, _ = load_models()

    df_meta = pd.DataFrame(meta)

    # Column 1 of predict_proba is used as the stage-1 score.
    probs_s1 = model_s1.predict_proba(X_emb)[:, 1]
    mask = probs_s1 >= THRESHOLD_S1

    # Copy so the added columns never write through to the caller's data.
    df_pass = df_meta.loc[mask].copy()

    if np.any(mask):
        probs_s2 = model_s2.predict_proba(X_emb[mask])[:, 1]
    else:
        # Nothing survived stage 1: skip the stage-2 call rather than rely
        # on how the model handles a zero-row input, and fall through with
        # an empty score vector so the result keeps the same columns.
        probs_s2 = np.empty(0, dtype=float)

    final_score = 0.4 * probs_s1[mask] + 0.6 * probs_s2

    df_pass["Final_Score"] = final_score
    df_pass["Final_Prediction"] = (final_score >= threshold).astype(int)

    return df_pass