"""Inference helpers for an exported embedding + per-label classifier bundle.

``load_bundle`` reads ``config.json`` and the per-label model files from a
bundle directory; ``predict_weighted_risk`` turns texts into per-label
probabilities, thresholded 0/1 labels, a weighted score and a final 0/1
risk flag.
"""
import json
import logging
import os

import numpy as np
import pandas as pd

logger = logging.getLogger(__name__)


def load_bundle(bundle_dir: str):
    """Load the embedding model, per-label classifiers and config of a bundle.

    Reads ``<bundle_dir>/config.json`` (UTF-8), instantiates a
    ``SentenceTransformer`` from ``cfg["emb_model_name"]`` and, for every
    label in ``cfg["label_cols"]``, loads ``<bundle_dir>/xgb_<label>.joblib``
    when that file exists.

    Args:
        bundle_dir: Directory containing ``config.json`` and the model files.

    Returns:
        A tuple ``(emb_model, models, cfg)`` where ``models`` maps label name
        to the loaded classifier (labels without a model file are absent) and
        ``cfg`` is the parsed configuration dict.

    Raises:
        FileNotFoundError: If ``config.json`` is missing.
        KeyError: If ``emb_model_name`` or ``label_cols`` is missing in the
            config.
    """
    # Imported here rather than at module level: only this loader needs the
    # heavy third-party stack, so `predict_weighted_risk` stays importable
    # (and usable with any compatible encoder/classifier) without it.
    import joblib
    from sentence_transformers import SentenceTransformer

    config_path = os.path.join(bundle_dir, "config.json")
    with open(config_path, "r", encoding="utf-8") as f:
        cfg = json.load(f)

    emb_model = SentenceTransformer(cfg["emb_model_name"])

    models = {}
    for label in cfg["label_cols"]:
        path = os.path.join(bundle_dir, f"xgb_{label}.joblib")
        if os.path.exists(path):
            # NOTE(security): joblib.load unpickles arbitrary objects; only
            # load bundles that come from a trusted source.
            models[label] = joblib.load(path)
        else:
            # Best-effort by design: prediction treats a missing model as
            # probability 0 for that label. Make the gap visible.
            logger.warning(
                "No model file for label %r at %s; its probability will be 0.",
                label,
                path,
            )
    return emb_model, models, cfg


def predict_weighted_risk(texts, emb_model, models: dict, cfg: dict) -> dict:
    """Score texts per label and combine the labels into a weighted risk flag.

    Args:
        texts: A single string or any iterable of strings (lists, tuples and
            generators are all accepted; the iterable is consumed once).
        emb_model: Object exposing ``encode(list_of_str, convert_to_numpy=True)``.
        models: Mapping ``label -> classifier`` exposing ``predict_proba(X)``;
            column 1 of its output is used as the label's probability
            (presumably the positive class -- confirm against the training
            code). Labels with no entry get probability 0.
        cfg: Configuration dict providing ``label_cols`` (ordered labels),
            ``label_thresholds`` (per-label cut-off, 0.5 when a label is
            absent), ``weights`` (per-label weight, 0 when a label is absent)
            and ``risk_threshold`` (cut-off on the weighted score).

    Returns:
        A dict with:
            ``"proba"``: DataFrame of per-label probabilities (columns in
                ``label_cols`` order, one row per text).
            ``"yhat_bin"``: DataFrame of 0/1 labels (``proba >= threshold``).
            ``"weighted_score"``: Series, sum of ``weight * yhat_bin`` per row.
            ``"risk_pred"``: Series of 0/1, ``weighted_score >= risk_threshold``.
        For empty input all four are empty and no model is called.
    """
    # Materialise once so generators are not exhausted by the encoder before
    # their length is needed below.
    texts = [texts] if isinstance(texts, str) else list(texts)
    n_texts = len(texts)

    label_cols = cfg["label_cols"]
    label_thresholds = cfg["label_thresholds"]
    risk_thr = cfg["risk_threshold"]
    # Align weights to the label order explicitly; a label without a weight
    # contributes nothing to the score.
    weights = (
        pd.Series(cfg["weights"], dtype=float).reindex(label_cols).fillna(0.0)
    )

    # Skip the encoder entirely for empty input: there is nothing to embed
    # and classifiers generally cannot handle a zero-row feature matrix.
    X = emb_model.encode(texts, convert_to_numpy=True) if n_texts else None

    # Per-label probabilities.
    proba = {}
    for label in label_cols:
        if X is not None and label in models:
            proba[label] = models[label].predict_proba(X)[:, 1]
        else:
            proba[label] = np.zeros(n_texts)
    proba = pd.DataFrame(proba, columns=label_cols)

    # 0/1 prediction per label.
    yhat_bin = pd.DataFrame(
        {
            label: (proba[label] >= label_thresholds.get(label, 0.5)).astype(int)
            for label in label_cols
        }
    )

    # Weighted score and final risk flag.
    weighted = yhat_bin.mul(weights, axis="columns").sum(axis=1)
    risk_pred = (weighted >= risk_thr).astype(int)

    return {
        "proba": proba,
        "yhat_bin": yhat_bin,
        "weighted_score": weighted,
        "risk_pred": risk_pred,
    }


def main() -> None:
    """Load the exported bundle and print predictions for one sample text."""
    bundle_dir = "alz_sbert_weighted_export"
    emb, mdl, cfg = load_bundle(bundle_dir)

    sample = "Dün sabah erken kalktım, kahvaltıdan sonra markete gittim. Akşam ailemle buluştum."
    out = predict_weighted_risk(sample, emb, mdl, cfg)

    print("Risk tahmini:", int(out["risk_pred"].values[0]))
    print("Ağırlıklı skor:", float(out["weighted_score"].values[0]))
    print("Etiket 0/1:", out["yhat_bin"].to_dict(orient="list"))


if __name__ == "__main__":
    main()