File size: 1,965 Bytes
8530801 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import os, json, joblib
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
def load_bundle(bundle_dir):
    """Load an exported inference bundle from *bundle_dir*.

    Reads ``config.json``, instantiates the sentence-embedding model named
    there, and loads one classifier per label from ``xgb_<label>.joblib``.

    Parameters
    ----------
    bundle_dir : str
        Directory holding ``config.json`` and the per-label joblib files.

    Returns
    -------
    tuple
        ``(emb_model, models, cfg)`` where *models* maps label name to its
        fitted classifier. Labels whose joblib file does not exist are
        simply omitted from the mapping.
    """
    config_path = os.path.join(bundle_dir, "config.json")
    with open(config_path, "r", encoding="utf-8") as fh:
        cfg = json.load(fh)

    emb_model = SentenceTransformer(cfg["emb_model_name"])

    # Only pick up classifiers that were actually exported alongside the config.
    models = {}
    for label in cfg["label_cols"]:
        model_path = os.path.join(bundle_dir, f"xgb_{label}.joblib")
        if os.path.exists(model_path):
            models[label] = joblib.load(model_path)

    return emb_model, models, cfg
def predict_weighted_risk(texts, emb_model, models, cfg):
    """Score text(s) for risk via per-label probabilities and a weighted sum.

    Parameters
    ----------
    texts : str or iterable of str
        A single text or a batch of texts. A bare string is treated as a
        one-element batch; any iterable (including a generator) is accepted.
    emb_model : object
        Embedding model exposing ``encode(list_of_str, convert_to_numpy=True)``
        returning an ``np.ndarray`` of shape (n_texts, dim).
    models : dict
        Mapping label -> fitted binary classifier with ``predict_proba``.
        Labels absent from the mapping get probability 0 for every text.
    cfg : dict
        Bundle config with keys ``label_cols``, ``label_thresholds``,
        ``weights`` and ``risk_threshold``.

    Returns
    -------
    dict
        ``proba``: DataFrame of positive-class probabilities per label;
        ``yhat_bin``: 0/1 DataFrame after per-label thresholds (0.5 default);
        ``weighted_score``: Series with the weighted sum of 0/1 predictions;
        ``risk_pred``: 0/1 Series, 1 where the score reaches ``risk_threshold``.
    """
    # Normalize to a concrete list exactly once: a bare string becomes a
    # one-element batch, and generators are materialized so that len(texts)
    # below is safe (the original consumed the iterable inside encode() and
    # then crashed on len()).
    if isinstance(texts, str):
        texts = [texts]
    else:
        texts = list(texts)

    X = emb_model.encode(texts, convert_to_numpy=True)
    label_cols = cfg["label_cols"]
    label_thresholds = cfg["label_thresholds"]
    weights = pd.Series(cfg["weights"])
    risk_thr = cfg["risk_threshold"]

    # Positive-class probability per label; zeros for labels with no model.
    n = len(texts)
    proba = {}
    for c in label_cols:
        if c in models:
            proba[c] = models[c].predict_proba(X)[:, 1]
        else:
            proba[c] = np.zeros(n)
    proba = pd.DataFrame(proba)[label_cols]

    # Binarize with per-label thresholds (default 0.5 when unspecified).
    yhat_bin = pd.DataFrame(
        {c: (proba[c] >= label_thresholds.get(c, 0.5)).astype(int) for c in label_cols}
    )

    # Weighted sum of the 0/1 predictions, then the overall risk flag.
    weighted = (yhat_bin * weights).sum(axis=1)
    risk_pred = (weighted >= risk_thr).astype(int)
    return {
        "proba": proba,
        "yhat_bin": yhat_bin,
        "weighted_score": weighted,
        "risk_pred": risk_pred,
    }
if __name__ == "__main__":
bundle_dir = "alz_sbert_weighted_export"
emb, mdl, cfg = load_bundle(bundle_dir)
sample = "Dün sabah erken kalktım, kahvaltıdan sonra markete gittim. Akşam ailemle buluştum."
out = predict_weighted_risk(sample, emb, mdl, cfg)
print("Risk tahmini:", int(out["risk_pred"].values[0]))
print("Ağırlıklı skor:", float(out["weighted_score"].values[0]))
print("Etiket 0/1:", out["yhat_bin"].to_dict(orient="list"))
|