|
|
import os, json, joblib |
|
|
import numpy as np |
|
|
import pandas as pd |
|
|
from sentence_transformers import SentenceTransformer |
|
|
|
|
|
def load_bundle(bundle_dir):
    """Load the embedding model, per-label classifiers and config of a bundle.

    The bundle directory is expected to contain ``config.json`` plus one
    ``xgb_<label>.joblib`` file per entry of ``config["label_cols"]``.

    Args:
        bundle_dir: Path of the exported bundle directory.

    Returns:
        A tuple ``(emb_model, models, cfg)`` where ``emb_model`` is the
        ``SentenceTransformer`` built from ``cfg["emb_model_name"]``,
        ``models`` maps each label whose classifier file exists to the
        loaded object, and ``cfg`` is the parsed ``config.json``.

    Raises:
        FileNotFoundError: If ``config.json`` is missing.
        KeyError: If ``emb_model_name`` or ``label_cols`` is absent from
            the config.

    Warns:
        RuntimeWarning: If one or more labels have no classifier file.
            Those labels are simply absent from ``models``; the warning
            makes the incomplete bundle visible instead of silent.
    """
    import warnings  # local import so this block does not depend on file-level edits

    config_path = os.path.join(bundle_dir, "config.json")
    with open(config_path, "r", encoding="utf-8") as f:
        cfg = json.load(f)

    emb_model = SentenceTransformer(cfg["emb_model_name"])

    models = {}
    missing_labels = []
    for label in cfg["label_cols"]:
        model_path = os.path.join(bundle_dir, f"xgb_{label}.joblib")
        if os.path.exists(model_path):
            # NOTE(security): joblib.load unpickles the file and can execute
            # arbitrary code; only load bundles from a trusted source.
            models[label] = joblib.load(model_path)
        else:
            missing_labels.append(label)

    if missing_labels:
        # Best-effort loading is kept (no exception), but it is reported.
        warnings.warn(
            f"No classifier file found in {bundle_dir!r} for label(s): "
            f"{missing_labels}; they are omitted from the returned models.",
            RuntimeWarning,
            stacklevel=2,
        )

    return emb_model, models, cfg
|
|
|
|
|
def predict_weighted_risk(texts, emb_model, models, cfg):
    """Score texts with per-label classifiers and a weighted-vote risk rule.

    Every text is embedded with ``emb_model.encode``. For each label in
    ``cfg["label_cols"]`` column 1 of the classifier's ``predict_proba``
    output is taken as that label's probability and binarised against the
    label's threshold. The weighted score of a text is the sum of the
    weights of the labels that fired, and the text is flagged as risky when
    that score reaches ``cfg["risk_threshold"]``.

    Args:
        texts: A single string or any iterable of strings (lists, tuples
            and one-shot iterators such as generators are all accepted).
        emb_model: Object exposing ``encode(list_of_str,
            convert_to_numpy=True)``.
        models: Mapping ``label -> classifier`` with ``predict_proba``.
            Labels without an entry get probability 0 for every text.
        cfg: Mapping with the keys ``label_cols`` (ordered label names),
            ``label_thresholds`` (label -> cut-off, default 0.5 when a label
            is absent), ``weights`` (label -> weight) and ``risk_threshold``.

    Returns:
        A dict with ``"proba"`` (DataFrame of probabilities, one column per
        label), ``"yhat_bin"`` (DataFrame of 0/1 decisions),
        ``"weighted_score"`` (Series) and ``"risk_pred"`` (Series of 0/1).
        An empty input yields empty frames/series with the same columns.
    """
    # Materialise exactly once: a generator would otherwise be exhausted by
    # the encoder call and then break the later length lookup.
    texts = [texts] if isinstance(texts, str) else list(texts)
    n_texts = len(texts)

    label_cols = cfg["label_cols"]
    label_thresholds = cfg["label_thresholds"]
    weights = pd.Series(cfg["weights"])
    risk_thr = cfg["risk_threshold"]

    # An empty batch never reaches the encoder or the classifiers; whether
    # they would accept a zero-row input is not something this code can rely
    # on, and the zero-length columns below already give the right result.
    X = emb_model.encode(texts, convert_to_numpy=True) if n_texts else None

    proba = {}
    for c in label_cols:
        if n_texts and c in models:
            proba[c] = models[c].predict_proba(X)[:, 1]
        else:
            proba[c] = np.zeros(n_texts)
    proba = pd.DataFrame(proba)[label_cols]

    yhat_bin = pd.DataFrame(
        {c: (proba[c] >= label_thresholds.get(c, 0.5)).astype(int) for c in label_cols}
    )

    # The product aligns the weights with the label columns by name; a label
    # without a weight (or a weight without a label) produces NaN cells that
    # the row sum skips, i.e. it contributes 0 to the score.
    weighted = (yhat_bin * weights).sum(axis=1)
    risk_pred = (weighted >= risk_thr).astype(int)

    return {
        "proba": proba,
        "yhat_bin": yhat_bin,
        "weighted_score": weighted,
        "risk_pred": risk_pred,
    }
|
|
|
|
|
if __name__ == "__main__":
    # Demo: load the exported bundle and score one sample sentence.
    bundle_dir = "alz_sbert_weighted_export"
    emb_model, models, cfg = load_bundle(bundle_dir)

    sample_text = "Dün sabah erken kalktım, kahvaltıdan sonra markete gittim. Akşam ailemle buluştum."
    result = predict_weighted_risk(sample_text, emb_model, models, cfg)

    # Only one text was scored, so the first element of each output is reported.
    risk_flag = int(result["risk_pred"].values[0])
    print("Risk tahmini:", risk_flag)  # risk prediction (0/1)

    score = float(result["weighted_score"].values[0])
    print("Ağırlıklı skor:", score)  # weighted score

    label_flags = result["yhat_bin"].to_dict(orient="list")
    print("Etiket 0/1:", label_flags)  # per-label 0/1 decisions
|
|
|