First upload
Browse files- config.json +30 -0
- predict_runtime.py +56 -0
- requirements.txt +5 -0
- xgb_anlam_belirsizligi.joblib +3 -0
- xgb_baglac_hatalari.joblib +3 -0
- xgb_cumle_uzunlugu.joblib +3 -0
- xgb_kelime_sikligi.joblib +3 -0
- xgb_sozcuk_seciminde_tutarsizlik.joblib +3 -0
- xgb_zamir_hatalari.joblib +3 -0
config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"emb_model_name": "paraphrase-multilingual-MiniLM-L12-v2",
|
| 3 |
+
"text_col": "Metin",
|
| 4 |
+
"label_cols": [
|
| 5 |
+
"cumle_uzunlugu",
|
| 6 |
+
"kelime_sikligi",
|
| 7 |
+
"zamir_hatalari",
|
| 8 |
+
"baglac_hatalari",
|
| 9 |
+
"anlam_belirsizligi",
|
| 10 |
+
"sozcuk_seciminde_tutarsizlik"
|
| 11 |
+
],
|
| 12 |
+
"label_thresholds": {
|
| 13 |
+
"cumle_uzunlugu": 0.5,
|
| 14 |
+
"kelime_sikligi": 0.30000000000000004,
|
| 15 |
+
"zamir_hatalari": 0.65,
|
| 16 |
+
"baglac_hatalari": 0.30000000000000004,
|
| 17 |
+
"anlam_belirsizligi": 0.45000000000000007,
|
| 18 |
+
"sozcuk_seciminde_tutarsizlik": 0.55
|
| 19 |
+
},
|
| 20 |
+
"weights": {
|
| 21 |
+
"cumle_uzunlugu": 1.0,
|
| 22 |
+
"kelime_sikligi": 1.0,
|
| 23 |
+
"zamir_hatalari": 1.5,
|
| 24 |
+
"baglac_hatalari": 1.0,
|
| 25 |
+
"anlam_belirsizligi": 1.0,
|
| 26 |
+
"sozcuk_seciminde_tutarsizlik": 1.0
|
| 27 |
+
},
|
| 28 |
+
"risk_threshold": 1.5,
|
| 29 |
+
"min_pos_needed": 2
|
| 30 |
+
}
|
predict_runtime.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os, json, joblib
|
| 2 |
+
import numpy as np
|
| 3 |
+
import pandas as pd
|
| 4 |
+
from sentence_transformers import SentenceTransformer
|
| 5 |
+
|
| 6 |
+
def load_bundle(bundle_dir):
    """Load the config, the embedding model and the per-label classifiers.

    Args:
        bundle_dir: Directory containing ``config.json`` and, for each name
            in the config's ``label_cols``, a file ``xgb_<label>.joblib``.

    Returns:
        A tuple ``(emb_model, models, cfg)``:
        the ``SentenceTransformer`` built from ``cfg["emb_model_name"]``,
        a dict mapping each label to the object loaded from its joblib file,
        and the parsed config dict.

    Warns:
        RuntimeWarning: if one or more label model files are missing. Loading
            stays best-effort (the label is simply absent from ``models``),
            but the gap is reported instead of being silent.
    """
    import warnings

    with open(os.path.join(bundle_dir, "config.json"), "r", encoding="utf-8") as f:
        cfg = json.load(f)

    emb_model = SentenceTransformer(cfg["emb_model_name"])

    models = {}
    missing = []
    for label in cfg["label_cols"]:
        model_path = os.path.join(bundle_dir, f"xgb_{label}.joblib")
        if os.path.exists(model_path):
            # SECURITY: joblib files are pickles; loading one executes
            # arbitrary code. Only point bundle_dir at trusted bundles.
            models[label] = joblib.load(model_path)
        else:
            missing.append(label)

    if missing:
        warnings.warn(
            "No model file found in %r for label(s): %s; these labels will "
            "be absent from the returned models dict."
            % (str(bundle_dir), ", ".join(missing)),
            RuntimeWarning,
            stacklevel=2,
        )

    return emb_model, models, cfg
|
| 16 |
+
|
| 17 |
+
def predict_weighted_risk(texts, emb_model, models, cfg):
    """Score texts per label and fold the binary labels into a risk flag.

    Args:
        texts: A single string, or any iterable of strings (a generator is
            accepted; the input is materialised once).
        emb_model: Object whose ``encode(list_of_str, convert_to_numpy=True)``
            returns one feature row per text.
        models: Mapping of label name to a classifier exposing
            ``predict_proba``; column 1 of its output is used as the
            probability for that label. A label with no entry gets
            probability 0 for every text.
        cfg: Config dict providing ``label_cols``, ``label_thresholds``,
            ``weights`` and ``risk_threshold``.

    Returns:
        A dict with four entries, all row-aligned with ``texts``:
        ``proba`` (DataFrame, one column per label),
        ``yhat_bin`` (DataFrame of 0/1: ``proba >= threshold``, threshold
        defaulting to 0.5 when a label has none configured),
        ``weighted_score`` (Series: row-wise sum of ``yhat_bin * weights``),
        ``risk_pred`` (Series of 0/1: ``weighted_score >= risk_threshold``).
        An empty input yields empty frames/series without calling the
        encoder or the classifiers.
    """
    if isinstance(texts, str):
        texts = [texts]
    # Materialise once so that generators and other unsized iterables work:
    # the same list feeds the encoder and fixes the number of output rows.
    texts = list(texts)
    n_texts = len(texts)

    label_cols = cfg["label_cols"]
    label_thresholds = cfg["label_thresholds"]
    weights = pd.Series(cfg["weights"])
    risk_thr = cfg["risk_threshold"]
    # NOTE(review): the accompanying config.json also defines
    # "min_pos_needed", which is not read here - confirm whether the risk
    # rule is meant to require a minimum number of positive labels as well.

    # Nothing to embed for an empty batch; fall through to zero-row outputs.
    X = emb_model.encode(texts, convert_to_numpy=True) if n_texts else None

    # Per-label probability
    proba_by_label = {}
    for c in label_cols:
        if X is not None and c in models:
            proba_by_label[c] = models[c].predict_proba(X)[:, 1]
        else:
            proba_by_label[c] = np.zeros(n_texts)
    proba = pd.DataFrame(proba_by_label)[label_cols]

    # 0/1 prediction per label
    yhat_bin = pd.DataFrame(
        {c: (proba[c] >= label_thresholds.get(c, 0.5)).astype(int) for c in label_cols}
    )

    # Weighted score + risk decision. The multiplication aligns the weights
    # Series with the label columns by name.
    weighted = (yhat_bin * weights).sum(axis=1)
    risk_pred = (weighted >= risk_thr).astype(int)

    return {
        "proba": proba,
        "yhat_bin": yhat_bin,
        "weighted_score": weighted,
        "risk_pred": risk_pred,
    }
|
| 48 |
+
|
| 49 |
+
if __name__ == "__main__":
    # Demo entry point: load a bundle, score one sample text and print the
    # risk flag, the weighted score and the per-label 0/1 decisions.
    import sys

    # The bundle directory may be given as the first command-line argument;
    # without one, the original default directory name is used.
    bundle_dir = sys.argv[1] if len(sys.argv) > 1 else "alz_sbert_weighted_export"
    emb, mdl, cfg = load_bundle(bundle_dir)
    sample = "Dün sabah erken kalktım, kahvaltıdan sonra markete gittim. Akşam ailemle buluştum."
    out = predict_weighted_risk(sample, emb, mdl, cfg)
    print("Risk tahmini:", int(out["risk_pred"].values[0]))
    print("Ağırlıklı skor:", float(out["weighted_score"].values[0]))
    print("Etiket 0/1:", out["yhat_bin"].to_dict(orient="list"))
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
sentence-transformers
|
| 2 |
+
xgboost
|
| 3 |
+
joblib
|
| 4 |
+
pandas
|
| 5 |
+
scikit-learn
|
xgb_anlam_belirsizligi.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:31ee6ad9a1685cec6a90039f7db6aafb52112362ffabcc5055fe97a470ab5904
|
| 3 |
+
size 630429
|
xgb_baglac_hatalari.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d4ab3adfd0535d87aba530c56e3338b9fa16c24b65a173213259447616446ab
|
| 3 |
+
size 596767
|
xgb_cumle_uzunlugu.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4607a0a63200d4aaf8c45788e3f45e3c3f5fd1d92af4d2e424af31336bcc06bf
|
| 3 |
+
size 647293
|
xgb_kelime_sikligi.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:52efc12518c2255d25ef1cc66c117b47560d4e520d665c217354649933e48d37
|
| 3 |
+
size 639201
|
xgb_sozcuk_seciminde_tutarsizlik.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3441479049f1d8d991f9742b2694cd399baf5a4328aae1feab153dc0b3f901f0
|
| 3 |
+
size 652597
|
xgb_zamir_hatalari.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:19ef8d724c9b1a5f8f05722795f1dab37bf5204046f1ef17cc9b49b76e703792
|
| 3 |
+
size 621793
|