naile1 committed on
Commit
8530801
·
verified ·
1 Parent(s): 082b828

First upload

Browse files
config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "emb_model_name": "paraphrase-multilingual-MiniLM-L12-v2",
3
+ "text_col": "Metin",
4
+ "label_cols": [
5
+ "cumle_uzunlugu",
6
+ "kelime_sikligi",
7
+ "zamir_hatalari",
8
+ "baglac_hatalari",
9
+ "anlam_belirsizligi",
10
+ "sozcuk_seciminde_tutarsizlik"
11
+ ],
12
+ "label_thresholds": {
13
+ "cumle_uzunlugu": 0.5,
14
+ "kelime_sikligi": 0.30000000000000004,
15
+ "zamir_hatalari": 0.65,
16
+ "baglac_hatalari": 0.30000000000000004,
17
+ "anlam_belirsizligi": 0.45000000000000007,
18
+ "sozcuk_seciminde_tutarsizlik": 0.55
19
+ },
20
+ "weights": {
21
+ "cumle_uzunlugu": 1.0,
22
+ "kelime_sikligi": 1.0,
23
+ "zamir_hatalari": 1.5,
24
+ "baglac_hatalari": 1.0,
25
+ "anlam_belirsizligi": 1.0,
26
+ "sozcuk_seciminde_tutarsizlik": 1.0
27
+ },
28
+ "risk_threshold": 1.5,
29
+ "min_pos_needed": 2
30
+ }
predict_runtime.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, json, joblib
2
+ import numpy as np
3
+ import pandas as pd
4
+ from sentence_transformers import SentenceTransformer
5
+
6
def load_bundle(bundle_dir):
    """Load the embedding model, per-label classifiers and config from a bundle.

    Args:
        bundle_dir: Directory containing ``config.json`` and, for each label,
            an optional ``xgb_<label>.joblib`` file.

    Returns:
        A tuple ``(emb_model, models, cfg)`` where ``emb_model`` is the
        ``SentenceTransformer`` named by ``cfg["emb_model_name"]``, ``models``
        maps each label in ``cfg["label_cols"]`` whose file exists to the
        object loaded from it, and ``cfg`` is the parsed configuration dict.
    """
    config_path = os.path.join(bundle_dir, "config.json")
    with open(config_path, "r", encoding="utf-8") as fh:
        cfg = json.load(fh)

    emb_model = SentenceTransformer(cfg["emb_model_name"])

    # Labels without a model file on disk are left out of the mapping rather
    # than raising.
    # NOTE(review): joblib.load unpickles the file contents; only load bundles
    # that come from a trusted source.
    candidate_paths = (
        (label, os.path.join(bundle_dir, f"xgb_{label}.joblib"))
        for label in cfg["label_cols"]
    )
    models = {
        label: joblib.load(path)
        for label, path in candidate_paths
        if os.path.exists(path)
    }
    return emb_model, models, cfg
16
+
17
def predict_weighted_risk(texts, emb_model, models, cfg):
    """Score texts per label and fold the binary label hits into a risk flag.

    Args:
        texts: A single string, or any iterable of strings (list, Series,
            generator, ...). A bare string is treated as one text.
        emb_model: Object exposing ``encode(list_of_str, convert_to_numpy=True)``
            and returning one embedding row per text.
        models: Mapping of label -> classifier exposing ``predict_proba``.
            Labels from ``cfg["label_cols"]`` that are absent here get a
            probability of 0 for every text.
        cfg: Dict with ``label_cols`` (ordered labels), ``label_thresholds``
            (per-label cut-off, 0.5 when a label is missing), ``weights``
            (per-label weight) and ``risk_threshold``.

    Returns:
        Dict with:
            ``proba``: DataFrame of positive-class probabilities per label.
            ``yhat_bin``: DataFrame of 0/1 decisions per label.
            ``weighted_score``: Series, sum of weights of the positive labels.
            ``risk_pred``: Series of 0/1, 1 when the score reaches
                ``risk_threshold``.
    """
    # Materialize the input exactly once. Previously the iterable was consumed
    # for encoding and then len() was taken on the original object, which
    # failed for generators/iterators whenever a label had no model.
    texts = [texts] if isinstance(texts, str) else list(texts)
    n_texts = len(texts)

    X = emb_model.encode(texts, convert_to_numpy=True)
    label_cols = cfg["label_cols"]
    label_thresholds = cfg["label_thresholds"]
    weights = pd.Series(cfg["weights"])
    risk_thr = cfg["risk_threshold"]
    # NOTE(review): the bundled config also carries "min_pos_needed", which is
    # not read here -- confirm whether that is intentional.

    # Positive-class probability per label; zero-fill labels without a model.
    proba = {}
    for c in label_cols:
        if c in models:
            proba[c] = models[c].predict_proba(X)[:, 1]
        else:
            proba[c] = np.zeros(n_texts)
    proba = pd.DataFrame(proba)[label_cols]

    # Binary 0/1 decision per label using its own threshold.
    yhat_bin = pd.DataFrame(
        {
            c: (proba[c] >= label_thresholds.get(c, 0.5)).astype(int)
            for c in label_cols
        }
    )

    # Weighted score and risk flag. The multiplication aligns the weights to
    # the label columns by name.
    weighted = (yhat_bin * weights).sum(axis=1)
    risk_pred = (weighted >= risk_thr).astype(int)

    return {
        "proba": proba,
        "yhat_bin": yhat_bin,
        "weighted_score": weighted,
        "risk_pred": risk_pred,
    }
48
+
49
if __name__ == "__main__":
    # Demo run: load the exported bundle from the working directory, score one
    # sample text and print the risk flag, the weighted score and the per-label
    # 0/1 decisions.
    export_dir = "alz_sbert_weighted_export"
    encoder, classifiers, settings = load_bundle(export_dir)

    demo_text = "Dün sabah erken kalktım, kahvaltıdan sonra markete gittim. Akşam ailemle buluştum."
    result = predict_weighted_risk(demo_text, encoder, classifiers, settings)

    risk_flag = int(result["risk_pred"].values[0])
    print("Risk tahmini:", risk_flag)

    score = float(result["weighted_score"].values[0])
    print("Ağırlıklı skor:", score)

    label_hits = result["yhat_bin"].to_dict(orient="list")
    print("Etiket 0/1:", label_hits)
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ sentence-transformers
2
+ xgboost
3
+ joblib
4
+ pandas
5
+ scikit-learn
xgb_anlam_belirsizligi.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31ee6ad9a1685cec6a90039f7db6aafb52112362ffabcc5055fe97a470ab5904
3
+ size 630429
xgb_baglac_hatalari.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d4ab3adfd0535d87aba530c56e3338b9fa16c24b65a173213259447616446ab
3
+ size 596767
xgb_cumle_uzunlugu.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4607a0a63200d4aaf8c45788e3f45e3c3f5fd1d92af4d2e424af31336bcc06bf
3
+ size 647293
xgb_kelime_sikligi.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52efc12518c2255d25ef1cc66c117b47560d4e520d665c217354649933e48d37
3
+ size 639201
xgb_sozcuk_seciminde_tutarsizlik.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3441479049f1d8d991f9742b2694cd399baf5a4328aae1feab153dc0b3f901f0
3
+ size 652597
xgb_zamir_hatalari.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19ef8d724c9b1a5f8f05722795f1dab37bf5204046f1ef17cc9b49b76e703792
3
+ size 621793