noranisa committed on
Commit
8efad72
·
verified ·
1 Parent(s): 0079a6c

Create training/cross_validation.py

Browse files
Files changed (1) hide show
  1. training/cross_validation.py +48 -0
training/cross_validation.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from sklearn.model_selection import StratifiedKFold
3
+ from sklearn.metrics import accuracy_score
4
+ from transformers import pipeline
5
+
6
# Use the same model as the system so the evaluation matches it.
_SENTIMENT_MODEL_ID = "w11wo/indonesian-roberta-base-sentiment-classifier"
model = pipeline(task="sentiment-analysis", model=_SENTIMENT_MODEL_ID)
9
+
10
def predict(texts):
    """Classify texts with the module-level sentiment pipeline.

    Args:
        texts: Input accepted by the ``model`` pipeline; ``run_cv`` passes a
            list of strings.

    Returns:
        A list holding one of ``"positive"``, ``"negative"`` or
        ``"neutral"`` for each pipeline output, in output order.
    """
    # Ask the tokenizer to truncate over-long inputs; without this the
    # pipeline can fail on texts longer than the model's maximum sequence
    # length.
    outputs = model(texts, truncation=True)

    def _normalize(raw_label):
        """Map a raw pipeline label onto one of the three sentiment names."""
        label = raw_label.lower()
        if "positive" in label:
            return "positive"
        if "negative" in label:
            return "negative"
        # Anything that is neither positive nor negative counts as neutral.
        return "neutral"

    return [_normalize(output["label"]) for output in outputs]
22
+
23
+
24
def run_cv(path="data/eval_dataset.csv", k=5):
    """Score the sentiment model on ``k`` stratified folds of a labelled CSV.

    No training happens here: the folds only partition the dataset, and
    ``predict`` is scored against the labels of each held-out part.

    Args:
        path: CSV file containing a ``text`` column and a ``label`` column.
        k: Number of stratified folds.

    Returns:
        The list of per-fold accuracy scores. The scores and their mean are
        also printed.
    """
    df = pd.read_csv(path)

    # Rows without text or label cannot be scored, and a missing text would
    # reach the model as a float NaN instead of a string.
    df = df.dropna(subset=["text", "label"])

    X = df["text"].astype(str)
    # predict() only emits lowercase label names, so normalise the gold
    # labels the same way; otherwise "Positive" or " positive" in the CSV
    # would be counted as a wrong prediction.
    y = df["label"].astype(str).str.strip().str.lower()

    skf = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)

    scores = []
    # Only the held-out indices are needed because the model is never fitted.
    for _, test_idx in skf.split(X, y):
        fold_texts = X.iloc[test_idx].tolist()
        fold_labels = y.iloc[test_idx].tolist()
        scores.append(accuracy_score(fold_labels, predict(fold_texts)))

    print("Cross-validation scores:", scores)
    print("Mean accuracy:", sum(scores) / len(scores))
    return scores
45
+
46
+
47
# When executed as a script, run the evaluation with the default dataset
# path and fold count.
if __name__ == "__main__":
    run_cv()