"""Evaluate the pretrained Indonesian sentiment classifier on stratified folds."""

import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from transformers import pipeline

# Use the same model as the system under evaluation.
model = pipeline(
    "sentiment-analysis",
    model="w11wo/indonesian-roberta-base-sentiment-classifier",
)


def _normalize_label(raw_label):
    """Map a raw pipeline label onto 'positive', 'negative' or 'neutral'."""
    lowered = raw_label.lower()
    if "positive" in lowered:
        return "positive"
    if "negative" in lowered:
        return "negative"
    # Anything that is neither positive nor negative is treated as neutral.
    return "neutral"


def predict(texts):
    """Classify texts with the module-level sentiment pipeline.

    Args:
        texts: The texts to classify (run_cv passes a list of strings).

    Returns:
        A list with one of 'positive', 'negative' or 'neutral' per input.
    """
    # Nothing to classify: skip the model call entirely.
    if isinstance(texts, (list, tuple)) and not texts:
        return []
    # truncation=True is forwarded to the tokenizer so that inputs longer than
    # the model's maximum sequence length are cut instead of raising an error.
    outputs = model(texts, truncation=True)
    return [_normalize_label(output["label"]) for output in outputs]


def run_cv(path="data/eval_dataset.csv", k=5):
    """Print per-fold and mean accuracy of the classifier on a labelled CSV.

    The model is pretrained and is not fitted here, so only the test indices
    of each stratified fold are used; the folds give k accuracy estimates on
    disjoint, class-balanced subsets of the data.

    Args:
        path: CSV file with a 'text' column and a 'label' column.
        k: Number of stratified folds.
    """
    df = pd.read_csv(path)

    # Rows without text or label cannot be scored (NaN text would reach the
    # tokenizer as a float), so they are dropped and reported.
    total_rows = len(df)
    df = df.dropna(subset=["text", "label"])
    dropped_rows = total_rows - len(df)
    if dropped_rows:
        print(f"Skipped {dropped_rows} rows with missing text or label")

    X = df["text"].astype(str)
    # predict() always returns lowercase labels, so compare against
    # case- and whitespace-normalized gold labels.
    y = df["label"].astype(str).str.strip().str.lower()

    skf = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)
    scores = []
    for _, test_idx in skf.split(X, y):
        X_test = X.iloc[test_idx].tolist()
        y_test = y.iloc[test_idx].tolist()
        y_pred = predict(X_test)
        scores.append(accuracy_score(y_test, y_pred))

    print("Cross-validation scores:", scores)
    print("Mean accuracy:", sum(scores) / len(scores))


if __name__ == "__main__":
    run_cv()