Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| from sklearn.model_selection import StratifiedKFold | |
| from sklearn.metrics import accuracy_score | |
| from transformers import pipeline | |
# Use the same model as the system.
model = pipeline(
    task="sentiment-analysis",
    model="w11wo/indonesian-roberta-base-sentiment-classifier",
)
def predict(texts):
    """Classify ``texts`` and return one normalised sentiment label per item.

    The module-level ``model`` pipeline is called on ``texts`` and each raw
    label is lower-cased and mapped by substring match: a label containing
    "positive" becomes ``"positive"``, otherwise one containing "negative"
    becomes ``"negative"``, and anything else becomes ``"neutral"``.
    """

    def _normalise(raw_label):
        lowered = raw_label.lower()
        # Order matters: "positive" is checked before "negative".
        for sentiment in ("positive", "negative"):
            if sentiment in lowered:
                return sentiment
        return "neutral"

    return [_normalise(result["label"]) for result in model(texts)]
def run_cv(path="data/eval_dataset.csv", k=5):
    """Measure the sentiment model's accuracy on stratified folds of a CSV.

    The CSV at ``path`` must provide a ``text`` column and a ``label``
    column. Rows are split into ``k`` shuffled, stratified folds (fixed seed
    42) and the accuracy of ``predict`` is computed on each held-out fold.

    Note: nothing is trained here. The training indices of every split are
    discarded, so the scores describe the fixed pretrained model on ``k``
    disjoint test subsets rather than a cross-validation of a fitted model.

    Args:
        path: Location of the evaluation CSV file.
        k: Number of folds.

    Returns:
        The per-fold accuracy scores, in fold order. The scores and their
        mean are also printed.
    """
    df = pd.read_csv(path)
    X = df["text"]
    y = df["label"]

    skf = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)
    scores = []
    # Only the held-out indices are needed; the model is never fitted.
    for _, test_idx in skf.split(X, y):
        X_test = X.iloc[test_idx].tolist()
        # predict() always emits lower-case labels, so normalise the gold
        # labels the same way for the comparison; otherwise values such as
        # "Positive" or " negative " would silently count as errors. The
        # split itself still uses the raw labels, so fold membership is
        # unchanged.
        y_test = [str(label).strip().lower() for label in y.iloc[test_idx]]
        y_pred = predict(X_test)
        scores.append(accuracy_score(y_test, y_pred))

    print("Cross-validation scores:", scores)
    print("Mean accuracy:", sum(scores) / len(scores))
    return scores
# Script entry point: run the evaluation with its default arguments.
if __name__ == "__main__":
    run_cv()