Spaces:
Running
Running
| import numpy as np | |
| import pandas as pd | |
| import torch | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
| from sklearn.metrics import ( | |
| accuracy_score, f1_score, | |
| classification_report, confusion_matrix, | |
| ) | |
# ---------------------------------------------------------------
# CONFIG
# ---------------------------------------------------------------
# Locations of the fine-tuned model directory and the test-set CSV.
MODEL_DIR = "models/finbert-finetuned"
TEST_CSV = "data/test_set.csv"

# Class-id <-> class-name lookup tables; LABEL2ID is the inverse of ID2LABEL.
ID2LABEL = dict(enumerate(("negative", "neutral", "positive")))
LABEL2ID = {name: idx for idx, name in ID2LABEL.items()}

# Tokenizer truncation length and number of texts per inference batch.
MAX_LENGTH = 128
BATCH_SIZE = 32

# Per-class colours used when plotting.
COLORS = dict(negative="#e74c3c", neutral="#95a5a6", positive="#2ecc71")

# Device preference order: CUDA, then Apple MPS, then CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
elif torch.backends.mps.is_available():
    DEVICE = "mps"
else:
    DEVICE = "cpu"
print(f"Cihaz: {DEVICE}")
# ---------------------------------------------------------------
# 1. LOAD MODEL & TOKENIZER
# ---------------------------------------------------------------
# The status message was mis-encoded (UTF-8 decoded as a Greek code page);
# the intended Turkish text is restored here.
print("Model yükleniyor...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR).to(DEVICE)
# Put the network in evaluation mode before any prediction is made.
model.eval()
# ---------------------------------------------------------------
# 2. PREDICTION FUNCTION
# ---------------------------------------------------------------
def predict(texts: list[str]) -> tuple[np.ndarray, np.ndarray]:
    """Classify *texts* in mini-batches of ``BATCH_SIZE``.

    Args:
        texts: Sentences to classify; may be empty.

    Returns:
        A ``(preds, probs)`` pair. ``preds`` holds the argmax class id for
        each text and ``probs`` the softmax probabilities, one row per text.
        Both arrays have zero rows when ``texts`` is empty.
    """
    if not texts:
        # np.concatenate / np.vstack raise ValueError on an empty list, so
        # hand back empty arrays whose second axis still matches the model.
        n_classes = model.config.num_labels
        return (
            np.empty(0, dtype=np.intp),
            np.empty((0, n_classes), dtype=np.float32),
        )

    all_preds, all_probs = [], []
    for start in range(0, len(texts), BATCH_SIZE):
        batch = texts[start : start + BATCH_SIZE]
        enc = tokenizer(
            batch,
            padding=True,
            truncation=True,
            max_length=MAX_LENGTH,
            return_tensors="pt",
        ).to(DEVICE)
        # No gradients are needed for inference.
        with torch.no_grad():
            logits = model(**enc).logits
        probs = torch.softmax(logits, dim=-1).cpu().numpy()
        all_preds.append(np.argmax(probs, axis=-1))
        all_probs.append(probs)
    return np.concatenate(all_preds), np.vstack(all_probs)
# ---------------------------------------------------------------
# 3. TEST-SET PREDICTIONS
# ---------------------------------------------------------------
df = pd.read_csv(TEST_CSV)

# Series.map leaves NaN for any label_str that is not a key of LABEL2ID.
# Fail here with a clear message instead of letting the NaNs surface later
# as an obscure metric error or as silently "incorrect" rows.
df["label"] = df["label_str"].map(LABEL2ID)
unmapped = df["label"].isna()
if unmapped.any():
    bad_values = sorted(df.loc[unmapped, "label_str"].astype(str).unique())
    raise ValueError(
        f"{TEST_CSV}: label_str contains values missing from LABEL2ID: {bad_values}"
    )
df["label"] = df["label"].astype("int64")
print(f"Test seti: {len(df)} örnek")

preds, probs = predict(df["sentence"].tolist())
df["pred"] = preds
df["pred_str"] = df["pred"].map(ID2LABEL)
# Confidence is the highest softmax probability of each row.
df["confidence"] = probs.max(axis=1)
df["correct"] = df["label"] == df["pred"]
# ---------------------------------------------------------------
# 4. METRICS
# ---------------------------------------------------------------
acc = accuracy_score(df["label"], df["pred"])
f1_macro = f1_score(df["label"], df["pred"], average="macro")
f1_weighted = f1_score(df["label"], df["pred"], average="weighted")

print("\n" + "=" * 55)
print(" TEST METRİKLERİ")
print("=" * 55)
print(f" Accuracy : {acc:.4f}")
print(f" F1 Macro : {f1_macro:.4f}")
print(f" F1 Weighted : {f1_weighted:.4f}")

print("\n--- Classification Report ---")
# Pass the label ids explicitly: without `labels=`, classification_report
# raises when a class is absent from both y_true and y_pred, because the
# number of observed classes no longer matches `target_names`.
report_label_ids = sorted(ID2LABEL)
print(classification_report(
    df["label"], df["pred"],
    labels=report_label_ids,
    target_names=[ID2LABEL[i] for i in report_label_ids],
    zero_division=0,
))
# ---------------------------------------------------------------
# 5. VISUALISATION
# ---------------------------------------------------------------
fig, axes = plt.subplots(1, 3, figsize=(16, 5))
fig.suptitle("Model Değerlendirme — Test Seti", fontweight="bold")

# Fix the class order once so the matrix, the tick labels and the F1 bars
# always line up, even when a class is missing from the test set.
label_ids = sorted(ID2LABEL)
labels = [ID2LABEL[i] for i in label_ids]

# -- 5a. Confusion matrix --
cm = confusion_matrix(df["label"], df["pred"], labels=label_ids)
sns.heatmap(
    cm, annot=True, fmt="d", cmap="Blues",
    xticklabels=labels, yticklabels=labels,
    ax=axes[0], cbar=False,
    annot_kws={"size": 13, "weight": "bold"},
)
axes[0].set_title("Confusion Matrix")
axes[0].set_ylabel("Gerçek")
axes[0].set_xlabel("Tahmin")

# -- 5b. Confidence distribution (correct vs. wrong predictions) --
ax = axes[1]
for correct, label, color in [(True, "Doğru", "#2ecc71"), (False, "Yanlış", "#e74c3c")]:
    subset = df.loc[df["correct"] == correct, "confidence"]
    ax.hist(subset, bins=20, alpha=0.7, color=color, label=f"{label} ({len(subset)})")
ax.set_title("Tahmin Güven Skoru")
ax.set_xlabel("Confidence (softmax max)")
ax.set_ylabel("Frekans")
ax.legend()
ax.spines[["top", "right"]].set_visible(False)

# -- 5c. Per-class F1 --
ax = axes[2]
report = classification_report(
    df["label"], df["pred"],
    labels=label_ids, target_names=labels,
    output_dict=True, zero_division=0,
)
f1s = [report[name]["f1-score"] for name in labels]
bars = ax.bar(labels, f1s, color=[COLORS[name] for name in labels], edgecolor="white")
# Print each bar's value just above it.
for bar, val in zip(bars, f1s):
    ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.01,
            f"{val:.3f}", ha="center", fontweight="bold")
ax.set_title("Sınıf Bazında F1 Skoru")
# Leave headroom above 1.0 for the value annotations.
ax.set_ylim(0, 1.15)
ax.set_ylabel("F1 Score")
ax.axhline(y=f1_macro, color="gray", linestyle="--", alpha=0.7, label=f"Macro avg: {f1_macro:.3f}")
ax.legend()
ax.spines[["top", "right"]].set_visible(False)

plt.tight_layout()
plt.savefig("data/evaluation_plots.png", bbox_inches="tight")
print("Grafik kaydedildi: data/evaluation_plots.png")
# ---------------------------------------------------------------
# 6. ERROR ANALYSIS - examples the model got wrong
# ---------------------------------------------------------------
# Most confident mistakes first.
errors = df[~df["correct"]].sort_values("confidence", ascending=False)
print(f"\n{'='*55}")
print(f" HATA ANALİZİ — {len(errors)} yanlış tahmin")
print(f"{'='*55}")

if not errors.empty:
    print("\nEn güvenli yanlış tahminler (yüksek confidence ama yanlış):")
    for _, row in errors.head(5).iterrows():
        # str() guards against non-string cells (e.g. NaN from an empty CSV
        # field); the ellipsis is added only when text was actually cut.
        sentence = str(row["sentence"])
        snippet = sentence if len(sentence) <= 100 else sentence[:100] + "..."
        print(f"\n Cümle : {snippet}")
        print(f" Gerçek : {row['label_str']:<10} Tahmin: {row['pred_str']:<10} Conf: {row['confidence']:.3f}")
else:
    print("Hata yok — mükemmel test performansı!")

# Save the misclassified rows (header-only file when there are none).
errors_csv = "data/errors.csv"
errors[["sentence", "label_str", "pred_str", "confidence"]].to_csv(
    errors_csv, index=False
)
print(f"\nHatalar kaydedildi: {errors_csv}")