# Sentiment-model evaluation script: scores the model on TweetEval and
# YouTube comments, triggers partial retraining when YouTube accuracy
# falls below threshold, and writes metrics to reports/metrics.json.
# Standard library
import json
import os

# Third-party
import torch
from datasets import ClassLabel, load_from_disk
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score
from transformers import AutoModelForSequenceClassification

# Project
from src.train_model import train_model

# Minimum acceptable accuracy on the YouTube split; below this, partial
# retraining is triggered.
ACCURACY_THRESHOLD = 0.75
# Filesystem locations for the fine-tuned model, the tokenized datasets,
# and the metrics report.
MODEL_PATH = "models/sentiment_model"
TWEET_PATH = "data/processed/tweet_eval_tokenized"
YT_PATH = "data/processed/youtube_tokenized"
REPORTS_DIR = "reports"
def evaluate_model(model, dataset, dataset_name, sample_size=300, batch_size=32):
    """Evaluate a sequence-classification model on a tokenized dataset.

    Args:
        model: HuggingFace sequence-classification model (should already be
            in eval mode; this function wraps inference in ``torch.no_grad``).
        dataset: ``datasets.DatasetDict`` whose splits carry "input_ids",
            "attention_mask" and "label" columns — assumes tokenization
            happened upstream (confirm against the preprocessing step).
        dataset_name: human-readable name used in console output and the report.
        sample_size: maximum number of examples to evaluate.
        batch_size: examples per forward pass; batching keeps memory bounded.

    Returns:
        dict with keys "dataset", "accuracy", "f1" (weighted) and
        "confusion_matrix" (nested lists, JSON-serializable).
    """
    print(f"Valutazione su {dataset_name}")
    # Choose the evaluation subset: prefer the "test" split, otherwise hold
    # out 10% of "train". The split is seeded so repeated runs score the
    # same examples (the original was nondeterministic here).
    if "test" in dataset:
        subset = dataset["test"]
    else:
        subset = dataset["train"].train_test_split(test_size=0.1, seed=42)["test"]
    # Bug fix: the original applied sample_size only in the "test" branch;
    # cap both paths uniformly.
    subset = subset.select(range(min(sample_size, len(subset))))

    input_ids = torch.tensor(subset["input_ids"])
    attention_mask = torch.tensor(subset["attention_mask"])
    labels = torch.tensor(subset["label"])

    # Batched inference: one forward pass over the whole sample can exhaust
    # memory for large sample_size; accumulate per-batch argmax predictions.
    batch_preds = []
    with torch.no_grad():
        for start in range(0, input_ids.size(0), batch_size):
            end = start + batch_size
            outputs = model(
                input_ids=input_ids[start:end],
                attention_mask=attention_mask[start:end],
            )
            batch_preds.append(torch.argmax(outputs.logits, dim=1))
    preds = torch.cat(batch_preds)

    acc = accuracy_score(labels.numpy(), preds.numpy())
    f1 = f1_score(labels.numpy(), preds.numpy(), average="weighted")
    cm = confusion_matrix(labels.numpy(), preds.numpy()).tolist()
    print(f"{dataset_name} — Accuracy: {acc:.3f}, F1: {f1:.3f}")
    return {"dataset": dataset_name, "accuracy": acc, "f1": f1, "confusion_matrix": cm}
def retrain_on_youtube_sample(sample_size=500):
    """Fine-tune the model on a random sample of YouTube comments.

    Loads the tokenized YouTube "train" split, draws a seeded random sample,
    strips it down to the "text"/"label" columns, casts "label" to a
    ``ClassLabel`` feature, and hands it to ``train_model``.

    Args:
        sample_size: maximum number of comments to sample (default 500,
            matching the original hard-coded value).
    """
    youtube_data = load_from_disk(YT_PATH)["train"]
    # Bug fix: the original called range(500) unconditionally, which raises
    # IndexError when the dataset holds fewer than 500 rows.
    n = min(sample_size, len(youtube_data))
    youtube_sample = youtube_data.shuffle(seed=42).select(range(n))
    # Keep only the columns the training step consumes.
    youtube_sample = youtube_sample.remove_columns(
        [col for col in youtube_sample.column_names if col not in ["text", "label"]]
    )
    # Cast the plain integer label column to a categorical ClassLabel so the
    # trainer sees named classes.
    label_class = ClassLabel(names=["negative", "neutral", "positive"])
    youtube_sample = youtube_sample.cast_column("label", label_class)
    train_model(additional_data=youtube_sample, output_dir=MODEL_PATH)
def main():
    """Entry point: evaluate the model on both datasets, retrain on YouTube
    data if accuracy drops below the threshold, and persist the metrics."""
    print("Caricamento del modello")
    # Prefer the locally fine-tuned checkpoint; fall back to the public
    # pre-trained sentiment model when none has been saved yet.
    if os.path.exists(MODEL_PATH):
        model_source = MODEL_PATH
    else:
        print("Modello locale non trovato. Uso modello pre-addestrato di default.")
        model_source = "cardiffnlp/twitter-roberta-base-sentiment-latest"
    model = AutoModelForSequenceClassification.from_pretrained(model_source)
    model.eval()

    tweet_ds = load_from_disk(TWEET_PATH)
    youtube_ds = load_from_disk(YT_PATH)

    # Evaluate TweetEval first, then YouTube (dict literals preserve order).
    results = {
        "TweetEval": evaluate_model(model, tweet_ds, "TweetEval"),
        "YouTube": evaluate_model(model, youtube_ds, "YouTube Comments"),
    }

    youtube_metrics = results["YouTube"]
    print(f"Accuracy su YouTube: {youtube_metrics['accuracy']:.3f}")
    # Below-threshold YouTube accuracy triggers a partial retraining pass.
    if youtube_metrics["accuracy"] < ACCURACY_THRESHOLD:
        print("Performance sotto la soglia. Avvio retraining parziale...")
        retrain_on_youtube_sample()

    # Persist the (pre-retraining) metrics to the reports directory.
    os.makedirs(REPORTS_DIR, exist_ok=True)
    metrics_path = os.path.join(REPORTS_DIR, "metrics.json")
    with open(metrics_path, "w") as f:
        json.dump(results, f, indent=4)
    print(f"Risultati salvati in: {metrics_path}")
# Run the evaluation pipeline only when executed as a script.
if __name__ == "__main__":
    main()