# sentiment-fastapi / src/monitoring.py (author: LorenzoBioinfo, commit 598af1c)
from transformers import AutoModelForSequenceClassification
from datasets import load_from_disk
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
import torch
import json
import os
from src.train_model import train_model
from datasets import ClassLabel
# Minimum acceptable accuracy on the YouTube dataset; below this, a partial
# retrain is triggered by main().
ACCURACY_THRESHOLD = 0.75
# Filesystem locations: fine-tuned model, tokenized datasets, and report output.
MODEL_PATH = "models/sentiment_model"
TWEET_PATH = "data/processed/tweet_eval_tokenized"
YT_PATH = "data/processed/youtube_tokenized"
REPORTS_DIR = "reports"
def evaluate_model(model, dataset, dataset_name, sample_size=300, batch_size=64, seed=42):
    """Evaluate a sequence-classification model on a subset of a dataset.

    Args:
        model: Hugging Face sequence-classification model (caller should have
            put it in eval mode).
        dataset: DatasetDict-like mapping. The "test" split is used when
            present; otherwise a 10% holdout is carved out of "train".
        dataset_name: label used in log output and in the returned dict.
        sample_size: maximum number of examples to evaluate.
        batch_size: forward-pass batch size; a single pass over the whole
            subset can exhaust memory on large samples.
        seed: seed for the fallback train/test split, for reproducibility.

    Returns:
        dict with keys "dataset", "accuracy", "f1", "confusion_matrix".
    """
    print(f"Valutazione su {dataset_name}")
    # Prefer the dedicated test split; otherwise carve a reproducible holdout.
    if "test" in dataset:
        subset = dataset["test"].select(range(min(sample_size, len(dataset["test"]))))
    else:
        # Fix: the original split was unseeded (non-reproducible) and ignored
        # sample_size entirely.
        subset = dataset["train"].train_test_split(test_size=0.1, seed=seed)["test"]
        subset = subset.select(range(min(sample_size, len(subset))))
    input_ids = torch.tensor(subset["input_ids"])
    attention_mask = torch.tensor(subset["attention_mask"])
    labels = torch.tensor(subset["label"])
    pred_batches = []
    with torch.no_grad():
        # Batched inference instead of one giant forward pass (OOM risk).
        for start in range(0, len(input_ids), batch_size):
            end = start + batch_size
            outputs = model(
                input_ids=input_ids[start:end],
                attention_mask=attention_mask[start:end],
            )
            pred_batches.append(torch.argmax(outputs.logits, dim=1))
    preds = torch.cat(pred_batches)
    acc = accuracy_score(labels.numpy(), preds.numpy())
    f1 = f1_score(labels.numpy(), preds.numpy(), average="weighted")
    cm = confusion_matrix(labels.numpy(), preds.numpy()).tolist()
    print(f"{dataset_name} — Accuracy: {acc:.3f}, F1: {f1:.3f}")
    return {"dataset": dataset_name, "accuracy": acc, "f1": f1, "confusion_matrix": cm}
def retrain_on_youtube_sample(sample_size=500, seed=42):
    """Fine-tune the model on a random sample of the YouTube training split.

    Args:
        sample_size: maximum number of YouTube examples to mix into training
            (clamped to the dataset size — the original hard-coded range(500)
            raised IndexError on smaller datasets).
        seed: shuffle seed for a reproducible sample.
    """
    youtube_data = load_from_disk(YT_PATH)["train"]
    # Clamp so select() never asks for more rows than exist.
    n_rows = min(sample_size, len(youtube_data))
    youtube_sample = youtube_data.shuffle(seed=seed).select(range(n_rows))
    # Keep only the columns the retraining pipeline consumes.
    youtube_sample = youtube_sample.remove_columns(
        [col for col in youtube_sample.column_names if col not in ["text", "label"]]
    )
    # Cast "label" to an explicit three-class ClassLabel feature.
    label_class = ClassLabel(names=["negative", "neutral", "positive"])
    youtube_sample = youtube_sample.cast_column("label", label_class)
    train_model(additional_data=youtube_sample, output_dir=MODEL_PATH)
def main():
    """Run the monitoring pipeline.

    Loads the local model (or a default pre-trained one), evaluates it on the
    TweetEval and YouTube datasets, triggers a partial retrain when YouTube
    accuracy falls below ACCURACY_THRESHOLD, and writes metrics to
    reports/metrics.json.
    """
    print("Caricamento del modello")
    if os.path.exists(MODEL_PATH):
        model = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH)
    else:
        print("Modello locale non trovato. Uso modello pre-addestrato di default.")
        model = AutoModelForSequenceClassification.from_pretrained(
            "cardiffnlp/twitter-roberta-base-sentiment-latest"
        )
    model.eval()
    tweet_ds = load_from_disk(TWEET_PATH)
    youtube_ds = load_from_disk(YT_PATH)
    tweet_metrics = evaluate_model(model, tweet_ds, "TweetEval")
    youtube_metrics = evaluate_model(model, youtube_ds, "YouTube Comments")
    print(f"Accuracy su YouTube: {youtube_metrics['accuracy']:.3f}")
    if youtube_metrics["accuracy"] < ACCURACY_THRESHOLD:
        print("Performance sotto la soglia. Avvio retraining parziale...")
        retrain_on_youtube_sample()
    # NOTE(review): the metrics saved below come from the PRE-retrain model —
    # confirm this is intentional for monitoring (a post-retrain re-evaluation
    # is not performed here).
    os.makedirs(REPORTS_DIR, exist_ok=True)
    metrics_path = os.path.join(REPORTS_DIR, "metrics.json")
    results = {"TweetEval": tweet_metrics, "YouTube": youtube_metrics}
    # Fix: explicit UTF-8 — the default encoding is platform-dependent.
    with open(metrics_path, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=4)
    print(f"Risultati salvati in: {metrics_path}")
# Script entry point: run the full monitoring pipeline.
if __name__ == "__main__":
    main()