Spaces:
Sleeping
Sleeping
| import os | |
| from fastapi import FastAPI, Request, Form | |
| from pydantic import BaseModel | |
| from fastapi.responses import HTMLResponse | |
| from fastapi.templating import Jinja2Templates | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
| from datasets import load_from_disk | |
| import torch | |
| import random | |
| import subprocess | |
| import json | |
# Hugging Face checkpoint used for inference and the on-disk locations of the
# pre-tokenized datasets.
MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"
TWEET_PROCESSED_PATH = "data/processed/tweet_eval_tokenized"
YT_PROCESSED_PATH = "data/processed/youtube_tokenized"

# Class names, indexed by the class id predicted by the model.
labels = ["negative", "neutral", "positive"]

# Tokenizer and classifier are loaded once, at import time, and shared by
# every request handler in this module.
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
# Tweet Eval: announce a missing processed dataset and, unless the
# SKIP_DATA_PREP environment variable is set, build it with the data
# preparation script before loading it from disk.
if not os.path.exists(TWEET_PROCESSED_PATH):
    print(f"Dataset Tweet Eval non trovato in {TWEET_PROCESSED_PATH}. Lo genero...")
    if not os.environ.get("SKIP_DATA_PREP"):
        subprocess.run(
            ["python", "src/data_preparation.py", "tweet_eval"],
            check=True,
        )

tweet_eval = load_from_disk(TWEET_PROCESSED_PATH)
# YouTube comments: announce a missing processed dataset and, unless the
# SKIP_DATA_PREP environment variable is set, build it with the data
# preparation script before loading it from disk.
if not os.path.exists(YT_PROCESSED_PATH):
    print(f" Dataset YouTube non trovato in {YT_PROCESSED_PATH}. Lo genero...")
    if not os.environ.get("SKIP_DATA_PREP"):
        subprocess.run(
            ["python", "src/data_preparation.py", "youtube"],
            check=True,
        )

youtube_ds = load_from_disk(YT_PROCESSED_PATH)
# ASGI application object; it is also what the __main__ guard hands to uvicorn.
app = FastAPI(title="Sentiment Analysis API")
# Jinja2 environment rooted at app_templates/, used by every HTML handler below.
templates = Jinja2Templates(directory="app_templates/")
# Request schema carrying a single piece of text.
# NOTE(review): no handler visible in this file references this model -
# confirm whether it is still needed.
class TextInput(BaseModel):
    # Raw text to be classified.
    text: str
def predict_sentiment(text: str):
    """Classify ``text`` with the module-level sentiment model.

    Args:
        text: The text to classify. It is tokenized with truncation enabled.

    Returns:
        dict: ``{"label": <entry of ``labels``>, "confidence": <float>}``,
        where the confidence is the softmax probability of the winning class
        rounded to three decimals.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**encoded).logits
        probabilities = torch.nn.functional.softmax(logits, dim=-1)

    # A single text was encoded, so the batch holds exactly one row.
    best_class = torch.argmax(probabilities, dim=1).item()
    score = probabilities[0][best_class].item()
    return {"label": labels[best_class], "confidence": round(score, 3)}
async def home(request: Request):
    """Render the ``index.html`` landing page.

    Args:
        request: Incoming request, passed through to the template context.
    """
    context = {"request": request}
    return templates.TemplateResponse("index.html", context)
def random_tweet(request: Request):
    """Classify a random Tweet Eval test example and render the comparison.

    A row is drawn uniformly from the ``test`` split. Its text is taken from
    the ``text`` column when present, otherwise it is rebuilt by decoding the
    row's ``input_ids``. The prediction and the ground-truth label are then
    passed to the ``random_tweet.html`` template.

    Args:
        request: Incoming request, passed through to the template context.
    """
    test_split = tweet_eval["test"]
    row = test_split[random.randrange(len(test_split))]

    if "text" in row:
        tweet = row["text"]
    else:
        tweet = tokenizer.decode(row["input_ids"], skip_special_tokens=True)

    prediction = predict_sentiment(tweet)
    context = {
        "request": request,
        "text": tweet,
        "true_label": labels[row["label"]],
        "result": prediction,
    }
    return templates.TemplateResponse("random_tweet.html", context)
def predict_page(request: Request):
    """Render the empty prediction form (``predict.html`` with no result).

    Args:
        request: Incoming request, passed through to the template context.
    """
    context = {"request": request, "result": None}
    return templates.TemplateResponse("predict.html", context)
def predict_text(request: Request, text: str = Form(...)):
    """Classify the submitted form text and re-render ``predict.html``.

    Args:
        request: Incoming request, passed through to the template context.
        text: Required form field holding the text to classify.
    """
    prediction = predict_sentiment(text)
    context = {"request": request, "text": text, "result": prediction}
    return templates.TemplateResponse("predict.html", context)
def random_youtube_comment(request: Request):
    """Classify a random YouTube comment and render the comparison page.

    A row is drawn at random from the ``train`` split of ``youtube_ds``. The
    prediction and the row's label (``"N/A"`` when missing or outside 0-2)
    are passed to the ``random_youtube.html`` template.

    Args:
        request: Incoming request, passed through to the template context.
    """
    sample = random.choice(youtube_ds["train"])

    # The original conditional read sample["text"] in both branches, so a row
    # without a "text" column always raised KeyError. Fall back to decoding
    # the token ids, exactly as random_tweet does.
    # NOTE(review): assumes tokenized rows carry "input_ids" - confirm against
    # src/data_preparation.py.
    text = (
        sample["text"]
        if "text" in sample
        else tokenizer.decode(sample["input_ids"], skip_special_tokens=True)
    )

    true_label = sample["label"] if "label" in sample else "N/A"
    if isinstance(true_label, int):
        # Integer class ids are shown by name; unknown ids fall back to "N/A".
        label_map = {0: "negative", 1: "neutral", 2: "positive"}
        true_label = label_map.get(true_label, "N/A")

    result = predict_sentiment(text)
    return templates.TemplateResponse(
        "random_youtube.html",
        {"request": request, "text": text, "true_label": true_label, "result": result},
    )
async def admin_dashboard(request: Request):
    """Render the main page of the admin area.

    The contents of ``reports/metrics.json`` are passed to ``admin.html`` as
    ``metrics``; when the file does not exist, ``metrics`` is ``None``.

    Args:
        request: Incoming request, passed through to the template context.
    """
    report_file = "reports/metrics.json"
    loaded = None
    if os.path.exists(report_file):
        with open(report_file, "r") as handle:
            loaded = json.load(handle)

    context = {"request": request, "metrics": loaded}
    return templates.TemplateResponse("admin.html", context)
async def retrain_model():
    """Run the training script and report completion.

    ``subprocess.run`` blocks until the child process exits. Calling it
    directly inside a coroutine would stall the event loop, and therefore
    every other request, for the whole training run. It is executed in the
    loop's default thread-pool executor instead.

    Returns:
        dict: ``{"status": "Training completato"}`` once the script has
        exited successfully.

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero
            status (``check=True``).
    """
    # Function-scope imports: these names are not imported at module level.
    import asyncio
    import functools

    run_training = functools.partial(
        subprocess.run, ["python", "src/train.py"], check=True
    )
    await asyncio.get_running_loop().run_in_executor(None, run_training)
    return {"status": "Training completato"}
async def run_monitoring():
    """Run the monitoring script and report completion.

    ``subprocess.run`` blocks until the child process exits. Calling it
    directly inside a coroutine would stall the event loop, and therefore
    every other request, while monitoring runs. It is executed in the loop's
    default thread-pool executor instead.

    Returns:
        dict: ``{"status": "Monitoring completato"}`` once the script has
        exited successfully.

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero
            status (``check=True``).
    """
    # Function-scope imports: these names are not imported at module level.
    import asyncio
    import functools

    run_script = functools.partial(
        subprocess.run, ["python", "src/monitoring.py"], check=True
    )
    await asyncio.get_running_loop().run_in_executor(None, run_script)
    return {"status": "Monitoring completato"}
def view_metrics(request: Request):
    """Render the monitoring results page.

    The contents of ``reports/metrics.json`` are passed to ``metrics.html``
    as ``metrics``; when the file does not exist, ``metrics`` is ``None``.
    The tabular and chart presentation is left to the template.

    Args:
        request: Incoming request, passed through to the template context.
    """
    report_path = "reports/metrics.json"
    if not os.path.exists(report_path):
        report = None
    else:
        with open(report_path, "r") as handle:
            report = json.load(handle)

    return templates.TemplateResponse(
        "metrics.html",
        {"request": request, "metrics": report},
    )
# Script entry point: when the module is executed directly, serve the app with
# uvicorn on all network interfaces (0.0.0.0), port 8000.
if __name__ == "__main__":
    # Imported here so uvicorn is only required when running as a script.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)