Spaces:

Lordemarco
/

sentiment-fastapi

Sleeping

App Files Files Community

LorenzoBioinfo committed on Nov 1, 2025

Commit

66028cc

1 Parent(s): 1754526

Add app and first data

Browse files

Files changed (7) hide show

app_templates/__init__.py +0 -0
app_templates/index.html +16 -0
app_templates/predict.html +28 -0
app_templates/random_tweet.html +33 -0
app_templates/random_youtube.html +34 -0
src/app.py +132 -0
src/data_preparation.py +63 -0

app_templates/__init__.py ADDED Viewed

File without changes

app_templates/index.html ADDED Viewed

	@@ -0,0 +1,16 @@

+<!DOCTYPE html>
+<html lang="it">
+<head>
+    <meta charset="UTF-8">
+    <title>Sentiment Analysis App</title>
+    <script src="https://cdn.tailwindcss.com"></script>
+</head>
+<body class="bg-gradient-to-br from-blue-50 to-purple-100 flex flex-col items-center justify-center h-screen text-center">
+    <h1 class="text-4xl font-bold text-gray-800 mb-8">🎯 Benvenuto nella Sentiment Analysis App di MachineInnovators Inc.</h1>
+    <div class="flex flex-col gap-4">
+        <a href="/random_tweet" class="px-6 py-3 bg-blue-600 text-white rounded-xl hover:bg-blue-700 shadow-lg transition">🧪 Testa il modello su dati di training (Twitter)</a>
+        <a href="/predict" class="px-6 py-3 bg-green-600 text-white rounded-xl hover:bg-green-700 shadow-lg transition">🧠 Testa il modello con un tuo testo</a>
+        <a href="/random_youtube_comment" class="px-6 py-3 bg-purple-600 text-white rounded-xl hover:bg-purple-700 shadow-lg transition">🌍 Testa il modello su nuovi dati (YouTube)</a>
+    </div>
+</body>
+</html>

app_templates/predict.html ADDED Viewed

	@@ -0,0 +1,28 @@

+<!DOCTYPE html>
+<html lang="it">
+<head>
+    <meta charset="UTF-8">
+    <title>Predici il Sentiment</title>
+    <script src="https://cdn.tailwindcss.com"></script>
+</head>
+<body class="bg-gray-50 flex flex-col items-center justify-center min-h-screen">
+    <h2 class="text-3xl font-semibold text-gray-800 mb-6">🧠 Testa il Modello con un tuo testo</h2>
+    <form method="post" class="bg-white rounded-2xl shadow-md p-6 w-3/4 text-center">
+        <textarea name="text" rows="3" class="w-full border border-gray-300 rounded-lg p-3 focus:outline-none focus:ring-2 focus:ring-blue-400" placeholder="Scrivi qui il tuo testo...">{{ text if text else "" }}</textarea>
+        <button type="submit" class="mt-4 px-6 py-2 bg-blue-600 text-white rounded-lg hover:bg-blue-700 transition">Analizza Sentiment</button>
+    </form>
+    {% if result %}
+    <div class="mt-6 bg-white rounded-xl shadow p-4 w-3/4 text-center">
+        <p class="text-gray-700 text-lg mb-2">Risultato:</p>
+        <p class="text-2xl font-bold text-blue-600">{{ result.label }}</p>
+        <p class="text-sm text-gray-500 mt-1">Confidence: {{ result.confidence }}</p>
+    </div>
+    {% endif %}
+    <div class="mt-6">
+        <a href="/" class="text-blue-600 hover:underline">⬅️ Torna alla Home</a>
+    </div>
+</body>
+</html>

app_templates/random_tweet.html ADDED Viewed

	@@ -0,0 +1,33 @@

+<!DOCTYPE html>
+<html lang="it">
+<head>
+    <meta charset="UTF-8">
+    <title>Random Tweet</title>
+    <script src="https://cdn.tailwindcss.com"></script>
+</head>
+<body class="bg-gray-50 flex flex-col items-center justify-center min-h-screen">
+    <h2 class="text-3xl font-semibold text-gray-800 mb-4">🔀 Random Tweet Test</h2>
+    <div class="bg-white rounded-2xl shadow-md p-6 w-3/4 text-center">
+        <p class="text-lg text-gray-700 italic mb-4">"{{ text }}"</p>
+        <div class="grid grid-cols-2 gap-4 mt-4">
+            <div class="p-3 border rounded-xl">
+                <h3 class="text-gray-600 text-sm">🧩 Predizione del Modello</h3>
+                <p class="text-xl font-semibold text-blue-600">{{ result.label }}</p>
+                <p class="text-xs text-gray-500">Confidence: {{ result.confidence }}</p>
+            </div>
+            <div class="p-3 border rounded-xl">
+                <h3 class="text-gray-600 text-sm">🎯 Etichetta Reale</h3>
+                <p class="text-xl font-semibold text-green-600">{{ true_label }}</p>
+            </div>
+        </div>
+    </div>
+    <div class="mt-6 flex gap-4">
+        <a href="/random_tweet" class="px-5 py-2 bg-blue-600 text-white rounded-lg hover:bg-blue-700 transition">🔁 Altro Tweet</a>
+        <a href="/" class="px-5 py-2 bg-gray-300 text-gray-800 rounded-lg hover:bg-gray-400 transition">⬅️ Torna alla Home</a>
+    </div>
+</body>
+</html>

app_templates/random_youtube.html ADDED Viewed

	@@ -0,0 +1,34 @@

+<!DOCTYPE html>
+<html lang="it">
+<head>
+    <meta charset="UTF-8">
+    <title>Random YouTube Comment</title>
+    <script src="https://cdn.tailwindcss.com"></script>
+</head>
+<body class="bg-gray-50 flex flex-col items-center justify-center min-h-screen">
+    <h2 class="text-3xl font-semibold text-gray-800 mb-4">🌍 Test su Dati Nuovi (YouTube Comments)</h2>
+    <p class="text-gray-600 mb-6">Questa sezione testa il modello su dati reali non visti durante il training (generalizzazione).</p>
+    <div class="bg-white rounded-2xl shadow-md p-6 w-3/4 text-center">
+        <p class="text-lg text-gray-700 italic mb-4">"{{ text }}"</p>
+        <div class="grid grid-cols-2 gap-4 mt-4">
+            <div class="p-3 border rounded-xl">
+                <h3 class="text-gray-600 text-sm">🧩 Predizione del Modello</h3>
+                <p class="text-xl font-semibold text-blue-600">{{ result.label }}</p>
+                <p class="text-xs text-gray-500">Confidence: {{ result.confidence }}</p>
+            </div>
+            <div class="p-3 border rounded-xl">
+                <h3 class="text-gray-600 text-sm">🎯 Etichetta Reale</h3>
+                <p class="text-xl font-semibold text-green-600">{{ true_label }}</p>
+            </div>
+        </div>
+    </div>
+    <div class="mt-6 flex gap-4">
+        <a href="/random_youtube_comment" class="px-5 py-2 bg-blue-600 text-white rounded-lg hover:bg-blue-700 transition">🔁 Altro Commento</a>
+        <a href="/" class="px-5 py-2 bg-gray-300 text-gray-800 rounded-lg hover:bg-gray-400 transition">⬅️ Torna alla Home</a>
+    </div>
+</body>
+</html>

src/app.py ADDED Viewed

	@@ -0,0 +1,132 @@

+import os
+from fastapi import FastAPI, Request, Form
+from pydantic import BaseModel
+from fastapi.responses import HTMLResponse
+from fastapi.templating import Jinja2Templates
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from datasets import load_dataset, load_from_disk
+import torch
+import random
# Load the model and the datasets; the datasets must already exist on disk.
MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"
TWEET_PROCESSED_PATH = "data/processed/tweet_eval_tokenized"
YT_PROCESSED_PATH = "data/processed/youtube_tokenized"

tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)

# Class names indexed by class id; used for predictions and for true labels.
labels = ["negative", "neutral", "positive"]


def _load_processed_dataset(path):
    """Load a dataset previously saved at *path*.

    Nothing is downloaded here: when the directory is missing the function
    fails immediately with a message pointing to src/data_preparation.py.

    Raises:
        FileNotFoundError: if *path* does not exist.
    """
    if not os.path.exists(path):
        raise FileNotFoundError(
            f"Dati non trovati in {path}. "
            "Esegui src/data_preparation.py per crearlo."
        )
    return load_from_disk(path)


tweet_eval = _load_processed_dataset(TWEET_PROCESSED_PATH)
youtube_ds = _load_processed_dataset(YT_PROCESSED_PATH)

app = FastAPI(
    title="Sentiment Analysis API",
    description="Testa il modello RoBERTa di CardiffNLP su frasi personalizzate o su esempi random dal dataset TweetEval."
)
# NOTE: the directory is relative to the working directory of the process.
templates = Jinja2Templates(directory="app_templates/")


class TextInput(BaseModel):
    """Schema with a single ``text`` field.

    Not referenced by any route in the visible code.
    """

    text: str
def predict_sentiment(text: str, max_length: int = 512):
    """Classify the sentiment of *text* with the module-level model.

    Args:
        text: Text to classify.
        max_length: Upper bound on the number of tokens passed to the model;
            longer inputs are truncated. It is given explicitly so that
            truncation does not depend on the length limit stored in the
            tokenizer configuration.
            NOTE(review): 512 is the usual limit for RoBERTa-base
            checkpoints -- confirm for MODEL.

    Returns:
        A dict with ``"label"`` (an entry of ``labels``) and ``"confidence"``
        (the softmax probability of that label, rounded to 3 decimals).
    """
    # Only one sequence is encoded, so no padding is requested.
    inputs = tokenizer(
        text, return_tensors="pt", truncation=True, max_length=max_length
    )
    with torch.no_grad():  # inference only: no gradient tracking needed
        logits = model(**inputs).logits
    # Row 0 is the only row because a single text was encoded.
    probs = torch.nn.functional.softmax(logits, dim=-1)[0]
    pred = int(torch.argmax(probs).item())
    return {"label": labels[pred], "confidence": round(probs[pred].item(), 3)}
@app.get("/", response_class=HTMLResponse)
async def home(request: Request):
    """Serve the landing page rendered from ``index.html``."""
    context = {"request": request}
    return templates.TemplateResponse("index.html", context)
@app.get("/random_tweet", response_class=HTMLResponse)
def random_tweet(request: Request):
    """Classify a random row of ``tweet_eval["test"]`` and render it.

    The page shows the text, the model prediction and the true label.
    """
    sample = random.choice(tweet_eval["test"])
    if "text" in sample:
        text = sample["text"]
    else:
        # No raw text column: rebuild the text from the stored token ids.
        text = tokenizer.decode(sample["input_ids"], skip_special_tokens=True)

    prediction = predict_sentiment(text)
    expected = labels[sample["label"]]

    context = {
        "request": request,
        "text": text,
        "true_label": expected,
        "result": prediction,
    }
    return templates.TemplateResponse("random_tweet.html", context)
@app.get("/predict", response_class=HTMLResponse)
def predict_page(request: Request):
    """Render the prediction form with no result yet."""
    context = {"request": request, "result": None}
    return templates.TemplateResponse("predict.html", context)
@app.post("/predict", response_class=HTMLResponse)
def predict_text(request: Request, text: str = Form(...)):
    """Classify the submitted form text and re-render the form with the result."""
    prediction = predict_sentiment(text)
    context = {"request": request, "text": text, "result": prediction}
    return templates.TemplateResponse("predict.html", context)
@app.get("/random_youtube_comment", response_class=HTMLResponse)
def random_youtube_comment(request: Request):
    """Classify a random row of ``youtube_ds["train"]`` and render it.

    The page shows the comment text, the model prediction and the true
    label ("N/A" when the row has no usable label).
    """
    sample = random.choice(youtube_ds["train"])

    # Prefer the cleaned "text" column. If it is absent, fall back to the raw
    # "CommentText" column that src/data_preparation.py reads, instead of
    # indexing the missing "text" key again.
    text = sample.get("text")
    if text is None:
        text = sample.get("CommentText") or ""

    # Same idea for the label: "label" is derived from the raw "Sentiment"
    # column by src/data_preparation.py.
    true_label = sample.get("label", sample.get("Sentiment"))
    if true_label is None:
        true_label = "N/A"
    elif isinstance(true_label, int):
        # Reuse the shared class names; ids outside the list become "N/A".
        in_range = 0 <= true_label < len(labels)
        true_label = labels[true_label] if in_range else "N/A"

    result = predict_sentiment(text)
    return templates.TemplateResponse(
        "random_youtube.html",
        {
            "request": request,
            "text": text,
            "true_label": true_label,
            "result": result,
        },
    )
if __name__ == "__main__":
    # Imported here because uvicorn is only needed when run as a script.
    import uvicorn

    uvicorn.run(app, port=8000, host="0.0.0.0")

src/data_preparation.py ADDED Viewed

	@@ -0,0 +1,63 @@

+from datasets import load_dataset
+from transformers import AutoTokenizer
+import re
+import os
# Checkpoint whose tokenizer is used to encode the datasets.
MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment-latest"
# Directory that receives the tokenized datasets.
PROCESSED_DIR = "data/processed/"
# exist_ok=True already covers the "directory is there" case, so no separate
# existence check is needed (and there is no window between check and create).
os.makedirs(PROCESSED_DIR, exist_ok=True)
### Helper functions
_URL_RE = re.compile(r"http\S+")
_MENTION_RE = re.compile(r"@\w+")
# Numeric entities (&#39; / &#x27;) are removed in the same pass as hashtags.
# The entity alternative starts at "&", one character before "#", so it wins
# and the hashtag rule can no longer turn "&#39;" into a stray "&;".
_NUMERIC_ENTITY_OR_HASHTAG_RE = re.compile(r"&#(?:\d+|[xX][0-9a-fA-F]+);|#\w+")
_NAMED_ENTITY_RE = re.compile(r"&[A-Za-z][A-Za-z0-9]*;")
_WHITESPACE_RE = re.compile(r"\s+")


def clean_text(text):
    """Strip URLs, mentions, hashtags and HTML entities from *text*.

    Args:
        text: The raw text, or ``None`` for a missing value.

    Returns:
        The cleaned text with runs of whitespace collapsed to one space and
        leading/trailing whitespace removed. ``None`` yields ``""``.
    """
    if text is None:
        return ""
    text = _URL_RE.sub("", text)
    text = _MENTION_RE.sub("", text)
    text = _NUMERIC_ENTITY_OR_HASHTAG_RE.sub("", text)
    text = _NAMED_ENTITY_RE.sub("", text)
    text = _WHITESPACE_RE.sub(" ", text)
    return text.strip()
def map_label(label):
    """Map a sentiment label to its numeric id.

    - 0: negative
    - 1: neutral
    - 2: positive

    String labels are matched case-insensitively and ignoring surrounding
    whitespace; an unrecognised string maps to 1 (neutral). Any non-string
    value is returned unchanged.
    """
    mapping = {"negative": 0, "neutral": 1, "positive": 2}
    if isinstance(label, str):
        # strip() so that values such as " Positive\n" are still recognised
        # instead of silently falling back to neutral.
        return mapping.get(label.strip().lower(), 1)
    return label
# Download the tweet_eval sentiment dataset
tweet_eval = load_dataset("tweet_eval", "sentiment")
# Download the YouTube comment dataset
youtube = load_dataset("AmaanP314/youtube-comment-sentiment")


def _clean_tweet(example):
    """Replace the tweet text with its cleaned version."""
    return {"text": clean_text(example["text"])}


def _clean_comment(example):
    """Store the cleaned "CommentText" value under "text"."""
    return {"text": clean_text(example["CommentText"])}


def _numeric_label(example):
    """Store the numeric form of "Sentiment" under "label"."""
    return {"label": map_label(example["Sentiment"])}


tweet_eval = tweet_eval.map(_clean_tweet)
youtube = youtube.map(_clean_comment)
youtube = youtube.map(_numeric_label)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
def tokenize_function(examples):
    """Tokenize the "text" entry of *examples* with the module-level tokenizer.

    Every sequence is truncated or padded to exactly 128 tokens.
    """
    encoding_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 128,
    }
    return tokenizer(examples["text"], **encoding_options)
# Tokenize both datasets in batches, then write each one under PROCESSED_DIR.
tweet_tokenized = tweet_eval.map(tokenize_function, batched=True)
youtube_tokenized = youtube.map(tokenize_function, batched=True)
for folder_name, tokenized in (
    ("tweet_eval_tokenized", tweet_tokenized),
    ("youtube_tokenized", youtube_tokenized),
):
    tokenized.save_to_disk(os.path.join(PROCESSED_DIR, folder_name))