Spaces:
Sleeping
Sleeping
LorenzoBioinfo committed on
Commit ·
26ff02c
1
Parent(s): 4e05a46
Add test first part
Browse files- tests/integration/test_app.py +35 -0
- tests/unit/test_data.py +40 -0
- tests/unit/test_model.py +26 -0
tests/integration/test_app.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# tests/integration/test_app.py
# Integration tests for the FastAPI app: each route is exercised through the
# synchronous TestClient, checking status codes and expected page content.
#
# Fix: the original marked two plain (sync) test functions with
# @pytest.mark.asyncio. TestClient calls are synchronous, and pytest-asyncio
# errors out (strict mode) or skips when that marker is put on a non-async
# function — the markers are removed, which also makes `import pytest` unused.
from fastapi.testclient import TestClient

from src.app import app

# One shared client for the whole module; TestClient runs the app in-process.
client = TestClient(app)


def test_home_page():
    """Home page responds 200 and contains the welcome text."""
    response = client.get("/")
    assert response.status_code == 200
    assert "Benvenuto" in response.text


def test_predict_endpoint_get():
    """GET /predict renders the prediction form page."""
    response = client.get("/predict")
    assert response.status_code == 200
    assert "Testa il Modello" in response.text


def test_predict_endpoint_post():
    """POST /predict returns a page containing one of the sentiment labels."""
    response = client.post("/predict", data={"text": "I love this!"})
    assert response.status_code == 200
    assert any(label in response.text for label in ["positive", "neutral", "negative"])


def test_random_tweet_page():
    """GET /random_tweet returns a page with a sentiment section."""
    response = client.get("/random_tweet")
    assert response.status_code == 200
    assert "Sentiment" in response.text


def test_random_youtube_page():
    """GET /random_youtube_comment returns a page with a sentiment section."""
    response = client.get("/random_youtube_comment")
    assert response.status_code == 200
    assert "Sentiment" in response.text
tests/unit/test_data.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# tests/test_data.py
# Unit tests for the data-preparation pipeline: ensure the processed datasets
# exist on disk (creating them on demand) and expose the expected columns.
import os
import subprocess

from datasets import load_from_disk

# On-disk locations of the preprocessed datasets produced by
# src/data_preparation.py.
TWEET_PROCESSED_PATH = "data/processed/tweet_eval_tokenized"
YT_PROCESSED_PATH = "data/processed/youtube_comments"


def run_data_preparation(dataset_name):
    """Run the data-preparation script for the requested dataset.

    Raises subprocess.CalledProcessError (check=True) if the script exits
    non-zero, so the calling test fails loudly rather than silently.
    """
    print(f"⚙️ Avvio data_preparation.py per il dataset: {dataset_name}")
    subprocess.run(
        ["python", "src/data_preparation.py", "--dataset", dataset_name],
        check=True,
    )


def test_tweet_eval_dataset_exists_or_create():
    """Check, or create on demand, the preprocessed Tweet Eval dataset."""
    if not os.path.exists(TWEET_PROCESSED_PATH):
        run_data_preparation("tweet_eval")
    assert os.path.exists(TWEET_PROCESSED_PATH), "Tweet Eval non disponibile dopo la preparazione"


def test_youtube_dataset_exists_or_create():
    """Check, or create on demand, the preprocessed YouTube dataset."""
    if not os.path.exists(YT_PROCESSED_PATH):
        run_data_preparation("youtube")
    assert os.path.exists(YT_PROCESSED_PATH), "YouTube dataset non disponibile dopo la preparazione"


def test_tweet_eval_structure():
    """Verify the Tweet Eval dataset exposes the expected columns."""
    ds = load_from_disk(TWEET_PROCESSED_PATH)
    assert "text" in ds["test"].features, "Campo 'text' mancante in Tweet Eval"
    assert "label" in ds["test"].features, "Campo 'label' mancante in Tweet Eval"


def test_youtube_structure():
    """Verify the YouTube dataset exposes the expected columns."""
    ds = load_from_disk(YT_PROCESSED_PATH)
    # Fix: the original tested the same condition twice
    # ("CommentText" in features or "CommentText" in features) — dead duplicate.
    assert "CommentText" in ds["train"].features, \
        "Campo testuale mancante in YouTube dataset"
    # Fix: the failure message said 'label' while the check is for 'Sentiment'.
    assert "Sentiment" in ds["train"].features, "Campo 'Sentiment' mancante in YouTube dataset"
tests/unit/test_model.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# tests/unit/test_model.py
"""Unit tests for the pretrained sentiment model.

Covers: the model/tokenizer load successfully, the classifier head emits one
logit per label, and the softmax scores form a valid probability distribution.
"""
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment-latest"
LABELS = ["negative", "neutral", "positive"]

# Loaded once at import time and shared by every test in the module.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)


def test_model_loads():
    """Both artifacts must have been materialized at import time."""
    assert tokenizer is not None
    assert model is not None


def test_model_prediction_shape():
    """The classifier head must emit exactly one logit per sentiment label."""
    encoded = tokenizer("I love this product!", return_tensors="pt")
    with torch.no_grad():
        logits = model(**encoded).logits
    assert logits.shape[-1] == len(LABELS)


def test_sentiment_confidence():
    """Softmax over the logits must sum to (approximately) one."""
    encoded = tokenizer("I hate this", return_tensors="pt")
    with torch.no_grad():
        scores = model(**encoded).logits.softmax(dim=-1)
    assert torch.isclose(scores.sum(), torch.tensor(1.0), atol=1e-3)