LorenzoBioinfo committed on
Commit
26ff02c
·
1 Parent(s): 4e05a46

Add test first part

Browse files
tests/integration/test_app.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Integration tests for the FastAPI application (tests/integration/test_app.py).
import pytest
from fastapi.testclient import TestClient
from src.app import app

# Shared synchronous test client: requests are dispatched directly to the
# ASGI app in-process, no network server is started.
client = TestClient(app)
def test_home_page():
    """The root page responds with 200 and contains the welcome text."""
    resp = client.get("/")
    assert resp.status_code == 200
    assert "Benvenuto" in resp.text
def test_predict_endpoint_get():
    """GET /predict renders the model-testing page."""
    resp = client.get("/predict")
    assert resp.status_code == 200
    assert "Testa il Modello" in resp.text
def test_predict_endpoint_post():
    """POSTing text to /predict returns 200 and mentions a sentiment label."""
    resp = client.post("/predict", data={"text": "I love this!"})
    assert resp.status_code == 200
    labels = ("positive", "neutral", "negative")
    assert any(lbl in resp.text for lbl in labels)
def test_random_tweet_page():
    """/random_tweet responds with 200 and shows a sentiment result.

    Fix: the original carried ``@pytest.mark.asyncio`` on a plain (non-async)
    function — TestClient is synchronous, and pytest-asyncio in strict mode
    errors out when the marker is applied to a non-coroutine test. The marker
    is removed; behavior of the test itself is unchanged.
    """
    response = client.get("/random_tweet")
    assert response.status_code == 200
    assert "Sentiment" in response.text
def test_random_youtube_page():
    """/random_youtube_comment responds with 200 and shows a sentiment result.

    Fix: removed ``@pytest.mark.asyncio`` — the function is not a coroutine
    and the shared TestClient is synchronous; pytest-asyncio in strict mode
    rejects the marker on a non-async test.
    """
    response = client.get("/random_youtube_comment")
    assert response.status_code == 200
    assert "Sentiment" in response.text
tests/unit/test_data.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# tests/test_data.py
# Unit tests that ensure the preprocessed datasets exist on disk (creating
# them on demand via src/data_preparation.py) and have the expected columns.
import os
import subprocess
from datasets import load_from_disk

# On-disk locations of the preprocessed HuggingFace datasets.
TWEET_PROCESSED_PATH = "data/processed/tweet_eval_tokenized"
YT_PROCESSED_PATH = "data/processed/youtube_comments"
def run_data_preparation(dataset_name):
    """Run the data-preparation script for the requested dataset.

    Args:
        dataset_name: value forwarded to ``--dataset``
            (e.g. ``"tweet_eval"`` or ``"youtube"``).

    Raises:
        subprocess.CalledProcessError: if the script exits non-zero
            (``check=True``).
    """
    import sys  # local import: only needed to locate the current interpreter

    print(f"⚙️ Avvio data_preparation.py per il dataset: {dataset_name}")
    # Fix: use the interpreter running the tests instead of whatever "python"
    # happens to resolve to on PATH, so the script executes in the same
    # environment (same venv, same installed packages).
    subprocess.run(
        [sys.executable, "src/data_preparation.py", "--dataset", dataset_name],
        check=True,
    )
def test_tweet_eval_dataset_exists_or_create():
    """Ensure the preprocessed Tweet Eval dataset exists, building it if absent."""
    if not os.path.exists(TWEET_PROCESSED_PATH):
        # Not on disk yet: run the preparation script once to create it.
        run_data_preparation("tweet_eval")
    assert os.path.exists(TWEET_PROCESSED_PATH), "Tweet Eval non disponibile dopo la preparazione"
def test_youtube_dataset_exists_or_create():
    """Ensure the preprocessed YouTube dataset exists, building it if absent."""
    if not os.path.exists(YT_PROCESSED_PATH):
        # Not on disk yet: run the preparation script once to create it.
        run_data_preparation("youtube")
    assert os.path.exists(YT_PROCESSED_PATH), "YouTube dataset non disponibile dopo la preparazione"
def test_tweet_eval_structure():
    """The preprocessed Tweet Eval dataset exposes the expected columns."""
    dataset = load_from_disk(TWEET_PROCESSED_PATH)
    features = dataset["test"].features
    assert "text" in features, "Campo 'text' mancante in Tweet Eval"
    assert "label" in features, "Campo 'label' mancante in Tweet Eval"
def test_youtube_structure():
    """The preprocessed YouTube dataset exposes the expected columns.

    Fixes:
    - the original condition tested ``"CommentText" in ds["train"].features``
      twice (``a or a``), which is a tautology of a single check; a single
      membership test expresses the intended assertion;
    - the second assert's message claimed the missing field was 'label' while
      the code checks 'Sentiment' — the message now names the right field.
    """
    ds = load_from_disk(YT_PROCESSED_PATH)
    assert "CommentText" in ds["train"].features, \
        "Campo testuale mancante in YouTube dataset"
    assert "Sentiment" in ds["train"].features, "Campo 'Sentiment' mancante in YouTube dataset"
tests/unit/test_model.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Unit tests for the pretrained sentiment model (tests/unit/test_model.py).
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Pretrained sentiment model under test and its label order.
MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment-latest"
LABELS = ["negative", "neutral", "positive"]

# NOTE(review): loading at import time downloads/instantiates the model during
# test collection; consider a module-scoped pytest fixture if startup cost or
# offline CI becomes a problem.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
def test_model_loads():
    """Both the tokenizer and the model were instantiated successfully."""
    for component in (model, tokenizer):
        assert component is not None
def test_model_prediction_shape():
    """The classifier head emits exactly one logit per sentiment label."""
    encoded = tokenizer("I love this product!", return_tensors="pt")
    with torch.no_grad():
        logits = model(**encoded).logits
    assert logits.shape[-1] == len(LABELS)
def test_sentiment_confidence():
    """Softmax over the logits forms a valid probability distribution."""
    encoded = tokenizer("I hate this", return_tensors="pt")
    with torch.no_grad():
        logits = model(**encoded).logits
        probs = torch.nn.functional.softmax(logits, dim=-1)
    # Probabilities must sum to 1 (within float tolerance).
    assert torch.isclose(probs.sum(), torch.tensor(1.0), atol=1e-3)