Spaces:

devrup404
/

SignalMod

Running

App Files Files Community

Mirae Kang committed 5 days ago

Commit

447c4a0

1 Parent(s): df89287

feat: implement unit tests, #13

Browse files

Files changed (6) hide show

tests/.gitkeep +0 -0
tests/conftest.py +40 -0
tests/test_api.py +60 -0
tests/test_model.py +45 -0
tests/test_preprocessor.py +37 -0
tests/test_vectorizer.py +36 -0

tests/.gitkeep DELETED Viewed

File without changes

tests/conftest.py ADDED Viewed

	@@ -0,0 +1,40 @@

+"""Fixtures compartidas para tests del proyecto."""
+import os
+import sys
+from pathlib import Path
+import pytest
+# Parent of the directory containing this conftest (tests/ -> repository root).
+PROJECT_ROOT = Path(__file__).resolve().parents[1]
+# Prepend the root to sys.path (once) so modules under it can be imported by the tests.
+if str(PROJECT_ROOT) not in sys.path:
+    sys.path.insert(0, str(PROJECT_ROOT))
+@pytest.fixture(scope="session", autouse=True)
+def _project_cwd():
+    """Modules and configs use paths relative to the repo root.
+
+    Session-wide autouse fixture: switches the working directory to
+    PROJECT_ROOT before tests run and restores the previous one afterwards.
+    """
+    prev = os.getcwd()
+    os.chdir(PROJECT_ROOT)
+    yield
+    # Teardown: return to the directory pytest was started from.
+    os.chdir(prev)
+@pytest.fixture(scope="session")
+def project_root() -> Path:
+    """Return PROJECT_ROOT, the repository root path computed at import time."""
+    return PROJECT_ROOT
+@pytest.fixture(scope="session")
+def features_config(project_root: Path) -> str:
+    """Return the path to configs/features.yaml under the project root, as a string."""
+    return str(project_root / "configs" / "features.yaml")
+@pytest.fixture(scope="session")
+def models_config(project_root: Path) -> str:
+    """Return the path to configs/models.yaml under the project root, as a string."""
+    return str(project_root / "configs" / "models.yaml")
+@pytest.fixture(scope="session")
+def best_params_config(project_root: Path) -> str:
+    """Return the path to configs/best_params.yaml under the project root, as a string."""
+    return str(project_root / "configs" / "best_params.yaml")

tests/test_api.py ADDED Viewed

	@@ -0,0 +1,60 @@

+"""Tests del endpoint POST /predict."""
+from unittest.mock import MagicMock
+import pytest
+from fastapi.testclient import TestClient
+from src.api import main as api_main
+# Keys that the /predict response body must contain (extra keys are tolerated
+# because the test below checks with a subset comparison).
+PREDICT_RESPONSE_KEYS = {
+    "text",
+    "is_toxic",
+    "probability",
+    "labels",
+    "model_used",
+    "latency_ms",
+}
+@pytest.fixture
+def client():
+    """Yield a TestClient for the API app with the prediction service mocked.
+
+    The mock's ``predict`` always returns the same non-toxic result, so tests
+    using this fixture exercise the HTTP layer rather than a real model.
+    """
+    mock_service = MagicMock()
+    mock_service.predict.return_value = {
+        "is_toxic": False,
+        "probability": 0.12,
+        "labels": [],
+        "model_used": "LR + TF-IDF (local)",
+    }
+    with TestClient(api_main.app) as test_client:
+        # State is overwritten after the client context has been entered.
+        # NOTE(review): presumably so that app startup cannot replace the mock — confirm.
+        api_main._state["service"] = mock_service
+        api_main._state["model_name"] = "LR + TF-IDF (local)"
+        api_main._state["predictions_served"] = 0
+        yield test_client
+    # Teardown clears the service and model name; "predictions_served" is not
+    # touched here (it is reset to 0 on the next setup).
+    api_main._state["service"] = None
+    api_main._state["model_name"] = None
+def test_predict_returns_correct_structure(client: TestClient):
+    """POST /predict with valid input returns 200 and a well-typed payload."""
+    response = client.post(
+        "/predict",
+        json={"text": "This is a sample comment", "threshold": 0.5},
+    )
+    assert response.status_code == 200
+    data = response.json()
+    # Subset check: every required key is present; additional keys are allowed.
+    assert PREDICT_RESPONSE_KEYS <= set(data.keys())
+    # The input text is expected to be echoed back unchanged.
+    assert data["text"] == "This is a sample comment"
+    assert isinstance(data["is_toxic"], bool)
+    assert 0.0 <= data["probability"] <= 1.0
+    assert isinstance(data["labels"], list)
+    assert isinstance(data["model_used"], str)
+    assert isinstance(data["latency_ms"], (int, float))
+def test_predict_rejects_empty_text(client: TestClient):
+    """POST /predict with whitespace-only text is expected to return HTTP 422."""
+    response = client.post("/predict", json={"text": "   "})
+    assert response.status_code == 422

tests/test_model.py ADDED Viewed

	@@ -0,0 +1,45 @@

+"""Tests de salida binaria de modelos sklearn."""
+import numpy as np
+import pytest
+from src.models.baseline import build_model
+# Tiny training corpus: the first four sentences are labelled 0 and the last
+# two are labelled 1 (see Y_TRAIN, aligned by position).
+X_TRAIN = [
+    "the quick brown fox is nice",
+    "the lazy dog sleeps well",
+    "the fox and dog are friends",
+    "another calm peaceful day today",
+    "you are stupid and worthless idiot",
+    "kill them all right now attack",
+]
+Y_TRAIN = [0, 0, 0, 0, 1, 1]
+# Inputs used only for prediction in the tests below.
+X_PRED = ["the fox is calm", "you idiot fool"]
+@pytest.fixture(scope="module")
+def trained_lr(models_config: str, features_config: str, best_params_config: str):
+    """Build the "lr" model from the project configs and fit it on X_TRAIN/Y_TRAIN.
+
+    Module-scoped, so the model is built and fitted once for this test module.
+    """
+    model = build_model(
+        "lr",
+        config_path=models_config,
+        feat_config_path=features_config,
+        best_params_path=best_params_config,
+    )
+    model.fit(X_TRAIN, Y_TRAIN)
+    return model
+def test_predict_binary_labels(trained_lr):
+    """predict() returns one label per input, and every label is 0 or 1."""
+    preds = trained_lr.predict(X_PRED)
+    assert preds.shape == (len(X_PRED),)
+    assert set(np.unique(preds)).issubset({0, 1})
+def test_predict_proba_valid_binary_distribution(trained_lr):
+    """predict_proba() returns a two-column array of valid probabilities per row."""
+    proba = trained_lr.predict_proba(X_PRED)
+    assert proba.shape == (len(X_PRED), 2)
+    assert np.all(proba >= 0.0)
+    assert np.all(proba <= 1.0)
+    # Each row must sum to 1 within a relative tolerance of 1e-5.
+    np.testing.assert_allclose(proba.sum(axis=1), 1.0, rtol=1e-5)

tests/test_preprocessor.py ADDED Viewed

	@@ -0,0 +1,37 @@

+"""Tests del pipeline de preprocesamiento de texto."""
+import re
+import pytest
+from src.features.text_preprocessor import TextPreprocessor
+@pytest.fixture(scope="module")
+def preprocessor(features_config: str) -> TextPreprocessor:
+    """Return a TextPreprocessor built from the features config, shared by this module's tests."""
+    return TextPreprocessor(config_path=features_config)
+def test_empty_text_returns_empty_string(preprocessor: TextPreprocessor):
+    """Empty and whitespace-only inputs are both expected to transform to ""."""
+    assert preprocessor.transform("") == ""
+    assert preprocessor.transform("   ") == ""
+def test_url_text_removes_urls(preprocessor: TextPreprocessor):
+    """Both scheme-prefixed and bare "www." URLs are expected to be stripped from the output."""
+    raw = "Visit https://example.com/path and www.test.org now"
+    clean = preprocessor.transform(raw)
+    assert "http" not in clean
+    assert "www." not in clean
+    assert "example.com" not in clean
+    # Also guard against any remaining "http://" or "https://" prefix.
+    assert re.search(r"https?://", clean) is None
+def test_normal_text_lowercase_and_lemmatized(preprocessor: TextPreprocessor):
+    """Ordinary text yields a non-empty, lowercase string containing the token "run"."""
+    raw = "The runners are running quickly"
+    clean = preprocessor.transform(raw)
+    assert isinstance(clean, str)
+    assert clean == clean.lower()
+    assert clean != ""
+    # Requires "run" as a standalone whitespace-separated token, not a substring.
+    assert "run" in clean.split()

tests/test_vectorizer.py ADDED Viewed

	@@ -0,0 +1,36 @@

+"""Tests del vectorizador TF-IDF."""
+import pytest
+from src.features.vectorizer import Vectorizer
+# min_df=3 in configs/features.yaml: terms must appear in ≥3 documents
+CORPUS_TRAIN = [
+    "the quick brown fox jumps",
+    "the lazy dog runs fast",
+    "the fox and dog play together",
+    "another quick fox story here",
+]
+# Single held-out document used to check transform() against the fitted vocabulary.
+CORPUS_TEST = ["the fox is quick today"]
+@pytest.fixture(scope="module")
+def vectorizer(features_config: str) -> Vectorizer:
+    """Return a Vectorizer using the "tfidf" method, shared by this module's tests."""
+    return Vectorizer(config_path=features_config, method="tfidf")
+def test_fit_transform_output_shape(vectorizer: Vectorizer):
+    """fit_transform() yields one row per document and a bounded, non-zero feature count."""
+    matrix = vectorizer.fit_transform(CORPUS_TRAIN)
+    assert matrix.shape[0] == len(CORPUS_TRAIN)
+    assert matrix.shape[1] > 0
+    # NOTE(review): 5000 presumably mirrors a feature cap in the features config — confirm.
+    assert matrix.shape[1] <= 5000
+def test_transform_preserves_sample_count(vectorizer: Vectorizer):
+    """transform() keeps the row count of its input and the column count of the fit."""
+    # Refits the shared module-scoped vectorizer, so this test does not rely on
+    # another test having fitted it first.
+    train_matrix = vectorizer.fit_transform(CORPUS_TRAIN)
+    test_matrix = vectorizer.transform(CORPUS_TEST)
+    assert test_matrix.shape[0] == len(CORPUS_TEST)
+    assert test_matrix.shape[1] == train_matrix.shape[1]