SignalMod / tests /test_vectorizer.py
Mirae Kang
feat: implement unit tests, #13
447c4a0
raw
history blame
1.05 kB
"""Tests del vectorizador TF-IDF."""
import pytest
from src.features.vectorizer import Vectorizer
# min_df=3 in configs/features.yaml: terms must appear in ≥3 documents.
# In CORPUS_TRAIN only "the" (documents 1-3) and "fox" (documents 1, 3, 4)
# occur in three documents; every other word occurs in at most two.
# NOTE(review): presumably those are the only terms that can survive the
# min_df filter -- confirm against the Vectorizer's tokenizer/stop-word setup.
CORPUS_TRAIN = [
    "the quick brown fox jumps",
    "the lazy dog runs fast",
    "the fox and dog play together",
    "another quick fox story here",
]
# Single document passed to transform() after fitting on CORPUS_TRAIN.
CORPUS_TEST = ["the fox is quick today"]
@pytest.fixture(scope="module")
def vectorizer(features_config: str) -> Vectorizer:
    """Build the ``Vectorizer`` instance used by the tests in this module.

    Args:
        features_config: Value of the ``features_config`` fixture (not defined
            in the visible source); it is forwarded as ``config_path``.

    Returns:
        A ``Vectorizer`` constructed with ``method="tfidf"``.
    """
    # NOTE(review): with scope="module" every test in this file receives the
    # same instance; each test calls fit_transform() itself before using it.
    tfidf_vectorizer = Vectorizer(config_path=features_config, method="tfidf")
    return tfidf_vectorizer
def test_fit_transform_output_shape(vectorizer: Vectorizer):
    """Check the shape returned by ``fit_transform`` on the training corpus.

    The result must have one row per training document and a column count
    in the range 1..5000 inclusive.
    """
    fitted = vectorizer.fit_transform(CORPUS_TRAIN)
    dims = fitted.shape

    n_rows = dims[0]
    assert n_rows == len(CORPUS_TRAIN)

    n_columns = dims[1]
    # NOTE(review): 5000 presumably mirrors a feature cap in the features
    # config -- confirm against configs/features.yaml.
    assert 0 < n_columns <= 5000
def test_transform_preserves_sample_count(vectorizer: Vectorizer):
    """Check that ``transform`` output lines up with the fitted matrix.

    After fitting on ``CORPUS_TRAIN``, transforming ``CORPUS_TEST`` must give
    one row per test document and the same number of columns as the matrix
    returned by ``fit_transform``.
    """
    fitted_train = vectorizer.fit_transform(CORPUS_TRAIN)
    transformed_test = vectorizer.transform(CORPUS_TEST)

    test_dims = transformed_test.shape
    n_test_rows = test_dims[0]
    n_test_columns = test_dims[1]
    n_train_columns = fitted_train.shape[1]

    assert n_test_rows == len(CORPUS_TEST)
    assert n_test_columns == n_train_columns