Spaces:
Running
Running
| """Tests for the baseline classifier and label encoding.""" | |
| import pandas as pd | |
| import pytest | |
| from sklearn.pipeline import Pipeline | |
| from src.models.baseline import build_pipeline | |
| from src.models.intent_classifier import LABEL2ID, ID2LABEL, INTENT_CATEGORIES | |
def _make_dummy_df(n: int = 20) -> pd.DataFrame:
    """Build a small synthetic dataset for exercising the baseline pipeline.

    Labels are drawn by cycling through the sorted ``INTENT_CATEGORIES`` until
    ``n`` labels have been produced. Each text embeds its row position and its
    own label.

    Args:
        n: Number of rows to generate.

    Returns:
        A DataFrame with a ``"text"`` column and a ``"label"`` column.
    """
    import itertools

    # Sorting first makes the label order deterministic regardless of the
    # container type used for INTENT_CATEGORIES.
    label_cycle = itertools.cycle(sorted(INTENT_CATEGORIES))
    labels = list(itertools.islice(label_cycle, n))

    # Iterate the labels directly instead of indexing by range(n): the number
    # of texts always equals the number of labels actually generated.
    texts = [
        f"sample query number {position} for {label}"
        for position, label in enumerate(labels)
    ]
    return pd.DataFrame({"text": texts, "label": labels})
def test_label_encoding_roundtrip():
    """Each intent label maps to an id that maps back to the same label."""
    assert all(
        ID2LABEL[LABEL2ID[category]] == category
        for category in INTENT_CATEGORIES
    )
def test_label_encoding_count():
    """Both label mappings contain exactly six entries."""
    expected_size = 6
    for mapping in (LABEL2ID, ID2LABEL):
        assert len(mapping) == expected_size
def test_build_pipeline_returns_sklearn_pipeline():
    """``build_pipeline()`` called with no arguments yields a sklearn ``Pipeline``."""
    assert isinstance(build_pipeline(), Pipeline)
def test_baseline_fit_predict():
    """Fit the baseline on 60 dummy rows and sanity-check five predictions.

    The pipeline is built with ``max_features=100`` and ``min_df=1``, fitted on
    the full dummy frame, and asked to predict the first five texts. The test
    checks the number of predictions and that each one is a known intent
    category.
    """
    frame = _make_dummy_df(60)
    texts, targets = frame["text"], frame["label"]

    model = build_pipeline(max_features=100, min_df=1)
    model.fit(texts, targets)

    predictions = model.predict(texts.head(5))

    assert len(predictions) == 5
    assert all(predicted in INTENT_CATEGORIES for predicted in predictions)