Spaces:
Running
Running
| """Tests for text preprocessing utilities.""" | |
| import pytest | |
| from src.data.preprocessing import clean_text, clean_texts | |
def test_clean_text_lowercase():
    """Mixed-case input should come back fully lowercased."""
    cleaned = clean_text("Hello WORLD")
    expected = "hello world"
    assert cleaned == expected
def test_clean_text_whitespace():
    """Leading and trailing spaces should be absent from the cleaned text."""
    # NOTE(review): the literal reads "too many spaces" but contains only
    # single spaces; repeated spaces may have been collapsed in this copy of
    # the file -- confirm against the repository before relying on it.
    padded = " too many spaces "
    trimmed = clean_text(padded)
    assert trimmed == "too many spaces"
def test_clean_text_non_ascii():
    """The accented character in the input should be dropped from the output."""
    raw = "caf\u00e9 latte"  # \u00e9 is the non-ASCII letter "e with acute"
    stripped = clean_text(raw)
    assert stripped == "caf latte"
def test_clean_text_preserves_punctuation():
    """Apostrophes and exclamation marks should survive cleaning."""
    cleaned = clean_text("I can't log in!")
    # Check the apostrophe first, then the exclamation mark.
    for mark in ("'", "!"):
        assert mark in cleaned
def test_clean_text_non_string():
    """A non-string argument (here an int) should still yield a ``str``."""
    outcome = clean_text(42)
    assert isinstance(outcome, str)
def test_clean_texts_batch():
    """Each item of a batch should be cleaned, with positions preserved."""
    batch = ["Hello World", " extra spaces "]
    cleaned = clean_texts(batch)
    expected = ("hello world", "extra spaces")
    # Compare position by position so a failure points at the offending item.
    for position, want in enumerate(expected):
        assert cleaned[position] == want