Spaces:
Sleeping
Sleeping
"""
VoiceForge - Translation Service Unit Tests
---------------------------------------------
Tests for translation service functions:
- Language detection
- MarianMT translation
- Batch translation
- Unsupported language handling
"""
| import pytest | |
| from unittest.mock import Mock, patch, AsyncMock | |
class TestLanguageDetection:
    """Placeholder checks for language detection.

    No detector is invoked here: every test compares a hard-coded,
    "simulated" language code against the value it is expected to have.
    """

    def test_detect_english(self):
        """An English sample is expected to be reported as ``en``."""
        sample = "Hello, this is a test sentence in English."
        # Stand-in for the detector's answer; the real module is not imported.
        simulated = "en"
        assert simulated == "en"
        assert len(sample) > 0

    def test_detect_spanish(self):
        """A Spanish sample is expected to be reported as ``es``."""
        sample = "Hola, esta es una oración de prueba en español."
        simulated = "es"
        assert simulated == "es"
        assert len(sample) > 0

    def test_detect_mixed_language(self):
        """Mixed-language input should resolve to one of its languages."""
        sample = "Hello, ¿cómo estás?"
        # Either language present in the sample is an acceptable primary one.
        acceptable = ("en", "es")
        simulated = "es"  # Simulated result
        assert simulated in acceptable
class TestMarianMTTranslation:
    """Test the MarianMT translation pipeline through a mocked service.

    ``TranslationService`` is patched out in every test, so these checks
    pin the shape of the mocked call results rather than real model output.

    Each coroutine test carries an explicit ``@pytest.mark.asyncio`` marker
    so it is actually awaited under pytest-asyncio's default (strict) mode;
    the marker is harmless when the project runs with ``asyncio_mode = auto``.
    """

    @pytest.mark.asyncio
    async def test_translate_en_to_es(self):
        """Test English to Spanish translation"""
        with patch('app.services.translation_service.TranslationService') as MockService:
            mock_service = MockService.return_value
            mock_service.translate = AsyncMock(return_value={
                "source_lang": "en",
                "target_lang": "es",
                "source_text": "Hello world",
                "translated_text": "Hola mundo"
            })
            result = await mock_service.translate("Hello world", "en", "es")
            assert result["translated_text"] == "Hola mundo"
            assert result["source_lang"] == "en"
            assert result["target_lang"] == "es"

    @pytest.mark.asyncio
    async def test_translate_preserves_formatting(self):
        """Test that translation preserves basic formatting"""
        with patch('app.services.translation_service.TranslationService') as MockService:
            mock_service = MockService.return_value
            mock_service.translate = AsyncMock(return_value={
                "translated_text": "Primera línea.\nSegunda línea."
            })
            result = await mock_service.translate("First line.\nSecond line.", "en", "es")
            # Check newline is preserved
            assert "\n" in result["translated_text"]

    @pytest.mark.asyncio
    async def test_batch_translation(self):
        """Test batch translation of multiple texts"""
        texts = ["Hello", "World", "Test"]
        with patch('app.services.translation_service.TranslationService') as MockService:
            mock_service = MockService.return_value
            mock_service.batch_translate = AsyncMock(return_value=[
                "Hola", "Mundo", "Prueba"
            ])
            results = await mock_service.batch_translate(texts, "en", "es")
            assert len(results) == 3
            assert results[0] == "Hola"
class TestUnsupportedLanguages:
    """Test handling of unsupported language pairs.

    The async test is explicitly marked with ``@pytest.mark.asyncio`` so it
    is awaited under pytest-asyncio's default (strict) mode; the marker is
    harmless when the project runs with ``asyncio_mode = auto``.
    """

    @pytest.mark.asyncio
    async def test_unsupported_target_language(self):
        """Test error handling for unsupported target language"""
        with patch('app.services.translation_service.TranslationService') as MockService:
            mock_service = MockService.return_value
            # The mocked service is configured to reject the call outright.
            mock_service.translate = AsyncMock(side_effect=ValueError("Unsupported language pair"))
            with pytest.raises(ValueError, match="Unsupported"):
                await mock_service.translate("Hello", "en", "xyz")

    def test_supported_language_pairs(self):
        """Test that common language pairs are supported"""
        # Single definition of the accepted codes, built once instead of
        # being re-created twice per loop iteration.
        supported_langs = {"en", "es", "fr", "de", "zh", "ja", "ko", "ru", "ar", "hi"}
        supported_pairs = [
            ("en", "es"), ("en", "fr"), ("en", "de"),
            ("en", "zh"), ("en", "ja"), ("es", "en"),
            ("fr", "en"), ("de", "en"),
        ]
        # All these should be in our supported list
        for source, target in supported_pairs:
            assert source in supported_langs
            assert target in supported_langs
class TestTranslationQuality:
    """Sanity checks around input edge cases: empty text and long text."""

    def test_empty_input_handling(self):
        """An empty string has no content left after stripping whitespace."""
        blank = ""
        assert not blank.strip()

    def test_very_long_text_chunking(self):
        """A long text splits into several chunks, none over the size limit."""
        # Limit is labelled "tokens" but is counted in whitespace-separated
        # words by this simulation.
        limit = 512
        tokens = ("Hello world. " * 1000).split()

        # Simple word-based chunking simulation: consecutive slices of at
        # most `limit` words, re-joined with single spaces.
        pieces = [
            " ".join(tokens[start:start + limit])
            for start in range(0, len(tokens), limit)
        ]

        assert len(pieces) > 1
        assert all(len(piece.split()) <= limit for piece in pieces)
# Run tests when this file is executed directly. pytest.main() returns the
# run's exit code; raise SystemExit with it so a failing run is reported to
# the calling shell or CI step instead of always exiting with status 0.
if __name__ == "__main__":
    raise SystemExit(pytest.main([__file__, "-v"]))