Spaces:
Running
Running
| """ | |
| tests/test_phase3.py | |
| ==================== | |
| Phase 3 — ASR & Voice Input Tests | |
| Tests: | |
| - QueryPreprocessor: normalization, filler removal, intent classification, | |
| language detection, edge cases | |
| - WhisperTranscriber: VAD check, model loading (mocked), result schema, | |
| fallback behavior | |
| Whisper model tests use mocks — actual GPU transcription is tested | |
| via manual/integration testing on HuggingFace Spaces. | |
| Run with: pytest tests/test_phase3.py -v | |
| """ | |
| from __future__ import annotations | |
| import io | |
| import struct | |
| import wave | |
| from pathlib import Path | |
| from unittest.mock import MagicMock, patch | |
| import numpy as np | |
| import pytest | |
| from voicevault.asr.query_preprocessor import PreprocessedQuery, QueryPreprocessor | |
| from voicevault.models import TranscriptResult | |
| # ------------------------------------------------------------------ # | |
| # QueryPreprocessor Tests # | |
| # ------------------------------------------------------------------ # | |
class TestQueryPreprocessorNormalization:
    """Test transcript cleaning and normalization.

    Each test passes a raw string to ``QueryPreprocessor.process`` and checks
    the ``processed_query`` (or ``raw_query``) attribute of the result.
    """

    def setup_method(self) -> None:
        # pytest runs setup_method before every test, so each case gets its
        # own QueryPreprocessor instance.
        self.proc = QueryPreprocessor()

    def test_lowercases_input(self) -> None:
        result = self.proc.process("What IS Machine Learning?")
        assert result.processed_query == result.processed_query.lower()

    def test_removes_leading_trailing_whitespace(self) -> None:
        result = self.proc.process(" what is AI ")
        assert result.processed_query == result.processed_query.strip()

    def test_collapses_multiple_spaces(self) -> None:
        # The input must contain runs of several spaces, and the check must
        # look for a *double* space: a single space is legitimately present
        # between the words of any multi-word result.
        result = self.proc.process("what   is    machine  learning")
        assert "  " not in result.processed_query

    def test_removes_um_filler(self) -> None:
        result = self.proc.process("um what is machine learning")
        assert "um" not in result.processed_query.split()

    def test_removes_uh_filler(self) -> None:
        result = self.proc.process("uh can you explain neural networks")
        assert "uh" not in result.processed_query.split()

    def test_removes_like_filler(self) -> None:
        result = self.proc.process("what is like machine learning")
        assert result.processed_query.count("like") == 0

    def test_removes_you_know_filler(self) -> None:
        result = self.proc.process("you know what is deep learning")
        assert "you know" not in result.processed_query

    def test_removes_multiple_fillers(self) -> None:
        raw = "um uh so what is like you know machine learning"
        result = self.proc.process(raw)
        tokens = result.processed_query.split()
        for filler in ["um", "uh", "so", "like"]:
            assert filler not in tokens, f"Filler '{filler}' not removed"
        # "you know" spans two tokens, so it needs a substring check rather
        # than the per-token membership test used above.
        assert "you know" not in result.processed_query, "Filler 'you know' not removed"

    def test_preserves_non_filler_words(self) -> None:
        result = self.proc.process("what is machine learning")
        assert "machine" in result.processed_query
        assert "learning" in result.processed_query

    def test_empty_string_handled(self) -> None:
        # Empty input is expected to yield an empty query with the default
        # classification and language.
        result = self.proc.process("")
        assert result.processed_query == ""
        assert result.query_type == "factual"
        assert result.language == "en"

    def test_whitespace_only_handled(self) -> None:
        result = self.proc.process(" ")
        assert result.processed_query == ""

    def test_raw_query_preserved(self) -> None:
        # raw_query must be the untouched input, not the cleaned text.
        raw = "Um WHAT is Machine Learning?"
        result = self.proc.process(raw)
        assert result.raw_query == raw
class TestQueryPreprocessorIntentClassification:
    """Check the ``query_type`` label produced for different phrasings."""

    def setup_method(self) -> None:
        self.proc = QueryPreprocessor()

    def _query_type_of(self, text: str):
        """Run *text* through the preprocessor and return its ``query_type``."""
        return self.proc.process(text).query_type

    # --- expected label: "factual" ---------------------------------- #

    def test_what_is_classified_factual(self) -> None:
        assert self._query_type_of("what is transformer architecture") == "factual"

    def test_what_are_classified_factual(self) -> None:
        label = self._query_type_of("what are the main types of machine learning")
        assert label == "factual"

    def test_who_classified_factual(self) -> None:
        assert self._query_type_of("who invented the transformer model") == "factual"

    def test_when_classified_factual(self) -> None:
        assert self._query_type_of("when was GPT-4 released") == "factual"

    def test_where_classified_factual(self) -> None:
        assert self._query_type_of("where was OpenAI founded") == "factual"

    # --- expected label: "summary" ---------------------------------- #

    def test_summarize_classified_summary(self) -> None:
        assert self._query_type_of("summarize the research paper") == "summary"

    def test_summarise_british_spelling(self) -> None:
        assert self._query_type_of("summarise the document") == "summary"

    def test_give_overview_classified_summary(self) -> None:
        assert self._query_type_of("give me an overview of the project") == "summary"

    def test_explain_classified_summary(self) -> None:
        assert self._query_type_of("explain how neural networks work") == "summary"

    def test_describe_classified_summary(self) -> None:
        assert self._query_type_of("describe the methodology used") == "summary"

    # --- expected label: "compare" ---------------------------------- #

    def test_compare_classified_compare(self) -> None:
        assert self._query_type_of("compare BERT and GPT") == "compare"

    def test_difference_classified_compare(self) -> None:
        label = self._query_type_of("what is the difference between RNN and LSTM")
        assert label == "compare"

    def test_versus_classified_compare(self) -> None:
        assert self._query_type_of("supervised versus unsupervised learning") == "compare"

    def test_vs_classified_compare(self) -> None:
        assert self._query_type_of("precision vs recall tradeoff") == "compare"

    def test_compare_takes_priority_over_summary(self) -> None:
        # The query contains both a summary cue and a compare cue.
        label = self._query_type_of("summarize and compare the two approaches")
        assert label == "compare"

    # --- fallback ---------------------------------------------------- #

    def test_unknown_query_defaults_to_factual(self) -> None:
        assert self._query_type_of("machine learning accuracy results") == "factual"
class TestQueryPreprocessorLanguageDetection:
    """Check the ``language`` attribute reported by the preprocessor."""

    def setup_method(self) -> None:
        self.proc = QueryPreprocessor()

    def test_english_detected(self) -> None:
        query = "what is machine learning and artificial intelligence"
        processed = self.proc.process(query)
        assert processed.language == "en"

    def test_short_query_defaults_to_english(self) -> None:
        # A two-character query is expected to come back as "en".
        processed = self.proc.process("AI")
        assert processed.language == "en"

    def test_returns_string(self) -> None:
        processed = self.proc.process("what is the accuracy of this model")
        # The language value must be a string of at least two characters.
        assert isinstance(processed.language, str)
        assert len(processed.language) >= 2
| # ------------------------------------------------------------------ # | |
| # WhisperTranscriber Tests (Mocked) # | |
| # ------------------------------------------------------------------ # | |
def _make_wav_file(tmp_path: Path, duration_s: float = 2.0, rms_amplitude: float = 0.1) -> Path:
    """Create a mono, 16-bit, 16 kHz WAV file containing a 440 Hz sine wave.

    Args:
        tmp_path: Directory in which ``test_audio.wav`` is written.
        duration_s: Clip length in seconds.
        rms_amplitude: Scale applied to the unit sine wave, relative to full
            scale (1.0). Despite the name this is the waveform's *peak*; the
            RMS of a sine is peak / sqrt(2). The name is kept so existing
            keyword callers continue to work. Values above 1.0 saturate.

    Returns:
        Path of the WAV file that was written.
    """
    sample_rate = 16000
    n_samples = int(duration_s * sample_rate)

    # Sample k sits at exactly k / sample_rate seconds. An endpoint-inclusive
    # linspace would stretch the spacing to duration / (n - 1) and detune the
    # tone slightly.
    t = np.arange(n_samples) / sample_rate
    audio = (rms_amplitude * np.sin(2 * np.pi * 440 * t)).astype(np.float32)

    # Clamp to full scale so amplitudes above 1.0 saturate instead of
    # overflowing the 16-bit conversion below.
    audio = np.clip(audio, -1.0, 1.0)

    # WAV PCM data is little-endian regardless of the host byte order.
    pcm = (audio * 32767).astype("<i2")

    wav_path = tmp_path / "test_audio.wav"
    with wave.open(str(wav_path), "wb") as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)
        wf.setframerate(sample_rate)
        wf.writeframes(pcm.tobytes())
    return wav_path
def _make_silent_wav(tmp_path: Path, duration_s: float = 2.0) -> Path:
    """Write an all-zero WAV clip (for VAD tests) and return its path."""
    # Zero amplitude makes every sample of the generated tone zero.
    silent_path = _make_wav_file(tmp_path, rms_amplitude=0.0, duration_s=duration_s)
    return silent_path
class TestWhisperTranscriberVAD:
    """Tests for Voice Activity Detection (no Whisper model loaded)."""

    def test_valid_audio_passes_vad(self, tmp_path: Path) -> None:
        """A non-silent clip must pass ``_vad_check`` without raising."""
        from voicevault.asr.whisper_transcriber import WhisperTranscriber

        # Skip before exercising the code under test. Wrapping the call in
        # try/except ImportError would also turn an ImportError raised
        # *inside* _vad_check into a skip and hide a real failure.
        pytest.importorskip("soundfile")

        transcriber = WhisperTranscriber()
        wav_path = _make_wav_file(tmp_path, duration_s=2.0)
        transcriber._vad_check(wav_path)  # should not raise

    def test_silent_audio_raises(self, tmp_path: Path) -> None:
        """An all-zero clip must be rejected with a "No speech" error."""
        from voicevault.asr.whisper_transcriber import WhisperTranscriber, WhisperTranscriberError

        pytest.importorskip("soundfile")

        transcriber = WhisperTranscriber()
        silent_path = _make_silent_wav(tmp_path)
        with pytest.raises(WhisperTranscriberError, match="No speech"):
            transcriber._vad_check(silent_path)

    def test_missing_file_raises(self, tmp_path: Path) -> None:
        """Transcribing a path that does not exist must raise "not found"."""
        from voicevault.asr.whisper_transcriber import WhisperTranscriber, WhisperTranscriberError

        transcriber = WhisperTranscriber()
        with pytest.raises(WhisperTranscriberError, match="not found"):
            transcriber.transcribe(tmp_path / "nonexistent.wav")
class TestWhisperTranscriberMocked:
    """Tests for WhisperTranscriber using mocked Whisper pipeline."""

    def _make_transcriber_with_mock(self, mock_text: str = "what is machine learning"):
        """Return a WhisperTranscriber whose pipeline is replaced by a mock.

        The mock pipeline returns ``{"text": mock_text}`` when called, and
        ``_model_used`` is set to ``"mock-whisper"``.
        """
        from voicevault.asr.whisper_transcriber import WhisperTranscriber

        transcriber = WhisperTranscriber(force_cpu=True)
        transcriber._pipeline = MagicMock(return_value={"text": mock_text})
        transcriber._model_used = "mock-whisper"
        return transcriber

    def _transcribe(self, tmp_path: Path, mock_text: str = "what is machine learning"):
        """Transcribe a generated WAV with a mocked pipeline and VAD bypassed.

        Shared arrange/act step for the tests below: writes a test clip into
        *tmp_path*, builds a mocked transcriber that yields *mock_text*,
        patches out ``_vad_check`` and returns the ``transcribe`` result.
        """
        wav_path = _make_wav_file(tmp_path)
        transcriber = self._make_transcriber_with_mock(mock_text)
        with patch.object(transcriber, "_vad_check"):
            return transcriber.transcribe(wav_path)

    def test_transcribe_returns_transcript_result(self, tmp_path: Path) -> None:
        result = self._transcribe(tmp_path, "what is machine learning")
        assert isinstance(result, TranscriptResult)

    def test_transcribe_cleans_transcript(self, tmp_path: Path) -> None:
        result = self._transcribe(tmp_path, "Um, what is, like, machine learning?")
        tokens = result.transcript.split()
        assert "um" not in tokens
        assert "like" not in tokens

    def test_transcribe_classifies_factual(self, tmp_path: Path) -> None:
        result = self._transcribe(tmp_path, "what is the accuracy of the model")
        assert result.query_type == "factual"

    def test_transcribe_classifies_summary(self, tmp_path: Path) -> None:
        result = self._transcribe(tmp_path, "summarize the research findings")
        assert result.query_type == "summary"

    def test_transcribe_records_model_used(self, tmp_path: Path) -> None:
        result = self._transcribe(tmp_path)
        assert result.model_used == "mock-whisper"

    def test_transcribe_records_latency(self, tmp_path: Path) -> None:
        result = self._transcribe(tmp_path)
        assert result.latency_ms >= 0

    def test_transcribe_preserves_raw_transcript(self, tmp_path: Path) -> None:
        # raw_transcript must be exactly what the pipeline returned.
        raw = "Um, what is, LIKE, machine learning?"
        result = self._transcribe(tmp_path, raw)
        assert result.raw_transcript == raw

    def test_is_ready_true_after_loading(self, tmp_path: Path) -> None:
        transcriber = self._make_transcriber_with_mock()
        assert transcriber.is_ready() is True

    def test_is_ready_false_before_loading(self) -> None:
        from voicevault.asr.whisper_transcriber import WhisperTranscriber

        transcriber = WhisperTranscriber()
        assert transcriber.is_ready() is False

    def test_transcribe_raises_on_pipeline_error(self, tmp_path: Path) -> None:
        """A pipeline exception must surface as WhisperTranscriberError."""
        from voicevault.asr.whisper_transcriber import WhisperTranscriber, WhisperTranscriberError

        wav_path = _make_wav_file(tmp_path)
        transcriber = WhisperTranscriber()
        # The mocked pipeline raises as soon as it is called.
        transcriber._pipeline = MagicMock(side_effect=RuntimeError("CUDA OOM"))
        transcriber._model_used = "mock-whisper"
        with patch.object(transcriber, "_vad_check"), pytest.raises(
            WhisperTranscriberError, match="Transcription failed"
        ):
            transcriber.transcribe(wav_path)
class TestTranscriptResultModel:
    """Check the defaults TranscriptResult supplies for omitted fields."""

    def test_default_confidence_is_one(self) -> None:
        # Only the three fields passed here are supplied; confidence is left out.
        fields = {
            "transcript": "what is AI",
            "raw_transcript": "what is AI",
            "model_used": "whisper-large-v3",
        }
        assert TranscriptResult(**fields).confidence == 1.0

    def test_default_query_type_is_factual(self) -> None:
        fields = {
            "transcript": "AI",
            "raw_transcript": "AI",
            "model_used": "whisper-large-v3",
        }
        assert TranscriptResult(**fields).query_type == "factual"

    def test_default_language_is_en(self) -> None:
        fields = {
            "transcript": "hello",
            "raw_transcript": "hello",
            "model_used": "whisper-large-v3",
        }
        assert TranscriptResult(**fields).language == "en"