""" tests/test_phase3.py ==================== Phase 3 — ASR & Voice Input Tests Tests: - QueryPreprocessor: normalization, filler removal, intent classification, language detection, edge cases - WhisperTranscriber: VAD check, model loading (mocked), result schema, fallback behavior Whisper model tests use mocks — actual GPU transcription is tested via manual/integration testing on HuggingFace Spaces. Run with: pytest tests/test_phase3.py -v """ from __future__ import annotations import io import struct import wave from pathlib import Path from unittest.mock import MagicMock, patch import numpy as np import pytest from voicevault.asr.query_preprocessor import PreprocessedQuery, QueryPreprocessor from voicevault.models import TranscriptResult # ------------------------------------------------------------------ # # QueryPreprocessor Tests # # ------------------------------------------------------------------ # class TestQueryPreprocessorNormalization: """Test transcript cleaning and normalization.""" def setup_method(self) -> None: self.proc = QueryPreprocessor() def test_lowercases_input(self) -> None: result = self.proc.process("What IS Machine Learning?") assert result.processed_query == result.processed_query.lower() def test_removes_leading_trailing_whitespace(self) -> None: result = self.proc.process(" what is AI ") assert result.processed_query == result.processed_query.strip() def test_collapses_multiple_spaces(self) -> None: result = self.proc.process("what is machine learning") assert " " not in result.processed_query def test_removes_um_filler(self) -> None: result = self.proc.process("um what is machine learning") assert "um" not in result.processed_query.split() def test_removes_uh_filler(self) -> None: result = self.proc.process("uh can you explain neural networks") assert "uh" not in result.processed_query.split() def test_removes_like_filler(self) -> None: result = self.proc.process("what is like machine learning") assert result.processed_query.count("like") == 0 def test_removes_you_know_filler(self) -> None: result = self.proc.process("you know what is deep learning") assert "you know" not in result.processed_query def test_removes_multiple_fillers(self) -> None: raw = "um uh so what is like you know machine learning" result = self.proc.process(raw) for filler in ["um", "uh", "so", "like"]: assert filler not in result.processed_query.split(), f"Filler '{filler}' not removed" def test_preserves_non_filler_words(self) -> None: result = self.proc.process("what is machine learning") assert "machine" in result.processed_query assert "learning" in result.processed_query def test_empty_string_handled(self) -> None: result = self.proc.process("") assert result.processed_query == "" assert result.query_type == "factual" assert result.language == "en" def test_whitespace_only_handled(self) -> None: result = self.proc.process(" ") assert result.processed_query == "" def test_raw_query_preserved(self) -> None: raw = "Um WHAT is Machine Learning?" result = self.proc.process(raw) assert result.raw_query == raw class TestQueryPreprocessorIntentClassification: """Test query type classification.""" def setup_method(self) -> None: self.proc = QueryPreprocessor() def test_what_is_classified_factual(self) -> None: result = self.proc.process("what is transformer architecture") assert result.query_type == "factual" def test_what_are_classified_factual(self) -> None: result = self.proc.process("what are the main types of machine learning") assert result.query_type == "factual" def test_who_classified_factual(self) -> None: result = self.proc.process("who invented the transformer model") assert result.query_type == "factual" def test_when_classified_factual(self) -> None: result = self.proc.process("when was GPT-4 released") assert result.query_type == "factual" def test_where_classified_factual(self) -> None: result = self.proc.process("where was OpenAI founded") assert result.query_type == "factual" def test_summarize_classified_summary(self) -> None: result = self.proc.process("summarize the research paper") assert result.query_type == "summary" def test_summarise_british_spelling(self) -> None: result = self.proc.process("summarise the document") assert result.query_type == "summary" def test_give_overview_classified_summary(self) -> None: result = self.proc.process("give me an overview of the project") assert result.query_type == "summary" def test_explain_classified_summary(self) -> None: result = self.proc.process("explain how neural networks work") assert result.query_type == "summary" def test_describe_classified_summary(self) -> None: result = self.proc.process("describe the methodology used") assert result.query_type == "summary" def test_compare_classified_compare(self) -> None: result = self.proc.process("compare BERT and GPT") assert result.query_type == "compare" def test_difference_classified_compare(self) -> None: result = self.proc.process("what is the difference between RNN and LSTM") assert result.query_type == "compare" def test_versus_classified_compare(self) -> None: result = self.proc.process("supervised versus unsupervised learning") assert result.query_type == "compare" def test_vs_classified_compare(self) -> None: result = self.proc.process("precision vs recall tradeoff") assert result.query_type == "compare" def test_compare_takes_priority_over_summary(self) -> None: result = self.proc.process("summarize and compare the two approaches") assert result.query_type == "compare" def test_unknown_query_defaults_to_factual(self) -> None: result = self.proc.process("machine learning accuracy results") assert result.query_type == "factual" class TestQueryPreprocessorLanguageDetection: """Test language detection.""" def setup_method(self) -> None: self.proc = QueryPreprocessor() def test_english_detected(self) -> None: result = self.proc.process("what is machine learning and artificial intelligence") assert result.language == "en" def test_short_query_defaults_to_english(self) -> None: result = self.proc.process("AI") assert result.language == "en" def test_returns_string(self) -> None: result = self.proc.process("what is the accuracy of this model") assert isinstance(result.language, str) assert len(result.language) >= 2 # ------------------------------------------------------------------ # # WhisperTranscriber Tests (Mocked) # # ------------------------------------------------------------------ # def _make_wav_file(tmp_path: Path, duration_s: float = 2.0, rms_amplitude: float = 0.1) -> Path: """Create a WAV file with a sine wave for testing.""" sample_rate = 16000 n_samples = int(duration_s * sample_rate) t = np.linspace(0, duration_s, n_samples) audio = (rms_amplitude * np.sin(2 * np.pi * 440 * t)).astype(np.float32) wav_path = tmp_path / "test_audio.wav" with wave.open(str(wav_path), "w") as wf: wf.setnchannels(1) wf.setsampwidth(2) wf.setframerate(sample_rate) pcm = (audio * 32767).astype(np.int16) wf.writeframes(pcm.tobytes()) return wav_path def _make_silent_wav(tmp_path: Path, duration_s: float = 2.0) -> Path: """Create a silent WAV file for VAD testing.""" return _make_wav_file(tmp_path, duration_s=duration_s, rms_amplitude=0.0) class TestWhisperTranscriberVAD: """Tests for Voice Activity Detection (no Whisper model loaded).""" def test_valid_audio_passes_vad(self, tmp_path: Path) -> None: from voicevault.asr.whisper_transcriber import WhisperTranscriber transcriber = WhisperTranscriber() wav_path = _make_wav_file(tmp_path, duration_s=2.0) # Should not raise try: import soundfile # Only test VAD if soundfile is available transcriber._vad_check(wav_path) except ImportError: pytest.skip("soundfile not installed") def test_silent_audio_raises(self, tmp_path: Path) -> None: from voicevault.asr.whisper_transcriber import WhisperTranscriber, WhisperTranscriberError transcriber = WhisperTranscriber() silent_path = _make_silent_wav(tmp_path) try: import soundfile with pytest.raises(WhisperTranscriberError, match="No speech"): transcriber._vad_check(silent_path) except ImportError: pytest.skip("soundfile not installed") def test_missing_file_raises(self, tmp_path: Path) -> None: from voicevault.asr.whisper_transcriber import WhisperTranscriber, WhisperTranscriberError transcriber = WhisperTranscriber() with pytest.raises(WhisperTranscriberError, match="not found"): transcriber.transcribe(tmp_path / "nonexistent.wav") class TestWhisperTranscriberMocked: """Tests for WhisperTranscriber using mocked Whisper pipeline.""" def _make_transcriber_with_mock(self, mock_text: str = "what is machine learning"): from voicevault.asr.whisper_transcriber import WhisperTranscriber transcriber = WhisperTranscriber(force_cpu=True) mock_pipe = MagicMock(return_value={"text": mock_text}) transcriber._pipeline = mock_pipe transcriber._model_used = "mock-whisper" return transcriber def test_transcribe_returns_transcript_result(self, tmp_path: Path) -> None: from voicevault.asr.whisper_transcriber import WhisperTranscriber wav_path = _make_wav_file(tmp_path) transcriber = self._make_transcriber_with_mock("what is machine learning") with patch.object(transcriber, "_vad_check"): result = transcriber.transcribe(wav_path) assert isinstance(result, TranscriptResult) def test_transcribe_cleans_transcript(self, tmp_path: Path) -> None: wav_path = _make_wav_file(tmp_path) transcriber = self._make_transcriber_with_mock("Um, what is, like, machine learning?") with patch.object(transcriber, "_vad_check"): result = transcriber.transcribe(wav_path) assert "um" not in result.transcript.split() assert "like" not in result.transcript.split() def test_transcribe_classifies_factual(self, tmp_path: Path) -> None: wav_path = _make_wav_file(tmp_path) transcriber = self._make_transcriber_with_mock("what is the accuracy of the model") with patch.object(transcriber, "_vad_check"): result = transcriber.transcribe(wav_path) assert result.query_type == "factual" def test_transcribe_classifies_summary(self, tmp_path: Path) -> None: wav_path = _make_wav_file(tmp_path) transcriber = self._make_transcriber_with_mock("summarize the research findings") with patch.object(transcriber, "_vad_check"): result = transcriber.transcribe(wav_path) assert result.query_type == "summary" def test_transcribe_records_model_used(self, tmp_path: Path) -> None: wav_path = _make_wav_file(tmp_path) transcriber = self._make_transcriber_with_mock() with patch.object(transcriber, "_vad_check"): result = transcriber.transcribe(wav_path) assert result.model_used == "mock-whisper" def test_transcribe_records_latency(self, tmp_path: Path) -> None: wav_path = _make_wav_file(tmp_path) transcriber = self._make_transcriber_with_mock() with patch.object(transcriber, "_vad_check"): result = transcriber.transcribe(wav_path) assert result.latency_ms >= 0 def test_transcribe_preserves_raw_transcript(self, tmp_path: Path) -> None: raw = "Um, what is, LIKE, machine learning?" wav_path = _make_wav_file(tmp_path) transcriber = self._make_transcriber_with_mock(raw) with patch.object(transcriber, "_vad_check"): result = transcriber.transcribe(wav_path) assert result.raw_transcript == raw def test_is_ready_true_after_loading(self, tmp_path: Path) -> None: transcriber = self._make_transcriber_with_mock() assert transcriber.is_ready() is True def test_is_ready_false_before_loading(self) -> None: from voicevault.asr.whisper_transcriber import WhisperTranscriber transcriber = WhisperTranscriber() assert transcriber.is_ready() is False def test_transcribe_raises_on_pipeline_error(self, tmp_path: Path) -> None: from voicevault.asr.whisper_transcriber import WhisperTranscriber, WhisperTranscriberError wav_path = _make_wav_file(tmp_path) transcriber = WhisperTranscriber() transcriber._pipeline = MagicMock(side_effect=RuntimeError("CUDA OOM")) transcriber._model_used = "mock-whisper" with patch.object(transcriber, "_vad_check"): with pytest.raises(WhisperTranscriberError, match="Transcription failed"): transcriber.transcribe(wav_path) class TestTranscriptResultModel: """Verify TranscriptResult Pydantic model.""" def test_default_confidence_is_one(self) -> None: result = TranscriptResult( transcript="what is AI", raw_transcript="what is AI", model_used="whisper-large-v3", ) assert result.confidence == 1.0 def test_default_query_type_is_factual(self) -> None: result = TranscriptResult( transcript="AI", raw_transcript="AI", model_used="whisper-large-v3", ) assert result.query_type == "factual" def test_default_language_is_en(self) -> None: result = TranscriptResult( transcript="hello", raw_transcript="hello", model_used="whisper-large-v3", ) assert result.language == "en"