# Source: Hugging Face Space NinjainPJs/VoiceVault (status: Running)
# File size: 14,637 bytes · revision 85f900d

"""
tests/test_phase3.py
====================
Phase 3 — ASR & Voice Input Tests

Tests:
  - QueryPreprocessor: normalization, filler removal, intent classification,
                       language detection, edge cases
  - WhisperTranscriber: VAD check, model loading (mocked), result schema,
                        fallback behavior

Whisper model tests use mocks — actual GPU transcription is tested
via manual/integration testing on HuggingFace Spaces.

Run with: pytest tests/test_phase3.py -v
"""

from __future__ import annotations

import io
import struct
import wave
from pathlib import Path
from unittest.mock import MagicMock, patch

import numpy as np
import pytest

from voicevault.asr.query_preprocessor import PreprocessedQuery, QueryPreprocessor
from voicevault.models import TranscriptResult


# ------------------------------------------------------------------ #
# QueryPreprocessor Tests                                               #
# ------------------------------------------------------------------ #


class TestQueryPreprocessorNormalization:
    """Test transcript cleaning and normalization.

    Every test passes one raw string to ``QueryPreprocessor.process`` and
    inspects ``processed_query`` (or ``raw_query``) on the returned object.
    """

    def setup_method(self) -> None:
        # pytest calls this before each test method, so every test gets a
        # fresh preprocessor instance.
        self.proc = QueryPreprocessor()

    def test_lowercases_input(self) -> None:
        result = self.proc.process("What IS Machine Learning?")
        assert result.processed_query == result.processed_query.lower()

    def test_removes_leading_trailing_whitespace(self) -> None:
        result = self.proc.process("   what is AI   ")
        assert result.processed_query == result.processed_query.strip()

    def test_collapses_multiple_spaces(self) -> None:
        result = self.proc.process("what  is   machine    learning")
        assert "  " not in result.processed_query

    def test_removes_um_filler(self) -> None:
        result = self.proc.process("um what is machine learning")
        # Token-level check so words that merely contain "um" are not flagged.
        assert "um" not in result.processed_query.split()

    def test_removes_uh_filler(self) -> None:
        result = self.proc.process("uh can you explain neural networks")
        assert "uh" not in result.processed_query.split()

    def test_removes_like_filler(self) -> None:
        result = self.proc.process("what is like machine learning")
        # Substring check: also fails if "like" survives inside a longer token.
        assert result.processed_query.count("like") == 0

    def test_removes_you_know_filler(self) -> None:
        result = self.proc.process("you know what is deep learning")
        assert "you know" not in result.processed_query

    def test_removes_multiple_fillers(self) -> None:
        raw = "um uh so what is like you know machine learning"
        result = self.proc.process(raw)
        tokens = result.processed_query.split()
        for filler in ["um", "uh", "so", "like"]:
            assert filler not in tokens, f"Filler '{filler}' not removed"
        # "you know" spans two tokens, so the per-token loop above cannot
        # detect it; check it as a phrase instead.
        assert "you know" not in result.processed_query, "Filler 'you know' not removed"

    def test_preserves_non_filler_words(self) -> None:
        result = self.proc.process("what is machine learning")
        assert "machine" in result.processed_query
        assert "learning" in result.processed_query

    def test_empty_string_handled(self) -> None:
        # Empty input must still yield a result with the default labels.
        result = self.proc.process("")
        assert result.processed_query == ""
        assert result.query_type == "factual"
        assert result.language == "en"

    def test_whitespace_only_handled(self) -> None:
        result = self.proc.process("   ")
        assert result.processed_query == ""

    def test_raw_query_preserved(self) -> None:
        # The untouched input must remain available next to the cleaned text.
        raw = "Um WHAT is Machine Learning?"
        result = self.proc.process(raw)
        assert result.raw_query == raw


class TestQueryPreprocessorIntentClassification:
    """Check the ``query_type`` label produced for different phrasings."""

    def setup_method(self) -> None:
        self.proc = QueryPreprocessor()

    def _label(self, text: str):
        """Process *text* and return the ``query_type`` of the result."""
        return self.proc.process(text).query_type

    # --- factual -------------------------------------------------------

    def test_what_is_classified_factual(self) -> None:
        assert self._label("what is transformer architecture") == "factual"

    def test_what_are_classified_factual(self) -> None:
        assert self._label("what are the main types of machine learning") == "factual"

    def test_who_classified_factual(self) -> None:
        assert self._label("who invented the transformer model") == "factual"

    def test_when_classified_factual(self) -> None:
        assert self._label("when was GPT-4 released") == "factual"

    def test_where_classified_factual(self) -> None:
        assert self._label("where was OpenAI founded") == "factual"

    # --- summary -------------------------------------------------------

    def test_summarize_classified_summary(self) -> None:
        assert self._label("summarize the research paper") == "summary"

    def test_summarise_british_spelling(self) -> None:
        assert self._label("summarise the document") == "summary"

    def test_give_overview_classified_summary(self) -> None:
        assert self._label("give me an overview of the project") == "summary"

    def test_explain_classified_summary(self) -> None:
        assert self._label("explain how neural networks work") == "summary"

    def test_describe_classified_summary(self) -> None:
        assert self._label("describe the methodology used") == "summary"

    # --- compare -------------------------------------------------------

    def test_compare_classified_compare(self) -> None:
        assert self._label("compare BERT and GPT") == "compare"

    def test_difference_classified_compare(self) -> None:
        assert self._label("what is the difference between RNN and LSTM") == "compare"

    def test_versus_classified_compare(self) -> None:
        assert self._label("supervised versus unsupervised learning") == "compare"

    def test_vs_classified_compare(self) -> None:
        assert self._label("precision vs recall tradeoff") == "compare"

    def test_compare_takes_priority_over_summary(self) -> None:
        # The query contains both a summary verb and a compare verb.
        assert self._label("summarize and compare the two approaches") == "compare"

    # --- fallback ------------------------------------------------------

    def test_unknown_query_defaults_to_factual(self) -> None:
        assert self._label("machine learning accuracy results") == "factual"


class TestQueryPreprocessorLanguageDetection:
    """Check the ``language`` code reported on a processed query."""

    def setup_method(self) -> None:
        self.proc = QueryPreprocessor()

    def test_english_detected(self) -> None:
        text = "what is machine learning and artificial intelligence"
        assert self.proc.process(text).language == "en"

    def test_short_query_defaults_to_english(self) -> None:
        assert self.proc.process("AI").language == "en"

    def test_returns_string(self) -> None:
        detected = self.proc.process("what is the accuracy of this model").language
        assert isinstance(detected, str)
        # The code must be at least two characters long (e.g. "en").
        assert len(detected) >= 2


# ------------------------------------------------------------------ #
# WhisperTranscriber Tests (Mocked)                                     #
# ------------------------------------------------------------------ #


def _make_wav_file(tmp_path: Path, duration_s: float = 2.0, rms_amplitude: float = 0.1) -> Path:
    """Write a mono, 16-bit PCM, 16 kHz WAV file containing a 440 Hz sine tone.

    Args:
        tmp_path: Existing directory in which ``test_audio.wav`` is created
            (a previous file of that name is overwritten).
        duration_s: Length of the clip in seconds.
        rms_amplitude: Scale applied to the unit sine before conversion to
            16-bit PCM. Despite the name, this is the *peak* amplitude as a
            fraction of full scale (a sine's RMS is peak / sqrt(2)); ``0.0``
            produces digital silence.

    Returns:
        Path of the written WAV file.
    """
    sample_rate = 16000
    n_samples = int(duration_s * sample_rate)
    # Sample k is taken at k / sample_rate. np.linspace(0, duration_s, n_samples)
    # includes the endpoint, which stretches the spacing to
    # duration_s / (n_samples - 1) and slightly detunes the tone.
    t = np.arange(n_samples) / sample_rate
    audio = (rms_amplitude * np.sin(2 * np.pi * 440 * t)).astype(np.float32)
    # Clip before the integer cast so amplitudes above full scale saturate
    # instead of overflowing; "<i2" makes the little-endian byte order that
    # WAV requires explicit rather than relying on the host's native order.
    pcm = np.clip(audio * 32767, -32768, 32767).astype("<i2")

    wav_path = tmp_path / "test_audio.wav"
    with wave.open(str(wav_path), "wb") as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)  # bytes per sample -> 16-bit
        wf.setframerate(sample_rate)
        wf.writeframes(pcm.tobytes())

    return wav_path


def _make_silent_wav(tmp_path: Path, duration_s: float = 2.0) -> Path:
    """Write a zero-amplitude WAV clip for the VAD tests and return its path."""
    silence_level = 0.0
    return _make_wav_file(tmp_path, duration_s, rms_amplitude=silence_level)


class TestWhisperTranscriberVAD:
    """Tests for Voice Activity Detection (no Whisper model loaded).

    The two ``_vad_check`` tests are skipped when ``soundfile`` is not
    installed. The skip is decided with ``pytest.importorskip`` *before* the
    call under test, so an ImportError raised inside ``_vad_check`` itself is
    reported as a failure instead of being mistaken for a missing dependency.
    """

    def test_valid_audio_passes_vad(self, tmp_path: Path) -> None:
        from voicevault.asr.whisper_transcriber import WhisperTranscriber

        transcriber = WhisperTranscriber()
        wav_path = _make_wav_file(tmp_path, duration_s=2.0)
        # Only test VAD if soundfile is available.
        pytest.importorskip("soundfile")
        # Should not raise.
        transcriber._vad_check(wav_path)

    def test_silent_audio_raises(self, tmp_path: Path) -> None:
        from voicevault.asr.whisper_transcriber import WhisperTranscriber, WhisperTranscriberError

        transcriber = WhisperTranscriber()
        silent_path = _make_silent_wav(tmp_path)
        # Only test VAD if soundfile is available.
        pytest.importorskip("soundfile")
        with pytest.raises(WhisperTranscriberError, match="No speech"):
            transcriber._vad_check(silent_path)

    def test_missing_file_raises(self, tmp_path: Path) -> None:
        from voicevault.asr.whisper_transcriber import WhisperTranscriber, WhisperTranscriberError

        transcriber = WhisperTranscriber()
        # The path is never created, so transcribe() must reject it.
        with pytest.raises(WhisperTranscriberError, match="not found"):
            transcriber.transcribe(tmp_path / "nonexistent.wav")


class TestWhisperTranscriberMocked:
    """Tests for WhisperTranscriber using mocked Whisper pipeline.

    No model is loaded: ``_pipeline`` is replaced by a ``MagicMock`` and
    ``_vad_check`` is patched out, so only the code around the pipeline call
    is exercised.
    """

    def _make_transcriber_with_mock(self, mock_text: str = "what is machine learning"):
        """Return a transcriber whose pipeline is a mock yielding *mock_text*."""
        from voicevault.asr.whisper_transcriber import WhisperTranscriber

        transcriber = WhisperTranscriber(force_cpu=True)
        mock_pipe = MagicMock(return_value={"text": mock_text})
        transcriber._pipeline = mock_pipe
        transcriber._model_used = "mock-whisper"
        return transcriber

    def _transcribe_mocked(self, tmp_path: Path, mock_text: str = "what is machine learning"):
        """Transcribe a generated WAV with the mocked pipeline and VAD bypassed.

        Shared arrange/act step for the tests below; returns whatever
        ``transcribe`` returns.
        """
        wav_path = _make_wav_file(tmp_path)
        transcriber = self._make_transcriber_with_mock(mock_text)
        with patch.object(transcriber, "_vad_check"):
            return transcriber.transcribe(wav_path)

    def test_transcribe_returns_transcript_result(self, tmp_path: Path) -> None:
        result = self._transcribe_mocked(tmp_path, "what is machine learning")
        assert isinstance(result, TranscriptResult)

    def test_transcribe_cleans_transcript(self, tmp_path: Path) -> None:
        result = self._transcribe_mocked(tmp_path, "Um, what is, like, machine learning?")
        # Compare punctuation-stripped, lower-cased tokens. With a bare split()
        # a surviving filler would show up as "um," / "like," and the
        # membership checks would pass vacuously.
        words = {tok.strip(",.?!;:").lower() for tok in result.transcript.split()}
        assert "um" not in words
        assert "like" not in words

    def test_transcribe_classifies_factual(self, tmp_path: Path) -> None:
        result = self._transcribe_mocked(tmp_path, "what is the accuracy of the model")
        assert result.query_type == "factual"

    def test_transcribe_classifies_summary(self, tmp_path: Path) -> None:
        result = self._transcribe_mocked(tmp_path, "summarize the research findings")
        assert result.query_type == "summary"

    def test_transcribe_records_model_used(self, tmp_path: Path) -> None:
        result = self._transcribe_mocked(tmp_path)
        assert result.model_used == "mock-whisper"

    def test_transcribe_records_latency(self, tmp_path: Path) -> None:
        result = self._transcribe_mocked(tmp_path)
        assert result.latency_ms >= 0

    def test_transcribe_preserves_raw_transcript(self, tmp_path: Path) -> None:
        raw = "Um, what is, LIKE, machine learning?"
        result = self._transcribe_mocked(tmp_path, raw)
        assert result.raw_transcript == raw

    def test_is_ready_true_after_loading(self) -> None:
        transcriber = self._make_transcriber_with_mock()
        assert transcriber.is_ready() is True

    def test_is_ready_false_before_loading(self) -> None:
        from voicevault.asr.whisper_transcriber import WhisperTranscriber

        transcriber = WhisperTranscriber()
        assert transcriber.is_ready() is False

    def test_transcribe_raises_on_pipeline_error(self, tmp_path: Path) -> None:
        from voicevault.asr.whisper_transcriber import WhisperTranscriber, WhisperTranscriberError

        wav_path = _make_wav_file(tmp_path)
        transcriber = WhisperTranscriber()
        # The pipeline itself blows up; transcribe() must wrap the error.
        transcriber._pipeline = MagicMock(side_effect=RuntimeError("CUDA OOM"))
        transcriber._model_used = "mock-whisper"

        with patch.object(transcriber, "_vad_check"), pytest.raises(
            WhisperTranscriberError, match="Transcription failed"
        ):
            transcriber.transcribe(wav_path)


class TestTranscriptResultModel:
    """Check the default field values of the TranscriptResult Pydantic model."""

    @staticmethod
    def _minimal(text: str):
        """Build a result passing only transcript, raw_transcript and model_used."""
        return TranscriptResult(
            transcript=text,
            raw_transcript=text,
            model_used="whisper-large-v3",
        )

    def test_default_confidence_is_one(self) -> None:
        assert self._minimal("what is AI").confidence == 1.0

    def test_default_query_type_is_factual(self) -> None:
        assert self._minimal("AI").query_type == "factual"

    def test_default_language_is_en(self) -> None:
        assert self._minimal("hello").language == "en"