File size: 2,278 Bytes
85f900d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
"""
tests/conftest.py
=================
Shared pytest fixtures for all VoiceVault tests.

Fixtures:
    tmp_db          — fresh SQLite database inside pytest's per-test tmp_path
    tmp_data_dir    — temporary data directory tree
    sample_chunk    — a DocumentChunk instance for retrieval tests
    sample_citation — a Citation instance for generation tests
"""

from __future__ import annotations

import os
from pathlib import Path

# Force CPU for all tests — avoids CUDA compatibility issues on unsupported GPUs
# (e.g. RTX 5070 / sm_120 not yet supported by packaged PyTorch builds).
# "-1" hides all CUDA devices from PyTorch (unlike "", which is ignored on Windows).
# NOTE(review): this assignment sits above the pytest/voicevault imports below,
# presumably so the variable is already set before anything loads PyTorch —
# confirm before reordering the imports around it.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

import pytest

from voicevault.models import Citation, DocumentChunk
from voicevault.storage.sqlite_store import initialize_database


@pytest.fixture
def tmp_db(tmp_path: Path) -> Path:
    """
    Provide a fresh, initialized SQLite database in a temp directory.

    The database path lives inside pytest's per-test ``tmp_path``, so each
    test gets its own isolated database. This fixture performs no explicit
    teardown; removal of the file is left to pytest's temporary-directory
    handling.

    Args:
        tmp_path: pytest's built-in per-test temporary directory.

    Returns:
        Path of the ``voicevault.db`` file that was passed to
        ``initialize_database``.
    """
    db_path = tmp_path / "voicevault.db"
    # Run the project's initializer before handing the path to the test.
    initialize_database(db_path)
    return db_path


@pytest.fixture
def tmp_data_dir(tmp_path: Path) -> Path:
    """
    Build a scratch data directory tree intended to mirror the runtime layout.

    Created under ``tmp_path / "data"``:
        uploads/
        test_kb/chroma/

    Returns:
        Path of the ``data`` root directory.
    """
    root = tmp_path / "data"
    # Each tuple is one leaf directory, expressed as its path components;
    # parents=True creates the intermediate directories (including root).
    for parts in (("uploads",), ("test_kb", "chroma")):
        root.joinpath(*parts).mkdir(parents=True)
    return root


@pytest.fixture
def sample_chunk() -> DocumentChunk:
    """Build the canned DocumentChunk used by retrieval tests."""
    # Field values are collected first, then unpacked as keyword arguments.
    fields = {
        "kb_name": "test_kb",
        "source_file": "sample.pdf",
        "page_number": 1,
        "section": "Introduction",
        "chunk_index": 0,
        "text": "VoiceVault uses hybrid BM25 and vector search to retrieve relevant document chunks.",
        "text_hash": "abc123",
        "token_count": 16,
        "language": "en",
    }
    return DocumentChunk(**fields)


@pytest.fixture
def sample_citation() -> Citation:
    """Build the canned Citation used by generation tests."""
    # Field values are collected first, then unpacked as keyword arguments.
    fields = {
        "source_file": "sample.pdf",
        "page_number": 1,
        "section": "Introduction",
        "excerpt": "VoiceVault uses hybrid BM25 and vector search.",
        "relevance_score": 0.92,
    }
    return Citation(**fields)