Spaces:
Running
Running
File size: 3,621 Bytes
"""
tests/test_core.py
Unit tests for the KerdosRAG public API (no HF token required).
"""
import os
import sys
import tempfile
import pytest
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from kerdos_rag import KerdosRAG
# ── Fixtures ──────────────────────────────────────────────────────────────────
@pytest.fixture
def engine():
    """Provide a brand-new KerdosRAG instance, with nothing indexed, per test."""
    # The placeholder token is never exercised by the indexing-only tests.
    fresh = KerdosRAG(hf_token="hf_dummy")
    return fresh
@pytest.fixture
def indexed_engine(tmp_path):
    """Provide a KerdosRAG instance that has already indexed ``policy.txt``.

    The document is a small UTF-8 text file created inside the test's
    ``tmp_path`` directory.
    """
    policy_text = (
        "The refund policy allows returns within 30 days of purchase. "
        "Contact support at support@example.com for assistance."
    )
    policy_path = tmp_path / "policy.txt"
    policy_path.write_text(policy_text, encoding="utf-8")

    rag = KerdosRAG(hf_token="hf_dummy")
    rag.index([str(policy_path)])
    return rag
# ── Tests ─────────────────────────────────────────────────────────────────────
def test_initial_state(engine):
    """A newly constructed engine is not ready and has nothing indexed."""
    no_sources = set()
    assert engine.is_ready is False
    assert engine.chunk_count == 0
    assert engine.indexed_sources == no_sources
def test_index_returns_correct_metadata(indexed_engine):
    """After indexing one file the engine is ready, has chunks, and lists the source."""
    assert indexed_engine.is_ready
    assert indexed_engine.chunk_count > 0
    known_sources = indexed_engine.indexed_sources
    assert "policy.txt" in known_sources
def test_index_skips_duplicates(indexed_engine, tmp_path):
    """Re-indexing a filename that is already indexed reports it as skipped."""
    # Same location the ``indexed_engine`` fixture wrote to: ``tmp_path`` is
    # shared between a test and its fixtures, so this overwrites that file.
    duplicate = tmp_path / "policy.txt"
    duplicate.write_text("Some extra content.", encoding="utf-8")

    outcome = indexed_engine.index([str(duplicate)])

    assert "policy.txt" in outcome["skipped"]
    assert "policy.txt" not in outcome["indexed"]
def test_index_multiple_files(engine, tmp_path):
    """Indexing two files in a single call reports both as indexed."""
    contents = {
        "a.txt": "Alpha content here.",
        "b.txt": "Beta content here.",
    }
    paths = []
    for filename, text in contents.items():
        target = tmp_path / filename
        target.write_text(text, encoding="utf-8")
        paths.append(str(target))

    summary = engine.index(paths)

    assert len(summary["indexed"]) == 2
    assert summary["chunk_count"] > 0
def test_reset_clears_index(indexed_engine):
    """``reset()`` leaves a previously indexed engine empty and not ready."""
    # Precondition: the fixture really did index something.
    assert indexed_engine.is_ready

    indexed_engine.reset()

    nothing = set()
    assert not indexed_engine.is_ready
    assert indexed_engine.chunk_count == 0
    assert indexed_engine.indexed_sources == nothing
def test_chat_raises_when_not_indexed(engine):
    """Consuming ``chat()`` on an engine with no documents raises RuntimeError."""
    question = "What is the policy?"
    with pytest.raises(RuntimeError, match="No documents indexed"):
        # list() drains the result; presumably chat() is lazy, so the error
        # may only surface on iteration.
        list(engine.chat(question))
def test_chat_raises_without_token(tmp_path):
    """Consuming ``chat()`` on an engine built with an empty token raises ValueError."""
    doc_path = tmp_path / "doc.txt"
    doc_path.write_text("Hello world.", encoding="utf-8")

    # Indexing is done first so that the engine has a document before chat().
    tokenless = KerdosRAG(hf_token="")
    tokenless.index([str(doc_path)])

    question = "What does it say?"
    with pytest.raises(ValueError, match="No Hugging Face token"):
        list(tokenless.chat(question))
def test_save_and_load(indexed_engine, tmp_path):
    """A saved index can be loaded back with the same chunk count and sources."""
    target_dir = tmp_path / "saved_index"
    indexed_engine.save(str(target_dir))

    # Both on-disk artifacts must exist after save().
    for artifact in ("kerdos_index.faiss", "kerdos_meta.pkl"):
        assert (target_dir / artifact).exists()

    reloaded = KerdosRAG.load(str(target_dir), hf_token="hf_dummy")
    assert reloaded.is_ready
    assert reloaded.chunk_count == indexed_engine.chunk_count
    assert reloaded.indexed_sources == indexed_engine.indexed_sources
def test_save_raises_when_empty(engine, tmp_path):
    """``save()`` on an engine with nothing indexed raises RuntimeError."""
    destination = str(tmp_path / "empty")
    with pytest.raises(RuntimeError, match="Nothing to save"):
        engine.save(destination)
|