# Custom-LLM-Chat / tests/test_core.py
# Bhaskar Ram
# feat: Python package, FastAPI REST server, TypeScript SDK
# 634117a
"""
tests/test_core.py
Unit tests for the KerdosRAG public API (no HF token required).
"""
import os
import sys
import tempfile
import pytest
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from kerdos_rag import KerdosRAG
# ── Fixtures ──────────────────────────────────────────────────────────────────
@pytest.fixture
def engine():
    """Provide a newly constructed KerdosRAG instance to each test."""
    # The placeholder token is never used by the indexing tests.
    fresh = KerdosRAG(hf_token="hf_dummy")
    return fresh
@pytest.fixture
def indexed_engine(tmp_path):
    """Return a KerdosRAG engine that has already indexed one plain-text file.

    The document is written to ``tmp_path / "policy.txt"`` before indexing.
    """
    policy_text = (
        "The refund policy allows returns within 30 days of purchase. "
        "Contact support at support@example.com for assistance."
    )
    policy_path = tmp_path / "policy.txt"
    policy_path.write_text(policy_text, encoding="utf-8")

    rag = KerdosRAG(hf_token="hf_dummy")
    rag.index([str(policy_path)])
    return rag
# ── Tests ─────────────────────────────────────────────────────────────────────
def test_initial_state(engine):
    """A new engine is not ready and reports no chunks and no sources."""
    ready = engine.is_ready
    assert ready is False

    chunks = engine.chunk_count
    assert chunks == 0

    sources = engine.indexed_sources
    assert sources == set()
def test_index_returns_correct_metadata(indexed_engine):
    """After indexing, the engine is ready, has chunks, and lists policy.txt."""
    rag = indexed_engine
    assert rag.is_ready
    assert rag.chunk_count > 0
    assert "policy.txt" in rag.indexed_sources
def test_index_skips_duplicates(indexed_engine, tmp_path):
    """Indexing policy.txt a second time lists it under "skipped", not "indexed"."""
    # Overwrite the already-indexed policy.txt with different content.
    duplicate = tmp_path / "policy.txt"
    duplicate.write_text("Some extra content.", encoding="utf-8")

    outcome = indexed_engine.index([str(duplicate)])

    assert "policy.txt" in outcome["skipped"]
    assert "policy.txt" not in outcome["indexed"]
def test_index_multiple_files(engine, tmp_path):
    """Indexing two files in a single call reports both as indexed."""
    contents = {
        "a.txt": "Alpha content here.",
        "b.txt": "Beta content here.",
    }
    paths = []
    for filename, text in contents.items():
        target = tmp_path / filename
        target.write_text(text, encoding="utf-8")
        paths.append(str(target))

    outcome = engine.index(paths)

    assert len(outcome["indexed"]) == 2
    assert outcome["chunk_count"] > 0
def test_reset_clears_index(indexed_engine):
    """reset() leaves a previously indexed engine not ready and empty."""
    rag = indexed_engine
    assert rag.is_ready  # precondition: the fixture engine starts out ready

    rag.reset()

    assert not rag.is_ready
    assert rag.chunk_count == 0
    assert rag.indexed_sources == set()
def test_chat_raises_when_not_indexed(engine):
    """chat() on an engine with nothing indexed raises RuntimeError."""
    question = "What is the policy?"
    with pytest.raises(RuntimeError, match="No documents indexed"):
        # list() drains whatever chat() returns, so the error surfaces even
        # if it is only raised once iteration starts.
        list(engine.chat(question))
def test_chat_raises_without_token(tmp_path):
    """chat() raises ValueError when the engine was built with an empty token."""
    source = tmp_path / "doc.txt"
    source.write_text("Hello world.", encoding="utf-8")

    # NOTE(review): if KerdosRAG falls back to an environment variable when
    # hf_token is empty, this test depends on the environment -- confirm.
    tokenless = KerdosRAG(hf_token="")
    tokenless.index([str(source)])

    with pytest.raises(ValueError, match="No Hugging Face token"):
        list(tokenless.chat("What does it say?"))
def test_save_and_load(indexed_engine, tmp_path):
    """save() writes both index files; load() restores chunk count and sources."""
    target_dir = tmp_path / "saved_index"
    indexed_engine.save(str(target_dir))

    # Both on-disk artifacts must exist after saving.
    for artifact in ("kerdos_index.faiss", "kerdos_meta.pkl"):
        assert (target_dir / artifact).exists()

    reloaded = KerdosRAG.load(str(target_dir), hf_token="hf_dummy")
    assert reloaded.is_ready
    assert reloaded.chunk_count == indexed_engine.chunk_count
    assert reloaded.indexed_sources == indexed_engine.indexed_sources
def test_save_raises_when_empty(engine, tmp_path):
    """save() on an engine with nothing indexed raises RuntimeError."""
    destination = str(tmp_path / "empty")
    with pytest.raises(RuntimeError, match="Nothing to save"):
        engine.save(destination)