""" tests/test_core.py Unit tests for the KerdosRAG public API (no HF token required). """ import os import sys import tempfile import pytest sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) from kerdos_rag import KerdosRAG # ── Fixtures ────────────────────────────────────────────────────────────────── @pytest.fixture def engine(): """A fresh KerdosRAG instance for each test.""" return KerdosRAG(hf_token="hf_dummy") # token won't be used in indexing tests @pytest.fixture def indexed_engine(tmp_path): """Engine with one plain-text document already indexed.""" doc = tmp_path / "policy.txt" doc.write_text( "The refund policy allows returns within 30 days of purchase. " "Contact support at support@example.com for assistance.", encoding="utf-8", ) eng = KerdosRAG(hf_token="hf_dummy") eng.index([str(doc)]) return eng # ── Tests ───────────────────────────────────────────────────────────────────── def test_initial_state(engine): assert engine.is_ready is False assert engine.chunk_count == 0 assert engine.indexed_sources == set() def test_index_returns_correct_metadata(indexed_engine): assert indexed_engine.is_ready assert indexed_engine.chunk_count > 0 assert "policy.txt" in indexed_engine.indexed_sources def test_index_skips_duplicates(indexed_engine, tmp_path): doc = tmp_path / "policy.txt" doc.write_text("Some extra content.", encoding="utf-8") result = indexed_engine.index([str(doc)]) assert "policy.txt" in result["skipped"] assert "policy.txt" not in result["indexed"] def test_index_multiple_files(engine, tmp_path): (tmp_path / "a.txt").write_text("Alpha content here.", encoding="utf-8") (tmp_path / "b.txt").write_text("Beta content here.", encoding="utf-8") result = engine.index([str(tmp_path / "a.txt"), str(tmp_path / "b.txt")]) assert len(result["indexed"]) == 2 assert result["chunk_count"] > 0 def test_reset_clears_index(indexed_engine): assert indexed_engine.is_ready indexed_engine.reset() assert not indexed_engine.is_ready assert indexed_engine.chunk_count == 0 assert indexed_engine.indexed_sources == set() def test_chat_raises_when_not_indexed(engine): with pytest.raises(RuntimeError, match="No documents indexed"): list(engine.chat("What is the policy?")) def test_chat_raises_without_token(tmp_path): doc = tmp_path / "doc.txt" doc.write_text("Hello world.", encoding="utf-8") eng = KerdosRAG(hf_token="") eng.index([str(doc)]) with pytest.raises(ValueError, match="No Hugging Face token"): list(eng.chat("What does it say?")) def test_save_and_load(indexed_engine, tmp_path): save_dir = tmp_path / "saved_index" indexed_engine.save(str(save_dir)) assert (save_dir / "kerdos_index.faiss").exists() assert (save_dir / "kerdos_meta.pkl").exists() restored = KerdosRAG.load(str(save_dir), hf_token="hf_dummy") assert restored.is_ready assert restored.chunk_count == indexed_engine.chunk_count assert restored.indexed_sources == indexed_engine.indexed_sources def test_save_raises_when_empty(engine, tmp_path): with pytest.raises(RuntimeError, match="Nothing to save"): engine.save(str(tmp_path / "empty"))