Spaces:
Running
Running
| """ | |
| tests/test_core.py | |
| Unit tests for the KerdosRAG public API (no HF token required). | |
| """ | |
| import os | |
| import sys | |
| import tempfile | |
| import pytest | |
| sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) | |
| from kerdos_rag import KerdosRAG | |
| # ── Fixtures ────────────────────────────────────────────────────────────────── | |
@pytest.fixture
def engine():
    """Provide a fresh KerdosRAG instance for each test.

    Registered as a pytest fixture so that tests can receive it simply by
    declaring an ``engine`` parameter; without the decorator pytest cannot
    resolve that parameter.
    """
    # The token is a placeholder: it won't be used in indexing tests.
    return KerdosRAG(hf_token="hf_dummy")
@pytest.fixture
def indexed_engine(tmp_path):
    """Provide an engine with one plain-text document already indexed.

    Registered as a pytest fixture so that tests can receive it by declaring
    an ``indexed_engine`` parameter. The document is written as
    ``policy.txt`` inside pytest's per-test ``tmp_path`` directory, so tests
    that also request ``tmp_path`` see the same file.
    """
    doc = tmp_path / "policy.txt"
    doc.write_text(
        "The refund policy allows returns within 30 days of purchase. "
        "Contact support at support@example.com for assistance.",
        encoding="utf-8",
    )
    # Placeholder token, as in the plain ``engine`` fixture.
    eng = KerdosRAG(hf_token="hf_dummy")
    eng.index([str(doc)])
    return eng
| # ── Tests ───────────────────────────────────────────────────────────────────── | |
def test_initial_state(engine):
    """A newly built engine is not ready and holds no chunks or sources."""
    fresh = engine
    assert fresh.is_ready is False
    assert fresh.chunk_count == 0
    assert fresh.indexed_sources == set()
def test_index_returns_correct_metadata(indexed_engine):
    """After indexing one file the engine is ready and lists that file."""
    populated = indexed_engine
    assert populated.is_ready
    assert populated.chunk_count > 0
    assert "policy.txt" in populated.indexed_sources
def test_index_skips_duplicates(indexed_engine, tmp_path):
    """Indexing ``policy.txt`` a second time reports it as skipped."""
    # Same path the fixture used, rewritten with different content.
    duplicate = tmp_path / "policy.txt"
    duplicate.write_text("Some extra content.", encoding="utf-8")

    outcome = indexed_engine.index([str(duplicate)])

    assert "policy.txt" in outcome["skipped"]
    assert "policy.txt" not in outcome["indexed"]
def test_index_multiple_files(engine, tmp_path):
    """Two distinct files indexed in one call are both reported as indexed."""
    first = tmp_path / "a.txt"
    first.write_text("Alpha content here.", encoding="utf-8")
    second = tmp_path / "b.txt"
    second.write_text("Beta content here.", encoding="utf-8")

    summary = engine.index([str(first), str(second)])

    assert len(summary["indexed"]) == 2
    assert summary["chunk_count"] > 0
def test_reset_clears_index(indexed_engine):
    """``reset()`` returns a populated engine to its empty, not-ready state."""
    populated = indexed_engine
    # Precondition: the fixture really did index something.
    assert populated.is_ready

    populated.reset()

    assert not populated.is_ready
    assert populated.chunk_count == 0
    assert populated.indexed_sources == set()
def test_chat_raises_when_not_indexed(engine):
    """Chatting before any document is indexed raises ``RuntimeError``."""
    expectation = pytest.raises(RuntimeError, match="No documents indexed")
    with expectation:
        # list() consumes the result of chat() in full.
        list(engine.chat("What is the policy?"))
def test_chat_raises_without_token(tmp_path):
    """Chatting with an empty HF token raises ``ValueError`` after indexing."""
    sample = tmp_path / "doc.txt"
    sample.write_text("Hello world.", encoding="utf-8")

    # Build the engine with an empty token.
    tokenless = KerdosRAG(hf_token="")
    tokenless.index([str(sample)])

    expectation = pytest.raises(ValueError, match="No Hugging Face token")
    with expectation:
        list(tokenless.chat("What does it say?"))
def test_save_and_load(indexed_engine, tmp_path):
    """A saved index round-trips through ``KerdosRAG.load`` unchanged."""
    target = tmp_path / "saved_index"
    indexed_engine.save(str(target))

    # Both on-disk artifacts must exist after saving.
    for artifact in ("kerdos_index.faiss", "kerdos_meta.pkl"):
        assert (target / artifact).exists()

    reloaded = KerdosRAG.load(str(target), hf_token="hf_dummy")
    assert reloaded.is_ready
    assert reloaded.chunk_count == indexed_engine.chunk_count
    assert reloaded.indexed_sources == indexed_engine.indexed_sources
def test_save_raises_when_empty(engine, tmp_path):
    """Saving an engine that has indexed nothing raises ``RuntimeError``."""
    destination = str(tmp_path / "empty")
    expectation = pytest.raises(RuntimeError, match="Nothing to save")
    with expectation:
        engine.save(destination)