Spaces:
Running
feat: Python package, FastAPI REST server, TypeScript SDK
Browse files
Phase 1 — Python Package (pip install kerdos-rag):
- pyproject.toml: PEP 517 metadata, entry points, pytest/ruff config
- kerdos_rag/__init__.py: public export of KerdosRAG
- kerdos_rag/core.py: KerdosRAG class with index(), chat() generator,
reset(), save() and load() for FAISS persistence
- kerdos_rag/cli.py: kerdos-rag CLI with serve/api/index subcommands
Phase 2 — FastAPI REST Server:
- kerdos_rag/server.py: GET /health, GET /status, POST /index (multipart),
POST /chat (SSE streaming), DELETE /reset; optional X-Api-Key auth
- Dockerfile: multi-stage build with MODE=serve|api build arg
- requirements.txt: added fastapi, uvicorn[standard], python-multipart
Phase 3 — TypeScript/JS SDK:
- sdk/typescript/src/index.ts: KerdosRAGClient class with health(),
status(), indexFiles(), chat() async generator, reset()
- sdk/typescript/package.json, tsconfig.json, README.md
Tests: 19/19 passing (smoke + test_core + test_api)
- Dockerfile +27 -7
- kerdos_rag/__init__.py +6 -0
- kerdos_rag/cli.py +101 -0
- kerdos_rag/core.py +229 -0
- kerdos_rag/server.py +186 -0
- pyproject.toml +62 -0
- requirements-dev.txt +1 -0
- requirements.txt +4 -0
- sdk/typescript/README.md +124 -0
- sdk/typescript/package.json +38 -0
- sdk/typescript/src/index.ts +234 -0
- sdk/typescript/tsconfig.json +18 -0
- tests/test_api.py +110 -0
- tests/test_core.py +108 -0
|
@@ -1,12 +1,22 @@
|
|
| 1 |
-
# Kerdos AI — Custom LLM Chat
|
| 2 |
-
# Multi-stage Docker build
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
FROM python:3.11-slim AS base
|
| 5 |
|
| 6 |
# System dependencies for PyMuPDF and FAISS
|
| 7 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 8 |
-
|
| 9 |
-
|
| 10 |
&& rm -rf /var/lib/apt/lists/*
|
| 11 |
|
| 12 |
WORKDIR /app
|
|
@@ -18,10 +28,20 @@ RUN pip install --no-cache-dir -r requirements.txt
|
|
| 18 |
# Copy source
|
| 19 |
COPY . .
|
| 20 |
|
| 21 |
-
#
|
| 22 |
-
|
| 23 |
|
|
|
|
|
|
|
|
|
|
| 24 |
ENV GRADIO_SERVER_NAME=0.0.0.0
|
| 25 |
ENV GRADIO_SERVER_PORT=7860
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
-
|
|
|
|
|
|
| 1 |
+
# Kerdos AI — Custom LLM Chat (Demo)
|
| 2 |
+
# Multi-stage Docker build — supports both Gradio UI and FastAPI REST server
|
| 3 |
+
#
|
| 4 |
+
# Build for Gradio (default):
|
| 5 |
+
# docker build -t kerdos-rag .
|
| 6 |
+
# docker run -p 7860:7860 -e HF_TOKEN=hf_... kerdos-rag
|
| 7 |
+
#
|
| 8 |
+
# Build for REST API:
|
| 9 |
+
# docker build --build-arg MODE=api -t kerdos-rag-api .
|
| 10 |
+
# docker run -p 8000:8000 -e HF_TOKEN=hf_... kerdos-rag-api
|
| 11 |
+
|
| 12 |
+
ARG MODE=serve
|
| 13 |
|
| 14 |
FROM python:3.11-slim AS base
|
| 15 |
|
| 16 |
# System dependencies for PyMuPDF and FAISS
|
| 17 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 18 |
+
build-essential \
|
| 19 |
+
libgomp1 \
|
| 20 |
&& rm -rf /var/lib/apt/lists/*
|
| 21 |
|
| 22 |
WORKDIR /app
|
|
|
|
| 28 |
# Copy source
|
| 29 |
COPY . .
|
| 30 |
|
| 31 |
+
# Install the package in editable mode so kerdos-rag CLI is available
|
| 32 |
+
RUN pip install --no-cache-dir -e .
|
| 33 |
|
| 34 |
+
# ── Gradio mode ───────────────────────────────────────────────
|
| 35 |
+
FROM base AS serve
|
| 36 |
+
EXPOSE 7860
|
| 37 |
ENV GRADIO_SERVER_NAME=0.0.0.0
|
| 38 |
ENV GRADIO_SERVER_PORT=7860
|
| 39 |
+
CMD ["kerdos-rag", "serve", "--host", "0.0.0.0", "--port", "7860"]
|
| 40 |
+
|
| 41 |
+
# ── FastAPI REST mode ─────────────────────────────────────────
|
| 42 |
+
FROM base AS api
|
| 43 |
+
EXPOSE 8000
|
| 44 |
+
CMD ["kerdos-rag", "api", "--host", "0.0.0.0", "--port", "8000"]
|
| 45 |
|
| 46 |
+
# Select the right stage based on BUILD ARG
|
| 47 |
+
FROM ${MODE}
|
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""kerdos_rag — public API surface."""
|
| 2 |
+
|
| 3 |
+
from kerdos_rag.core import KerdosRAG
|
| 4 |
+
|
| 5 |
+
__all__ = ["KerdosRAG"]
|
| 6 |
+
__version__ = "0.1.0"
|
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
kerdos_rag/cli.py
|
| 3 |
+
Command-line interface for the Kerdos RAG engine.
|
| 4 |
+
|
| 5 |
+
kerdos-rag serve # start Gradio UI (default port 7860)
|
| 6 |
+
kerdos-rag api # start FastAPI REST server (default port 8000)
|
| 7 |
+
kerdos-rag index <files> # index documents from terminal
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
from __future__ import annotations
|
| 11 |
+
|
| 12 |
+
import argparse
|
| 13 |
+
import sys
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def _cmd_serve(args: argparse.Namespace) -> None:
    """Launch the Gradio web UI defined in the repository-root ``app.py``.

    Args:
        args: Parsed CLI arguments; ``args.host`` and ``args.port`` are read.

    Raises:
        FileNotFoundError: If ``app.py`` does not exist next to the package.
        ImportError: If no import spec/loader can be built for ``app.py``.
    """
    import importlib.util
    import os

    # setdefault: a value already present in the environment wins over the
    # CLI argument; the CLI value is only used when the variable is unset.
    os.environ.setdefault("GRADIO_SERVER_PORT", str(args.port))
    os.environ.setdefault("GRADIO_SERVER_NAME", args.host)

    # app.py lives at the repo root — import it as a module
    app_path = _repo_root() / "app.py"
    if not app_path.is_file():
        raise FileNotFoundError(f"Gradio app not found: {app_path}")

    spec = importlib.util.spec_from_file_location("app", app_path)
    # spec_from_file_location may return None (and a spec may lack a loader);
    # fail with a clear message instead of an AttributeError on None.
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load Gradio app from {app_path}")

    mod = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(mod)
    mod.demo.queue()
    mod.demo.launch(css=mod.CSS, theme=__import__("gradio").themes.Soft())
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def _cmd_api(args: argparse.Namespace) -> None:
    """Start the FastAPI REST server under uvicorn.

    Args:
        args: Parsed CLI arguments; ``args.host``, ``args.port`` and
            ``args.reload`` are forwarded to ``uvicorn.run``.
    """
    import uvicorn
    # Imported but not used directly; uvicorn receives the import string below.
    # NOTE(review): presumably kept so import errors surface before startup — confirm.
    from kerdos_rag.server import app  # noqa: F401

    bind_host, bind_port = args.host, args.port
    print(f"[kerdos-rag] Starting REST API on http://{bind_host}:{bind_port}")

    uvicorn_options = {
        "host": bind_host,
        "port": bind_port,
        "reload": args.reload,
        "log_level": "info",
    }
    uvicorn.run("kerdos_rag.server:app", **uvicorn_options)
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def _cmd_index(args: argparse.Namespace) -> None:
    """Index documents from the command line and print a summary.

    Args:
        args: Parsed CLI arguments; ``args.files`` is the list of paths to
            index and ``args.save`` is an optional output directory (empty
            string means "do not save").

    Exits with a non-zero status and a one-line message when the engine
    reports that no text could be extracted, instead of dumping a traceback.
    """
    from kerdos_rag import KerdosRAG

    engine = KerdosRAG()  # token not needed for pure indexing
    try:
        result = engine.index(args.files)
    except ValueError as exc:
        # KerdosRAG.index raises ValueError when nothing could be extracted;
        # sys.exit(str) prints the message to stderr and exits with status 1.
        sys.exit(f"❌ Indexing failed: {exc}")

    if result["indexed"]:
        print(f"✅ Indexed: {', '.join(result['indexed'])}")
    if result["skipped"]:
        print(f"⚠️ Skipped (already indexed): {', '.join(result['skipped'])}")
    print(f"📦 Total chunks: {result['chunk_count']}")

    if args.save:
        engine.save(args.save)
        print(f"💾 Index saved to: {args.save}")
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def _repo_root():
|
| 65 |
+
"""Return the directory containing this package."""
|
| 66 |
+
from pathlib import Path
|
| 67 |
+
return Path(__file__).resolve().parent.parent
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def main(argv: list[str] | None = None) -> None:
    """Parse command-line arguments and dispatch to the chosen subcommand.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.
    """
    root_parser = argparse.ArgumentParser(
        prog="kerdos-rag",
        description="Kerdos RAG — Enterprise Document Q&A engine",
    )
    commands = root_parser.add_subparsers(dest="command", required=True)

    # Subparsers are registered in the order serve, api, index.
    serve_parser = commands.add_parser("serve", help="Start the Gradio web UI")
    api_parser = commands.add_parser("api", help="Start the FastAPI REST server")
    index_parser = commands.add_parser("index", help="Index documents from the terminal")

    # Both server subcommands share --host/--port and differ only in default port.
    for server_parser, default_port in ((serve_parser, 7860), (api_parser, 8000)):
        server_parser.add_argument("--host", default="0.0.0.0")
        server_parser.add_argument("--port", type=int, default=default_port)

    api_parser.add_argument("--reload", action="store_true", help="Enable auto-reload (dev only)")

    index_parser.add_argument("files", nargs="+", metavar="FILE")
    index_parser.add_argument("--save", metavar="DIR", default="", help="Save index to directory")

    # Each subparser records its handler; the selected one ends up in args.func.
    serve_parser.set_defaults(func=_cmd_serve)
    api_parser.set_defaults(func=_cmd_api)
    index_parser.set_defaults(func=_cmd_index)

    parsed = root_parser.parse_args(argv)
    parsed.func(parsed)
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
if __name__ == "__main__":
|
| 101 |
+
main()
|
|
@@ -0,0 +1,229 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
kerdos_rag/core.py
|
| 3 |
+
High-level KerdosRAG façade — the primary interface for library consumers.
|
| 4 |
+
|
| 5 |
+
Usage:
|
| 6 |
+
from kerdos_rag import KerdosRAG
|
| 7 |
+
|
| 8 |
+
engine = KerdosRAG(hf_token="hf_...")
|
| 9 |
+
engine.index(["policy.pdf", "manual.docx"])
|
| 10 |
+
for token in engine.chat("What is the refund policy?"):
|
| 11 |
+
print(token, end="", flush=True)
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
from __future__ import annotations
|
| 15 |
+
|
| 16 |
+
import json
|
| 17 |
+
import os
|
| 18 |
+
import pickle
|
| 19 |
+
from pathlib import Path
|
| 20 |
+
from typing import Generator
|
| 21 |
+
|
| 22 |
+
from rag.document_loader import load_documents
|
| 23 |
+
from rag.embedder import VectorIndex, build_index, add_to_index
|
| 24 |
+
from rag.retriever import retrieve
|
| 25 |
+
from rag.chain import answer_stream
|
| 26 |
+
|
| 27 |
+
_DEFAULT_MODEL = "meta-llama/Llama-3.1-8B-Instruct"
|
| 28 |
+
_DEFAULT_TOP_K = 5
|
| 29 |
+
_DEFAULT_MIN_SCORE = 0.30
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class KerdosRAG:
    """
    Batteries-included RAG engine.

    Args:
        hf_token: Hugging Face API token. Falls back to HF_TOKEN env var.
        model: HF model ID (e.g. 'mistralai/Mistral-7B-Instruct-v0.3').
            Falls back to LLM_MODEL env var, then Llama 3.1 8B.
        top_k: Number of chunks to retrieve per query.
        min_score: Minimum cosine similarity threshold (chunks below this
            are dropped before being sent to the LLM).
    """

    def __init__(
        self,
        hf_token: str = "",
        model: str | None = None,
        top_k: int = _DEFAULT_TOP_K,
        min_score: float = _DEFAULT_MIN_SCORE,
    ) -> None:
        # Explicit argument wins, then the environment. ``or ""`` tolerates a
        # caller passing None instead of the documented empty string.
        self.hf_token: str = (hf_token or "").strip() or os.environ.get("HF_TOKEN", "")
        self.model: str = model or os.environ.get("LLM_MODEL", _DEFAULT_MODEL)
        self.top_k: int = top_k
        self.min_score: float = min_score

        # No index exists until the first successful index() or load().
        self._index: VectorIndex | None = None
        self._indexed_sources: set[str] = set()

    # ── Properties ────────────────────────────────────────────────────────────

    @property
    def indexed_sources(self) -> set[str]:
        """File names currently in the knowledge base (returned as a copy)."""
        return set(self._indexed_sources)

    @property
    def chunk_count(self) -> int:
        """Total number of vector chunks in the index (0 when no index exists)."""
        return self._index.index.ntotal if self._index else 0

    @property
    def is_ready(self) -> bool:
        """True when at least one document has been indexed."""
        return self._index is not None and self.chunk_count > 0

    # ── Core operations ───────────────────────────────────────────────────────

    def index(self, file_paths: list[str]) -> dict:
        """
        Parse and index documents into the knowledge base.

        Duplicate filenames are automatically skipped — both files already in
        the index and repeated filenames within the same call.

        Args:
            file_paths: Absolute or relative paths to PDF, DOCX, TXT, MD, or CSV files.
                A single path given as a bare string/Path is treated as a
                one-element list.

        Returns:
            {
                "indexed": ["file1.pdf", ...],   # newly indexed
                "skipped": ["dup.pdf", ...],     # already in index
                "chunk_count": 142               # total chunks
            }

        Raises:
            ValueError: If no text could be extracted from any new file.
        """
        # A bare string would otherwise be iterated character by character.
        if isinstance(file_paths, (str, Path)):
            file_paths = [file_paths]
        paths = [str(p) for p in file_paths]

        new_paths, skipped = [], []
        seen_in_call: set[str] = set()
        for p in paths:
            name = Path(p).name
            # Deduplicate by filename against the index AND earlier entries in
            # this call, so the same name is never embedded twice.
            if name in self._indexed_sources or name in seen_in_call:
                skipped.append(name)
            else:
                seen_in_call.add(name)
                new_paths.append(p)

        if not new_paths:
            return {"indexed": [], "skipped": skipped, "chunk_count": self.chunk_count}

        docs = load_documents(new_paths)
        if not docs:
            raise ValueError("Could not extract text from any of the provided files.")

        if self._index is None:
            self._index = build_index(docs)
        else:
            self._index = add_to_index(self._index, docs)

        # Sources are taken from the loaded docs, so files that produced no
        # documents are not recorded as indexed.
        newly_indexed = list({d["source"] for d in docs})
        self._indexed_sources.update(newly_indexed)

        return {
            "indexed": newly_indexed,
            "skipped": skipped,
            "chunk_count": self.chunk_count,
        }

    def chat(
        self,
        query: str,
        history: list[dict] | None = None,
    ) -> Generator[str, None, None]:
        """
        Ask a question and stream the answer as it is generated.

        Args:
            query: The user's question.
            history: Optional list of prior messages in
                [{"role": "user"|"assistant", "content": "..."}] format.

        Yields:
            Progressively-growing answer strings (suitable for real-time display).

        Raises:
            RuntimeError: If no documents have been indexed yet.
            ValueError: If no HF token is available.
        """
        if not self.is_ready:
            raise RuntimeError("No documents indexed. Call engine.index(file_paths) first.")
        if not self.hf_token:
            raise ValueError(
                "No Hugging Face token. Pass hf_token= to KerdosRAG() or set HF_TOKEN env var."
            )

        # Temporarily patch retriever's MIN_SCORE with instance setting.
        # NOTE(review): this mutates a module-level global for as long as the
        # generator is alive, so concurrent chats with different min_score
        # values can affect each other — consider passing the threshold to
        # retrieve() directly if its signature allows.
        # NOTE(review): self.model is not forwarded to answer_stream here —
        # confirm how the configured model reaches rag.chain.
        import rag.retriever as _r
        original_min = _r.MIN_SCORE
        _r.MIN_SCORE = self.min_score
        try:
            chunks = retrieve(query, self._index, top_k=self.top_k)
            yield from answer_stream(query, chunks, self.hf_token, chat_history=history)
        finally:
            _r.MIN_SCORE = original_min

    def reset(self) -> None:
        """Clear the knowledge base."""
        self._index = None
        self._indexed_sources = set()

    # ── Persistence ───────────────────────────────────────────────────────────

    def save(self, directory: str | Path) -> None:
        """
        Persist the index to disk so it can be reloaded across sessions.

        Creates two files in `directory`:
          - ``kerdos_index.faiss``  — the raw FAISS vectors
          - ``kerdos_meta.pkl``     — chunks + source tracking

        Args:
            directory: Path to a folder (will be created if needed).

        Raises:
            RuntimeError: If the index is empty.
        """
        import faiss

        if not self.is_ready:
            raise RuntimeError("Nothing to save — index is empty.")

        out = Path(directory)
        out.mkdir(parents=True, exist_ok=True)

        faiss.write_index(self._index.index, str(out / "kerdos_index.faiss"))
        meta = {
            "chunks": self._index.chunks,
            "indexed_sources": list(self._indexed_sources),
            "model": self.model,
            "top_k": self.top_k,
            "min_score": self.min_score,
        }
        with open(out / "kerdos_meta.pkl", "wb") as f:
            pickle.dump(meta, f)

    @classmethod
    def load(cls, directory: str | Path, hf_token: str = "") -> "KerdosRAG":
        """
        Restore an engine from a directory previously written by :meth:`save`.

        Args:
            directory: Folder containing ``kerdos_index.faiss`` and ``kerdos_meta.pkl``.
            hf_token: HF token for chat (can also be set via HF_TOKEN env var).

        Returns:
            A fully initialised :class:`KerdosRAG` instance.
        """
        import faiss
        from rag.embedder import _get_model

        d = Path(directory)
        # SECURITY: pickle.load executes arbitrary code embedded in the file.
        # Only load directories written by save() from a trusted source.
        with open(d / "kerdos_meta.pkl", "rb") as f:
            meta = pickle.load(f)

        engine = cls(
            hf_token=hf_token,
            model=meta["model"],
            top_k=meta["top_k"],
            min_score=meta["min_score"],
        )
        model = _get_model()
        idx = faiss.read_index(str(d / "kerdos_index.faiss"))
        engine._index = VectorIndex(chunks=meta["chunks"], index=idx, embedder=model)
        engine._indexed_sources = set(meta["indexed_sources"])
        return engine
|
|
@@ -0,0 +1,186 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
kerdos_rag/server.py
|
| 3 |
+
FastAPI REST server exposing the KerdosRAG engine over HTTP.
|
| 4 |
+
|
| 5 |
+
Endpoints:
|
| 6 |
+
GET /health — liveness probe
|
| 7 |
+
GET /status — knowledge-base metadata
|
| 8 |
+
POST /index — upload + index documents (multipart/form-data)
|
| 9 |
+
POST /chat — ask a question (SSE streaming response)
|
| 10 |
+
DELETE /reset — clear the knowledge base
|
| 11 |
+
|
| 12 |
+
Authentication (optional):
|
| 13 |
+
Set API_KEY env var to enable X-Api-Key header validation.
|
| 14 |
+
Leave unset to run in open mode (suitable for local / trusted environments).
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
from __future__ import annotations
|
| 18 |
+
|
| 19 |
+
import os
|
| 20 |
+
import asyncio
|
| 21 |
+
from typing import AsyncGenerator
|
| 22 |
+
|
| 23 |
+
from fastapi import FastAPI, File, UploadFile, HTTPException, Depends, Header, Request
|
| 24 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 25 |
+
from fastapi.responses import StreamingResponse, JSONResponse
|
| 26 |
+
from pydantic import BaseModel
|
| 27 |
+
|
| 28 |
+
from kerdos_rag.core import KerdosRAG
|
| 29 |
+
|
| 30 |
+
# ── App & CORS ────────────────────────────────────────────────────────────────
|
| 31 |
+
app = FastAPI(
|
| 32 |
+
title="Kerdos RAG API",
|
| 33 |
+
description="Enterprise Document Q&A engine by Kerdos Infrasoft",
|
| 34 |
+
version="0.1.0",
|
| 35 |
+
contact={"name": "Kerdos Infrasoft", "url": "https://kerdos.in", "email": "partnership@kerdos.in"},
|
| 36 |
+
license_info={"name": "MIT"},
|
| 37 |
+
)
|
| 38 |
+
|
| 39 |
+
app.add_middleware(
|
| 40 |
+
CORSMiddleware,
|
| 41 |
+
allow_origins=["*"],
|
| 42 |
+
allow_methods=["*"],
|
| 43 |
+
allow_headers=["*"],
|
| 44 |
+
)
|
| 45 |
+
|
| 46 |
+
# ── Singleton engine ───────────────────────────────────────────────────────────
|
| 47 |
+
_engine = KerdosRAG()
|
| 48 |
+
|
| 49 |
+
# ── Auth ──────────────────────────────────────────────────────────────────────
|
| 50 |
+
_API_KEY = os.environ.get("API_KEY", "")
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def _check_auth(x_api_key: str | None = Header(default=None)) -> None:
    """If API_KEY env var is set, validate X-Api-Key header.

    Args:
        x_api_key: Value of the ``X-Api-Key`` request header, or None if absent.

    Raises:
        HTTPException: 401 when a key is configured and the header is missing
            or does not match.
    """
    import secrets

    # Open mode: no key configured, nothing to check.
    if not _API_KEY:
        return

    # Compare as bytes in constant time so response timing does not leak how
    # much of the key matched; bytes also avoid compare_digest's ASCII-only
    # restriction on str arguments.
    supplied = (x_api_key or "").encode("utf-8")
    expected = _API_KEY.encode("utf-8")
    if not secrets.compare_digest(supplied, expected):
        raise HTTPException(status_code=401, detail="Invalid or missing X-Api-Key header.")
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
# ── Request / Response models ──────────────────────────────────────────────────
|
| 60 |
+
class ChatRequest(BaseModel):
    # JSON body accepted by POST /chat.
    # The user's question.
    query: str
    # Prior conversation messages, forwarded to the engine's chat() as `history`.
    history: list[dict] | None = None
    # Optional per-request override of the engine's top_k.
    top_k: int | None = None
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
class StatusResponse(BaseModel):
    # Response body of GET /status; mirrors the engine's current settings.
    # File names currently in the knowledge base.
    indexed_sources: list[str]
    # Total number of chunks in the index.
    chunk_count: int
    # Model ID configured on the engine.
    model: str
    # Engine's chunks-per-query setting.
    top_k: int
    # Engine's minimum similarity threshold.
    min_score: float
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
# ── Endpoints ─────────────────────────────────────────────────────────────────
|
| 75 |
+
|
| 76 |
+
@app.get("/health", tags=["Meta"])
def health() -> dict:
    """Liveness probe — always returns 200 OK."""
    # Static payload; no engine state is consulted and no auth is required.
    payload = {"status": "ok", "version": "0.1.0"}
    return payload
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
@app.get("/status", response_model=StatusResponse, tags=["Meta"])
def status(_: None = Depends(_check_auth)) -> StatusResponse:
    """Return current knowledge-base metadata."""
    # indexed_sources is a set, so its iteration order is arbitrary; sort it
    # so clients get a stable listing across calls.
    return StatusResponse(
        indexed_sources=sorted(_engine.indexed_sources),
        chunk_count=_engine.chunk_count,
        model=_engine.model,
        top_k=_engine.top_k,
        min_score=_engine.min_score,
    )
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
@app.post("/index", tags=["RAG"])
async def index_documents(
    files: list[UploadFile] = File(...),
    _: None = Depends(_check_auth),
) -> JSONResponse:
    """
    Upload and index one or more documents.

    Accepts: PDF (.pdf), Word (.docx), plain text (.txt, .md, .csv).
    Duplicate filenames are automatically skipped.
    """
    import shutil
    import tempfile
    from pathlib import Path

    saved_paths: list[str] = []
    tmp_dir = tempfile.mkdtemp(prefix="kerdos_upload_")

    try:
        for upload in files:
            # The filename is client-controlled: keep only its final path
            # component so values like "../../x" or "/etc/x" cannot write
            # outside tmp_dir. Backslashes are normalised so Windows-style
            # paths are reduced the same way.
            raw_name = (upload.filename or "").replace("\\", "/")
            safe_name = Path(raw_name).name
            if safe_name in ("", ".", ".."):
                raise HTTPException(
                    status_code=400,
                    detail="Uploaded file has a missing or invalid filename.",
                )

            dest = Path(tmp_dir) / safe_name
            with open(dest, "wb") as f:
                shutil.copyfileobj(upload.file, f)
            saved_paths.append(str(dest))

        try:
            result = _engine.index(saved_paths)
        except ValueError as exc:
            # The engine raises ValueError when no text could be extracted;
            # report it as a client error rather than a 500.
            raise HTTPException(status_code=422, detail=str(exc)) from exc
    finally:
        # Uploaded copies are only needed while indexing.
        shutil.rmtree(tmp_dir, ignore_errors=True)

    return JSONResponse(content=result)
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
@app.post("/chat", tags=["RAG"])
async def chat(req: ChatRequest, _: None = Depends(_check_auth)) -> StreamingResponse:
    """
    Ask a question and receive a **Server-Sent Events** stream of tokens.

    Each SSE event has the form:
        data: <partial answer so far>\\n\\n

    The stream ends with:
        data: [DONE]\\n\\n

    Example (curl):
        curl -X POST http://localhost:8000/chat \\
             -H "Content-Type: application/json" \\
             -d '{"query": "What is the refund policy?"}' \\
             --no-buffer
    """
    import copy

    if not _engine.is_ready:
        raise HTTPException(
            status_code=422,
            detail="Knowledge base is empty. POST files to /index first.",
        )

    hf_token = _engine.hf_token
    if not hf_token:
        raise HTTPException(
            status_code=401,
            detail="No Hugging Face token configured. Set HF_TOKEN env var.",
        )

    # Apply a caller-specified top_k on a shallow per-request copy instead of
    # mutating the shared singleton: concurrent requests no longer see each
    # other's override, and nothing needs restoring if the stream is never
    # consumed. The copy shares the same index object.
    engine = _engine
    if req.top_k is not None:
        engine = copy.copy(_engine)
        engine.top_k = req.top_k

    async def event_generator() -> AsyncGenerator[str, None]:
        # engine.chat is a sync generator — advance it in the thread pool.
        loop = asyncio.get_running_loop()
        gen = engine.chat(req.query, history=req.history)

        # StopIteration cannot be raised into an asyncio Future, so exhaustion
        # is signalled with a sentinel default passed to next() instead.
        exhausted = object()
        while True:
            token = await loop.run_in_executor(None, next, gen, exhausted)
            if token is exhausted:
                break
            # SSE format: escape newlines in the data value
            escaped = token.replace("\n", "\\n")
            yield f"data: {escaped}\n\n"

        yield "data: [DONE]\n\n"

    return StreamingResponse(event_generator(), media_type="text/event-stream")
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
@app.delete("/reset", tags=["RAG"])
def reset(_: None = Depends(_check_auth)) -> dict:
    """Clear the entire knowledge base."""
    # Delegates to the shared engine, then confirms with a fixed payload.
    _engine.reset()
    confirmation = {"ok": True, "message": "Knowledge base cleared."}
    return confirmation
|
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[build-system]
# PEP 517 build configuration. "setuptools.build_meta" is setuptools'
# build backend entry point.
requires = ["setuptools>=68", "wheel"]
build-backend = "setuptools.build_meta"
|
| 4 |
+
|
| 5 |
+
[project]
|
| 6 |
+
name = "kerdos-rag"
|
| 7 |
+
version = "0.1.0"
|
| 8 |
+
description = "Enterprise Document Q&A RAG engine — by Kerdos Infrasoft"
|
| 9 |
+
readme = "README.md"
|
| 10 |
+
license = { text = "MIT" }
|
| 11 |
+
authors = [{ name = "Kerdos Infrasoft Private Limited", email = "partnership@kerdos.in" }]
|
| 12 |
+
requires-python = ">=3.10"
|
| 13 |
+
keywords = ["rag", "llm", "document-qa", "faiss", "enterprise", "kerdos"]
|
| 14 |
+
classifiers = [
|
| 15 |
+
"Development Status :: 4 - Beta",
|
| 16 |
+
"Intended Audience :: Developers",
|
| 17 |
+
"License :: OSI Approved :: MIT License",
|
| 18 |
+
"Programming Language :: Python :: 3",
|
| 19 |
+
"Programming Language :: Python :: 3.10",
|
| 20 |
+
"Programming Language :: Python :: 3.11",
|
| 21 |
+
"Programming Language :: Python :: 3.12",
|
| 22 |
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
| 23 |
+
]
|
| 24 |
+
dependencies = [
|
| 25 |
+
"gradio>=6.6.0",
|
| 26 |
+
"sentence-transformers>=5.0.0",
|
| 27 |
+
"faiss-cpu>=1.9.0",
|
| 28 |
+
"PyMuPDF>=1.24.0",
|
| 29 |
+
"python-docx>=1.1.0",
|
| 30 |
+
"huggingface-hub>=0.28.0",
|
| 31 |
+
"numpy>=1.26.0,<3",
|
| 32 |
+
"python-dotenv>=1.0.0",
|
| 33 |
+
"tenacity>=8.2.0",
|
| 34 |
+
"fastapi>=0.111.0",
|
| 35 |
+
"uvicorn[standard]>=0.29.0",
|
| 36 |
+
"python-multipart>=0.0.9",
|
| 37 |
+
]
|
| 38 |
+
|
| 39 |
+
[project.optional-dependencies]
|
| 40 |
+
dev = ["pytest>=8.0.0", "black>=24.0.0", "ruff>=0.4.0", "httpx>=0.27.0"]
|
| 41 |
+
|
| 42 |
+
[project.scripts]
|
| 43 |
+
kerdos-rag = "kerdos_rag.cli:main"
|
| 44 |
+
|
| 45 |
+
[project.urls]
|
| 46 |
+
Homepage = "https://kerdos.in"
|
| 47 |
+
Repository = "https://huggingface.co/spaces/kerdosdotio/Custom-LLM-Chat"
|
| 48 |
+
"Bug Tracker" = "https://kerdos.in/contact"
|
| 49 |
+
|
| 50 |
+
[tool.setuptools.packages.find]
|
| 51 |
+
include = ["kerdos_rag*", "rag*"]
|
| 52 |
+
|
| 53 |
+
[tool.pytest.ini_options]
|
| 54 |
+
pythonpath = ["."]
|
| 55 |
+
testpaths = ["tests"]
|
| 56 |
+
|
| 57 |
+
[tool.ruff]
|
| 58 |
+
line-length = 100
|
| 59 |
+
target-version = "py310"
|
| 60 |
+
|
| 61 |
+
[tool.ruff.lint]
|
| 62 |
+
select = ["E", "F", "I", "UP"]
|
|
@@ -2,3 +2,4 @@
|
|
| 2 |
pytest>=8.0.0
|
| 3 |
black>=24.0.0
|
| 4 |
ruff>=0.4.0
|
|
|
|
|
|
| 2 |
pytest>=8.0.0
|
| 3 |
black>=24.0.0
|
| 4 |
ruff>=0.4.0
|
| 5 |
+
httpx>=0.27.0
|
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
sentence-transformers>=5.0.0
|
| 2 |
faiss-cpu>=1.9.0
|
| 3 |
PyMuPDF>=1.24.0
|
|
@@ -6,3 +7,6 @@ huggingface-hub>=0.28.0
|
|
| 6 |
numpy>=1.26.0,<3
|
| 7 |
python-dotenv>=1.0.0
|
| 8 |
tenacity>=8.2.0
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio>=6.6.0
|
| 2 |
sentence-transformers>=5.0.0
|
| 3 |
faiss-cpu>=1.9.0
|
| 4 |
PyMuPDF>=1.24.0
|
|
|
|
| 7 |
numpy>=1.26.0,<3
|
| 8 |
python-dotenv>=1.0.0
|
| 9 |
tenacity>=8.2.0
|
| 10 |
+
fastapi>=0.111.0
|
| 11 |
+
uvicorn[standard]>=0.29.0
|
| 12 |
+
python-multipart>=0.0.9
|
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# `@kerdos/rag-client`
|
| 2 |
+
|
| 3 |
+
> TypeScript/JavaScript client for the **Kerdos RAG REST API**.
|
| 4 |
+
> Zero runtime dependencies — works in Node.js ≥ 18 and modern browsers.
|
| 5 |
+
|
| 6 |
+
---
|
| 7 |
+
|
| 8 |
+
## Installation
|
| 9 |
+
|
| 10 |
+
```bash
|
| 11 |
+
# npm
|
| 12 |
+
npm install @kerdos/rag-client
|
| 13 |
+
|
| 14 |
+
# pnpm
|
| 15 |
+
pnpm add @kerdos/rag-client
|
| 16 |
+
|
| 17 |
+
# yarn
|
| 18 |
+
yarn add @kerdos/rag-client
|
| 19 |
+
```
|
| 20 |
+
|
| 21 |
+
---
|
| 22 |
+
|
| 23 |
+
## Quick Start
|
| 24 |
+
|
| 25 |
+
```typescript
|
| 26 |
+
import { KerdosRAGClient } from "@kerdos/rag-client";
|
| 27 |
+
|
| 28 |
+
const client = new KerdosRAGClient({
|
| 29 |
+
baseUrl: "http://localhost:8000",
|
| 30 |
+
apiKey: "your-secret", // optional — only if server has API_KEY set
|
| 31 |
+
});
|
| 32 |
+
|
| 33 |
+
// 1. Index documents
|
| 34 |
+
const result = await client.indexFiles([
|
| 35 |
+
new File([pdfBuffer], "policy.pdf"),
|
| 36 |
+
new File([txtContent], "manual.txt"),
|
| 37 |
+
]);
|
| 38 |
+
console.log("Indexed:", result.indexed);
|
| 39 |
+
console.log("Skipped:", result.skipped);
|
| 40 |
+
|
| 41 |
+
// 2. Stream an answer
|
| 42 |
+
let answer = "";
|
| 43 |
+
for await (const token of client.chat("What is the refund policy?")) {
|
| 44 |
+
answer = token; // each yield is the full cumulative answer
|
| 45 |
+
process.stdout.write("\r" + answer);
|
| 46 |
+
}
|
| 47 |
+
|
| 48 |
+
// 3. Multi-turn conversation
|
| 49 |
+
const history = [
|
| 50 |
+
{ role: "user", content: "What is the refund policy?" },
|
| 51 |
+
{ role: "assistant", content: answer },
|
| 52 |
+
];
|
| 53 |
+
for await (const token of client.chat("Who do I contact for refunds?", {
|
| 54 |
+
history,
|
| 55 |
+
})) {
|
| 56 |
+
process.stdout.write("\r" + token);
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
// 4. Status & reset
|
| 60 |
+
const status = await client.status();
|
| 61 |
+
console.log("Chunks:", status.chunk_count);
|
| 62 |
+
|
| 63 |
+
await client.reset();
|
| 64 |
+
```
|
| 65 |
+
|
| 66 |
+
---
|
| 67 |
+
|
| 68 |
+
## API Reference
|
| 69 |
+
|
| 70 |
+
### `new KerdosRAGClient(options)`
|
| 71 |
+
|
| 72 |
+
| Option | Type | Default | Description |
|
| 73 |
+
| ----------- | -------- | ----------- | ----------------------------------------- |
|
| 74 |
+
| `baseUrl` | `string` | — | Server URL (e.g. `http://localhost:8000`) |
|
| 75 |
+
| `apiKey` | `string` | `undefined` | Sent as `X-Api-Key` header |
|
| 76 |
+
| `timeoutMs` | `number` | `30000`     | Timeout in ms for non-streaming requests (not applied to `chat()` streams) |
|
| 77 |
+
|
| 78 |
+
### Methods
|
| 79 |
+
|
| 80 |
+
| Method | Returns | Description |
|
| 81 |
+
| -------------------- | ------------------------- | -------------------- |
|
| 82 |
+
| `health()` | `Promise<HealthResponse>` | Liveness probe |
|
| 83 |
+
| `status()` | `Promise<StatusResponse>` | KB metadata |
|
| 84 |
+
| `indexFiles(files)` | `Promise<IndexResult>` | Upload & index files |
|
| 85 |
+
| `chat(query, opts?)` | `AsyncGenerator<string>`  | Stream the answer; each yield is the full text so far |
|
| 86 |
+
| `reset()` | `Promise<{ok, message}>` | Clear knowledge base |
|
| 87 |
+
|
| 88 |
+
---
|
| 89 |
+
|
| 90 |
+
## Server Setup
|
| 91 |
+
|
| 92 |
+
Start the REST server (Python backend required):
|
| 93 |
+
|
| 94 |
+
```bash
|
| 95 |
+
pip install kerdos-rag
|
| 96 |
+
kerdos-rag api --port 8000
|
| 97 |
+
```
|
| 98 |
+
|
| 99 |
+
Or with Docker:
|
| 100 |
+
|
| 101 |
+
```bash
|
| 102 |
+
docker build --build-arg MODE=api -t kerdos-rag .
|
| 103 |
+
docker run -p 8000:8000 -e HF_TOKEN=hf_... kerdos-rag
|
| 104 |
+
```
|
| 105 |
+
|
| 106 |
+
---
|
| 107 |
+
|
| 108 |
+
## Error Handling
|
| 109 |
+
|
| 110 |
+
```typescript
|
| 111 |
+
import { KerdosAPIError } from "@kerdos/rag-client";
|
| 112 |
+
|
| 113 |
+
try {
|
| 114 |
+
for await (const token of client.chat("...")) { ... }
|
| 115 |
+
} catch (err) {
|
| 116 |
+
if (err instanceof KerdosAPIError) {
|
| 117 |
+
console.error(`API ${err.statusCode}:`, err.message);
|
| 118 |
+
}
|
| 119 |
+
}
|
| 120 |
+
```
|
| 121 |
+
|
| 122 |
+
---
|
| 123 |
+
|
| 124 |
+
_© 2024–2026 Kerdos Infrasoft Private Limited | [kerdos.in](https://kerdos.in)_
|
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "@kerdos/rag-client",
|
| 3 |
+
"version": "0.1.0",
|
| 4 |
+
"description": "TypeScript/JS client for the Kerdos RAG REST API",
|
| 5 |
+
"main": "dist/index.js",
|
| 6 |
+
"module": "dist/index.mjs",
|
| 7 |
+
"types": "dist/index.d.ts",
|
| 8 |
+
"files": [
|
| 9 |
+
"dist",
|
| 10 |
+
"README.md"
|
| 11 |
+
],
|
| 12 |
+
"scripts": {
|
| 13 |
+
"build": "tsc --project tsconfig.json",
|
| 14 |
+
"dev": "tsc --watch",
|
| 15 |
+
"test": "node --test dist/index.test.js"
|
| 16 |
+
},
|
| 17 |
+
"keywords": [
|
| 18 |
+
"rag",
|
| 19 |
+
"llm",
|
| 20 |
+
"document-qa",
|
| 21 |
+
"kerdos",
|
| 22 |
+
"enterprise",
|
| 23 |
+
"ai"
|
| 24 |
+
],
|
| 25 |
+
"author": "Kerdos Infrasoft Private Limited <partnership@kerdos.in>",
|
| 26 |
+
"license": "MIT",
|
| 27 |
+
"homepage": "https://kerdos.in",
|
| 28 |
+
"repository": {
|
| 29 |
+
"type": "git",
|
| 30 |
+
"url": "https://huggingface.co/spaces/kerdosdotio/Custom-LLM-Chat"
|
| 31 |
+
},
|
| 32 |
+
"engines": {
|
| 33 |
+
"node": ">=18"
|
| 34 |
+
},
|
| 35 |
+
"devDependencies": {
|
| 36 |
+
"typescript": "^5.4.0"
|
| 37 |
+
}
|
| 38 |
+
}
|
|
@@ -0,0 +1,234 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* @kerdos/rag-client
|
| 3 |
+
* TypeScript/JavaScript client for the Kerdos RAG REST API.
|
| 4 |
+
*
|
| 5 |
+
* @example
|
| 6 |
+
* ```ts
|
| 7 |
+
* import { KerdosRAGClient } from "@kerdos/rag-client";
|
| 8 |
+
*
|
| 9 |
+
* const client = new KerdosRAGClient({ baseUrl: "http://localhost:8000" });
|
| 10 |
+
*
|
| 11 |
+
* // Index documents
|
| 12 |
+
* const result = await client.indexFiles([fileInput.files[0]]);
|
| 13 |
+
* console.log(result.indexed);
|
| 14 |
+
*
|
| 15 |
+
* // Stream an answer
|
| 16 |
+
* for await (const token of client.chat("What is the refund policy?")) {
|
| 17 |
+
 *     process.stdout.write("\r" + token); // each yield is the full answer so far
|
| 18 |
+
* }
|
| 19 |
+
* ```
|
| 20 |
+
*/
|
| 21 |
+
|
| 22 |
+
// ── Types ─────────────────────────────────────────────────────────────────────
|
| 23 |
+
|
| 24 |
+
export interface KerdosRAGClientOptions {
|
| 25 |
+
/** Base URL of the Kerdos RAG REST server, e.g. "http://localhost:8000" */
|
| 26 |
+
baseUrl: string;
|
| 27 |
+
/** Optional API key sent as X-Api-Key header (required if server has API_KEY set). */
|
| 28 |
+
apiKey?: string;
|
| 29 |
+
/** Request timeout in milliseconds. Default: 30 000 */
|
| 30 |
+
timeoutMs?: number;
|
| 31 |
+
}
|
| 32 |
+
|
| 33 |
+
export interface HealthResponse {
|
| 34 |
+
status: "ok";
|
| 35 |
+
version: string;
|
| 36 |
+
}
|
| 37 |
+
|
| 38 |
+
export interface StatusResponse {
|
| 39 |
+
indexed_sources: string[];
|
| 40 |
+
chunk_count: number;
|
| 41 |
+
model: string;
|
| 42 |
+
top_k: number;
|
| 43 |
+
min_score: number;
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
+
export interface IndexResult {
|
| 47 |
+
indexed: string[];
|
| 48 |
+
skipped: string[];
|
| 49 |
+
chunk_count: number;
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
export interface ChatMessage {
|
| 53 |
+
role: "user" | "assistant";
|
| 54 |
+
content: string;
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
+
export interface ChatOptions {
|
| 58 |
+
/** Prior conversation turns. */
|
| 59 |
+
history?: ChatMessage[];
|
| 60 |
+
/** Override top-K for this request only. */
|
| 61 |
+
topK?: number;
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
// ── Client ────────────────────────────────────────────────────────────────────
|
| 65 |
+
|
| 66 |
+
/**
 * HTTP client for the Kerdos RAG REST server.
 *
 * JSON endpoints go through {@link KerdosRAGClient.fetchJSON}, which enforces
 * `timeoutMs`. `chat()` consumes a Server-Sent Events stream and is not
 * subject to that timeout.
 */
export class KerdosRAGClient {
  private readonly baseUrl: string;
  private readonly headers: Record<string, string>;
  private readonly timeoutMs: number;

  /**
   * @param options Server URL, optional API key and optional timeout
   *                (defaults to 30 000 ms).
   */
  constructor(options: KerdosRAGClientOptions) {
    this.baseUrl = options.baseUrl.replace(/\/$/, ""); // strip trailing slash
    this.timeoutMs = options.timeoutMs ?? 30_000;
    // Only send X-Api-Key when a key was actually supplied.
    this.headers = {
      ...(options.apiKey ? { "X-Api-Key": options.apiKey } : {}),
    };
  }

  // ── Internal helpers ────────────────────────────────────────────────────────

  /** Join the normalised base URL with an absolute request path. */
  private url(path: string): string {
    return `${this.baseUrl}${path}`;
  }

  /**
   * Perform a request and parse the JSON response body.
   *
   * The request is aborted after `timeoutMs`. The body is awaited *inside*
   * the `try` so the timeout also covers downloading/parsing the body, not
   * just receiving the response headers.
   *
   * @throws KerdosAPIError when the server answers with a non-2xx status.
   */
  private async fetchJSON<T>(path: string, init?: RequestInit): Promise<T> {
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), this.timeoutMs);

    try {
      const res = await fetch(this.url(path), {
        ...init,
        headers: { ...this.headers, ...(init?.headers ?? {}) },
        signal: controller.signal,
      });

      if (!res.ok) {
        const text = await res.text().catch(() => res.statusText);
        throw new KerdosAPIError(res.status, text);
      }

      // `await` here (rather than returning the promise) keeps the timer
      // armed until the body has been fully read.
      return (await res.json()) as T;
    } finally {
      clearTimeout(timer);
    }
  }

  // ── Public API ──────────────────────────────────────────────────────────────

  /**
   * Check server liveness.
   *
   * @returns `{ status: "ok", version: "0.1.0" }`
   */
  async health(): Promise<HealthResponse> {
    return this.fetchJSON<HealthResponse>("/health");
  }

  /**
   * Get current knowledge-base metadata.
   */
  async status(): Promise<StatusResponse> {
    return this.fetchJSON<StatusResponse>("/status");
  }

  /**
   * Upload and index one or more {@link File} objects.
   *
   * Works in both **browser** (`File` from `<input type="file">`) and
   * **Node.js ≥ 18** (`File` from the `buffer` module or `Blob`).
   *
   * @param files Array of File / Blob objects.
   * @returns Summary of what was indexed and skipped.
   */
  async indexFiles(files: File[] | Blob[]): Promise<IndexResult> {
    const form = new FormData();
    files.forEach((f) => form.append("files", f));

    return this.fetchJSON<IndexResult>("/index", {
      method: "POST",
      body: form,
    });
  }

  /**
   * Ask a question and receive a streamed answer via an async iterator.
   *
   * Internally consumes the Server-Sent Events stream from `POST /chat`.
   * The stream is cancelled when the generator finishes for any reason,
   * including the consumer breaking out of its `for await` loop early.
   *
   * @param query   The user's question.
   * @param options Optional history and top-K override.
   *
   * @yields The full answer so far each time new data arrives.
   *
   * @throws KerdosAPIError when the server answers with a non-2xx status.
   *
   * @example
   * ```ts
   * let answer = "";
   * for await (const token of client.chat("Summarize the contract.")) {
   *   answer = token;  // token is the full answer so far (progressive)
   *   renderUI(answer);
   * }
   * ```
   */
  async *chat(query: string, options: ChatOptions = {}): AsyncGenerator<string> {
    const body = JSON.stringify({
      query,
      history: options.history ?? null,
      top_k: options.topK ?? null,
    });

    const controller = new AbortController();

    const res = await fetch(this.url("/chat"), {
      method: "POST",
      headers: {
        ...this.headers,
        "Content-Type": "application/json",
        "Accept": "text/event-stream",
      },
      body,
      signal: controller.signal,
    });

    if (!res.ok) {
      const text = await res.text().catch(() => res.statusText);
      throw new KerdosAPIError(res.status, text);
    }

    if (!res.body) throw new Error("Response body is null — SSE stream unavailable.");

    const reader = res.body.getReader();
    const decoder = new TextDecoder();
    let buffer = "";

    try {
      while (true) {
        const { done, value } = await reader.read();

        // At end of stream, flush the decoder and terminate the buffer so a
        // final `data:` line without a trailing newline is still delivered.
        buffer += done
          ? decoder.decode() + "\n"
          : decoder.decode(value, { stream: true });

        const lines = buffer.split("\n");
        buffer = lines.pop() ?? ""; // retain incomplete last line

        for (const line of lines) {
          if (!line.startsWith("data: ")) continue;
          const data = line.slice(6).trim();
          if (data === "[DONE]") return;
          // Un-escape newlines escaped by the server
          yield data.replace(/\\n/g, "\n");
        }

        if (done) return;
      }
    } finally {
      // Tear the HTTP stream down instead of merely unlocking it; otherwise
      // an early exit by the consumer leaves the connection open.
      await reader.cancel().catch(() => undefined);
      reader.releaseLock();
      controller.abort();
    }
  }

  /**
   * Clear the entire knowledge base on the server.
   */
  async reset(): Promise<{ ok: boolean; message: string }> {
    return this.fetchJSON("/reset", { method: "DELETE" });
  }
}
|
| 223 |
+
|
| 224 |
+
// ── Error type ────────────────────────────────────────────────────────────────
|
| 225 |
+
|
| 226 |
+
/**
 * Error raised by the client when the server responds with a non-2xx status.
 *
 * `statusCode` carries the HTTP status; `message` is prefixed with
 * "KerdosRAG API error <status>: " followed by the text passed in.
 */
export class KerdosAPIError extends Error {
  /** HTTP status code returned by the server. */
  public readonly statusCode: number;

  constructor(statusCode: number, message: string) {
    super(`KerdosRAG API error ${statusCode}: ${message}`);
    this.statusCode = statusCode;
    this.name = "KerdosAPIError";
  }
}
|
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"compilerOptions": {
|
| 3 |
+
"target": "ES2020",
|
| 4 |
+
"module": "ESNext",
|
| 5 |
+
"moduleResolution": "bundler",
|
| 6 |
+
"lib": ["ES2020", "DOM"],
|
| 7 |
+
"outDir": "./dist",
|
| 8 |
+
"declaration": true,
|
| 9 |
+
"declarationMap": true,
|
| 10 |
+
"sourceMap": true,
|
| 11 |
+
"strict": true,
|
| 12 |
+
"esModuleInterop": true,
|
| 13 |
+
"skipLibCheck": true,
|
| 14 |
+
"forceConsistentCasingInFileNames": true
|
| 15 |
+
},
|
| 16 |
+
"include": ["src/**/*.ts"],
|
| 17 |
+
"exclude": ["node_modules", "dist"]
|
| 18 |
+
}
|
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
tests/test_api.py
|
| 3 |
+
FastAPI endpoint tests using httpx + Starlette TestClient.
|
| 4 |
+
No HF token or real LLM calls are needed.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import sys
|
| 9 |
+
import pytest
|
| 10 |
+
|
| 11 |
+
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@pytest.fixture(autouse=True)
def reset_engine():
    """Reset the server's module-level engine before and after every test."""
    import kerdos_rag.server as server_module

    shared_engine = server_module._engine
    shared_engine.reset()  # setup: start from an empty engine
    yield
    shared_engine.reset()  # teardown: leave nothing behind for the next test
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
@pytest.fixture
def client():
    """Build a FastAPI ``TestClient`` wrapping the REST application."""
    import fastapi.testclient
    import kerdos_rag.server as server_module

    test_client = fastapi.testclient.TestClient(server_module.app)
    return test_client
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
# ── /health ───────────────────────────────────────────────────────────────────
|
| 31 |
+
|
| 32 |
+
def test_health(client):
    """GET /health answers 200 and reports status "ok"."""
    response = client.get("/health")

    assert response.status_code == 200
    reported_status = response.json()["status"]
    assert reported_status == "ok"
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
# ── /status ───────────────────────────────────────────────────────────────────
|
| 39 |
+
|
| 40 |
+
def test_status_empty(client):
    """GET /status on a freshly reset engine reports no chunks and no sources."""
    response = client.get("/status")
    assert response.status_code == 200

    payload = response.json()
    assert payload["chunk_count"] == 0
    assert payload["indexed_sources"] == []
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
# ── /index ────────────────────────────────────────────────────────────────────
|
| 49 |
+
|
| 50 |
+
def test_index_txt_file(client, tmp_path):
    """Uploading a plain-text file to POST /index indexes it and yields chunks."""
    source = tmp_path / "info.txt"
    source.write_text("The return policy allows 30-day refunds.", encoding="utf-8")

    with source.open("rb") as handle:
        upload = {"files": ("info.txt", handle, "text/plain")}
        response = client.post("/index", files=upload)

    assert response.status_code == 200
    payload = response.json()
    assert "info.txt" in payload["indexed"]
    assert payload["chunk_count"] > 0
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def test_index_reflects_in_status(client, tmp_path):
    """A file indexed via POST /index shows up in the GET /status metadata."""
    source = tmp_path / "data.txt"
    source.write_text("Important enterprise data.", encoding="utf-8")

    with source.open("rb") as handle:
        upload = {"files": ("data.txt", handle, "text/plain")}
        client.post("/index", files=upload)

    snapshot = client.get("/status").json()
    assert "data.txt" in snapshot["indexed_sources"]
    assert snapshot["chunk_count"] > 0
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def test_index_skips_duplicate(client, tmp_path):
    """Uploading the same filename twice reports it as skipped the second time."""
    source = tmp_path / "dup.txt"
    source.write_text("Some content.", encoding="utf-8")

    # First upload indexes the file; only the second response is inspected.
    with source.open("rb") as first_handle:
        client.post("/index", files={"files": ("dup.txt", first_handle, "text/plain")})
    with source.open("rb") as second_handle:
        second_response = client.post(
            "/index", files={"files": ("dup.txt", second_handle, "text/plain")}
        )

    payload = second_response.json()
    assert "dup.txt" in payload["skipped"]
    assert payload["indexed"] == []
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
# ── /chat ─────────────────────────────────────────────────────────────────────
|
| 90 |
+
|
| 91 |
+
def test_chat_422_when_empty(client):
    """POST /chat answers 422 when the engine has just been reset (nothing indexed)."""
    question = {"query": "What is the policy?"}
    response = client.post("/chat", json=question)
    assert response.status_code == 422
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
# ── /reset ────────────────────────────────────────────────────────────────────
|
| 97 |
+
|
| 98 |
+
def test_reset(client, tmp_path):
    """DELETE /reset empties a knowledge base that previously held chunks."""
    sample = tmp_path / "file.txt"
    sample.write_text("Some data.", encoding="utf-8")

    with sample.open("rb") as handle:
        client.post("/index", files={"files": ("file.txt", handle, "text/plain")})

    chunks_before = client.get("/status").json()["chunk_count"]
    assert chunks_before > 0

    response = client.delete("/reset")
    assert response.status_code == 200
    assert response.json()["ok"] is True

    chunks_after = client.get("/status").json()["chunk_count"]
    assert chunks_after == 0
|
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
tests/test_core.py
|
| 3 |
+
Unit tests for the KerdosRAG public API (no HF token required).
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import sys
|
| 8 |
+
import tempfile
|
| 9 |
+
|
| 10 |
+
import pytest
|
| 11 |
+
|
| 12 |
+
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
| 13 |
+
|
| 14 |
+
from kerdos_rag import KerdosRAG
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
# ── Fixtures ──────────────────────────────────────────────────────────────────
|
| 18 |
+
|
| 19 |
+
@pytest.fixture
def engine():
    """Provide a brand-new KerdosRAG instance with nothing indexed."""
    # The placeholder token is not exercised by the indexing-only tests.
    fresh = KerdosRAG(hf_token="hf_dummy")
    return fresh
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
@pytest.fixture
def indexed_engine(tmp_path):
    """Provide a KerdosRAG instance that has already indexed ``policy.txt``."""
    policy_text = (
        "The refund policy allows returns within 30 days of purchase. "
        "Contact support at support@example.com for assistance."
    )
    policy_path = tmp_path / "policy.txt"
    policy_path.write_text(policy_text, encoding="utf-8")

    rag = KerdosRAG(hf_token="hf_dummy")
    rag.index([str(policy_path)])
    return rag
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
# ── Tests ─────────────────────────────────────────────────────────────────────
|
| 40 |
+
|
| 41 |
+
def test_initial_state(engine):
    """A new engine is not ready and holds no chunks or sources."""
    ready = engine.is_ready
    chunks = engine.chunk_count
    sources = engine.indexed_sources

    assert ready is False
    assert chunks == 0
    assert sources == set()
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def test_index_returns_correct_metadata(indexed_engine):
    """After indexing, the engine is ready, has chunks, and lists the file name."""
    rag = indexed_engine

    assert rag.is_ready
    assert rag.chunk_count > 0
    assert "policy.txt" in rag.indexed_sources
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def test_index_skips_duplicates(indexed_engine, tmp_path):
    """Indexing a file named like an already-indexed one reports it as skipped."""
    duplicate = tmp_path / "policy.txt"
    duplicate.write_text("Some extra content.", encoding="utf-8")

    outcome = indexed_engine.index([str(duplicate)])

    assert "policy.txt" in outcome["skipped"]
    assert "policy.txt" not in outcome["indexed"]
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def test_index_multiple_files(engine, tmp_path):
    """Two distinct files passed in one call are both reported as indexed."""
    documents = {"a.txt": "Alpha content here.", "b.txt": "Beta content here."}

    paths = []
    for filename, text in documents.items():
        target = tmp_path / filename
        target.write_text(text, encoding="utf-8")
        paths.append(str(target))

    outcome = engine.index(paths)
    assert len(outcome["indexed"]) == 2
    assert outcome["chunk_count"] > 0
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def test_reset_clears_index(indexed_engine):
    """reset() returns an indexed engine to its empty, not-ready state."""
    rag = indexed_engine
    assert rag.is_ready

    rag.reset()

    assert not rag.is_ready
    assert rag.chunk_count == 0
    assert rag.indexed_sources == set()
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def test_chat_raises_when_not_indexed(engine):
    """Consuming chat() on an un-indexed engine raises RuntimeError."""
    with pytest.raises(RuntimeError, match="No documents indexed"):
        # Drain the result so the error surfaces even if chat() is lazy.
        for _ in engine.chat("What is the policy?"):
            pass
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
def test_chat_raises_without_token(tmp_path):
    """chat() raises ValueError when the engine was built with an empty token."""
    source = tmp_path / "doc.txt"
    source.write_text("Hello world.", encoding="utf-8")

    tokenless = KerdosRAG(hf_token="")
    tokenless.index([str(source)])

    with pytest.raises(ValueError, match="No Hugging Face token"):
        # Drain the result so the error surfaces even if chat() is lazy.
        for _ in tokenless.chat("What does it say?"):
            pass
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
def test_save_and_load(indexed_engine, tmp_path):
    """save() writes both index artifacts and load() restores an equivalent engine."""
    target_dir = tmp_path / "saved_index"
    indexed_engine.save(str(target_dir))

    for artifact in ("kerdos_index.faiss", "kerdos_meta.pkl"):
        assert (target_dir / artifact).exists()

    reloaded = KerdosRAG.load(str(target_dir), hf_token="hf_dummy")
    assert reloaded.is_ready
    assert reloaded.chunk_count == indexed_engine.chunk_count
    assert reloaded.indexed_sources == indexed_engine.indexed_sources
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
def test_save_raises_when_empty(engine, tmp_path):
    """save() on an engine with nothing indexed raises RuntimeError."""
    destination = str(tmp_path / "empty")
    with pytest.raises(RuntimeError, match="Nothing to save"):
        engine.save(destination)
|