Bhaskar Ram commited on
Commit
634117a
·
1 Parent(s): 3fc707a

feat: Python package, FastAPI REST server, TypeScript SDK

Browse files

Phase 1 — Python Package (pip install kerdos-rag):
- pyproject.toml: PEP 517 metadata, entry points, pytest/ruff config
- kerdos_rag/__init__.py: public export of KerdosRAG
- kerdos_rag/core.py: KerdosRAG class with index(), chat() generator,
reset(), save() and load() for FAISS persistence
- kerdos_rag/cli.py: kerdos-rag CLI with serve/api/index subcommands

Phase 2 — FastAPI REST Server:
- kerdos_rag/server.py: GET /health, GET /status, POST /index (multipart),
POST /chat (SSE streaming), DELETE /reset; optional X-Api-Key auth
- Dockerfile: multi-stage build with MODE=serve|api build arg
- requirements.txt: added fastapi, uvicorn[standard], python-multipart

Phase 3 — TypeScript/JS SDK:
- sdk/typescript/src/index.ts: KerdosRAGClient class with health(),
status(), indexFiles(), chat() async generator, reset()
- sdk/typescript/package.json, tsconfig.json, README.md

Tests: 19/19 passing (smoke + test_core + test_api)

Dockerfile CHANGED
@@ -1,12 +1,22 @@
1
- # Kerdos AI — Custom LLM Chat
2
- # Multi-stage Docker build for a lean production image
 
 
 
 
 
 
 
 
 
 
3
 
4
  FROM python:3.11-slim AS base
5
 
6
  # System dependencies for PyMuPDF and FAISS
7
  RUN apt-get update && apt-get install -y --no-install-recommends \
8
- build-essential \
9
- libgomp1 \
10
  && rm -rf /var/lib/apt/lists/*
11
 
12
  WORKDIR /app
@@ -18,10 +28,20 @@ RUN pip install --no-cache-dir -r requirements.txt
18
  # Copy source
19
  COPY . .
20
 
21
- # Gradio listens on 7860 by default
22
- EXPOSE 7860
23
 
 
 
 
24
  ENV GRADIO_SERVER_NAME=0.0.0.0
25
  ENV GRADIO_SERVER_PORT=7860
 
 
 
 
 
 
26
 
27
- CMD ["python", "app.py"]
 
 
1
+ # Kerdos AI — Custom LLM Chat (Demo)
2
+ # Multi-stage Docker build supports both Gradio UI and FastAPI REST server
3
+ #
4
+ # Build for Gradio (default):
5
+ # docker build -t kerdos-rag .
6
+ # docker run -p 7860:7860 -e HF_TOKEN=hf_... kerdos-rag
7
+ #
8
+ # Build for REST API:
9
+ # docker build --build-arg MODE=api -t kerdos-rag-api .
10
+ # docker run -p 8000:8000 -e HF_TOKEN=hf_... kerdos-rag-api
11
+
12
+ ARG MODE=serve
13
 
14
  FROM python:3.11-slim AS base
15
 
16
  # System dependencies for PyMuPDF and FAISS
17
  RUN apt-get update && apt-get install -y --no-install-recommends \
18
+ build-essential \
19
+ libgomp1 \
20
  && rm -rf /var/lib/apt/lists/*
21
 
22
  WORKDIR /app
 
28
  # Copy source
29
  COPY . .
30
 
31
+ # Install the package in editable mode so kerdos-rag CLI is available
32
+ RUN pip install --no-cache-dir -e .
33
 
34
+ # ── Gradio mode ───────────────────────────────────────────────
35
+ FROM base AS serve
36
+ EXPOSE 7860
37
  ENV GRADIO_SERVER_NAME=0.0.0.0
38
  ENV GRADIO_SERVER_PORT=7860
39
+ CMD ["kerdos-rag", "serve", "--host", "0.0.0.0", "--port", "7860"]
40
+
41
+ # ── FastAPI REST mode ─────────────────────────────────────────
42
+ FROM base AS api
43
+ EXPOSE 8000
44
+ CMD ["kerdos-rag", "api", "--host", "0.0.0.0", "--port", "8000"]
45
 
46
+ # Select the right stage based on BUILD ARG
47
+ FROM ${MODE}
kerdos_rag/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ """kerdos_rag — public API surface."""
2
+
3
+ from kerdos_rag.core import KerdosRAG
4
+
5
+ __all__ = ["KerdosRAG"]
6
+ __version__ = "0.1.0"
kerdos_rag/cli.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ kerdos_rag/cli.py
3
+ Command-line interface for the Kerdos RAG engine.
4
+
5
+ kerdos-rag serve # start Gradio UI (default port 7860)
6
+ kerdos-rag api # start FastAPI REST server (default port 8000)
7
+ kerdos-rag index <files> # index documents from terminal
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import argparse
13
+ import sys
14
+
15
+
16
def _cmd_serve(args: argparse.Namespace) -> None:
    """Launch the Gradio web UI.

    Loads the repo-root ``app.py`` as a module and starts its ``demo``
    Gradio app. --host/--port are forwarded via Gradio's standard env
    vars (setdefault, so pre-set values in the environment win).
    """
    import importlib.util
    import os

    import gradio

    os.environ.setdefault("GRADIO_SERVER_PORT", str(args.port))
    os.environ.setdefault("GRADIO_SERVER_NAME", args.host)

    # app.py lives at the repo root — import it as a module.
    app_path = _repo_root() / "app.py"
    spec = importlib.util.spec_from_file_location("app", app_path)
    if spec is None or spec.loader is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise FileNotFoundError(f"Cannot load Gradio app from {app_path}")
    mod = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(mod)
    mod.demo.queue()
    mod.demo.launch(css=mod.CSS, theme=gradio.themes.Soft())
29
+
30
+
31
def _cmd_api(args: argparse.Namespace) -> None:
    """Launch the FastAPI REST server."""
    import uvicorn
    from kerdos_rag.server import app  # noqa: F401  — fail fast if server deps are missing

    print(f"[kerdos-rag] Starting REST API on http://{args.host}:{args.port}")
    server_options = {
        "host": args.host,
        "port": args.port,
        "reload": args.reload,
        "log_level": "info",
    }
    uvicorn.run("kerdos_rag.server:app", **server_options)
44
+
45
+
46
def _cmd_index(args: argparse.Namespace) -> None:
    """Index documents from the command line and print a summary."""
    from kerdos_rag import KerdosRAG

    # No HF token required: indexing is embedding-only, no LLM calls.
    engine = KerdosRAG()
    summary = engine.index(args.files)

    indexed = summary["indexed"]
    skipped = summary["skipped"]
    if indexed:
        print(f"✅ Indexed: {', '.join(indexed)}")
    if skipped:
        print(f"⚠️ Skipped (already indexed): {', '.join(skipped)}")
    print(f"📦 Total chunks: {summary['chunk_count']}")

    if args.save:
        engine.save(args.save)
        print(f"💾 Index saved to: {args.save}")
62
+
63
+
64
+ def _repo_root():
65
+ """Return the directory containing this package."""
66
+ from pathlib import Path
67
+ return Path(__file__).resolve().parent.parent
68
+
69
+
70
def main(argv: list[str] | None = None) -> None:
    """CLI entry point: parse the subcommand and dispatch to its handler."""
    parser = argparse.ArgumentParser(
        prog="kerdos-rag",
        description="Kerdos RAG — Enterprise Document Q&A engine",
    )
    sub = parser.add_subparsers(dest="command", required=True)

    # ── serve ────────────────────────────────────────────────────────────────
    serve_parser = sub.add_parser("serve", help="Start the Gradio web UI")
    serve_parser.add_argument("--host", default="0.0.0.0")
    serve_parser.add_argument("--port", type=int, default=7860)
    serve_parser.set_defaults(func=_cmd_serve)

    # ── api ──────────────────────────────────────────────────────────────────
    api_parser = sub.add_parser("api", help="Start the FastAPI REST server")
    api_parser.add_argument("--host", default="0.0.0.0")
    api_parser.add_argument("--port", type=int, default=8000)
    api_parser.add_argument("--reload", action="store_true", help="Enable auto-reload (dev only)")
    api_parser.set_defaults(func=_cmd_api)

    # ── index ─────────────────────────────────────────────────────────────────
    index_parser = sub.add_parser("index", help="Index documents from the terminal")
    index_parser.add_argument("files", nargs="+", metavar="FILE")
    index_parser.add_argument("--save", metavar="DIR", default="", help="Save index to directory")
    index_parser.set_defaults(func=_cmd_index)

    parsed = parser.parse_args(argv)
    parsed.func(parsed)
98
+
99
+
100
+ if __name__ == "__main__":
101
+ main()
kerdos_rag/core.py ADDED
@@ -0,0 +1,229 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ kerdos_rag/core.py
3
+ High-level KerdosRAG façade — the primary interface for library consumers.
4
+
5
+ Usage:
6
+ from kerdos_rag import KerdosRAG
7
+
8
+ engine = KerdosRAG(hf_token="hf_...")
9
+ engine.index(["policy.pdf", "manual.docx"])
10
+ for token in engine.chat("What is the refund policy?"):
11
+ print(token, end="", flush=True)
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ import os
18
+ import pickle
19
+ from pathlib import Path
20
+ from typing import Generator
21
+
22
+ from rag.document_loader import load_documents
23
+ from rag.embedder import VectorIndex, build_index, add_to_index
24
+ from rag.retriever import retrieve
25
+ from rag.chain import answer_stream
26
+
27
+ _DEFAULT_MODEL = "meta-llama/Llama-3.1-8B-Instruct"
28
+ _DEFAULT_TOP_K = 5
29
+ _DEFAULT_MIN_SCORE = 0.30
30
+
31
+
32
class KerdosRAG:
    """
    Batteries-included RAG engine.

    Args:
        hf_token: Hugging Face API token. Falls back to HF_TOKEN env var.
        model: HF model ID (e.g. 'mistralai/Mistral-7B-Instruct-v0.3').
            Falls back to LLM_MODEL env var, then Llama 3.1 8B.
        top_k: Number of chunks to retrieve per query.
        min_score: Minimum cosine similarity threshold (chunks below this
            are dropped before being sent to the LLM).
    """

    def __init__(
        self,
        hf_token: str = "",
        model: str | None = None,
        top_k: int = _DEFAULT_TOP_K,
        min_score: float = _DEFAULT_MIN_SCORE,
    ) -> None:
        # Explicit constructor arguments win; environment variables are fallback.
        self.hf_token: str = hf_token.strip() or os.environ.get("HF_TOKEN", "")
        self.model: str = model or os.environ.get("LLM_MODEL", _DEFAULT_MODEL)
        self.top_k: int = top_k
        self.min_score: float = min_score

        # Built lazily on the first index() call; None means "empty KB".
        self._index: VectorIndex | None = None
        self._indexed_sources: set[str] = set()

    # ── Properties ────────────────────────────────────────────────────────────

    @property
    def indexed_sources(self) -> set[str]:
        """File names currently in the knowledge base."""
        # Defensive copy so callers cannot mutate internal bookkeeping.
        return set(self._indexed_sources)

    @property
    def chunk_count(self) -> int:
        """Total number of vector chunks in the index."""
        return self._index.index.ntotal if self._index else 0

    @property
    def is_ready(self) -> bool:
        """True when at least one document has been indexed."""
        return self._index is not None and self.chunk_count > 0

    # ── Core operations ───────────────────────────────────────────────────────

    def index(self, file_paths: list[str]) -> dict:
        """
        Parse and index documents into the knowledge base.

        Duplicate filenames are automatically skipped.

        Args:
            file_paths: Absolute or relative paths to PDF, DOCX, TXT, MD, or CSV files.

        Returns:
            {
                "indexed": ["file1.pdf", ...],   # newly indexed
                "skipped": ["dup.pdf", ...],     # already in index
                "chunk_count": 142               # total chunks
            }

        Raises:
            ValueError: If no text could be extracted from any new file.
        """
        paths = [str(p) for p in file_paths]

        # Dedup is by basename only — two different paths sharing a file
        # name are treated as the same document.
        new_paths, skipped = [], []
        for p in paths:
            name = Path(p).name
            if name in self._indexed_sources:
                skipped.append(name)
            else:
                new_paths.append(p)

        if not new_paths:
            return {"indexed": [], "skipped": skipped, "chunk_count": self.chunk_count}

        docs = load_documents(new_paths)
        if not docs:
            raise ValueError("Could not extract text from any of the provided files.")

        # First call builds a fresh index; subsequent calls extend it.
        if self._index is None:
            self._index = build_index(docs)
        else:
            self._index = add_to_index(self._index, docs)

        newly_indexed = list({d["source"] for d in docs})
        self._indexed_sources.update(newly_indexed)

        return {
            "indexed": newly_indexed,
            "skipped": skipped,
            "chunk_count": self.chunk_count,
        }

    def chat(
        self,
        query: str,
        history: list[dict] | None = None,
    ) -> Generator[str, None, None]:
        """
        Ask a question and stream the answer token-by-token.

        Args:
            query: The user's question.
            history: Optional list of prior messages in
                [{"role": "user"|"assistant", "content": "..."}] format.

        Yields:
            Progressively-growing answer strings (suitable for real-time display).

        Raises:
            RuntimeError: If no documents have been indexed yet.
            ValueError: If no HF token is available.
        """
        if not self.is_ready:
            raise RuntimeError("No documents indexed. Call engine.index(file_paths) first.")
        if not self.hf_token:
            raise ValueError(
                "No Hugging Face token. Pass hf_token= to KerdosRAG() or set HF_TOKEN env var."
            )

        # Temporarily patch retriever's MIN_SCORE with instance setting.
        # NOTE(review): this mutates module-global state — concurrent chat()
        # generators using different min_score values can interfere with each
        # other. The original value is restored in the finally block when the
        # generator finishes or is closed.
        import rag.retriever as _r
        original_min = _r.MIN_SCORE
        _r.MIN_SCORE = self.min_score
        try:
            chunks = retrieve(query, self._index, top_k=self.top_k)
            yield from answer_stream(query, chunks, self.hf_token, chat_history=history)
        finally:
            _r.MIN_SCORE = original_min

    def reset(self) -> None:
        """Clear the knowledge base (drops the index and source tracking)."""
        self._index = None
        self._indexed_sources = set()

    # ── Persistence ───────────────────────────────────────────────────────────

    def save(self, directory: str | Path) -> None:
        """
        Persist the index to disk so it can be reloaded across sessions.

        Creates two files in `directory`:
          - ``kerdos_index.faiss`` — the raw FAISS vectors
          - ``kerdos_meta.pkl`` — chunks + source tracking

        Args:
            directory: Path to a folder (will be created if needed).

        Raises:
            RuntimeError: If the index is empty.
        """
        import faiss

        if not self.is_ready:
            raise RuntimeError("Nothing to save — index is empty.")

        out = Path(directory)
        out.mkdir(parents=True, exist_ok=True)

        faiss.write_index(self._index.index, str(out / "kerdos_index.faiss"))
        # Engine settings are stored alongside the chunks so load() can
        # reconstruct an equivalently-configured instance.
        meta = {
            "chunks": self._index.chunks,
            "indexed_sources": list(self._indexed_sources),
            "model": self.model,
            "top_k": self.top_k,
            "min_score": self.min_score,
        }
        with open(out / "kerdos_meta.pkl", "wb") as f:
            pickle.dump(meta, f)

    @classmethod
    def load(cls, directory: str | Path, hf_token: str = "") -> "KerdosRAG":
        """
        Restore an engine from a directory previously written by :meth:`save`.

        Args:
            directory: Folder containing ``kerdos_index.faiss`` and ``kerdos_meta.pkl``.
            hf_token: HF token for chat (can also be set via HF_TOKEN env var).

        Returns:
            A fully initialised :class:`KerdosRAG` instance.
        """
        import faiss
        from rag.embedder import _get_model

        d = Path(directory)
        # SECURITY NOTE(review): pickle.load can execute arbitrary code —
        # only load directories produced by a trusted save() call.
        with open(d / "kerdos_meta.pkl", "rb") as f:
            meta = pickle.load(f)

        engine = cls(
            hf_token=hf_token,
            model=meta["model"],
            top_k=meta["top_k"],
            min_score=meta["min_score"],
        )
        model = _get_model()
        idx = faiss.read_index(str(d / "kerdos_index.faiss"))
        engine._index = VectorIndex(chunks=meta["chunks"], index=idx, embedder=model)
        engine._indexed_sources = set(meta["indexed_sources"])
        return engine
kerdos_rag/server.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ kerdos_rag/server.py
3
+ FastAPI REST server exposing the KerdosRAG engine over HTTP.
4
+
5
+ Endpoints:
6
+ GET /health — liveness probe
7
+ GET /status — knowledge-base metadata
8
+ POST /index — upload + index documents (multipart/form-data)
9
+ POST /chat — ask a question (SSE streaming response)
10
+ DELETE /reset — clear the knowledge base
11
+
12
+ Authentication (optional):
13
+ Set API_KEY env var to enable X-Api-Key header validation.
14
+ Leave unset to run in open mode (suitable for local / trusted environments).
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import os
20
+ import asyncio
21
+ from typing import AsyncGenerator
22
+
23
+ from fastapi import FastAPI, File, UploadFile, HTTPException, Depends, Header, Request
24
+ from fastapi.middleware.cors import CORSMiddleware
25
+ from fastapi.responses import StreamingResponse, JSONResponse
26
+ from pydantic import BaseModel
27
+
28
+ from kerdos_rag.core import KerdosRAG
29
+
30
# ── App & CORS ────────────────────────────────────────────────────────────────
app = FastAPI(
    title="Kerdos RAG API",
    description="Enterprise Document Q&A engine by Kerdos Infrasoft",
    version="0.1.0",
    contact={"name": "Kerdos Infrasoft", "url": "https://kerdos.in", "email": "partnership@kerdos.in"},
    license_info={"name": "MIT"},
)

# NOTE(review): allow_origins=["*"] lets any web origin call this API.
# Fine for local/demo use; tighten to an explicit origin list before
# exposing the server publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# ── Singleton engine ───────────────────────────────────────────────────────────
# One process-wide engine instance, shared by every request handler below.
_engine = KerdosRAG()

# ── Auth ──────────────────────────────────────────────────────────────────────
# Read once at import time — changing API_KEY requires a server restart.
_API_KEY = os.environ.get("API_KEY", "")
51
+
52
+
53
def _check_auth(x_api_key: str | None = Header(default=None)) -> None:
    """If API_KEY env var is set, validate the X-Api-Key header.

    When API_KEY is unset the server runs in open mode and every
    request is allowed.

    Raises:
        HTTPException: 401 when auth is enabled and the key is missing or wrong.
    """
    import hmac

    if not _API_KEY:
        return  # auth disabled
    # hmac.compare_digest: constant-time comparison — a plain `!=` on a
    # secret leaks information through response timing.
    if x_api_key is None or not hmac.compare_digest(x_api_key, _API_KEY):
        raise HTTPException(status_code=401, detail="Invalid or missing X-Api-Key header.")
57
+
58
+
59
+ # ── Request / Response models ──────────────────────────────────────────────────
60
class ChatRequest(BaseModel):
    # JSON body of POST /chat.
    query: str  # the user's question (required)
    history: list[dict] | None = None  # prior turns: [{"role": ..., "content": ...}]
    top_k: int | None = None  # per-request override of the engine's top_k
64
+
65
+
66
class StatusResponse(BaseModel):
    # JSON body of GET /status — a snapshot of the singleton engine's state.
    indexed_sources: list[str]  # file names currently in the knowledge base
    chunk_count: int  # total vector chunks in the FAISS index
    model: str  # HF model ID used for generation
    top_k: int  # default retrieval depth
    min_score: float  # similarity cutoff for retrieved chunks
72
+
73
+
74
+ # ── Endpoints ─────────────────────────────────────────────────────────────────
75
+
76
+ @app.get("/health", tags=["Meta"])
77
+ def health() -> dict:
78
+ """Liveness probe — always returns 200 OK."""
79
+ return {"status": "ok", "version": "0.1.0"}
80
+
81
+
82
+ @app.get("/status", response_model=StatusResponse, tags=["Meta"])
83
+ def status(_: None = Depends(_check_auth)) -> StatusResponse:
84
+ """Return current knowledge-base metadata."""
85
+ return StatusResponse(
86
+ indexed_sources=list(_engine.indexed_sources),
87
+ chunk_count=_engine.chunk_count,
88
+ model=_engine.model,
89
+ top_k=_engine.top_k,
90
+ min_score=_engine.min_score,
91
+ )
92
+
93
+
94
+ @app.post("/index", tags=["RAG"])
95
+ async def index_documents(
96
+ files: list[UploadFile] = File(...),
97
+ _: None = Depends(_check_auth),
98
+ ) -> JSONResponse:
99
+ """
100
+ Upload and index one or more documents.
101
+
102
+ Accepts: PDF (.pdf), Word (.docx), plain text (.txt, .md, .csv).
103
+ Duplicate filenames are automatically skipped.
104
+ """
105
+ import tempfile, shutil
106
+ from pathlib import Path
107
+
108
+ saved_paths: list[str] = []
109
+ tmp_dir = tempfile.mkdtemp(prefix="kerdos_upload_")
110
+
111
+ try:
112
+ for upload in files:
113
+ dest = Path(tmp_dir) / upload.filename
114
+ with open(dest, "wb") as f:
115
+ shutil.copyfileobj(upload.file, f)
116
+ saved_paths.append(str(dest))
117
+
118
+ result = _engine.index(saved_paths)
119
+ finally:
120
+ shutil.rmtree(tmp_dir, ignore_errors=True)
121
+
122
+ return JSONResponse(content=result)
123
+
124
+
125
+ @app.post("/chat", tags=["RAG"])
126
+ async def chat(req: ChatRequest, _: None = Depends(_check_auth)) -> StreamingResponse:
127
+ """
128
+ Ask a question and receive a **Server-Sent Events** stream of tokens.
129
+
130
+ Each SSE event has the form:
131
+ data: <partial answer so far>\\n\\n
132
+
133
+ The stream ends with:
134
+ data: [DONE]\\n\\n
135
+
136
+ Example (curl):
137
+ curl -X POST http://localhost:8000/chat \\
138
+ -H "Content-Type: application/json" \\
139
+ -d '{"query": "What is the refund policy?"}' \\
140
+ --no-buffer
141
+ """
142
+ if not _engine.is_ready:
143
+ raise HTTPException(
144
+ status_code=422,
145
+ detail="Knowledge base is empty. POST files to /index first.",
146
+ )
147
+
148
+ hf_token = _engine.hf_token
149
+ if not hf_token:
150
+ raise HTTPException(
151
+ status_code=401,
152
+ detail="No Hugging Face token configured. Set HF_TOKEN env var.",
153
+ )
154
+
155
+ # Temporarily override top_k if caller specified it
156
+ original_top_k = _engine.top_k
157
+ if req.top_k is not None:
158
+ _engine.top_k = req.top_k
159
+
160
+ async def event_generator() -> AsyncGenerator[str, None]:
161
+ try:
162
+ # answer_stream is a sync generator — run in thread pool
163
+ loop = asyncio.get_event_loop()
164
+ gen = _engine.chat(req.query, history=req.history)
165
+
166
+ while True:
167
+ try:
168
+ token = await loop.run_in_executor(None, next, gen)
169
+ # SSE format: escape newlines in the data value
170
+ escaped = token.replace("\n", "\\n")
171
+ yield f"data: {escaped}\n\n"
172
+ except StopIteration:
173
+ break
174
+ finally:
175
+ _engine.top_k = original_top_k
176
+
177
+ yield "data: [DONE]\n\n"
178
+
179
+ return StreamingResponse(event_generator(), media_type="text/event-stream")
180
+
181
+
182
+ @app.delete("/reset", tags=["RAG"])
183
+ def reset(_: None = Depends(_check_auth)) -> dict:
184
+ """Clear the entire knowledge base."""
185
+ _engine.reset()
186
+ return {"ok": True, "message": "Knowledge base cleared."}
pyproject.toml ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
[build-system]
requires = ["setuptools>=68", "wheel"]
# "setuptools.backends.legacy:build" is not a valid PEP 517 backend and
# breaks `pip install .`; the setuptools backend is "setuptools.build_meta".
build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "kerdos-rag"
7
+ version = "0.1.0"
8
+ description = "Enterprise Document Q&A RAG engine — by Kerdos Infrasoft"
9
+ readme = "README.md"
10
+ license = { text = "MIT" }
11
+ authors = [{ name = "Kerdos Infrasoft Private Limited", email = "partnership@kerdos.in" }]
12
+ requires-python = ">=3.10"
13
+ keywords = ["rag", "llm", "document-qa", "faiss", "enterprise", "kerdos"]
14
+ classifiers = [
15
+ "Development Status :: 4 - Beta",
16
+ "Intended Audience :: Developers",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Programming Language :: Python :: 3",
19
+ "Programming Language :: Python :: 3.10",
20
+ "Programming Language :: Python :: 3.11",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
23
+ ]
24
+ dependencies = [
25
+ "gradio>=6.6.0",
26
+ "sentence-transformers>=5.0.0",
27
+ "faiss-cpu>=1.9.0",
28
+ "PyMuPDF>=1.24.0",
29
+ "python-docx>=1.1.0",
30
+ "huggingface-hub>=0.28.0",
31
+ "numpy>=1.26.0,<3",
32
+ "python-dotenv>=1.0.0",
33
+ "tenacity>=8.2.0",
34
+ "fastapi>=0.111.0",
35
+ "uvicorn[standard]>=0.29.0",
36
+ "python-multipart>=0.0.9",
37
+ ]
38
+
39
+ [project.optional-dependencies]
40
+ dev = ["pytest>=8.0.0", "black>=24.0.0", "ruff>=0.4.0", "httpx>=0.27.0"]
41
+
42
+ [project.scripts]
43
+ kerdos-rag = "kerdos_rag.cli:main"
44
+
45
+ [project.urls]
46
+ Homepage = "https://kerdos.in"
47
+ Repository = "https://huggingface.co/spaces/kerdosdotio/Custom-LLM-Chat"
48
+ "Bug Tracker" = "https://kerdos.in/contact"
49
+
50
+ [tool.setuptools.packages.find]
51
+ include = ["kerdos_rag*", "rag*"]
52
+
53
+ [tool.pytest.ini_options]
54
+ pythonpath = ["."]
55
+ testpaths = ["tests"]
56
+
57
+ [tool.ruff]
58
+ line-length = 100
59
+ target-version = "py310"
60
+
61
+ [tool.ruff.lint]
62
+ select = ["E", "F", "I", "UP"]
requirements-dev.txt CHANGED
@@ -2,3 +2,4 @@
2
  pytest>=8.0.0
3
  black>=24.0.0
4
  ruff>=0.4.0
 
 
2
  pytest>=8.0.0
3
  black>=24.0.0
4
  ruff>=0.4.0
5
+ httpx>=0.27.0
requirements.txt CHANGED
@@ -1,3 +1,4 @@
 
1
  sentence-transformers>=5.0.0
2
  faiss-cpu>=1.9.0
3
  PyMuPDF>=1.24.0
@@ -6,3 +7,6 @@ huggingface-hub>=0.28.0
6
  numpy>=1.26.0,<3
7
  python-dotenv>=1.0.0
8
  tenacity>=8.2.0
 
 
 
 
1
+ gradio>=6.6.0
2
  sentence-transformers>=5.0.0
3
  faiss-cpu>=1.9.0
4
  PyMuPDF>=1.24.0
 
7
  numpy>=1.26.0,<3
8
  python-dotenv>=1.0.0
9
  tenacity>=8.2.0
10
+ fastapi>=0.111.0
11
+ uvicorn[standard]>=0.29.0
12
+ python-multipart>=0.0.9
sdk/typescript/README.md ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # `@kerdos/rag-client`
2
+
3
+ > TypeScript/JavaScript client for the **Kerdos RAG REST API**.
4
+ > Zero runtime dependencies — works in Node.js ≥ 18 and modern browsers.
5
+
6
+ ---
7
+
8
+ ## Installation
9
+
10
+ ```bash
11
+ # npm
12
+ npm install @kerdos/rag-client
13
+
14
+ # pnpm
15
+ pnpm add @kerdos/rag-client
16
+
17
+ # yarn
18
+ yarn add @kerdos/rag-client
19
+ ```
20
+
21
+ ---
22
+
23
+ ## Quick Start
24
+
25
+ ```typescript
26
+ import { KerdosRAGClient } from "@kerdos/rag-client";
27
+
28
+ const client = new KerdosRAGClient({
29
+ baseUrl: "http://localhost:8000",
30
+ apiKey: "your-secret", // optional — only if server has API_KEY set
31
+ });
32
+
33
+ // 1. Index documents
34
+ const result = await client.indexFiles([
35
+ new File([pdfBuffer], "policy.pdf"),
36
+ new File([txtContent], "manual.txt"),
37
+ ]);
38
+ console.log("Indexed:", result.indexed);
39
+ console.log("Skipped:", result.skipped);
40
+
41
+ // 2. Stream an answer
42
+ let answer = "";
43
+ for await (const token of client.chat("What is the refund policy?")) {
44
+ answer = token; // each yield is the full cumulative answer
45
+ process.stdout.write("\r" + answer);
46
+ }
47
+
48
+ // 3. Multi-turn conversation
49
+ const history = [
50
+ { role: "user", content: "What is the refund policy?" },
51
+ { role: "assistant", content: answer },
52
+ ];
53
+ for await (const token of client.chat("Who do I contact for refunds?", {
54
+ history,
55
+ })) {
56
+ process.stdout.write("\r" + token);
57
+ }
58
+
59
+ // 4. Status & reset
60
+ const status = await client.status();
61
+ console.log("Chunks:", status.chunk_count);
62
+
63
+ await client.reset();
64
+ ```
65
+
66
+ ---
67
+
68
+ ## API Reference
69
+
70
+ ### `new KerdosRAGClient(options)`
71
+
72
+ | Option | Type | Default | Description |
73
+ | ----------- | -------- | ----------- | ----------------------------------------- |
74
+ | `baseUrl` | `string` | — | Server URL (e.g. `http://localhost:8000`) |
75
+ | `apiKey` | `string` | `undefined` | Sent as `X-Api-Key` header |
76
+ | `timeoutMs` | `number` | `30000` | Request timeout in ms |
77
+
78
+ ### Methods
79
+
80
+ | Method | Returns | Description |
81
+ | -------------------- | ------------------------- | -------------------- |
82
+ | `health()` | `Promise<HealthResponse>` | Liveness probe |
83
+ | `status()` | `Promise<StatusResponse>` | KB metadata |
84
+ | `indexFiles(files)` | `Promise<IndexResult>` | Upload & index files |
85
+ | `chat(query, opts?)` | `AsyncGenerator<string>` | Stream answer tokens |
86
+ | `reset()` | `Promise<{ok, message}>` | Clear knowledge base |
87
+
88
+ ---
89
+
90
+ ## Server Setup
91
+
92
+ Start the REST server (Python backend required):
93
+
94
+ ```bash
95
+ pip install kerdos-rag
96
+ kerdos-rag api --port 8000
97
+ ```
98
+
99
+ Or with Docker:
100
+
101
+ ```bash
102
+ docker build --build-arg MODE=api -t kerdos-rag-api .
103
+ docker run -p 8000:8000 -e HF_TOKEN=hf_... kerdos-rag-api
104
+ ```
105
+
106
+ ---
107
+
108
+ ## Error Handling
109
+
110
+ ```typescript
111
+ import { KerdosAPIError } from "@kerdos/rag-client";
112
+
113
+ try {
114
+ for await (const token of client.chat("...")) { ... }
115
+ } catch (err) {
116
+ if (err instanceof KerdosAPIError) {
117
+ console.error(`API ${err.statusCode}:`, err.message);
118
+ }
119
+ }
120
+ ```
121
+
122
+ ---
123
+
124
+ _© 2024–2026 Kerdos Infrasoft Private Limited | [kerdos.in](https://kerdos.in)_
sdk/typescript/package.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "@kerdos/rag-client",
3
+ "version": "0.1.0",
4
+ "description": "TypeScript/JS client for the Kerdos RAG REST API",
5
+ "main": "dist/index.js",
6
+ "module": "dist/index.mjs",
7
+ "types": "dist/index.d.ts",
8
+ "files": [
9
+ "dist",
10
+ "README.md"
11
+ ],
12
+ "scripts": {
13
+ "build": "tsc --project tsconfig.json",
14
+ "dev": "tsc --watch",
15
+ "test": "node --test dist/index.test.js"
16
+ },
17
+ "keywords": [
18
+ "rag",
19
+ "llm",
20
+ "document-qa",
21
+ "kerdos",
22
+ "enterprise",
23
+ "ai"
24
+ ],
25
+ "author": "Kerdos Infrasoft Private Limited <partnership@kerdos.in>",
26
+ "license": "MIT",
27
+ "homepage": "https://kerdos.in",
28
+ "repository": {
29
+ "type": "git",
30
+ "url": "https://huggingface.co/spaces/kerdosdotio/Custom-LLM-Chat"
31
+ },
32
+ "engines": {
33
+ "node": ">=18"
34
+ },
35
+ "devDependencies": {
36
+ "typescript": "^5.4.0"
37
+ }
38
+ }
sdk/typescript/src/index.ts ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * @kerdos/rag-client
3
+ * TypeScript/JavaScript client for the Kerdos RAG REST API.
4
+ *
5
+ * @example
6
+ * ```ts
7
+ * import { KerdosRAGClient } from "@kerdos/rag-client";
8
+ *
9
+ * const client = new KerdosRAGClient({ baseUrl: "http://localhost:8000" });
10
+ *
11
+ * // Index documents
12
+ * const result = await client.indexFiles([fileInput.files[0]]);
13
+ * console.log(result.indexed);
14
+ *
15
+ * // Stream an answer
16
+ * for await (const token of client.chat("What is the refund policy?")) {
17
+ * process.stdout.write(token);
18
+ * }
19
+ * ```
20
+ */
21
+
22
+ // ── Types ─────────────────────────────────────────────────────────────────────
23
+
24
export interface KerdosRAGClientOptions {
  /** Base URL of the Kerdos RAG REST server, e.g. "http://localhost:8000" */
  baseUrl: string;
  /** Optional API key sent as X-Api-Key header (required if server has API_KEY set). */
  apiKey?: string;
  /** Request timeout in milliseconds. Default: 30 000 */
  timeoutMs?: number;
}

/** Shape of GET /health. */
export interface HealthResponse {
  status: "ok";
  version: string;
}

/** Shape of GET /status — mirrors the server's StatusResponse model. */
export interface StatusResponse {
  indexed_sources: string[];
  chunk_count: number;
  model: string;
  top_k: number;
  min_score: number;
}

/** Shape of POST /index — what was newly indexed vs. skipped as duplicate. */
export interface IndexResult {
  indexed: string[];
  skipped: string[];
  chunk_count: number;
}

/** One conversation turn, as accepted by POST /chat's `history` field. */
export interface ChatMessage {
  role: "user" | "assistant";
  content: string;
}

export interface ChatOptions {
  /** Prior conversation turns. */
  history?: ChatMessage[];
  /** Override top-K for this request only. */
  topK?: number;
}
63
+
64
+ // ── Client ────────────────────────────────────────────────────────────────────
65
+
66
+ export class KerdosRAGClient {
67
+ private readonly baseUrl: string;
68
+ private readonly headers: Record<string, string>;
69
+ private readonly timeoutMs: number;
70
+
71
+ constructor(options: KerdosRAGClientOptions) {
72
+ this.baseUrl = options.baseUrl.replace(/\/$/, ""); // strip trailing slash
73
+ this.timeoutMs = options.timeoutMs ?? 30_000;
74
+ this.headers = {
75
+ ...(options.apiKey ? { "X-Api-Key": options.apiKey } : {}),
76
+ };
77
+ }
78
+
79
+ // ── Internal helpers ────────────────────────────────────────────────────────
80
+
81
+ private url(path: string): string {
82
+ return `${this.baseUrl}${path}`;
83
+ }
84
+
85
+ private async fetchJSON<T>(path: string, init?: RequestInit): Promise<T> {
86
+ const controller = new AbortController();
87
+ const timer = setTimeout(() => controller.abort(), this.timeoutMs);
88
+
89
+ try {
90
+ const res = await fetch(this.url(path), {
91
+ ...init,
92
+ headers: { ...this.headers, ...(init?.headers ?? {}) },
93
+ signal: controller.signal,
94
+ });
95
+
96
+ if (!res.ok) {
97
+ const text = await res.text().catch(() => res.statusText);
98
+ throw new KerdosAPIError(res.status, text);
99
+ }
100
+
101
+ return res.json() as Promise<T>;
102
+ } finally {
103
+ clearTimeout(timer);
104
+ }
105
+ }
106
+
107
+ // ── Public API ──────────────────────────────────────────────────────────────
108
+
109
+ /**
110
+ * Check server liveness.
111
+ *
112
+ * @returns `{ status: "ok", version: "0.1.0" }`
113
+ */
114
+ async health(): Promise<HealthResponse> {
115
+ return this.fetchJSON<HealthResponse>("/health");
116
+ }
117
+
118
+ /**
119
+ * Get current knowledge-base metadata.
120
+ */
121
+ async status(): Promise<StatusResponse> {
122
+ return this.fetchJSON<StatusResponse>("/status");
123
+ }
124
+
125
+ /**
126
+ * Upload and index one or more {@link File} objects.
127
+ *
128
+ * Works in both **browser** (`File` from `<input type="file">`) and
129
+ * **Node.js ≥ 18** (`File` from the `buffer` module or `Blob`).
130
+ *
131
+ * @param files Array of File / Blob objects.
132
+ * @returns Summary of what was indexed and skipped.
133
+ */
134
+ async indexFiles(files: File[] | Blob[]): Promise<IndexResult> {
135
+ const form = new FormData();
136
+ files.forEach((f) => form.append("files", f));
137
+
138
+ return this.fetchJSON<IndexResult>("/index", {
139
+ method: "POST",
140
+ body: form,
141
+ });
142
+ }
143
+
144
+ /**
145
+ * Ask a question and receive a streamed answer via an async iterator.
146
+ *
147
+ * Internally consumes the Server-Sent Events stream from `POST /chat`.
148
+ *
149
+ * @param query The user's question.
150
+ * @param options Optional history and top-K override.
151
+ *
152
+ * @yields Each partial answer string as tokens arrive.
153
+ *
154
+ * @example
155
+ * ```ts
156
+ * let answer = "";
157
+ * for await (const token of client.chat("Summarize the contract.")) {
158
+ * answer = token; // token is the full answer so far (progressive)
159
+ * renderUI(answer);
160
+ * }
161
+ * ```
162
+ */
163
+ async *chat(query: string, options: ChatOptions = {}): AsyncGenerator<string> {
164
+ const body = JSON.stringify({
165
+ query,
166
+ history: options.history ?? null,
167
+ top_k: options.topK ?? null,
168
+ });
169
+
170
+ const controller = new AbortController();
171
+
172
+ const res = await fetch(this.url("/chat"), {
173
+ method: "POST",
174
+ headers: {
175
+ ...this.headers,
176
+ "Content-Type": "application/json",
177
+ "Accept": "text/event-stream",
178
+ },
179
+ body,
180
+ signal: controller.signal,
181
+ });
182
+
183
+ if (!res.ok) {
184
+ const text = await res.text().catch(() => res.statusText);
185
+ throw new KerdosAPIError(res.status, text);
186
+ }
187
+
188
+ if (!res.body) throw new Error("Response body is null — SSE stream unavailable.");
189
+
190
+ const reader = res.body.getReader();
191
+ const decoder = new TextDecoder();
192
+ let buffer = "";
193
+
194
+ try {
195
+ while (true) {
196
+ const { done, value } = await reader.read();
197
+ if (done) break;
198
+
199
+ buffer += decoder.decode(value, { stream: true });
200
+ const lines = buffer.split("\n");
201
+ buffer = lines.pop() ?? ""; // retain incomplete last line
202
+
203
+ for (const line of lines) {
204
+ if (!line.startsWith("data: ")) continue;
205
+ const data = line.slice(6).trim();
206
+ if (data === "[DONE]") return;
207
+ // Un-escape newlines escaped by the server
208
+ yield data.replace(/\\n/g, "\n");
209
+ }
210
+ }
211
+ } finally {
212
+ reader.releaseLock();
213
+ }
214
+ }
215
+
216
+ /**
217
+ * Clear the entire knowledge base on the server.
218
+ */
219
+ async reset(): Promise<{ ok: boolean; message: string }> {
220
+ return this.fetchJSON("/reset", { method: "DELETE" });
221
+ }
222
+ }
223
+
224
+ // ── Error type ────────────────────────────────────────────────────────────────
225
+
226
+ export class KerdosAPIError extends Error {
227
+ constructor(
228
+ public readonly statusCode: number,
229
+ message: string,
230
+ ) {
231
+ super(`KerdosRAG API error ${statusCode}: ${message}`);
232
+ this.name = "KerdosAPIError";
233
+ }
234
+ }
sdk/typescript/tsconfig.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "compilerOptions": {
3
+ "target": "ES2020",
4
+ "module": "ESNext",
5
+ "moduleResolution": "bundler",
6
+ "lib": ["ES2020", "DOM"],
7
+ "outDir": "./dist",
8
+ "declaration": true,
9
+ "declarationMap": true,
10
+ "sourceMap": true,
11
+ "strict": true,
12
+ "esModuleInterop": true,
13
+ "skipLibCheck": true,
14
+ "forceConsistentCasingInFileNames": true
15
+ },
16
+ "include": ["src/**/*.ts"],
17
+ "exclude": ["node_modules", "dist"]
18
+ }
tests/test_api.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ tests/test_api.py
3
+ FastAPI endpoint tests using httpx + Starlette TestClient.
4
+ No HF token or real LLM calls are needed.
5
+ """
6
+
7
+ import os
8
+ import sys
9
+ import pytest
10
+
11
+ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
12
+
13
+
14
@pytest.fixture(autouse=True)
def reset_engine():
    """Wipe the server's singleton engine before and after every test."""
    from kerdos_rag import server
    server._engine.reset()
    yield
    server._engine.reset()
21
+
22
+
23
@pytest.fixture
def client():
    """A Starlette TestClient bound to the FastAPI app (no network I/O)."""
    from fastapi.testclient import TestClient
    from kerdos_rag import server
    return TestClient(server.app)
28
+
29
+
30
# ── /health ───────────────────────────────────────────────────────────────────

def test_health(client):
    """The liveness probe returns 200 with status 'ok'."""
    response = client.get("/health")
    assert response.status_code == 200
    assert response.json()["status"] == "ok"


# ── /status ───────────────────────────────────────────────────────────────────

def test_status_empty(client):
    """A fresh engine reports zero chunks and no indexed sources."""
    response = client.get("/status")
    assert response.status_code == 200
    payload = response.json()
    assert payload["chunk_count"] == 0
    assert payload["indexed_sources"] == []
46
+
47
+
48
# ── /index ────────────────────────────────────────────────────────────────────

def _upload(client, path, name):
    """POST a single file to /index as multipart text/plain; return response."""
    with open(path, "rb") as fh:
        return client.post("/index", files={"files": (name, fh, "text/plain")})


def test_index_txt_file(client, tmp_path):
    """A plain-text upload is indexed and produces at least one chunk."""
    path = tmp_path / "info.txt"
    path.write_text("The return policy allows 30-day refunds.", encoding="utf-8")

    response = _upload(client, path, "info.txt")

    assert response.status_code == 200
    payload = response.json()
    assert "info.txt" in payload["indexed"]
    assert payload["chunk_count"] > 0


def test_index_reflects_in_status(client, tmp_path):
    """A freshly indexed file shows up in the /status metadata."""
    path = tmp_path / "data.txt"
    path.write_text("Important enterprise data.", encoding="utf-8")

    _upload(client, path, "data.txt")

    status = client.get("/status").json()
    assert "data.txt" in status["indexed_sources"]
    assert status["chunk_count"] > 0


def test_index_skips_duplicate(client, tmp_path):
    """Uploading the same filename twice skips the second copy."""
    path = tmp_path / "dup.txt"
    path.write_text("Some content.", encoding="utf-8")

    _upload(client, path, "dup.txt")
    response = _upload(client, path, "dup.txt")

    payload = response.json()
    assert "dup.txt" in payload["skipped"]
    assert payload["indexed"] == []
87
+
88
+
89
# ── /chat ─────────────────────────────────────────────────────────────────────

def test_chat_422_when_empty(client):
    """Chatting before any document is indexed is rejected with 422."""
    response = client.post("/chat", json={"query": "What is the policy?"})
    assert response.status_code == 422
94
+
95
+
96
# ── /reset ────────────────────────────────────────────────────────────────────

def test_reset(client, tmp_path):
    """DELETE /reset wipes the knowledge base back to zero chunks."""
    path = tmp_path / "file.txt"
    path.write_text("Some data.", encoding="utf-8")

    with open(path, "rb") as fh:
        client.post("/index", files={"files": ("file.txt", fh, "text/plain")})

    assert client.get("/status").json()["chunk_count"] > 0

    response = client.delete("/reset")
    assert response.status_code == 200
    assert response.json()["ok"] is True
    assert client.get("/status").json()["chunk_count"] == 0
tests/test_core.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ tests/test_core.py
3
+ Unit tests for the KerdosRAG public API (no HF token required).
4
+ """
5
+
6
+ import os
7
+ import sys
8
+ import tempfile
9
+
10
+ import pytest
11
+
12
+ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
13
+
14
+ from kerdos_rag import KerdosRAG
15
+
16
+
17
+ # ── Fixtures ──────────────────────────────────────────────────────────────────
18
+
19
+ @pytest.fixture
20
+ def engine():
21
+ """A fresh KerdosRAG instance for each test."""
22
+ return KerdosRAG(hf_token="hf_dummy") # token won't be used in indexing tests
23
+
24
+
25
@pytest.fixture
def indexed_engine(tmp_path):
    """Engine with one plain-text document already indexed."""
    source = tmp_path / "policy.txt"
    source.write_text(
        "The refund policy allows returns within 30 days of purchase. "
        "Contact support at support@example.com for assistance.",
        encoding="utf-8",
    )
    rag = KerdosRAG(hf_token="hf_dummy")
    rag.index([str(source)])
    return rag
37
+
38
+
39
+ # ── Tests ─────────────────────────────────────────────────────────────────────
40
+
41
+ def test_initial_state(engine):
42
+ assert engine.is_ready is False
43
+ assert engine.chunk_count == 0
44
+ assert engine.indexed_sources == set()
45
+
46
+
47
+ def test_index_returns_correct_metadata(indexed_engine):
48
+ assert indexed_engine.is_ready
49
+ assert indexed_engine.chunk_count > 0
50
+ assert "policy.txt" in indexed_engine.indexed_sources
51
+
52
+
53
+ def test_index_skips_duplicates(indexed_engine, tmp_path):
54
+ doc = tmp_path / "policy.txt"
55
+ doc.write_text("Some extra content.", encoding="utf-8")
56
+
57
+ result = indexed_engine.index([str(doc)])
58
+ assert "policy.txt" in result["skipped"]
59
+ assert "policy.txt" not in result["indexed"]
60
+
61
+
62
+ def test_index_multiple_files(engine, tmp_path):
63
+ (tmp_path / "a.txt").write_text("Alpha content here.", encoding="utf-8")
64
+ (tmp_path / "b.txt").write_text("Beta content here.", encoding="utf-8")
65
+
66
+ result = engine.index([str(tmp_path / "a.txt"), str(tmp_path / "b.txt")])
67
+ assert len(result["indexed"]) == 2
68
+ assert result["chunk_count"] > 0
69
+
70
+
71
+ def test_reset_clears_index(indexed_engine):
72
+ assert indexed_engine.is_ready
73
+ indexed_engine.reset()
74
+ assert not indexed_engine.is_ready
75
+ assert indexed_engine.chunk_count == 0
76
+ assert indexed_engine.indexed_sources == set()
77
+
78
+
79
+ def test_chat_raises_when_not_indexed(engine):
80
+ with pytest.raises(RuntimeError, match="No documents indexed"):
81
+ list(engine.chat("What is the policy?"))
82
+
83
+
84
+ def test_chat_raises_without_token(tmp_path):
85
+ doc = tmp_path / "doc.txt"
86
+ doc.write_text("Hello world.", encoding="utf-8")
87
+ eng = KerdosRAG(hf_token="")
88
+ eng.index([str(doc)])
89
+ with pytest.raises(ValueError, match="No Hugging Face token"):
90
+ list(eng.chat("What does it say?"))
91
+
92
+
93
+ def test_save_and_load(indexed_engine, tmp_path):
94
+ save_dir = tmp_path / "saved_index"
95
+ indexed_engine.save(str(save_dir))
96
+
97
+ assert (save_dir / "kerdos_index.faiss").exists()
98
+ assert (save_dir / "kerdos_meta.pkl").exists()
99
+
100
+ restored = KerdosRAG.load(str(save_dir), hf_token="hf_dummy")
101
+ assert restored.is_ready
102
+ assert restored.chunk_count == indexed_engine.chunk_count
103
+ assert restored.indexed_sources == indexed_engine.indexed_sources
104
+
105
+
106
+ def test_save_raises_when_empty(engine, tmp_path):
107
+ with pytest.raises(RuntimeError, match="Nothing to save"):
108
+ engine.save(str(tmp_path / "empty"))