Spaces:

kerdosdotio
/

Custom-LLM-Chat

Running

Bhaskar Ram committed on Mar 4

Commit

634117a

1 Parent(s): 3fc707a

feat: Python package, FastAPI REST server, TypeScript SDK

Phase 1 — Python Package (pip install kerdos-rag):
- pyproject.toml: PEP 517 metadata, entry points, pytest/ruff config
- kerdos_rag/__init__.py: public export of KerdosRAG
- kerdos_rag/core.py: KerdosRAG class with index(), chat() generator,
reset(), save() and load() for FAISS persistence
- kerdos_rag/cli.py: kerdos-rag CLI with serve/api/index subcommands

Phase 2 — FastAPI REST Server:
- kerdos_rag/server.py: GET /health, GET /status, POST /index (multipart),
POST /chat (SSE streaming), DELETE /reset; optional X-Api-Key auth
- Dockerfile: multi-stage build with MODE=serve|api build arg
- requirements.txt: added fastapi, uvicorn[standard], python-multipart

Phase 3 — TypeScript/JS SDK:
- sdk/typescript/src/index.ts: KerdosRAGClient class with health(),
status(), indexFiles(), chat() async generator, reset()
- sdk/typescript/package.json, tsconfig.json, README.md

Tests: 19/19 passing (smoke + test_core + test_api)

Files changed (14) hide show

Dockerfile +27 -7
kerdos_rag/__init__.py +6 -0
kerdos_rag/cli.py +101 -0
kerdos_rag/core.py +229 -0
kerdos_rag/server.py +186 -0
pyproject.toml +62 -0
requirements-dev.txt +1 -0
requirements.txt +4 -0
sdk/typescript/README.md +124 -0
sdk/typescript/package.json +38 -0
sdk/typescript/src/index.ts +234 -0
sdk/typescript/tsconfig.json +18 -0
tests/test_api.py +110 -0
tests/test_core.py +108 -0

Dockerfile CHANGED Viewed

@@ -1,12 +1,22 @@
-# Kerdos AI — Custom LLM Chat
-# Multi-stage Docker build for a lean production image
 FROM python:3.11-slim AS base
 # System dependencies for PyMuPDF and FAISS
 RUN apt-get update && apt-get install -y --no-install-recommends \
-        build-essential \
-        libgomp1 \
     && rm -rf /var/lib/apt/lists/*
 WORKDIR /app
@@ -18,10 +28,20 @@ RUN pip install --no-cache-dir -r requirements.txt
 # Copy source
 COPY . .
-# Gradio listens on 7860 by default
-EXPOSE 7860
 ENV GRADIO_SERVER_NAME=0.0.0.0
 ENV GRADIO_SERVER_PORT=7860
-CMD ["python", "app.py"]

+# Kerdos AI — Custom LLM Chat (Demo)
+# Multi-stage Docker build — supports both Gradio UI and FastAPI REST server
+#
+# Build for Gradio (default):
+#   docker build -t kerdos-rag .
+#   docker run -p 7860:7860 -e HF_TOKEN=hf_... kerdos-rag
+#
+# Build for REST API:
+#   docker build --build-arg MODE=api -t kerdos-rag-api .
+#   docker run -p 8000:8000 -e HF_TOKEN=hf_... kerdos-rag-api
+ARG MODE=serve
 FROM python:3.11-slim AS base
 # System dependencies for PyMuPDF and FAISS
 RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    libgomp1 \
     && rm -rf /var/lib/apt/lists/*
 WORKDIR /app
 # Copy source
 COPY . .
+# Install the package in editable mode so kerdos-rag CLI is available
+RUN pip install --no-cache-dir -e .
+# ── Gradio mode ───────────────────────────────────────────────
+FROM base AS serve
+EXPOSE 7860
 ENV GRADIO_SERVER_NAME=0.0.0.0
 ENV GRADIO_SERVER_PORT=7860
+CMD ["kerdos-rag", "serve", "--host", "0.0.0.0", "--port", "7860"]
+# ── FastAPI REST mode ─────────────────────────────────────────
+FROM base AS api
+EXPOSE 8000
+CMD ["kerdos-rag", "api", "--host", "0.0.0.0", "--port", "8000"]
+# Select the right stage based on BUILD ARG
+FROM ${MODE}

kerdos_rag/__init__.py ADDED Viewed

	@@ -0,0 +1,6 @@

+"""kerdos_rag — public API surface."""
+from kerdos_rag.core import KerdosRAG
+__all__ = ["KerdosRAG"]
+__version__ = "0.1.0"

kerdos_rag/cli.py ADDED Viewed

	@@ -0,0 +1,101 @@

+"""
+kerdos_rag/cli.py
+Command-line interface for the Kerdos RAG engine.
+    kerdos-rag serve          # start Gradio UI  (default port 7860)
+    kerdos-rag api            # start FastAPI REST server  (default port 8000)
+    kerdos-rag index <files>  # index documents from terminal
+"""
+from __future__ import annotations
+import argparse
+import sys
+def _cmd_serve(args: argparse.Namespace) -> None:
+    """Launch the Gradio web UI."""
+    import importlib.util, os
+    os.environ.setdefault("GRADIO_SERVER_PORT", str(args.port))
+    os.environ.setdefault("GRADIO_SERVER_NAME", args.host)
+    # app.py lives at the repo root — import it as a module
+    spec = importlib.util.spec_from_file_location("app", _repo_root() / "app.py")
+    mod = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(mod)
+    mod.demo.queue()
+    mod.demo.launch(css=mod.CSS, theme=__import__("gradio").themes.Soft())
+def _cmd_api(args: argparse.Namespace) -> None:
+    """Launch the FastAPI REST server."""
+    import uvicorn
+    from kerdos_rag.server import app  # noqa: F401
+    print(f"[kerdos-rag] Starting REST API on http://{args.host}:{args.port}")
+    uvicorn.run(
+        "kerdos_rag.server:app",
+        host=args.host,
+        port=args.port,
+        reload=args.reload,
+        log_level="info",
+    )
+def _cmd_index(args: argparse.Namespace) -> None:
+    """Index documents from the command line and print a summary."""
+    from kerdos_rag import KerdosRAG
+    engine = KerdosRAG()  # token not needed for pure indexing
+    result = engine.index(args.files)
+    if result["indexed"]:
+        print(f"✅ Indexed: {', '.join(result['indexed'])}")
+    if result["skipped"]:
+        print(f"⚠️  Skipped (already indexed): {', '.join(result['skipped'])}")
+    print(f"📦 Total chunks: {result['chunk_count']}")
+    if args.save:
+        engine.save(args.save)
+        print(f"💾 Index saved to: {args.save}")
+def _repo_root():
+    """Return the directory that contains the ``kerdos_rag`` package directory.
+    This is two levels above this file (``.../kerdos_rag/cli.py``), resolved to
+    an absolute path.
+    """
+    from pathlib import Path
+    return Path(__file__).resolve().parent.parent
+def main(argv: list[str] | None = None) -> None:
+    parser = argparse.ArgumentParser(
+        prog="kerdos-rag",
+        description="Kerdos RAG — Enterprise Document Q&A engine",
+    )
+    sub = parser.add_subparsers(dest="command", required=True)
+    # ── serve ────────────────────────────────────────────────────────────────
+    p_serve = sub.add_parser("serve", help="Start the Gradio web UI")
+    p_serve.add_argument("--host", default="0.0.0.0")
+    p_serve.add_argument("--port", type=int, default=7860)
+    p_serve.set_defaults(func=_cmd_serve)
+    # ── api ──────────────────────────────────────────────────────────────────
+    p_api = sub.add_parser("api", help="Start the FastAPI REST server")
+    p_api.add_argument("--host", default="0.0.0.0")
+    p_api.add_argument("--port", type=int, default=8000)
+    p_api.add_argument("--reload", action="store_true", help="Enable auto-reload (dev only)")
+    p_api.set_defaults(func=_cmd_api)
+    # ── index ─────────────────────────────────────────────────────────────────
+    p_idx = sub.add_parser("index", help="Index documents from the terminal")
+    p_idx.add_argument("files", nargs="+", metavar="FILE")
+    p_idx.add_argument("--save", metavar="DIR", default="", help="Save index to directory")
+    p_idx.set_defaults(func=_cmd_index)
+    args = parser.parse_args(argv)
+    args.func(args)
+if __name__ == "__main__":
+    main()

kerdos_rag/core.py ADDED Viewed

	@@ -0,0 +1,229 @@

+"""
+kerdos_rag/core.py
+High-level KerdosRAG façade — the primary interface for library consumers.
+Usage:
+    from kerdos_rag import KerdosRAG
+    engine = KerdosRAG(hf_token="hf_...")
+    engine.index(["policy.pdf", "manual.docx"])
+    for token in engine.chat("What is the refund policy?"):
+        print(token, end="", flush=True)
+"""
+from __future__ import annotations
+import json
+import os
+import pickle
+from pathlib import Path
+from typing import Generator
+from rag.document_loader import load_documents
+from rag.embedder import VectorIndex, build_index, add_to_index
+from rag.retriever import retrieve
+from rag.chain import answer_stream
+_DEFAULT_MODEL = "meta-llama/Llama-3.1-8B-Instruct"
+_DEFAULT_TOP_K = 5
+_DEFAULT_MIN_SCORE = 0.30
+class KerdosRAG:
+    """
+    Batteries-included RAG engine.
+    Args:
+        hf_token:   Hugging Face API token. Falls back to HF_TOKEN env var.
+        model:      HF model ID (e.g. 'mistralai/Mistral-7B-Instruct-v0.3').
+                    Falls back to LLM_MODEL env var, then Llama 3.1 8B.
+        top_k:      Number of chunks to retrieve per query.
+        min_score:  Minimum cosine similarity threshold (chunks below this
+                    are dropped before being sent to the LLM).
+    """
+    def __init__(
+        self,
+        hf_token: str = "",
+        model: str | None = None,
+        top_k: int = _DEFAULT_TOP_K,
+        min_score: float = _DEFAULT_MIN_SCORE,
+    ) -> None:
+        self.hf_token: str = hf_token.strip() or os.environ.get("HF_TOKEN", "")
+        self.model: str = model or os.environ.get("LLM_MODEL", _DEFAULT_MODEL)
+        self.top_k: int = top_k
+        self.min_score: float = min_score
+        self._index: VectorIndex | None = None
+        self._indexed_sources: set[str] = set()
+    # ── Properties ────────────────────────────────────────────────────────────
+    @property
+    def indexed_sources(self) -> set[str]:
+        """File names currently in the knowledge base."""
+        return set(self._indexed_sources)
+    @property
+    def chunk_count(self) -> int:
+        """Total number of vector chunks in the index."""
+        return self._index.index.ntotal if self._index else 0
+    @property
+    def is_ready(self) -> bool:
+        """True when at least one document has been indexed."""
+        return self._index is not None and self.chunk_count > 0
+    # ── Core operations ───────────────────────────────────────────────────────
+    def index(self, file_paths: list[str]) -> dict:
+        """
+        Parse and index documents into the knowledge base.
+        Duplicate filenames are automatically skipped.
+        Args:
+            file_paths: Absolute or relative paths to PDF, DOCX, TXT, MD, or CSV files.
+        Returns:
+            {
+              "indexed": ["file1.pdf", ...],   # newly indexed
+              "skipped": ["dup.pdf", ...],      # already in index
+              "chunk_count": 142               # total chunks
+            }
+        """
+        paths = [str(p) for p in file_paths]
+        new_paths, skipped = [], []
+        for p in paths:
+            name = Path(p).name
+            if name in self._indexed_sources:
+                skipped.append(name)
+            else:
+                new_paths.append(p)
+        if not new_paths:
+            return {"indexed": [], "skipped": skipped, "chunk_count": self.chunk_count}
+        docs = load_documents(new_paths)
+        if not docs:
+            raise ValueError("Could not extract text from any of the provided files.")
+        if self._index is None:
+            self._index = build_index(docs)
+        else:
+            self._index = add_to_index(self._index, docs)
+        newly_indexed = list({d["source"] for d in docs})
+        self._indexed_sources.update(newly_indexed)
+        return {
+            "indexed": newly_indexed,
+            "skipped": skipped,
+            "chunk_count": self.chunk_count,
+        }
+    def chat(
+        self,
+        query: str,
+        history: list[dict] | None = None,
+    ) -> Generator[str, None, None]:
+        """
+        Ask a question and stream the answer token-by-token.
+        Args:
+            query:   The user's question.
+            history: Optional list of prior messages in
+                     [{"role": "user"|"assistant", "content": "..."}] format.
+        Yields:
+            Progressively-growing answer strings (suitable for real-time display).
+        Raises:
+            RuntimeError: If no documents have been indexed yet.
+            ValueError:   If no HF token is available.
+        """
+        if not self.is_ready:
+            raise RuntimeError("No documents indexed. Call engine.index(file_paths) first.")
+        if not self.hf_token:
+            raise ValueError(
+                "No Hugging Face token. Pass hf_token= to KerdosRAG() or set HF_TOKEN env var."
+            )
+        # Temporarily patch retriever's MIN_SCORE with instance setting
+        import rag.retriever as _r
+        original_min = _r.MIN_SCORE
+        _r.MIN_SCORE = self.min_score
+        try:
+            chunks = retrieve(query, self._index, top_k=self.top_k)
+            yield from answer_stream(query, chunks, self.hf_token, chat_history=history)
+        finally:
+            _r.MIN_SCORE = original_min
+    def reset(self) -> None:
+        """Clear the knowledge base."""
+        self._index = None
+        self._indexed_sources = set()
+    # ── Persistence ───────────────────────────────────────────────────────────
+    def save(self, directory: str | Path) -> None:
+        """
+        Persist the index to disk so it can be reloaded across sessions.
+        Creates two files in `directory`:
+          - ``kerdos_index.faiss``  — the raw FAISS vectors
+          - ``kerdos_meta.pkl``     — chunks + source tracking
+        Args:
+            directory: Path to a folder (will be created if needed).
+        """
+        import faiss
+        if not self.is_ready:
+            raise RuntimeError("Nothing to save — index is empty.")
+        out = Path(directory)
+        out.mkdir(parents=True, exist_ok=True)
+        faiss.write_index(self._index.index, str(out / "kerdos_index.faiss"))
+        meta = {
+            "chunks": self._index.chunks,
+            "indexed_sources": list(self._indexed_sources),
+            "model": self.model,
+            "top_k": self.top_k,
+            "min_score": self.min_score,
+        }
+        with open(out / "kerdos_meta.pkl", "wb") as f:
+            pickle.dump(meta, f)
+    @classmethod
+    def load(cls, directory: str | Path, hf_token: str = "") -> "KerdosRAG":
+        """
+        Restore an engine from a directory previously written by :meth:`save`.
+        Args:
+            directory: Folder containing ``kerdos_index.faiss`` and ``kerdos_meta.pkl``.
+            hf_token:  HF token for chat (can also be set via HF_TOKEN env var).
+        Returns:
+            A fully initialised :class:`KerdosRAG` instance.
+        """
+        import faiss
+        from rag.embedder import _get_model
+        d = Path(directory)
+        with open(d / "kerdos_meta.pkl", "rb") as f:
+            meta = pickle.load(f)
+        engine = cls(
+            hf_token=hf_token,
+            model=meta["model"],
+            top_k=meta["top_k"],
+            min_score=meta["min_score"],
+        )
+        model = _get_model()
+        idx = faiss.read_index(str(d / "kerdos_index.faiss"))
+        engine._index = VectorIndex(chunks=meta["chunks"], index=idx, embedder=model)
+        engine._indexed_sources = set(meta["indexed_sources"])
+        return engine

kerdos_rag/server.py ADDED Viewed

	@@ -0,0 +1,186 @@

+"""
+kerdos_rag/server.py
+FastAPI REST server exposing the KerdosRAG engine over HTTP.
+Endpoints:
+    GET  /health          — liveness probe
+    GET  /status          — knowledge-base metadata
+    POST /index           — upload + index documents (multipart/form-data)
+    POST /chat            — ask a question (SSE streaming response)
+    DELETE /reset         — clear the knowledge base
+Authentication (optional):
+    Set API_KEY env var to enable X-Api-Key header validation.
+    Leave unset to run in open mode (suitable for local / trusted environments).
+"""
+from __future__ import annotations
+import os
+import asyncio
+from typing import AsyncGenerator
+from fastapi import FastAPI, File, UploadFile, HTTPException, Depends, Header, Request
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import StreamingResponse, JSONResponse
+from pydantic import BaseModel
+from kerdos_rag.core import KerdosRAG
+# ── App & CORS ────────────────────────────────────────────────────────────────
+# The ASGI application object; the route decorators in this module attach to it.
+app = FastAPI(
+    title="Kerdos RAG API",
+    description="Enterprise Document Q&A engine by Kerdos Infrasoft",
+    version="0.1.0",
+    contact={"name": "Kerdos Infrasoft", "url": "https://kerdos.in", "email": "partnership@kerdos.in"},
+    license_info={"name": "MIT"},
+)
+# CORS is fully open: any origin, method and header is allowed.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# ── Singleton engine ───────────────────────────────────────────────────────────
+# Created once at import time and shared by every request handler below.
+_engine = KerdosRAG()
+# ── Auth ──────────────────────────────────────────────────────────────────────
+# Read once at import time; an empty value means authentication is disabled.
+_API_KEY = os.environ.get("API_KEY", "")
+def _check_auth(x_api_key: str | None = Header(default=None)) -> None:
+    """If API_KEY env var is set, validate X-Api-Key header."""
+    if _API_KEY and x_api_key != _API_KEY:
+        raise HTTPException(status_code=401, detail="Invalid or missing X-Api-Key header.")
+# ── Request / Response models ──────────────────────────────────────────────────
+# Request body model for POST /chat.
+class ChatRequest(BaseModel):
+    # The user's question.
+    query: str
+    # Optional prior conversation turns, passed to the engine as ``history``.
+    history: list[dict] | None = None
+    # Optional per-request override of the engine's top_k; None keeps the default.
+    top_k: int | None = None
+# Response model for GET /status; mirrors the engine's current settings.
+class StatusResponse(BaseModel):
+    # File names currently in the knowledge base.
+    indexed_sources: list[str]
+    # Total number of chunks in the index.
+    chunk_count: int
+    # Model ID the engine is configured with.
+    model: str
+    # Number of chunks retrieved per query.
+    top_k: int
+    # Minimum similarity threshold used by the engine.
+    min_score: float
+# ── Endpoints ─────────────────────────────────────────────────────────────────
+@app.get("/health", tags=["Meta"])
+def health() -> dict:
+    """Liveness probe — always returns 200 OK."""
+    return {"status": "ok", "version": "0.1.0"}
+@app.get("/status", response_model=StatusResponse, tags=["Meta"])
+def status(_: None = Depends(_check_auth)) -> StatusResponse:
+    """Return current knowledge-base metadata."""
+    return StatusResponse(
+        indexed_sources=list(_engine.indexed_sources),
+        chunk_count=_engine.chunk_count,
+        model=_engine.model,
+        top_k=_engine.top_k,
+        min_score=_engine.min_score,
+    )
+@app.post("/index", tags=["RAG"])
+async def index_documents(
+    files: list[UploadFile] = File(...),
+    _: None = Depends(_check_auth),
+) -> JSONResponse:
+    """
+    Upload and index one or more documents.
+    Accepts: PDF (.pdf), Word (.docx), plain text (.txt, .md, .csv).
+    Duplicate filenames are automatically skipped.
+    """
+    import tempfile, shutil
+    from pathlib import Path
+    saved_paths: list[str] = []
+    tmp_dir = tempfile.mkdtemp(prefix="kerdos_upload_")
+    try:
+        for upload in files:
+            dest = Path(tmp_dir) / upload.filename
+            with open(dest, "wb") as f:
+                shutil.copyfileobj(upload.file, f)
+            saved_paths.append(str(dest))
+        result = _engine.index(saved_paths)
+    finally:
+        shutil.rmtree(tmp_dir, ignore_errors=True)
+    return JSONResponse(content=result)
+@app.post("/chat", tags=["RAG"])
+async def chat(req: ChatRequest, _: None = Depends(_check_auth)) -> StreamingResponse:
+    """
+    Ask a question and receive a **Server-Sent Events** stream of tokens.
+    Each SSE event has the form:
+        data: <partial answer so far>\\n\\n
+    The stream ends with:
+        data: [DONE]\\n\\n
+    Example (curl):
+        curl -X POST http://localhost:8000/chat \\
+             -H "Content-Type: application/json" \\
+             -d '{"query": "What is the refund policy?"}' \\
+             --no-buffer
+    """
+    if not _engine.is_ready:
+        raise HTTPException(
+            status_code=422,
+            detail="Knowledge base is empty. POST files to /index first.",
+        )
+    hf_token = _engine.hf_token
+    if not hf_token:
+        raise HTTPException(
+            status_code=401,
+            detail="No Hugging Face token configured. Set HF_TOKEN env var.",
+        )
+    # Temporarily override top_k if caller specified it
+    original_top_k = _engine.top_k
+    if req.top_k is not None:
+        _engine.top_k = req.top_k
+    async def event_generator() -> AsyncGenerator[str, None]:
+        try:
+            # answer_stream is a sync generator — run in thread pool
+            loop = asyncio.get_event_loop()
+            gen = _engine.chat(req.query, history=req.history)
+            while True:
+                try:
+                    token = await loop.run_in_executor(None, next, gen)
+                    # SSE format: escape newlines in the data value
+                    escaped = token.replace("\n", "\\n")
+                    yield f"data: {escaped}\n\n"
+                except StopIteration:
+                    break
+        finally:
+            _engine.top_k = original_top_k
+        yield "data: [DONE]\n\n"
+    return StreamingResponse(event_generator(), media_type="text/event-stream")
+@app.delete("/reset", tags=["RAG"])
+def reset(_: None = Depends(_check_auth)) -> dict:
+    """Clear the entire knowledge base."""
+    _engine.reset()
+    return {"ok": True, "message": "Knowledge base cleared."}

pyproject.toml ADDED Viewed

	@@ -0,0 +1,62 @@

+[build-system]
+# setuptools' PEP 517 backend is "setuptools.build_meta".
+requires = ["setuptools>=68", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "kerdos-rag"
+version = "0.1.0"
+description = "Enterprise Document Q&A RAG engine — by Kerdos Infrasoft"
+readme = "README.md"
+license = { text = "MIT" }
+authors = [{ name = "Kerdos Infrasoft Private Limited", email = "partnership@kerdos.in" }]
+requires-python = ">=3.10"
+keywords = ["rag", "llm", "document-qa", "faiss", "enterprise", "kerdos"]
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+]
+dependencies = [
+    "gradio>=6.6.0",
+    "sentence-transformers>=5.0.0",
+    "faiss-cpu>=1.9.0",
+    "PyMuPDF>=1.24.0",
+    "python-docx>=1.1.0",
+    "huggingface-hub>=0.28.0",
+    "numpy>=1.26.0,<3",
+    "python-dotenv>=1.0.0",
+    "tenacity>=8.2.0",
+    "fastapi>=0.111.0",
+    "uvicorn[standard]>=0.29.0",
+    "python-multipart>=0.0.9",
+]
+[project.optional-dependencies]
+dev = ["pytest>=8.0.0", "black>=24.0.0", "ruff>=0.4.0", "httpx>=0.27.0"]
+[project.scripts]
+kerdos-rag = "kerdos_rag.cli:main"
+[project.urls]
+Homepage = "https://kerdos.in"
+Repository = "https://huggingface.co/spaces/kerdosdotio/Custom-LLM-Chat"
+"Bug Tracker" = "https://kerdos.in/contact"
+[tool.setuptools.packages.find]
+include = ["kerdos_rag*", "rag*"]
+[tool.pytest.ini_options]
+pythonpath = ["."]
+testpaths = ["tests"]
+[tool.ruff]
+line-length = 100
+target-version = "py310"
+[tool.ruff.lint]
+select = ["E", "F", "I", "UP"]

requirements-dev.txt CHANGED Viewed

@@ -2,3 +2,4 @@
 pytest>=8.0.0
 black>=24.0.0
 ruff>=0.4.0

 pytest>=8.0.0
 black>=24.0.0
 ruff>=0.4.0
+httpx>=0.27.0

requirements.txt CHANGED Viewed

@@ -1,3 +1,4 @@
 sentence-transformers>=5.0.0
 faiss-cpu>=1.9.0
 PyMuPDF>=1.24.0
@@ -6,3 +7,6 @@ huggingface-hub>=0.28.0
 numpy>=1.26.0,<3
 python-dotenv>=1.0.0
 tenacity>=8.2.0

+gradio>=6.6.0
 sentence-transformers>=5.0.0
 faiss-cpu>=1.9.0
 PyMuPDF>=1.24.0
 numpy>=1.26.0,<3
 python-dotenv>=1.0.0
 tenacity>=8.2.0
+fastapi>=0.111.0
+uvicorn[standard]>=0.29.0
+python-multipart>=0.0.9

sdk/typescript/README.md ADDED Viewed

	@@ -0,0 +1,124 @@

+# `@kerdos/rag-client`
+> TypeScript/JavaScript client for the **Kerdos RAG REST API**.
+> Zero runtime dependencies — works in Node.js ≥ 18 and modern browsers.
+---
+## Installation
+```bash
+# npm
+npm install @kerdos/rag-client
+# pnpm
+pnpm add @kerdos/rag-client
+# yarn
+yarn add @kerdos/rag-client
+```
+---
+## Quick Start
+```typescript
+import { KerdosRAGClient } from "@kerdos/rag-client";
+const client = new KerdosRAGClient({
+  baseUrl: "http://localhost:8000",
+  apiKey: "your-secret", // optional — only if server has API_KEY set
+});
+// 1. Index documents
+const result = await client.indexFiles([
+  new File([pdfBuffer], "policy.pdf"),
+  new File([txtContent], "manual.txt"),
+]);
+console.log("Indexed:", result.indexed);
+console.log("Skipped:", result.skipped);
+// 2. Stream an answer
+let answer = "";
+for await (const token of client.chat("What is the refund policy?")) {
+  answer = token; // each yield is the full cumulative answer
+  process.stdout.write("\r" + answer);
+}
+// 3. Multi-turn conversation
+const history = [
+  { role: "user", content: "What is the refund policy?" },
+  { role: "assistant", content: answer },
+];
+for await (const token of client.chat("Who do I contact for refunds?", {
+  history,
+})) {
+  process.stdout.write("\r" + token);
+}
+// 4. Status & reset
+const status = await client.status();
+console.log("Chunks:", status.chunk_count);
+await client.reset();
+```
+---
+## API Reference
+### `new KerdosRAGClient(options)`
+| Option      | Type     | Default     | Description                               |
+| ----------- | -------- | ----------- | ----------------------------------------- |
+| `baseUrl`   | `string` | —           | Server URL (e.g. `http://localhost:8000`) |
+| `apiKey`    | `string` | `undefined` | Sent as `X-Api-Key` header                |
+| `timeoutMs` | `number` | `30000`     | Request timeout in ms                     |
+### Methods
+| Method               | Returns                   | Description          |
+| -------------------- | ------------------------- | -------------------- |
+| `health()`           | `Promise<HealthResponse>` | Liveness probe       |
+| `status()`           | `Promise<StatusResponse>` | KB metadata          |
+| `indexFiles(files)`  | `Promise<IndexResult>`    | Upload & index files |
+| `chat(query, opts?)` | `AsyncGenerator<string>`  | Stream answer tokens |
+| `reset()`            | `Promise<{ok, message}>`  | Clear knowledge base |
+---
+## Server Setup
+Start the REST server (Python backend required):
+```bash
+pip install kerdos-rag
+kerdos-rag api --port 8000
+```
+Or with Docker:
+```bash
+docker build --build-arg MODE=api -t kerdos-rag .
+docker run -p 8000:8000 -e HF_TOKEN=hf_... kerdos-rag
+```
+---
+## Error Handling
+```typescript
+import { KerdosAPIError } from "@kerdos/rag-client";
+try {
+  for await (const token of client.chat("...")) { ... }
+} catch (err) {
+  if (err instanceof KerdosAPIError) {
+    console.error(`API ${err.statusCode}:`, err.message);
+  }
+}
+```
+---
+_© 2024–2026 Kerdos Infrasoft Private Limited | [kerdos.in](https://kerdos.in)_

sdk/typescript/package.json ADDED Viewed

	@@ -0,0 +1,38 @@

+{
+  "name": "@kerdos/rag-client",
+  "version": "0.1.0",
+  "description": "TypeScript/JS client for the Kerdos RAG REST API",
+  "main": "dist/index.js",
+  "module": "dist/index.mjs",
+  "types": "dist/index.d.ts",
+  "files": [
+    "dist",
+    "README.md"
+  ],
+  "scripts": {
+    "build": "tsc --project tsconfig.json",
+    "dev": "tsc --watch",
+    "test": "node --test dist/index.test.js"
+  },
+  "keywords": [
+    "rag",
+    "llm",
+    "document-qa",
+    "kerdos",
+    "enterprise",
+    "ai"
+  ],
+  "author": "Kerdos Infrasoft Private Limited <partnership@kerdos.in>",
+  "license": "MIT",
+  "homepage": "https://kerdos.in",
+  "repository": {
+    "type": "git",
+    "url": "https://huggingface.co/spaces/kerdosdotio/Custom-LLM-Chat"
+  },
+  "engines": {
+    "node": ">=18"
+  },
+  "devDependencies": {
+    "typescript": "^5.4.0"
+  }
+}

sdk/typescript/src/index.ts ADDED Viewed

	@@ -0,0 +1,234 @@

+/**
+ * @kerdos/rag-client
+ * TypeScript/JavaScript client for the Kerdos RAG REST API.
+ *
+ * @example
+ * ```ts
+ * import { KerdosRAGClient } from "@kerdos/rag-client";
+ *
+ * const client = new KerdosRAGClient({ baseUrl: "http://localhost:8000" });
+ *
+ * // Index documents
+ * const result = await client.indexFiles([fileInput.files[0]]);
+ * console.log(result.indexed);
+ *
+ * // Stream an answer (each yield is the full answer so far, not a delta)
+ * for await (const token of client.chat("What is the refund policy?")) {
+ *   process.stdout.write("\r" + token);
+ * }
+ * ```
+ */
+// ── Types ─────────────────────────────────────────────────────────────────────
+export interface KerdosRAGClientOptions {
+  /** Base URL of the Kerdos RAG REST server, e.g. "http://localhost:8000" */
+  baseUrl: string;
+  /** Optional API key sent as X-Api-Key header (required if server has API_KEY set). */
+  apiKey?: string;
+  /** Request timeout in milliseconds. Default: 30 000 */
+  timeoutMs?: number;
+}
+/** Response body of `GET /health`. */
+export interface HealthResponse {
+  /** Always the literal `"ok"`. */
+  status: "ok";
+  /** Version string reported by the server. */
+  version: string;
+}
+/** Response body of `GET /status`: knowledge-base contents and engine settings. */
+export interface StatusResponse {
+  /** File names currently in the knowledge base. */
+  indexed_sources: string[];
+  /** Total number of chunks in the index. */
+  chunk_count: number;
+  /** Model ID the server engine is configured with. */
+  model: string;
+  /** Number of chunks retrieved per query. */
+  top_k: number;
+  /** Minimum similarity threshold used by the engine. */
+  min_score: number;
+}
+/** Response body of `POST /index`. */
+export interface IndexResult {
+  /** File names that were newly indexed by this request. */
+  indexed: string[];
+  /** File names skipped because they were already indexed. */
+  skipped: string[];
+  /** Total number of chunks in the index after the request. */
+  chunk_count: number;
+}
+/** One prior conversation turn, sent to the server as part of `history`. */
+export interface ChatMessage {
+  /** Who produced the message. */
+  role: "user" | "assistant";
+  /** The message text. */
+  content: string;
+}
+export interface ChatOptions {
+  /** Prior conversation turns. */
+  history?: ChatMessage[];
+  /** Override top-K for this request only. */
+  topK?: number;
+}
+// ── Client ────────────────────────────────────────────────────────────────────
+export class KerdosRAGClient {
+  private readonly baseUrl: string;
+  private readonly headers: Record<string, string>;
+  private readonly timeoutMs: number;
+  constructor(options: KerdosRAGClientOptions) {
+    this.baseUrl = options.baseUrl.replace(/\/$/, ""); // strip trailing slash
+    this.timeoutMs = options.timeoutMs ?? 30_000;
+    this.headers = {
+      ...(options.apiKey ? { "X-Api-Key": options.apiKey } : {}),
+    };
+  }
+  // ── Internal helpers ────────────────────────────────────────────────────────
+  private url(path: string): string {
+    return `${this.baseUrl}${path}`;
+  }
+  private async fetchJSON<T>(path: string, init?: RequestInit): Promise<T> {
+    const controller = new AbortController();
+    const timer = setTimeout(() => controller.abort(), this.timeoutMs);
+    try {
+      const res = await fetch(this.url(path), {
+        ...init,
+        headers: { ...this.headers, ...(init?.headers ?? {}) },
+        signal: controller.signal,
+      });
+      if (!res.ok) {
+        const text = await res.text().catch(() => res.statusText);
+        throw new KerdosAPIError(res.status, text);
+      }
+      return res.json() as Promise<T>;
+    } finally {
+      clearTimeout(timer);
+    }
+  }
+  // ── Public API ──────────────────────────────────────────────────────────────
+  /**
+   * Check server liveness.
+   *
+   * @returns `{ status: "ok", version: "0.1.0" }`
+   */
+  async health(): Promise<HealthResponse> {
+    return this.fetchJSON<HealthResponse>("/health");
+  }
+  /**
+   * Get current knowledge-base metadata.
+   */
+  async status(): Promise<StatusResponse> {
+    return this.fetchJSON<StatusResponse>("/status");
+  }
+  /**
+   * Upload and index one or more {@link File} objects.
+   *
+   * Works in both **browser** (`File` from `<input type="file">`) and
+   * **Node.js ≥ 18** (`File` from the `buffer` module or `Blob`).
+   *
+   * @param files  Array of File / Blob objects.
+   * @returns Summary of what was indexed and skipped.
+   */
+  async indexFiles(files: File[] | Blob[]): Promise<IndexResult> {
+    const form = new FormData();
+    files.forEach((f) => form.append("files", f));
+    return this.fetchJSON<IndexResult>("/index", {
+      method: "POST",
+      body: form,
+    });
+  }
+  /**
+   * Ask a question and receive a streamed answer via an async iterator.
+   *
+   * Internally consumes the Server-Sent Events stream from `POST /chat`:
+   * every `data: ` line is yielded (with escaped `\n` sequences turned back
+   * into real newlines) until a `[DONE]` sentinel or the end of the stream.
+   *
+   * The underlying HTTP stream is cancelled whenever the generator finishes,
+   * including when the caller stops iterating early (`break`, `return`, or an
+   * exception inside the `for await` body), so no connection is left dangling.
+   *
+   * @param query    The user's question.
+   * @param options  Optional history and top-K override.
+   *
+   * @yields Each partial answer string as tokens arrive.
+   *
+   * @throws {KerdosAPIError} When the server answers with a non-2xx status.
+   * @throws {Error} When the response carries no readable body.
+   *
+   * @example
+   * ```ts
+   * let answer = "";
+   * for await (const token of client.chat("Summarize the contract.")) {
+   *   answer = token;            // token is the full answer so far (progressive)
+   *   renderUI(answer);
+   * }
+   * ```
+   */
+  async *chat(query: string, options: ChatOptions = {}): AsyncGenerator<string> {
+    const body = JSON.stringify({
+      query,
+      history: options.history ?? null,
+      top_k: options.topK ?? null,
+    });
+    const controller = new AbortController();
+    const res = await fetch(this.url("/chat"), {
+      method: "POST",
+      headers: {
+        ...this.headers,
+        "Content-Type": "application/json",
+        "Accept": "text/event-stream",
+      },
+      body,
+      signal: controller.signal,
+    });
+    if (!res.ok) {
+      const text = await res.text().catch(() => res.statusText);
+      throw new KerdosAPIError(res.status, text);
+    }
+    if (!res.body) throw new Error("Response body is null — SSE stream unavailable.");
+    const reader = res.body.getReader();
+    const decoder = new TextDecoder();
+    let buffer = "";
+    try {
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done) break;
+        buffer += decoder.decode(value, { stream: true });
+        const lines = buffer.split("\n");
+        buffer = lines.pop() ?? ""; // retain incomplete last line
+        for (const line of lines) {
+          if (!line.startsWith("data: ")) continue;
+          const data = line.slice(6).trim();
+          if (data === "[DONE]") return;
+          // Un-escape newlines escaped by the server
+          yield data.replace(/\\n/g, "\n");
+        }
+      }
+    } finally {
+      // Reached on normal completion, on "[DONE]", on errors, and when the
+      // consumer abandons the iterator early. Cancelling tells the server side
+      // we are done; on an already-closed stream it is a no-op, and a rejection
+      // from an errored stream is deliberately ignored (best-effort cleanup).
+      await reader.cancel().catch(() => undefined);
+      reader.releaseLock();
+      // Abort the request as well so the fetch layer frees the connection.
+      controller.abort();
+    }
+  }
+  /**
+   * Ask the server to wipe its knowledge base via `DELETE /reset`.
+   *
+   * @returns The parsed response body (`ok` flag plus a `message`).
+   */
+  async reset(): Promise<{ ok: boolean; message: string }> {
+    const outcome = await this.fetchJSON<{ ok: boolean; message: string }>("/reset", {
+      method: "DELETE",
+    });
+    return outcome;
+  }
+}
+// ── Error type ────────────────────────────────────────────────────────────────
+/**
+ * Error raised for a failed API response.
+ *
+ * The message is formatted as `KerdosRAG API error <status>: <detail>` and the
+ * numeric status is kept on {@link KerdosAPIError.statusCode}.
+ */
+export class KerdosAPIError extends Error {
+  /** HTTP status code supplied by the caller that raised the error. */
+  public readonly statusCode: number;
+  constructor(statusCode: number, message: string) {
+    super(`KerdosRAG API error ${statusCode}: ${message}`);
+    this.statusCode = statusCode;
+    this.name = "KerdosAPIError";
+  }
+}

sdk/typescript/tsconfig.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "compilerOptions": {
+    "target": "ES2020",
+    "module": "ESNext",
+    "moduleResolution": "bundler",
+    "lib": ["ES2020", "DOM"],
+    "outDir": "./dist",
+    "declaration": true,
+    "declarationMap": true,
+    "sourceMap": true,
+    "strict": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "forceConsistentCasingInFileNames": true
+  },
+  "include": ["src/**/*.ts"],
+  "exclude": ["node_modules", "dist"]
+}

tests/test_api.py ADDED Viewed

	@@ -0,0 +1,110 @@

+"""
+tests/test_api.py
+FastAPI endpoint tests using httpx + Starlette TestClient.
+No HF token or real LLM calls are needed.
+"""
+import os
+import sys
+import pytest
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
+@pytest.fixture(autouse=True)
+def reset_engine():
+    """Reset the server's singleton engine before each test."""
+    from kerdos_rag.server import _engine
+    _engine.reset()
+    yield
+    _engine.reset()
+@pytest.fixture
+def client():
+    from fastapi.testclient import TestClient
+    from kerdos_rag.server import app
+    return TestClient(app)
+# ── /health ───────────────────────────────────────────────────────────────────
+def test_health(client):
+    r = client.get("/health")
+    assert r.status_code == 200
+    assert r.json()["status"] == "ok"
+# ── /status ───────────────────────────────────────────────────────────────────
+def test_status_empty(client):
+    r = client.get("/status")
+    assert r.status_code == 200
+    data = r.json()
+    assert data["chunk_count"] == 0
+    assert data["indexed_sources"] == []
+# ── /index ────────────────────────────────────────────────────────────────────
+def test_index_txt_file(client, tmp_path):
+    doc = tmp_path / "info.txt"
+    doc.write_text("The return policy allows 30-day refunds.", encoding="utf-8")
+    with open(doc, "rb") as f:
+        r = client.post("/index", files={"files": ("info.txt", f, "text/plain")})
+    assert r.status_code == 200
+    body = r.json()
+    assert "info.txt" in body["indexed"]
+    assert body["chunk_count"] > 0
+def test_index_reflects_in_status(client, tmp_path):
+    doc = tmp_path / "data.txt"
+    doc.write_text("Important enterprise data.", encoding="utf-8")
+    with open(doc, "rb") as f:
+        client.post("/index", files={"files": ("data.txt", f, "text/plain")})
+    status = client.get("/status").json()
+    assert "data.txt" in status["indexed_sources"]
+    assert status["chunk_count"] > 0
+def test_index_skips_duplicate(client, tmp_path):
+    doc = tmp_path / "dup.txt"
+    doc.write_text("Some content.", encoding="utf-8")
+    with open(doc, "rb") as f:
+        client.post("/index", files={"files": ("dup.txt", f, "text/plain")})
+    with open(doc, "rb") as f:
+        r = client.post("/index", files={"files": ("dup.txt", f, "text/plain")})
+    body = r.json()
+    assert "dup.txt" in body["skipped"]
+    assert body["indexed"] == []
+# ── /chat ─────────────────────────────────────────────────────────────────────
+def test_chat_422_when_empty(client):
+    r = client.post("/chat", json={"query": "What is the policy?"})
+    assert r.status_code == 422
+# ── /reset ────────────────────────────────────────────────────────────────────
+def test_reset(client, tmp_path):
+    doc = tmp_path / "file.txt"
+    doc.write_text("Some data.", encoding="utf-8")
+    with open(doc, "rb") as f:
+        client.post("/index", files={"files": ("file.txt", f, "text/plain")})
+    assert client.get("/status").json()["chunk_count"] > 0
+    r = client.delete("/reset")
+    assert r.status_code == 200
+    assert r.json()["ok"] is True
+    assert client.get("/status").json()["chunk_count"] == 0

tests/test_core.py ADDED Viewed

	@@ -0,0 +1,108 @@

+"""
+tests/test_core.py
+Unit tests for the KerdosRAG public API (no HF token required).
+"""
+import os
+import sys
+import tempfile
+import pytest
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
+from kerdos_rag import KerdosRAG
+# ── Fixtures ──────────────────────────────────────────────────────────────────
+@pytest.fixture
+def engine():
+    """A fresh KerdosRAG instance for each test."""
+    return KerdosRAG(hf_token="hf_dummy")  # token won't be used in indexing tests
+@pytest.fixture
+def indexed_engine(tmp_path):
+    """Engine with one plain-text document already indexed."""
+    doc = tmp_path / "policy.txt"
+    doc.write_text(
+        "The refund policy allows returns within 30 days of purchase. "
+        "Contact support at support@example.com for assistance.",
+        encoding="utf-8",
+    )
+    eng = KerdosRAG(hf_token="hf_dummy")
+    eng.index([str(doc)])
+    return eng
+# ── Tests ─────────────────────────────────────────────────────────────────────
+def test_initial_state(engine):
+    assert engine.is_ready is False
+    assert engine.chunk_count == 0
+    assert engine.indexed_sources == set()
+def test_index_returns_correct_metadata(indexed_engine):
+    assert indexed_engine.is_ready
+    assert indexed_engine.chunk_count > 0
+    assert "policy.txt" in indexed_engine.indexed_sources
+def test_index_skips_duplicates(indexed_engine, tmp_path):
+    doc = tmp_path / "policy.txt"
+    doc.write_text("Some extra content.", encoding="utf-8")
+    result = indexed_engine.index([str(doc)])
+    assert "policy.txt" in result["skipped"]
+    assert "policy.txt" not in result["indexed"]
+def test_index_multiple_files(engine, tmp_path):
+    (tmp_path / "a.txt").write_text("Alpha content here.", encoding="utf-8")
+    (tmp_path / "b.txt").write_text("Beta content here.", encoding="utf-8")
+    result = engine.index([str(tmp_path / "a.txt"), str(tmp_path / "b.txt")])
+    assert len(result["indexed"]) == 2
+    assert result["chunk_count"] > 0
+def test_reset_clears_index(indexed_engine):
+    assert indexed_engine.is_ready
+    indexed_engine.reset()
+    assert not indexed_engine.is_ready
+    assert indexed_engine.chunk_count == 0
+    assert indexed_engine.indexed_sources == set()
+def test_chat_raises_when_not_indexed(engine):
+    with pytest.raises(RuntimeError, match="No documents indexed"):
+        list(engine.chat("What is the policy?"))
+def test_chat_raises_without_token(tmp_path):
+    doc = tmp_path / "doc.txt"
+    doc.write_text("Hello world.", encoding="utf-8")
+    eng = KerdosRAG(hf_token="")
+    eng.index([str(doc)])
+    with pytest.raises(ValueError, match="No Hugging Face token"):
+        list(eng.chat("What does it say?"))
+def test_save_and_load(indexed_engine, tmp_path):
+    save_dir = tmp_path / "saved_index"
+    indexed_engine.save(str(save_dir))
+    assert (save_dir / "kerdos_index.faiss").exists()
+    assert (save_dir / "kerdos_meta.pkl").exists()
+    restored = KerdosRAG.load(str(save_dir), hf_token="hf_dummy")
+    assert restored.is_ready
+    assert restored.chunk_count == indexed_engine.chunk_count
+    assert restored.indexed_sources == indexed_engine.indexed_sources
+def test_save_raises_when_empty(engine, tmp_path):
+    with pytest.raises(RuntimeError, match="Nothing to save"):
+        engine.save(str(tmp_path / "empty"))