prepare hugging face deployment and enable gitguardian
Files changed:
- .dockerignore +12 -0
- .gitignore +3 -1
- .pre-commit-config.yaml +6 -0
- Dockerfile +47 -0
- README.md +53 -8
- backend/__init__.py +3 -0
- backend/api.py +37 -1
- frontend/components/FlowCanvas.tsx +4 -1
- frontend/hooks/useQueryStream.ts +6 -1
- frontend/next.config.ts +2 -1
- frontend/package.json +3 -3
- main.py +96 -22
- pyproject.toml +1 -1
- start.sh +21 -0
.dockerignore
ADDED
@@ -0,0 +1,12 @@
+.venv/
+__pycache__/
+*.pyc
+node_modules/
+frontend/node_modules/
+frontend/.next/
+frontend/out/
+lightrag_store/
+.git/
+.DS_Store
+.env
+frontend/.env.local
.gitignore
CHANGED
@@ -25,6 +25,7 @@ build/
 
 # ── RAG / vector stores (can be large and contain indexed private data) ───
 lightrag_store*/
+.tools/
 
 # ── Generated files ──────────────────────────────────────────────────────
 .files/
@@ -42,4 +43,5 @@ Thumbs.db
 .idea/
 .vscode/
 *.swp
-*.swo
+*.swo
+AGENTS.md
.pre-commit-config.yaml
ADDED
@@ -0,0 +1,6 @@
+repos:
+  - repo: https://github.com/GitGuardian/ggshield
+    rev: v1.24.0
+    hooks:
+      - id: ggshield
+        stages: [pre-commit, pre-push]
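
A quick way to sanity-check this hook configuration outside of a commit; a minimal sketch assuming `pre-commit` and `ggshield` are on `PATH` (the README change below installs them into a dedicated `.tools/ggshield` venv instead):

```bash
# Run every configured hook against the whole tree, not just staged files
pre-commit run --all-files

# Or invoke the scanner directly on the repository
ggshield secret scan repo .
```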
Dockerfile
ADDED
@@ -0,0 +1,47 @@
+FROM python:3.11-slim
+
+ENV NODE_MAJOR=20 \
+    PYTHONUNBUFFERED=1 \
+    PIP_NO_CACHE_DIR=1 \
+    NEXT_TELEMETRY_DISABLED=1
+
+WORKDIR /app
+
+# System dependencies (Node.js for building frontend + git for HF datasets)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    curl \
+    git && \
+    curl -fsSL "https://deb.nodesource.com/setup_${NODE_MAJOR}.x" | bash - && \
+    apt-get install -y --no-install-recommends nodejs && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Python dependencies (copy metadata first for caching)
+COPY pyproject.toml README.md ./
+COPY backend/ ./backend/
+RUN pip install --upgrade pip && \
+    pip install -e .
+
+# Frontend dependencies
+COPY frontend/package.json ./frontend/
+RUN cd frontend && npm install
+
+# Project sources
+COPY frontend/ ./frontend/
+COPY main.py ask.py start.sh ./
+RUN chmod +x start.sh
+
+# Build static Next.js export
+RUN cd frontend && npm run build
+
+# Prepare runtime directories
+RUN mkdir -p /app/lightrag_store
+
+ENV PYTHONPATH=/app \
+    RAG_WORKING_DIR=/app/lightrag_store \
+    PORT=7860
+
+EXPOSE 7860
+
+CMD ["./start.sh"]
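
The image is self-contained, so it can be smoke-tested before pushing to a Space; a minimal sketch, where the `askchomsky` tag and the key value are placeholders:

```bash
# Build from the repository root and run on the same port the Space uses
docker build -t askchomsky .
docker run --rm -p 7860:7860 -e openrouter_key="sk-or-..." askchomsky
```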
README.md
CHANGED
@@ -1,12 +1,23 @@
-
+---
+title: AskChomsky
+emoji: 🧠
+colorFrom: blue
+colorTo: purple
+sdk: docker
+app_port: 7860
+---
 
-
+# AskChomsky
+
+Ask questions about Noam Chomsky's work, grounded in a curated corpus with citations.
+
+Powered by LightRAG + Next.js.
 
 ## Run Locally
 
 ### Prerequisites
 
-- Python 3.
+- Python 3.11+
 - Node.js 20+
 - npm
 
@@ -74,8 +85,8 @@ Notes:
 
 - LightRAG (retrieval-augmented generation)
 - LlamaIndex (RAG orchestration)
--
-- Model for answer generation: `openai/gpt-
+- OpenAI embeddings: `openai/text-embedding-3-small` (via OpenRouter)
+- Model for answer generation: `openai/gpt-4o-mini`
 - Langfuse (observability and traces)
 
 ## Dataset Used
@@ -107,7 +118,41 @@ python ask.py --query "How does Chomsky connect corporate power to public discou
 Notes:
 
 - LightRAG uses your OpenRouter key from `.env` (`openrouter_key`) for answer generation.
-- This setup uses local embeddings with `BAAI/bge-base-en-v1.5`.
 - Available query modes: `naive`, `local`, `global`, `hybrid`, `mix`.
-
-
+- In production you can set `RAG_WORKING_DIR` to control where the LightRAG index is stored
+  (the backend uses `RAG_WORKING_DIR` or defaults to `./lightrag_store`).
+- Identical queries are cached by default (24h TTL, configurable via `QUERY_CACHE_TTL`).
+
+## Deploy to Hugging Face Spaces
+
+Use the bundled `Dockerfile` when configuring the Space (`sdk: docker` is already declared in this README header).
+
+- **Repository:** Push this project to the Space or set it as the linked Git repository; the build looks for `Dockerfile` at the root.
+- **Secrets:** In the Space settings add `openrouter_key` (and optional `LANGFUSE_*` keys) under *Variables & secrets*; the container refuses to start without an LLM key.
+- **Resources:** The default `INGEST_DOC_LIMIT` is 200; override it in *Environment variables* if you need a smaller corpus for faster cold starts.
+- **Networking:** The app listens on `$PORT` (default `7860`) and serves both the FastAPI backend and the statically exported Next.js frontend from the same origin.
+- **Persistence:** The LightRAG store lives in `/app/lightrag_store`; Spaces reset storage between restarts, so ingestion runs automatically whenever the cache is empty.
+
+After each push Hugging Face rebuilds the image, runs `start.sh`, ingests the corpus if needed, and exposes the UI at the Space URL.
+
+## Secret Scanning (GitGuardian)
+
+This repository ships with a pre-commit hook configuration that runs GitGuardian's `ggshield` scanner on every commit and push.
+
+1. Provision the dedicated security tooling venv (one-time):
+   ```bash
+   python3 -m venv .tools/ggshield
+   .tools/ggshield/bin/python -m pip install --upgrade pip
+   .tools/ggshield/bin/python -m pip install pre-commit ggshield
+   .tools/ggshield/bin/ggshield auth login  # already completed
+   ```
+2. Enable the hooks in your local clone:
+   ```bash
+   .tools/ggshield/bin/pre-commit install --install-hooks
+   ```
+3. (Optional) Run a full scan at any time:
+   ```bash
+   .tools/ggshield/bin/ggshield secret scan repo .
+   ```
+
+Commits that introduce high-risk secrets will be blocked until the secret is removed or revoked.
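
As a companion to the deployment notes above: outside Docker, the same settings the Space injects as secrets can come from plain environment variables; a hedged sketch with placeholder values (the optional `LANGFUSE_*` names are left generic, since the README only refers to them as a group):

```bash
# Required: OpenRouter key used for embeddings and answer generation
export openrouter_key="sk-or-..."

# Optional tuning (defaults: 200 docs ingested, 24h query cache)
export INGEST_DOC_LIMIT=50
export QUERY_CACHE_TTL=3600
```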
backend/__init__.py
ADDED
@@ -0,0 +1,3 @@
+"""AskChomsky backend package."""
+
+__all__ = ["api"]
backend/api.py
CHANGED
@@ -34,9 +34,10 @@ from pydantic import BaseModel
 from main import (
     CITATION_SYSTEM_PROMPT,
     DEFAULT_WORKING_DIR,
+    cache_answer,
+    get_cached_answer,
     initialize_rag,
     llm_model_func,
-    query_rag,
 )
 
 # ---------------------------------------------------------------------------
@@ -544,6 +545,14 @@ async def _stream_pipeline(
         detail=f"Original: {question}\n\nRewritten: {rewritten}",
     )
 
+    # ── Stage: Cache Check ───────────────────────────────────────────
+    mode = mode_override or os.getenv("CHAINLIT_MODE") or "hybrid"
+    cached = get_cached_answer(question, mode)
+    if cached is not None:
+        yield _stage_event("cache", "Cache", "done", detail="Served from cache")
+        yield _sse("done", {"answer": cached})
+        return
+
     # ── Stage: RAG Init ──────────────────────────────────────────────
     yield _stage_event("rag_init", "Loading RAG Store", "running")
     # RAG_WORKING_DIR controls where the LightRAG index is stored.
@@ -729,6 +738,9 @@ async def _stream_pipeline(
 
         yield _sse("done", done_payload)
 
+        # Cache the final answer
+        cache_answer(question, mode, final)
+
     except Exception as exc:
         yield _stage_event("answer", "Answer", "error", detail=str(exc))
         yield _sse("error", {"message": str(exc)})
@@ -802,3 +814,27 @@ async def compare(req: CompareRequest) -> dict:
         "mode_b": mode_b,
         "answer_b": answer_b,
     }
+
+
+# ---------------------------------------------------------------------------
+# Serve Next.js static build (production)
+# ---------------------------------------------------------------------------
+from fastapi.staticfiles import StaticFiles
+from fastapi.responses import FileResponse
+
+NEXTJS_OUT = os.path.join(PROJECT_ROOT, "frontend", "out")
+
+if os.path.isdir(NEXTJS_OUT):
+    app.mount(
+        "/_next", StaticFiles(directory=os.path.join(NEXTJS_OUT, "_next")), name="_next"
+    )
+
+    @app.get("/{full_path:path}")
+    async def serve_frontend(full_path: str):
+        file_path = os.path.join(NEXTJS_OUT, full_path, "index.html")
+        if os.path.isfile(file_path):
+            return FileResponse(file_path)
+        index_path = os.path.join(NEXTJS_OUT, "index.html")
+        if os.path.isfile(index_path):
+            return FileResponse(index_path)
+        return {"error": "Not found"}
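
Because Starlette matches routes in registration order, the catch-all `/{full_path:path}` handler added at the bottom of the module never shadows the API routes declared above it; unknown paths simply fall back to the exported SPA shell. A quick check, assuming the container from the Dockerfile above is running on the default port:

```bash
# Exported route: served from frontend/out/<route>/index.html
curl -s http://localhost:7860/ | head -n 3

# Unknown client-side route: falls back to the root index.html
curl -s http://localhost:7860/some/client/route/ | head -n 3
```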
frontend/components/FlowCanvas.tsx
CHANGED
@@ -29,6 +29,7 @@ interface PipelineNodeDataShape extends Record<string, unknown> {
 const ICONS: Record<string, string> = {
   intent: "🧭",
   rewrite: "✏️",
+  cache: "⚡",
   rag_init: "🗄️",
   retrieval_1: "🔍",
   retrieval_2: "🔄",
@@ -43,6 +44,7 @@ const ICONS: Record<string, string> = {
 const POSITIONS: Record<string, { x: number; y: number }> = {
   intent: { x: 0, y: 0 },
   rewrite: { x: 0, y: 160 },
+  cache: { x: 300, y: 160 },
   rag_init: { x: 0, y: 320 },
   retrieval_1: { x: 0, y: 480 },
   retrieval_2: { x: 300, y: 480 },
@@ -55,7 +57,8 @@ const POSITIONS: Record<string, { x: number; y: number }> = {
 // ── Edge definitions ────────────────────────────────────────────────────────
 const STATIC_EDGES: Edge[] = [
   { id: "e-intent-rewrite", source: "intent", target: "rewrite" },
-  { id: "e-rewrite-
+  { id: "e-rewrite-cache", source: "rewrite", target: "cache" },
+  { id: "e-cache-rag", source: "cache", target: "rag_init" },
   { id: "e-rag-r1", source: "rag_init", target: "retrieval_1" },
   { id: "e-r1-r2", source: "retrieval_1", target: "retrieval_2" },
   { id: "e-r2-r3", source: "retrieval_2", target: "retrieval_3" },
frontend/hooks/useQueryStream.ts
CHANGED
@@ -3,12 +3,17 @@
 import { useCallback, useRef, useState } from "react";
 import type { NodeState, StageEvent } from "@/types/pipeline";
 
-const API_URL =
+const API_URL =
+  process.env.NEXT_PUBLIC_API_URL ??
+  (typeof window !== "undefined" && window.location.origin !== "http://localhost:3000"
+    ? window.location.origin
+    : "http://localhost:8001");
 
 // Default idle nodes shown before any query is run
 const DEFAULT_NODES: NodeState[] = [
   { id: "intent", label: "Intent Router", status: "idle", detail: "" },
   { id: "rewrite", label: "Query Rewrite", status: "idle", detail: "" },
+  { id: "cache", label: "Cache Check", status: "idle", detail: "" },
   { id: "rag_init", label: "Loading RAG Store", status: "idle", detail: "" },
   { id: "retrieval_1", label: "Retrieval", status: "idle", detail: "" },
   { id: "retrieval_2", label: "Retrieval (retry)", status: "idle", detail: "" },
frontend/next.config.ts
CHANGED
@@ -1,7 +1,8 @@
 import type { NextConfig } from "next";
 
 const nextConfig: NextConfig = {
-
+  output: "export",
+  trailingSlash: true,
 };
 
 export default nextConfig;
frontend/package.json
CHANGED
@@ -15,11 +15,11 @@
     "react-markdown": "10.1.0"
   },
   "devDependencies": {
-    "@tailwindcss/postcss": "4.
+    "@tailwindcss/postcss": "^4.1.0",
     "@types/node": "20.0.0",
     "@types/react": "19.0.0",
     "@types/react-dom": "19.0.0",
-    "tailwindcss": "4.
-    "typescript": "5.0.0"
+    "tailwindcss": "^4.1.0",
+    "typescript": "^5.0.0"
   }
 }
main.py
CHANGED
@@ -1,11 +1,12 @@
 import argparse
 import asyncio
+import hashlib
 import json
 import logging
 import os
 import re
 import sys
-
+import time
 from typing import Any, TYPE_CHECKING
 
@@ -31,12 +32,6 @@ import numpy as np
 from datasets import load_dataset
 from dotenv import load_dotenv
 
-if TYPE_CHECKING:
-    # Imported only for type checking; the actual import of
-    # SentenceTransformer happens lazily inside get_embedder to
-    # keep module import (and thus API startup) lightweight.
-    from sentence_transformers import SentenceTransformer
-
 
 load_dotenv()
 
@@ -80,13 +75,16 @@ configure_logging()
 
 OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
 LLM_MODEL = os.getenv("ASKCHOMSKY_LLM_MODEL", "openai/gpt-4o-mini")
-EMBED_MODEL = "
+EMBED_MODEL = os.getenv("ASKCHOMSKY_EMBED_MODEL", "openai/text-embedding-3-small")
+EMBED_DIM = 1536
 DEFAULT_WORKING_DIR = "./lightrag_store"
 LLM_TIMEOUT_SECONDS = int(os.getenv("LLM_TIMEOUT", "600"))
 MAX_ASYNC_LLM_CALLS = int(os.getenv("MAX_ASYNC", "2"))
 MAX_PARALLEL_INSERT = int(os.getenv("MAX_PARALLEL_INSERT", "2"))
 REWRITE_QUERY_ENABLED = os.getenv("REWRITE_QUERY", "true").lower() == "true"
 VERIFY_CLAIMS_ENABLED = os.getenv("VERIFY_CLAIMS", "true").lower() == "true"
+QUERY_CACHE_TTL_SECONDS = int(os.getenv("QUERY_CACHE_TTL", "86400"))
+QUERY_CACHE_PATH = os.path.join(DEFAULT_WORKING_DIR, "query_cache.json")
 
 
 CITATION_SYSTEM_PROMPT = """You are a retrieval-grounded assistant.
@@ -150,28 +148,98 @@ def configure_langfuse() -> bool:
     return get_langfuse_client() is not None
 
 
-
-
-
-
-    from sentence_transformers import SentenceTransformer
+# ---------------------------------------------------------------------------
+# API-based embeddings (OpenRouter / OpenAI-compatible)
+# ---------------------------------------------------------------------------
+
 
-
+def _get_api_key() -> str:
+    api_key = os.getenv("openrouter_key") or os.getenv("OPENAI_API_KEY", "")
+    if not api_key:
+        raise ValueError("Missing openrouter_key or OPENAI_API_KEY in .env")
+    return api_key
+
+
+def _api_embed_single(text: str) -> list[float]:
+    import httpx
+
+    api_key = _get_api_key()
+    payload = {"input": text, "model": EMBED_MODEL}
+    headers = {
+        "Authorization": f"Bearer {api_key}",
+        "Content-Type": "application/json",
+    }
+    with httpx.Client(timeout=30.0) as client:
+        resp = client.post(
+            OPENROUTER_BASE_URL + "/embeddings", json=payload, headers=headers
+        )
+        resp.raise_for_status()
+        data = resp.json()
+        return data["data"][0]["embedding"]
 
 
 def embed_texts(texts: list[str]) -> np.ndarray:
-    embeddings =
-
-
-
-
-    return np.asarray(embeddings, dtype=np.float32)
+    embeddings = [_api_embed_single(t) for t in texts]
+    arr = np.array(embeddings, dtype=np.float32)
+    norms = np.linalg.norm(arr, axis=1, keepdims=True)
+    norms[norms == 0] = 1.0
+    return arr / norms
 
 
 async def embedding_func(texts: list[str]) -> np.ndarray:
     return await asyncio.to_thread(embed_texts, texts)
 
 
+# ---------------------------------------------------------------------------
+# Query result cache (disk-based, TTL-evicted)
+# ---------------------------------------------------------------------------
+
+
+def _load_query_cache() -> dict[str, dict[str, Any]]:
+    if not os.path.exists(QUERY_CACHE_PATH):
+        return {}
+    try:
+        with open(QUERY_CACHE_PATH, "r") as f:
+            return json.load(f)
+    except (json.JSONDecodeError, OSError):
+        return {}
+
+
+def _save_query_cache(cache: dict[str, dict[str, Any]]) -> None:
+    os.makedirs(os.path.dirname(QUERY_CACHE_PATH), exist_ok=True)
+    with open(QUERY_CACHE_PATH, "w") as f:
+        json.dump(cache, f)
+
+
+def _cache_key(question: str, mode: str) -> str:
+    raw = f"{question.strip().lower()}|{mode}"
+    return hashlib.sha256(raw.encode()).hexdigest()
+
+
+def get_cached_answer(question: str, mode: str) -> str | None:
+    if QUERY_CACHE_TTL_SECONDS <= 0:
+        return None
+    key = _cache_key(question, mode)
+    cache = _load_query_cache()
+    entry = cache.get(key)
+    if not entry:
+        return None
+    if time.time() - entry.get("ts", 0) > QUERY_CACHE_TTL_SECONDS:
+        del cache[key]
+        _save_query_cache(cache)
+        return None
+    return entry.get("answer")
+
+
+def cache_answer(question: str, mode: str, answer: str) -> None:
+    if QUERY_CACHE_TTL_SECONDS <= 0:
+        return
+    key = _cache_key(question, mode)
+    cache = _load_query_cache()
+    cache[key] = {"answer": answer, "ts": time.time()}
+    _save_query_cache(cache)
+
+
 async def llm_model_func(
     prompt,
     system_prompt=None,
@@ -218,7 +286,7 @@ async def initialize_rag(working_dir: str = DEFAULT_WORKING_DIR) -> "LightRAG":
         llm_model_max_async=MAX_ASYNC_LLM_CALLS,
         max_parallel_insert=MAX_PARALLEL_INSERT,
         embedding_func=EmbeddingFunc(
-            embedding_dim=
+            embedding_dim=EMBED_DIM,
             max_token_size=8192,
             model_name=EMBED_MODEL,
             func=embedding_func,
@@ -462,6 +530,10 @@ async def query_rag(
     except Exception:
         return ""
 
+    cached = get_cached_answer(question, mode)
+    if cached is not None:
+        return cached
+
     rag = None
     try:
        rag = await initialize_rag(working_dir)
@@ -500,7 +572,9 @@ async def query_rag(
         answer_with_citations = _enforce_citation_answer(answer_text, references)
         verification_summary = await _verify_claims(answer_with_citations, chunks)
 
-
+        final_answer = f"{answer_with_citations}{verification_summary}".strip()
+        cache_answer(question, mode, final_answer)
+        return final_answer
     finally:
         if rag is not None:
             await rag.finalize_storages()
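
The TTL gate in `get_cached_answer` and `cache_answer` means `QUERY_CACHE_TTL=0` disables caching entirely, while repeat queries inside the window are answered from `lightrag_store/query_cache.json` without touching the LLM. A sketch using the CLI from the README, assuming `ask.py` routes through `query_rag` (the question text is just a placeholder):

```bash
# First run populates the cache; the second is served from disk
python ask.py --query "What does Chomsky say about media?"
python ask.py --query "What does Chomsky say about media?"

# Bypass the cache for a single run
QUERY_CACHE_TTL=0 python ask.py --query "What does Chomsky say about media?"
```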
pyproject.toml
CHANGED
@@ -13,9 +13,9 @@ dependencies = [
     "uvicorn[standard]>=0.30.0",
     "datasets>=4.8.2",
     "langfuse>=4.0.1",
+    "httpx>=0.27.0",
     "lightrag-hku>=1.4.11",
     "llama-index-core>=0.14.18",
-    "llama-index-embeddings-huggingface>=0.7.0",
     "llama-index-llms-openai>=0.7.2",
     "numpy>=2.4.3",
     "poetry>=2.3.2",
start.sh
ADDED
@@ -0,0 +1,21 @@
+#!/bin/bash
+set -e
+
+# Respect Space-provided directory + doc limit overrides
+DOC_LIMIT=${INGEST_DOC_LIMIT:-200}
+PORT=${PORT:-7860}
+RAG_WORKING_DIR=${RAG_WORKING_DIR:-/app/lightrag_store}
+
+mkdir -p "$RAG_WORKING_DIR"
+
+# Ingest corpus if RAG store is empty
+if [ ! -d "$RAG_WORKING_DIR/graphml" ]; then
+  echo "Ingesting corpus..."
+  python ask.py --ingest --doc-limit "$DOC_LIMIT" --working-dir "$RAG_WORKING_DIR"
+  echo "Ingestion complete."
+else
+  echo "RAG store found, skipping ingestion."
+fi
+
+# Start FastAPI on configured port (serves both API + static frontend)
+exec uvicorn backend.api:app --host 0.0.0.0 --port "$PORT"