Spaces:
Sleeping
Sleeping
Deploy backend Docker app
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .dockerignore +12 -0
- Dockerfile +23 -0
- LICENSE +21 -0
- backend/.env.example +38 -0
- backend/README.md +441 -0
- backend/app/__pycache__/main.cpython-313.pyc +0 -0
- backend/app/core/__pycache__/cache.cpython-313.pyc +0 -0
- backend/app/core/__pycache__/config.cpython-313.pyc +0 -0
- backend/app/core/__pycache__/errors.cpython-313.pyc +0 -0
- backend/app/core/__pycache__/logging.cpython-313.pyc +0 -0
- backend/app/core/__pycache__/metrics.cpython-313.pyc +0 -0
- backend/app/core/__pycache__/rate_limit.cpython-313.pyc +0 -0
- backend/app/core/__pycache__/runtime.cpython-313.pyc +0 -0
- backend/app/core/__pycache__/security.cpython-313.pyc +0 -0
- backend/app/core/__pycache__/tracing.cpython-313.pyc +0 -0
- backend/app/core/cache.py +162 -0
- backend/app/core/config.py +118 -0
- backend/app/core/errors.py +83 -0
- backend/app/core/logging.py +19 -0
- backend/app/core/metrics.py +129 -0
- backend/app/core/rate_limit.py +58 -0
- backend/app/core/runtime.py +31 -0
- backend/app/core/security.py +116 -0
- backend/app/core/tracing.py +60 -0
- backend/app/main.py +61 -0
- backend/app/routers/__pycache__/chat.cpython-313.pyc +0 -0
- backend/app/routers/__pycache__/documents.cpython-313.pyc +0 -0
- backend/app/routers/__pycache__/health.cpython-313.pyc +0 -0
- backend/app/routers/__pycache__/ingest.cpython-313.pyc +0 -0
- backend/app/routers/__pycache__/metrics.cpython-313.pyc +0 -0
- backend/app/routers/__pycache__/search.cpython-313.pyc +0 -0
- backend/app/routers/chat.py +280 -0
- backend/app/routers/documents.py +100 -0
- backend/app/routers/health.py +19 -0
- backend/app/routers/ingest.py +194 -0
- backend/app/routers/metrics.py +18 -0
- backend/app/routers/search.py +90 -0
- backend/app/schemas/__pycache__/chat.cpython-313.pyc +0 -0
- backend/app/schemas/__pycache__/documents.cpython-313.pyc +0 -0
- backend/app/schemas/__pycache__/ingest.cpython-313.pyc +0 -0
- backend/app/schemas/__pycache__/search.cpython-313.pyc +0 -0
- backend/app/schemas/chat.py +128 -0
- backend/app/schemas/documents.py +34 -0
- backend/app/schemas/ingest.py +56 -0
- backend/app/schemas/search.py +33 -0
- backend/app/services/__pycache__/chunking.cpython-313.pyc +0 -0
- backend/app/services/__pycache__/dedupe.cpython-313.pyc +0 -0
- backend/app/services/__pycache__/normalize.cpython-313.pyc +0 -0
- backend/app/services/__pycache__/pinecone_store.cpython-313.pyc +0 -0
- backend/app/services/chat/__pycache__/graph.cpython-313.pyc +0 -0
.dockerignore
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
**/__pycache__/
|
| 2 |
+
**/*.pyc
|
| 3 |
+
**/.pytest_cache/
|
| 4 |
+
**/.mypy_cache/
|
| 5 |
+
**/.ruff_cache/
|
| 6 |
+
**/.venv/
|
| 7 |
+
**/venv/
|
| 8 |
+
**/.env
|
| 9 |
+
**/.env.*
|
| 10 |
+
.git/
|
| 11 |
+
.gitignore
|
| 12 |
+
.DS_Store
|
Dockerfile
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.11-slim
|
| 2 |
+
|
| 3 |
+
ENV PYTHONDONTWRITEBYTECODE=1
|
| 4 |
+
ENV PYTHONUNBUFFERED=1
|
| 5 |
+
|
| 6 |
+
WORKDIR /app
|
| 7 |
+
|
| 8 |
+
# Install dependencies
|
| 9 |
+
COPY backend/requirements.txt /app/requirements.txt
|
| 10 |
+
RUN pip install --no-cache-dir -r /app/requirements.txt
|
| 11 |
+
|
| 12 |
+
# Copy application code
|
| 13 |
+
COPY backend /app
|
| 14 |
+
|
| 15 |
+
# Hugging Face Spaces typically exposes port 7860 and sets PORT dynamically.
|
| 16 |
+
EXPOSE 7860
|
| 17 |
+
|
| 18 |
+
ENV PINECONE_NAMESPACE=dev
|
| 19 |
+
ENV LOG_LEVEL=INFO
|
| 20 |
+
|
| 21 |
+
# Use the PORT environment variable if provided (e.g. on Hugging Face Spaces),
|
| 22 |
+
# otherwise default to 7860. Shell form allows env substitution.
|
| 23 |
+
CMD uvicorn app.main:app --host 0.0.0.0 --port ${PORT:-7860}
|
LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2026 Brejesh Balakrishnan
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
backend/.env.example
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
PINECONE_API_KEY=your-pinecone-api-key
|
| 2 |
+
PINECONE_INDEX_NAME=your-index-name
|
| 3 |
+
PINECONE_HOST=https://your-index-host.pinecone.io
|
| 4 |
+
PINECONE_NAMESPACE=dev
|
| 5 |
+
PINECONE_TEXT_FIELD=chunk_text
|
| 6 |
+
|
| 7 |
+
# Groq (LLM for /chat)
|
| 8 |
+
GROQ_API_KEY=your-groq-api-key
|
| 9 |
+
GROQ_BASE_URL=https://api.groq.com/openai/v1
|
| 10 |
+
GROQ_MODEL=llama-3.1-8b-instant
|
| 11 |
+
|
| 12 |
+
# Tavily (optional web search fallback for /chat)
|
| 13 |
+
TAVILY_API_KEY=your-tavily-api-key
|
| 14 |
+
|
| 15 |
+
# Optional: LangSmith / LangChain tracing
|
| 16 |
+
LANGCHAIN_TRACING_V2=false
|
| 17 |
+
LANGCHAIN_API_KEY=your-langsmith-api-key
|
| 18 |
+
LANGCHAIN_PROJECT=rag-agent-workbench
|
| 19 |
+
|
| 20 |
+
# Optional: basic API protection
|
| 21 |
+
# When set, /ingest/*, /documents/*, /search, and /chat* require header X-API-Key
|
| 22 |
+
API_KEY=your-backend-api-key
|
| 23 |
+
|
| 24 |
+
# Optional: CORS
|
| 25 |
+
# Comma-separated list of allowed origins, e.g.
|
| 26 |
+
# ALLOWED_ORIGINS=http://localhost:8501,https://your-streamlit-app.streamlit.app
|
| 27 |
+
# When unset, defaults to "*".
|
| 28 |
+
ALLOWED_ORIGINS=
|
| 29 |
+
|
| 30 |
+
# Optional: rate limiting and caching toggles
|
| 31 |
+
# Set to "false" to disable.
|
| 32 |
+
RATE_LIMIT_ENABLED=true
|
| 33 |
+
CACHE_ENABLED=true
|
| 34 |
+
|
| 35 |
+
# Optional: override the default Wikimedia User-Agent string used for Wikipedia requests.
|
| 36 |
+
# If not set, a descriptive default is used to comply with Wikimedia API policy.
|
| 37 |
+
WIKIMEDIA_USER_AGENT="rag-agent-workbench/0.1 (+https://github.com/..; contact: ..)"
|
| 38 |
+
LOG_LEVEL=INFO
|
backend/README.md
ADDED
|
@@ -0,0 +1,441 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# RAG Agent Workbench – Backend
|
| 2 |
+
|
| 3 |
+
Lightweight FastAPI backend for ingesting documents into Pinecone (with integrated embeddings), searching over them, and serving a production-style RAG chat endpoint.
|
| 4 |
+
|
| 5 |
+
## Prerequisites
|
| 6 |
+
|
| 7 |
+
- Python 3.11+
|
| 8 |
+
- A Pinecone account and an index configured with **integrated embeddings**
|
| 9 |
+
- A Groq account and API key for chat
|
| 10 |
+
- (Optional) Tavily API key for web search fallback
|
| 11 |
+
- (Optional) LangSmith account + API key for tracing
|
| 12 |
+
- Environment variables set (see `.env.example`)
|
| 13 |
+
|
| 14 |
+
## Setup
|
| 15 |
+
|
| 16 |
+
```bash
|
| 17 |
+
cd backend
|
| 18 |
+
python -m venv .venv
|
| 19 |
+
source .venv/bin/activate # On Windows: .venv\Scripts\activate
|
| 20 |
+
pip install -r requirements.txt
|
| 21 |
+
cp .env.example .env # then edit with your Pinecone, Groq, and optional Tavily/LangSmith credentials
|
| 22 |
+
```
|
| 23 |
+
|
| 24 |
+
Required `.env` values:
|
| 25 |
+
|
| 26 |
+
- `PINECONE_API_KEY` – your Pinecone API key
|
| 27 |
+
- `PINECONE_INDEX_NAME` – the index name (used for configuration checks)
|
| 28 |
+
- `PINECONE_HOST` – the index host URL (use host targeting for production)
|
| 29 |
+
- `PINECONE_NAMESPACE` – default namespace (e.g. `dev`)
|
| 30 |
+
- `PINECONE_TEXT_FIELD` – text field name used by the integrated embedding index (e.g. `chunk_text` or `content`)
|
| 31 |
+
- `LOG_LEVEL` – e.g. `INFO`, `DEBUG`
|
| 32 |
+
|
| 33 |
+
Required for `/chat`:
|
| 34 |
+
|
| 35 |
+
- `GROQ_API_KEY` – your Groq API key
|
| 36 |
+
- `GROQ_BASE_URL` – Groq OpenAI-compatible endpoint (default `https://api.groq.com/openai/v1`)
|
| 37 |
+
- `GROQ_MODEL` – Groq chat model name (default `llama-3.1-8b-instant`)
|
| 38 |
+
|
| 39 |
+
Optional for web search fallback:
|
| 40 |
+
|
| 41 |
+
- `TAVILY_API_KEY` – Tavily API key (enables web search in `/chat` when retrieval is weak)
|
| 42 |
+
|
| 43 |
+
Optional for LangSmith tracing:
|
| 44 |
+
|
| 45 |
+
- `LANGCHAIN_TRACING_V2` – set to `true` to enable tracing
|
| 46 |
+
- `LANGCHAIN_API_KEY` – your LangSmith API key
|
| 47 |
+
- `LANGCHAIN_PROJECT` – project name for traces (e.g. `rag-agent-workbench`)
|
| 48 |
+
|
| 49 |
+
Optional for basic API protection:
|
| 50 |
+
|
| 51 |
+
- `API_KEY` – when set, `/ingest/*`, `/documents/*`, `/search`, and `/chat*` require `X-API-Key` header.
|
| 52 |
+
|
| 53 |
+
Optional for CORS:
|
| 54 |
+
|
| 55 |
+
- `ALLOWED_ORIGINS` – comma-separated list of allowed origins.
|
| 56 |
+
- If unset, defaults to `"*"` (useful for local dev and quick demos).
|
| 57 |
+
|
| 58 |
+
Optional for rate limiting and caching:
|
| 59 |
+
|
| 60 |
+
- `RATE_LIMIT_ENABLED` – defaults to `true`. Set to `false` to disable SlowAPI limits.
|
| 61 |
+
- `CACHE_ENABLED` – defaults to `true`. Set to `false` to disable in-memory TTL caches.
|
| 62 |
+
|
| 63 |
+
Your Pinecone index **must** be configured for integrated embeddings (e.g. via `create_index_for_model` or `configure_index(embed=...)`), with a field mapping that includes the configured `PINECONE_TEXT_FIELD`.
|
| 64 |
+
|
| 65 |
+
## Run locally
|
| 66 |
+
|
| 67 |
+
```bash
|
| 68 |
+
cd backend
|
| 69 |
+
uvicorn app.main:app --reload --port 8000
|
| 70 |
+
```
|
| 71 |
+
|
| 72 |
+
The API will be available at `http://localhost:8000`.
|
| 73 |
+
|
| 74 |
+
## Sample endpoints
|
| 75 |
+
|
| 76 |
+
### Health
|
| 77 |
+
|
| 78 |
+
```bash
|
| 79 |
+
curl http://localhost:8000/health
|
| 80 |
+
```
|
| 81 |
+
|
| 82 |
+
### Ingest from arXiv
|
| 83 |
+
|
| 84 |
+
```bash
|
| 85 |
+
curl -X POST "http://localhost:8000/ingest/arxiv" \
|
| 86 |
+
-H "Content-Type: application/json" \
|
| 87 |
+
-d '{
|
| 88 |
+
"query": "retrieval augmented generation",
|
| 89 |
+
"max_docs": 5,
|
| 90 |
+
"namespace": "dev",
|
| 91 |
+
"category": "papers"
|
| 92 |
+
}'
|
| 93 |
+
```
|
| 94 |
+
|
| 95 |
+
### Ingest from OpenAlex
|
| 96 |
+
|
| 97 |
+
```bash
|
| 98 |
+
curl -X POST "http://localhost:8000/ingest/openalex" \
|
| 99 |
+
-H "Content-Type: application/json" \
|
| 100 |
+
-d '{
|
| 101 |
+
"query": "retrieval augmented generation",
|
| 102 |
+
"max_docs": 5,
|
| 103 |
+
"namespace": "dev",
|
| 104 |
+
"mailto": "you@example.com"
|
| 105 |
+
}'
|
| 106 |
+
```
|
| 107 |
+
|
| 108 |
+
### Ingest from Wikipedia
|
| 109 |
+
|
| 110 |
+
```bash
|
| 111 |
+
curl -X POST "http://localhost:8000/ingest/wiki" \
|
| 112 |
+
-H "Content-Type: application/json" \
|
| 113 |
+
-d '{
|
| 114 |
+
"titles": ["Retrieval-augmented generation", "Vector database"],
|
| 115 |
+
"namespace": "dev"
|
| 116 |
+
}'
|
| 117 |
+
```
|
| 118 |
+
|
| 119 |
+
### Manual text upload
|
| 120 |
+
|
| 121 |
+
```bash
|
| 122 |
+
curl -X POST "http://localhost:8000/documents/upload-text" \
|
| 123 |
+
-H "Content-Type: application/json" \
|
| 124 |
+
-d '{
|
| 125 |
+
"title": "My manual note",
|
| 126 |
+
"source": "manual",
|
| 127 |
+
"text": "This is some example text describing RAG pipelines...",
|
| 128 |
+
"namespace": "dev",
|
| 129 |
+
"metadata": {
|
| 130 |
+
"url": "https://example.com/my-note"
|
| 131 |
+
}
|
| 132 |
+
}'
|
| 133 |
+
```
|
| 134 |
+
|
| 135 |
+
### Search
|
| 136 |
+
|
| 137 |
+
```bash
|
| 138 |
+
curl -X POST "http://localhost:8000/search" \
|
| 139 |
+
-H "Content-Type: application/json" \
|
| 140 |
+
-H "X-API-Key: $API_KEY" \ # only if API_KEY is enabled
|
| 141 |
+
-d '{
|
| 142 |
+
"query": "what is RAG",
|
| 143 |
+
"top_k": 5,
|
| 144 |
+
"namespace": "dev",
|
| 145 |
+
"filters": {"source": "arxiv"}
|
| 146 |
+
}'
|
| 147 |
+
```
|
| 148 |
+
|
| 149 |
+
### Document stats
|
| 150 |
+
|
| 151 |
+
```bash
|
| 152 |
+
curl "http://localhost:8000/documents/stats?namespace=dev"
|
| 153 |
+
```
|
| 154 |
+
|
| 155 |
+
### Chat (non-streaming)
|
| 156 |
+
|
| 157 |
+
```bash
|
| 158 |
+
curl -X POST "http://localhost:8000/chat" \
|
| 159 |
+
-H "Content-Type: application/json" \
|
| 160 |
+
-H "X-API-Key: $API_KEY" \ # only if API_KEY is enabled
|
| 161 |
+
-d '{
|
| 162 |
+
"query": "What is retrieval-augmented generation?",
|
| 163 |
+
"namespace": "dev",
|
| 164 |
+
"top_k": 5,
|
| 165 |
+
"use_web_fallback": true,
|
| 166 |
+
"min_score": 0.25,
|
| 167 |
+
"max_web_results": 5,
|
| 168 |
+
"chat_history": [
|
| 169 |
+
{"role": "user", "content": "You are helping me understand RAG."}
|
| 170 |
+
]
|
| 171 |
+
}'
|
| 172 |
+
```
|
| 173 |
+
|
| 174 |
+
Example JSON response:
|
| 175 |
+
|
| 176 |
+
```json
|
| 177 |
+
{
|
| 178 |
+
"answer": "...",
|
| 179 |
+
"sources": [
|
| 180 |
+
{
|
| 181 |
+
"source": "wiki",
|
| 182 |
+
"title": "Retrieval-augmented generation",
|
| 183 |
+
"url": "https://en.wikipedia.org/wiki/...",
|
| 184 |
+
"score": 0.91,
|
| 185 |
+
"chunk_text": "..."
|
| 186 |
+
}
|
| 187 |
+
],
|
| 188 |
+
"timings": {
|
| 189 |
+
"retrieve_ms": 35.2,
|
| 190 |
+
"web_ms": 0.0,
|
| 191 |
+
"generate_ms": 420.7,
|
| 192 |
+
"total_ms": 470.1
|
| 193 |
+
},
|
| 194 |
+
"trace": {
|
| 195 |
+
"langsmith_project": "rag-agent-workbench",
|
| 196 |
+
"trace_enabled": true
|
| 197 |
+
}
|
| 198 |
+
}
|
| 199 |
+
```
|
| 200 |
+
|
| 201 |
+
### Chat (SSE streaming)
|
| 202 |
+
|
| 203 |
+
```bash
|
| 204 |
+
curl -N -X POST "http://localhost:8000/chat/stream" \
|
| 205 |
+
-H "Content-Type: application/json" \
|
| 206 |
+
-H "X-API-Key: $API_KEY" \ # only if API_KEY is enabled
|
| 207 |
+
-d '{
|
| 208 |
+
"query": "Summarise retrieval-augmented generation.",
|
| 209 |
+
"namespace": "dev",
|
| 210 |
+
"top_k": 5,
|
| 211 |
+
"use_web_fallback": true
|
| 212 |
+
}'
|
| 213 |
+
```
|
| 214 |
+
|
| 215 |
+
- The response will be `text/event-stream`.
|
| 216 |
+
- Individual SSE events stream tokens (space-delimited).
|
| 217 |
+
- The final event (`event: end`) includes the full JSON payload as in `/chat`.
|
| 218 |
+
|
| 219 |
+
### Metrics
|
| 220 |
+
|
| 221 |
+
```bash
|
| 222 |
+
curl "http://localhost:8000/metrics"
|
| 223 |
+
```
|
| 224 |
+
|
| 225 |
+
Returns JSON with:
|
| 226 |
+
|
| 227 |
+
- `requests_by_path` and `errors_by_path`
|
| 228 |
+
- `timings` (average and p50/p95 for `retrieve_ms`, `web_ms`, `generate_ms`, `total_ms`)
|
| 229 |
+
- `cache` stats
|
| 230 |
+
- Last 20 timing samples for chat.
|
| 231 |
+
|
| 232 |
+
## Seeding data
|
| 233 |
+
|
| 234 |
+
A helper script is provided to seed the index with multiple arXiv and OpenAlex queries:
|
| 235 |
+
|
| 236 |
+
```bash
|
| 237 |
+
python ../scripts/seed_ingest.py --base-url http://localhost:8000 --namespace dev --mailto you@example.com
|
| 238 |
+
```
|
| 239 |
+
|
| 240 |
+
## Docling integration (external script)
|
| 241 |
+
|
| 242 |
+
Docling is used via a separate script so the backend container stays small. To convert a local PDF and upload it as text:
|
| 243 |
+
|
| 244 |
+
```bash
|
| 245 |
+
cd scripts
|
| 246 |
+
pip install docling
|
| 247 |
+
python docling_convert_and_upload.py \
|
| 248 |
+
--pdf-path /path/to/file.pdf \
|
| 249 |
+
--backend-url http://localhost:8000 \
|
| 250 |
+
--namespace dev \
|
| 251 |
+
--title "My PDF via Docling" \
|
| 252 |
+
--source docling
|
| 253 |
+
```
|
| 254 |
+
|
| 255 |
+
## Deploy Backend on Hugging Face Spaces (Docker)
|
| 256 |
+
|
| 257 |
+
1. **Create a new Space**
|
| 258 |
+
- Go to Hugging Face → *New Space*.
|
| 259 |
+
- Choose:
|
| 260 |
+
- **SDK**: Docker
|
| 261 |
+
- **Space name**: e.g. `your-name/rag-agent-workbench-backend`.
|
| 262 |
+
- Point the Space to this repository and configure it to use the `backend/` subdirectory (or copy `backend/Dockerfile` to the root if you prefer).
|
| 263 |
+
|
| 264 |
+
2. **Environment variables / secrets**
|
| 265 |
+
|
| 266 |
+
In the Space settings, configure the following (as “Secrets” where appropriate):
|
| 267 |
+
|
| 268 |
+
Required:
|
| 269 |
+
|
| 270 |
+
- `PINECONE_API_KEY`
|
| 271 |
+
- `PINECONE_HOST`
|
| 272 |
+
- `PINECONE_INDEX_NAME`
|
| 273 |
+
- `PINECONE_NAMESPACE`
|
| 274 |
+
- `PINECONE_TEXT_FIELD=content` (or your actual text field)
|
| 275 |
+
- `GROQ_API_KEY`
|
| 276 |
+
- `GROQ_BASE_URL` (optional, defaults to `https://api.groq.com/openai/v1`)
|
| 277 |
+
- `GROQ_MODEL` (optional, defaults to `llama-3.1-8b-instant`)
|
| 278 |
+
|
| 279 |
+
Optional:
|
| 280 |
+
|
| 281 |
+
- `TAVILY_API_KEY` (web search fallback for `/chat`)
|
| 282 |
+
- `LANGCHAIN_TRACING_V2`
|
| 283 |
+
- `LANGCHAIN_API_KEY`
|
| 284 |
+
- `LANGCHAIN_PROJECT`
|
| 285 |
+
- `API_KEY` (to protect `/ingest/*`, `/documents/*`, `/search`, `/chat*`)
|
| 286 |
+
- `ALLOWED_ORIGINS` (e.g. your Streamlit frontend origin)
|
| 287 |
+
- `RATE_LIMIT_ENABLED` and `CACHE_ENABLED` (rarely need to change from defaults)
|
| 288 |
+
|
| 289 |
+
3. **Ports and startup**
|
| 290 |
+
|
| 291 |
+
- The Docker image exposes port **7860** by default.
|
| 292 |
+
- Hugging Face Spaces sets the `PORT` environment variable; the `CMD` honours it:
|
| 293 |
+
- `uvicorn app.main:app --host 0.0.0.0 --port ${PORT:-7860}`
|
| 294 |
+
- On successful startup, logs include:
|
| 295 |
+
- `Starting on port=<port> hf_spaces_mode=<bool>`
|
| 296 |
+
|
| 297 |
+
4. **Verify**
|
| 298 |
+
|
| 299 |
+
- Open your Space URL:
|
| 300 |
+
- `https://<your-space>.hf.space/docs` – interactive API docs.
|
| 301 |
+
- `https://<your-space>.hf.space/health` – health check.
|
| 302 |
+
- If `API_KEY` is set, test protected endpoints using `X-API-Key`.
|
| 303 |
+
|
| 304 |
+
## Deploy Frontend on Streamlit Community Cloud
|
| 305 |
+
|
| 306 |
+
1. **Prepare the repo**
|
| 307 |
+
|
| 308 |
+
- The minimal Streamlit frontend lives under `frontend/app.py`.
|
| 309 |
+
- Root `requirements.txt` includes:
|
| 310 |
+
- `streamlit`
|
| 311 |
+
- `httpx`
|
| 312 |
+
|
| 313 |
+
2. **Create Streamlit app**
|
| 314 |
+
|
| 315 |
+
- Go to Streamlit Community Cloud and create a new app.
|
| 316 |
+
- Point it at this repository.
|
| 317 |
+
- Set the main file to `frontend/app.py`.
|
| 318 |
+
|
| 319 |
+
3. **Configure Streamlit secrets**
|
| 320 |
+
|
| 321 |
+
- In the Streamlit app settings, configure *Secrets* (YAML):
|
| 322 |
+
|
| 323 |
+
```yaml
|
| 324 |
+
BACKEND_BASE_URL: "https://<your-backend-space>.hf.space"
|
| 325 |
+
API_KEY: "your-backend-api-key" # only if backend API_KEY is set
|
| 326 |
+
```
|
| 327 |
+
|
| 328 |
+
- **Do not** commit secrets into the repo.
|
| 329 |
+
|
| 330 |
+
4. **Verify connectivity**
|
| 331 |
+
|
| 332 |
+
- Open the Streamlit app.
|
| 333 |
+
- In the sidebar “Connectivity” panel:
|
| 334 |
+
- Confirm the backend URL is correct.
|
| 335 |
+
- Click “Ping /health” to verify backend connectivity.
|
| 336 |
+
- Use the chat panel to send a question:
|
| 337 |
+
- The app will call `/chat` on the backend and display answer, timings, and sources.
|
| 338 |
+
|
| 339 |
+
## Local Test Checklist – Work Package C
|
| 340 |
+
|
| 341 |
+
1. **Configure environment**
|
| 342 |
+
|
| 343 |
+
- Set `PINECONE_*` variables for an integrated embeddings index.
|
| 344 |
+
- Set `GROQ_API_KEY` (and optionally override `GROQ_BASE_URL`, `GROQ_MODEL`).
|
| 345 |
+
- Optionally set `TAVILY_API_KEY` for web fallback.
|
| 346 |
+
- Optionally enable LangSmith:
|
| 347 |
+
- `LANGCHAIN_TRACING_V2=true`
|
| 348 |
+
- `LANGCHAIN_API_KEY=...`
|
| 349 |
+
- `LANGCHAIN_PROJECT=rag-agent-workbench`
|
| 350 |
+
- Optionally set:
|
| 351 |
+
- `API_KEY` for basic protection.
|
| 352 |
+
- `ALLOWED_ORIGINS` if you are calling from a browser origin.
|
| 353 |
+
- `RATE_LIMIT_ENABLED` / `CACHE_ENABLED` for tuning.
|
| 354 |
+
|
| 355 |
+
2. **Start the backend**
|
| 356 |
+
|
| 357 |
+
```bash
|
| 358 |
+
cd backend
|
| 359 |
+
uvicorn app.main:app --reload --port 8000
|
| 360 |
+
```
|
| 361 |
+
|
| 362 |
+
3. **Ingest data**
|
| 363 |
+
|
| 364 |
+
- Quick Wikipedia smoke test (also see `scripts/smoke_chat.py`):
|
| 365 |
+
|
| 366 |
+
```bash
|
| 367 |
+
python ../scripts/smoke_chat.py --backend-url http://localhost:8000 --namespace dev
|
| 368 |
+
```
|
| 369 |
+
|
| 370 |
+
4. **Test `/search`**
|
| 371 |
+
|
| 372 |
+
```bash
|
| 373 |
+
curl -X POST "http://localhost:8000/search" \
|
| 374 |
+
-H "Content-Type: application/json" \
|
| 375 |
+
-H "X-API-Key: $API_KEY" \ # only if API_KEY is enabled
|
| 376 |
+
-d '{"query": "what is RAG", "namespace": "dev", "top_k": 5}'
|
| 377 |
+
```
|
| 378 |
+
|
| 379 |
+
5. **Test `/chat`**
|
| 380 |
+
|
| 381 |
+
- Use the curl example above or run:
|
| 382 |
+
|
| 383 |
+
```bash
|
| 384 |
+
curl -X POST "http://localhost:8000/chat" \
|
| 385 |
+
-H "Content-Type: application/json" \
|
| 386 |
+
-H "X-API-Key: $API_KEY" \ # only if API_KEY is enabled
|
| 387 |
+
-d '{"query": "What is retrieval-augmented generation?", "namespace": "dev"}'
|
| 388 |
+
```
|
| 389 |
+
|
| 390 |
+
6. **Test `/chat` with web fallback**
|
| 391 |
+
|
| 392 |
+
- Requires `TAVILY_API_KEY`:
|
| 393 |
+
|
| 394 |
+
```bash
|
| 395 |
+
python ../scripts/smoke_chat_web.py --backend-url http://localhost:8000 --namespace dev
|
| 396 |
+
```
|
| 397 |
+
|
| 398 |
+
7. **Inspect `/metrics`**
|
| 399 |
+
|
| 400 |
+
```bash
|
| 401 |
+
curl "http://localhost:8000/metrics"
|
| 402 |
+
```
|
| 403 |
+
|
| 404 |
+
- Confirm:
|
| 405 |
+
- Request counts are increasing.
|
| 406 |
+
- Timing stats (`average_ms`, `p50_ms`, `p95_ms`) are populated after several `/chat` calls.
|
| 407 |
+
- Cache hit/miss counters change when repeating identical `/search` or `/chat` requests.
|
| 408 |
+
|
| 409 |
+
8. **Run the benchmark script**
|
| 410 |
+
|
| 411 |
+
- From the repo root:
|
| 412 |
+
|
| 413 |
+
```bash
|
| 414 |
+
python scripts/bench_local.py \
|
| 415 |
+
--backend-url http://localhost:8000 \
|
| 416 |
+
--namespace dev \
|
| 417 |
+
--concurrency 10 \
|
| 418 |
+
--requests 50 \
|
| 419 |
+
--api-key "$API_KEY"
|
| 420 |
+
```
|
| 421 |
+
|
| 422 |
+
- Review reported:
|
| 423 |
+
- Average latency.
|
| 424 |
+
- p50 / p95 latency.
|
| 425 |
+
- Error rate.
|
| 426 |
+
|
| 427 |
+
9. **Optional: Test Streamlit frontend locally**
|
| 428 |
+
|
| 429 |
+
- Install root requirements:
|
| 430 |
+
|
| 431 |
+
```bash
|
| 432 |
+
pip install -r requirements.txt
|
| 433 |
+
```
|
| 434 |
+
|
| 435 |
+
- Run:
|
| 436 |
+
|
| 437 |
+
```bash
|
| 438 |
+
streamlit run frontend/app.py
|
| 439 |
+
```
|
| 440 |
+
|
| 441 |
+
- Configure `BACKEND_BASE_URL` and `API_KEY` via environment or `.streamlit/secrets.toml`, and verify chat works end-to-end.
|
backend/app/__pycache__/main.cpython-313.pyc
ADDED
|
Binary file (2.45 kB). View file
|
|
|
backend/app/core/__pycache__/cache.cpython-313.pyc
ADDED
|
Binary file (5.12 kB). View file
|
|
|
backend/app/core/__pycache__/config.cpython-313.pyc
ADDED
|
Binary file (4.09 kB). View file
|
|
|
backend/app/core/__pycache__/errors.cpython-313.pyc
ADDED
|
Binary file (4.89 kB). View file
|
|
|
backend/app/core/__pycache__/logging.cpython-313.pyc
ADDED
|
Binary file (1.11 kB). View file
|
|
|
backend/app/core/__pycache__/metrics.cpython-313.pyc
ADDED
|
Binary file (5.86 kB). View file
|
|
|
backend/app/core/__pycache__/rate_limit.cpython-313.pyc
ADDED
|
Binary file (2.53 kB). View file
|
|
|
backend/app/core/__pycache__/runtime.cpython-313.pyc
ADDED
|
Binary file (1.35 kB). View file
|
|
|
backend/app/core/__pycache__/security.cpython-313.pyc
ADDED
|
Binary file (5.04 kB). View file
|
|
|
backend/app/core/__pycache__/tracing.cpython-313.pyc
ADDED
|
Binary file (2.75 kB). View file
|
|
|
backend/app/core/cache.py
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
from threading import Lock
|
| 3 |
+
from typing import Any, Dict, Hashable, Optional, Tuple
|
| 4 |
+
|
| 5 |
+
from cachetools import TTLCache
|
| 6 |
+
|
| 7 |
+
from app.core.config import get_settings
|
| 8 |
+
from app.core.logging import get_logger
|
| 9 |
+
|
| 10 |
+
logger = get_logger(__name__)
|
| 11 |
+
|
| 12 |
+
_settings = get_settings()
|
| 13 |
+
_CACHE_ENABLED: bool = getattr(_settings, "CACHE_ENABLED", True)
|
| 14 |
+
|
| 15 |
+
# TTLs are intentionally short and in-code defaults; no env required.
|
| 16 |
+
_SEARCH_TTL_SECONDS = 60
|
| 17 |
+
_CHAT_TTL_SECONDS = 60
|
| 18 |
+
|
| 19 |
+
_search_cache: TTLCache = TTLCache(maxsize=1024, ttl=_SEARCH_TTL_SECONDS)
|
| 20 |
+
_chat_cache: TTLCache = TTLCache(maxsize=512, ttl=_CHAT_TTL_SECONDS)
|
| 21 |
+
|
| 22 |
+
_lock = Lock()
|
| 23 |
+
|
| 24 |
+
_search_hits: int = 0
|
| 25 |
+
_search_misses: int = 0
|
| 26 |
+
_chat_hits: int = 0
|
| 27 |
+
_chat_misses: int = 0
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def cache_enabled() -> bool:
|
| 31 |
+
return _CACHE_ENABLED
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def _make_search_key(
|
| 35 |
+
namespace: str,
|
| 36 |
+
query: str,
|
| 37 |
+
top_k: int,
|
| 38 |
+
filters: Optional[Dict[str, Any]],
|
| 39 |
+
) -> Hashable:
|
| 40 |
+
filters_json = (
|
| 41 |
+
json.dumps(filters, sort_keys=True, separators=(",", ":"))
|
| 42 |
+
if filters is not None
|
| 43 |
+
else ""
|
| 44 |
+
)
|
| 45 |
+
return (namespace, query, int(top_k), filters_json)
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def _make_chat_key(
|
| 49 |
+
namespace: str,
|
| 50 |
+
query: str,
|
| 51 |
+
top_k: int,
|
| 52 |
+
min_score: float,
|
| 53 |
+
use_web_fallback: bool,
|
| 54 |
+
) -> Hashable:
|
| 55 |
+
return (namespace, query, int(top_k), float(min_score), bool(use_web_fallback))
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def get_search_cached(
|
| 59 |
+
namespace: str,
|
| 60 |
+
query: str,
|
| 61 |
+
top_k: int,
|
| 62 |
+
filters: Optional[Dict[str, Any]],
|
| 63 |
+
) -> Optional[Any]:
|
| 64 |
+
"""Return cached search results or None."""
|
| 65 |
+
global _search_hits, _search_misses
|
| 66 |
+
if not _CACHE_ENABLED:
|
| 67 |
+
return None
|
| 68 |
+
|
| 69 |
+
key = _make_search_key(namespace, query, top_k, filters)
|
| 70 |
+
with _lock:
|
| 71 |
+
if key in _search_cache:
|
| 72 |
+
_search_hits += 1
|
| 73 |
+
logger.info(
|
| 74 |
+
"Search cache hit namespace='%s' query='%s' top_k=%d",
|
| 75 |
+
namespace,
|
| 76 |
+
query,
|
| 77 |
+
top_k,
|
| 78 |
+
)
|
| 79 |
+
return _search_cache[key]
|
| 80 |
+
_search_misses += 1
|
| 81 |
+
logger.info(
|
| 82 |
+
"Search cache miss namespace='%s' query='%s' top_k=%d",
|
| 83 |
+
namespace,
|
| 84 |
+
query,
|
| 85 |
+
top_k,
|
| 86 |
+
)
|
| 87 |
+
return None
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
def set_search_cached(
|
| 91 |
+
namespace: str,
|
| 92 |
+
query: str,
|
| 93 |
+
top_k: int,
|
| 94 |
+
filters: Optional[Dict[str, Any]],
|
| 95 |
+
value: Any,
|
| 96 |
+
) -> None:
|
| 97 |
+
if not _CACHE_ENABLED:
|
| 98 |
+
return
|
| 99 |
+
key = _make_search_key(namespace, query, top_k, filters)
|
| 100 |
+
with _lock:
|
| 101 |
+
_search_cache[key] = value
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
def get_chat_cached(
    namespace: str,
    query: str,
    top_k: int,
    min_score: float,
    use_web_fallback: bool,
) -> Optional[Any]:
    """Return a cached chat response, or None on a miss / when caching is off.

    Only used when chat_history is empty.
    """
    global _chat_hits, _chat_misses
    if not _CACHE_ENABLED:
        return None

    cache_key = _make_chat_key(namespace, query, top_k, min_score, use_web_fallback)
    missing = object()  # sentinel distinguishes "absent" from a cached None
    with _lock:
        cached = _chat_cache.get(cache_key, missing)
        if cached is missing:
            _chat_misses += 1
            logger.info(
                "Chat cache miss namespace='%s' query='%s' top_k=%d",
                namespace,
                query,
                top_k,
            )
            return None
        _chat_hits += 1
        logger.info(
            "Chat cache hit namespace='%s' query='%s' top_k=%d",
            namespace,
            query,
            top_k,
        )
        return cached
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
def set_chat_cached(
    namespace: str,
    query: str,
    top_k: int,
    min_score: float,
    use_web_fallback: bool,
    value: Any,
) -> None:
    """Store *value* in the chat cache; no-op when caching is disabled."""
    if not _CACHE_ENABLED:
        return
    cache_key = _make_chat_key(namespace, query, top_k, min_score, use_web_fallback)
    with _lock:
        _chat_cache[cache_key] = value
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
def get_cache_stats() -> Dict[str, int]:
    """Return a snapshot of the cache hit/miss counters.

    Reads the counters without taking the lock, matching the original
    behaviour — values are advisory, not transactional.
    """
    snapshot = {
        "search_hits": _search_hits,
        "search_misses": _search_misses,
        "chat_hits": _chat_hits,
        "chat_misses": _chat_misses,
    }
    return snapshot
|
backend/app/core/config.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from functools import lru_cache
|
| 3 |
+
from typing import Optional
|
| 4 |
+
|
| 5 |
+
from dotenv import load_dotenv
|
| 6 |
+
from pydantic import Field
|
| 7 |
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
| 8 |
+
|
| 9 |
+
# Load environment variables from a local .env file if present
|
| 10 |
+
load_dotenv()
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class Settings(BaseSettings):
    """Application settings loaded from environment variables.

    Values come from the process environment and, via ``model_config``,
    from a local ``.env`` file; unknown variables are ignored.
    """

    # App
    APP_NAME: str = Field(default="rag-agent-workbench")
    APP_VERSION: str = Field(default="0.1.0")

    # Pinecone — API key, index name, and host are required (no defaults).
    PINECONE_API_KEY: str = Field(..., description="Pinecone API key")
    PINECONE_INDEX_NAME: str = Field(
        ..., description="Name of the Pinecone index (used for configuration checks)"
    )
    PINECONE_HOST: str = Field(
        ..., description="Pinecone index host URL for data-plane operations"
    )
    PINECONE_NAMESPACE: str = Field(
        default="dev", description="Default Pinecone namespace"
    )
    PINECONE_TEXT_FIELD: str = Field(
        default="chunk_text",
        description=(
            "Text field name used by the Pinecone integrated embedding index. "
            "For example, set to 'content' if your index field_map uses that name."
        ),
    )

    # Logging
    LOG_LEVEL: str = Field(default="INFO", description="Application log level")

    # HTTP client defaults
    HTTP_TIMEOUT_SECONDS: float = Field(
        default=10.0, description="Default timeout for outbound HTTP requests"
    )
    HTTP_MAX_RETRIES: int = Field(
        default=3, description="Max retries for outbound HTTP requests"
    )

    # Groq / LLM — optional at startup; chat endpoints need the key at runtime.
    GROQ_API_KEY: Optional[str] = Field(
        default=None,
        description="Groq API key (required for /chat endpoints)",
    )
    GROQ_BASE_URL: str = Field(
        default="https://api.groq.com/openai/v1",
        description="Groq OpenAI-compatible base URL",
    )
    GROQ_MODEL: str = Field(
        default="llama-3.1-8b-instant",
        description="Default Groq chat model used for /chat",
    )

    # Web search / Tavily — optional; absence disables web fallback.
    TAVILY_API_KEY: Optional[str] = Field(
        default=None,
        description="Tavily API key for web search fallback (optional)",
    )

    # RAG defaults — ge/le bounds are validated by pydantic on load.
    RAG_DEFAULT_TOP_K: int = Field(
        default=5,
        ge=1,
        le=100,
        description="Default number of documents to retrieve for RAG",
    )
    RAG_MIN_SCORE: float = Field(
        default=0.25,
        ge=0.0,
        le=1.0,
        description="Default minimum relevance score to trust retrieval without web fallback",
    )
    RAG_MAX_WEB_RESULTS: int = Field(
        default=5,
        ge=1,
        le=20,
        description="Maximum number of web search results to fetch when using Tavily",
    )

    # Operational toggles
    RATE_LIMIT_ENABLED: bool = Field(
        default=True,
        description="Enable SlowAPI rate limiting middleware when true.",
    )
    CACHE_ENABLED: bool = Field(
        default=True,
        description="Enable in-memory TTL caching for /search and /chat when true.",
    )

    # extra="ignore" lets unrelated environment variables pass silently.
    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
@lru_cache(maxsize=1)
def get_settings() -> Settings:
    """Return the process-wide :class:`Settings` instance (built once, memoised)."""
    settings = Settings()  # type: ignore[call-arg]
    return settings
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
def get_env_bool(name: str, default: bool = False) -> bool:
    """Parse a boolean flag from an environment variable.

    Returns *default* when the variable is unset; otherwise treats
    "1", "true", "yes" and "on" (whitespace-stripped, case-insensitive)
    as True and everything else as False.
    """
    truthy = {"1", "true", "yes", "on"}
    raw: Optional[str] = os.getenv(name)
    return default if raw is None else raw.strip().lower() in truthy
|
backend/app/core/errors.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
from typing import Any
|
| 3 |
+
|
| 4 |
+
from fastapi import FastAPI, HTTPException, Request
|
| 5 |
+
from fastapi.exceptions import RequestValidationError
|
| 6 |
+
from fastapi.responses import JSONResponse
|
| 7 |
+
from starlette import status
|
| 8 |
+
|
| 9 |
+
logger = logging.getLogger(__name__)
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class PineconeIndexConfigError(RuntimeError):
    """Raised when the Pinecone index is not configured for integrated embeddings.

    Mapped to an HTTP 500 response by the handler registered in
    setup_exception_handlers.
    """
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class UpstreamServiceError(RuntimeError):
    """Raised when an upstream dependency (LLM, web search, etc.) fails.

    Carries the name of the failing service so handlers can log which
    dependency broke; the message itself is the exception text.
    """

    def __init__(self, service: str, message: str) -> None:
        super().__init__(message)
        # Which upstream dependency failed (e.g. "groq", "tavily").
        self.service = service
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def setup_exception_handlers(app: FastAPI) -> None:
    """Register global exception handlers on the FastAPI app.

    Handlers are defined as closures so registration stays a single call at
    app construction time. All responses use a uniform ``{"detail": ...}``
    JSON shape.
    """

    # Misconfigured Pinecone index -> 500: the server cannot serve requests.
    @app.exception_handler(PineconeIndexConfigError)
    async def pinecone_index_config_error_handler(
        request: Request, exc: PineconeIndexConfigError
    ) -> JSONResponse:
        logger.error("Pinecone index configuration error: %s", exc)
        return JSONResponse(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            content={"detail": str(exc)},
        )

    # Failure in a dependency we call (LLM, web search) -> 502 Bad Gateway.
    @app.exception_handler(UpstreamServiceError)
    async def upstream_service_error_handler(
        request: Request, exc: UpstreamServiceError
    ) -> JSONResponse:
        logger.error("Upstream service error from %s: %s", exc.service, exc)
        return JSONResponse(
            status_code=status.HTTP_502_BAD_GATEWAY,
            content={"detail": str(exc)},
        )

    # Pydantic request-body/query validation failures -> 422 with field errors.
    @app.exception_handler(RequestValidationError)
    async def validation_exception_handler(
        request: Request, exc: RequestValidationError
    ) -> JSONResponse:
        logger.warning("Request validation error: %s", exc)
        return JSONResponse(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            content={"detail": exc.errors()},
        )

    @app.exception_handler(HTTPException)
    async def http_exception_handler(
        request: Request, exc: HTTPException
    ) -> JSONResponse:
        # Let FastAPI-style HTTPException pass through with its status and detail.
        logger.warning(
            "HTTPException raised: status=%s detail=%s",
            exc.status_code,
            exc.detail,
        )
        return JSONResponse(
            status_code=exc.status_code,
            content={"detail": exc.detail},
        )

    # Catch-all: log the traceback but never leak internals to the client.
    @app.exception_handler(Exception)
    async def generic_exception_handler(
        request: Request, exc: Exception
    ) -> JSONResponse:
        logger.exception("Unhandled error", exc_info=exc)
        return JSONResponse(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            content={"detail": "Internal server error"},
        )
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
# Fix: UpstreamServiceError is part of this module's public API (it is defined
# here and raised/handled by callers) but was missing from __all__.
__all__ = ["PineconeIndexConfigError", "UpstreamServiceError", "setup_exception_handlers"]
|
backend/app/core/logging.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
from typing import Optional
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def configure_logging(log_level: str) -> None:
    """Configure root logging for the application.

    Keeps setup minimal while ensuring consistent formatting; unknown
    level names silently fall back to INFO.
    """
    resolved = getattr(logging, log_level.upper(), logging.INFO)
    logging.basicConfig(
        format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
        level=resolved,
    )
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def get_logger(name: Optional[str] = None) -> logging.Logger:
    """Return a named logger, defaulting to the application-wide "app" logger."""
    if name:
        return logging.getLogger(name)
    return logging.getLogger("app")
|
backend/app/core/metrics.py
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from collections import defaultdict, deque
|
| 2 |
+
from threading import Lock
|
| 3 |
+
from time import perf_counter
|
| 4 |
+
from typing import Deque, Dict, List, Mapping, MutableMapping
|
| 5 |
+
|
| 6 |
+
from fastapi import FastAPI, Request
|
| 7 |
+
from fastapi.responses import JSONResponse
|
| 8 |
+
|
| 9 |
+
from app.core.cache import get_cache_stats
|
| 10 |
+
from app.core.logging import get_logger
|
| 11 |
+
|
| 12 |
+
logger = get_logger(__name__)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
# Request and error counters by path.
_request_counts: MutableMapping[str, int] = defaultdict(int)
_error_counts: MutableMapping[str, int] = defaultdict(int)

# Timing samples for chat requests: last N samples.
_TIMING_FIELDS: List[str] = ["retrieve_ms", "web_ms", "generate_ms", "total_ms"]
_TIMING_BUFFER_SIZE: int = 20
# deque(maxlen=...) drops the oldest sample automatically once full.
_timing_samples: Deque[Dict[str, float]] = deque(maxlen=_TIMING_BUFFER_SIZE)

# Aggregated sums and counts for averages (over ALL samples, not just the buffer).
_timing_sums: Dict[str, float] = {f: 0.0 for f in _TIMING_FIELDS}
_timing_count: int = 0

# Guards all mutable module-level metrics state above.
_lock = Lock()
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
async def metrics_middleware(request: Request, call_next):
    """Count every request (and error) by path, logging elapsed time.

    Exceptions escaping the handler are counted as errors and re-raised so
    the global exception handlers still see them.
    """
    route_path = request.url.path or "/"
    started_at = perf_counter()
    try:
        response = await call_next(request)
    except Exception:  # noqa: BLE001
        duration_ms = (perf_counter() - started_at) * 1000.0
        with _lock:
            _request_counts[route_path] += 1
            _error_counts[route_path] += 1
        logger.exception("Unhandled error for path=%s elapsed_ms=%.2f", route_path, duration_ms)
        raise

    duration_ms = (perf_counter() - started_at) * 1000.0
    status_code = response.status_code
    with _lock:
        _request_counts[route_path] += 1
        # 4xx/5xx responses count as errors even though they returned normally.
        if status_code >= 400:
            _error_counts[route_path] += 1

    logger.debug(
        "Request path=%s status=%s elapsed_ms=%.2f", route_path, status_code, duration_ms
    )
    return response
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def record_chat_timings(timings: Mapping[str, float]) -> None:
    """Fold one chat request's timing sample into the rolling metrics.

    Expects keys retrieve_ms, web_ms, generate_ms, total_ms; any missing
    key is recorded as 0.0.
    """
    global _timing_count
    sample: Dict[str, float] = {}
    for field in _TIMING_FIELDS:
        sample[field] = float(timings.get(field, 0.0))
    with _lock:
        _timing_samples.append(sample)
        for name, ms in sample.items():
            _timing_sums[name] += ms
        _timing_count += 1
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def _percentile(values: List[float], p: float) -> float:
|
| 73 |
+
if not values:
|
| 74 |
+
return 0.0
|
| 75 |
+
values_sorted = sorted(values)
|
| 76 |
+
k = max(0, min(len(values_sorted) - 1, int(round((p / 100.0) * (len(values_sorted) - 1)))))
|
| 77 |
+
return values_sorted[k]
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def get_metrics_snapshot() -> Dict[str, object]:
    """Return a stable snapshot of metrics suitable for /metrics responses.

    All mutable state is copied under the lock; aggregation happens on the
    copies so the lock is held only briefly.
    """
    with _lock:
        requests_by_path = dict(_request_counts)
        errors_by_path = dict(_error_counts)
        samples = list(_timing_samples)
        sums = dict(_timing_sums)
        count = int(_timing_count)

    # Averages are over every sample ever recorded (sums/count).
    if count > 0:
        averages = {field: sums.get(field, 0.0) / count for field in _TIMING_FIELDS}
    else:
        averages = {field: 0.0 for field in _TIMING_FIELDS}

    # Percentiles are computed over the most recent buffered samples only.
    if samples:
        per_field = {
            field: [s.get(field, 0.0) for s in samples] for field in _TIMING_FIELDS
        }
        p50 = {field: _percentile(vals, 50.0) for field, vals in per_field.items()}
        p95 = {field: _percentile(vals, 95.0) for field, vals in per_field.items()}
    else:
        p50 = {field: 0.0 for field in _TIMING_FIELDS}
        p95 = {field: 0.0 for field in _TIMING_FIELDS}

    return {
        "requests_by_path": requests_by_path,
        "errors_by_path": errors_by_path,
        "timings": {
            "average_ms": averages,
            "p50_ms": p50,
            "p95_ms": p95,
        },
        "cache": get_cache_stats(),
        "sample_count": count,
        "samples": samples,
    }
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
def setup_metrics(app: FastAPI) -> None:
    """Register the request-counting HTTP middleware on *app*."""
    logger.info("Metrics middleware enabled.")
    register = app.middleware("http")
    register(metrics_middleware)
|
backend/app/core/rate_limit.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any
|
| 2 |
+
|
| 3 |
+
from fastapi import FastAPI, Request
|
| 4 |
+
from fastapi.responses import JSONResponse
|
| 5 |
+
from slowapi import Limiter
|
| 6 |
+
from slowapi.errors import RateLimitExceeded
|
| 7 |
+
from slowapi.middleware import SlowAPIMiddleware
|
| 8 |
+
from slowapi.util import get_remote_address
|
| 9 |
+
|
| 10 |
+
from app.core.config import get_settings
|
| 11 |
+
from app.core.logging import get_logger
|
| 12 |
+
|
| 13 |
+
logger = get_logger(__name__)
|
| 14 |
+
|
| 15 |
+
# Global limiter instance used for decorators.
|
| 16 |
+
limiter = Limiter(key_func=get_remote_address)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def setup_rate_limiter(app: FastAPI) -> None:
    """Configure SlowAPI rate limiting middleware and handlers.

    Limits are enabled/disabled via Settings.RATE_LIMIT_ENABLED. When
    disabled, nothing is installed (app.state.limiter stays unset).
    """
    settings = get_settings()
    if not getattr(settings, "RATE_LIMIT_ENABLED", True):
        logger.info("Rate limiting is disabled via settings.")
        return

    logger.info("Rate limiting enabled with SlowAPI.")

    # SlowAPIMiddleware looks up the limiter on app.state.
    app.state.limiter = limiter  # type: ignore[attr-defined]

    @app.exception_handler(RateLimitExceeded)
    async def rate_limit_exceeded_handler(  # type: ignore[no-redef]
        request: Request,
        exc: RateLimitExceeded,
    ) -> JSONResponse:
        # Retry-After may not be present on every slowapi version/exception;
        # treat any failure to read it as "no hint available".
        retry_after: str | None = None
        try:
            retry_after = exc.headers.get("Retry-After")  # type: ignore[assignment]
        except Exception:  # noqa: BLE001
            retry_after = None

        logger.warning(
            "Rate limit exceeded path=%s client=%s limit=%s",
            request.url.path,
            get_remote_address(request),
            exc.detail,
        )
        content: dict[str, Any] = {
            "detail": "Rate limit exceeded. Please slow down your requests.",
        }
        if retry_after is not None:
            content["retry_after"] = retry_after
        return JSONResponse(status_code=429, content=content)

    # Attach SlowAPI middleware
    app.add_middleware(SlowAPIMiddleware)
|
backend/app/core/runtime.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
|
| 3 |
+
from app.core.logging import get_logger
|
| 4 |
+
|
| 5 |
+
logger = get_logger(__name__)
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def get_port(default: int = 7860) -> int:
    """Return the TCP port the application should bind to.

    Prefers the PORT environment variable (set e.g. on Hugging Face Spaces)
    and falls back to *default* when PORT is unset or not an integer.
    Logs the chosen port plus whether this looks like an HF Spaces runtime.
    """
    raw = os.getenv("PORT")
    if raw:
        try:
            port = int(raw)
        except (TypeError, ValueError):
            port = default
    else:
        port = default

    # Heuristic: SPACE_ID / SPACE_REPO_ID are usually set inside HF Spaces.
    hf_spaces_mode = bool(os.getenv("SPACE_ID") or os.getenv("SPACE_REPO_ID"))
    logger.info(
        "Starting on port=%d hf_spaces_mode=%s",
        port,
        hf_spaces_mode,
    )
    return port
|
backend/app/core/security.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from typing import Iterable, List, Optional
|
| 3 |
+
|
| 4 |
+
from fastapi import FastAPI, Request
|
| 5 |
+
from fastapi.responses import JSONResponse
|
| 6 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 7 |
+
from starlette.middleware.base import BaseHTTPMiddleware
|
| 8 |
+
|
| 9 |
+
from app.core.logging import get_logger
|
| 10 |
+
|
| 11 |
+
logger = get_logger(__name__)
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def _get_allowed_origins() -> List[str]:
|
| 15 |
+
raw = os.getenv("ALLOWED_ORIGINS")
|
| 16 |
+
if not raw:
|
| 17 |
+
# Default: permissive for local development and simple frontends.
|
| 18 |
+
origins = ["*"]
|
| 19 |
+
else:
|
| 20 |
+
origins = [item.strip() for item in raw.split(",") if item.strip()]
|
| 21 |
+
if not origins:
|
| 22 |
+
origins = ["*"]
|
| 23 |
+
return origins
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class APIKeyMiddleware(BaseHTTPMiddleware):
    """Optional API key protection for selected endpoints.

    When the API_KEY environment variable is set, this middleware enforces the
    presence of an `X-API-Key` header with a matching value for:

    - /ingest/*
    - /documents/*
    - /chat*
    - /search

    The following paths remain public regardless of API_KEY:

    - /health
    - /docs
    - /openapi.json
    - /redoc
    - /metrics

    When API_KEY is not set, the middleware is not installed and the API is open.
    """

    def __init__(self, app: FastAPI, api_key: str) -> None:  # type: ignore[override]
        super().__init__(app)
        # The single shared key every protected request must present.
        self.api_key = api_key

        # Prefix matching (str.startswith) — /chatanything would also match /chat.
        self._protected_prefixes: List[str] = [
            "/ingest",
            "/documents",
            "/chat",
            "/search",
        ]
        self._public_prefixes: List[str] = [
            "/health",
            "/docs",
            "/openapi.json",
            "/redoc",
            "/metrics",
        ]

    async def dispatch(self, request: Request, call_next):  # type: ignore[override]
        path = request.url.path or "/"

        # Public endpoints stay open.
        if any(path.startswith(prefix) for prefix in self._public_prefixes):
            return await call_next(request)

        # Only enforce for protected prefixes.
        if not any(path.startswith(prefix) for prefix in self._protected_prefixes):
            return await call_next(request)

        # NOTE(review): plain == comparison, not constant-time
        # (secrets.compare_digest) — confirm this matches the threat model.
        header_key: Optional[str] = request.headers.get("X-API-Key")
        if not header_key or header_key != self.api_key:
            logger.warning("Rejected request with missing/invalid API key path=%s", path)
            return JSONResponse(
                status_code=401,
                content={
                    "detail": (
                        "Missing or invalid API key. Provide X-API-Key header with "
                        "a valid key to access this endpoint."
                    )
                },
            )

        return await call_next(request)
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
def configure_security(app: FastAPI) -> None:
    """Configure CORS and optional API key protection on the FastAPI app."""
    # CORS: origins come from ALLOWED_ORIGINS, defaulting to permissive "*".
    allowed = _get_allowed_origins()
    app.add_middleware(
        CORSMiddleware,
        allow_origins=allowed,
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )
    logger.info("CORS configured allow_origins=%s", allowed)

    # Optional API key middleware — installed only when API_KEY is set.
    api_key = os.getenv("API_KEY")
    if api_key:
        logger.info("API key protection enabled for ingest, documents, search, and chat.")
        app.add_middleware(APIKeyMiddleware, api_key=api_key)
        return

    logger.warning(
        "API key disabled; protected endpoints are open. "
        "Set API_KEY environment variable to enable X-API-Key protection."
    )
|
backend/app/core/tracing.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from functools import lru_cache
|
| 3 |
+
from typing import Any, Dict, List, Optional
|
| 4 |
+
|
| 5 |
+
from app.core.config import get_env_bool
|
| 6 |
+
from app.core.logging import get_logger
|
| 7 |
+
|
| 8 |
+
logger = get_logger(__name__)
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def is_tracing_enabled() -> bool:
    """True when LANGCHAIN_TRACING_V2 is truthy AND LANGCHAIN_API_KEY is set."""
    if not get_env_bool("LANGCHAIN_TRACING_V2", False):
        return False
    return bool(os.getenv("LANGCHAIN_API_KEY"))
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def get_langsmith_project() -> Optional[str]:
    """Return the configured LangSmith project name, or None when unset."""
    project = os.getenv("LANGCHAIN_PROJECT")
    return project
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
@lru_cache(maxsize=1)
def get_tracing_callbacks() -> List[Any]:
    """Return LangChain callback handlers for tracing, if available.

    When LANGCHAIN_TRACING_V2=true and LANGCHAIN_API_KEY is set, this will
    attempt to create a LangChainTracer instance. If tracing is not enabled
    or the tracer is unavailable, an empty list is returned.

    NOTE: lru_cache(maxsize=1) means the first result is memoised for the
    process lifetime — environment changes after the first call have no
    effect.
    """
    if not is_tracing_enabled():
        logger.info(
            "LangSmith tracing disabled (set LANGCHAIN_TRACING_V2=true and "
            "LANGCHAIN_API_KEY to enable)."
        )
        return []

    # Imported lazily so the app works without langchain installed.
    try:
        from langchain_core.tracers import LangChainTracer  # type: ignore[import]
    except Exception as exc:  # noqa: BLE001
        logger.warning(
            "LangSmith tracing requested but LangChainTracer is unavailable: %s", exc
        )
        return []

    project = get_langsmith_project()
    tracer = LangChainTracer(project_name=project)
    logger.info(
        "LangSmith tracing enabled for project='%s'",
        project or "(default)",
    )
    return [tracer]
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def get_tracing_response_metadata() -> Dict[str, Any]:
    """Return trace metadata suitable for API responses."""
    metadata: Dict[str, Any] = {
        "langsmith_project": get_langsmith_project(),
        "trace_enabled": is_tracing_enabled(),
    }
    return metadata
|
backend/app/main.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI
|
| 2 |
+
from fastapi.responses import ORJSONResponse
|
| 3 |
+
|
| 4 |
+
from app.core.config import get_settings
|
| 5 |
+
from app.core.errors import PineconeIndexConfigError, setup_exception_handlers
|
| 6 |
+
from app.core.logging import configure_logging, get_logger
|
| 7 |
+
from app.core.metrics import setup_metrics
|
| 8 |
+
from app.core.rate_limit import setup_rate_limiter
|
| 9 |
+
from app.core.runtime import get_port
|
| 10 |
+
from app.core.security import configure_security
|
| 11 |
+
from app.routers.documents import router as documents_router
|
| 12 |
+
from app.routers.health import router as health_router
|
| 13 |
+
from app.routers.ingest import router as ingest_router
|
| 14 |
+
from app.routers.search import router as search_router
|
| 15 |
+
from app.routers.chat import router as chat_router
|
| 16 |
+
from app.routers.metrics import router as metrics_router
|
| 17 |
+
from app.services.pinecone_store import init_pinecone
|
| 18 |
+
|
| 19 |
+
# Settings and logging must be ready before anything else logs.
settings = get_settings()
configure_logging(settings.LOG_LEVEL)
logger = get_logger(__name__)

# Log runtime port / environment context at import time for easier diagnostics.
get_port()

app = FastAPI(
    title="RAG Agent Workbench API",
    version=settings.APP_VERSION,
    default_response_class=ORJSONResponse,
    docs_url="/docs",
    redoc_url="/redoc",
    openapi_url="/openapi.json",
)

# Core app configuration — note middleware is applied in registration order.
configure_security(app)
setup_rate_limiter(app)
setup_metrics(app)

# Register routers with tags and ensure they are included in the schema
app.include_router(health_router, tags=["health"])
app.include_router(ingest_router, tags=["ingest"])
app.include_router(search_router, tags=["search"])
app.include_router(documents_router, tags=["documents"])
app.include_router(chat_router, tags=["chat"])
app.include_router(metrics_router, tags=["metrics"])

# Register exception handlers
setup_exception_handlers(app)
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
@app.on_event("startup")
async def startup_event() -> None:
    """Initialise external services when the application starts.

    Currently this only initialises the Pinecone client/index. A misconfigured
    index is treated as fatal: the exception is re-raised so the server fails
    fast instead of serving a broken app.
    """
    try:
        init_pinecone(settings)
    except PineconeIndexConfigError:
        # Propagate so startup aborts on misconfiguration; the registered
        # exception handlers and FastAPI/uvicorn take it from here.
        raise
    else:
        logger.info("Pinecone initialisation completed")
|
backend/app/routers/__pycache__/chat.cpython-313.pyc
ADDED
|
Binary file (11.4 kB). View file
|
|
|
backend/app/routers/__pycache__/documents.cpython-313.pyc
ADDED
|
Binary file (4.17 kB). View file
|
|
|
backend/app/routers/__pycache__/health.cpython-313.pyc
ADDED
|
Binary file (792 Bytes). View file
|
|
|
backend/app/routers/__pycache__/ingest.cpython-313.pyc
ADDED
|
Binary file (8.27 kB). View file
|
|
|
backend/app/routers/__pycache__/metrics.cpython-313.pyc
ADDED
|
Binary file (789 Bytes). View file
|
|
|
backend/app/routers/__pycache__/search.cpython-313.pyc
ADDED
|
Binary file (3.32 kB). View file
|
|
|
backend/app/routers/chat.py
ADDED
|
@@ -0,0 +1,280 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
from time import perf_counter
|
| 3 |
+
from typing import AsyncGenerator, Dict, List, Optional
|
| 4 |
+
|
| 5 |
+
from fastapi import APIRouter, Request
|
| 6 |
+
from fastapi.concurrency import run_in_threadpool
|
| 7 |
+
from fastapi.responses import StreamingResponse
|
| 8 |
+
|
| 9 |
+
from app.core.cache import cache_enabled, get_chat_cached, set_chat_cached
|
| 10 |
+
from app.core.config import get_settings
|
| 11 |
+
from app.core.logging import get_logger
|
| 12 |
+
from app.core.metrics import record_chat_timings
|
| 13 |
+
from app.core.rate_limit import limiter
|
| 14 |
+
from app.core.tracing import (
|
| 15 |
+
get_tracing_callbacks,
|
| 16 |
+
get_tracing_response_metadata,
|
| 17 |
+
)
|
| 18 |
+
from app.schemas.chat import (
|
| 19 |
+
ChatRequest,
|
| 20 |
+
ChatResponse,
|
| 21 |
+
ChatTimings,
|
| 22 |
+
ChatTraceMetadata,
|
| 23 |
+
SourceHit,
|
| 24 |
+
)
|
| 25 |
+
from app.services.chat.graph import get_chat_graph
|
| 26 |
+
|
| 27 |
+
logger = get_logger(__name__)
|
| 28 |
+
|
| 29 |
+
router = APIRouter(tags=["chat"])
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def _build_chat_response(state: Dict) -> ChatResponse:
    """Translate the final graph state into the public ChatResponse model.

    Missing or falsy values are coerced to safe defaults (0.0 / "" / "unknown")
    so a partially-populated state never breaks response serialisation.
    """
    raw_timings = state.get("timings") or {}

    def _ms(key: str) -> float:
        # Coerce absent/None timing entries to 0.0 milliseconds.
        return float(raw_timings.get(key) or 0.0)

    timings = ChatTimings(
        retrieve_ms=_ms("retrieve_ms"),
        web_ms=_ms("web_ms"),
        generate_ms=_ms("generate_ms"),
        total_ms=_ms("total_ms"),
    )

    # Local retrieval hits first, then any web-search results.
    combined: List[Dict] = list(state.get("retrieved") or [])
    combined.extend(state.get("web_results") or [])

    sources: List[SourceHit] = [
        SourceHit(
            source=str(item.get("source") or "unknown"),
            title=str(item.get("title") or ""),
            url=str(item.get("url") or ""),
            score=float(item.get("score") or 0.0),
            chunk_text=str(item.get("chunk_text") or ""),
        )
        for item in combined
    ]

    return ChatResponse(
        answer=str(state.get("answer") or ""),
        sources=sources,
        timings=timings,
        trace=ChatTraceMetadata(**get_tracing_response_metadata()),
    )
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
@router.post(
    "/chat",
    response_model=ChatResponse,
    summary="Production-style RAG chat endpoint",
    description=(
        "Runs an agentic RAG flow using Pinecone retrieval, optional Tavily web "
        "fallback, and a Groq-backed LLM to generate an answer. "
        "Returns the answer, source snippets, timings, and LangSmith trace metadata."
    ),
)
@limiter.limit("30/minute")
async def chat(request: Request, payload: ChatRequest) -> ChatResponse:  # noqa: ARG001
    """Answer a query via the RAG graph, with an optional response cache."""
    settings = get_settings()
    namespace = payload.namespace or settings.PINECONE_NAMESPACE

    logger.info(
        "Received /chat request namespace='%s' top_k=%d use_web_fallback=%s",
        namespace,
        payload.top_k,
        payload.use_web_fallback,
    )

    # The cache is consulted only for history-free requests: prior chat turns
    # make the answer context-dependent and uncacheable by query alone.
    use_cache = cache_enabled() and not payload.chat_history
    if use_cache:
        cached = get_chat_cached(
            namespace=namespace,
            query=payload.query,
            top_k=payload.top_k,
            min_score=payload.min_score,
            use_web_fallback=payload.use_web_fallback,
        )
        if cached is not None:
            logger.info(
                "Serving /chat response from cache namespace='%s' query='%s'",
                namespace,
                payload.query,
            )
            # Cached responses still count toward the timing metrics.
            record_chat_timings(
                {
                    "retrieve_ms": cached.timings.retrieve_ms,
                    "web_ms": cached.timings.web_ms,
                    "generate_ms": cached.timings.generate_ms,
                    "total_ms": cached.timings.total_ms,
                }
            )
            return cached

    graph = get_chat_graph()
    run_config: Dict = {}
    callbacks = get_tracing_callbacks()
    if callbacks:
        run_config["callbacks"] = callbacks

    initial_state = {
        "query": payload.query,
        "namespace": namespace,
        "top_k": payload.top_k,
        "use_web_fallback": payload.use_web_fallback,
        "min_score": payload.min_score,
        "max_web_results": payload.max_web_results,
        "chat_history": [
            {"role": message.role, "content": message.content}
            for message in (payload.chat_history or [])
        ],
    }

    started = perf_counter()

    # graph.invoke is synchronous; run it off the event loop. Exceptions
    # (including UpstreamServiceError) are handled by the global handlers.
    state = await run_in_threadpool(
        lambda: graph.invoke(initial_state, config=run_config)
    )

    timings = state.get("timings") or {}
    timings["total_ms"] = (perf_counter() - started) * 1000.0
    state["timings"] = timings

    logger.info(
        "Chat request completed namespace='%s' web_fallback_used=%s "
        "retrieve_ms=%.2f web_ms=%.2f generate_ms=%.2f total_ms=%.2f top_score=%.4f",
        namespace,
        bool(state.get("web_fallback_used")),
        float(timings.get("retrieve_ms") or 0.0),
        float(timings.get("web_ms") or 0.0),
        float(timings.get("generate_ms") or 0.0),
        float(timings.get("total_ms") or 0.0),
        float(state.get("top_score") or 0.0),
    )

    response = _build_chat_response(state)

    # Record metrics based on this response.
    record_chat_timings(
        {
            "retrieve_ms": response.timings.retrieve_ms,
            "web_ms": response.timings.web_ms,
            "generate_ms": response.timings.generate_ms,
            "total_ms": response.timings.total_ms,
        }
    )

    if use_cache:
        # Cache only when chat_history is empty (see above).
        set_chat_cached(
            namespace=namespace,
            query=payload.query,
            top_k=payload.top_k,
            min_score=payload.min_score,
            use_web_fallback=payload.use_web_fallback,
            value=response,
        )

    return response
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
@router.post(
    "/chat/stream",
    summary="Streaming RAG chat endpoint (SSE)",
    description=(
        "Same behaviour as /chat but streams the answer over Server-Sent Events "
        "(SSE). The final event includes the full JSON payload with answer, sources, "
        "timings, and trace metadata."
    ),
)
@limiter.limit("30/minute")
async def chat_stream(request: Request, payload: ChatRequest) -> StreamingResponse:  # noqa: ARG001
    """Run the RAG graph and stream the answer as SSE, ending with a JSON event.

    NOTE(review): the graph is invoked to completion before streaming starts;
    the "stream" replays the finished answer token-by-token.
    """
    settings = get_settings()
    namespace = payload.namespace or settings.PINECONE_NAMESPACE

    logger.info(
        "Received /chat/stream request namespace='%s' top_k=%d use_web_fallback=%s",
        namespace,
        payload.top_k,
        payload.use_web_fallback,
    )

    graph = get_chat_graph()
    run_config: Dict = {}
    callbacks = get_tracing_callbacks()
    if callbacks:
        run_config["callbacks"] = callbacks

    initial_state = {
        "query": payload.query,
        "namespace": namespace,
        "top_k": payload.top_k,
        "use_web_fallback": payload.use_web_fallback,
        "min_score": payload.min_score,
        "max_web_results": payload.max_web_results,
        "chat_history": [
            {"role": message.role, "content": message.content}
            for message in (payload.chat_history or [])
        ],
    }

    started = perf_counter()

    # graph.invoke is synchronous; run it off the event loop. Exceptions
    # (including UpstreamServiceError) are handled by the global handlers.
    state = await run_in_threadpool(
        lambda: graph.invoke(initial_state, config=run_config)
    )

    timings = state.get("timings") or {}
    timings["total_ms"] = (perf_counter() - started) * 1000.0
    state["timings"] = timings

    logger.info(
        "Streaming chat completed namespace='%s' web_fallback_used=%s "
        "retrieve_ms=%.2f web_ms=%.2f generate_ms=%.2f total_ms=%.2f top_score=%.4f",
        namespace,
        bool(state.get("web_fallback_used")),
        float(timings.get("retrieve_ms") or 0.0),
        float(timings.get("web_ms") or 0.0),
        float(timings.get("generate_ms") or 0.0),
        float(timings.get("total_ms") or 0.0),
        float(state.get("top_score") or 0.0),
    )

    response = _build_chat_response(state)
    answer_text = response.answer

    # Record metrics based on this response as well.
    record_chat_timings(
        {
            "retrieve_ms": response.timings.retrieve_ms,
            "web_ms": response.timings.web_ms,
            "generate_ms": response.timings.generate_ms,
            "total_ms": response.timings.total_ms,
        }
    )

    async def event_generator() -> AsyncGenerator[str, None]:
        # Emit the answer one whitespace-delimited token per SSE data event.
        for piece in answer_text.split():
            yield f"data: {piece}\n\n"

        # Terminal event: the full JSON payload (answer, sources, timings,
        # trace metadata) for clients that want the structured result.
        yield f"event: end\ndata: {json.dumps(response.model_dump())}\n\n"

    return StreamingResponse(event_generator(), media_type="text/event-stream")
|
backend/app/routers/documents.py
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, Dict, List
|
| 2 |
+
|
| 3 |
+
from fastapi import APIRouter, Query
|
| 4 |
+
from fastapi.concurrency import run_in_threadpool
|
| 5 |
+
from langchain_core.documents import Document
|
| 6 |
+
|
| 7 |
+
from app.core.config import get_settings
|
| 8 |
+
from app.core.logging import get_logger
|
| 9 |
+
from app.schemas.documents import (
|
| 10 |
+
DocumentsStatsResponse,
|
| 11 |
+
NamespaceStat,
|
| 12 |
+
UploadTextRequest,
|
| 13 |
+
UploadTextResponse,
|
| 14 |
+
)
|
| 15 |
+
from app.services import chunking as chunking_service
|
| 16 |
+
from app.services import dedupe as dedupe_service
|
| 17 |
+
from app.services.normalize import make_doc_id, normalize_text, is_valid_document
|
| 18 |
+
from app.services.pinecone_store import describe_index_stats, upsert_records
|
| 19 |
+
|
| 20 |
+
logger = get_logger(__name__)
|
| 21 |
+
|
| 22 |
+
router = APIRouter(prefix="/documents", tags=["documents"])
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
@router.post(
    "/upload-text",
    response_model=UploadTextResponse,
    summary="Upload raw text or Docling output",
    description=(
        "Accepts manual text uploads or Docling-converted content, normalizes and "
        "chunks the text, and upserts it into Pinecone."
    ),
)
async def upload_text(payload: UploadTextRequest) -> UploadTextResponse:
    """Normalize, chunk, dedupe and upsert a single manually-uploaded text."""
    settings = get_settings()
    namespace = payload.namespace or settings.PINECONE_NAMESPACE

    normalized = normalize_text(payload.text)
    if not is_valid_document(normalized):
        # Too short after normalization: report zero ingestion rather than fail.
        logger.info(
            "Skipping manual upload for title='%s' due to insufficient length (len=%d)",
            payload.title,
            len(normalized),
        )
        return UploadTextResponse(
            namespace=namespace,
            source=payload.source,
            ingested_documents=0,
            ingested_chunks=0,
        )

    # Copy caller metadata so the request model is never mutated.
    metadata: Dict[str, Any] = dict(payload.metadata) if payload.metadata else {}
    url = metadata.get("url", "")
    published = metadata.get("published", "")

    metadata.update(
        {
            "title": payload.title,
            "source": payload.source,
            "url": url,
            "published": published,
            "doc_id": make_doc_id(source=payload.source, title=payload.title, url=url),
        }
    )

    records = chunking_service.documents_to_records(
        [Document(page_content=normalized, metadata=metadata)]
    )
    records = dedupe_service.dedupe_records(records)

    # The Pinecone upsert is blocking; keep it off the event loop.
    chunk_count = await run_in_threadpool(upsert_records, namespace, records)

    return UploadTextResponse(
        namespace=namespace,
        source=payload.source,
        ingested_documents=1,
        ingested_chunks=chunk_count,
    )
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
@router.get(
    "/stats",
    response_model=DocumentsStatsResponse,
    summary="Get document statistics",
    description="Returns vector counts per namespace from the configured Pinecone index.",
)
async def documents_stats(
    namespace: str | None = Query(
        default=None,
        description="Optional namespace filter; if omitted, stats for all namespaces are returned",
    ),
) -> DocumentsStatsResponse:
    """Report per-namespace vector counts from the Pinecone index."""
    # describe_index_stats is a blocking SDK call; run it in the threadpool.
    raw = await run_in_threadpool(describe_index_stats, namespace)

    per_namespace: Dict[str, NamespaceStat] = {
        name: NamespaceStat(vector_count=info.get("vector_count", 0))
        for name, info in raw.items()
    }
    return DocumentsStatsResponse(namespaces=per_namespace)
|
backend/app/routers/health.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter
|
| 2 |
+
|
| 3 |
+
from app.core.config import get_settings
|
| 4 |
+
|
| 5 |
+
router = APIRouter(tags=["health"])
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
@router.get(
    "/health",
    summary="Health check",
    description="Returns service status, name, and version.",
)
async def health() -> dict:
    """Lightweight liveness probe exposing service identity and version."""
    cfg = get_settings()
    return {
        "status": "ok",
        "service": cfg.APP_NAME,
        "version": cfg.APP_VERSION,
    }
|
backend/app/routers/ingest.py
ADDED
|
@@ -0,0 +1,194 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, Dict, List
|
| 2 |
+
from collections import Counter
|
| 3 |
+
|
| 4 |
+
import httpx
|
| 5 |
+
from fastapi import APIRouter, HTTPException, Request
|
| 6 |
+
from fastapi.concurrency import run_in_threadpool
|
| 7 |
+
from langchain_core.documents import Document
|
| 8 |
+
|
| 9 |
+
from app.core.config import get_settings
|
| 10 |
+
from app.core.logging import get_logger
|
| 11 |
+
from app.core.rate_limit import limiter
|
| 12 |
+
from app.schemas.ingest import (
|
| 13 |
+
ArxivIngestRequest,
|
| 14 |
+
IngestResponse,
|
| 15 |
+
OpenAlexIngestRequest,
|
| 16 |
+
WikiIngestRequest,
|
| 17 |
+
)
|
| 18 |
+
from app.services import dedupe as dedupe_service
|
| 19 |
+
from app.services import chunking as chunking_service
|
| 20 |
+
from app.services.ingestors.arxiv import fetch_arxiv_documents
|
| 21 |
+
from app.services.ingestors.openalex import fetch_openalex_documents
|
| 22 |
+
from app.services.ingestors.wiki import fetch_wiki_documents
|
| 23 |
+
from app.services.pinecone_store import upsert_records
|
| 24 |
+
|
| 25 |
+
logger = get_logger(__name__)
|
| 26 |
+
|
| 27 |
+
router = APIRouter(prefix="/ingest", tags=["ingest"])
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
async def _process_and_upsert(
    documents: List[Document],
    namespace: str,
    source: str,
    details: dict | None = None,
) -> IngestResponse:
    """Chunk, dedupe and upsert *documents*, returning an ingestion summary."""
    if not documents:
        # Nothing survived fetching/filtering: report an empty ingestion.
        return IngestResponse(
            namespace=namespace,
            source=source,
            ingested_documents=0,
            ingested_chunks=0,
            skipped_documents=0,
            details=details or {"reason": "no_documents_after_filtering"},
        )

    records = dedupe_service.dedupe_records(
        chunking_service.documents_to_records(documents)
    )

    # The Pinecone upsert is blocking; keep it off the event loop.
    chunk_count = await run_in_threadpool(upsert_records, namespace, records)

    return IngestResponse(
        namespace=namespace,
        source=source,
        ingested_documents=len(documents),
        ingested_chunks=chunk_count,
        skipped_documents=0,
        details=details,
    )
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
@router.post(
    "/arxiv",
    response_model=IngestResponse,
    summary="Ingest documents from arXiv",
    description="Fetches recent arXiv entries for a query and upserts them into Pinecone.",
)
@limiter.limit("10/minute")
async def ingest_arxiv(request: Request, payload: ArxivIngestRequest) -> IngestResponse:  # noqa: ARG001
    """Fetch arXiv entries for a query and ingest them into Pinecone.

    Upstream HTTP failures are translated into a 502 with a short detail
    string; successful fetches are chunked/deduped/upserted by the shared
    `_process_and_upsert` helper.
    """
    settings = get_settings()
    namespace = payload.namespace or settings.PINECONE_NAMESPACE
    # Hard cap so a single request cannot flood the upstream API.
    max_docs = min(payload.max_docs, 20)

    logger.info(
        "Starting arXiv ingestion query='%s' max_docs=%d namespace='%s'",
        payload.query,
        max_docs,
        namespace,
    )

    try:
        documents = await fetch_arxiv_documents(
            query=payload.query,
            max_results=max_docs,
            category=payload.category,
        )
    except (httpx.HTTPStatusError, httpx.RequestError) as exc:
        status = None
        reason = ""
        url = None
        if isinstance(exc, httpx.HTTPStatusError) and exc.response is not None:
            status = exc.response.status_code
            reason = exc.response.reason_phrase
            url = str(exc.response.url)
        if url is None:
            # BUGFIX: httpx.RequestError.request is a property that raises
            # RuntimeError (not AttributeError) when no request is attached,
            # so a hasattr() probe does NOT protect against it and the old
            # code could crash with a 500 instead of returning 502. Guard
            # the access explicitly instead.
            try:
                url = str(exc.request.url)
            except (AttributeError, RuntimeError):
                url = None

        logger.error(
            "Upstream arXiv error (url=%s): %s",
            url or "unknown",
            exc,
        )
        status_display = status if status is not None else "unknown"
        detail = f"Upstream arXiv error: {status_display} {reason}".strip()
        raise HTTPException(status_code=502, detail=detail) from exc

    return await _process_and_upsert(documents, namespace=namespace, source="arxiv")
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
@router.post(
    "/openalex",
    response_model=IngestResponse,
    summary="Ingest documents from OpenAlex",
    description="Fetches works from OpenAlex for a query and upserts them into Pinecone.",
)
@limiter.limit("10/minute")
async def ingest_openalex(request: Request, payload: OpenAlexIngestRequest) -> IngestResponse:  # noqa: ARG001
    """Fetch OpenAlex works for a query and ingest them into Pinecone."""
    settings = get_settings()
    namespace = payload.namespace or settings.PINECONE_NAMESPACE
    # Hard cap so a single request cannot flood the upstream API.
    max_docs = min(payload.max_docs, 20)

    logger.info(
        "Starting OpenAlex ingestion query='%s' max_docs=%d namespace='%s'",
        payload.query,
        max_docs,
        namespace,
    )

    try:
        documents = await fetch_openalex_documents(
            query=payload.query,
            max_results=max_docs,
            mailto=payload.mailto,
        )
    except (httpx.HTTPStatusError, httpx.RequestError) as exc:
        # Map any upstream HTTP failure to a 502 with a stable message.
        logger.error("Upstream OpenAlex error: %s", exc)
        failure_detail = (
            "Upstream OpenAlex error: unable to retrieve content. "
            "Try again later."
        )
        raise HTTPException(status_code=502, detail=failure_detail) from exc

    return await _process_and_upsert(documents, namespace=namespace, source="openalex")
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
@router.post(
    "/wiki",
    response_model=IngestResponse,
    summary="Ingest documents from Wikipedia",
    description=(
        "Fetches articles from Wikipedia using the REST API with Action API fallback "
        "and upserts them into Pinecone."
    ),
)
@limiter.limit("10/minute")
async def ingest_wiki(request: Request, payload: WikiIngestRequest) -> IngestResponse:  # noqa: ARG001
    """Fetch Wikipedia articles by title and ingest them into Pinecone."""
    settings = get_settings()
    namespace = payload.namespace or settings.PINECONE_NAMESPACE

    # Hard cap on titles per request.
    titles = payload.titles[:20]
    logger.info(
        "Starting Wikipedia ingestion titles=%d namespace='%s'",
        len(titles),
        namespace,
    )

    try:
        documents = await fetch_wiki_documents(titles=titles)
    except (httpx.HTTPStatusError, httpx.RequestError) as exc:
        logger.error("Upstream Wikimedia error: %s", exc)
        raise HTTPException(
            status_code=502,
            detail=(
                "Upstream Wikimedia error: unable to retrieve content. "
                "Try again later or use Action API fallback."
            ),
        ) from exc

    # Report which backend (REST vs Action API) served each article, to aid
    # debugging of the fallback path.
    backend_counts: Dict[str, int] = Counter(
        doc.metadata.get("wikimedia_backend", "unknown") for doc in documents
    )
    return await _process_and_upsert(
        documents,
        namespace=namespace,
        source="wiki",
        details={"wikimedia_backend_counts": dict(backend_counts)},
    )
|
backend/app/routers/metrics.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter
|
| 2 |
+
|
| 3 |
+
from app.core.metrics import get_metrics_snapshot
|
| 4 |
+
|
| 5 |
+
router = APIRouter(tags=["metrics"])
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
@router.get(
    "/metrics",
    summary="In-memory metrics snapshot",
    description=(
        "Returns request and error counts by path, timing statistics for chat "
        "requests (average and p50/p95), cache hit/miss counters, and the last "
        "20 timing samples."
    ),
)
async def metrics() -> dict:
    """Expose the process-local metrics snapshot as JSON."""
    return get_metrics_snapshot()
|
backend/app/routers/search.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, Dict, List
|
| 2 |
+
|
| 3 |
+
from fastapi import APIRouter, Request
|
| 4 |
+
from fastapi.concurrency import run_in_threadpool
|
| 5 |
+
|
| 6 |
+
from app.core.cache import get_search_cached, set_search_cached
|
| 7 |
+
from app.core.config import get_settings
|
| 8 |
+
from app.core.logging import get_logger
|
| 9 |
+
from app.core.rate_limit import limiter
|
| 10 |
+
from app.schemas.search import SearchHit, SearchRequest, SearchResponse
|
| 11 |
+
from app.services.pinecone_store import search as pinecone_search
|
| 12 |
+
|
| 13 |
+
logger = get_logger(__name__)
|
| 14 |
+
|
| 15 |
+
router = APIRouter(tags=["search"])
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
@router.post(
    "/search",
    response_model=SearchResponse,
    summary="Semantic search over ingested documents",
    description=(
        "Performs integrated embedding search over documents stored in Pinecone and "
        "returns the top matching chunks."
    ),
)
@limiter.limit("60/minute")
async def search(request: Request, payload: SearchRequest) -> SearchResponse:  # noqa: ARG001
    """Run a cached semantic search and normalise Pinecone hits for the API."""
    settings = get_settings()
    namespace = payload.namespace or settings.PINECONE_NAMESPACE
    text_field = settings.PINECONE_TEXT_FIELD

    logger.info(
        "Received search request namespace='%s' top_k=%d",
        namespace,
        payload.top_k,
    )

    hits_raw: List[Dict[str, Any]]
    cached = get_search_cached(
        namespace=namespace,
        query=payload.query,
        top_k=payload.top_k,
        filters=payload.filters,
    )
    if cached is None:
        # The Pinecone SDK call is blocking; run it off the event loop.
        hits_raw = await run_in_threadpool(
            pinecone_search,
            namespace,
            payload.query,
            payload.top_k,
            payload.filters,
            None,
        )
        set_search_cached(
            namespace=namespace,
            query=payload.query,
            top_k=payload.top_k,
            filters=payload.filters,
            value=hits_raw,
        )
    else:
        hits_raw = cached

    hits: List[SearchHit] = []
    for raw_hit in hits_raw:
        raw_fields: Dict[str, Any] = raw_hit.get("fields") or {}

        # Expose the configured Pinecone text field under the stable
        # 'chunk_text' key regardless of how the index is configured.
        fields: Dict[str, Any] = dict(raw_fields)
        text_value = raw_fields.get(text_field, "")
        if text_field != "chunk_text":
            fields.pop(text_field, None)
        fields["chunk_text"] = text_value

        hits.append(
            SearchHit(
                id=raw_hit.get("_id") or raw_hit.get("id") or "",
                score=float(raw_hit.get("_score") or raw_hit.get("score") or 0.0),
                fields=fields,
            )
        )

    return SearchResponse(
        namespace=namespace,
        query=payload.query,
        top_k=payload.top_k,
        hits=hits,
    )
|
backend/app/schemas/__pycache__/chat.cpython-313.pyc
ADDED
|
Binary file (4.89 kB). View file
|
|
|
backend/app/schemas/__pycache__/documents.cpython-313.pyc
ADDED
|
Binary file (1.97 kB). View file
|
|
|
backend/app/schemas/__pycache__/ingest.cpython-313.pyc
ADDED
|
Binary file (2.57 kB). View file
|
|
|
backend/app/schemas/__pycache__/search.cpython-313.pyc
ADDED
|
Binary file (1.7 kB). View file
|
|
|
backend/app/schemas/chat.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List, Literal, Optional
|
| 2 |
+
|
| 3 |
+
from pydantic import BaseModel, Field
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class ChatMessage(BaseModel):
    """A single turn of prior conversation passed along with a chat request."""

    # Only 'user' and 'assistant' turns are accepted.
    role: Literal["user", "assistant"] = Field(
        default=...,
        description="Role of the message author (user or assistant).",
    )
    content: str = Field(default=..., description="Message text content.")
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class ChatRequest(BaseModel):
    """Request body for the retrieval-augmented chat endpoint."""

    # The question that drives retrieval and answer generation.
    query: str = Field(..., description="User query to be answered.")

    # None means "use the server-side configured namespace".
    namespace: Optional[str] = Field(
        None,
        description=(
            "Target Pinecone namespace. Defaults to the configured "
            "PINECONE_NAMESPACE when omitted."
        ),
    )

    top_k: int = Field(
        5,
        ge=1,
        le=100,
        description="Maximum number of retrieved document chunks.",
    )

    use_web_fallback: bool = Field(
        True,
        description=(
            "Whether to fall back to web search when retrieval is weak. "
            "Requires a configured Tavily API key."
        ),
    )

    # Threshold that decides when retrieval is considered "weak".
    min_score: float = Field(
        0.25,
        ge=0.0,
        le=1.0,
        description=(
            "If the top retrieval score is below this threshold and "
            "use_web_fallback is true, a web search will be attempted."
        ),
    )

    max_web_results: int = Field(
        5,
        ge=1,
        le=20,
        description="Maximum number of web search results to fetch when enabled.",
    )

    chat_history: Optional[List[ChatMessage]] = Field(
        None,
        description=(
            "Optional prior conversation history. "
            "Messages with role='user' or 'assistant' are supported."
        ),
    )
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
class SourceHit(BaseModel):
    """One context snippet (vector-store chunk or web result) cited in an answer."""

    source: str = Field(
        ...,
        description="Origin of the snippet (e.g. wiki, openalex, arxiv, web).",
    )
    title: str = Field(
        ...,
        description="Title of the underlying document or web page.",
    )
    # Empty string when no URL is known for the source.
    url: str = Field(
        default="",
        description="URL associated with the source, when available.",
    )
    score: float = Field(
        default=0.0,
        description=(
            "Relevance score from the vector store or a synthetic score for web search."
        ),
    )
    chunk_text: str = Field(
        ...,
        description="Text content of the retrieved chunk or web snippet.",
    )
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
class ChatTimings(BaseModel):
    """Per-phase latency breakdown for one chat request (all values in ms)."""

    retrieve_ms: float = Field(
        default=0.0,
        description="Time spent retrieving from Pinecone, in milliseconds.",
    )
    web_ms: float = Field(
        default=0.0,
        description="Time spent calling web search tools, in milliseconds.",
    )
    generate_ms: float = Field(
        default=0.0,
        description="Time spent generating the answer with the LLM, in milliseconds.",
    )
    total_ms: float = Field(
        default=0.0,
        description="End-to-end time from request receipt to response, in milliseconds.",
    )
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
class ChatTraceMetadata(BaseModel):
    """Observability metadata describing how (and whether) the call was traced."""

    langsmith_project: Optional[str] = Field(
        None,
        description="LangSmith project name associated with this trace, if any.",
    )
    trace_enabled: bool = Field(
        False,
        description="Whether LangSmith / LangChain tracing was enabled for this call.",
    )
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
class ChatResponse(BaseModel):
    """Response body for the chat endpoint: the answer plus supporting evidence."""

    answer: str = Field(..., description="Generated answer text.")
    # default_factory keeps each response's list/timings independent instances.
    sources: List[SourceHit] = Field(
        default_factory=list,
        description="List of document or web snippets used as context.",
    )
    timings: ChatTimings = Field(
        default_factory=ChatTimings,
        description="Timing information for key phases of the pipeline.",
    )
    # Required: every response reports its tracing configuration.
    trace: ChatTraceMetadata = Field(
        ...,
        description="Tracing configuration metadata for observability.",
    )
|
backend/app/schemas/documents.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, Dict, Optional
|
| 2 |
+
|
| 3 |
+
from pydantic import BaseModel, Field
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class UploadTextRequest(BaseModel):
    """Payload for uploading a raw text document to be ingested."""

    title: str = Field(..., description="Document title")
    source: str = Field(
        "manual",
        description="Source label for the document (e.g. manual, docling)",
    )
    text: str = Field(..., description="Full text content of the document")
    # None means "use the environment-configured namespace".
    namespace: Optional[str] = Field(
        None,
        description="Target Pinecone namespace (defaults to env)",
    )
    metadata: Optional[Dict[str, Any]] = Field(
        None,
        description="Additional metadata fields to store alongside the document",
    )
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
class UploadTextResponse(BaseModel):
    """Summary of a completed text upload/ingestion."""

    # Pinecone namespace the document was written to.
    namespace: str
    # Source label of the ingested document.
    source: str
    # Number of whole documents ingested.
    ingested_documents: int
    # Number of chunks written to the vector store.
    ingested_chunks: int
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class NamespaceStat(BaseModel):
    """Statistics for a single Pinecone namespace."""

    # Number of vectors stored in the namespace.
    vector_count: int
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class DocumentsStatsResponse(BaseModel):
    """Per-namespace vector statistics for the index."""

    # Mapping of namespace name -> its stats.
    namespaces: Dict[str, NamespaceStat]
|
backend/app/schemas/ingest.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, Dict, List, Optional
|
| 2 |
+
|
| 3 |
+
from pydantic import BaseModel, Field
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class ArxivIngestRequest(BaseModel):
    """Parameters for ingesting arXiv papers that match a search query."""

    query: str = Field(..., description="Search query for arXiv")
    # Hard cap of 20 enforced by the le= constraint.
    max_docs: int = Field(
        10,
        ge=1,
        le=20,
        description="Maximum number of documents to fetch (capped at 20)",
    )
    namespace: Optional[str] = Field(
        None,
        description="Target Pinecone namespace (defaults to env)",
    )
    category: Optional[str] = Field(
        None,
        description="Optional category label for ingested papers",
    )
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class OpenAlexIngestRequest(BaseModel):
    """Parameters for ingesting OpenAlex works that match a search query."""

    query: str = Field(..., description="Search query for OpenAlex works")
    max_docs: int = Field(
        10,
        ge=1,
        le=20,
        description="Maximum number of documents to fetch (capped at 20)",
    )
    namespace: Optional[str] = Field(
        None,
        description="Target Pinecone namespace (defaults to env)",
    )
    # Required: forwarded to OpenAlex so the API can identify the caller.
    mailto: str = Field(
        ...,
        description="Contact email passed to OpenAlex via the mailto query parameter",
    )
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
class WikiIngestRequest(BaseModel):
    """Parameters for ingesting Wikipedia pages by title."""

    # Per the field description, only the first 20 titles are used downstream.
    titles: List[str] = Field(
        ...,
        description="List of Wikipedia page titles (first 20 will be used)",
    )
    namespace: Optional[str] = Field(
        None,
        description="Target Pinecone namespace (defaults to env)",
    )
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
class IngestResponse(BaseModel):
    """Summary returned by the ingestion endpoints."""

    # Pinecone namespace the documents were written to.
    namespace: str
    # Which ingestion source produced these documents (e.g. arxiv, openalex, wiki).
    source: str
    # Documents successfully ingested.
    ingested_documents: int
    # Chunks written to the vector store.
    ingested_chunks: int
    # Documents skipped during ingestion (criteria determined by the service layer).
    skipped_documents: int
    # Optional source-specific extra information.
    details: Optional[Dict[str, Any]] = None
|
backend/app/schemas/search.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, Dict, List, Optional
|
| 2 |
+
|
| 3 |
+
from pydantic import BaseModel, Field
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class SearchRequest(BaseModel):
    """Request body for direct vector-store search."""

    query: str = Field(..., description="User query text")
    top_k: int = Field(
        5,
        ge=1,
        le=100,
        description="Number of results to return",
    )
    # None means "use the environment-configured namespace".
    namespace: Optional[str] = Field(
        None,
        description="Target Pinecone namespace (defaults to env)",
    )
    filters: Optional[Dict[str, Any]] = Field(
        None,
        description="Optional metadata filters passed directly to Pinecone search",
    )
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class SearchHit(BaseModel):
    """A single match returned from the vector store."""

    # Vector / chunk identifier.
    id: str
    # Similarity score reported by the store.
    score: float
    # Stored fields for the hit (e.g. 'chunk_text').
    fields: Dict[str, Any]
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class SearchResponse(BaseModel):
    """Response body for the search endpoint."""

    # Namespace that was searched.
    namespace: str
    # Echo of the original query.
    query: str
    # Echo of the requested result count.
    top_k: int
    # Matching hits returned by the store.
    hits: List[SearchHit]
|
backend/app/services/__pycache__/chunking.cpython-313.pyc
ADDED
|
Binary file (3.1 kB). View file
|
|
|
backend/app/services/__pycache__/dedupe.cpython-313.pyc
ADDED
|
Binary file (1.16 kB). View file
|
|
|
backend/app/services/__pycache__/normalize.cpython-313.pyc
ADDED
|
Binary file (1.48 kB). View file
|
|
|
backend/app/services/__pycache__/pinecone_store.cpython-313.pyc
ADDED
|
Binary file (7.45 kB). View file
|
|
|
backend/app/services/chat/__pycache__/graph.cpython-313.pyc
ADDED
|
Binary file (12.7 kB). View file
|
|
|