Spaces:
Sleeping
Sleeping
Claude Code - Backend Implementation Specialist Claude Sonnet 4.5 committed on
Commit ·
36bfe21
1
Parent(s): 9d096d7
Add Docker deployment configuration for Hugging Face Spaces
Browse files- Add Dockerfile with Python 3.11 slim base
- Configure app to run on port 7860 (HF standard)
- Add .dockerignore for optimized builds
- Update README with HF Space metadata
- Include all FastAPI app files and dependencies
- Add .env.example for configuration reference
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
- .dockerignore +51 -0
- .env.example +16 -0
- .gitignore +28 -0
- Dockerfile +20 -0
- README.md +179 -5
- app/__init__.py +0 -0
- app/api/__init__.py +0 -0
- app/api/routes/__init__.py +0 -0
- app/api/routes/chat.py +72 -0
- app/api/routes/health.py +60 -0
- app/config.py +35 -0
- app/db/__init__.py +0 -0
- app/db/postgres.py +124 -0
- app/db/qdrant.py +96 -0
- app/db/schema.sql +22 -0
- app/main.py +37 -0
- app/models/__init__.py +0 -0
- app/models/chat.py +56 -0
- app/models/document.py +27 -0
- app/services/__init__.py +0 -0
- app/services/embeddings.py +39 -0
- app/services/generation.py +79 -0
- app/services/rag_pipeline.py +88 -0
- app/services/retrieval.py +31 -0
- requirements.txt +11 -0
.dockerignore
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Git
|
| 2 |
+
.git
|
| 3 |
+
.gitignore
|
| 4 |
+
.gitattributes
|
| 5 |
+
|
| 6 |
+
# Python
|
| 7 |
+
__pycache__
|
| 8 |
+
*.py[cod]
|
| 9 |
+
*$py.class
|
| 10 |
+
*.so
|
| 11 |
+
.Python
|
| 12 |
+
env/
|
| 13 |
+
venv/
|
| 14 |
+
ENV/
|
| 15 |
+
*.egg-info/
|
| 16 |
+
|
| 17 |
+
# Environment
|
| 18 |
+
.env
|
| 19 |
+
.env.local
|
| 20 |
+
|
| 21 |
+
# IDE
|
| 22 |
+
.vscode/
|
| 23 |
+
.idea/
|
| 24 |
+
*.swp
|
| 25 |
+
*.swo
|
| 26 |
+
|
| 27 |
+
# Logs
|
| 28 |
+
*.log
|
| 29 |
+
ingestion.log
|
| 30 |
+
|
| 31 |
+
# Testing
|
| 32 |
+
.pytest_cache/
|
| 33 |
+
.coverage
|
| 34 |
+
htmlcov/
|
| 35 |
+
|
| 36 |
+
# Documentation
|
| 37 |
+
README.md
|
| 38 |
+
*.md
|
| 39 |
+
|
| 40 |
+
# Deployment configs
|
| 41 |
+
Procfile
|
| 42 |
+
railway.json
|
| 43 |
+
vercel.json
|
| 44 |
+
|
| 45 |
+
# Scripts
|
| 46 |
+
test-backend.sh
|
| 47 |
+
scripts/
|
| 48 |
+
|
| 49 |
+
# OS
|
| 50 |
+
.DS_Store
|
| 51 |
+
Thumbs.db
|
.env.example
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Cohere API
|
| 2 |
+
COHERE_API_KEY=your_cohere_api_key_here
|
| 3 |
+
|
| 4 |
+
# Qdrant Vector Database
|
| 5 |
+
QDRANT_URL=https://your-cluster.qdrant.io
|
| 6 |
+
QDRANT_API_KEY=your_qdrant_api_key_here
|
| 7 |
+
QDRANT_COLLECTION_NAME=physical_ai_textbook
|
| 8 |
+
|
| 9 |
+
# Neon Postgres
|
| 10 |
+
NEON_DATABASE_URL=postgresql://user:password@host/database
|
| 11 |
+
|
| 12 |
+
# Frontend URL (for CORS)
|
| 13 |
+
FRONTEND_URL=http://localhost:3000
|
| 14 |
+
|
| 15 |
+
# Environment
|
| 16 |
+
ENVIRONMENT=development
|
.gitignore
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Backend files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
*.so
|
| 6 |
+
.Python
|
| 7 |
+
env/
|
| 8 |
+
venv/
|
| 9 |
+
ENV/
|
| 10 |
+
.venv
|
| 11 |
+
|
| 12 |
+
# Environment
|
| 13 |
+
.env
|
| 14 |
+
.env.local
|
| 15 |
+
|
| 16 |
+
# IDE
|
| 17 |
+
.vscode/
|
| 18 |
+
.idea/
|
| 19 |
+
*.swp
|
| 20 |
+
*.swo
|
| 21 |
+
|
| 22 |
+
# Testing
|
| 23 |
+
.pytest_cache/
|
| 24 |
+
.coverage
|
| 25 |
+
htmlcov/
|
| 26 |
+
|
| 27 |
+
# Logs
|
| 28 |
+
*.log
|
Dockerfile
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Use Python 3.11 slim image
FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Copy requirements first for better caching
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY app ./app

# Expose port (7860 is the Hugging Face Spaces default, matching
# app_port in the README front matter)
EXPOSE 7860

# Run the application
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md
CHANGED
|
@@ -1,10 +1,184 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
|
|
|
| 7 |
pinned: false
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: RAG Chatbot
|
| 3 |
+
emoji: 🤖
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: purple
|
| 6 |
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
pinned: false
|
| 9 |
---
|
| 10 |
|
| 11 |
+
# Physical AI RAG Backend
|
| 12 |
+
|
| 13 |
+
FastAPI backend for the Physical AI textbook RAG chatbot.
|
| 14 |
+
|
| 15 |
+
## Features
|
| 16 |
+
|
| 17 |
+
- **RAG Pipeline**: Retrieval-Augmented Generation using Cohere API
|
| 18 |
+
- **Vector Search**: Qdrant for semantic search
|
| 19 |
+
- **Conversation Storage**: Neon Postgres for chat history
|
| 20 |
+
- **Text Selection Context**: Support for querying with selected text
|
| 21 |
+
|
| 22 |
+
## Tech Stack
|
| 23 |
+
|
| 24 |
+
- FastAPI (Python 3.11+)
|
| 25 |
+
- Cohere API (embeddings + generation)
|
| 26 |
+
- Qdrant Cloud (vector database)
|
| 27 |
+
- Neon Serverless Postgres (conversation storage)
|
| 28 |
+
|
| 29 |
+
## Setup
|
| 30 |
+
|
| 31 |
+
### 1. Install Dependencies
|
| 32 |
+
|
| 33 |
+
```bash
|
| 34 |
+
cd backend
|
| 35 |
+
pip install -r requirements.txt
|
| 36 |
+
```
|
| 37 |
+
|
| 38 |
+
### 2. Configure Environment
|
| 39 |
+
|
| 40 |
+
Copy `.env.example` to `.env` and fill in your credentials:
|
| 41 |
+
|
| 42 |
+
```bash
|
| 43 |
+
cp .env.example .env
|
| 44 |
+
```
|
| 45 |
+
|
| 46 |
+
Required environment variables:
|
| 47 |
+
- `COHERE_API_KEY`: Your Cohere API key
|
| 48 |
+
- `QDRANT_URL`: Qdrant cluster URL
|
| 49 |
+
- `QDRANT_API_KEY`: Qdrant API key
|
| 50 |
+
- `NEON_DATABASE_URL`: Neon Postgres connection string
|
| 51 |
+
- `FRONTEND_URL`: Frontend URL for CORS
|
| 52 |
+
|
| 53 |
+
### 3. Setup Database
|
| 54 |
+
|
| 55 |
+
Run the schema on your Neon database:
|
| 56 |
+
|
| 57 |
+
```bash
|
| 58 |
+
psql $NEON_DATABASE_URL < app/db/schema.sql
|
| 59 |
+
```
|
| 60 |
+
|
| 61 |
+
### 4. Ingest Content
|
| 62 |
+
|
| 63 |
+
Parse MDX files and upload to Qdrant:
|
| 64 |
+
|
| 65 |
+
```bash
|
| 66 |
+
python scripts/ingest_content.py
|
| 67 |
+
```
|
| 68 |
+
|
| 69 |
+
This will:
|
| 70 |
+
- Parse all 11 chapters from `docs/chapters/`
|
| 71 |
+
- Create ~80-100 semantic chunks
|
| 72 |
+
- Generate embeddings via Cohere
|
| 73 |
+
- Upload to Qdrant
|
| 74 |
+
|
| 75 |
+
### 5. Run Server
|
| 76 |
+
|
| 77 |
+
```bash
|
| 78 |
+
uvicorn app.main:app --reload --port 8000
|
| 79 |
+
```
|
| 80 |
+
|
| 81 |
+
API will be available at `http://localhost:8000`
|
| 82 |
+
|
| 83 |
+
## API Endpoints
|
| 84 |
+
|
| 85 |
+
### Chat
|
| 86 |
+
|
| 87 |
+
**POST /api/chat/query**
|
| 88 |
+
```json
|
| 89 |
+
{
|
| 90 |
+
"query": "What is Physical AI?",
|
| 91 |
+
"conversation_id": "uuid-optional",
|
| 92 |
+
"filters": { "chapter": 1 }
|
| 93 |
+
}
|
| 94 |
+
```
|
| 95 |
+
|
| 96 |
+
**POST /api/chat/query-with-context**
|
| 97 |
+
```json
|
| 98 |
+
{
|
| 99 |
+
"query": "Explain this",
|
| 100 |
+
"selected_text": "Physical AI systems...",
|
| 101 |
+
"selection_metadata": {
|
| 102 |
+
"chapter_title": "Introduction",
|
| 103 |
+
"url": "/docs/chapters/physical-ai-intro"
|
| 104 |
+
}
|
| 105 |
+
}
|
| 106 |
+
```
|
| 107 |
+
|
| 108 |
+
**POST /api/chat/conversations**
|
| 109 |
+
Create a new conversation.
|
| 110 |
+
|
| 111 |
+
**GET /api/chat/conversations/{id}**
|
| 112 |
+
Get conversation with all messages.
|
| 113 |
+
|
| 114 |
+
### Health
|
| 115 |
+
|
| 116 |
+
**GET /api/health**
|
| 117 |
+
Basic health check.
|
| 118 |
+
|
| 119 |
+
**GET /api/health/detailed**
|
| 120 |
+
Detailed health check with database status.
|
| 121 |
+
|
| 122 |
+
## Deployment
|
| 123 |
+
|
| 124 |
+
### Railway (Recommended)
|
| 125 |
+
|
| 126 |
+
1. Create Railway project
|
| 127 |
+
2. Connect GitHub repo
|
| 128 |
+
3. Set environment variables
|
| 129 |
+
4. Deploy command: `uvicorn app.main:app --host 0.0.0.0 --port $PORT`
|
| 130 |
+
|
| 131 |
+
### Render
|
| 132 |
+
|
| 133 |
+
1. Create new Web Service
|
| 134 |
+
2. Connect GitHub repo
|
| 135 |
+
3. Build command: `pip install -r requirements.txt`
|
| 136 |
+
4. Start command: `uvicorn app.main:app --host 0.0.0.0 --port $PORT`
|
| 137 |
+
|
| 138 |
+
## Project Structure
|
| 139 |
+
|
| 140 |
+
```
|
| 141 |
+
backend/
|
| 142 |
+
├── app/
|
| 143 |
+
│ ├── main.py # FastAPI app
|
| 144 |
+
│ ├── config.py # Settings
|
| 145 |
+
│ ├── models/
|
| 146 |
+
│ │ ├── chat.py # Chat models
|
| 147 |
+
│ │ └── document.py # Document models
|
| 148 |
+
│ ├── services/
|
| 149 |
+
│ │ ├── embeddings.py # Cohere embeddings
|
| 150 |
+
│ │ ├── generation.py # Cohere generation
|
| 151 |
+
│ │ ├── retrieval.py # Qdrant search
|
| 152 |
+
│ │ └── rag_pipeline.py # Main RAG logic
|
| 153 |
+
│ ├── db/
|
| 154 |
+
│ │ ├── postgres.py # Neon client
|
| 155 |
+
│ │ ├── qdrant.py # Qdrant client
|
| 156 |
+
│ │ └── schema.sql # Database schema
|
| 157 |
+
│ └── api/
|
| 158 |
+
│ └── routes/
|
| 159 |
+
│ ├── chat.py # Chat endpoints
|
| 160 |
+
│ └── health.py # Health endpoints
|
| 161 |
+
├── scripts/
|
| 162 |
+
│ └── ingest_content.py # Content ingestion
|
| 163 |
+
└── requirements.txt
|
| 164 |
+
```
|
| 165 |
+
|
| 166 |
+
## Development
|
| 167 |
+
|
| 168 |
+
Run with auto-reload:
|
| 169 |
+
```bash
|
| 170 |
+
uvicorn app.main:app --reload
|
| 171 |
+
```
|
| 172 |
+
|
| 173 |
+
View API docs:
|
| 174 |
+
- Swagger UI: http://localhost:8000/docs
|
| 175 |
+
- ReDoc: http://localhost:8000/redoc
|
| 176 |
+
|
| 177 |
+
## Cost Estimate
|
| 178 |
+
|
| 179 |
+
- Cohere: ~$5-10/month (moderate usage)
|
| 180 |
+
- Qdrant Cloud: Free (1GB tier)
|
| 181 |
+
- Neon Postgres: Free tier
|
| 182 |
+
- Railway: Free (500 hours/month)
|
| 183 |
+
|
| 184 |
+
**Total: ~$5-10/month**
|
app/__init__.py
ADDED
|
File without changes
|
app/api/__init__.py
ADDED
|
File without changes
|
app/api/routes/__init__.py
ADDED
|
File without changes
|
app/api/routes/chat.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from fastapi import APIRouter, HTTPException
from uuid import UUID
import logging
import traceback
from app.models.chat import (
    ChatQuery,
    ChatQueryWithContext,
    ChatResponse,
    Conversation
)
from app.services.rag_pipeline import RAGPipeline
from app.db.postgres import PostgresDB

router = APIRouter()
rag_pipeline = RAGPipeline()
db = PostgresDB()
logger = logging.getLogger(__name__)


@router.post("/query", response_model=ChatResponse)
async def query_chat(request: ChatQuery):
    """Process a chat query through the RAG pipeline.

    Any pipeline failure is logged with a full traceback and surfaced
    to the caller as HTTP 500.
    """
    try:
        response = rag_pipeline.process_query(
            query=request.query,
            conversation_id=request.conversation_id,
            filters=request.filters
        )
        return response
    except Exception as e:
        logger.error(f"Error processing query: {str(e)}")
        logger.error(traceback.format_exc())
        raise HTTPException(status_code=500, detail=f"Error processing query: {str(e)}")


@router.post("/query-with-context", response_model=ChatResponse)
async def query_with_context(request: ChatQueryWithContext):
    """Process a chat query with selected text context."""
    try:
        response = rag_pipeline.process_query(
            query=request.query,
            conversation_id=request.conversation_id,
            selected_text=request.selected_text,
            filters=request.filters
        )
        return response
    except Exception as e:
        # Log with traceback before converting to HTTP 500, consistent
        # with query_chat (previously this handler swallowed the details).
        logger.error(f"Error processing query with context: {str(e)}")
        logger.error(traceback.format_exc())
        raise HTTPException(status_code=500, detail=f"Error processing query: {str(e)}")


@router.post("/conversations", response_model=dict)
async def create_conversation():
    """Create a new conversation and return its id."""
    try:
        conversation_id = db.create_conversation()
        return {"conversation_id": conversation_id}
    except Exception as e:
        logger.error(f"Error creating conversation: {str(e)}")
        logger.error(traceback.format_exc())
        raise HTTPException(status_code=500, detail=f"Error creating conversation: {str(e)}")


@router.get("/conversations/{conversation_id}", response_model=Conversation)
async def get_conversation(conversation_id: UUID):
    """Get a conversation with all its messages.

    Raises 404 when the conversation does not exist, 500 on any other
    database failure.
    """
    try:
        conversation = db.get_conversation(conversation_id)
        if not conversation:
            raise HTTPException(status_code=404, detail="Conversation not found")
        return conversation
    except HTTPException:
        # Re-raise the 404 untouched so it is not wrapped in a 500.
        raise
    except Exception as e:
        logger.error(f"Error retrieving conversation: {str(e)}")
        logger.error(traceback.format_exc())
        raise HTTPException(status_code=500, detail=f"Error retrieving conversation: {str(e)}")
app/api/routes/health.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from fastapi import APIRouter, HTTPException
from app.db.qdrant import QdrantDB
from app.db.postgres import PostgresDB

router = APIRouter()


@router.get("/health")
async def health_check():
    """Basic liveness probe: always reports healthy if the app is up."""
    return {
        "status": "healthy",
        "service": "Physical AI RAG Backend"
    }


@router.get("/health/detailed")
async def detailed_health_check():
    """Readiness probe covering Qdrant and Postgres connectivity.

    Responds 200 with a per-component report when everything is
    reachable, otherwise raises 503 carrying the same report.
    """
    report = {
        "status": "healthy",
        "service": "Physical AI RAG Backend",
        "components": {}
    }

    # Qdrant: fetching collection metadata proves the cluster is reachable.
    try:
        info = QdrantDB().get_collection_info()
        report["components"]["qdrant"] = {
            "status": "healthy",
            "collection": info.dict()
        }
    except Exception as exc:
        report["status"] = "degraded"
        report["components"]["qdrant"] = {
            "status": "unhealthy",
            "error": str(exc)
        }

    # Postgres: a trivial SELECT verifies a connection can be opened.
    try:
        pg = PostgresDB()
        with pg.get_connection() as conn:
            with conn.cursor() as cur:
                cur.execute("SELECT 1")
        report["components"]["postgres"] = {
            "status": "healthy"
        }
    except Exception as exc:
        report["status"] = "degraded"
        report["components"]["postgres"] = {
            "status": "unhealthy",
            "error": str(exc)
        }

    if report["status"] != "healthy":
        raise HTTPException(status_code=503, detail=report)

    return report
app/config.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from pydantic_settings import BaseSettings
from functools import lru_cache


class Settings(BaseSettings):
    """Application settings loaded from environment variables."""

    # Cohere API
    cohere_api_key: str
    cohere_embed_model: str = "embed-english-v3.0"
    cohere_generation_model: str = "command-r-08-2024"

    # Qdrant
    qdrant_url: str
    qdrant_api_key: str
    qdrant_collection_name: str = "physical_ai_textbook"

    # Neon Postgres
    neon_database_url: str

    # Frontend (used for the CORS allow-list in app.main)
    frontend_url: str = "http://localhost:3000"

    # Application
    environment: str = "development"

    class Config:
        # Values are read from a local .env file when present;
        # case_sensitive=False lets COHERE_API_KEY map to cohere_api_key.
        env_file = ".env"
        case_sensitive = False


@lru_cache()
def get_settings() -> Settings:
    """Get cached settings instance (built once per process)."""
    return Settings()
app/db/__init__.py
ADDED
|
File without changes
|
app/db/postgres.py
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import psycopg2
from psycopg2.extras import RealDictCursor, Json
from contextlib import contextmanager
from typing import Optional, List, Dict, Any
from uuid import UUID, uuid4
from datetime import datetime
from app.config import get_settings


class PostgresDB:
    """PostgreSQL database client for conversation storage."""

    def __init__(self):
        self.settings = get_settings()
        self.connection_string = self.settings.neon_database_url

    @contextmanager
    def get_connection(self):
        """Context manager yielding a connection.

        Commits on clean exit, rolls back on any exception, and always
        closes the connection.
        """
        conn = psycopg2.connect(self.connection_string)
        try:
            yield conn
            conn.commit()
        except Exception:
            conn.rollback()
            raise
        finally:
            conn.close()

    def create_conversation(self, metadata: Optional[Dict[str, Any]] = None) -> UUID:
        """Create a new conversation and return its UUID."""
        conversation_id = uuid4()
        with self.get_connection() as conn:
            with conn.cursor() as cur:
                cur.execute(
                    """
                    INSERT INTO conversations (id, metadata)
                    VALUES (%s, %s)
                    RETURNING id
                    """,
                    # Json was previously referenced as psycopg2.extras.Json,
                    # which only resolved because the RealDictCursor import
                    # loaded the submodule as a side effect; import it directly.
                    (str(conversation_id), Json(metadata or {}))
                )
                result = cur.fetchone()[0]
                # The driver may hand back the id as str or uuid.UUID
                # depending on type adaptation; normalize to UUID.
                return UUID(result) if isinstance(result, str) else result

    def add_message(
        self,
        conversation_id: UUID,
        role: str,
        content: str,
        context_used: Optional[List[str]] = None,
        metadata: Optional[Dict[str, Any]] = None
    ) -> UUID:
        """Add a message to a conversation and return the message UUID."""
        message_id = uuid4()
        with self.get_connection() as conn:
            with conn.cursor() as cur:
                cur.execute(
                    """
                    INSERT INTO messages (id, conversation_id, role, content, context_used, metadata)
                    VALUES (%s, %s, %s, %s, %s, %s)
                    RETURNING id
                    """,
                    (
                        str(message_id),
                        str(conversation_id),
                        role,
                        content,
                        context_used or [],
                        Json(metadata or {})
                    )
                )
                result = cur.fetchone()[0]
                return UUID(result) if isinstance(result, str) else result

    def get_conversation(self, conversation_id: UUID) -> Optional[Dict[str, Any]]:
        """Get a conversation with all its messages, or None if missing.

        Messages are returned oldest-first under the 'messages' key.
        """
        with self.get_connection() as conn:
            with conn.cursor(cursor_factory=RealDictCursor) as cur:
                # Get conversation
                cur.execute(
                    "SELECT * FROM conversations WHERE id = %s",
                    (str(conversation_id),)
                )
                conversation = cur.fetchone()

                if not conversation:
                    return None

                # Get messages
                cur.execute(
                    """
                    SELECT * FROM messages
                    WHERE conversation_id = %s
                    ORDER BY created_at ASC
                    """,
                    (str(conversation_id),)
                )
                messages = cur.fetchall()

                return {
                    **dict(conversation),
                    'messages': [dict(msg) for msg in messages]
                }

    def get_conversation_history(
        self,
        conversation_id: UUID,
        limit: int = 10
    ) -> List[Dict[str, Any]]:
        """Get the most recent `limit` messages, oldest-first.

        The query fetches newest-first with LIMIT, then the list is
        reversed so callers see chronological order.
        """
        with self.get_connection() as conn:
            with conn.cursor(cursor_factory=RealDictCursor) as cur:
                cur.execute(
                    """
                    SELECT * FROM messages
                    WHERE conversation_id = %s
                    ORDER BY created_at DESC
                    LIMIT %s
                    """,
                    (str(conversation_id), limit)
                )
                messages = cur.fetchall()
                return [dict(msg) for msg in reversed(messages)]
app/db/qdrant.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct, Filter, FieldCondition, MatchValue
from typing import List, Dict, Any, Optional
from app.config import get_settings
from app.models.document import SearchResult


class QdrantDB:
    """Qdrant vector database client."""

    def __init__(self):
        self.settings = get_settings()
        self.client = QdrantClient(
            url=self.settings.qdrant_url,
            api_key=self.settings.qdrant_api_key
        )
        self.collection_name = self.settings.qdrant_collection_name

    def create_collection(self, vector_size: int = 1024):
        """Create the collection if it doesn't exist.

        vector_size defaults to 1024, the Cohere embed-english-v3.0
        dimensionality.
        """
        try:
            # get_collection raises when the collection is absent; any
            # failure here is treated as "missing" and triggers creation.
            self.client.get_collection(self.collection_name)
            print(f"Collection '{self.collection_name}' already exists")
        except Exception:
            self.client.create_collection(
                collection_name=self.collection_name,
                vectors_config=VectorParams(
                    size=vector_size,
                    distance=Distance.COSINE
                )
            )
            print(f"Created collection '{self.collection_name}'")

    def upsert_chunks(self, chunks: List[Dict[str, Any]], vectors: List[List[float]]):
        """Insert or update document chunks with their embeddings.

        chunks and vectors are paired positionally; each chunk dict is
        stored whole as the point payload with its 'chunk_id' as point id.
        """
        points = [
            PointStruct(
                id=chunk['chunk_id'],
                vector=vector,
                payload=chunk
            )
            for chunk, vector in zip(chunks, vectors)
        ]

        self.client.upsert(
            collection_name=self.collection_name,
            points=points
        )

    def search(
        self,
        query_vector: List[float],
        limit: int = 5,
        filters: Optional[Dict[str, Any]] = None
    ) -> List[SearchResult]:
        """Search for similar chunks.

        filters currently supports {'chapter': <int>}, matched against
        the 'chapter_number' payload field.
        """
        # Build filter if provided
        search_filter = None
        if filters:
            conditions = []
            if 'chapter' in filters:
                conditions.append(
                    FieldCondition(
                        key="chapter_number",
                        match=MatchValue(value=filters['chapter'])
                    )
                )
            if conditions:
                search_filter = Filter(must=conditions)

        # Perform search using query_points
        results = self.client.query_points(
            collection_name=self.collection_name,
            query=query_vector,
            limit=limit,
            query_filter=search_filter
        ).points

        # Convert to SearchResult models. NOTE(review): assumes every
        # payload carries the keys written by upsert_chunks; a point
        # ingested without them would raise KeyError here.
        return [
            SearchResult(
                chunk_id=result.payload['chunk_id'],
                chapter_number=result.payload['chapter_number'],
                chapter_title=result.payload['chapter_title'],
                section_title=result.payload['section_title'],
                content=result.payload['content'],
                content_type=result.payload['content_type'],
                url=result.payload['url'],
                score=result.score
            )
            for result in results
        ]

    def get_collection_info(self) -> Any:
        """Get information about the collection.

        Returns the client's collection-info object (callers such as the
        detailed health check invoke .dict() on it), not a plain dict —
        the previous Dict[str, Any] annotation was incorrect.
        """
        return self.client.get_collection(self.collection_name)
app/db/schema.sql
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-- Conversations table: one row per chat session; application data
-- beyond the id/timestamp lives in the JSONB metadata column.
CREATE TABLE IF NOT EXISTS conversations (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    metadata JSONB DEFAULT '{}'::jsonb
);

-- Messages table: ordered by created_at within a conversation;
-- deleting a conversation cascades to its messages.
CREATE TABLE IF NOT EXISTS messages (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    conversation_id UUID REFERENCES conversations(id) ON DELETE CASCADE,
    role VARCHAR(20) NOT NULL CHECK (role IN ('user', 'assistant')),
    content TEXT NOT NULL,
    context_used TEXT[],
    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    metadata JSONB DEFAULT '{}'::jsonb
);

-- Indexes for performance (message lookup by conversation, and
-- newest-first listings on both tables)
CREATE INDEX IF NOT EXISTS idx_messages_conversation_id ON messages(conversation_id);
CREATE INDEX IF NOT EXISTS idx_messages_created_at ON messages(created_at DESC);
CREATE INDEX IF NOT EXISTS idx_conversations_created_at ON conversations(created_at DESC);
app/main.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from app.config import get_settings
from app.api.routes import chat, health

# Initialize settings (cached via lru_cache in app.config)
settings = get_settings()

# Create FastAPI app
app = FastAPI(
    title="Physical AI RAG Backend",
    description="RAG-powered chatbot backend for Physical AI textbook",
    version="1.0.0"
)

# Configure CORS.
# BUG FIX: Starlette's CORSMiddleware matches allow_origins entries
# literally, so the previous "https://*.vercel.app" entry never matched
# any real origin. Vercel preview/production deployments are now
# matched via allow_origin_regex instead.
app.add_middleware(
    CORSMiddleware,
    allow_origins=[settings.frontend_url, "http://localhost:3000"],
    allow_origin_regex=r"https://.*\.vercel\.app",
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include routers
app.include_router(health.router, prefix="/api", tags=["health"])
app.include_router(chat.router, prefix="/api/chat", tags=["chat"])


@app.get("/")
async def root():
    """Root endpoint: basic service metadata and a pointer to the docs."""
    return {
        "message": "Physical AI RAG Backend",
        "version": "1.0.0",
        "docs": "/docs"
    }
app/models/__init__.py
ADDED
|
File without changes
|
app/models/chat.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel, Field
|
| 2 |
+
from typing import Optional, List, Dict, Any
|
| 3 |
+
from datetime import datetime
|
| 4 |
+
from uuid import UUID
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class ChatQuery(BaseModel):
|
| 8 |
+
"""Request model for chat queries."""
|
| 9 |
+
query: str = Field(..., min_length=1, max_length=1000)
|
| 10 |
+
conversation_id: Optional[UUID] = None
|
| 11 |
+
filters: Optional[Dict[str, Any]] = None
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class ChatQueryWithContext(BaseModel):
    """Request model for chat queries with selected text context."""
    # Free-text user question; length-bounded by pydantic validation.
    query: str = Field(..., min_length=1, max_length=1000)
    # Text the user highlighted in the textbook, injected into the prompt.
    selected_text: Optional[str] = None
    # Arbitrary metadata about the selection (e.g. location) — schema is
    # caller-defined; presumably set by the frontend. Verify against callers.
    selection_metadata: Optional[Dict[str, Any]] = None
    # Conversation to continue; omitted on the first message of a conversation.
    conversation_id: Optional[UUID] = None
    # Optional metadata filters forwarded to the retrieval step.
    filters: Optional[Dict[str, Any]] = None
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class SourceReference(BaseModel):
    """Reference to a source document chunk cited in an answer."""
    chapter_number: int
    chapter_title: str
    section_title: str
    # Link to the section in the rendered textbook.
    url: str
    # Similarity score reported by the vector search for this chunk.
    relevance_score: float
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class ChatResponse(BaseModel):
    """Response model for chat queries."""
    # Generated answer text.
    answer: str
    # Chunks the answer was grounded in, for display as citations.
    sources: List[SourceReference]
    # Conversation this exchange belongs to (newly created if none was given).
    conversation_id: UUID
    # ID of the stored assistant message.
    message_id: UUID
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
class Message(BaseModel):
    """A single stored chat message."""
    id: UUID
    conversation_id: UUID
    # Speaker role, e.g. "user" or "assistant".
    role: str
    content: str
    # IDs of the retrieved chunks used to generate this message, if any.
    context_used: Optional[List[str]] = None
    created_at: datetime
    metadata: Optional[Dict[str, Any]] = None
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
class Conversation(BaseModel):
    """A conversation with its (optionally loaded) messages."""
    id: UUID
    created_at: datetime
    metadata: Optional[Dict[str, Any]] = None
    # None when messages were not fetched alongside the conversation row.
    messages: Optional[List[Message]] = None
|
app/models/document.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel
|
| 2 |
+
from typing import Optional, List
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class DocumentChunk(BaseModel):
    """Model for a document chunk to be indexed."""
    # Stable identifier for the chunk, used as the vector-store key.
    chunk_id: str
    chapter_number: int
    chapter_title: str
    section_title: str
    # Raw chunk text that gets embedded and later shown as context.
    content: str
    content_type: str  # text, code, callout, quiz
    # Link to the section in the rendered textbook.
    url: str
    keywords: Optional[List[str]] = None
    word_count: int
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class SearchResult(BaseModel):
    """Model for a search result from Qdrant.

    Mirrors DocumentChunk's payload fields plus the similarity score.
    """
    chunk_id: str
    chapter_number: int
    chapter_title: str
    section_title: str
    content: str
    content_type: str
    url: str
    # Similarity score returned by Qdrant for this hit.
    score: float
|
app/services/__init__.py
ADDED
|
File without changes
|
app/services/embeddings.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import cohere
|
| 2 |
+
from typing import List
|
| 3 |
+
from app.config import get_settings
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class EmbeddingService:
    """Service for generating embeddings using Cohere.

    Document embeddings (indexing) and query embeddings (search) use
    different Cohere input types, so they are exposed as separate methods.
    """

    def __init__(self):
        self.settings = get_settings()
        self.client = cohere.Client(self.settings.cohere_api_key)
        self.model = self.settings.cohere_embed_model

    def embed_text(self, text: str) -> List[float]:
        """Generate a document embedding for a single text."""
        # Delegate to the batch method so both paths share one implementation.
        return self.embed_texts([text])[0]

    def embed_texts(self, texts: List[str]) -> List[List[float]]:
        """Generate document embeddings for multiple texts in one API call."""
        response = self.client.embed(
            texts=texts,
            model=self.model,
            input_type="search_document"
        )
        return response.embeddings

    def embed_query(self, query: str) -> List[float]:
        """Generate an embedding for a search query.

        Uses input_type="search_query" so the vector is comparable against
        document vectors embedded with input_type="search_document".
        """
        response = self.client.embed(
            texts=[query],
            model=self.model,
            input_type="search_query"
        )
        return response.embeddings[0]
|
app/services/generation.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import cohere
|
| 2 |
+
from typing import List
|
| 3 |
+
from app.config import get_settings
|
| 4 |
+
from app.models.document import SearchResult
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class GenerationService:
    """Service for generating responses using Cohere chat."""

    def __init__(self):
        self.settings = get_settings()
        self.client = cohere.Client(self.settings.cohere_api_key)
        self.model = self.settings.cohere_generation_model

    def generate_response(
        self,
        query: str,
        retrieved_chunks: List[SearchResult],
        selected_text: str | None = None,
        conversation_history: List[dict] | None = None
    ) -> str:
        """Generate a response based on retrieved context.

        Args:
            query: The user's question.
            retrieved_chunks: Chunks returned by retrieval, cited as
                numbered sources in the prompt.
            selected_text: Optional text the user highlighted, appended to
                the prompt as extra context.
            conversation_history: Prior messages as dicts with 'role' and
                'content' keys; only the last 5 are sent to the model.

        Returns:
            The generated answer text.
        """
        # Build context from retrieved chunks
        context_parts = []
        for i, chunk in enumerate(retrieved_chunks, 1):
            context_parts.append(
                f"[Source {i}: {chunk.chapter_title} - {chunk.section_title}]\n{chunk.content}"
            )
        context = "\n\n".join(context_parts)

        # Build prompt
        system_prompt = """You are an AI teaching assistant for the Physical AI and Humanoid Robotics textbook.

CRITICAL RULES - YOU MUST FOLLOW THESE:
1. ALWAYS provide a direct, complete answer to the user's question
2. NEVER ask questions back to the user (NO "Could you clarify...", NO "What specifically...", NO "Please specify...")
3. If the question is vague, make reasonable assumptions and answer based on the most relevant information in the context
4. If the answer is not in the context, say "I don't have information about that in the textbook" - but still DON'T ask questions
5. Provide educational, clear, and concise answers
6. Use technical terms appropriately and explain them when needed
7. For code-related questions, provide relevant code snippets from the context

Remember: Your job is to ANSWER, not to ask for clarification. Always give the best answer you can based on the available context."""

        user_prompt = f"""Context from the textbook:
{context}
"""

        if selected_text:
            user_prompt += f"\nUser selected this text: \"{selected_text}\"\n"

        user_prompt += f"\nQuestion: {query}\n\nAnswer based on the context above:"

        # Build chat history
        chat_history = []
        if conversation_history:
            # Map stored roles to Cohere's expected speaker labels.
            # Hoisted out of the loop: the mapping is loop-invariant.
            role_mapping = {
                "user": "User",
                "assistant": "Chatbot"
            }
            for msg in conversation_history[-5:]:  # Last 5 messages for context
                cohere_role = role_mapping.get(msg['role'], "User")
                chat_history.append({
                    "role": cohere_role,
                    "message": msg['content']
                })

        # Generate response; low temperature favors grounded, factual output.
        response = self.client.chat(
            model=self.model,
            message=user_prompt,
            chat_history=chat_history,
            preamble=system_prompt,
            temperature=0.3,
            max_tokens=1000
        )

        return response.text
|
app/services/rag_pipeline.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Optional, Dict, Any
|
| 2 |
+
from uuid import UUID
|
| 3 |
+
from app.services.retrieval import RetrievalService
|
| 4 |
+
from app.services.generation import GenerationService
|
| 5 |
+
from app.db.postgres import PostgresDB
|
| 6 |
+
from app.models.chat import ChatResponse, SourceReference
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class RAGPipeline:
    """Main RAG pipeline orchestrating retrieval and generation.

    process_query is the single entry point: it ensures a conversation
    exists, retrieves context, generates an answer, and persists both the
    user and assistant messages to Postgres.
    """

    def __init__(self):
        self.retrieval = RetrievalService()
        self.generation = GenerationService()
        self.db = PostgresDB()

    def process_query(
        self,
        query: str,
        conversation_id: Optional[UUID] = None,
        selected_text: Optional[str] = None,
        filters: Optional[Dict[str, Any]] = None
    ) -> ChatResponse:
        """Process a user query through the RAG pipeline.

        Args:
            query: The user's question.
            conversation_id: Existing conversation to continue; a new one is
                created when omitted.
            selected_text: Text the user highlighted, forwarded to generation.
            filters: Optional metadata filters forwarded to retrieval.

        Returns:
            ChatResponse with the answer, source references, the conversation
            ID, and the stored assistant message ID.
        """

        # Create conversation if needed
        if not conversation_id:
            conversation_id = self.db.create_conversation()

        # Get conversation history (fetched before storing the new user
        # message, so the model sees only prior turns)
        conversation_history = self.db.get_conversation_history(conversation_id)

        # Retrieve relevant chunks (top 5)
        retrieved_chunks = self.retrieval.retrieve(
            query=query,
            limit=5,
            filters=filters
        )

        # Generate response
        answer = self.generation.generate_response(
            query=query,
            retrieved_chunks=retrieved_chunks,
            selected_text=selected_text,
            conversation_history=conversation_history
        )

        # Store user message
        self.db.add_message(
            conversation_id=conversation_id,
            role="user",
            content=query,
            metadata={
                "selected_text": selected_text,
                "filters": filters
            }
        )

        # Store assistant message, recording which chunks grounded it
        context_used = [chunk.chunk_id for chunk in retrieved_chunks]
        message_id = self.db.add_message(
            conversation_id=conversation_id,
            role="assistant",
            content=answer,
            context_used=context_used
        )

        # Build source references with localhost URLs for development
        # NOTE(review): this unconditionally rewrites the production host to
        # localhost — confirm this is intended outside local development.
        sources = [
            SourceReference(
                chapter_number=chunk.chapter_number,
                chapter_title=chunk.chapter_title,
                section_title=chunk.section_title,
                url=chunk.url.replace(
                    "https://physical-ai-textbook.vercel.app",
                    "http://localhost:3000"
                ),
                relevance_score=chunk.score
            )
            for chunk in retrieved_chunks
        ]

        return ChatResponse(
            answer=answer,
            sources=sources,
            conversation_id=conversation_id,
            message_id=message_id
        )
|
app/services/retrieval.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List, Optional, Dict, Any
|
| 2 |
+
from app.db.qdrant import QdrantDB
|
| 3 |
+
from app.services.embeddings import EmbeddingService
|
| 4 |
+
from app.models.document import SearchResult
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class RetrievalService:
    """Service for retrieving relevant document chunks."""

    def __init__(self):
        self.qdrant = QdrantDB()
        self.embeddings = EmbeddingService()

    def retrieve(
        self,
        query: str,
        limit: int = 5,
        filters: Optional[Dict[str, Any]] = None
    ) -> List[SearchResult]:
        """Embed the query and run a vector search over the indexed chunks.

        Args:
            query: Free-text search query.
            limit: Maximum number of chunks to return.
            filters: Optional metadata filters passed through to Qdrant.

        Returns:
            The matching chunks as SearchResult objects.
        """
        # Embed the query, then search Qdrant with the resulting vector.
        embedded_query = self.embeddings.embed_query(query)
        return self.qdrant.search(
            query_vector=embedded_query,
            limit=limit,
            filters=filters
        )
|
requirements.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
uvicorn
|
| 3 |
+
python-dotenv
|
| 4 |
+
pydantic
|
| 5 |
+
pydantic-settings
|
| 6 |
+
cohere
|
| 7 |
+
qdrant-client
|
| 8 |
+
psycopg2-binary
|
| 9 |
+
sqlalchemy
|
| 10 |
+
python-multipart
|
| 11 |
+
httpx
|