Spaces:
Sleeping
Sleeping
Commit ·
2ade705
1
Parent(s): f850e5a
Redeploy backend
Browse files- .README.md.swp +0 -0
- .env +8 -9
- .env.example +6 -7
- backend.log +0 -0
- book_ingestor.egg-info/PKG-INFO +24 -49
- rag_agent_api/README.md +9 -9
- rag_agent_api/__init__.py +2 -2
- rag_agent_api/__pycache__/__init__.cpython-313.pyc +0 -0
- rag_agent_api/__pycache__/config.cpython-313.pyc +0 -0
- rag_agent_api/__pycache__/main.cpython-313.pyc +0 -0
- rag_agent_api/__pycache__/openrouter_agent.cpython-313.pyc +0 -0
- rag_agent_api/config.py +4 -1
- rag_agent_api/main.py +10 -10
- rag_agent_api/{agent.py → openrouter_agent.py} +50 -51
- requirements.txt +1 -1
- tests/test_integration.py +21 -18
.README.md.swp
ADDED
|
Binary file (1.02 kB). View file
|
|
|
.env
CHANGED
|
@@ -1,20 +1,19 @@
|
|
| 1 |
-
COHERE_API_KEY=
|
| 2 |
-
QDRANT_URL=
|
| 3 |
-
QDRANT_API_KEY=
|
| 4 |
-
REACT_APP_RAG_API_URL=
|
| 5 |
# RAG Agent and API Layer Environment Variables
|
| 6 |
|
| 7 |
# OpenAI API Configuration
|
| 8 |
-
|
| 9 |
-
GEMINI_API_KEY="AIzaSyDM79Xi6rsffqHrwVOlc3FwPnk9pwBh9OI"
|
| 10 |
|
| 11 |
# Qdrant Configuration
|
| 12 |
-
QDRANT_URL=
|
| 13 |
-
QDRANT_API_KEY=
|
| 14 |
QDRANT_COLLECTION_NAME=rag_embedding
|
| 15 |
|
| 16 |
# Cohere Configuration (for query embeddings)
|
| 17 |
-
COHERE_API_KEY=
|
| 18 |
|
| 19 |
# Application Configuration
|
| 20 |
DEFAULT_CONTEXT_WINDOW=5
|
|
|
|
| 1 |
+
COHERE_API_KEY=Dq2dLJlwDOZwAg4K7XalSEC91kXnucGd52KmkJh7
|
| 2 |
+
QDRANT_URL=https://72888a6e-0dfc-4620-bf85-0b9025951e0c.us-east4-0.gcp.cloud.qdrant.io:6333
|
| 3 |
+
QDRANT_API_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.BDBAtGf7x_XGCu3lO4-kNxgJeVgnSTKUjHeZBT6qJkQ
|
| 4 |
+
REACT_APP_RAG_API_URL=http://localhost:8000
|
| 5 |
# RAG Agent and API Layer Environment Variables
|
| 6 |
|
| 7 |
# OpenRouter API Configuration
|
| 8 |
+
OPENROUTER_API_KEY=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
|
|
|
|
| 9 |
|
| 10 |
# Qdrant Configuration
|
| 11 |
+
QDRANT_URL=https://72888a6e-0dfc-4620-bf85-0b9025951e0c.us-east4-0.gcp.cloud.qdrant.io:6333
|
| 12 |
+
QDRANT_API_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.BDBAtGf7x_XGCu3lO4-kNxgJeVgnSTKUjHeZBT6qJkQ
|
| 13 |
QDRANT_COLLECTION_NAME=rag_embedding
|
| 14 |
|
| 15 |
# Cohere Configuration (for query embeddings)
|
| 16 |
+
COHERE_API_KEY=Dq2dLJlwDOZwAg4K7XalSEC91kXnucGd52KmkJh7
|
| 17 |
|
| 18 |
# Application Configuration
|
| 19 |
DEFAULT_CONTEXT_WINDOW=5
|
.env.example
CHANGED
|
@@ -1,15 +1,14 @@
|
|
| 1 |
# RAG Agent and API Layer Environment Variables
|
| 2 |
|
| 3 |
-
#
|
| 4 |
-
|
| 5 |
-
GEMINI_API_KEY="AIzaSyDM79Xi6rsffqHrwVOlc3FwPnk9pwBh9OI"
|
| 6 |
# Qdrant Configuration
|
| 7 |
-
QDRANT_URL=
|
| 8 |
-
QDRANT_API_KEY=
|
| 9 |
QDRANT_COLLECTION_NAME=rag_embedding
|
| 10 |
-
REACT_APP_RAG_API_URL=
|
| 11 |
# Cohere Configuration (for query embeddings)
|
| 12 |
-
COHERE_API_KEY=
|
| 13 |
|
| 14 |
# Application Configuration
|
| 15 |
DEFAULT_CONTEXT_WINDOW=5
|
|
|
|
| 1 |
# RAG Agent and API Layer Environment Variables
|
| 2 |
|
| 3 |
+
# OpenRouter API Configuration
|
| 4 |
+
OPENROUTER_API_KEY=your-openrouter-api-key-here
|
|
|
|
| 5 |
# Qdrant Configuration
|
| 6 |
+
QDRANT_URL=your-qdrant-instance-url
|
| 7 |
+
QDRANT_API_KEY=your-qdrant-api-key
|
| 8 |
QDRANT_COLLECTION_NAME=rag_embedding
|
| 9 |
+
REACT_APP_RAG_API_URL=http://localhost:8000
|
| 10 |
# Cohere Configuration (for query embeddings)
|
| 11 |
+
COHERE_API_KEY=your-cohere-api-key
|
| 12 |
|
| 13 |
# Application Configuration
|
| 14 |
DEFAULT_CONTEXT_WINDOW=5
|
backend.log
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
book_ingestor.egg-info/PKG-INFO
CHANGED
|
@@ -14,60 +14,35 @@ Requires-Dist: uvicorn>=0.24.0
|
|
| 14 |
Requires-Dist: openai>=1.0.0
|
| 15 |
Requires-Dist: pydantic>=2.0.0
|
| 16 |
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
-
|
| 20 |
|
| 21 |
-
|
| 22 |
|
| 23 |
-
|
| 24 |
-
```bash
|
| 25 |
-
cd backend
|
| 26 |
-
uv sync
|
| 27 |
-
```
|
| 28 |
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
|
|
|
| 34 |
|
| 35 |
-
##
|
| 36 |
|
| 37 |
-
|
| 38 |
-
- `
|
| 39 |
-
- `
|
| 40 |
-
- `
|
| 41 |
|
| 42 |
-
##
|
| 43 |
|
| 44 |
-
|
| 45 |
-
```bash
|
| 46 |
-
cd backend
|
| 47 |
-
uv run python main.py
|
| 48 |
-
```
|
| 49 |
-
|
| 50 |
-
This will:
|
| 51 |
-
1. Collect all URLs from the target book (https://sanilahmed.github.io/hackathon-ai-book/)
|
| 52 |
-
2. Extract text content from each URL
|
| 53 |
-
3. Chunk the content into fixed-size segments
|
| 54 |
-
4. Generate embeddings using Cohere
|
| 55 |
-
5. Store embeddings with metadata in Qdrant Cloud collection named "rag_embedding"
|
| 56 |
-
|
| 57 |
-
### Run the verification pipeline:
|
| 58 |
-
```bash
|
| 59 |
-
cd backend
|
| 60 |
-
python -m verify_retrieval.main
|
| 61 |
-
```
|
| 62 |
-
|
| 63 |
-
Or with specific options:
|
| 64 |
-
```bash
|
| 65 |
-
python -m verify_retrieval.main --query "transformer architecture in NLP" --top-k 10
|
| 66 |
-
```
|
| 67 |
-
|
| 68 |
-
The verification system will:
|
| 69 |
-
1. Load vectors and metadata stored in Qdrant from the original ingestion
|
| 70 |
-
2. Implement retrieval functions to query Qdrant using sample keywords or phrases
|
| 71 |
-
3. Validate that retrieved chunks are accurate and relevant
|
| 72 |
-
4. Check that metadata (URL, title, chunk_id) matches source content
|
| 73 |
-
5. Log results and confirm the pipeline executes end-to-end without errors
|
|
|
|
| 14 |
Requires-Dist: openai>=1.0.0
|
| 15 |
Requires-Dist: pydantic>=2.0.0
|
| 16 |
|
| 17 |
+
---
|
| 18 |
+
title: Backend Deploy
|
| 19 |
+
emoji: 🚀
|
| 20 |
+
colorFrom: blue
|
| 21 |
+
colorTo: purple
|
| 22 |
+
sdk: docker
|
| 23 |
+
pinned: false
|
| 24 |
+
---
|
| 25 |
|
| 26 |
+
# RAG Agent and API Layer
|
| 27 |
|
| 28 |
+
This is a FastAPI application that provides a question-answering API using OpenRouter agents and Qdrant retrieval for RAG (Retrieval-Augmented Generation) functionality.
|
| 29 |
|
| 30 |
+
## API Endpoints
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
+
- `GET /` - Root endpoint with API information
|
| 33 |
+
- `POST /ask` - Main question-answering endpoint
|
| 34 |
+
- `GET /health` - Health check endpoint
|
| 35 |
+
- `GET /ready` - Readiness check endpoint
|
| 36 |
+
- `/docs` - API documentation (Swagger UI)
|
| 37 |
+
- `/redoc` - API documentation (Redoc)
|
| 38 |
|
| 39 |
+
## Configuration
|
| 40 |
|
| 41 |
+
The application requires the following environment variables:
|
| 42 |
+
- `OPENROUTER_API_KEY` - API key for OpenRouter
|
| 43 |
+
- `QDRANT_URL` - URL for Qdrant vector database
|
| 44 |
+
- `QDRANT_API_KEY` - API key for Qdrant database
|
| 45 |
|
| 46 |
+
## Deployment
|
| 47 |
|
| 48 |
+
This application is configured for deployment on Hugging Face Spaces using Docker.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
rag_agent_api/README.md
CHANGED
|
@@ -1,17 +1,17 @@
|
|
| 1 |
# RAG Agent and API Layer
|
| 2 |
|
| 3 |
-
A FastAPI-based question-answering system that uses
|
| 4 |
|
| 5 |
## Overview
|
| 6 |
|
| 7 |
-
The RAG Agent and API Layer provides a question-answering API that retrieves relevant content from Qdrant and uses an
|
| 8 |
|
| 9 |
## Architecture
|
| 10 |
|
| 11 |
The system consists of several key components:
|
| 12 |
|
| 13 |
- **FastAPI Application**: Main entry point for the question-answering API
|
| 14 |
-
- **
|
| 15 |
- **Qdrant Retriever**: Retrieves relevant content chunks from Qdrant database
|
| 16 |
- **Configuration Manager**: Handles environment variables and settings
|
| 17 |
- **Data Models**: Pydantic models for API requests/responses
|
|
@@ -22,7 +22,7 @@ The system consists of several key components:
|
|
| 22 |
### Prerequisites
|
| 23 |
|
| 24 |
- Python 3.9+
|
| 25 |
-
-
|
| 26 |
- Qdrant Cloud instance with book content embeddings
|
| 27 |
- Cohere API key (for query embeddings)
|
| 28 |
|
|
@@ -42,7 +42,7 @@ The system consists of several key components:
|
|
| 42 |
|
| 43 |
3. Edit `.env` with your API keys and configuration:
|
| 44 |
```env
|
| 45 |
-
|
| 46 |
QDRANT_URL=your-qdrant-instance-url
|
| 47 |
QDRANT_API_KEY=your-qdrant-api-key
|
| 48 |
QDRANT_COLLECTION_NAME=rag_embedding
|
|
@@ -103,7 +103,7 @@ Root endpoint with API information.
|
|
| 103 |
|
| 104 |
### Environment Variables
|
| 105 |
|
| 106 |
-
- `
|
| 107 |
- `QDRANT_URL`: URL of your Qdrant instance
|
| 108 |
- `QDRANT_API_KEY`: Your Qdrant API key
|
| 109 |
- `QDRANT_COLLECTION_NAME`: Name of the collection with book embeddings (default: `rag_embedding`)
|
|
@@ -123,8 +123,8 @@ Pydantic models for API request/response schemas.
|
|
| 123 |
### Schemas (`schemas.py`)
|
| 124 |
Additional schemas for internal data structures.
|
| 125 |
|
| 126 |
-
### Agent (`
|
| 127 |
-
|
| 128 |
|
| 129 |
### Retrieval (`retrieval.py`)
|
| 130 |
Qdrant integration for content retrieval with semantic search.
|
|
@@ -160,7 +160,7 @@ pytest
|
|
| 160 |
|
| 161 |
# Run specific test files
|
| 162 |
pytest tests/test_api.py
|
| 163 |
-
pytest tests/
|
| 164 |
pytest tests/test_retrieval.py
|
| 165 |
```
|
| 166 |
|
|
|
|
| 1 |
# RAG Agent and API Layer
|
| 2 |
|
| 3 |
+
A FastAPI-based question-answering system that uses OpenRouter Agents and Qdrant retrieval to generate grounded responses based on book content.
|
| 4 |
|
| 5 |
## Overview
|
| 6 |
|
| 7 |
+
The RAG Agent and API Layer provides a question-answering API that retrieves relevant content from Qdrant and uses an OpenRouter agent to generate accurate, source-grounded responses. The system ensures that all answers are based only on the provided context to prevent hallucinations.
|
| 8 |
|
| 9 |
## Architecture
|
| 10 |
|
| 11 |
The system consists of several key components:
|
| 12 |
|
| 13 |
- **FastAPI Application**: Main entry point for the question-answering API
|
| 14 |
+
- **OpenRouter Agent**: Generates responses based on retrieved context
|
| 15 |
- **Qdrant Retriever**: Retrieves relevant content chunks from Qdrant database
|
| 16 |
- **Configuration Manager**: Handles environment variables and settings
|
| 17 |
- **Data Models**: Pydantic models for API requests/responses
|
|
|
|
| 22 |
### Prerequisites
|
| 23 |
|
| 24 |
- Python 3.9+
|
| 25 |
+
- OpenRouter API key
|
| 26 |
- Qdrant Cloud instance with book content embeddings
|
| 27 |
- Cohere API key (for query embeddings)
|
| 28 |
|
|
|
|
| 42 |
|
| 43 |
3. Edit `.env` with your API keys and configuration:
|
| 44 |
```env
|
| 45 |
+
OPENROUTER_API_KEY=your-openrouter-api-key-here
|
| 46 |
QDRANT_URL=your-qdrant-instance-url
|
| 47 |
QDRANT_API_KEY=your-qdrant-api-key
|
| 48 |
QDRANT_COLLECTION_NAME=rag_embedding
|
|
|
|
| 103 |
|
| 104 |
### Environment Variables
|
| 105 |
|
| 106 |
+
- `OPENROUTER_API_KEY`: Your OpenRouter API key
|
| 107 |
- `QDRANT_URL`: URL of your Qdrant instance
|
| 108 |
- `QDRANT_API_KEY`: Your Qdrant API key
|
| 109 |
- `QDRANT_COLLECTION_NAME`: Name of the collection with book embeddings (default: `rag_embedding`)
|
|
|
|
| 123 |
### Schemas (`schemas.py`)
|
| 124 |
Additional schemas for internal data structures.
|
| 125 |
|
| 126 |
+
### Agent (`openrouter_agent.py`)
|
| 127 |
+
OpenRouter agent implementation with context injection and response validation.
|
| 128 |
|
| 129 |
### Retrieval (`retrieval.py`)
|
| 130 |
Qdrant integration for content retrieval with semantic search.
|
|
|
|
| 160 |
|
| 161 |
# Run specific test files
|
| 162 |
pytest tests/test_api.py
|
| 163 |
+
pytest tests/test_openrouter_agent.py
|
| 164 |
pytest tests/test_retrieval.py
|
| 165 |
```
|
| 166 |
|
rag_agent_api/__init__.py
CHANGED
|
@@ -10,7 +10,7 @@ __license__ = "MIT"
|
|
| 10 |
# Import main components for easy access
|
| 11 |
from .main import app
|
| 12 |
from .config import Config, get_config, validate_config
|
| 13 |
-
from .
|
| 14 |
from .retrieval import QdrantRetriever
|
| 15 |
|
| 16 |
# Define what gets imported with "from rag_agent_api import *"
|
|
@@ -19,6 +19,6 @@ __all__ = [
|
|
| 19 |
"Config",
|
| 20 |
"get_config",
|
| 21 |
"validate_config",
|
| 22 |
-
"
|
| 23 |
"QdrantRetriever"
|
| 24 |
]
|
|
|
|
| 10 |
# Import main components for easy access
|
| 11 |
from .main import app
|
| 12 |
from .config import Config, get_config, validate_config
|
| 13 |
+
from .openrouter_agent import OpenRouterAgent
|
| 14 |
from .retrieval import QdrantRetriever
|
| 15 |
|
| 16 |
# Define what gets imported with "from rag_agent_api import *"
|
|
|
|
| 19 |
"Config",
|
| 20 |
"get_config",
|
| 21 |
"validate_config",
|
| 22 |
+
"OpenRouterAgent",
|
| 23 |
"QdrantRetriever"
|
| 24 |
]
|
rag_agent_api/__pycache__/__init__.cpython-313.pyc
CHANGED
|
Binary files a/rag_agent_api/__pycache__/__init__.cpython-313.pyc and b/rag_agent_api/__pycache__/__init__.cpython-313.pyc differ
|
|
|
rag_agent_api/__pycache__/config.cpython-313.pyc
CHANGED
|
Binary files a/rag_agent_api/__pycache__/config.cpython-313.pyc and b/rag_agent_api/__pycache__/config.cpython-313.pyc differ
|
|
|
rag_agent_api/__pycache__/main.cpython-313.pyc
CHANGED
|
Binary files a/rag_agent_api/__pycache__/main.cpython-313.pyc and b/rag_agent_api/__pycache__/main.cpython-313.pyc differ
|
|
|
rag_agent_api/__pycache__/openrouter_agent.cpython-313.pyc
ADDED
|
Binary file (12.7 kB). View file
|
|
|
rag_agent_api/config.py
CHANGED
|
@@ -20,8 +20,8 @@ class Config:
|
|
| 20 |
def __init__(self):
|
| 21 |
"""Initialize configuration by loading environment variables."""
|
| 22 |
self.openai_api_key = os.getenv('OPENAI_API_KEY')
|
| 23 |
-
self.gemini_api_key = os.getenv('GEMINI_API_KEY')
|
| 24 |
self.cohere_api_key = os.getenv('COHERE_API_KEY')
|
|
|
|
| 25 |
self.qdrant_url = os.getenv('QDRANT_URL')
|
| 26 |
self.qdrant_api_key = os.getenv('QDRANT_API_KEY')
|
| 27 |
self.qdrant_collection_name = os.getenv('QDRANT_COLLECTION_NAME', 'rag_embedding')
|
|
@@ -38,6 +38,9 @@ class Config:
|
|
| 38 |
"""
|
| 39 |
errors = []
|
| 40 |
|
|
|
|
|
|
|
|
|
|
| 41 |
if not self.cohere_api_key:
|
| 42 |
errors.append("COHERE_API_KEY environment variable not set")
|
| 43 |
|
|
|
|
| 20 |
def __init__(self):
|
| 21 |
"""Initialize configuration by loading environment variables."""
|
| 22 |
self.openai_api_key = os.getenv('OPENAI_API_KEY')
|
|
|
|
| 23 |
self.cohere_api_key = os.getenv('COHERE_API_KEY')
|
| 24 |
+
self.openrouter_api_key = os.getenv('OPENROUTER_API_KEY')
|
| 25 |
self.qdrant_url = os.getenv('QDRANT_URL')
|
| 26 |
self.qdrant_api_key = os.getenv('QDRANT_API_KEY')
|
| 27 |
self.qdrant_collection_name = os.getenv('QDRANT_COLLECTION_NAME', 'rag_embedding')
|
|
|
|
| 38 |
"""
|
| 39 |
errors = []
|
| 40 |
|
| 41 |
+
if not self.openrouter_api_key:
|
| 42 |
+
errors.append("OPENROUTER_API_KEY environment variable not set")
|
| 43 |
+
|
| 44 |
if not self.cohere_api_key:
|
| 45 |
errors.append("COHERE_API_KEY environment variable not set")
|
| 46 |
|
rag_agent_api/main.py
CHANGED
|
@@ -13,7 +13,7 @@ from .config import validate_config, get_config
|
|
| 13 |
from .models import QueryRequest, APIResponse, ErrorResponse, HealthResponse
|
| 14 |
from .schemas import AgentResponse, AgentContext
|
| 15 |
from .utils import setup_logging, generate_response_id, format_timestamp, create_error_response
|
| 16 |
-
from .
|
| 17 |
from .retrieval import QdrantRetriever
|
| 18 |
|
| 19 |
|
|
@@ -41,7 +41,7 @@ setup_logging(config.log_level)
|
|
| 41 |
|
| 42 |
# Global instances
|
| 43 |
retriever: Optional[QdrantRetriever] = None
|
| 44 |
-
agent: Optional[
|
| 45 |
|
| 46 |
|
| 47 |
@app.on_event("startup")
|
|
@@ -56,10 +56,10 @@ async def startup_event():
|
|
| 56 |
|
| 57 |
# Initialize agent first (this doesn't require async initialization)
|
| 58 |
try:
|
| 59 |
-
agent =
|
| 60 |
-
logging.info("
|
| 61 |
except Exception as e:
|
| 62 |
-
logging.error(f"Failed to initialize
|
| 63 |
raise
|
| 64 |
|
| 65 |
# Initialize retriever (async operations will be handled in the methods themselves)
|
|
@@ -82,22 +82,22 @@ async def health_check() -> HealthResponse:
|
|
| 82 |
HealthResponse with status of services
|
| 83 |
"""
|
| 84 |
# Check if all required components are initialized
|
| 85 |
-
|
| 86 |
qdrant_status = "up" if retriever else "down"
|
| 87 |
agent_status = "up" if agent else "down"
|
| 88 |
|
| 89 |
# Determine overall status
|
| 90 |
overall_status = "healthy"
|
| 91 |
-
if
|
| 92 |
overall_status = "unhealthy"
|
| 93 |
-
elif
|
| 94 |
overall_status = "degraded"
|
| 95 |
|
| 96 |
return HealthResponse(
|
| 97 |
status=overall_status,
|
| 98 |
timestamp=format_timestamp(),
|
| 99 |
services={
|
| 100 |
-
"
|
| 101 |
"qdrant": qdrant_status,
|
| 102 |
"agent": agent_status
|
| 103 |
}
|
|
@@ -194,7 +194,7 @@ async def root() -> Dict[str, Any]:
|
|
| 194 |
return {
|
| 195 |
"message": "RAG Agent and API Layer",
|
| 196 |
"version": "1.0.0",
|
| 197 |
-
"description": "Question-answering API using
|
| 198 |
"endpoints": {
|
| 199 |
"POST /ask": "Main question-answering endpoint",
|
| 200 |
"GET /health": "Health check endpoint",
|
|
|
|
| 13 |
from .models import QueryRequest, APIResponse, ErrorResponse, HealthResponse
|
| 14 |
from .schemas import AgentResponse, AgentContext
|
| 15 |
from .utils import setup_logging, generate_response_id, format_timestamp, create_error_response
|
| 16 |
+
from .openrouter_agent import OpenRouterAgent
|
| 17 |
from .retrieval import QdrantRetriever
|
| 18 |
|
| 19 |
|
|
|
|
| 41 |
|
| 42 |
# Global instances
|
| 43 |
retriever: Optional[QdrantRetriever] = None
|
| 44 |
+
agent: Optional[OpenRouterAgent] = None
|
| 45 |
|
| 46 |
|
| 47 |
@app.on_event("startup")
|
|
|
|
| 56 |
|
| 57 |
# Initialize agent first (this doesn't require async initialization)
|
| 58 |
try:
|
| 59 |
+
agent = OpenRouterAgent()
|
| 60 |
+
logging.info("OpenRouter agent initialized successfully")
|
| 61 |
except Exception as e:
|
| 62 |
+
logging.error(f"Failed to initialize OpenRouter agent: {e}")
|
| 63 |
raise
|
| 64 |
|
| 65 |
# Initialize retriever (async operations will be handled in the methods themselves)
|
|
|
|
| 82 |
HealthResponse with status of services
|
| 83 |
"""
|
| 84 |
# Check if all required components are initialized
|
| 85 |
+
openrouter_status = "up" if agent else "down"
|
| 86 |
qdrant_status = "up" if retriever else "down"
|
| 87 |
agent_status = "up" if agent else "down"
|
| 88 |
|
| 89 |
# Determine overall status
|
| 90 |
overall_status = "healthy"
|
| 91 |
+
if openrouter_status == "down" or qdrant_status == "down":
|
| 92 |
overall_status = "unhealthy"
|
| 93 |
+
elif openrouter_status == "degraded" or qdrant_status == "degraded":
|
| 94 |
overall_status = "degraded"
|
| 95 |
|
| 96 |
return HealthResponse(
|
| 97 |
status=overall_status,
|
| 98 |
timestamp=format_timestamp(),
|
| 99 |
services={
|
| 100 |
+
"openrouter": openrouter_status,
|
| 101 |
"qdrant": qdrant_status,
|
| 102 |
"agent": agent_status
|
| 103 |
}
|
|
|
|
| 194 |
return {
|
| 195 |
"message": "RAG Agent and API Layer",
|
| 196 |
"version": "1.0.0",
|
| 197 |
+
"description": "Question-answering API using OpenRouter Agents and Qdrant retrieval",
|
| 198 |
"endpoints": {
|
| 199 |
"POST /ask": "Main question-answering endpoint",
|
| 200 |
"GET /health": "Health check endpoint",
|
rag_agent_api/{agent.py → openrouter_agent.py}
RENAMED
|
@@ -1,44 +1,41 @@
|
|
| 1 |
"""
|
| 2 |
-
|
| 3 |
|
| 4 |
-
This module provides functionality for creating and managing
|
| 5 |
that generates responses based on retrieved context.
|
| 6 |
"""
|
| 7 |
import asyncio
|
| 8 |
import logging
|
| 9 |
from typing import List, Dict, Any, Optional
|
| 10 |
-
import
|
| 11 |
from .config import get_config
|
| 12 |
from .schemas import AgentContext, AgentResponse, SourceChunkSchema
|
| 13 |
from .utils import format_confidence_score
|
| 14 |
|
| 15 |
|
| 16 |
-
class
|
| 17 |
"""
|
| 18 |
-
A class to manage the
|
| 19 |
"""
|
| 20 |
-
def __init__(self, model_name: str = "
|
| 21 |
"""
|
| 22 |
-
Initialize the
|
| 23 |
|
| 24 |
Args:
|
| 25 |
-
model_name: Name of the
|
| 26 |
"""
|
| 27 |
config = get_config()
|
| 28 |
-
api_key = config.
|
| 29 |
|
| 30 |
if not api_key:
|
| 31 |
-
raise ValueError("
|
| 32 |
|
| 33 |
-
|
| 34 |
-
genai.configure(api_key=api_key)
|
| 35 |
-
|
| 36 |
-
# Create the generative model instance
|
| 37 |
-
self.model = genai.GenerativeModel(model_name)
|
| 38 |
self.model_name = model_name
|
|
|
|
| 39 |
self.default_temperature = config.default_temperature
|
| 40 |
|
| 41 |
-
logging.info(f"
|
| 42 |
|
| 43 |
async def generate_response(self, context: AgentContext) -> AgentResponse:
|
| 44 |
"""
|
|
@@ -80,23 +77,43 @@ class GeminiAgent:
|
|
| 80 |
# Prepare the user message with the query
|
| 81 |
user_message = self._create_user_message(context)
|
| 82 |
|
| 83 |
-
#
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
}
|
| 96 |
-
)
|
| 97 |
|
| 98 |
-
|
| 99 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
|
| 101 |
# If the response indicates no information was found, return the exact message
|
| 102 |
if "I could not find this information in the book" in raw_response:
|
|
@@ -134,7 +151,7 @@ class GeminiAgent:
|
|
| 134 |
return agent_response
|
| 135 |
|
| 136 |
except Exception as e:
|
| 137 |
-
logging.error(f"Error generating response from
|
| 138 |
# Return the specific message when there's an error
|
| 139 |
return AgentResponse(
|
| 140 |
raw_response="I could not find this information in the book.",
|
|
@@ -197,20 +214,6 @@ QUESTION:
|
|
| 197 |
|
| 198 |
return "\n".join(formatted_chunks)
|
| 199 |
|
| 200 |
-
def _create_context_messages(self, context: AgentContext) -> List[Dict[str, str]]:
|
| 201 |
-
"""
|
| 202 |
-
Create context messages from the retrieved chunks.
|
| 203 |
-
With the new format, context is now provided in the user message,
|
| 204 |
-
so this method returns an empty list to avoid duplication.
|
| 205 |
-
|
| 206 |
-
Args:
|
| 207 |
-
context: AgentContext containing the query and retrieved context chunks
|
| 208 |
-
|
| 209 |
-
Returns:
|
| 210 |
-
Empty list since context is now in user message
|
| 211 |
-
"""
|
| 212 |
-
return []
|
| 213 |
-
|
| 214 |
def _identify_used_sources(self, response: str, chunks: List[SourceChunkSchema]) -> List[str]:
|
| 215 |
"""
|
| 216 |
Identify which sources were likely used in the response.
|
|
@@ -356,8 +359,4 @@ QUESTION:
|
|
| 356 |
return True
|
| 357 |
|
| 358 |
# In a more sophisticated implementation, you'd validate against the context more rigorously
|
| 359 |
-
return True
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
# Global agent instance (if needed)
|
| 363 |
-
# agent_instance = OpenAIAgent()
|
|
|
|
| 1 |
"""
|
| 2 |
+
OpenRouter Agent module for the RAG Agent and API Layer system.
|
| 3 |
|
| 4 |
+
This module provides functionality for creating and managing an OpenRouter agent
|
| 5 |
that generates responses based on retrieved context.
|
| 6 |
"""
|
| 7 |
import asyncio
|
| 8 |
import logging
|
| 9 |
from typing import List, Dict, Any, Optional
|
| 10 |
+
import httpx
|
| 11 |
from .config import get_config
|
| 12 |
from .schemas import AgentContext, AgentResponse, SourceChunkSchema
|
| 13 |
from .utils import format_confidence_score
|
| 14 |
|
| 15 |
|
| 16 |
+
class OpenRouterAgent:
|
| 17 |
"""
|
| 18 |
+
A class to manage the OpenRouter agent for generating responses based on context.
|
| 19 |
"""
|
| 20 |
+
def __init__(self, model_name: str = "arcee-ai/trinity-mini:free"):
|
| 21 |
"""
|
| 22 |
+
Initialize the OpenRouter agent with configuration.
|
| 23 |
|
| 24 |
Args:
|
| 25 |
+
model_name: Name of the OpenRouter model to use (default: arcee-ai/trinity-mini:free)
|
| 26 |
"""
|
| 27 |
config = get_config()
|
| 28 |
+
api_key = config.openrouter_api_key
|
| 29 |
|
| 30 |
if not api_key:
|
| 31 |
+
raise ValueError("OPENROUTER_API_KEY environment variable not set")
|
| 32 |
|
| 33 |
+
self.api_key = api_key
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
self.model_name = model_name
|
| 35 |
+
self.base_url = "https://openrouter.ai/api/v1"
|
| 36 |
self.default_temperature = config.default_temperature
|
| 37 |
|
| 38 |
+
logging.info(f"OpenRouter agent initialized with model: {model_name}")
|
| 39 |
|
| 40 |
async def generate_response(self, context: AgentContext) -> AgentResponse:
|
| 41 |
"""
|
|
|
|
| 77 |
# Prepare the user message with the query
|
| 78 |
user_message = self._create_user_message(context)
|
| 79 |
|
| 80 |
+
# Prepare the payload for OpenRouter API
|
| 81 |
+
payload = {
|
| 82 |
+
"model": self.model_name,
|
| 83 |
+
"messages": [
|
| 84 |
+
{"role": "system", "content": system_message},
|
| 85 |
+
{"role": "user", "content": user_message}
|
| 86 |
+
],
|
| 87 |
+
"temperature": context.source_policy if hasattr(context, 'temperature') else self.default_temperature,
|
| 88 |
+
"max_tokens": 1000
|
| 89 |
+
}
|
| 90 |
+
|
| 91 |
+
# Make the API call to OpenRouter
|
| 92 |
+
async with httpx.AsyncClient(timeout=30.0) as client:
|
| 93 |
+
headers = {
|
| 94 |
+
"Authorization": f"Bearer {self.api_key}",
|
| 95 |
+
"Content-Type": "application/json"
|
| 96 |
}
|
|
|
|
| 97 |
|
| 98 |
+
response = await client.post(
|
| 99 |
+
f"{self.base_url}/chat/completions",
|
| 100 |
+
json=payload,
|
| 101 |
+
headers=headers
|
| 102 |
+
)
|
| 103 |
+
|
| 104 |
+
if response.status_code != 200:
|
| 105 |
+
logging.error(f"OpenRouter API error: {response.status_code} - {response.text}")
|
| 106 |
+
return AgentResponse(
|
| 107 |
+
raw_response="I could not find this information in the book.",
|
| 108 |
+
used_sources=[],
|
| 109 |
+
confidence_score=0.0,
|
| 110 |
+
is_valid=False,
|
| 111 |
+
validation_details=f"API error: {response.status_code}",
|
| 112 |
+
unsupported_claims=[]
|
| 113 |
+
)
|
| 114 |
+
|
| 115 |
+
response_data = response.json()
|
| 116 |
+
raw_response = response_data["choices"][0]["message"]["content"]
|
| 117 |
|
| 118 |
# If the response indicates no information was found, return the exact message
|
| 119 |
if "I could not find this information in the book" in raw_response:
|
|
|
|
| 151 |
return agent_response
|
| 152 |
|
| 153 |
except Exception as e:
|
| 154 |
+
logging.error(f"Error generating response from OpenRouter agent: {e}", exc_info=True)
|
| 155 |
# Return the specific message when there's an error
|
| 156 |
return AgentResponse(
|
| 157 |
raw_response="I could not find this information in the book.",
|
|
|
|
| 214 |
|
| 215 |
return "\n".join(formatted_chunks)
|
| 216 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
def _identify_used_sources(self, response: str, chunks: List[SourceChunkSchema]) -> List[str]:
|
| 218 |
"""
|
| 219 |
Identify which sources were likely used in the response.
|
|
|
|
| 359 |
return True
|
| 360 |
|
| 361 |
# In a more sophisticated implementation, you'd validate against the context more rigorously
|
| 362 |
+
return True
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
|
@@ -9,4 +9,4 @@ uvicorn>=0.24.0
|
|
| 9 |
openai>=1.0.0
|
| 10 |
pydantic>=2.0.0
|
| 11 |
numpy>=1.21.0
|
| 12 |
-
|
|
|
|
| 9 |
openai>=1.0.0
|
| 10 |
pydantic>=2.0.0
|
| 11 |
numpy>=1.21.0
|
| 12 |
+
httpx>=0.27.0
|
tests/test_integration.py
CHANGED
|
@@ -7,7 +7,7 @@ from fastapi.testclient import TestClient
|
|
| 7 |
from unittest.mock import Mock, patch, AsyncMock
|
| 8 |
from rag_agent_api.main import app, retriever, agent
|
| 9 |
from rag_agent_api.retrieval import QdrantRetriever
|
| 10 |
-
from rag_agent_api.
|
| 11 |
from rag_agent_api.schemas import SourceChunkSchema, AgentResponse, AgentContext
|
| 12 |
|
| 13 |
|
|
@@ -17,13 +17,13 @@ def test_full_query_flow_with_mocked_components():
|
|
| 17 |
'QDRANT_URL': 'http://test-qdrant:6333',
|
| 18 |
'QDRANT_API_KEY': 'test-api-key',
|
| 19 |
'COHERE_API_KEY': 'test-cohere-key',
|
| 20 |
-
'
|
| 21 |
}):
|
| 22 |
with patch('rag_agent_api.main.QdrantRetriever') as mock_retriever_class:
|
| 23 |
-
with patch('rag_agent_api.main.
|
| 24 |
# Create mock instances
|
| 25 |
mock_retriever = Mock(spec=QdrantRetriever)
|
| 26 |
-
mock_agent = Mock(spec=
|
| 27 |
|
| 28 |
# Configure the class mocks to return our instance mocks
|
| 29 |
mock_retriever_class.return_value = mock_retriever
|
|
@@ -84,11 +84,11 @@ async def test_agent_context_creation():
|
|
| 84 |
'QDRANT_URL': 'http://test-qdrant:6333',
|
| 85 |
'QDRANT_API_KEY': 'test-api-key',
|
| 86 |
'COHERE_API_KEY': 'test-cohere-key',
|
| 87 |
-
'
|
| 88 |
}):
|
| 89 |
with patch('rag_agent_api.retrieval.AsyncQdrantClient') as mock_qdrant_client:
|
| 90 |
with patch('rag_agent_api.retrieval.cohere.Client') as mock_cohere_client:
|
| 91 |
-
with patch('rag_agent_api.
|
| 92 |
# Mock the Qdrant client
|
| 93 |
mock_qdrant_instance = Mock()
|
| 94 |
mock_qdrant_client.return_value = mock_qdrant_instance
|
|
@@ -101,7 +101,7 @@ async def test_agent_context_creation():
|
|
| 101 |
|
| 102 |
# Initialize components
|
| 103 |
retriever = QdrantRetriever(collection_name="test_collection")
|
| 104 |
-
agent =
|
| 105 |
|
| 106 |
# Create test chunks
|
| 107 |
test_chunk = SourceChunkSchema(
|
|
@@ -145,7 +145,7 @@ def test_health_endpoint_integration():
|
|
| 145 |
assert "services" in data
|
| 146 |
|
| 147 |
# Check that services status is included
|
| 148 |
-
assert "
|
| 149 |
assert "qdrant" in data["services"]
|
| 150 |
assert "agent" in data["services"]
|
| 151 |
|
|
@@ -157,11 +157,11 @@ async def test_retrieval_and_agent_integration():
|
|
| 157 |
'QDRANT_URL': 'http://test-qdrant:6333',
|
| 158 |
'QDRANT_API_KEY': 'test-api-key',
|
| 159 |
'COHERE_API_KEY': 'test-cohere-key',
|
| 160 |
-
'
|
| 161 |
}):
|
| 162 |
with patch('rag_agent_api.retrieval.AsyncQdrantClient') as mock_qdrant_client:
|
| 163 |
with patch('rag_agent_api.retrieval.cohere.Client') as mock_cohere_client:
|
| 164 |
-
with patch('rag_agent_api.
|
| 165 |
# Mock the Qdrant client
|
| 166 |
mock_qdrant_instance = Mock()
|
| 167 |
mock_qdrant_client.return_value = mock_qdrant_instance
|
|
@@ -172,18 +172,21 @@ async def test_retrieval_and_agent_integration():
|
|
| 172 |
mock_cohere_client.return_value = mock_cohere_instance
|
| 173 |
mock_cohere_instance.embed.return_value = Mock(embeddings=[[0.1, 0.2, 0.3]])
|
| 174 |
|
| 175 |
-
# Mock the
|
| 176 |
-
|
| 177 |
-
|
| 178 |
mock_completion = Mock()
|
| 179 |
-
mock_completion.
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
|
|
|
|
|
|
|
|
|
| 183 |
|
| 184 |
# Initialize components
|
| 185 |
test_retriever = QdrantRetriever(collection_name="test_collection")
|
| 186 |
-
test_agent =
|
| 187 |
|
| 188 |
# Mock the retrieval result
|
| 189 |
mock_chunk = SourceChunkSchema(
|
|
|
|
| 7 |
from unittest.mock import Mock, patch, AsyncMock
|
| 8 |
from rag_agent_api.main import app, retriever, agent
|
| 9 |
from rag_agent_api.retrieval import QdrantRetriever
|
| 10 |
+
from rag_agent_api.openrouter_agent import OpenRouterAgent
|
| 11 |
from rag_agent_api.schemas import SourceChunkSchema, AgentResponse, AgentContext
|
| 12 |
|
| 13 |
|
|
|
|
| 17 |
'QDRANT_URL': 'http://test-qdrant:6333',
|
| 18 |
'QDRANT_API_KEY': 'test-api-key',
|
| 19 |
'COHERE_API_KEY': 'test-cohere-key',
|
| 20 |
+
'OPENROUTER_API_KEY': 'test-openrouter-key'
|
| 21 |
}):
|
| 22 |
with patch('rag_agent_api.main.QdrantRetriever') as mock_retriever_class:
|
| 23 |
+
with patch('rag_agent_api.main.OpenRouterAgent') as mock_agent_class:
|
| 24 |
# Create mock instances
|
| 25 |
mock_retriever = Mock(spec=QdrantRetriever)
|
| 26 |
+
mock_agent = Mock(spec=OpenRouterAgent)
|
| 27 |
|
| 28 |
# Configure the class mocks to return our instance mocks
|
| 29 |
mock_retriever_class.return_value = mock_retriever
|
|
|
|
| 84 |
'QDRANT_URL': 'http://test-qdrant:6333',
|
| 85 |
'QDRANT_API_KEY': 'test-api-key',
|
| 86 |
'COHERE_API_KEY': 'test-cohere-key',
|
| 87 |
+
'OPENROUTER_API_KEY': 'test-openrouter-key'
|
| 88 |
}):
|
| 89 |
with patch('rag_agent_api.retrieval.AsyncQdrantClient') as mock_qdrant_client:
|
| 90 |
with patch('rag_agent_api.retrieval.cohere.Client') as mock_cohere_client:
|
| 91 |
+
with patch('rag_agent_api.openrouter_agent.httpx.AsyncClient'):
|
| 92 |
# Mock the Qdrant client
|
| 93 |
mock_qdrant_instance = Mock()
|
| 94 |
mock_qdrant_client.return_value = mock_qdrant_instance
|
|
|
|
| 101 |
|
| 102 |
# Initialize components
|
| 103 |
retriever = QdrantRetriever(collection_name="test_collection")
|
| 104 |
+
agent = OpenRouterAgent(model_name="gpt-4-test")
|
| 105 |
|
| 106 |
# Create test chunks
|
| 107 |
test_chunk = SourceChunkSchema(
|
|
|
|
| 145 |
assert "services" in data
|
| 146 |
|
| 147 |
# Check that services status is included
|
| 148 |
+
assert "openrouter" in data["services"]
|
| 149 |
assert "qdrant" in data["services"]
|
| 150 |
assert "agent" in data["services"]
|
| 151 |
|
|
|
|
| 157 |
'QDRANT_URL': 'http://test-qdrant:6333',
|
| 158 |
'QDRANT_API_KEY': 'test-api-key',
|
| 159 |
'COHERE_API_KEY': 'test-cohere-key',
|
| 160 |
+
'OPENROUTER_API_KEY': 'test-openrouter-key'
|
| 161 |
}):
|
| 162 |
with patch('rag_agent_api.retrieval.AsyncQdrantClient') as mock_qdrant_client:
|
| 163 |
with patch('rag_agent_api.retrieval.cohere.Client') as mock_cohere_client:
|
| 164 |
+
with patch('rag_agent_api.openrouter_agent.httpx.AsyncClient') as mock_httpx_client:
|
| 165 |
# Mock the Qdrant client
|
| 166 |
mock_qdrant_instance = Mock()
|
| 167 |
mock_qdrant_client.return_value = mock_qdrant_instance
|
|
|
|
| 172 |
mock_cohere_client.return_value = mock_cohere_instance
|
| 173 |
mock_cohere_instance.embed.return_value = Mock(embeddings=[[0.1, 0.2, 0.3]])
|
| 174 |
|
| 175 |
+
# Mock the httpx client for OpenRouter
|
| 176 |
+
mock_httpx_instance = Mock()
|
| 177 |
+
mock_httpx_client.return_value.__aenter__.return_value = mock_httpx_instance
|
| 178 |
mock_completion = Mock()
|
| 179 |
+
mock_completion.json.return_value = {
|
| 180 |
+
"choices": [
|
| 181 |
+
{"message": {"content": "This is a test response"}}
|
| 182 |
+
]
|
| 183 |
+
}
|
| 184 |
+
mock_httpx_instance.post = AsyncMock(return_value=mock_completion)
|
| 185 |
+
mock_httpx_instance.post.return_value.status_code = 200
|
| 186 |
|
| 187 |
# Initialize components
|
| 188 |
test_retriever = QdrantRetriever(collection_name="test_collection")
|
| 189 |
+
test_agent = OpenRouterAgent(model_name="gpt-4-test")
|
| 190 |
|
| 191 |
# Mock the retrieval result
|
| 192 |
mock_chunk = SourceChunkSchema(
|