Spaces:
Sleeping
Sleeping
Commit ·
ce4595c
1
Parent(s): 2ade705
Update backend logic
Browse files- .env +2 -2
- .env.example +2 -2
- README.md +57 -32
- backend.log +230 -81
- book_ingestor.egg-info/PKG-INFO +49 -24
- check_qdrant.py +59 -0
- rag_agent_api/README.md +9 -9
- rag_agent_api/__init__.py +2 -2
- rag_agent_api/__pycache__/__init__.cpython-313.pyc +0 -0
- rag_agent_api/__pycache__/agent.cpython-313.pyc +0 -0
- rag_agent_api/__pycache__/config.cpython-313.pyc +0 -0
- rag_agent_api/__pycache__/main.cpython-313.pyc +0 -0
- rag_agent_api/__pycache__/openrouter_agent.cpython-313.pyc +0 -0
- rag_agent_api/__pycache__/retrieval.cpython-313.pyc +0 -0
- rag_agent_api/agent.py +363 -0
- rag_agent_api/config.py +0 -1
- rag_agent_api/main.py +6 -11
- rag_agent_api/retrieval.py +126 -35
- requirements.txt +9 -11
- test_retrieval.py +60 -0
- tests/test_integration.py +18 -21
.env
CHANGED
|
@@ -4,7 +4,7 @@ QDRANT_API_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.BDBAtGf7
|
|
| 4 |
REACT_APP_RAG_API_URL=http://localhost:8000
|
| 5 |
# RAG Agent and API Layer Environment Variables
|
| 6 |
|
| 7 |
-
#
|
| 8 |
OPENROUTER_API_KEY=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
|
| 9 |
|
| 10 |
# Qdrant Configuration
|
|
@@ -13,7 +13,7 @@ QDRANT_API_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.BDBAtGf7
|
|
| 13 |
QDRANT_COLLECTION_NAME=rag_embedding
|
| 14 |
|
| 15 |
# Cohere Configuration (for query embeddings)
|
| 16 |
-
COHERE_API_KEY=
|
| 17 |
|
| 18 |
# Application Configuration
|
| 19 |
DEFAULT_CONTEXT_WINDOW=5
|
|
|
|
| 4 |
REACT_APP_RAG_API_URL=http://localhost:8000
|
| 5 |
# RAG Agent and API Layer Environment Variables
|
| 6 |
|
| 7 |
+
# OpenRouter API Configuration
|
| 8 |
OPENROUTER_API_KEY=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
|
| 9 |
|
| 10 |
# Qdrant Configuration
|
|
|
|
| 13 |
QDRANT_COLLECTION_NAME=rag_embedding
|
| 14 |
|
| 15 |
# Cohere Configuration (for query embeddings)
|
| 16 |
+
COHERE_API_KEY=RGfPBR6t5Ev2VXgIA00o5XcHiuXYkyCVL8TjkSZs
|
| 17 |
|
| 18 |
# Application Configuration
|
| 19 |
DEFAULT_CONTEXT_WINDOW=5
|
.env.example
CHANGED
|
@@ -1,14 +1,14 @@
|
|
| 1 |
# RAG Agent and API Layer Environment Variables
|
| 2 |
|
| 3 |
# OpenRouter API Configuration
|
| 4 |
-
OPENROUTER_API_KEY=
|
| 5 |
# Qdrant Configuration
|
| 6 |
QDRANT_URL=https://72888a6e-0dfc-4620-bf85-0b9025951e0c.us-east4-0.gcp.cloud.qdrant.io:6333
|
| 7 |
QDRANT_API_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.BDBAtGf7x_XGCu3lO4-kNxgJeVgnSTKUjHeZBT6qJkQ
|
| 8 |
QDRANT_COLLECTION_NAME=rag_embedding
|
| 9 |
REACT_APP_RAG_API_URL=http://localhost:8000
|
| 10 |
# Cohere Configuration (for query embeddings)
|
| 11 |
-
COHERE_API_KEY=
|
| 12 |
|
| 13 |
# Application Configuration
|
| 14 |
DEFAULT_CONTEXT_WINDOW=5
|
|
|
|
| 1 |
# RAG Agent and API Layer Environment Variables
|
| 2 |
|
| 3 |
# OpenRouter API Configuration
|
| 4 |
+
OPENROUTER_API_KEY=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
|
| 5 |
# Qdrant Configuration
|
| 6 |
QDRANT_URL=https://72888a6e-0dfc-4620-bf85-0b9025951e0c.us-east4-0.gcp.cloud.qdrant.io:6333
|
| 7 |
QDRANT_API_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.BDBAtGf7x_XGCu3lO4-kNxgJeVgnSTKUjHeZBT6qJkQ
|
| 8 |
QDRANT_COLLECTION_NAME=rag_embedding
|
| 9 |
REACT_APP_RAG_API_URL=http://localhost:8000
|
| 10 |
# Cohere Configuration (for query embeddings)
|
| 11 |
+
COHERE_API_KEY=RGfPBR6t5Ev2VXgIA00o5XcHiuXYkyCVL8TjkSZs
|
| 12 |
|
| 13 |
# Application Configuration
|
| 14 |
DEFAULT_CONTEXT_WINDOW=5
|
README.md
CHANGED
|
@@ -1,32 +1,57 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
- `
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Book Content Ingestor & RAG Verification
|
| 2 |
+
|
| 3 |
+
A system to extract content from Docusaurus-based book websites, chunk and embed it using Cohere, store embeddings in Qdrant Cloud for RAG applications, and verify the retrieval pipeline functionality.
|
| 4 |
+
|
| 5 |
+
## Setup
|
| 6 |
+
|
| 7 |
+
1. Install dependencies using uv:
|
| 8 |
+
```bash
|
| 9 |
+
cd backend
|
| 10 |
+
uv sync
|
| 11 |
+
```
|
| 12 |
+
|
| 13 |
+
2. Create a `.env` file with your API keys:
|
| 14 |
+
```bash
|
| 15 |
+
cp .env.example .env
|
| 16 |
+
# Edit .env with your actual API keys
|
| 17 |
+
```
|
| 18 |
+
|
| 19 |
+
## Environment Variables
|
| 20 |
+
|
| 21 |
+
- `COHERE_API_KEY`: Your Cohere API key
|
| 22 |
+
- `QDRANT_URL`: Your Qdrant Cloud URL
|
| 23 |
+
- `QDRANT_API_KEY`: Your Qdrant API key
|
| 24 |
+
- `QDRANT_COLLECTION_NAME`: Name of the collection to use (default: "rag_embedding")
|
| 25 |
+
|
| 26 |
+
## Usage
|
| 27 |
+
|
| 28 |
+
### Run the ingestion pipeline:
|
| 29 |
+
```bash
|
| 30 |
+
cd backend
|
| 31 |
+
uv run python main.py
|
| 32 |
+
```
|
| 33 |
+
|
| 34 |
+
This will:
|
| 35 |
+
1. Collect all URLs from the target book (https://sanilahmed.github.io/hackathon-ai-book/)
|
| 36 |
+
2. Extract text content from each URL
|
| 37 |
+
3. Chunk the content into fixed-size segments
|
| 38 |
+
4. Generate embeddings using Cohere
|
| 39 |
+
5. Store embeddings with metadata in Qdrant Cloud collection named "rag_embedding"
|
| 40 |
+
|
| 41 |
+
### Run the verification pipeline:
|
| 42 |
+
```bash
|
| 43 |
+
cd backend
|
| 44 |
+
python -m verify_retrieval.main
|
| 45 |
+
```
|
| 46 |
+
|
| 47 |
+
Or with specific options:
|
| 48 |
+
```bash
|
| 49 |
+
python -m verify_retrieval.main --query "transformer architecture in NLP" --top-k 10
|
| 50 |
+
```
|
| 51 |
+
|
| 52 |
+
The verification system will:
|
| 53 |
+
1. Load vectors and metadata stored in Qdrant from the original ingestion
|
| 54 |
+
2. Implement retrieval functions to query Qdrant using sample keywords or phrases
|
| 55 |
+
3. Validate that retrieved chunks are accurate and relevant
|
| 56 |
+
4. Check that metadata (URL, title, chunk_id) matches source content
|
| 57 |
+
5. Log results and confirm the pipeline executes end-to-end without errors
|
backend.log
CHANGED
|
@@ -1,14 +1,14 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
Traceback (most recent call last):
|
| 13 |
File "/mnt/d/Hackathon/book/backend/rag_agent_api/retrieval.py", line 132, in _embed_query
|
| 14 |
response = await self.cohere_client.embed(
|
|
@@ -32,20 +32,20 @@ Traceback (most recent call last):
|
|
| 32 |
raise TooManyRequestsError(
|
| 33 |
...<8 lines>...
|
| 34 |
)
|
| 35 |
-
cohere.errors.too_many_requests_error.TooManyRequestsError: headers: {'access-control-expose-headers': 'X-Debug-Trace-ID', 'cache-control': 'no-cache, no-store, no-transform, must-revalidate, private, max-age=0', 'content-encoding': 'gzip', 'content-type': 'application/json', 'expires': 'Thu, 01 Jan 1970 00:00:00 GMT', 'pragma': 'no-cache', 'vary': 'Origin,Accept-Encoding', 'x-accel-expires': '0', 'x-debug-trace-id': '
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
Traceback (most recent call last):
|
| 50 |
File "/mnt/d/Hackathon/book/backend/rag_agent_api/retrieval.py", line 132, in _embed_query
|
| 51 |
response = await self.cohere_client.embed(
|
|
@@ -69,21 +69,61 @@ Traceback (most recent call last):
|
|
| 69 |
raise TooManyRequestsError(
|
| 70 |
...<8 lines>...
|
| 71 |
)
|
| 72 |
-
cohere.errors.too_many_requests_error.TooManyRequestsError: headers: {'access-control-expose-headers': 'X-Debug-Trace-ID', 'cache-control': 'no-cache, no-store, no-transform, must-revalidate, private, max-age=0', 'content-encoding': 'gzip', 'content-type': 'application/json', 'expires': 'Thu, 01 Jan 1970 00:00:00 GMT', 'pragma': 'no-cache', 'vary': 'Origin,Accept-Encoding', 'x-accel-expires': '0', 'x-debug-trace-id': '
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
Traceback (most recent call last):
|
| 88 |
File "/mnt/d/Hackathon/book/backend/rag_agent_api/retrieval.py", line 132, in _embed_query
|
| 89 |
response = await self.cohere_client.embed(
|
|
@@ -103,54 +143,163 @@ Traceback (most recent call last):
|
|
| 103 |
...<7 lines>...
|
| 104 |
)
|
| 105 |
^
|
| 106 |
-
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/cohere/raw_base_client.py", line
|
| 107 |
-
|
| 108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
)
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
|
|
|
|
|
|
|
|
|
| 125 |
Traceback (most recent call last):
|
| 126 |
-
File "/
|
| 127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
...<3 lines>...
|
| 130 |
)
|
| 131 |
^
|
| 132 |
-
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/
|
| 133 |
-
await
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
^
|
| 138 |
-
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
...<
|
| 142 |
)
|
| 143 |
^
|
| 144 |
-
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/
|
| 145 |
-
|
| 146 |
-
|
|
|
|
| 147 |
)
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-02 21:51:07,979 - root - INFO - OpenRouter agent initialized with model: arcee-ai/trinity-mini:free
|
| 2 |
+
2026-01-02 21:51:07,980 - root - INFO - OpenRouter agent initialized successfully
|
| 3 |
+
2026-01-02 21:51:09,509 - httpx - INFO - HTTP Request: GET https://72888a6e-0dfc-4620-bf85-0b9025951e0c.us-east4-0.gcp.cloud.qdrant.io:6333 "HTTP/1.1 200 OK"
|
| 4 |
+
2026-01-02 21:51:09,616 - root - INFO - Initialized Qdrant retriever for collection: rag_embedding
|
| 5 |
+
2026-01-02 21:51:09,616 - root - INFO - Qdrant retriever initialized successfully
|
| 6 |
+
2026-01-02 21:51:09,616 - root - INFO - Application startup completed
|
| 7 |
+
2026-01-02 21:56:18,858 - root - INFO - Processing query: what about this book?...
|
| 8 |
+
2026-01-02 21:56:18,858 - root - INFO - Step 1: Retrieving relevant content from Qdrant...
|
| 9 |
+
2026-01-02 21:56:18,858 - root - INFO - Retrieving context for query: 'what about this book?' from collection: rag_embedding
|
| 10 |
+
2026-01-02 21:56:20,085 - httpx - INFO - HTTP Request: POST https://api.cohere.com/v1/embed "HTTP/1.1 429 Too Many Requests"
|
| 11 |
+
2026-01-02 21:56:20,158 - root - ERROR - Error embedding query with Cohere: headers: {'access-control-expose-headers': 'X-Debug-Trace-ID', 'cache-control': 'no-cache, no-store, no-transform, must-revalidate, private, max-age=0', 'content-encoding': 'gzip', 'content-type': 'application/json', 'expires': 'Thu, 01 Jan 1970 00:00:00 GMT', 'pragma': 'no-cache', 'vary': 'Origin,Accept-Encoding', 'x-accel-expires': '0', 'x-debug-trace-id': 'a074d2b0b8f1166420f46cc0e91c3ef8', 'date': 'Fri, 02 Jan 2026 16:56:15 GMT', 'x-envoy-upstream-service-time': '16', 'server': 'envoy', 'via': '1.1 google', 'alt-svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000', 'transfer-encoding': 'chunked'}, status_code: 429, body: {'id': '0d36b9be-f4cc-4559-b824-e673736abec0', 'message': 'Please wait and try again later'}
|
| 12 |
Traceback (most recent call last):
|
| 13 |
File "/mnt/d/Hackathon/book/backend/rag_agent_api/retrieval.py", line 132, in _embed_query
|
| 14 |
response = await self.cohere_client.embed(
|
|
|
|
| 32 |
raise TooManyRequestsError(
|
| 33 |
...<8 lines>...
|
| 34 |
)
|
| 35 |
+
cohere.errors.too_many_requests_error.TooManyRequestsError: headers: {'access-control-expose-headers': 'X-Debug-Trace-ID', 'cache-control': 'no-cache, no-store, no-transform, must-revalidate, private, max-age=0', 'content-encoding': 'gzip', 'content-type': 'application/json', 'expires': 'Thu, 01 Jan 1970 00:00:00 GMT', 'pragma': 'no-cache', 'vary': 'Origin,Accept-Encoding', 'x-accel-expires': '0', 'x-debug-trace-id': 'a074d2b0b8f1166420f46cc0e91c3ef8', 'date': 'Fri, 02 Jan 2026 16:56:15 GMT', 'x-envoy-upstream-service-time': '16', 'server': 'envoy', 'via': '1.1 google', 'alt-svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000', 'transfer-encoding': 'chunked'}, status_code: 429, body: {'id': '0d36b9be-f4cc-4559-b824-e673736abec0', 'message': 'Please wait and try again later'}
|
| 36 |
+
2026-01-02 21:56:21,542 - root - WARNING - Using zero vector as final fallback for query embedding
|
| 37 |
+
2026-01-02 21:56:23,990 - httpx - INFO - HTTP Request: POST https://72888a6e-0dfc-4620-bf85-0b9025951e0c.us-east4-0.gcp.cloud.qdrant.io:6333/collections/rag_embedding/points/query "HTTP/1.1 200 OK"
|
| 38 |
+
2026-01-02 21:56:24,063 - root - INFO - Retrieved 5 valid chunks from Qdrant
|
| 39 |
+
2026-01-02 21:56:24,063 - root - INFO - Retrieved 5 chunks from Qdrant
|
| 40 |
+
2026-01-02 21:56:24,063 - root - INFO - Step 2: Generating response with OpenAI agent...
|
| 41 |
+
2026-01-02 21:56:27,063 - httpx - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
|
| 42 |
+
2026-01-02 21:56:28,191 - root - INFO - Step 3: Formatting response...
|
| 43 |
+
2026-01-02 21:56:28,191 - root - INFO - Query processed successfully, response ID: resp_12b8d406
|
| 44 |
+
2026-01-02 22:18:31,661 - root - INFO - Processing query: what about this book?...
|
| 45 |
+
2026-01-02 22:18:31,672 - root - INFO - Step 1: Retrieving relevant content from Qdrant...
|
| 46 |
+
2026-01-02 22:18:31,679 - root - INFO - Retrieving context for query: 'what about this book?' from collection: rag_embedding
|
| 47 |
+
2026-01-02 22:18:32,663 - httpx - INFO - HTTP Request: POST https://api.cohere.com/v1/embed "HTTP/1.1 429 Too Many Requests"
|
| 48 |
+
2026-01-02 22:18:32,681 - root - ERROR - Error embedding query with Cohere: headers: {'access-control-expose-headers': 'X-Debug-Trace-ID', 'cache-control': 'no-cache, no-store, no-transform, must-revalidate, private, max-age=0', 'content-encoding': 'gzip', 'content-type': 'application/json', 'expires': 'Thu, 01 Jan 1970 00:00:00 GMT', 'pragma': 'no-cache', 'vary': 'Origin,Accept-Encoding', 'x-accel-expires': '0', 'x-debug-trace-id': '16258d9e56f535c3a9cda7da3a75bc2d', 'date': 'Fri, 02 Jan 2026 17:18:28 GMT', 'x-envoy-upstream-service-time': '13', 'server': 'envoy', 'via': '1.1 google', 'alt-svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000', 'transfer-encoding': 'chunked'}, status_code: 429, body: {'id': '69b64fea-d70d-43f9-a1d9-9fc56b940914', 'message': 'Please wait and try again later'}
|
| 49 |
Traceback (most recent call last):
|
| 50 |
File "/mnt/d/Hackathon/book/backend/rag_agent_api/retrieval.py", line 132, in _embed_query
|
| 51 |
response = await self.cohere_client.embed(
|
|
|
|
| 69 |
raise TooManyRequestsError(
|
| 70 |
...<8 lines>...
|
| 71 |
)
|
| 72 |
+
cohere.errors.too_many_requests_error.TooManyRequestsError: headers: {'access-control-expose-headers': 'X-Debug-Trace-ID', 'cache-control': 'no-cache, no-store, no-transform, must-revalidate, private, max-age=0', 'content-encoding': 'gzip', 'content-type': 'application/json', 'expires': 'Thu, 01 Jan 1970 00:00:00 GMT', 'pragma': 'no-cache', 'vary': 'Origin,Accept-Encoding', 'x-accel-expires': '0', 'x-debug-trace-id': '16258d9e56f535c3a9cda7da3a75bc2d', 'date': 'Fri, 02 Jan 2026 17:18:28 GMT', 'x-envoy-upstream-service-time': '13', 'server': 'envoy', 'via': '1.1 google', 'alt-svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000', 'transfer-encoding': 'chunked'}, status_code: 429, body: {'id': '69b64fea-d70d-43f9-a1d9-9fc56b940914', 'message': 'Please wait and try again later'}
|
| 73 |
+
2026-01-02 22:18:32,704 - root - WARNING - Using zero vector as final fallback for query embedding
|
| 74 |
+
2026-01-02 22:18:34,063 - httpx - INFO - HTTP Request: POST https://72888a6e-0dfc-4620-bf85-0b9025951e0c.us-east4-0.gcp.cloud.qdrant.io:6333/collections/rag_embedding/points/query "HTTP/1.1 200 OK"
|
| 75 |
+
2026-01-02 22:18:34,095 - root - INFO - Retrieved 5 valid chunks from Qdrant
|
| 76 |
+
2026-01-02 22:18:34,097 - root - INFO - Retrieved 5 chunks from Qdrant
|
| 77 |
+
2026-01-02 22:18:34,098 - root - INFO - Step 2: Generating response with OpenAI agent...
|
| 78 |
+
2026-01-02 22:18:38,176 - httpx - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
|
| 79 |
+
2026-01-02 22:18:40,245 - root - INFO - Agent response generated successfully. Confidence: 0.30
|
| 80 |
+
2026-01-02 22:18:40,245 - root - INFO - Step 3: Formatting response...
|
| 81 |
+
2026-01-02 22:18:40,246 - root - INFO - Query processed successfully, response ID: resp_c32d1dbe
|
| 82 |
+
2026-01-02 22:20:37,532 - root - INFO - Processing query: what about this book?...
|
| 83 |
+
2026-01-02 22:20:37,533 - root - INFO - Step 1: Retrieving relevant content from Qdrant...
|
| 84 |
+
2026-01-02 22:20:37,533 - root - INFO - Retrieving context for query: 'what about this book?' from collection: rag_embedding
|
| 85 |
+
2026-01-02 22:20:47,620 - root - ERROR - Error embedding query with Cohere: [Errno -3] Temporary failure in name resolution
|
| 86 |
+
Traceback (most recent call last):
|
| 87 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpx/_transports/default.py", line 101, in map_httpcore_exceptions
|
| 88 |
+
yield
|
| 89 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpx/_transports/default.py", line 394, in handle_async_request
|
| 90 |
+
resp = await self._pool.handle_async_request(req)
|
| 91 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 92 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpcore/_async/connection_pool.py", line 256, in handle_async_request
|
| 93 |
+
raise exc from None
|
| 94 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpcore/_async/connection_pool.py", line 236, in handle_async_request
|
| 95 |
+
response = await connection.handle_async_request(
|
| 96 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 97 |
+
pool_request.request
|
| 98 |
+
^^^^^^^^^^^^^^^^^^^^
|
| 99 |
+
)
|
| 100 |
+
^
|
| 101 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpcore/_async/connection.py", line 101, in handle_async_request
|
| 102 |
+
raise exc
|
| 103 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpcore/_async/connection.py", line 78, in handle_async_request
|
| 104 |
+
stream = await self._connect(request)
|
| 105 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 106 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpcore/_async/connection.py", line 124, in _connect
|
| 107 |
+
stream = await self._network_backend.connect_tcp(**kwargs)
|
| 108 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 109 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpcore/_backends/auto.py", line 31, in connect_tcp
|
| 110 |
+
return await self._backend.connect_tcp(
|
| 111 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 112 |
+
...<5 lines>...
|
| 113 |
+
)
|
| 114 |
+
^
|
| 115 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpcore/_backends/anyio.py", line 113, in connect_tcp
|
| 116 |
+
with map_exceptions(exc_map):
|
| 117 |
+
~~~~~~~~~~~~~~^^^^^^^^^
|
| 118 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/contextlib.py", line 162, in __exit__
|
| 119 |
+
self.gen.throw(value)
|
| 120 |
+
~~~~~~~~~~~~~~^^^^^^^
|
| 121 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpcore/_exceptions.py", line 14, in map_exceptions
|
| 122 |
+
raise to_exc(exc) from exc
|
| 123 |
+
httpcore.ConnectError: [Errno -3] Temporary failure in name resolution
|
| 124 |
+
|
| 125 |
+
The above exception was the direct cause of the following exception:
|
| 126 |
+
|
| 127 |
Traceback (most recent call last):
|
| 128 |
File "/mnt/d/Hackathon/book/backend/rag_agent_api/retrieval.py", line 132, in _embed_query
|
| 129 |
response = await self.cohere_client.embed(
|
|
|
|
| 143 |
...<7 lines>...
|
| 144 |
)
|
| 145 |
^
|
| 146 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/cohere/raw_base_client.py", line 4554, in embed
|
| 147 |
+
_response = await self._client_wrapper.httpx_client.request(
|
| 148 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 149 |
+
...<15 lines>...
|
| 150 |
+
)
|
| 151 |
+
^
|
| 152 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/cohere/core/http_client.py", line 412, in request
|
| 153 |
+
response = await self.httpx_client.request(
|
| 154 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 155 |
+
...<33 lines>...
|
| 156 |
+
)
|
| 157 |
+
^
|
| 158 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpx/_client.py", line 1540, in request
|
| 159 |
+
return await self.send(request, auth=auth, follow_redirects=follow_redirects)
|
| 160 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 161 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpx/_client.py", line 1629, in send
|
| 162 |
+
response = await self._send_handling_auth(
|
| 163 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 164 |
+
...<4 lines>...
|
| 165 |
+
)
|
| 166 |
+
^
|
| 167 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpx/_client.py", line 1657, in _send_handling_auth
|
| 168 |
+
response = await self._send_handling_redirects(
|
| 169 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 170 |
+
...<3 lines>...
|
| 171 |
)
|
| 172 |
+
^
|
| 173 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpx/_client.py", line 1694, in _send_handling_redirects
|
| 174 |
+
response = await self._send_single_request(request)
|
| 175 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 176 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpx/_client.py", line 1730, in _send_single_request
|
| 177 |
+
response = await transport.handle_async_request(request)
|
| 178 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 179 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpx/_transports/default.py", line 393, in handle_async_request
|
| 180 |
+
with map_httpcore_exceptions():
|
| 181 |
+
~~~~~~~~~~~~~~~~~~~~~~~^^
|
| 182 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/contextlib.py", line 162, in __exit__
|
| 183 |
+
self.gen.throw(value)
|
| 184 |
+
~~~~~~~~~~~~~~^^^^^^^
|
| 185 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpx/_transports/default.py", line 118, in map_httpcore_exceptions
|
| 186 |
+
raise mapped_exc(message) from exc
|
| 187 |
+
httpx.ConnectError: [Errno -3] Temporary failure in name resolution
|
| 188 |
+
2026-01-02 22:20:48,168 - root - WARNING - Using zero vector as final fallback for query embedding
|
| 189 |
+
2026-01-02 22:20:58,240 - root - ERROR - Error retrieving context from Qdrant: [Errno -3] Temporary failure in name resolution
|
| 190 |
Traceback (most recent call last):
|
| 191 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpx/_transports/default.py", line 101, in map_httpcore_exceptions
|
| 192 |
+
yield
|
| 193 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpx/_transports/default.py", line 394, in handle_async_request
|
| 194 |
+
resp = await self._pool.handle_async_request(req)
|
| 195 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 196 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpcore/_async/connection_pool.py", line 256, in handle_async_request
|
| 197 |
+
raise exc from None
|
| 198 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpcore/_async/connection_pool.py", line 236, in handle_async_request
|
| 199 |
+
response = await connection.handle_async_request(
|
| 200 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 201 |
+
pool_request.request
|
| 202 |
+
^^^^^^^^^^^^^^^^^^^^
|
| 203 |
+
)
|
| 204 |
+
^
|
| 205 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpcore/_async/connection.py", line 101, in handle_async_request
|
| 206 |
+
raise exc
|
| 207 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpcore/_async/connection.py", line 78, in handle_async_request
|
| 208 |
+
stream = await self._connect(request)
|
| 209 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 210 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpcore/_async/connection.py", line 124, in _connect
|
| 211 |
+
stream = await self._network_backend.connect_tcp(**kwargs)
|
| 212 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 213 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpcore/_backends/auto.py", line 31, in connect_tcp
|
| 214 |
+
return await self._backend.connect_tcp(
|
| 215 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 216 |
+
...<5 lines>...
|
| 217 |
+
)
|
| 218 |
+
^
|
| 219 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpcore/_backends/anyio.py", line 113, in connect_tcp
|
| 220 |
+
with map_exceptions(exc_map):
|
| 221 |
+
~~~~~~~~~~~~~~^^^^^^^^^
|
| 222 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/contextlib.py", line 162, in __exit__
|
| 223 |
+
self.gen.throw(value)
|
| 224 |
+
~~~~~~~~~~~~~~^^^^^^^
|
| 225 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpcore/_exceptions.py", line 14, in map_exceptions
|
| 226 |
+
raise to_exc(exc) from exc
|
| 227 |
+
httpcore.ConnectError: [Errno -3] Temporary failure in name resolution
|
| 228 |
+
|
| 229 |
+
The above exception was the direct cause of the following exception:
|
| 230 |
+
|
| 231 |
+
Traceback (most recent call last):
|
| 232 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/qdrant_client/http/api_client.py", line 223, in send_inner
|
| 233 |
+
response = await self._async_client.send(request)
|
| 234 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 235 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpx/_client.py", line 1629, in send
|
| 236 |
+
response = await self._send_handling_auth(
|
| 237 |
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 238 |
+
...<4 lines>...
|
| 239 |
+
)
|
| 240 |
+
^
|
| 241 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpx/_client.py", line 1657, in _send_handling_auth
|
| 242 |
+
response = await self._send_handling_redirects(
|
| 243 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 244 |
...<3 lines>...
|
| 245 |
)
|
| 246 |
^
|
| 247 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpx/_client.py", line 1694, in _send_handling_redirects
|
| 248 |
+
response = await self._send_single_request(request)
|
| 249 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 250 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpx/_client.py", line 1730, in _send_single_request
|
| 251 |
+
response = await transport.handle_async_request(request)
|
| 252 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 253 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpx/_transports/default.py", line 393, in handle_async_request
|
| 254 |
+
with map_httpcore_exceptions():
|
| 255 |
+
~~~~~~~~~~~~~~~~~~~~~~~^^
|
| 256 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/contextlib.py", line 162, in __exit__
|
| 257 |
+
self.gen.throw(value)
|
| 258 |
+
~~~~~~~~~~~~~~^^^^^^^
|
| 259 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpx/_transports/default.py", line 118, in map_httpcore_exceptions
|
| 260 |
+
raise mapped_exc(message) from exc
|
| 261 |
+
httpx.ConnectError: [Errno -3] Temporary failure in name resolution
|
| 262 |
+
|
| 263 |
+
During handling of the above exception, another exception occurred:
|
| 264 |
+
|
| 265 |
+
Traceback (most recent call last):
|
| 266 |
+
File "/mnt/d/Hackathon/book/backend/rag_agent_api/retrieval.py", line 80, in retrieve_context
|
| 267 |
+
search_results = await self.client.query_points(
|
| 268 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 269 |
+
...<5 lines>...
|
| 270 |
+
)
|
| 271 |
^
|
| 272 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/qdrant_client/async_qdrant_client.py", line 400, in query_points
|
| 273 |
+
return await self._client.query_points(
|
| 274 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 275 |
+
...<16 lines>...
|
| 276 |
)
|
| 277 |
^
|
| 278 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/qdrant_client/async_qdrant_remote.py", line 461, in query_points
|
| 279 |
+
query_result = await self.http.search_api.query_points(
|
| 280 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 281 |
+
...<4 lines>...
|
| 282 |
)
|
| 283 |
+
^
|
| 284 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/qdrant_client/http/api/search_api.py", line 560, in query_points
|
| 285 |
+
return await self._build_for_query_points(
|
| 286 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 287 |
+
...<4 lines>...
|
| 288 |
+
)
|
| 289 |
+
^
|
| 290 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/qdrant_client/http/api_client.py", line 184, in request
|
| 291 |
+
return await self.send(request, type_)
|
| 292 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 293 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/qdrant_client/http/api_client.py", line 201, in send
|
| 294 |
+
response = await self.middleware(request, self.send_inner)
|
| 295 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 296 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/qdrant_client/http/api_client.py", line 245, in __call__
|
| 297 |
+
return await call_next(request)
|
| 298 |
+
^^^^^^^^^^^^^^^^^^^^^^^^
|
| 299 |
+
File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/qdrant_client/http/api_client.py", line 225, in send_inner
|
| 300 |
+
raise ResponseHandlingException(e)
|
| 301 |
+
qdrant_client.http.exceptions.ResponseHandlingException: [Errno -3] Temporary failure in name resolution
|
| 302 |
+
2026-01-02 22:20:58,441 - root - INFO - Retrieved 0 chunks from Qdrant
|
| 303 |
+
2026-01-02 22:20:58,441 - root - INFO - Step 2: Generating response with OpenAI agent...
|
| 304 |
+
2026-01-02 22:20:58,441 - root - INFO - Step 3: Formatting response...
|
| 305 |
+
2026-01-02 22:20:58,441 - root - INFO - Query processed successfully, response ID: resp_ab31a354
|
book_ingestor.egg-info/PKG-INFO
CHANGED
|
@@ -14,35 +14,60 @@ Requires-Dist: uvicorn>=0.24.0
|
|
| 14 |
Requires-Dist: openai>=1.0.0
|
| 15 |
Requires-Dist: pydantic>=2.0.0
|
| 16 |
|
| 17 |
-
|
| 18 |
-
title: Backend Deploy
|
| 19 |
-
emoji: 🚀
|
| 20 |
-
colorFrom: blue
|
| 21 |
-
colorTo: purple
|
| 22 |
-
sdk: docker
|
| 23 |
-
pinned: false
|
| 24 |
-
---
|
| 25 |
|
| 26 |
-
|
| 27 |
|
| 28 |
-
|
| 29 |
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
- `/redoc` - API documentation (Redoc)
|
| 38 |
|
| 39 |
-
##
|
| 40 |
|
| 41 |
-
|
| 42 |
-
- `
|
| 43 |
-
- `
|
| 44 |
-
- `
|
| 45 |
|
| 46 |
-
##
|
| 47 |
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
Requires-Dist: openai>=1.0.0
|
| 15 |
Requires-Dist: pydantic>=2.0.0
|
| 16 |
|
| 17 |
+
# Book Content Ingestor & RAG Verification
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
+
A system to extract content from Docusaurus-based book websites, chunk and embed it using Cohere, store embeddings in Qdrant Cloud for RAG applications, and verify the retrieval pipeline functionality.
|
| 20 |
|
| 21 |
+
## Setup
|
| 22 |
|
| 23 |
+
1. Install dependencies using uv:
|
| 24 |
+
```bash
|
| 25 |
+
cd backend
|
| 26 |
+
uv sync
|
| 27 |
+
```
|
| 28 |
|
| 29 |
+
2. Create a `.env` file with your API keys:
|
| 30 |
+
```bash
|
| 31 |
+
cp .env.example .env
|
| 32 |
+
# Edit .env with your actual API keys
|
| 33 |
+
```
|
|
|
|
| 34 |
|
| 35 |
+
## Environment Variables
|
| 36 |
|
| 37 |
+
- `COHERE_API_KEY`: Your Cohere API key
|
| 38 |
+
- `QDRANT_URL`: Your Qdrant Cloud URL
|
| 39 |
+
- `QDRANT_API_KEY`: Your Qdrant API key
|
| 40 |
+
- `QDRANT_COLLECTION_NAME`: Name of the collection to use (default: "rag_embedding")
|
| 41 |
|
| 42 |
+
## Usage
|
| 43 |
|
| 44 |
+
### Run the ingestion pipeline:
|
| 45 |
+
```bash
|
| 46 |
+
cd backend
|
| 47 |
+
uv run python main.py
|
| 48 |
+
```
|
| 49 |
+
|
| 50 |
+
This will:
|
| 51 |
+
1. Collect all URLs from the target book (https://sanilahmed.github.io/hackathon-ai-book/)
|
| 52 |
+
2. Extract text content from each URL
|
| 53 |
+
3. Chunk the content into fixed-size segments
|
| 54 |
+
4. Generate embeddings using Cohere
|
| 55 |
+
5. Store embeddings with metadata in Qdrant Cloud collection named "rag_embedding"
|
| 56 |
+
|
| 57 |
+
### Run the verification pipeline:
|
| 58 |
+
```bash
|
| 59 |
+
cd backend
|
| 60 |
+
python -m verify_retrieval.main
|
| 61 |
+
```
|
| 62 |
+
|
| 63 |
+
Or with specific options:
|
| 64 |
+
```bash
|
| 65 |
+
python -m verify_retrieval.main --query "transformer architecture in NLP" --top-k 10
|
| 66 |
+
```
|
| 67 |
+
|
| 68 |
+
The verification system will:
|
| 69 |
+
1. Load vectors and metadata stored in Qdrant from the original ingestion
|
| 70 |
+
2. Implement retrieval functions to query Qdrant using sample keywords or phrases
|
| 71 |
+
3. Validate that retrieved chunks are accurate and relevant
|
| 72 |
+
4. Check that metadata (URL, title, chunk_id) matches source content
|
| 73 |
+
5. Log results and confirm the pipeline executes end-to-end without errors
|
check_qdrant.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
"""
Sanity-check script: verify that the Qdrant collection exists and holds data.
Reads QDRANT_URL / QDRANT_API_KEY from the environment (.env supported).
"""
import os
from qdrant_client import QdrantClient
from dotenv import load_dotenv

# Pull QDRANT_* settings from a local .env file if present.
load_dotenv()

qdrant_url = os.getenv('QDRANT_URL')
qdrant_api_key = os.getenv('QDRANT_API_KEY')

if not qdrant_url or not qdrant_api_key:
    print("Error: QDRANT_URL or QDRANT_API_KEY not found in environment variables")
    exit(1)

# Connect to the Qdrant instance with a generous timeout for cloud latency.
client = QdrantClient(url=qdrant_url, api_key=qdrant_api_key, timeout=30)

try:
    # Enumerate every collection with its point count.
    print("Available collections:")
    for collection in client.get_collections().collections:
        # Newer Qdrant clients report the point count on the collection info.
        collection_info = client.get_collection(collection.name)
        print(f" - {collection.name} (points: {collection_info.points_count})")

    # Look specifically for the ingestion target collection.
    try:
        collection_info = client.get_collection("rag_embedding")
        print(f"\nCollection 'rag_embedding' exists with {collection_info.points_count} points")

        if collection_info.points_count > 0:
            # Pull one record to confirm payload content is readable.
            points = client.scroll(
                collection_name="rag_embedding",
                limit=1
            )
            if points[0]:
                sample_point = points[0][0]
                print(f"Sample point ID: {sample_point.id}")
                print(f"Sample point payload keys: {list(sample_point.payload.keys())}")
                print(f"Sample text preview: {sample_point.payload.get('text', '')[:100]}...")
        else:
            print("Collection 'rag_embedding' exists but is empty")

    except Exception as e:
        print(f"\nCollection 'rag_embedding' does not exist: {e}")

except Exception as e:
    print(f"Error connecting to Qdrant: {e}")
|
rag_agent_api/README.md
CHANGED
|
@@ -1,17 +1,17 @@
|
|
| 1 |
# RAG Agent and API Layer
|
| 2 |
|
| 3 |
-
A FastAPI-based question-answering system that uses
|
| 4 |
|
| 5 |
## Overview
|
| 6 |
|
| 7 |
-
The RAG Agent and API Layer provides a question-answering API that retrieves relevant content from Qdrant and uses an
|
| 8 |
|
| 9 |
## Architecture
|
| 10 |
|
| 11 |
The system consists of several key components:
|
| 12 |
|
| 13 |
- **FastAPI Application**: Main entry point for the question-answering API
|
| 14 |
-
- **
|
| 15 |
- **Qdrant Retriever**: Retrieves relevant content chunks from Qdrant database
|
| 16 |
- **Configuration Manager**: Handles environment variables and settings
|
| 17 |
- **Data Models**: Pydantic models for API requests/responses
|
|
@@ -22,7 +22,7 @@ The system consists of several key components:
|
|
| 22 |
### Prerequisites
|
| 23 |
|
| 24 |
- Python 3.9+
|
| 25 |
-
-
|
| 26 |
- Qdrant Cloud instance with book content embeddings
|
| 27 |
- Cohere API key (for query embeddings)
|
| 28 |
|
|
@@ -42,7 +42,7 @@ The system consists of several key components:
|
|
| 42 |
|
| 43 |
3. Edit `.env` with your API keys and configuration:
|
| 44 |
```env
|
| 45 |
-
|
| 46 |
QDRANT_URL=your-qdrant-instance-url
|
| 47 |
QDRANT_API_KEY=your-qdrant-api-key
|
| 48 |
QDRANT_COLLECTION_NAME=rag_embedding
|
|
@@ -103,7 +103,7 @@ Root endpoint with API information.
|
|
| 103 |
|
| 104 |
### Environment Variables
|
| 105 |
|
| 106 |
-
- `
|
| 107 |
- `QDRANT_URL`: URL of your Qdrant instance
|
| 108 |
- `QDRANT_API_KEY`: Your Qdrant API key
|
| 109 |
- `QDRANT_COLLECTION_NAME`: Name of the collection with book embeddings (default: `rag_embedding`)
|
|
@@ -123,8 +123,8 @@ Pydantic models for API request/response schemas.
|
|
| 123 |
### Schemas (`schemas.py`)
|
| 124 |
Additional schemas for internal data structures.
|
| 125 |
|
| 126 |
-
### Agent (`
|
| 127 |
-
|
| 128 |
|
| 129 |
### Retrieval (`retrieval.py`)
|
| 130 |
Qdrant integration for content retrieval with semantic search.
|
|
@@ -160,7 +160,7 @@ pytest
|
|
| 160 |
|
| 161 |
# Run specific test files
|
| 162 |
pytest tests/test_api.py
|
| 163 |
-
pytest tests/
|
| 164 |
pytest tests/test_retrieval.py
|
| 165 |
```
|
| 166 |
|
|
|
|
| 1 |
# RAG Agent and API Layer
|
| 2 |
|
| 3 |
+
A FastAPI-based question-answering system that uses a Google Gemini agent and Qdrant retrieval to generate grounded responses based on book content.
|
| 4 |
|
| 5 |
## Overview
|
| 6 |
|
| 7 |
+
The RAG Agent and API Layer provides a question-answering API that retrieves relevant content from Qdrant and uses a Google Gemini agent to generate accurate, source-grounded responses. The system ensures that all answers are based only on the provided context to prevent hallucinations.
|
| 8 |
|
| 9 |
## Architecture
|
| 10 |
|
| 11 |
The system consists of several key components:
|
| 12 |
|
| 13 |
- **FastAPI Application**: Main entry point for the question-answering API
|
| 14 |
+
- **OpenAI Agent**: Generates responses based on retrieved context
|
| 15 |
- **Qdrant Retriever**: Retrieves relevant content chunks from Qdrant database
|
| 16 |
- **Configuration Manager**: Handles environment variables and settings
|
| 17 |
- **Data Models**: Pydantic models for API requests/responses
|
|
|
|
| 22 |
### Prerequisites
|
| 23 |
|
| 24 |
- Python 3.9+
|
| 25 |
+
- Google Gemini API key
|
| 26 |
- Qdrant Cloud instance with book content embeddings
|
| 27 |
- Cohere API key (for query embeddings)
|
| 28 |
|
|
|
|
| 42 |
|
| 43 |
3. Edit `.env` with your API keys and configuration:
|
| 44 |
```env
|
| 45 |
+
OPENAI_API_KEY=your-openai-api-key-here
|
| 46 |
QDRANT_URL=your-qdrant-instance-url
|
| 47 |
QDRANT_API_KEY=your-qdrant-api-key
|
| 48 |
QDRANT_COLLECTION_NAME=rag_embedding
|
|
|
|
| 103 |
|
| 104 |
### Environment Variables
|
| 105 |
|
| 106 |
+
- `GEMINI_API_KEY`: Your Google Gemini API key
|
| 107 |
- `QDRANT_URL`: URL of your Qdrant instance
|
| 108 |
- `QDRANT_API_KEY`: Your Qdrant API key
|
| 109 |
- `QDRANT_COLLECTION_NAME`: Name of the collection with book embeddings (default: `rag_embedding`)
|
|
|
|
| 123 |
### Schemas (`schemas.py`)
|
| 124 |
Additional schemas for internal data structures.
|
| 125 |
|
| 126 |
+
### Agent (`agent.py`)
|
| 127 |
+
OpenAI agent implementation with context injection and response validation.
|
| 128 |
|
| 129 |
### Retrieval (`retrieval.py`)
|
| 130 |
Qdrant integration for content retrieval with semantic search.
|
|
|
|
| 160 |
|
| 161 |
# Run specific test files
|
| 162 |
pytest tests/test_api.py
|
| 163 |
+
pytest tests/test_agent.py
|
| 164 |
pytest tests/test_retrieval.py
|
| 165 |
```
|
| 166 |
|
rag_agent_api/__init__.py
CHANGED
|
@@ -10,7 +10,7 @@ __license__ = "MIT"
|
|
| 10 |
# Import main components for easy access
|
| 11 |
from .main import app
|
| 12 |
from .config import Config, get_config, validate_config
|
| 13 |
-
from .
|
| 14 |
from .retrieval import QdrantRetriever
|
| 15 |
|
| 16 |
# Define what gets imported with "from rag_agent_api import *"
|
|
@@ -19,6 +19,6 @@ __all__ = [
|
|
| 19 |
"Config",
|
| 20 |
"get_config",
|
| 21 |
"validate_config",
|
| 22 |
-
"
|
| 23 |
"QdrantRetriever"
|
| 24 |
]
|
|
|
|
| 10 |
# Import main components for easy access
|
| 11 |
from .main import app
|
| 12 |
from .config import Config, get_config, validate_config
|
| 13 |
+
from .agent import GeminiAgent
|
| 14 |
from .retrieval import QdrantRetriever
|
| 15 |
|
| 16 |
# Define what gets imported with "from rag_agent_api import *"
|
|
|
|
| 19 |
"Config",
|
| 20 |
"get_config",
|
| 21 |
"validate_config",
|
| 22 |
+
"GeminiAgent",
|
| 23 |
"QdrantRetriever"
|
| 24 |
]
|
rag_agent_api/__pycache__/__init__.cpython-313.pyc
CHANGED
|
Binary files a/rag_agent_api/__pycache__/__init__.cpython-313.pyc and b/rag_agent_api/__pycache__/__init__.cpython-313.pyc differ
|
|
|
rag_agent_api/__pycache__/agent.cpython-313.pyc
CHANGED
|
Binary files a/rag_agent_api/__pycache__/agent.cpython-313.pyc and b/rag_agent_api/__pycache__/agent.cpython-313.pyc differ
|
|
|
rag_agent_api/__pycache__/config.cpython-313.pyc
CHANGED
|
Binary files a/rag_agent_api/__pycache__/config.cpython-313.pyc and b/rag_agent_api/__pycache__/config.cpython-313.pyc differ
|
|
|
rag_agent_api/__pycache__/main.cpython-313.pyc
CHANGED
|
Binary files a/rag_agent_api/__pycache__/main.cpython-313.pyc and b/rag_agent_api/__pycache__/main.cpython-313.pyc differ
|
|
|
rag_agent_api/__pycache__/openrouter_agent.cpython-313.pyc
CHANGED
|
Binary files a/rag_agent_api/__pycache__/openrouter_agent.cpython-313.pyc and b/rag_agent_api/__pycache__/openrouter_agent.cpython-313.pyc differ
|
|
|
rag_agent_api/__pycache__/retrieval.cpython-313.pyc
CHANGED
|
Binary files a/rag_agent_api/__pycache__/retrieval.cpython-313.pyc and b/rag_agent_api/__pycache__/retrieval.cpython-313.pyc differ
|
|
|
rag_agent_api/agent.py
ADDED
|
@@ -0,0 +1,363 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Google Gemini Agent module for the RAG Agent and API Layer system.
|
| 3 |
+
|
| 4 |
+
This module provides functionality for creating and managing a Google Gemini agent
|
| 5 |
+
that generates responses based on retrieved context.
|
| 6 |
+
"""
|
| 7 |
+
import asyncio
|
| 8 |
+
import logging
|
| 9 |
+
from typing import List, Dict, Any, Optional
|
| 10 |
+
import google.generativeai as genai
|
| 11 |
+
from .config import get_config
|
| 12 |
+
from .schemas import AgentContext, AgentResponse, SourceChunkSchema
|
| 13 |
+
from .utils import format_confidence_score
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class GeminiAgent:
    """
    Manage a Google Gemini agent that generates answers grounded in
    retrieved book context.

    The agent answers ONLY from the supplied context chunks and replies with
    a fixed "not found" sentence when the context is missing, insufficient,
    or irrelevant, so the API contract stays deterministic for callers.
    """

    # Exact fallback sentence the API contract promises when no answer exists.
    NOT_FOUND_MESSAGE = "I could not find this information in the book."

    def __init__(self, model_name: str = "gemini-2.5-flash"):
        """
        Initialize the Google Gemini agent with configuration.

        Args:
            model_name: Name of the Gemini model to use (default: gemini-2.5-flash)

        Raises:
            ValueError: If the Gemini API key is not configured.
        """
        config = get_config()
        # NOTE(review): assumes Config exposes `gemini_api_key`; the visible
        # config.py hunk only shows cohere/openrouter/qdrant keys — confirm.
        api_key = config.gemini_api_key

        if not api_key:
            raise ValueError("GEMINI_API_KEY environment variable not set")

        # Configure the Gemini client library with the key.
        genai.configure(api_key=api_key)

        # Create the generative model instance used for every request.
        self.model = genai.GenerativeModel(model_name)
        self.model_name = model_name
        self.default_temperature = config.default_temperature

        logging.info(f"Gemini agent initialized with model: {model_name}")

    def _not_found_response(self, details: str, is_valid: bool = True) -> "AgentResponse":
        """Build the canonical 'not found' AgentResponse with the given validation details."""
        return AgentResponse(
            raw_response=self.NOT_FOUND_MESSAGE,
            used_sources=[],
            confidence_score=0.0,
            is_valid=is_valid,
            validation_details=details,
            unsupported_claims=[]
        )

    async def generate_response(self, context: "AgentContext") -> "AgentResponse":
        """
        Generate a response based on the provided context.

        Args:
            context: AgentContext containing the query and retrieved context chunks

        Returns:
            AgentResponse with the generated answer and metadata. Errors are
            reported via the "not found" response with is_valid=False rather
            than raised.
        """
        # No chunks at all -> contractually reply "not found".
        if not context.retrieved_chunks:
            return self._not_found_response("No context chunks retrieved from the database")

        # Reject effectively-empty context (total text shorter than 10 chars).
        total_context_length = sum(len(chunk.content) for chunk in context.retrieved_chunks)
        if total_context_length < 10:
            return self._not_found_response("No sufficient context provided to answer the question")

        try:
            # Gemini takes a single prompt, so fold the grounding instructions
            # and the context+question into one string.
            system_message = self._create_system_message(context)
            user_message = self._create_user_message(context)
            full_prompt = f"{system_message}\n\n{user_message}"

            # BUG FIX: the original passed `context.source_policy` as the
            # temperature while checking for a `temperature` attribute. Use
            # the context's temperature when present, else the configured default.
            temperature = getattr(context, "temperature", self.default_temperature)

            # Send the prompt through a fresh chat session asynchronously.
            chat = self.model.start_chat()
            response = await chat.send_message_async(
                full_prompt,
                generation_config={
                    "temperature": temperature,
                    "max_output_tokens": 1000
                }
            )

            # Extract plain text; fall back to str() for unexpected payloads.
            raw_response = response.text if response and hasattr(response, 'text') else str(response)

            # The model was instructed to emit exactly this sentence when the
            # context lacks the answer; normalize to the canonical response.
            if "I could not find this information in the book" in raw_response:
                return self._not_found_response("No relevant information found in the provided context")

            # Heuristically attribute the answer to sources and score it.
            used_sources = self._identify_used_sources(raw_response, context.retrieved_chunks)
            confidence_score = self._calculate_confidence_score(used_sources, context.retrieved_chunks)

            # Validate that the response is grounded in the provided context.
            grounding_validation = self._validate_response_grounding(
                raw_response, context.retrieved_chunks, context.query
            )

            agent_response = AgentResponse(
                raw_response=raw_response,
                used_sources=used_sources,
                confidence_score=confidence_score,
                is_valid=grounding_validation["is_valid"],
                validation_details=grounding_validation["details"],
                unsupported_claims=grounding_validation["unsupported_claims"]
            )

            logging.info(f"Agent response generated successfully. Confidence: {confidence_score:.2f}")
            return agent_response

        except Exception as e:
            logging.error(f"Error generating response from Google Gemini agent: {e}", exc_info=True)
            # Surface failures through the contract message, flagged invalid.
            return self._not_found_response(f"Error generating response: {str(e)}", is_valid=False)

    def _create_system_message(self, context: "AgentContext") -> str:
        """
        Create the system message that instructs the agent on how to behave.

        Args:
            context: AgentContext containing the query and retrieved context chunks

        Returns:
            Formatted system message string
        """
        # BUG FIX: the original triple-quoted literal ended with `book."""`,
        # so the closing quote of the mandated sentence was swallowed by the
        # string terminator. Built from single-quoted parts to keep the inner
        # double quotes intact.
        return (
            'You are a documentation-based assistant.\n'
            'Answer ONLY using the provided context from the book\n'
            '"Physical AI & Humanoid Robotics".\n'
            'If the answer is not found, reply EXACTLY:\n'
            '"I could not find this information in the book."'
        )

    def _create_user_message(self, context: "AgentContext") -> str:
        """
        Create the user message containing the context and the query.

        Args:
            context: AgentContext containing the query and retrieved context chunks

        Returns:
            Formatted user message string
        """
        return f"""CONTEXT:
{self._format_context_chunks(context.retrieved_chunks)}

QUESTION:
{context.query}"""

    def _format_context_chunks(self, chunks: List["SourceChunkSchema"]) -> str:
        """
        Format the context chunks for the prompt as numbered, delimited blocks.

        Args:
            chunks: List of source chunks to format

        Returns:
            Formatted context string ("" when there are no chunks)
        """
        if not chunks:
            return ""

        return "\n".join(
            f"[Chunk {i + 1}]\n{chunk.content}\n[/Chunk {i + 1}]"
            for i, chunk in enumerate(chunks)
        )

    def _create_context_messages(self, context: "AgentContext") -> List[Dict[str, str]]:
        """
        Create context messages from the retrieved chunks.

        Context now travels inside the user message, so this returns an
        empty list to avoid duplicating it. Kept for interface compatibility.

        Args:
            context: AgentContext containing the query and retrieved context chunks

        Returns:
            Empty list since context is now in the user message
        """
        return []

    def _identify_used_sources(self, response: str, chunks: List["SourceChunkSchema"]) -> List[str]:
        """
        Identify which sources were likely used in the response.

        Simplified word-overlap heuristic; a real implementation might use
        semantic similarity instead.

        Args:
            response: The agent's response text
            chunks: List of source chunks that were provided to the agent

        Returns:
            List of source IDs that were likely used (all IDs when no overlap
            is detected, as a conservative fallback)
        """
        used_sources = []
        response_words = set(response.lower().split())

        for chunk in chunks:
            # Compare against the first 20 words of the chunk only.
            content_words = set(chunk.content.lower().split()[:20])

            # More than 2 shared words counts as "used" (arbitrary threshold).
            if len(content_words & response_words) > 2:
                used_sources.append(chunk.id)

        # Conservative fallback: attribute to every chunk when none matched.
        if not used_sources:
            used_sources = [chunk.id for chunk in chunks]

        return used_sources

    def _calculate_confidence_score(self, used_sources: List[str], chunks: List["SourceChunkSchema"]) -> float:
        """
        Calculate a confidence score based on the quality of the used sources.

        Args:
            used_sources: List of source IDs that were used
            chunks: List of all source chunks that were provided to the agent

        Returns:
            Confidence score between 0.0 and 1.0
        """
        if not used_sources:
            return 0.1  # Low confidence if no sources were used

        # Average the similarity scores of the chunks actually attributed.
        total_similarity = 0.0
        used_count = 0
        for chunk in chunks:
            if chunk.id in used_sources:
                total_similarity += chunk.similarity_score
                used_count += 1

        if used_count == 0:
            return 0.1  # Low confidence if no matching chunks found

        avg_similarity = total_similarity / used_count

        # Very low similarity despite usable content usually signals an
        # embedding problem rather than irrelevance — keep a floor.
        if avg_similarity < 0.1 and len(used_sources) > 0:
            return 0.3  # Low but not zero confidence

        return format_confidence_score(avg_similarity)

    def _validate_response_grounding(self, response: str, chunks: List["SourceChunkSchema"], query: str) -> Dict[str, Any]:
        """
        Validate that the response is grounded in the provided context.

        Args:
            response: The agent's response text
            chunks: List of source chunks that were provided to the agent
            query: The original query (unused for now; kept for interface
                compatibility and future, deeper validation)

        Returns:
            Dict with keys "is_valid", "details", "unsupported_claims"
        """
        # Word-overlap heuristic: what fraction of response words also
        # appear anywhere in the concatenated context?
        response_words = set(response.lower().split())
        context_words = set(
            " ".join(chunk.content.lower() for chunk in chunks).split()
        )

        overlap_count = len(response_words & context_words)
        total_response_words = len(response_words)

        # Require at least 30% of response words to come from the context;
        # an empty response is trivially considered grounded.
        is_grounded = True
        if total_response_words > 0:
            is_grounded = (overlap_count / total_response_words) >= 0.3

        ratio = overlap_count / total_response_words if total_response_words > 0 else 0
        details = f"Response grounding validation completed. Context overlap ratio: {ratio:.2f}"

        return {
            "is_valid": is_grounded,
            "details": details,
            "unsupported_claims": []
        }

    async def validate_response_quality(self, response: str, context: "AgentContext") -> bool:
        """
        Validate the quality of the agent's response.

        Args:
            response: The agent's response text
            context: AgentContext containing the query and retrieved context chunks

        Returns:
            True if the response meets quality standards, False otherwise
            (currently only empty responses fail)
        """
        # Empty output is always a failure.
        if not response or not response.strip():
            logging.warning("Agent returned an empty response")
            return False

        # "I don't know"-style answers are acceptable: either no context was
        # provided, or the model is honestly reporting a gap in the context.
        lower_response = response.lower()
        if "i don't know" in lower_response or "i don't have" in lower_response:
            return True

        # A more rigorous implementation would cross-check against the context.
        return True
|
| 360 |
+
|
| 361 |
+
|
| 362 |
+
# Global agent instance (if needed)
|
| 363 |
+
# agent_instance = GeminiAgent()
|
rag_agent_api/config.py
CHANGED
|
@@ -19,7 +19,6 @@ class Config:
|
|
| 19 |
|
| 20 |
def __init__(self):
|
| 21 |
"""Initialize configuration by loading environment variables."""
|
| 22 |
-
self.openai_api_key = os.getenv('OPENAI_API_KEY')
|
| 23 |
self.cohere_api_key = os.getenv('COHERE_API_KEY')
|
| 24 |
self.openrouter_api_key = os.getenv('OPENROUTER_API_KEY')
|
| 25 |
self.qdrant_url = os.getenv('QDRANT_URL')
|
|
|
|
| 19 |
|
| 20 |
def __init__(self):
|
| 21 |
"""Initialize configuration by loading environment variables."""
|
|
|
|
| 22 |
self.cohere_api_key = os.getenv('COHERE_API_KEY')
|
| 23 |
self.openrouter_api_key = os.getenv('OPENROUTER_API_KEY')
|
| 24 |
self.qdrant_url = os.getenv('QDRANT_URL')
|
rag_agent_api/main.py
CHANGED
|
@@ -82,22 +82,22 @@ async def health_check() -> HealthResponse:
|
|
| 82 |
HealthResponse with status of services
|
| 83 |
"""
|
| 84 |
# Check if all required components are initialized
|
| 85 |
-
|
| 86 |
qdrant_status = "up" if retriever else "down"
|
| 87 |
agent_status = "up" if agent else "down"
|
| 88 |
|
| 89 |
# Determine overall status
|
| 90 |
overall_status = "healthy"
|
| 91 |
-
if
|
| 92 |
overall_status = "unhealthy"
|
| 93 |
-
elif
|
| 94 |
overall_status = "degraded"
|
| 95 |
|
| 96 |
return HealthResponse(
|
| 97 |
status=overall_status,
|
| 98 |
timestamp=format_timestamp(),
|
| 99 |
services={
|
| 100 |
-
"
|
| 101 |
"qdrant": qdrant_status,
|
| 102 |
"agent": agent_status
|
| 103 |
}
|
|
@@ -194,7 +194,7 @@ async def root() -> Dict[str, Any]:
|
|
| 194 |
return {
|
| 195 |
"message": "RAG Agent and API Layer",
|
| 196 |
"version": "1.0.0",
|
| 197 |
-
"description": "Question-answering API using
|
| 198 |
"endpoints": {
|
| 199 |
"POST /ask": "Main question-answering endpoint",
|
| 200 |
"GET /health": "Health check endpoint",
|
|
@@ -243,9 +243,4 @@ async def readiness_check() -> Dict[str, str]:
|
|
| 243 |
if retriever and agent:
|
| 244 |
return {"status": "ready"}
|
| 245 |
else:
|
| 246 |
-
raise HTTPException(status_code=503, detail="Service not ready")
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
if __name__ == "__main__":
|
| 250 |
-
import uvicorn
|
| 251 |
-
uvicorn.run(app, host="0.0.0.0", port=8000)
|
|
|
|
| 82 |
HealthResponse with status of services
|
| 83 |
"""
|
| 84 |
# Check if all required components are initialized
|
| 85 |
+
gemini_status = "up" if agent else "down"
|
| 86 |
qdrant_status = "up" if retriever else "down"
|
| 87 |
agent_status = "up" if agent else "down"
|
| 88 |
|
| 89 |
# Determine overall status
|
| 90 |
overall_status = "healthy"
|
| 91 |
+
if gemini_status == "down" or qdrant_status == "down":
|
| 92 |
overall_status = "unhealthy"
|
| 93 |
+
elif gemini_status == "degraded" or qdrant_status == "degraded":
|
| 94 |
overall_status = "degraded"
|
| 95 |
|
| 96 |
return HealthResponse(
|
| 97 |
status=overall_status,
|
| 98 |
timestamp=format_timestamp(),
|
| 99 |
services={
|
| 100 |
+
"gemini": gemini_status,
|
| 101 |
"qdrant": qdrant_status,
|
| 102 |
"agent": agent_status
|
| 103 |
}
|
|
|
|
| 194 |
return {
|
| 195 |
"message": "RAG Agent and API Layer",
|
| 196 |
"version": "1.0.0",
|
| 197 |
+
"description": "Question-answering API using OpenAI Agents and Qdrant retrieval",
|
| 198 |
"endpoints": {
|
| 199 |
"POST /ask": "Main question-answering endpoint",
|
| 200 |
"GET /health": "Health check endpoint",
|
|
|
|
| 243 |
if retriever and agent:
|
| 244 |
return {"status": "ready"}
|
| 245 |
else:
|
| 246 |
+
raise HTTPException(status_code=503, detail="Service not ready")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
rag_agent_api/retrieval.py
CHANGED
|
@@ -76,6 +76,16 @@ class QdrantRetriever:
|
|
| 76 |
# Embed the query using Cohere
|
| 77 |
query_embedding = await self._embed_query(query)
|
| 78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
# Perform semantic search in Qdrant
|
| 80 |
search_results = await self.client.query_points(
|
| 81 |
collection_name=self.collection_name,
|
|
@@ -116,53 +126,134 @@ class QdrantRetriever:
|
|
| 116 |
# Return empty list instead of raising exception to allow graceful handling
|
| 117 |
return []
|
| 118 |
|
| 119 |
-
async def
|
| 120 |
"""
|
| 121 |
-
|
| 122 |
|
| 123 |
Args:
|
| 124 |
-
query: The query string
|
|
|
|
| 125 |
|
| 126 |
Returns:
|
| 127 |
-
List of
|
| 128 |
"""
|
| 129 |
try:
|
| 130 |
-
# Use
|
| 131 |
-
#
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
)
|
| 137 |
|
| 138 |
-
# Extract
|
| 139 |
-
|
| 140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
except Exception as e:
|
| 142 |
-
logging.error(f"Error
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
|
| 144 |
-
|
|
|
|
| 145 |
try:
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 166 |
|
| 167 |
def _validate_chunk(self, chunk: SourceChunkSchema) -> bool:
|
| 168 |
"""
|
|
|
|
| 76 |
# Embed the query using Cohere
|
| 77 |
query_embedding = await self._embed_query(query)
|
| 78 |
|
| 79 |
+
# Check if we got a zero vector fallback (indicating embedding service failure)
|
| 80 |
+
is_zero_vector = all(x == 0.0 for x in query_embedding)
|
| 81 |
+
|
| 82 |
+
if is_zero_vector:
|
| 83 |
+
# If we have a zero vector, try a different approach - keyword search
|
| 84 |
+
logging.warning("Zero vector detected, attempting keyword-based fallback search")
|
| 85 |
+
retrieved_chunks = await self._keyword_search_fallback(query, top_k)
|
| 86 |
+
logging.info(f"Keyword fallback search retrieved {len(retrieved_chunks)} chunks from Qdrant")
|
| 87 |
+
return retrieved_chunks
|
| 88 |
+
|
| 89 |
# Perform semantic search in Qdrant
|
| 90 |
search_results = await self.client.query_points(
|
| 91 |
collection_name=self.collection_name,
|
|
|
|
| 126 |
# Return empty list instead of raising exception to allow graceful handling
|
| 127 |
return []
|
| 128 |
|
| 129 |
+
async def _keyword_search_fallback(self, query: str, top_k: int = 5) -> List[SourceChunkSchema]:
|
| 130 |
"""
|
| 131 |
+
Fallback method to search using keyword matching when embedding service is unavailable.
|
| 132 |
|
| 133 |
Args:
|
| 134 |
+
query: The user's query string
|
| 135 |
+
top_k: Number of results to return (default: 5)
|
| 136 |
|
| 137 |
Returns:
|
| 138 |
+
List of SourceChunkSchema objects containing relevant content
|
| 139 |
"""
|
| 140 |
try:
|
| 141 |
+
# Use Qdrant's full-text search capability or filter-based approach
|
| 142 |
+
# For now, we'll use a scroll + filter approach to find relevant chunks
|
| 143 |
+
from qdrant_client.http import models
|
| 144 |
+
|
| 145 |
+
# Simple approach: get all points and filter based on keyword matching
|
| 146 |
+
# In a production system, you'd want to use Qdrant's text indexing capabilities
|
| 147 |
+
all_points = await self.client.scroll(
|
| 148 |
+
collection_name=self.collection_name,
|
| 149 |
+
limit=10000, # Get up to 10000 points (or as many as exist)
|
| 150 |
+
with_payload=True,
|
| 151 |
+
with_vectors=False
|
| 152 |
)
|
| 153 |
|
| 154 |
+
# Extract points from the result (structure may vary depending on Qdrant client version)
|
| 155 |
+
points = all_points[0] if isinstance(all_points, tuple) else all_points
|
| 156 |
+
|
| 157 |
+
# Score points based on keyword matching
|
| 158 |
+
scored_chunks = []
|
| 159 |
+
query_lower = query.lower()
|
| 160 |
+
query_words = set(query_lower.split())
|
| 161 |
+
|
| 162 |
+
for point in points:
|
| 163 |
+
payload = point.payload if hasattr(point, 'payload') else point
|
| 164 |
+
content = payload.get('text', '') if isinstance(payload, dict) else getattr(payload, 'text', '')
|
| 165 |
+
content_lower = content.lower()
|
| 166 |
+
|
| 167 |
+
# Calculate a simple keyword match score
|
| 168 |
+
content_words = set(content_lower.split())
|
| 169 |
+
overlap = query_words.intersection(content_words)
|
| 170 |
+
score = len(overlap) / len(query_words) if query_words else 0 # Jaccard similarity
|
| 171 |
+
|
| 172 |
+
if score > 0 or query_lower in content_lower: # Only include if there's some match
|
| 173 |
+
chunk = SourceChunkSchema(
|
| 174 |
+
id=point.id if hasattr(point, 'id') else getattr(point, 'point_id', None),
|
| 175 |
+
url=payload.get('url', '') if isinstance(payload, dict) else getattr(payload, 'url', ''),
|
| 176 |
+
title=payload.get('title', '') if isinstance(payload, dict) else getattr(payload, 'title', ''),
|
| 177 |
+
content=content,
|
| 178 |
+
similarity_score=score,
|
| 179 |
+
chunk_index=payload.get('chunk_index', 0) if isinstance(payload, dict) else getattr(payload, 'chunk_index', 0)
|
| 180 |
+
)
|
| 181 |
+
|
| 182 |
+
if self._validate_chunk(chunk):
|
| 183 |
+
scored_chunks.append((chunk, score))
|
| 184 |
+
|
| 185 |
+
# Sort by score and return top_k
|
| 186 |
+
scored_chunks.sort(key=lambda x: x[1], reverse=True)
|
| 187 |
+
top_chunks = [chunk for chunk, score in scored_chunks[:top_k]]
|
| 188 |
+
|
| 189 |
+
return top_chunks
|
| 190 |
+
|
| 191 |
except Exception as e:
|
| 192 |
+
logging.error(f"Error in keyword fallback search: {e}", exc_info=True)
|
| 193 |
+
return []
|
| 194 |
+
|
| 195 |
+
async def _embed_query(self, query: str) -> List[float]:
|
| 196 |
+
"""
|
| 197 |
+
Embed the query using Cohere to prepare for semantic search with retry logic for rate limits.
|
| 198 |
+
|
| 199 |
+
Args:
|
| 200 |
+
query: The query string to embed
|
| 201 |
+
|
| 202 |
+
Returns:
|
| 203 |
+
List of floats representing the query embedding
|
| 204 |
+
"""
|
| 205 |
+
import time
|
| 206 |
+
import random
|
| 207 |
+
from cohere.errors.too_many_requests_error import TooManyRequestsError
|
| 208 |
|
| 209 |
+
# Try Cohere with retry logic for rate limits
|
| 210 |
+
for attempt in range(3): # Try up to 3 times
|
| 211 |
try:
|
| 212 |
+
# Use Cohere to embed the query
|
| 213 |
+
# The original book content was likely embedded with Cohere embed-english-v3.0
|
| 214 |
+
response = await self.cohere_client.embed(
|
| 215 |
+
texts=[query],
|
| 216 |
+
model="embed-english-v3.0", # 1024-dimensional embedding model
|
| 217 |
+
input_type="search_query" # Specify this is a search query
|
| 218 |
+
)
|
| 219 |
+
|
| 220 |
+
# Extract the embedding from the response
|
| 221 |
+
embedding = response.embeddings[0] # Get the first (and only) embedding
|
| 222 |
+
return embedding
|
| 223 |
+
except TooManyRequestsError as e:
|
| 224 |
+
if attempt < 2: # Don't wait after the last attempt
|
| 225 |
+
# Exponential backoff with jitter
|
| 226 |
+
wait_time = (2 ** attempt) + random.uniform(0, 1)
|
| 227 |
+
logging.warning(f"Cohere rate limited (attempt {attempt + 1}), waiting {wait_time:.2f}s: {e}")
|
| 228 |
+
await asyncio.sleep(wait_time)
|
| 229 |
+
else:
|
| 230 |
+
logging.error(f"Cohere rate limited after {attempt + 1} attempts: {e}")
|
| 231 |
+
except Exception as e:
|
| 232 |
+
logging.error(f"Error embedding query with Cohere: {e}", exc_info=True)
|
| 233 |
+
break # Don't retry for other types of errors
|
| 234 |
+
|
| 235 |
+
# If Cohere fails, try using OpenAI embeddings as fallback if available
|
| 236 |
+
try:
|
| 237 |
+
from openai import OpenAI
|
| 238 |
+
from .config import get_config
|
| 239 |
+
config = get_config()
|
| 240 |
+
|
| 241 |
+
if config.openai_api_key:
|
| 242 |
+
client = OpenAI(api_key=config.openai_api_key)
|
| 243 |
+
response = client.embeddings.create(
|
| 244 |
+
input=query,
|
| 245 |
+
model="text-embedding-ada-002"
|
| 246 |
+
)
|
| 247 |
+
embedding = response.data[0].embedding
|
| 248 |
+
logging.info("Successfully used OpenAI embedding as fallback")
|
| 249 |
+
return embedding
|
| 250 |
+
except Exception as openai_error:
|
| 251 |
+
logging.warning(f"OpenAI fallback also failed: {openai_error}")
|
| 252 |
+
|
| 253 |
+
# If all fail, return a zero vector of the correct size (1024) as a last resort
|
| 254 |
+
# This will result in poor semantic matches but won't crash the system
|
| 255 |
+
logging.warning("Using zero vector as final fallback for query embedding")
|
| 256 |
+
return [0.0] * 1024
|
| 257 |
|
| 258 |
def _validate_chunk(self, chunk: SourceChunkSchema) -> bool:
|
| 259 |
"""
|
requirements.txt
CHANGED
|
@@ -1,12 +1,10 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
cohere>=4.9.0
|
| 5 |
-
qdrant-client>=1.7.0
|
| 6 |
python-dotenv>=1.0.0
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
| 1 |
+
fastapi>=0.104.1
|
| 2 |
+
uvicorn[standard]>=0.24.0
|
| 3 |
+
qdrant-client>=1.8.0
|
|
|
|
|
|
|
| 4 |
python-dotenv>=1.0.0
|
| 5 |
+
httpx>=0.25.0
|
| 6 |
+
cohere>=4.9.0
|
| 7 |
+
google-generativeai>=0.4.0
|
| 8 |
+
openai>=1.6.0
|
| 9 |
+
pydantic>=2.5.0
|
| 10 |
+
typing-extensions>=4.8.0
|
test_retrieval.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Test script to directly test the Qdrant retrieval functionality
|
| 4 |
+
"""
|
| 5 |
+
import asyncio
|
| 6 |
+
import os
|
| 7 |
+
from dotenv import load_dotenv
|
| 8 |
+
from rag_agent_api.retrieval import QdrantRetriever
|
| 9 |
+
from rag_agent_api.config import get_config
|
| 10 |
+
|
| 11 |
+
# Load environment variables
|
| 12 |
+
load_dotenv()
|
| 13 |
+
|
| 14 |
+
async def test_retrieval():
|
| 15 |
+
print("Testing Qdrant retrieval functionality...")
|
| 16 |
+
|
| 17 |
+
# Create a QdrantRetriever instance
|
| 18 |
+
retriever = QdrantRetriever()
|
| 19 |
+
|
| 20 |
+
print("1. Testing collection existence...")
|
| 21 |
+
exists = await retriever.validate_collection_exists()
|
| 22 |
+
print(f" Collection exists: {exists}")
|
| 23 |
+
|
| 24 |
+
if exists:
|
| 25 |
+
print("2. Getting total points in collection...")
|
| 26 |
+
total_points = await retriever.get_total_points()
|
| 27 |
+
print(f" Total points: {total_points}")
|
| 28 |
+
|
| 29 |
+
print("3. Testing query embedding...")
|
| 30 |
+
try:
|
| 31 |
+
query = "what about this book?"
|
| 32 |
+
embedding = await retriever._embed_query(query)
|
| 33 |
+
print(f" Query embedding successful, length: {len(embedding)}")
|
| 34 |
+
except Exception as e:
|
| 35 |
+
print(f" Query embedding failed: {e}")
|
| 36 |
+
return
|
| 37 |
+
|
| 38 |
+
print("4. Testing direct search...")
|
| 39 |
+
try:
|
| 40 |
+
results = await retriever.retrieve_context(query, top_k=5)
|
| 41 |
+
print(f" Retrieved {len(results)} results")
|
| 42 |
+
|
| 43 |
+
if results:
|
| 44 |
+
print(" Sample results:")
|
| 45 |
+
for i, result in enumerate(results[:2]): # Show first 2 results
|
| 46 |
+
print(f" Result {i+1}:")
|
| 47 |
+
print(f" ID: {result.id}")
|
| 48 |
+
print(f" Title: {result.title}")
|
| 49 |
+
print(f" Content preview: {result.content[:100]}...")
|
| 50 |
+
print(f" Similarity: {result.similarity_score}")
|
| 51 |
+
print(f" URL: {result.url}")
|
| 52 |
+
else:
|
| 53 |
+
print(" No results retrieved - this indicates the main issue")
|
| 54 |
+
except Exception as e:
|
| 55 |
+
print(f" Direct search failed: {e}")
|
| 56 |
+
import traceback
|
| 57 |
+
traceback.print_exc()
|
| 58 |
+
|
| 59 |
+
if __name__ == "__main__":
|
| 60 |
+
asyncio.run(test_retrieval())
|
tests/test_integration.py
CHANGED
|
@@ -7,7 +7,7 @@ from fastapi.testclient import TestClient
|
|
| 7 |
from unittest.mock import Mock, patch, AsyncMock
|
| 8 |
from rag_agent_api.main import app, retriever, agent
|
| 9 |
from rag_agent_api.retrieval import QdrantRetriever
|
| 10 |
-
from rag_agent_api.
|
| 11 |
from rag_agent_api.schemas import SourceChunkSchema, AgentResponse, AgentContext
|
| 12 |
|
| 13 |
|
|
@@ -17,13 +17,13 @@ def test_full_query_flow_with_mocked_components():
|
|
| 17 |
'QDRANT_URL': 'http://test-qdrant:6333',
|
| 18 |
'QDRANT_API_KEY': 'test-api-key',
|
| 19 |
'COHERE_API_KEY': 'test-cohere-key',
|
| 20 |
-
'
|
| 21 |
}):
|
| 22 |
with patch('rag_agent_api.main.QdrantRetriever') as mock_retriever_class:
|
| 23 |
-
with patch('rag_agent_api.main.
|
| 24 |
# Create mock instances
|
| 25 |
mock_retriever = Mock(spec=QdrantRetriever)
|
| 26 |
-
mock_agent = Mock(spec=
|
| 27 |
|
| 28 |
# Configure the class mocks to return our instance mocks
|
| 29 |
mock_retriever_class.return_value = mock_retriever
|
|
@@ -84,11 +84,11 @@ async def test_agent_context_creation():
|
|
| 84 |
'QDRANT_URL': 'http://test-qdrant:6333',
|
| 85 |
'QDRANT_API_KEY': 'test-api-key',
|
| 86 |
'COHERE_API_KEY': 'test-cohere-key',
|
| 87 |
-
'
|
| 88 |
}):
|
| 89 |
with patch('rag_agent_api.retrieval.AsyncQdrantClient') as mock_qdrant_client:
|
| 90 |
with patch('rag_agent_api.retrieval.cohere.Client') as mock_cohere_client:
|
| 91 |
-
with patch('rag_agent_api.
|
| 92 |
# Mock the Qdrant client
|
| 93 |
mock_qdrant_instance = Mock()
|
| 94 |
mock_qdrant_client.return_value = mock_qdrant_instance
|
|
@@ -101,7 +101,7 @@ async def test_agent_context_creation():
|
|
| 101 |
|
| 102 |
# Initialize components
|
| 103 |
retriever = QdrantRetriever(collection_name="test_collection")
|
| 104 |
-
agent =
|
| 105 |
|
| 106 |
# Create test chunks
|
| 107 |
test_chunk = SourceChunkSchema(
|
|
@@ -145,7 +145,7 @@ def test_health_endpoint_integration():
|
|
| 145 |
assert "services" in data
|
| 146 |
|
| 147 |
# Check that services status is included
|
| 148 |
-
assert "
|
| 149 |
assert "qdrant" in data["services"]
|
| 150 |
assert "agent" in data["services"]
|
| 151 |
|
|
@@ -157,11 +157,11 @@ async def test_retrieval_and_agent_integration():
|
|
| 157 |
'QDRANT_URL': 'http://test-qdrant:6333',
|
| 158 |
'QDRANT_API_KEY': 'test-api-key',
|
| 159 |
'COHERE_API_KEY': 'test-cohere-key',
|
| 160 |
-
'
|
| 161 |
}):
|
| 162 |
with patch('rag_agent_api.retrieval.AsyncQdrantClient') as mock_qdrant_client:
|
| 163 |
with patch('rag_agent_api.retrieval.cohere.Client') as mock_cohere_client:
|
| 164 |
-
with patch('rag_agent_api.
|
| 165 |
# Mock the Qdrant client
|
| 166 |
mock_qdrant_instance = Mock()
|
| 167 |
mock_qdrant_client.return_value = mock_qdrant_instance
|
|
@@ -172,21 +172,18 @@ async def test_retrieval_and_agent_integration():
|
|
| 172 |
mock_cohere_client.return_value = mock_cohere_instance
|
| 173 |
mock_cohere_instance.embed.return_value = Mock(embeddings=[[0.1, 0.2, 0.3]])
|
| 174 |
|
| 175 |
-
# Mock the
|
| 176 |
-
|
| 177 |
-
|
| 178 |
mock_completion = Mock()
|
| 179 |
-
mock_completion.
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
}
|
| 184 |
-
mock_httpx_instance.post = AsyncMock(return_value=mock_completion)
|
| 185 |
-
mock_httpx_instance.post.return_value.status_code = 200
|
| 186 |
|
| 187 |
# Initialize components
|
| 188 |
test_retriever = QdrantRetriever(collection_name="test_collection")
|
| 189 |
-
test_agent =
|
| 190 |
|
| 191 |
# Mock the retrieval result
|
| 192 |
mock_chunk = SourceChunkSchema(
|
|
|
|
| 7 |
from unittest.mock import Mock, patch, AsyncMock
|
| 8 |
from rag_agent_api.main import app, retriever, agent
|
| 9 |
from rag_agent_api.retrieval import QdrantRetriever
|
| 10 |
+
from rag_agent_api.agent import OpenAIAgent
|
| 11 |
from rag_agent_api.schemas import SourceChunkSchema, AgentResponse, AgentContext
|
| 12 |
|
| 13 |
|
|
|
|
| 17 |
'QDRANT_URL': 'http://test-qdrant:6333',
|
| 18 |
'QDRANT_API_KEY': 'test-api-key',
|
| 19 |
'COHERE_API_KEY': 'test-cohere-key',
|
| 20 |
+
'OPENAI_API_KEY': 'test-openai-key'
|
| 21 |
}):
|
| 22 |
with patch('rag_agent_api.main.QdrantRetriever') as mock_retriever_class:
|
| 23 |
+
with patch('rag_agent_api.main.OpenAIAgent') as mock_agent_class:
|
| 24 |
# Create mock instances
|
| 25 |
mock_retriever = Mock(spec=QdrantRetriever)
|
| 26 |
+
mock_agent = Mock(spec=OpenAIAgent)
|
| 27 |
|
| 28 |
# Configure the class mocks to return our instance mocks
|
| 29 |
mock_retriever_class.return_value = mock_retriever
|
|
|
|
| 84 |
'QDRANT_URL': 'http://test-qdrant:6333',
|
| 85 |
'QDRANT_API_KEY': 'test-api-key',
|
| 86 |
'COHERE_API_KEY': 'test-cohere-key',
|
| 87 |
+
'OPENAI_API_KEY': 'test-openai-key'
|
| 88 |
}):
|
| 89 |
with patch('rag_agent_api.retrieval.AsyncQdrantClient') as mock_qdrant_client:
|
| 90 |
with patch('rag_agent_api.retrieval.cohere.Client') as mock_cohere_client:
|
| 91 |
+
with patch('rag_agent_api.agent.AsyncOpenAI'):
|
| 92 |
# Mock the Qdrant client
|
| 93 |
mock_qdrant_instance = Mock()
|
| 94 |
mock_qdrant_client.return_value = mock_qdrant_instance
|
|
|
|
| 101 |
|
| 102 |
# Initialize components
|
| 103 |
retriever = QdrantRetriever(collection_name="test_collection")
|
| 104 |
+
agent = OpenAIAgent(model_name="gpt-4-test")
|
| 105 |
|
| 106 |
# Create test chunks
|
| 107 |
test_chunk = SourceChunkSchema(
|
|
|
|
| 145 |
assert "services" in data
|
| 146 |
|
| 147 |
# Check that services status is included
|
| 148 |
+
assert "openai" in data["services"]
|
| 149 |
assert "qdrant" in data["services"]
|
| 150 |
assert "agent" in data["services"]
|
| 151 |
|
|
|
|
| 157 |
'QDRANT_URL': 'http://test-qdrant:6333',
|
| 158 |
'QDRANT_API_KEY': 'test-api-key',
|
| 159 |
'COHERE_API_KEY': 'test-cohere-key',
|
| 160 |
+
'OPENAI_API_KEY': 'test-openai-key'
|
| 161 |
}):
|
| 162 |
with patch('rag_agent_api.retrieval.AsyncQdrantClient') as mock_qdrant_client:
|
| 163 |
with patch('rag_agent_api.retrieval.cohere.Client') as mock_cohere_client:
|
| 164 |
+
with patch('rag_agent_api.agent.AsyncOpenAI') as mock_openai:
|
| 165 |
# Mock the Qdrant client
|
| 166 |
mock_qdrant_instance = Mock()
|
| 167 |
mock_qdrant_client.return_value = mock_qdrant_instance
|
|
|
|
| 172 |
mock_cohere_client.return_value = mock_cohere_instance
|
| 173 |
mock_cohere_instance.embed.return_value = Mock(embeddings=[[0.1, 0.2, 0.3]])
|
| 174 |
|
| 175 |
+
# Mock the OpenAI client
|
| 176 |
+
mock_openai_instance = Mock()
|
| 177 |
+
mock_openai.return_value = mock_openai_instance
|
| 178 |
mock_completion = Mock()
|
| 179 |
+
mock_completion.choices = [Mock()]
|
| 180 |
+
mock_completion.choices[0].message = Mock()
|
| 181 |
+
mock_completion.choices[0].message.content = "This is a test response"
|
| 182 |
+
mock_openai_instance.chat.completions.create = AsyncMock(return_value=mock_completion)
|
|
|
|
|
|
|
|
|
|
| 183 |
|
| 184 |
# Initialize components
|
| 185 |
test_retriever = QdrantRetriever(collection_name="test_collection")
|
| 186 |
+
test_agent = OpenAIAgent(model_name="gpt-4-test")
|
| 187 |
|
| 188 |
# Mock the retrieval result
|
| 189 |
mock_chunk = SourceChunkSchema(
|