sanilahmed2019 committed on
Commit
ce4595c
·
1 Parent(s): 2ade705

Update backend logic

Browse files
.env CHANGED
@@ -4,7 +4,7 @@ QDRANT_API_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.BDBAtGf7
4
  REACT_APP_RAG_API_URL=http://localhost:8000
5
  # RAG Agent and API Layer Environment Variables
6
 
7
- # OpenAI API Configuration
8
  OPENROUTER_API_KEY=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
9
 
10
  # Qdrant Configuration
@@ -13,7 +13,7 @@ QDRANT_API_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.BDBAtGf7
13
  QDRANT_COLLECTION_NAME=rag_embedding
14
 
15
  # Cohere Configuration (for query embeddings)
16
- COHERE_API_KEY=Dq2dLJlwDOZwAg4K7XalSEC91kXnucGd52KmkJh7
17
 
18
  # Application Configuration
19
  DEFAULT_CONTEXT_WINDOW=5
 
4
  REACT_APP_RAG_API_URL=http://localhost:8000
5
  # RAG Agent and API Layer Environment Variables
6
 
7
+ # OpenRouter API Configuration
8
  OPENROUTER_API_KEY=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
9
 
10
  # Qdrant Configuration
 
13
  QDRANT_COLLECTION_NAME=rag_embedding
14
 
15
  # Cohere Configuration (for query embeddings)
16
+ COHERE_API_KEY=RGfPBR6t5Ev2VXgIA00o5XcHiuXYkyCVL8TjkSZs
17
 
18
  # Application Configuration
19
  DEFAULT_CONTEXT_WINDOW=5
.env.example CHANGED
@@ -1,14 +1,14 @@
1
  # RAG Agent and API Layer Environment Variables
2
 
3
  # OpenRouter API Configuration
4
- OPENROUTER_API_KEY=your-openrouter-api-key-here
5
  # Qdrant Configuration
6
  QDRANT_URL=https://72888a6e-0dfc-4620-bf85-0b9025951e0c.us-east4-0.gcp.cloud.qdrant.io:6333
7
  QDRANT_API_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.BDBAtGf7x_XGCu3lO4-kNxgJeVgnSTKUjHeZBT6qJkQ
8
  QDRANT_COLLECTION_NAME=rag_embedding
9
  REACT_APP_RAG_API_URL=http://localhost:8000
10
  # Cohere Configuration (for query embeddings)
11
- COHERE_API_KEY=Dq2dLJlwDOZwAg4K7XalSEC91kXnucGd52KmkJh7
12
 
13
  # Application Configuration
14
  DEFAULT_CONTEXT_WINDOW=5
 
1
  # RAG Agent and API Layer Environment Variables
2
 
3
  # OpenRouter API Configuration
4
+ OPENROUTER_API_KEY=your-openrouter-api-key-here
5
  # Qdrant Configuration
6
  QDRANT_URL=https://72888a6e-0dfc-4620-bf85-0b9025951e0c.us-east4-0.gcp.cloud.qdrant.io:6333
7
  QDRANT_API_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.BDBAtGf7x_XGCu3lO4-kNxgJeVgnSTKUjHeZBT6qJkQ
8
  QDRANT_COLLECTION_NAME=rag_embedding
9
  REACT_APP_RAG_API_URL=http://localhost:8000
10
  # Cohere Configuration (for query embeddings)
11
+ COHERE_API_KEY=your-cohere-api-key-here
12
 
13
  # Application Configuration
14
  DEFAULT_CONTEXT_WINDOW=5
README.md CHANGED
@@ -1,32 +1,57 @@
1
- ---
2
- title: Backend Deploy
3
- emoji: 🚀
4
- colorFrom: blue
5
- colorTo: purple
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- # RAG Agent and API Layer
11
-
12
- This is a FastAPI application that provides a question-answering API using Gemini agents and Qdrant retrieval for RAG (Retrieval Augmented Generation) functionality.
13
-
14
- ## API Endpoints
15
-
16
- - `GET /` - Root endpoint with API information
17
- - `POST /ask` - Main question-answering endpoint
18
- - `GET /health` - Health check endpoint
19
- - `GET /ready` - Readiness check endpoint
20
- - `/docs` - API documentation (Swagger UI)
21
- - `/redoc` - API documentation (Redoc)
22
-
23
- ## Configuration
24
-
25
- The application requires the following environment variables:
26
- - `GEMINI_API_KEY` - API key for Google Gemini
27
- - `QDRANT_URL` - URL for Qdrant vector database
28
- - `QDRANT_API_KEY` - API key for Qdrant database
29
-
30
- ## Deployment
31
-
32
- This application is configured for deployment on Hugging Face Spaces using Docker.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Book Content Ingestor & RAG Verification
2
+
3
+ A system that extracts content from Docusaurus-based book websites, chunks and embeds it using Cohere, stores the embeddings in Qdrant Cloud for RAG applications, and verifies that the retrieval pipeline works.
4
+
5
+ ## Setup
6
+
7
+ 1. Install dependencies using uv:
8
+ ```bash
9
+ cd backend
10
+ uv sync
11
+ ```
12
+
13
+ 2. Create a `.env` file with your API keys:
14
+ ```bash
15
+ cp .env.example .env
16
+ # Edit .env with your actual API keys
17
+ ```
18
+
19
+ ## Environment Variables
20
+
21
+ - `COHERE_API_KEY`: Your Cohere API key
22
+ - `QDRANT_URL`: Your Qdrant Cloud URL
23
+ - `QDRANT_API_KEY`: Your Qdrant API key
24
+ - `QDRANT_COLLECTION_NAME`: Name of the collection to use (default: "rag_embedding")
25
+
26
+ ## Usage
27
+
28
+ ### Run the ingestion pipeline:
29
+ ```bash
30
+ cd backend
31
+ uv run python main.py
32
+ ```
33
+
34
+ This will:
35
+ 1. Collect all URLs from the target book (https://sanilahmed.github.io/hackathon-ai-book/)
36
+ 2. Extract text content from each URL
37
+ 3. Chunk the content into fixed-size segments
38
+ 4. Generate embeddings using Cohere
39
+ 5. Store embeddings with metadata in the Qdrant Cloud collection named "rag_embedding"
40
+
41
+ ### Run the verification pipeline:
42
+ ```bash
43
+ cd backend
44
+ uv run python -m verify_retrieval.main
45
+ ```
46
+
47
+ Or with specific options:
48
+ ```bash
49
+ uv run python -m verify_retrieval.main --query "transformer architecture in NLP" --top-k 10
50
+ ```
51
+
52
+ The verification system will:
53
+ 1. Load vectors and metadata stored in Qdrant from the original ingestion
54
+ 2. Run retrieval queries against Qdrant using sample keywords or phrases
55
+ 3. Validate that retrieved chunks are accurate and relevant
56
+ 4. Check that metadata (URL, title, chunk_id) matches source content
57
+ 5. Log results and confirm the pipeline executes end-to-end without errors
backend.log CHANGED
@@ -1,14 +1,14 @@
1
- 2025-12-28 03:28:11,862 - root - INFO - OpenRouter agent initialized with model: arcee-ai/trinity-mini:free
2
- 2025-12-28 03:28:11,862 - root - INFO - OpenRouter agent initialized successfully
3
- 2025-12-28 03:28:12,881 - httpx - INFO - HTTP Request: GET https://72888a6e-0dfc-4620-bf85-0b9025951e0c.us-east4-0.gcp.cloud.qdrant.io:6333 "HTTP/1.1 200 OK"
4
- 2025-12-28 03:28:12,935 - root - INFO - Initialized Qdrant retriever for collection: rag_embedding
5
- 2025-12-28 03:28:12,935 - root - INFO - Qdrant retriever initialized successfully
6
- 2025-12-28 03:28:12,935 - root - INFO - Application startup completed
7
- 2025-12-28 03:33:25,861 - root - INFO - Processing query: what about this book?...
8
- 2025-12-28 03:33:25,866 - root - INFO - Step 1: Retrieving relevant content from Qdrant...
9
- 2025-12-28 03:33:25,872 - root - INFO - Retrieving context for query: 'what about this book?' from collection: rag_embedding
10
- 2025-12-28 03:33:27,515 - httpx - INFO - HTTP Request: POST https://api.cohere.com/v1/embed "HTTP/1.1 429 Too Many Requests"
11
- 2025-12-28 03:33:27,529 - root - ERROR - Error embedding query with Cohere: headers: {'access-control-expose-headers': 'X-Debug-Trace-ID', 'cache-control': 'no-cache, no-store, no-transform, must-revalidate, private, max-age=0', 'content-encoding': 'gzip', 'content-type': 'application/json', 'expires': 'Thu, 01 Jan 1970 00:00:00 GMT', 'pragma': 'no-cache', 'vary': 'Origin,Accept-Encoding', 'x-accel-expires': '0', 'x-debug-trace-id': 'd089f3fe358a80aeb61a8713a62bb51e', 'x-trial-endpoint-call-limit': '100', 'x-trial-endpoint-call-remaining': '99', 'date': 'Sat, 27 Dec 2025 22:33:27 GMT', 'x-envoy-upstream-service-time': '22', 'server': 'envoy', 'via': '1.1 google', 'alt-svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000', 'transfer-encoding': 'chunked'}, status_code: 429, body: {'id': 'd2182ca7-5051-4c05-b9b7-a79e9dbe1312', 'message': "You are using a Trial key, which is limited to 1000 API calls / month. You can continue to use the Trial key for free or upgrade to a Production key with higher rate limits at 'https://dashboard.cohere.com/api-keys'. Contact us on 'https://discord.gg/XW44jPfYJu' or email us at support@cohere.com with any questions"}
12
  Traceback (most recent call last):
13
  File "/mnt/d/Hackathon/book/backend/rag_agent_api/retrieval.py", line 132, in _embed_query
14
  response = await self.cohere_client.embed(
@@ -32,20 +32,20 @@ Traceback (most recent call last):
32
  raise TooManyRequestsError(
33
  ...<8 lines>...
34
  )
35
- cohere.errors.too_many_requests_error.TooManyRequestsError: headers: {'access-control-expose-headers': 'X-Debug-Trace-ID', 'cache-control': 'no-cache, no-store, no-transform, must-revalidate, private, max-age=0', 'content-encoding': 'gzip', 'content-type': 'application/json', 'expires': 'Thu, 01 Jan 1970 00:00:00 GMT', 'pragma': 'no-cache', 'vary': 'Origin,Accept-Encoding', 'x-accel-expires': '0', 'x-debug-trace-id': 'd089f3fe358a80aeb61a8713a62bb51e', 'x-trial-endpoint-call-limit': '100', 'x-trial-endpoint-call-remaining': '99', 'date': 'Sat, 27 Dec 2025 22:33:27 GMT', 'x-envoy-upstream-service-time': '22', 'server': 'envoy', 'via': '1.1 google', 'alt-svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000', 'transfer-encoding': 'chunked'}, status_code: 429, body: {'id': 'd2182ca7-5051-4c05-b9b7-a79e9dbe1312', 'message': "You are using a Trial key, which is limited to 1000 API calls / month. You can continue to use the Trial key for free or upgrade to a Production key with higher rate limits at 'https://dashboard.cohere.com/api-keys'. Contact us on 'https://discord.gg/XW44jPfYJu' or email us at support@cohere.com with any questions"}
36
- 2025-12-28 03:33:29,957 - root - WARNING - Using zero vector as final fallback for query embedding
37
- 2025-12-28 03:33:32,465 - httpx - INFO - HTTP Request: POST https://72888a6e-0dfc-4620-bf85-0b9025951e0c.us-east4-0.gcp.cloud.qdrant.io:6333/collections/rag_embedding/points/query "HTTP/1.1 200 OK"
38
- 2025-12-28 03:33:32,482 - root - INFO - Retrieved 5 valid chunks from Qdrant
39
- 2025-12-28 03:33:32,482 - root - INFO - Retrieved 5 chunks from Qdrant
40
- 2025-12-28 03:33:32,482 - root - INFO - Step 2: Generating response with OpenAI agent...
41
- 2025-12-28 03:33:40,381 - httpx - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
42
- 2025-12-28 03:33:41,893 - root - INFO - Step 3: Formatting response...
43
- 2025-12-28 03:33:41,893 - root - INFO - Query processed successfully, response ID: resp_d77ed446
44
- 2025-12-28 03:48:54,357 - root - INFO - Processing query: What is this book about?...
45
- 2025-12-28 03:48:54,360 - root - INFO - Step 1: Retrieving relevant content from Qdrant...
46
- 2025-12-28 03:48:54,363 - root - INFO - Retrieving context for query: 'What is this book about?' from collection: rag_embedding
47
- 2025-12-28 03:48:55,736 - httpx - INFO - HTTP Request: POST https://api.cohere.com/v1/embed "HTTP/1.1 429 Too Many Requests"
48
- 2025-12-28 03:48:55,750 - root - ERROR - Error embedding query with Cohere: headers: {'access-control-expose-headers': 'X-Debug-Trace-ID', 'cache-control': 'no-cache, no-store, no-transform, must-revalidate, private, max-age=0', 'content-encoding': 'gzip', 'content-type': 'application/json', 'expires': 'Thu, 01 Jan 1970 00:00:00 GMT', 'pragma': 'no-cache', 'vary': 'Origin,Accept-Encoding', 'x-accel-expires': '0', 'x-debug-trace-id': 'b1f3f38920e419721e629c6abc56371b', 'x-trial-endpoint-call-limit': '100', 'x-trial-endpoint-call-remaining': '99', 'date': 'Sat, 27 Dec 2025 22:48:55 GMT', 'x-envoy-upstream-service-time': '15', 'server': 'envoy', 'via': '1.1 google', 'alt-svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000', 'transfer-encoding': 'chunked'}, status_code: 429, body: {'id': '41a0eefd-af53-4f33-b084-34c94d377f38', 'message': "You are using a Trial key, which is limited to 1000 API calls / month. You can continue to use the Trial key for free or upgrade to a Production key with higher rate limits at 'https://dashboard.cohere.com/api-keys'. Contact us on 'https://discord.gg/XW44jPfYJu' or email us at support@cohere.com with any questions"}
49
  Traceback (most recent call last):
50
  File "/mnt/d/Hackathon/book/backend/rag_agent_api/retrieval.py", line 132, in _embed_query
51
  response = await self.cohere_client.embed(
@@ -69,21 +69,61 @@ Traceback (most recent call last):
69
  raise TooManyRequestsError(
70
  ...<8 lines>...
71
  )
72
- cohere.errors.too_many_requests_error.TooManyRequestsError: headers: {'access-control-expose-headers': 'X-Debug-Trace-ID', 'cache-control': 'no-cache, no-store, no-transform, must-revalidate, private, max-age=0', 'content-encoding': 'gzip', 'content-type': 'application/json', 'expires': 'Thu, 01 Jan 1970 00:00:00 GMT', 'pragma': 'no-cache', 'vary': 'Origin,Accept-Encoding', 'x-accel-expires': '0', 'x-debug-trace-id': 'b1f3f38920e419721e629c6abc56371b', 'x-trial-endpoint-call-limit': '100', 'x-trial-endpoint-call-remaining': '99', 'date': 'Sat, 27 Dec 2025 22:48:55 GMT', 'x-envoy-upstream-service-time': '15', 'server': 'envoy', 'via': '1.1 google', 'alt-svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000', 'transfer-encoding': 'chunked'}, status_code: 429, body: {'id': '41a0eefd-af53-4f33-b084-34c94d377f38', 'message': "You are using a Trial key, which is limited to 1000 API calls / month. You can continue to use the Trial key for free or upgrade to a Production key with higher rate limits at 'https://dashboard.cohere.com/api-keys'. Contact us on 'https://discord.gg/XW44jPfYJu' or email us at support@cohere.com with any questions"}
73
- 2025-12-28 03:48:55,790 - root - WARNING - Using zero vector as final fallback for query embedding
74
- 2025-12-28 03:48:56,887 - httpx - INFO - HTTP Request: POST https://72888a6e-0dfc-4620-bf85-0b9025951e0c.us-east4-0.gcp.cloud.qdrant.io:6333/collections/rag_embedding/points/query "HTTP/1.1 200 OK"
75
- 2025-12-28 03:48:56,897 - root - INFO - Retrieved 5 valid chunks from Qdrant
76
- 2025-12-28 03:48:56,897 - root - INFO - Retrieved 5 chunks from Qdrant
77
- 2025-12-28 03:48:56,897 - root - INFO - Step 2: Generating response with OpenAI agent...
78
- 2025-12-28 03:49:00,669 - httpx - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
79
- 2025-12-28 03:49:02,263 - root - INFO - Agent response generated successfully. Confidence: 0.30
80
- 2025-12-28 03:49:02,265 - root - INFO - Step 3: Formatting response...
81
- 2025-12-28 03:49:02,269 - root - INFO - Query processed successfully, response ID: resp_523ca795
82
- 2025-12-28 03:51:03,381 - root - INFO - Processing query: What is this book about?...
83
- 2025-12-28 03:51:03,381 - root - INFO - Step 1: Retrieving relevant content from Qdrant...
84
- 2025-12-28 03:51:03,382 - root - INFO - Retrieving context for query: 'What is this book about?' from collection: rag_embedding
85
- 2025-12-28 03:51:03,863 - httpx - INFO - HTTP Request: POST https://api.cohere.com/v1/embed "HTTP/1.1 429 Too Many Requests"
86
- 2025-12-28 03:51:03,868 - root - ERROR - Error embedding query with Cohere: headers: {'access-control-expose-headers': 'X-Debug-Trace-ID', 'cache-control': 'no-cache, no-store, no-transform, must-revalidate, private, max-age=0', 'content-encoding': 'gzip', 'content-type': 'application/json', 'expires': 'Thu, 01 Jan 1970 00:00:00 GMT', 'pragma': 'no-cache', 'vary': 'Origin,Accept-Encoding', 'x-accel-expires': '0', 'x-debug-trace-id': '19432b16b53a7488ff206de4686f4925', 'x-trial-endpoint-call-limit': '100', 'x-trial-endpoint-call-remaining': '99', 'date': 'Sat, 27 Dec 2025 22:51:03 GMT', 'x-envoy-upstream-service-time': '11', 'server': 'envoy', 'via': '1.1 google', 'alt-svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000', 'transfer-encoding': 'chunked'}, status_code: 429, body: {'id': '6cc43158-05de-4534-8bb1-dfe337b29d9e', 'message': "You are using a Trial key, which is limited to 1000 API calls / month. You can continue to use the Trial key for free or upgrade to a Production key with higher rate limits at 'https://dashboard.cohere.com/api-keys'. Contact us on 'https://discord.gg/XW44jPfYJu' or email us at support@cohere.com with any questions"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  Traceback (most recent call last):
88
  File "/mnt/d/Hackathon/book/backend/rag_agent_api/retrieval.py", line 132, in _embed_query
89
  response = await self.cohere_client.embed(
@@ -103,54 +143,163 @@ Traceback (most recent call last):
103
  ...<7 lines>...
104
  )
105
  ^
106
- File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/cohere/raw_base_client.py", line 4637, in embed
107
- raise TooManyRequestsError(
108
- ...<8 lines>...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  )
110
- cohere.errors.too_many_requests_error.TooManyRequestsError: headers: {'access-control-expose-headers': 'X-Debug-Trace-ID', 'cache-control': 'no-cache, no-store, no-transform, must-revalidate, private, max-age=0', 'content-encoding': 'gzip', 'content-type': 'application/json', 'expires': 'Thu, 01 Jan 1970 00:00:00 GMT', 'pragma': 'no-cache', 'vary': 'Origin,Accept-Encoding', 'x-accel-expires': '0', 'x-debug-trace-id': '19432b16b53a7488ff206de4686f4925', 'x-trial-endpoint-call-limit': '100', 'x-trial-endpoint-call-remaining': '99', 'date': 'Sat, 27 Dec 2025 22:51:03 GMT', 'x-envoy-upstream-service-time': '11', 'server': 'envoy', 'via': '1.1 google', 'alt-svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000', 'transfer-encoding': 'chunked'}, status_code: 429, body: {'id': '6cc43158-05de-4534-8bb1-dfe337b29d9e', 'message': "You are using a Trial key, which is limited to 1000 API calls / month. You can continue to use the Trial key for free or upgrade to a Production key with higher rate limits at 'https://dashboard.cohere.com/api-keys'. Contact us on 'https://discord.gg/XW44jPfYJu' or email us at support@cohere.com with any questions"}
111
- 2025-12-28 03:51:03,882 - root - WARNING - Using zero vector as final fallback for query embedding
112
- 2025-12-28 03:51:05,002 - httpx - INFO - HTTP Request: POST https://72888a6e-0dfc-4620-bf85-0b9025951e0c.us-east4-0.gcp.cloud.qdrant.io:6333/collections/rag_embedding/points/query "HTTP/1.1 200 OK"
113
- 2025-12-28 03:51:05,009 - root - INFO - Retrieved 5 valid chunks from Qdrant
114
- 2025-12-28 03:51:05,009 - root - INFO - Retrieved 5 chunks from Qdrant
115
- 2025-12-28 03:51:05,010 - root - INFO - Step 2: Generating response with OpenAI agent...
116
- 2025-12-28 03:51:08,526 - httpx - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
117
- 2025-12-28 03:51:09,866 - root - INFO - Agent response generated successfully. Confidence: 0.30
118
- 2025-12-28 03:51:09,866 - root - INFO - Step 3: Formatting response...
119
- 2025-12-28 03:51:09,869 - root - INFO - Query processed successfully, response ID: resp_b7ce931e
120
- 2026-01-01 15:42:34,257 - root - INFO - Processing query: what about this book??...
121
- 2026-01-01 15:42:34,275 - root - INFO - Step 1: Retrieving relevant content from Qdrant...
122
- 2026-01-01 15:42:34,279 - root - INFO - Retrieving context for query: 'what about this book??' from collection: rag_embedding
123
- 2026-01-01 15:42:36,042 - httpx - INFO - HTTP Request: POST https://api.cohere.com/v1/embed "HTTP/1.1 429 Too Many Requests"
124
- 2026-01-01 15:42:36,079 - root - ERROR - Error embedding query with Cohere: headers: {'access-control-expose-headers': 'X-Debug-Trace-ID', 'cache-control': 'no-cache, no-store, no-transform, must-revalidate, private, max-age=0', 'content-encoding': 'gzip', 'content-type': 'application/json', 'expires': 'Thu, 01 Jan 1970 00:00:00 GMT', 'pragma': 'no-cache', 'vary': 'Origin,Accept-Encoding', 'x-accel-expires': '0', 'x-debug-trace-id': '8021dc50cfd962fdc707cc6726dff6b3', 'date': 'Thu, 01 Jan 2026 10:42:34 GMT', 'x-envoy-upstream-service-time': '27', 'server': 'envoy', 'via': '1.1 google', 'alt-svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000', 'transfer-encoding': 'chunked'}, status_code: 429, body: {'id': '542f6133-103b-4cf2-a9a2-3778972e6290', 'message': 'Please wait and try again later'}
 
 
 
125
  Traceback (most recent call last):
126
- File "/mnt/d/Hackathon/book/backend/rag_agent_api/retrieval.py", line 132, in _embed_query
127
- response = await self.cohere_client.embed(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 
 
 
 
 
129
  ...<3 lines>...
130
  )
131
  ^
132
- File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/cohere/client.py", line 402, in embed
133
- await asyncio.gather(
134
- ^^^^^^^^^^^^^^^^^^^^^
135
- ...<12 lines>...
136
- ),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  ^
138
- File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/cohere/base_client.py", line 2598, in embed
139
- _response = await self._raw_client.embed(
140
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
141
- ...<7 lines>...
142
  )
143
  ^
144
- File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/cohere/raw_base_client.py", line 4637, in embed
145
- raise TooManyRequestsError(
146
- ...<8 lines>...
 
147
  )
148
- cohere.errors.too_many_requests_error.TooManyRequestsError: headers: {'access-control-expose-headers': 'X-Debug-Trace-ID', 'cache-control': 'no-cache, no-store, no-transform, must-revalidate, private, max-age=0', 'content-encoding': 'gzip', 'content-type': 'application/json', 'expires': 'Thu, 01 Jan 1970 00:00:00 GMT', 'pragma': 'no-cache', 'vary': 'Origin,Accept-Encoding', 'x-accel-expires': '0', 'x-debug-trace-id': '8021dc50cfd962fdc707cc6726dff6b3', 'date': 'Thu, 01 Jan 2026 10:42:34 GMT', 'x-envoy-upstream-service-time': '27', 'server': 'envoy', 'via': '1.1 google', 'alt-svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000', 'transfer-encoding': 'chunked'}, status_code: 429, body: {'id': '542f6133-103b-4cf2-a9a2-3778972e6290', 'message': 'Please wait and try again later'}
149
- 2026-01-01 15:42:36,199 - root - WARNING - Using zero vector as final fallback for query embedding
150
- 2026-01-01 15:42:37,468 - httpx - INFO - HTTP Request: POST https://72888a6e-0dfc-4620-bf85-0b9025951e0c.us-east4-0.gcp.cloud.qdrant.io:6333/collections/rag_embedding/points/query "HTTP/1.1 200 OK"
151
- 2026-01-01 15:42:37,506 - root - INFO - Retrieved 5 valid chunks from Qdrant
152
- 2026-01-01 15:42:37,506 - root - INFO - Retrieved 5 chunks from Qdrant
153
- 2026-01-01 15:42:37,507 - root - INFO - Step 2: Generating response with OpenAI agent...
154
- 2026-01-01 15:42:40,636 - httpx - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
155
- 2026-01-01 15:42:42,440 - root - INFO - Step 3: Formatting response...
156
- 2026-01-01 15:42:42,443 - root - INFO - Query processed successfully, response ID: resp_159653f4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026-01-02 21:51:07,979 - root - INFO - OpenRouter agent initialized with model: arcee-ai/trinity-mini:free
2
+ 2026-01-02 21:51:07,980 - root - INFO - OpenRouter agent initialized successfully
3
+ 2026-01-02 21:51:09,509 - httpx - INFO - HTTP Request: GET https://72888a6e-0dfc-4620-bf85-0b9025951e0c.us-east4-0.gcp.cloud.qdrant.io:6333 "HTTP/1.1 200 OK"
4
+ 2026-01-02 21:51:09,616 - root - INFO - Initialized Qdrant retriever for collection: rag_embedding
5
+ 2026-01-02 21:51:09,616 - root - INFO - Qdrant retriever initialized successfully
6
+ 2026-01-02 21:51:09,616 - root - INFO - Application startup completed
7
+ 2026-01-02 21:56:18,858 - root - INFO - Processing query: what about this book?...
8
+ 2026-01-02 21:56:18,858 - root - INFO - Step 1: Retrieving relevant content from Qdrant...
9
+ 2026-01-02 21:56:18,858 - root - INFO - Retrieving context for query: 'what about this book?' from collection: rag_embedding
10
+ 2026-01-02 21:56:20,085 - httpx - INFO - HTTP Request: POST https://api.cohere.com/v1/embed "HTTP/1.1 429 Too Many Requests"
11
+ 2026-01-02 21:56:20,158 - root - ERROR - Error embedding query with Cohere: headers: {'access-control-expose-headers': 'X-Debug-Trace-ID', 'cache-control': 'no-cache, no-store, no-transform, must-revalidate, private, max-age=0', 'content-encoding': 'gzip', 'content-type': 'application/json', 'expires': 'Thu, 01 Jan 1970 00:00:00 GMT', 'pragma': 'no-cache', 'vary': 'Origin,Accept-Encoding', 'x-accel-expires': '0', 'x-debug-trace-id': 'a074d2b0b8f1166420f46cc0e91c3ef8', 'date': 'Fri, 02 Jan 2026 16:56:15 GMT', 'x-envoy-upstream-service-time': '16', 'server': 'envoy', 'via': '1.1 google', 'alt-svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000', 'transfer-encoding': 'chunked'}, status_code: 429, body: {'id': '0d36b9be-f4cc-4559-b824-e673736abec0', 'message': 'Please wait and try again later'}
12
  Traceback (most recent call last):
13
  File "/mnt/d/Hackathon/book/backend/rag_agent_api/retrieval.py", line 132, in _embed_query
14
  response = await self.cohere_client.embed(
 
32
  raise TooManyRequestsError(
33
  ...<8 lines>...
34
  )
35
+ cohere.errors.too_many_requests_error.TooManyRequestsError: headers: {'access-control-expose-headers': 'X-Debug-Trace-ID', 'cache-control': 'no-cache, no-store, no-transform, must-revalidate, private, max-age=0', 'content-encoding': 'gzip', 'content-type': 'application/json', 'expires': 'Thu, 01 Jan 1970 00:00:00 GMT', 'pragma': 'no-cache', 'vary': 'Origin,Accept-Encoding', 'x-accel-expires': '0', 'x-debug-trace-id': 'a074d2b0b8f1166420f46cc0e91c3ef8', 'date': 'Fri, 02 Jan 2026 16:56:15 GMT', 'x-envoy-upstream-service-time': '16', 'server': 'envoy', 'via': '1.1 google', 'alt-svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000', 'transfer-encoding': 'chunked'}, status_code: 429, body: {'id': '0d36b9be-f4cc-4559-b824-e673736abec0', 'message': 'Please wait and try again later'}
36
+ 2026-01-02 21:56:21,542 - root - WARNING - Using zero vector as final fallback for query embedding
37
+ 2026-01-02 21:56:23,990 - httpx - INFO - HTTP Request: POST https://72888a6e-0dfc-4620-bf85-0b9025951e0c.us-east4-0.gcp.cloud.qdrant.io:6333/collections/rag_embedding/points/query "HTTP/1.1 200 OK"
38
+ 2026-01-02 21:56:24,063 - root - INFO - Retrieved 5 valid chunks from Qdrant
39
+ 2026-01-02 21:56:24,063 - root - INFO - Retrieved 5 chunks from Qdrant
40
+ 2026-01-02 21:56:24,063 - root - INFO - Step 2: Generating response with OpenAI agent...
41
+ 2026-01-02 21:56:27,063 - httpx - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
42
+ 2026-01-02 21:56:28,191 - root - INFO - Step 3: Formatting response...
43
+ 2026-01-02 21:56:28,191 - root - INFO - Query processed successfully, response ID: resp_12b8d406
44
+ 2026-01-02 22:18:31,661 - root - INFO - Processing query: what about this book?...
45
+ 2026-01-02 22:18:31,672 - root - INFO - Step 1: Retrieving relevant content from Qdrant...
46
+ 2026-01-02 22:18:31,679 - root - INFO - Retrieving context for query: 'what about this book?' from collection: rag_embedding
47
+ 2026-01-02 22:18:32,663 - httpx - INFO - HTTP Request: POST https://api.cohere.com/v1/embed "HTTP/1.1 429 Too Many Requests"
48
+ 2026-01-02 22:18:32,681 - root - ERROR - Error embedding query with Cohere: headers: {'access-control-expose-headers': 'X-Debug-Trace-ID', 'cache-control': 'no-cache, no-store, no-transform, must-revalidate, private, max-age=0', 'content-encoding': 'gzip', 'content-type': 'application/json', 'expires': 'Thu, 01 Jan 1970 00:00:00 GMT', 'pragma': 'no-cache', 'vary': 'Origin,Accept-Encoding', 'x-accel-expires': '0', 'x-debug-trace-id': '16258d9e56f535c3a9cda7da3a75bc2d', 'date': 'Fri, 02 Jan 2026 17:18:28 GMT', 'x-envoy-upstream-service-time': '13', 'server': 'envoy', 'via': '1.1 google', 'alt-svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000', 'transfer-encoding': 'chunked'}, status_code: 429, body: {'id': '69b64fea-d70d-43f9-a1d9-9fc56b940914', 'message': 'Please wait and try again later'}
49
  Traceback (most recent call last):
50
  File "/mnt/d/Hackathon/book/backend/rag_agent_api/retrieval.py", line 132, in _embed_query
51
  response = await self.cohere_client.embed(
 
69
  raise TooManyRequestsError(
70
  ...<8 lines>...
71
  )
72
+ cohere.errors.too_many_requests_error.TooManyRequestsError: headers: {'access-control-expose-headers': 'X-Debug-Trace-ID', 'cache-control': 'no-cache, no-store, no-transform, must-revalidate, private, max-age=0', 'content-encoding': 'gzip', 'content-type': 'application/json', 'expires': 'Thu, 01 Jan 1970 00:00:00 GMT', 'pragma': 'no-cache', 'vary': 'Origin,Accept-Encoding', 'x-accel-expires': '0', 'x-debug-trace-id': '16258d9e56f535c3a9cda7da3a75bc2d', 'date': 'Fri, 02 Jan 2026 17:18:28 GMT', 'x-envoy-upstream-service-time': '13', 'server': 'envoy', 'via': '1.1 google', 'alt-svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000', 'transfer-encoding': 'chunked'}, status_code: 429, body: {'id': '69b64fea-d70d-43f9-a1d9-9fc56b940914', 'message': 'Please wait and try again later'}
73
+ 2026-01-02 22:18:32,704 - root - WARNING - Using zero vector as final fallback for query embedding
74
+ 2026-01-02 22:18:34,063 - httpx - INFO - HTTP Request: POST https://72888a6e-0dfc-4620-bf85-0b9025951e0c.us-east4-0.gcp.cloud.qdrant.io:6333/collections/rag_embedding/points/query "HTTP/1.1 200 OK"
75
+ 2026-01-02 22:18:34,095 - root - INFO - Retrieved 5 valid chunks from Qdrant
76
+ 2026-01-02 22:18:34,097 - root - INFO - Retrieved 5 chunks from Qdrant
77
+ 2026-01-02 22:18:34,098 - root - INFO - Step 2: Generating response with OpenAI agent...
78
+ 2026-01-02 22:18:38,176 - httpx - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
79
+ 2026-01-02 22:18:40,245 - root - INFO - Agent response generated successfully. Confidence: 0.30
80
+ 2026-01-02 22:18:40,245 - root - INFO - Step 3: Formatting response...
81
+ 2026-01-02 22:18:40,246 - root - INFO - Query processed successfully, response ID: resp_c32d1dbe
82
+ 2026-01-02 22:20:37,532 - root - INFO - Processing query: what about this book?...
83
+ 2026-01-02 22:20:37,533 - root - INFO - Step 1: Retrieving relevant content from Qdrant...
84
+ 2026-01-02 22:20:37,533 - root - INFO - Retrieving context for query: 'what about this book?' from collection: rag_embedding
85
+ 2026-01-02 22:20:47,620 - root - ERROR - Error embedding query with Cohere: [Errno -3] Temporary failure in name resolution
86
+ Traceback (most recent call last):
87
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpx/_transports/default.py", line 101, in map_httpcore_exceptions
88
+ yield
89
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpx/_transports/default.py", line 394, in handle_async_request
90
+ resp = await self._pool.handle_async_request(req)
91
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
92
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpcore/_async/connection_pool.py", line 256, in handle_async_request
93
+ raise exc from None
94
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpcore/_async/connection_pool.py", line 236, in handle_async_request
95
+ response = await connection.handle_async_request(
96
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
97
+ pool_request.request
98
+ ^^^^^^^^^^^^^^^^^^^^
99
+ )
100
+ ^
101
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpcore/_async/connection.py", line 101, in handle_async_request
102
+ raise exc
103
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpcore/_async/connection.py", line 78, in handle_async_request
104
+ stream = await self._connect(request)
105
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
106
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpcore/_async/connection.py", line 124, in _connect
107
+ stream = await self._network_backend.connect_tcp(**kwargs)
108
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
109
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpcore/_backends/auto.py", line 31, in connect_tcp
110
+ return await self._backend.connect_tcp(
111
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
112
+ ...<5 lines>...
113
+ )
114
+ ^
115
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpcore/_backends/anyio.py", line 113, in connect_tcp
116
+ with map_exceptions(exc_map):
117
+ ~~~~~~~~~~~~~~^^^^^^^^^
118
+ File "/home/sobiafatima/miniconda3/lib/python3.13/contextlib.py", line 162, in __exit__
119
+ self.gen.throw(value)
120
+ ~~~~~~~~~~~~~~^^^^^^^
121
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpcore/_exceptions.py", line 14, in map_exceptions
122
+ raise to_exc(exc) from exc
123
+ httpcore.ConnectError: [Errno -3] Temporary failure in name resolution
124
+
125
+ The above exception was the direct cause of the following exception:
126
+
127
  Traceback (most recent call last):
128
  File "/mnt/d/Hackathon/book/backend/rag_agent_api/retrieval.py", line 132, in _embed_query
129
  response = await self.cohere_client.embed(
 
143
  ...<7 lines>...
144
  )
145
  ^
146
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/cohere/raw_base_client.py", line 4554, in embed
147
+ _response = await self._client_wrapper.httpx_client.request(
148
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
149
+ ...<15 lines>...
150
+ )
151
+ ^
152
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/cohere/core/http_client.py", line 412, in request
153
+ response = await self.httpx_client.request(
154
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
155
+ ...<33 lines>...
156
+ )
157
+ ^
158
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpx/_client.py", line 1540, in request
159
+ return await self.send(request, auth=auth, follow_redirects=follow_redirects)
160
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
161
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpx/_client.py", line 1629, in send
162
+ response = await self._send_handling_auth(
163
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
164
+ ...<4 lines>...
165
+ )
166
+ ^
167
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpx/_client.py", line 1657, in _send_handling_auth
168
+ response = await self._send_handling_redirects(
169
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
170
+ ...<3 lines>...
171
  )
172
+ ^
173
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpx/_client.py", line 1694, in _send_handling_redirects
174
+ response = await self._send_single_request(request)
175
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
176
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpx/_client.py", line 1730, in _send_single_request
177
+ response = await transport.handle_async_request(request)
178
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
179
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpx/_transports/default.py", line 393, in handle_async_request
180
+ with map_httpcore_exceptions():
181
+ ~~~~~~~~~~~~~~~~~~~~~~~^^
182
+ File "/home/sobiafatima/miniconda3/lib/python3.13/contextlib.py", line 162, in __exit__
183
+ self.gen.throw(value)
184
+ ~~~~~~~~~~~~~~^^^^^^^
185
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpx/_transports/default.py", line 118, in map_httpcore_exceptions
186
+ raise mapped_exc(message) from exc
187
+ httpx.ConnectError: [Errno -3] Temporary failure in name resolution
188
+ 2026-01-02 22:20:48,168 - root - WARNING - Using zero vector as final fallback for query embedding
189
+ 2026-01-02 22:20:58,240 - root - ERROR - Error retrieving context from Qdrant: [Errno -3] Temporary failure in name resolution
190
  Traceback (most recent call last):
191
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpx/_transports/default.py", line 101, in map_httpcore_exceptions
192
+ yield
193
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpx/_transports/default.py", line 394, in handle_async_request
194
+ resp = await self._pool.handle_async_request(req)
195
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
196
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpcore/_async/connection_pool.py", line 256, in handle_async_request
197
+ raise exc from None
198
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpcore/_async/connection_pool.py", line 236, in handle_async_request
199
+ response = await connection.handle_async_request(
200
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
201
+ pool_request.request
202
+ ^^^^^^^^^^^^^^^^^^^^
203
+ )
204
+ ^
205
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpcore/_async/connection.py", line 101, in handle_async_request
206
+ raise exc
207
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpcore/_async/connection.py", line 78, in handle_async_request
208
+ stream = await self._connect(request)
209
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
210
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpcore/_async/connection.py", line 124, in _connect
211
+ stream = await self._network_backend.connect_tcp(**kwargs)
212
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
213
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpcore/_backends/auto.py", line 31, in connect_tcp
214
+ return await self._backend.connect_tcp(
215
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
216
+ ...<5 lines>...
217
+ )
218
+ ^
219
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpcore/_backends/anyio.py", line 113, in connect_tcp
220
+ with map_exceptions(exc_map):
221
+ ~~~~~~~~~~~~~~^^^^^^^^^
222
+ File "/home/sobiafatima/miniconda3/lib/python3.13/contextlib.py", line 162, in __exit__
223
+ self.gen.throw(value)
224
+ ~~~~~~~~~~~~~~^^^^^^^
225
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpcore/_exceptions.py", line 14, in map_exceptions
226
+ raise to_exc(exc) from exc
227
+ httpcore.ConnectError: [Errno -3] Temporary failure in name resolution
228
+
229
+ The above exception was the direct cause of the following exception:
230
+
231
+ Traceback (most recent call last):
232
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/qdrant_client/http/api_client.py", line 223, in send_inner
233
+ response = await self._async_client.send(request)
234
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
235
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpx/_client.py", line 1629, in send
236
+ response = await self._send_handling_auth(
237
  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
238
+ ...<4 lines>...
239
+ )
240
+ ^
241
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpx/_client.py", line 1657, in _send_handling_auth
242
+ response = await self._send_handling_redirects(
243
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
244
  ...<3 lines>...
245
  )
246
  ^
247
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpx/_client.py", line 1694, in _send_handling_redirects
248
+ response = await self._send_single_request(request)
249
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
250
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpx/_client.py", line 1730, in _send_single_request
251
+ response = await transport.handle_async_request(request)
252
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
253
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpx/_transports/default.py", line 393, in handle_async_request
254
+ with map_httpcore_exceptions():
255
+ ~~~~~~~~~~~~~~~~~~~~~~~^^
256
+ File "/home/sobiafatima/miniconda3/lib/python3.13/contextlib.py", line 162, in __exit__
257
+ self.gen.throw(value)
258
+ ~~~~~~~~~~~~~~^^^^^^^
259
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/httpx/_transports/default.py", line 118, in map_httpcore_exceptions
260
+ raise mapped_exc(message) from exc
261
+ httpx.ConnectError: [Errno -3] Temporary failure in name resolution
262
+
263
+ During handling of the above exception, another exception occurred:
264
+
265
+ Traceback (most recent call last):
266
+ File "/mnt/d/Hackathon/book/backend/rag_agent_api/retrieval.py", line 80, in retrieve_context
267
+ search_results = await self.client.query_points(
268
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
269
+ ...<5 lines>...
270
+ )
271
  ^
272
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/qdrant_client/async_qdrant_client.py", line 400, in query_points
273
+ return await self._client.query_points(
274
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
275
+ ...<16 lines>...
276
  )
277
  ^
278
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/qdrant_client/async_qdrant_remote.py", line 461, in query_points
279
+ query_result = await self.http.search_api.query_points(
280
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
281
+ ...<4 lines>...
282
  )
283
+ ^
284
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/qdrant_client/http/api/search_api.py", line 560, in query_points
285
+ return await self._build_for_query_points(
286
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
287
+ ...<4 lines>...
288
+ )
289
+ ^
290
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/qdrant_client/http/api_client.py", line 184, in request
291
+ return await self.send(request, type_)
292
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
293
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/qdrant_client/http/api_client.py", line 201, in send
294
+ response = await self.middleware(request, self.send_inner)
295
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
296
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/qdrant_client/http/api_client.py", line 245, in __call__
297
+ return await call_next(request)
298
+ ^^^^^^^^^^^^^^^^^^^^^^^^
299
+ File "/home/sobiafatima/miniconda3/lib/python3.13/site-packages/qdrant_client/http/api_client.py", line 225, in send_inner
300
+ raise ResponseHandlingException(e)
301
+ qdrant_client.http.exceptions.ResponseHandlingException: [Errno -3] Temporary failure in name resolution
302
+ 2026-01-02 22:20:58,441 - root - INFO - Retrieved 0 chunks from Qdrant
303
+ 2026-01-02 22:20:58,441 - root - INFO - Step 2: Generating response with OpenAI agent...
304
+ 2026-01-02 22:20:58,441 - root - INFO - Step 3: Formatting response...
305
+ 2026-01-02 22:20:58,441 - root - INFO - Query processed successfully, response ID: resp_ab31a354
book_ingestor.egg-info/PKG-INFO CHANGED
@@ -14,35 +14,60 @@ Requires-Dist: uvicorn>=0.24.0
14
  Requires-Dist: openai>=1.0.0
15
  Requires-Dist: pydantic>=2.0.0
16
 
17
- ---
18
- title: Backend Deploy
19
- emoji: 🚀
20
- colorFrom: blue
21
- colorTo: purple
22
- sdk: docker
23
- pinned: false
24
- ---
25
 
26
- # RAG Agent and API Layer
27
 
28
- This is a FastAPI application that provides a question-answering API using Gemini agents and Qdrant retrieval for RAG (Retrieval Augmented Generation) functionality.
29
 
30
- ## API Endpoints
 
 
 
 
31
 
32
- - `GET /` - Root endpoint with API information
33
- - `POST /ask` - Main question-answering endpoint
34
- - `GET /health` - Health check endpoint
35
- - `GET /ready` - Readiness check endpoint
36
- - `/docs` - API documentation (Swagger UI)
37
- - `/redoc` - API documentation (Redoc)
38
 
39
- ## Configuration
40
 
41
- The application requires the following environment variables:
42
- - `GEMINI_API_KEY` - API key for Google Gemini
43
- - `QDRANT_URL` - URL for Qdrant vector database
44
- - `QDRANT_API_KEY` - API key for Qdrant database
45
 
46
- ## Deployment
47
 
48
- This application is configured for deployment on Hugging Face Spaces using Docker.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  Requires-Dist: openai>=1.0.0
15
  Requires-Dist: pydantic>=2.0.0
16
 
17
+ # Book Content Ingestor & RAG Verification
 
 
 
 
 
 
 
18
 
19
+ A system to extract content from Docusaurus-based book websites, chunk and embed it using Cohere, store embeddings in Qdrant Cloud for RAG applications, and verify the retrieval pipeline functionality.
20
 
21
+ ## Setup
22
 
23
+ 1. Install dependencies using uv:
24
+ ```bash
25
+ cd backend
26
+ uv sync
27
+ ```
28
 
29
+ 2. Create a `.env` file with your API keys:
30
+ ```bash
31
+ cp .env.example .env
32
+ # Edit .env with your actual API keys
33
+ ```
 
34
 
35
+ ## Environment Variables
36
 
37
+ - `COHERE_API_KEY`: Your Cohere API key
38
+ - `QDRANT_URL`: Your Qdrant Cloud URL
39
+ - `QDRANT_API_KEY`: Your Qdrant API key
40
+ - `QDRANT_COLLECTION_NAME`: Name of the collection to use (default: "rag_embedding")
41
 
42
+ ## Usage
43
 
44
+ ### Run the ingestion pipeline:
45
+ ```bash
46
+ cd backend
47
+ uv run python main.py
48
+ ```
49
+
50
+ This will:
51
+ 1. Collect all URLs from the target book (https://sanilahmed.github.io/hackathon-ai-book/)
52
+ 2. Extract text content from each URL
53
+ 3. Chunk the content into fixed-size segments
54
+ 4. Generate embeddings using Cohere
55
+ 5. Store embeddings with metadata in Qdrant Cloud collection named "rag_embedding"
56
+
57
+ ### Run the verification pipeline:
58
+ ```bash
59
+ cd backend
60
+ python -m verify_retrieval.main
61
+ ```
62
+
63
+ Or with specific options:
64
+ ```bash
65
+ python -m verify_retrieval.main --query "transformer architecture in NLP" --top-k 10
66
+ ```
67
+
68
+ The verification system will:
69
+ 1. Load vectors and metadata stored in Qdrant from the original ingestion
70
+ 2. Implement retrieval functions to query Qdrant using sample keywords or phrases
71
+ 3. Validate that retrieved chunks are accurate and relevant
72
+ 4. Check that metadata (URL, title, chunk_id) matches source content
73
+ 5. Log results and confirm the pipeline executes end-to-end without errors
check_qdrant.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python3
"""
Script to check if Qdrant collection exists and has data.

Reads QDRANT_URL and QDRANT_API_KEY from the environment (a local .env file
is loaded first), lists every collection with its point count, and then
inspects the target collection. The target is taken from
QDRANT_COLLECTION_NAME and falls back to "rag_embedding" when unset.
"""
import os
import sys

from qdrant_client import QdrantClient
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Get environment variables
qdrant_url = os.getenv('QDRANT_URL')
qdrant_api_key = os.getenv('QDRANT_API_KEY')
collection_name = os.getenv('QDRANT_COLLECTION_NAME', 'rag_embedding')

if not qdrant_url or not qdrant_api_key:
    print("Error: QDRANT_URL or QDRANT_API_KEY not found in environment variables")
    # sys.exit is always available; the bare exit() helper is injected by the
    # site module and is missing when Python runs with -S or in frozen builds.
    sys.exit(1)

# Initialize Qdrant client
client = QdrantClient(
    url=qdrant_url,
    api_key=qdrant_api_key,
    timeout=30
)

try:
    # List all collections
    collections = client.get_collections()
    print("Available collections:")
    for collection in collections.collections:
        # For newer Qdrant versions, get the collection info to get point count
        collection_info = client.get_collection(collection.name)
        print(f" - {collection.name} (points: {collection_info.points_count})")

    # Check specifically for the target collection
    try:
        collection_info = client.get_collection(collection_name)
        print(f"\nCollection '{collection_name}' exists with {collection_info.points_count} points")

        # Treat a missing count as zero instead of comparing None with an int.
        sample_points = []
        if (collection_info.points_count or 0) > 0:
            # scroll() returns a (records, next_offset) pair; only the records matter here.
            sample_points = client.scroll(
                collection_name=collection_name,
                limit=1
            )[0]

        if len(sample_points) > 0:
            sample_point = sample_points[0]
            # A stored point may carry no payload at all.
            payload = sample_point.payload or {}
            print(f"Sample point ID: {sample_point.id}")
            print(f"Sample point payload keys: {list(payload.keys())}")
            print(f"Sample text preview: {payload.get('text', '')[:100]}...")
        else:
            print(f"Collection '{collection_name}' exists but is empty")

    except Exception as e:
        # NOTE(review): any failure while inspecting the collection lands here,
        # not only "collection missing"; the wording is kept from the original.
        print(f"\nCollection '{collection_name}' does not exist: {e}")

except Exception as e:
    print(f"Error connecting to Qdrant: {e}")
rag_agent_api/README.md CHANGED
@@ -1,17 +1,17 @@
1
  # RAG Agent and API Layer
2
 
3
- A FastAPI-based question-answering system that uses OpenRouter Agents and Qdrant retrieval to generate grounded responses based on book content.
4
 
5
  ## Overview
6
 
7
- The RAG Agent and API Layer provides a question-answering API that retrieves relevant content from Qdrant and uses an OpenRouter agent to generate accurate, source-grounded responses. The system ensures that all answers are based only on the provided context to prevent hallucinations.
8
 
9
  ## Architecture
10
 
11
  The system consists of several key components:
12
 
13
  - **FastAPI Application**: Main entry point for the question-answering API
14
- - **OpenRouter Agent**: Generates responses based on retrieved context
15
  - **Qdrant Retriever**: Retrieves relevant content chunks from Qdrant database
16
  - **Configuration Manager**: Handles environment variables and settings
17
  - **Data Models**: Pydantic models for API requests/responses
@@ -22,7 +22,7 @@ The system consists of several key components:
22
  ### Prerequisites
23
 
24
  - Python 3.9+
25
- - OpenRouter API key
26
  - Qdrant Cloud instance with book content embeddings
27
  - Cohere API key (for query embeddings)
28
 
@@ -42,7 +42,7 @@ The system consists of several key components:
42
 
43
  3. Edit `.env` with your API keys and configuration:
44
  ```env
45
- OPENROUTER_API_KEY=your-openrouter-api-key-here
46
  QDRANT_URL=your-qdrant-instance-url
47
  QDRANT_API_KEY=your-qdrant-api-key
48
  QDRANT_COLLECTION_NAME=rag_embedding
@@ -103,7 +103,7 @@ Root endpoint with API information.
103
 
104
  ### Environment Variables
105
 
106
- - `OPENROUTER_API_KEY`: Your OpenRouter API key
107
  - `QDRANT_URL`: URL of your Qdrant instance
108
  - `QDRANT_API_KEY`: Your Qdrant API key
109
  - `QDRANT_COLLECTION_NAME`: Name of the collection with book embeddings (default: `rag_embedding`)
@@ -123,8 +123,8 @@ Pydantic models for API request/response schemas.
123
  ### Schemas (`schemas.py`)
124
  Additional schemas for internal data structures.
125
 
126
- ### Agent (`openrouter_agent.py`)
127
- OpenRouter agent implementation with context injection and response validation.
128
 
129
  ### Retrieval (`retrieval.py`)
130
  Qdrant integration for content retrieval with semantic search.
@@ -160,7 +160,7 @@ pytest
160
 
161
  # Run specific test files
162
  pytest tests/test_api.py
163
- pytest tests/test_openrouter_agent.py
164
  pytest tests/test_retrieval.py
165
  ```
166
 
 
1
  # RAG Agent and API Layer
2
 
3
+ A FastAPI-based question-answering system that uses OpenAI Agents and Qdrant retrieval to generate grounded responses based on book content.
4
 
5
  ## Overview
6
 
7
+ The RAG Agent and API Layer provides a question-answering API that retrieves relevant content from Qdrant and uses an OpenAI agent to generate accurate, source-grounded responses. The system ensures that all answers are based only on the provided context to prevent hallucinations.
8
 
9
  ## Architecture
10
 
11
  The system consists of several key components:
12
 
13
  - **FastAPI Application**: Main entry point for the question-answering API
14
+ - **OpenAI Agent**: Generates responses based on retrieved context
15
  - **Qdrant Retriever**: Retrieves relevant content chunks from Qdrant database
16
  - **Configuration Manager**: Handles environment variables and settings
17
  - **Data Models**: Pydantic models for API requests/responses
 
22
  ### Prerequisites
23
 
24
  - Python 3.9+
25
+ - OpenAI API key
26
  - Qdrant Cloud instance with book content embeddings
27
  - Cohere API key (for query embeddings)
28
 
 
42
 
43
  3. Edit `.env` with your API keys and configuration:
44
  ```env
45
+ OPENAI_API_KEY=your-openai-api-key-here
46
  QDRANT_URL=your-qdrant-instance-url
47
  QDRANT_API_KEY=your-qdrant-api-key
48
  QDRANT_COLLECTION_NAME=rag_embedding
 
103
 
104
  ### Environment Variables
105
 
106
+ - `OPENAI_API_KEY`: Your OpenAI API key
107
  - `QDRANT_URL`: URL of your Qdrant instance
108
  - `QDRANT_API_KEY`: Your Qdrant API key
109
  - `QDRANT_COLLECTION_NAME`: Name of the collection with book embeddings (default: `rag_embedding`)
 
123
  ### Schemas (`schemas.py`)
124
  Additional schemas for internal data structures.
125
 
126
+ ### Agent (`agent.py`)
127
+ Google Gemini agent implementation (`GeminiAgent`) with context injection and response validation.
128
 
129
  ### Retrieval (`retrieval.py`)
130
  Qdrant integration for content retrieval with semantic search.
 
160
 
161
  # Run specific test files
162
  pytest tests/test_api.py
163
+ pytest tests/test_agent.py
164
  pytest tests/test_retrieval.py
165
  ```
166
 
rag_agent_api/__init__.py CHANGED
@@ -10,7 +10,7 @@ __license__ = "MIT"
10
  # Import main components for easy access
11
  from .main import app
12
  from .config import Config, get_config, validate_config
13
- from .openrouter_agent import OpenRouterAgent
14
  from .retrieval import QdrantRetriever
15
 
16
  # Define what gets imported with "from rag_agent_api import *"
@@ -19,6 +19,6 @@ __all__ = [
19
  "Config",
20
  "get_config",
21
  "validate_config",
22
- "OpenRouterAgent",
23
  "QdrantRetriever"
24
  ]
 
10
  # Import main components for easy access
11
  from .main import app
12
  from .config import Config, get_config, validate_config
13
+ from .agent import GeminiAgent
14
  from .retrieval import QdrantRetriever
15
 
16
  # Define what gets imported with "from rag_agent_api import *"
 
19
  "Config",
20
  "get_config",
21
  "validate_config",
22
+ "GeminiAgent",
23
  "QdrantRetriever"
24
  ]
rag_agent_api/__pycache__/__init__.cpython-313.pyc CHANGED
Binary files a/rag_agent_api/__pycache__/__init__.cpython-313.pyc and b/rag_agent_api/__pycache__/__init__.cpython-313.pyc differ
 
rag_agent_api/__pycache__/agent.cpython-313.pyc CHANGED
Binary files a/rag_agent_api/__pycache__/agent.cpython-313.pyc and b/rag_agent_api/__pycache__/agent.cpython-313.pyc differ
 
rag_agent_api/__pycache__/config.cpython-313.pyc CHANGED
Binary files a/rag_agent_api/__pycache__/config.cpython-313.pyc and b/rag_agent_api/__pycache__/config.cpython-313.pyc differ
 
rag_agent_api/__pycache__/main.cpython-313.pyc CHANGED
Binary files a/rag_agent_api/__pycache__/main.cpython-313.pyc and b/rag_agent_api/__pycache__/main.cpython-313.pyc differ
 
rag_agent_api/__pycache__/openrouter_agent.cpython-313.pyc CHANGED
Binary files a/rag_agent_api/__pycache__/openrouter_agent.cpython-313.pyc and b/rag_agent_api/__pycache__/openrouter_agent.cpython-313.pyc differ
 
rag_agent_api/__pycache__/retrieval.cpython-313.pyc CHANGED
Binary files a/rag_agent_api/__pycache__/retrieval.cpython-313.pyc and b/rag_agent_api/__pycache__/retrieval.cpython-313.pyc differ
 
rag_agent_api/agent.py ADDED
@@ -0,0 +1,363 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Google Gemini Agent module for the RAG Agent and API Layer system.
3
+
4
+ This module provides functionality for creating and managing a Google Gemini agent
5
+ that generates responses based on retrieved context.
6
+ """
7
+ import asyncio
8
+ import logging
9
+ from typing import List, Dict, Any, Optional
10
+ import google.generativeai as genai
11
+ from .config import get_config
12
+ from .schemas import AgentContext, AgentResponse, SourceChunkSchema
13
+ from .utils import format_confidence_score
14
+
15
+
16
+ class GeminiAgent:
17
+ """
18
+ A class to manage the Google Gemini agent for generating responses based on context.
19
+ """
20
def __init__(self, model_name: str = "gemini-2.5-flash"):
    """
    Set up the Gemini client and remember the generation defaults.

    Args:
        model_name: Name of the Gemini model to use (default: gemini-2.5-flash)

    Raises:
        ValueError: If the loaded configuration has no Gemini API key.
    """
    settings = get_config()
    gemini_key = settings.gemini_api_key

    # Fail fast: nothing below can work without a key.
    if not gemini_key:
        raise ValueError("GEMINI_API_KEY environment variable not set")

    # Register the key with the SDK, then build the model handle.
    genai.configure(api_key=gemini_key)
    self.model = genai.GenerativeModel(model_name)
    self.model_name = model_name
    self.default_temperature = settings.default_temperature

    logging.info(f"Gemini agent initialized with model: {model_name}")
42
+
43
async def generate_response(self, context: AgentContext) -> AgentResponse:
    """
    Generate a response based on the provided context.

    Short-circuits with the fixed "not found" answer when no chunks were
    retrieved, when the combined chunk text is shorter than 10 characters,
    or when the model itself replies with the "not found" sentence. Any
    exception raised while prompting the model is logged and converted into
    the same fixed answer with ``is_valid=False``.

    Args:
        context: AgentContext containing the query and retrieved context chunks.
            If it exposes a non-None ``temperature`` attribute, that value is
            used for generation; otherwise ``self.default_temperature`` is used.

    Returns:
        AgentResponse with the generated answer and metadata
    """
    def not_found(details: str, is_valid: bool = True) -> AgentResponse:
        # Single place that builds the fixed "not found" answer.
        return AgentResponse(
            raw_response="I could not find this information in the book.",
            used_sources=[],
            confidence_score=0.0,
            is_valid=is_valid,
            validation_details=details,
            unsupported_claims=[]
        )

    # Check if retrieved context is empty (no chunks at all)
    if not context.retrieved_chunks:
        return not_found("No context chunks retrieved from the database")

    # Check if context is insufficient (very short content)
    total_context_length = sum(len(chunk.content) for chunk in context.retrieved_chunks)
    if total_context_length < 10:  # Much lower threshold, but still meaningful
        return not_found("No sufficient context provided to answer the question")

    try:
        # Gemini receives one combined prompt: grounding instructions first,
        # then the context + question.
        system_message = self._create_system_message(context)
        user_message = self._create_user_message(context)
        full_prompt = f"{system_message}\n\n{user_message}"

        # Prefer a per-request temperature when the context provides one.
        # (The previous code passed context.source_policy here by mistake.)
        temperature = getattr(context, "temperature", None)
        if temperature is None:
            temperature = self.default_temperature

        chat = self.model.start_chat()
        response = await chat.send_message_async(
            full_prompt,
            generation_config={
                "temperature": temperature,
                "max_output_tokens": 1000
            }
        )

        # Extract the response text
        raw_response = response.text if response and hasattr(response, 'text') else str(response)

        # If the response indicates no information was found, return the exact message
        if "I could not find this information in the book" in raw_response:
            return not_found("No relevant information found in the provided context")

        # Determine which sources were used (this is a simplified approach)
        used_sources = self._identify_used_sources(raw_response, context.retrieved_chunks)

        # Calculate confidence score (based on similarity scores of used sources)
        confidence_score = self._calculate_confidence_score(used_sources, context.retrieved_chunks)

        # Validate that the response is grounded in the provided context
        grounding_validation = self._validate_response_grounding(
            raw_response, context.retrieved_chunks, context.query
        )

        agent_response = AgentResponse(
            raw_response=raw_response,
            used_sources=used_sources,
            confidence_score=confidence_score,
            is_valid=grounding_validation["is_valid"],
            validation_details=grounding_validation["details"],
            unsupported_claims=grounding_validation["unsupported_claims"]
        )

        logging.info(f"Agent response generated successfully. Confidence: {confidence_score:.2f}")
        return agent_response

    except Exception as e:
        logging.error(f"Error generating response from Google Gemini agent: {e}", exc_info=True)
        # Return the specific message when there's an error
        return not_found(f"Error generating response: {str(e)}", is_valid=False)
147
+
148
+ def _create_system_message(self, context: AgentContext) -> str:
149
+ """
150
+ Create the system message that instructs the agent on how to behave.
151
+
152
+ Args:
153
+ context: AgentContext containing the query and retrieved context chunks
154
+
155
+ Returns:
156
+ Formatted system message string
157
+ """
158
+ system_prompt = """You are a documentation-based assistant.
159
+ Answer ONLY using the provided context from the book
160
+ "Physical AI & Humanoid Robotics".
161
+ If the answer is not found, reply EXACTLY:
162
+ "I could not find this information in the book."""
163
+ return system_prompt
164
+
165
+ def _create_user_message(self, context: AgentContext) -> str:
166
+ """
167
+ Create the user message containing the query.
168
+
169
+ Args:
170
+ context: AgentContext containing the query and retrieved context chunks
171
+
172
+ Returns:
173
+ Formatted user message string
174
+ """
175
+ return f"""CONTEXT:
176
+ {self._format_context_chunks(context.retrieved_chunks)}
177
+
178
+ QUESTION:
179
+ {context.query}"""
180
+
181
+ def _format_context_chunks(self, chunks: List[SourceChunkSchema]) -> str:
182
+ """
183
+ Format the context chunks for the prompt.
184
+
185
+ Args:
186
+ chunks: List of source chunks to format
187
+
188
+ Returns:
189
+ Formatted context string
190
+ """
191
+ if not chunks:
192
+ return ""
193
+
194
+ formatted_chunks = []
195
+ for i, chunk in enumerate(chunks):
196
+ formatted_chunks.append(f"[Chunk {i+1}]\n{chunk.content}\n[/Chunk {i+1}]")
197
+
198
+ return "\n".join(formatted_chunks)
199
+
200
+ def _create_context_messages(self, context: AgentContext) -> List[Dict[str, str]]:
201
+ """
202
+ Create context messages from the retrieved chunks.
203
+ With the new format, context is now provided in the user message,
204
+ so this method returns an empty list to avoid duplication.
205
+
206
+ Args:
207
+ context: AgentContext containing the query and retrieved context chunks
208
+
209
+ Returns:
210
+ Empty list since context is now in user message
211
+ """
212
+ return []
213
+
214
+ def _identify_used_sources(self, response: str, chunks: List[SourceChunkSchema]) -> List[str]:
215
+ """
216
+ Identify which sources were likely used in the response.
217
+ This is a simplified approach - in a real implementation, you might use
218
+ more sophisticated techniques like semantic similarity.
219
+
220
+ Args:
221
+ response: The agent's response text
222
+ chunks: List of source chunks that were provided to the agent
223
+
224
+ Returns:
225
+ List of source IDs that were likely used
226
+ """
227
+ used_sources = []
228
+ response_lower = response.lower()
229
+
230
+ for chunk in chunks:
231
+ # Check if any significant words from the chunk appear in the response
232
+ content_words = set(chunk.content.lower().split()[:20]) # Check first 20 words
233
+ response_words = set(response_lower.split())
234
+
235
+ # If there's significant overlap, consider this chunk as used
236
+ overlap = content_words.intersection(response_words)
237
+ if len(overlap) > 2: # Arbitrary threshold
238
+ used_sources.append(chunk.id)
239
+
240
+ # If no sources were identified, return all sources (conservative approach)
241
+ if not used_sources:
242
+ used_sources = [chunk.id for chunk in chunks]
243
+
244
+ return used_sources
245
+
246
+ def _calculate_confidence_score(self, used_sources: List[str], chunks: List[SourceChunkSchema]) -> float:
247
+ """
248
+ Calculate a confidence score based on the quality of the used sources.
249
+
250
+ Args:
251
+ used_sources: List of source IDs that were used
252
+ chunks: List of all source chunks that were provided to the agent
253
+
254
+ Returns:
255
+ Confidence score between 0.0 and 1.0
256
+ """
257
+ if not used_sources:
258
+ return 0.1 # Low confidence if no sources were used
259
+
260
+ # Calculate average similarity score of used sources
261
+ total_similarity = 0.0
262
+ used_count = 0
263
+
264
+ for chunk in chunks:
265
+ if chunk.id in used_sources:
266
+ total_similarity += chunk.similarity_score
267
+ used_count += 1
268
+
269
+ if used_count == 0:
270
+ return 0.1 # Low confidence if no matching chunks found
271
+
272
+ avg_similarity = total_similarity / used_count
273
+
274
+ # If similarity scores are very low (e.g., due to embedding issues),
275
+ # but we have content, still provide some confidence
276
+ if avg_similarity < 0.1 and len(used_sources) > 0:
277
+ # If we have relevant content but low similarity scores,
278
+ # it might be due to embedding issues, not lack of relevance
279
+ # So we'll set a minimum confidence if content exists
280
+ return 0.3 # Low but not zero confidence
281
+ else:
282
+ # Normalize the confidence score (adjust based on your requirements)
283
+ # Higher similarity scores contribute to higher confidence
284
+ confidence = avg_similarity
285
+
286
+ return format_confidence_score(confidence)
287
+
288
+ def _validate_response_grounding(self, response: str, chunks: List[SourceChunkSchema], query: str) -> Dict[str, Any]:
289
+ """
290
+ Validate that the response is grounded in the provided context.
291
+
292
+ Args:
293
+ response: The agent's response text
294
+ chunks: List of source chunks that were provided to the agent
295
+ query: The original query
296
+
297
+ Returns:
298
+ Dictionary with validation results
299
+ """
300
+ # Check if the response contains elements from the provided context
301
+ response_lower = response.lower()
302
+ context_text = " ".join([chunk.content.lower() for chunk in chunks])
303
+
304
+ # Simple heuristic: check if response contains significant terms from context
305
+ response_words = set(response_lower.split())
306
+ context_words = set(context_text.split())
307
+
308
+ # Calculate overlap between response and context
309
+ overlap = response_words.intersection(context_words)
310
+ total_response_words = len(response_words)
311
+ overlap_count = len(overlap)
312
+
313
+ # If less than 30% of response words come from context, flag as potentially ungrounded
314
+ is_grounded = True
315
+ unsupported_claims = []
316
+
317
+ if total_response_words > 0:
318
+ grounding_ratio = overlap_count / total_response_words
319
+ is_grounded = grounding_ratio >= 0.3 # At least 30% of words should come from context
320
+
321
+ # For now, we'll just return the basic validation
322
+ # In a more sophisticated implementation, you'd analyze the response more deeply
323
+ details = f"Response grounding validation completed. Context overlap ratio: {overlap_count/total_response_words if total_response_words > 0 else 0:.2f}"
324
+
325
+ return {
326
+ "is_valid": is_grounded,
327
+ "details": details,
328
+ "unsupported_claims": unsupported_claims
329
+ }
330
+
331
async def validate_response_quality(self, response: str, context: AgentContext) -> bool:
    """
    Validate the quality of the agent's response.

    The only check performed is that the response is not empty or
    whitespace-only. Fallback phrases such as "I don't know" are accepted
    whether or not context chunks were retrieved: the previous implementation
    branched on them, but every branch returned True, so that logic had no
    effect and has been removed.

    Args:
        response: The agent's response text.
        context: AgentContext containing the query and retrieved context
            chunks. It is not consulted by the current check.

    Returns:
        False (after logging a warning) if the response is empty or only
        whitespace, True otherwise.
    """
    if not response or not response.strip():
        logging.warning("Agent returned an empty response")
        return False

    # A stricter implementation would validate the response against `context`.
    return True
360
+
361
+
362
+ # Global agent instance (if needed)
363
+ # agent_instance = OpenAIAgent()
rag_agent_api/config.py CHANGED
@@ -19,7 +19,6 @@ class Config:
19
 
20
  def __init__(self):
21
  """Initialize configuration by loading environment variables."""
22
- self.openai_api_key = os.getenv('OPENAI_API_KEY')
23
  self.cohere_api_key = os.getenv('COHERE_API_KEY')
24
  self.openrouter_api_key = os.getenv('OPENROUTER_API_KEY')
25
  self.qdrant_url = os.getenv('QDRANT_URL')
 
19
 
20
  def __init__(self):
21
  """Initialize configuration by loading environment variables."""
 
22
  self.cohere_api_key = os.getenv('COHERE_API_KEY')
23
  self.openrouter_api_key = os.getenv('OPENROUTER_API_KEY')
24
  self.qdrant_url = os.getenv('QDRANT_URL')
rag_agent_api/main.py CHANGED
@@ -82,22 +82,22 @@ async def health_check() -> HealthResponse:
82
  HealthResponse with status of services
83
  """
84
  # Check if all required components are initialized
85
- openrouter_status = "up" if agent else "down"
86
  qdrant_status = "up" if retriever else "down"
87
  agent_status = "up" if agent else "down"
88
 
89
  # Determine overall status
90
  overall_status = "healthy"
91
- if openrouter_status == "down" or qdrant_status == "down":
92
  overall_status = "unhealthy"
93
- elif openrouter_status == "degraded" or qdrant_status == "degraded":
94
  overall_status = "degraded"
95
 
96
  return HealthResponse(
97
  status=overall_status,
98
  timestamp=format_timestamp(),
99
  services={
100
- "openrouter": openrouter_status,
101
  "qdrant": qdrant_status,
102
  "agent": agent_status
103
  }
@@ -194,7 +194,7 @@ async def root() -> Dict[str, Any]:
194
  return {
195
  "message": "RAG Agent and API Layer",
196
  "version": "1.0.0",
197
- "description": "Question-answering API using OpenRouter Agents and Qdrant retrieval",
198
  "endpoints": {
199
  "POST /ask": "Main question-answering endpoint",
200
  "GET /health": "Health check endpoint",
@@ -243,9 +243,4 @@ async def readiness_check() -> Dict[str, str]:
243
  if retriever and agent:
244
  return {"status": "ready"}
245
  else:
246
- raise HTTPException(status_code=503, detail="Service not ready")
247
-
248
-
249
- if __name__ == "__main__":
250
- import uvicorn
251
- uvicorn.run(app, host="0.0.0.0", port=8000)
 
82
  HealthResponse with status of services
83
  """
84
  # Check if all required components are initialized
85
+ gemini_status = "up" if agent else "down"
86
  qdrant_status = "up" if retriever else "down"
87
  agent_status = "up" if agent else "down"
88
 
89
  # Determine overall status
90
  overall_status = "healthy"
91
+ if gemini_status == "down" or qdrant_status == "down":
92
  overall_status = "unhealthy"
93
+ elif gemini_status == "degraded" or qdrant_status == "degraded":
94
  overall_status = "degraded"
95
 
96
  return HealthResponse(
97
  status=overall_status,
98
  timestamp=format_timestamp(),
99
  services={
100
+ "gemini": gemini_status,
101
  "qdrant": qdrant_status,
102
  "agent": agent_status
103
  }
 
194
  return {
195
  "message": "RAG Agent and API Layer",
196
  "version": "1.0.0",
197
+ "description": "Question-answering API using OpenAI Agents and Qdrant retrieval",
198
  "endpoints": {
199
  "POST /ask": "Main question-answering endpoint",
200
  "GET /health": "Health check endpoint",
 
243
  if retriever and agent:
244
  return {"status": "ready"}
245
  else:
246
+ raise HTTPException(status_code=503, detail="Service not ready")
 
 
 
 
 
rag_agent_api/retrieval.py CHANGED
@@ -76,6 +76,16 @@ class QdrantRetriever:
76
  # Embed the query using Cohere
77
  query_embedding = await self._embed_query(query)
78
 
 
 
 
 
 
 
 
 
 
 
79
  # Perform semantic search in Qdrant
80
  search_results = await self.client.query_points(
81
  collection_name=self.collection_name,
@@ -116,53 +126,134 @@ class QdrantRetriever:
116
  # Return empty list instead of raising exception to allow graceful handling
117
  return []
118
 
119
- async def _embed_query(self, query: str) -> List[float]:
120
  """
121
- Embed the query using Cohere to prepare for semantic search.
122
 
123
  Args:
124
- query: The query string to embed
 
125
 
126
  Returns:
127
- List of floats representing the query embedding
128
  """
129
  try:
130
- # Use Cohere to embed the query
131
- # The original book content was likely embedded with Cohere embed-english-v3.0
132
- response = await self.cohere_client.embed(
133
- texts=[query],
134
- model="embed-english-v3.0", # 1024-dimensional embedding model
135
- input_type="search_query" # Specify this is a search query
 
 
 
 
 
136
  )
137
 
138
- # Extract the embedding from the response
139
- embedding = response.embeddings[0] # Get the first (and only) embedding
140
- return embedding
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  except Exception as e:
142
- logging.error(f"Error embedding query with Cohere: {e}", exc_info=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
 
144
- # Try using OpenAI embeddings as fallback if available
 
145
  try:
146
- from openai import OpenAI
147
- from .config import get_config
148
- config = get_config()
149
-
150
- if config.openai_api_key:
151
- client = OpenAI(api_key=config.openai_api_key)
152
- response = client.embeddings.create(
153
- input=query,
154
- model="text-embedding-ada-002"
155
- )
156
- embedding = response.data[0].embedding
157
- logging.info("Successfully used OpenAI embedding as fallback")
158
- return embedding
159
- except Exception as openai_error:
160
- logging.warning(f"OpenAI fallback also failed: {openai_error}")
161
-
162
- # If both fail, return a zero vector of the correct size (1024) as a last resort
163
- # This will result in poor semantic matches but won't crash the system
164
- logging.warning("Using zero vector as final fallback for query embedding")
165
- return [0.0] * 1024
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
 
167
  def _validate_chunk(self, chunk: SourceChunkSchema) -> bool:
168
  """
 
76
  # Embed the query using Cohere
77
  query_embedding = await self._embed_query(query)
78
 
79
+ # Check if we got a zero vector fallback (indicating embedding service failure)
80
+ is_zero_vector = all(x == 0.0 for x in query_embedding)
81
+
82
+ if is_zero_vector:
83
+ # If we have a zero vector, try a different approach - keyword search
84
+ logging.warning("Zero vector detected, attempting keyword-based fallback search")
85
+ retrieved_chunks = await self._keyword_search_fallback(query, top_k)
86
+ logging.info(f"Keyword fallback search retrieved {len(retrieved_chunks)} chunks from Qdrant")
87
+ return retrieved_chunks
88
+
89
  # Perform semantic search in Qdrant
90
  search_results = await self.client.query_points(
91
  collection_name=self.collection_name,
 
126
  # Return empty list instead of raising exception to allow graceful handling
127
  return []
128
 
129
async def _keyword_search_fallback(self, query: str, top_k: int = 5) -> List[SourceChunkSchema]:
    """
    Fallback search using keyword matching when the embedding service is unavailable.

    Scrolls up to 10000 points from the collection (payload only, no vectors)
    and scores each one by the fraction of distinct query words that occur in
    the point's ``text`` payload. Points with a non-zero score, or whose text
    contains the whole query as a substring, are converted to
    SourceChunkSchema objects and kept if ``_validate_chunk`` accepts them.

    Args:
        query: The user's query string.
        top_k: Number of results to return (default: 5).

    Returns:
        Up to ``top_k`` chunks ordered by descending score. An empty list is
        returned for a blank query or when any error occurs (the error is
        logged).
    """
    try:
        query_lower = query.lower()
        query_words = set(query_lower.split())
        if not query_words:
            # A blank query is a substring of every text, so it would
            # otherwise return arbitrary chunks with a score of 0.
            return []

        # Simple approach: fetch points in one scroll call and filter locally.
        # Only the first page (at most 10000 points) is examined; larger
        # collections would need pagination or Qdrant's text indexing.
        scroll_result = await self.client.scroll(
            collection_name=self.collection_name,
            limit=10000,
            with_payload=True,
            with_vectors=False
        )

        # The result may be a (points, next_offset) tuple or a plain sequence.
        points = scroll_result[0] if isinstance(scroll_result, tuple) else scroll_result

        def read_field(payload, name, default):
            """Read a payload field from either a dict or an attribute-style object."""
            if isinstance(payload, dict):
                return payload.get(name, default)
            return getattr(payload, name, default)

        scored_chunks = []
        for point in points:
            payload = point.payload if hasattr(point, 'payload') else point
            # Treat a missing or None text as empty so that one bad point
            # cannot abort the whole search.
            content = read_field(payload, 'text', '') or ''
            content_lower = content.lower()

            # Query coverage: share of distinct query words present in the text
            # (this is not a Jaccard similarity).
            overlap = query_words.intersection(content_lower.split())
            score = len(overlap) / len(query_words)

            if score > 0 or query_lower in content_lower:  # Only include if there's some match
                chunk = SourceChunkSchema(
                    id=point.id if hasattr(point, 'id') else getattr(point, 'point_id', None),
                    url=read_field(payload, 'url', ''),
                    title=read_field(payload, 'title', ''),
                    content=content,
                    similarity_score=score,
                    chunk_index=read_field(payload, 'chunk_index', 0)
                )

                if self._validate_chunk(chunk):
                    scored_chunks.append((chunk, score))

        # Sort by score and return top_k
        scored_chunks.sort(key=lambda pair: pair[1], reverse=True)
        return [chunk for chunk, _ in scored_chunks[:top_k]]

    except Exception as e:
        logging.error(f"Error in keyword fallback search: {e}", exc_info=True)
        return []
194
+
195
+ async def _embed_query(self, query: str) -> List[float]:
196
+ """
197
+ Embed the query using Cohere to prepare for semantic search with retry logic for rate limits.
198
+
199
+ Args:
200
+ query: The query string to embed
201
+
202
+ Returns:
203
+ List of floats representing the query embedding
204
+ """
205
+ import time
206
+ import random
207
+ from cohere.errors.too_many_requests_error import TooManyRequestsError
208
 
209
+ # Try Cohere with retry logic for rate limits
210
+ for attempt in range(3): # Try up to 3 times
211
  try:
212
+ # Use Cohere to embed the query
213
+ # The original book content was likely embedded with Cohere embed-english-v3.0
214
+ response = await self.cohere_client.embed(
215
+ texts=[query],
216
+ model="embed-english-v3.0", # 1024-dimensional embedding model
217
+ input_type="search_query" # Specify this is a search query
218
+ )
219
+
220
+ # Extract the embedding from the response
221
+ embedding = response.embeddings[0] # Get the first (and only) embedding
222
+ return embedding
223
+ except TooManyRequestsError as e:
224
+ if attempt < 2: # Don't wait after the last attempt
225
+ # Exponential backoff with jitter
226
+ wait_time = (2 ** attempt) + random.uniform(0, 1)
227
+ logging.warning(f"Cohere rate limited (attempt {attempt + 1}), waiting {wait_time:.2f}s: {e}")
228
+ await asyncio.sleep(wait_time)
229
+ else:
230
+ logging.error(f"Cohere rate limited after {attempt + 1} attempts: {e}")
231
+ except Exception as e:
232
+ logging.error(f"Error embedding query with Cohere: {e}", exc_info=True)
233
+ break # Don't retry for other types of errors
234
+
235
+ # If Cohere fails, try using OpenAI embeddings as fallback if available
236
+ try:
237
+ from openai import OpenAI
238
+ from .config import get_config
239
+ config = get_config()
240
+
241
+ if config.openai_api_key:
242
+ client = OpenAI(api_key=config.openai_api_key)
243
+ response = client.embeddings.create(
244
+ input=query,
245
+ model="text-embedding-ada-002"
246
+ )
247
+ embedding = response.data[0].embedding
248
+ logging.info("Successfully used OpenAI embedding as fallback")
249
+ return embedding
250
+ except Exception as openai_error:
251
+ logging.warning(f"OpenAI fallback also failed: {openai_error}")
252
+
253
+ # If all fail, return a zero vector of the correct size (1024) as a last resort
254
+ # This will result in poor semantic matches but won't crash the system
255
+ logging.warning("Using zero vector as final fallback for query embedding")
256
+ return [0.0] * 1024
257
 
258
  def _validate_chunk(self, chunk: SourceChunkSchema) -> bool:
259
  """
requirements.txt CHANGED
@@ -1,12 +1,10 @@
1
- # Backend Service Dependencies
2
- requests>=2.31.0
3
- beautifulsoup4>=4.12.0
4
- cohere>=4.9.0
5
- qdrant-client>=1.7.0
6
  python-dotenv>=1.0.0
7
- fastapi>=0.104.0
8
- uvicorn>=0.24.0
9
- openai>=1.0.0
10
- pydantic>=2.0.0
11
- numpy>=1.21.0
12
- httpx>=0.27.0
 
1
+ fastapi>=0.104.1
2
+ uvicorn[standard]>=0.24.0
3
+ qdrant-client>=1.8.0
 
 
4
  python-dotenv>=1.0.0
5
+ httpx>=0.25.0
6
+ cohere>=4.9.0
7
+ google-generativeai>=0.4.0
8
+ openai>=1.6.0
9
+ pydantic>=2.5.0
10
+ typing-extensions>=4.8.0
test_retrieval.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Test script to directly test the Qdrant retrieval functionality
4
+ """
5
+ import asyncio
6
+ import os
7
+ from dotenv import load_dotenv
8
+ from rag_agent_api.retrieval import QdrantRetriever
9
+ from rag_agent_api.config import get_config
10
+
11
+ # Load environment variables
12
+ load_dotenv()
13
+
14
async def test_retrieval():
    """
    Manually exercise the Qdrant retrieval path and print the outcome of each step.

    Steps: check that the collection exists, print its point count, embed a
    sample query, then run ``retrieve_context`` and print the first two
    results. A zero-vector embedding is reported separately from a successful
    one so that a silent embedding fallback is not mistaken for success.
    """
    print("Testing Qdrant retrieval functionality...")

    # Create a QdrantRetriever instance
    retriever = QdrantRetriever()

    # Defined up front so the search step never depends on the embedding step.
    query = "what about this book?"

    print("1. Testing collection existence...")
    exists = await retriever.validate_collection_exists()
    print(f" Collection exists: {exists}")

    if exists:
        print("2. Getting total points in collection...")
        total_points = await retriever.get_total_points()
        print(f" Total points: {total_points}")

    print("3. Testing query embedding...")
    try:
        embedding = await retriever._embed_query(query)
    except Exception as e:
        print(f" Query embedding failed: {e}")
        return

    if any(value != 0.0 for value in embedding):
        print(f" Query embedding successful, length: {len(embedding)}")
    else:
        # All-zero output is treated as a failed embedding, not a real one.
        print(f" Query embedding returned a zero vector (embedding fallback), length: {len(embedding)}")

    print("4. Testing direct search...")
    try:
        results = await retriever.retrieve_context(query, top_k=5)
        print(f" Retrieved {len(results)} results")

        if results:
            print(" Sample results:")
            for i, result in enumerate(results[:2]):  # Show first 2 results
                print(f" Result {i+1}:")
                print(f" ID: {result.id}")
                print(f" Title: {result.title}")
                print(f" Content preview: {result.content[:100]}...")
                print(f" Similarity: {result.similarity_score}")
                print(f" URL: {result.url}")
        else:
            print(" No results retrieved - this indicates the main issue")
    except Exception as e:
        print(f" Direct search failed: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    asyncio.run(test_retrieval())
tests/test_integration.py CHANGED
@@ -7,7 +7,7 @@ from fastapi.testclient import TestClient
7
  from unittest.mock import Mock, patch, AsyncMock
8
  from rag_agent_api.main import app, retriever, agent
9
  from rag_agent_api.retrieval import QdrantRetriever
10
- from rag_agent_api.openrouter_agent import OpenRouterAgent
11
  from rag_agent_api.schemas import SourceChunkSchema, AgentResponse, AgentContext
12
 
13
 
@@ -17,13 +17,13 @@ def test_full_query_flow_with_mocked_components():
17
  'QDRANT_URL': 'http://test-qdrant:6333',
18
  'QDRANT_API_KEY': 'test-api-key',
19
  'COHERE_API_KEY': 'test-cohere-key',
20
- 'OPENROUTER_API_KEY': 'test-openrouter-key'
21
  }):
22
  with patch('rag_agent_api.main.QdrantRetriever') as mock_retriever_class:
23
- with patch('rag_agent_api.main.OpenRouterAgent') as mock_agent_class:
24
  # Create mock instances
25
  mock_retriever = Mock(spec=QdrantRetriever)
26
- mock_agent = Mock(spec=OpenRouterAgent)
27
 
28
  # Configure the class mocks to return our instance mocks
29
  mock_retriever_class.return_value = mock_retriever
@@ -84,11 +84,11 @@ async def test_agent_context_creation():
84
  'QDRANT_URL': 'http://test-qdrant:6333',
85
  'QDRANT_API_KEY': 'test-api-key',
86
  'COHERE_API_KEY': 'test-cohere-key',
87
- 'OPENROUTER_API_KEY': 'test-openrouter-key'
88
  }):
89
  with patch('rag_agent_api.retrieval.AsyncQdrantClient') as mock_qdrant_client:
90
  with patch('rag_agent_api.retrieval.cohere.Client') as mock_cohere_client:
91
- with patch('rag_agent_api.openrouter_agent.httpx.AsyncClient'):
92
  # Mock the Qdrant client
93
  mock_qdrant_instance = Mock()
94
  mock_qdrant_client.return_value = mock_qdrant_instance
@@ -101,7 +101,7 @@ async def test_agent_context_creation():
101
 
102
  # Initialize components
103
  retriever = QdrantRetriever(collection_name="test_collection")
104
- agent = OpenRouterAgent(model_name="gpt-4-test")
105
 
106
  # Create test chunks
107
  test_chunk = SourceChunkSchema(
@@ -145,7 +145,7 @@ def test_health_endpoint_integration():
145
  assert "services" in data
146
 
147
  # Check that services status is included
148
- assert "openrouter" in data["services"]
149
  assert "qdrant" in data["services"]
150
  assert "agent" in data["services"]
151
 
@@ -157,11 +157,11 @@ async def test_retrieval_and_agent_integration():
157
  'QDRANT_URL': 'http://test-qdrant:6333',
158
  'QDRANT_API_KEY': 'test-api-key',
159
  'COHERE_API_KEY': 'test-cohere-key',
160
- 'OPENROUTER_API_KEY': 'test-openrouter-key'
161
  }):
162
  with patch('rag_agent_api.retrieval.AsyncQdrantClient') as mock_qdrant_client:
163
  with patch('rag_agent_api.retrieval.cohere.Client') as mock_cohere_client:
164
- with patch('rag_agent_api.openrouter_agent.httpx.AsyncClient') as mock_httpx_client:
165
  # Mock the Qdrant client
166
  mock_qdrant_instance = Mock()
167
  mock_qdrant_client.return_value = mock_qdrant_instance
@@ -172,21 +172,18 @@ async def test_retrieval_and_agent_integration():
172
  mock_cohere_client.return_value = mock_cohere_instance
173
  mock_cohere_instance.embed.return_value = Mock(embeddings=[[0.1, 0.2, 0.3]])
174
 
175
- # Mock the httpx client for OpenRouter
176
- mock_httpx_instance = Mock()
177
- mock_httpx_client.return_value.__aenter__.return_value = mock_httpx_instance
178
  mock_completion = Mock()
179
- mock_completion.json.return_value = {
180
- "choices": [
181
- {"message": {"content": "This is a test response"}}
182
- ]
183
- }
184
- mock_httpx_instance.post = AsyncMock(return_value=mock_completion)
185
- mock_httpx_instance.post.return_value.status_code = 200
186
 
187
  # Initialize components
188
  test_retriever = QdrantRetriever(collection_name="test_collection")
189
- test_agent = OpenRouterAgent(model_name="gpt-4-test")
190
 
191
  # Mock the retrieval result
192
  mock_chunk = SourceChunkSchema(
 
7
  from unittest.mock import Mock, patch, AsyncMock
8
  from rag_agent_api.main import app, retriever, agent
9
  from rag_agent_api.retrieval import QdrantRetriever
10
+ from rag_agent_api.agent import OpenAIAgent
11
  from rag_agent_api.schemas import SourceChunkSchema, AgentResponse, AgentContext
12
 
13
 
 
17
  'QDRANT_URL': 'http://test-qdrant:6333',
18
  'QDRANT_API_KEY': 'test-api-key',
19
  'COHERE_API_KEY': 'test-cohere-key',
20
+ 'OPENAI_API_KEY': 'test-openai-key'
21
  }):
22
  with patch('rag_agent_api.main.QdrantRetriever') as mock_retriever_class:
23
+ with patch('rag_agent_api.main.OpenAIAgent') as mock_agent_class:
24
  # Create mock instances
25
  mock_retriever = Mock(spec=QdrantRetriever)
26
+ mock_agent = Mock(spec=OpenAIAgent)
27
 
28
  # Configure the class mocks to return our instance mocks
29
  mock_retriever_class.return_value = mock_retriever
 
84
  'QDRANT_URL': 'http://test-qdrant:6333',
85
  'QDRANT_API_KEY': 'test-api-key',
86
  'COHERE_API_KEY': 'test-cohere-key',
87
+ 'OPENAI_API_KEY': 'test-openai-key'
88
  }):
89
  with patch('rag_agent_api.retrieval.AsyncQdrantClient') as mock_qdrant_client:
90
  with patch('rag_agent_api.retrieval.cohere.Client') as mock_cohere_client:
91
+ with patch('rag_agent_api.agent.AsyncOpenAI'):
92
  # Mock the Qdrant client
93
  mock_qdrant_instance = Mock()
94
  mock_qdrant_client.return_value = mock_qdrant_instance
 
101
 
102
  # Initialize components
103
  retriever = QdrantRetriever(collection_name="test_collection")
104
+ agent = OpenAIAgent(model_name="gpt-4-test")
105
 
106
  # Create test chunks
107
  test_chunk = SourceChunkSchema(
 
145
  assert "services" in data
146
 
147
  # Check that services status is included
148
+ assert "openai" in data["services"]
149
  assert "qdrant" in data["services"]
150
  assert "agent" in data["services"]
151
 
 
157
  'QDRANT_URL': 'http://test-qdrant:6333',
158
  'QDRANT_API_KEY': 'test-api-key',
159
  'COHERE_API_KEY': 'test-cohere-key',
160
+ 'OPENAI_API_KEY': 'test-openai-key'
161
  }):
162
  with patch('rag_agent_api.retrieval.AsyncQdrantClient') as mock_qdrant_client:
163
  with patch('rag_agent_api.retrieval.cohere.Client') as mock_cohere_client:
164
+ with patch('rag_agent_api.agent.AsyncOpenAI') as mock_openai:
165
  # Mock the Qdrant client
166
  mock_qdrant_instance = Mock()
167
  mock_qdrant_client.return_value = mock_qdrant_instance
 
172
  mock_cohere_client.return_value = mock_cohere_instance
173
  mock_cohere_instance.embed.return_value = Mock(embeddings=[[0.1, 0.2, 0.3]])
174
 
175
+ # Mock the OpenAI client
176
+ mock_openai_instance = Mock()
177
+ mock_openai.return_value = mock_openai_instance
178
  mock_completion = Mock()
179
+ mock_completion.choices = [Mock()]
180
+ mock_completion.choices[0].message = Mock()
181
+ mock_completion.choices[0].message.content = "This is a test response"
182
+ mock_openai_instance.chat.completions.create = AsyncMock(return_value=mock_completion)
 
 
 
183
 
184
  # Initialize components
185
  test_retriever = QdrantRetriever(collection_name="test_collection")
186
+ test_agent = OpenAIAgent(model_name="gpt-4-test")
187
 
188
  # Mock the retrieval result
189
  mock_chunk = SourceChunkSchema(