sanilahmed2019 committed on
Commit
2ade705
·
1 Parent(s): f850e5a

Redeploy backend

Browse files
.README.md.swp ADDED
Binary file (1.02 kB). View file
 
.env CHANGED
@@ -1,20 +1,19 @@
1
- COHERE_API_KEY="Dq2dLJlwDOZwAg4K7XalSEC91kXnucGd52KmkJh7"
2
- QDRANT_URL="https://72888a6e-0dfc-4620-bf85-0b9025951e0c.us-east4-0.gcp.cloud.qdrant.io:6333"
3
- QDRANT_API_KEY="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.BDBAtGf7x_XGCu3lO4-kNxgJeVgnSTKUjHeZBT6qJkQ"
4
- REACT_APP_RAG_API_URL="http://localhost:8000"
5
  # RAG Agent and API Layer Environment Variables
6
 
7
  # OpenAI API Configuration
8
- OPENAI_API_KEY="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"
9
- GEMINI_API_KEY="AIzaSyDM79Xi6rsffqHrwVOlc3FwPnk9pwBh9OI"
10
 
11
  # Qdrant Configuration
12
- QDRANT_URL="https://72888a6e-0dfc-4620-bf85-0b9025951e0c.us-east4-0.gcp.cloud.qdrant.io:6333"
13
- QDRANT_API_KEY="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.BDBAtGf7x_XGCu3lO4-kNxgJeVgnSTKUjHeZBT6qJkQ"
14
  QDRANT_COLLECTION_NAME=rag_embedding
15
 
16
  # Cohere Configuration (for query embeddings)
17
- COHERE_API_KEY="Dq2dLJlwDOZwAg4K7XalSEC91kXnucGd52KmkJh7"
18
 
19
  # Application Configuration
20
  DEFAULT_CONTEXT_WINDOW=5
 
1
+ COHERE_API_KEY=Dq2dLJlwDOZwAg4K7XalSEC91kXnucGd52KmkJh7
2
+ QDRANT_URL=https://72888a6e-0dfc-4620-bf85-0b9025951e0c.us-east4-0.gcp.cloud.qdrant.io:6333
3
+ QDRANT_API_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.BDBAtGf7x_XGCu3lO4-kNxgJeVgnSTKUjHeZBT6qJkQ
4
+ REACT_APP_RAG_API_URL=http://localhost:8000
5
  # RAG Agent and API Layer Environment Variables
6
 
7
  # OpenRouter API Configuration
8
+ OPENROUTER_API_KEY=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
 
9
 
10
  # Qdrant Configuration
11
+ QDRANT_URL=https://72888a6e-0dfc-4620-bf85-0b9025951e0c.us-east4-0.gcp.cloud.qdrant.io:6333
12
+ QDRANT_API_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.BDBAtGf7x_XGCu3lO4-kNxgJeVgnSTKUjHeZBT6qJkQ
13
  QDRANT_COLLECTION_NAME=rag_embedding
14
 
15
  # Cohere Configuration (for query embeddings)
16
+ COHERE_API_KEY=Dq2dLJlwDOZwAg4K7XalSEC91kXnucGd52KmkJh7
17
 
18
  # Application Configuration
19
  DEFAULT_CONTEXT_WINDOW=5
.env.example CHANGED
@@ -1,15 +1,14 @@
1
  # RAG Agent and API Layer Environment Variables
2
 
3
- # OpenAI API Configuration
4
- OPENAI_API_KEY="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"
5
- GEMINI_API_KEY="AIzaSyDM79Xi6rsffqHrwVOlc3FwPnk9pwBh9OI"
6
  # Qdrant Configuration
7
- QDRANT_URL="https://72888a6e-0dfc-4620-bf85-0b9025951e0c.us-east4-0.gcp.cloud.qdrant.io:6333"
8
- QDRANT_API_KEY="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.BDBAtGf7x_XGCu3lO4-kNxgJeVgnSTKUjHeZBT6qJkQ"
9
  QDRANT_COLLECTION_NAME=rag_embedding
10
- REACT_APP_RAG_API_URL="http://localhost:8000"
11
  # Cohere Configuration (for query embeddings)
12
- COHERE_API_KEY="Dq2dLJlwDOZwAg4K7XalSEC91kXnucGd52KmkJh7"
13
 
14
  # Application Configuration
15
  DEFAULT_CONTEXT_WINDOW=5
 
1
  # RAG Agent and API Layer Environment Variables
2
 
3
+ # OpenRouter API Configuration
4
+ OPENROUTER_API_KEY=your-openrouter-api-key-here
 
5
  # Qdrant Configuration
6
+ QDRANT_URL=your-qdrant-instance-url
7
+ QDRANT_API_KEY=your-qdrant-api-key
8
  QDRANT_COLLECTION_NAME=rag_embedding
9
+ REACT_APP_RAG_API_URL=http://localhost:8000
10
  # Cohere Configuration (for query embeddings)
11
+ COHERE_API_KEY=your-cohere-api-key-here
12
 
13
  # Application Configuration
14
  DEFAULT_CONTEXT_WINDOW=5
backend.log CHANGED
The diff for this file is too large to render. See raw diff
 
book_ingestor.egg-info/PKG-INFO CHANGED
@@ -14,60 +14,35 @@ Requires-Dist: uvicorn>=0.24.0
14
  Requires-Dist: openai>=1.0.0
15
  Requires-Dist: pydantic>=2.0.0
16
 
17
- # Book Content Ingestor & RAG Verification
 
 
 
 
 
 
 
18
 
19
- A system to extract content from Docusaurus-based book websites, chunk and embed it using Cohere, store embeddings in Qdrant Cloud for RAG applications, and verify the retrieval pipeline functionality.
20
 
21
- ## Setup
22
 
23
- 1. Install dependencies using uv:
24
- ```bash
25
- cd backend
26
- uv sync
27
- ```
28
 
29
- 2. Create a `.env` file with your API keys:
30
- ```bash
31
- cp .env.example .env
32
- # Edit .env with your actual API keys
33
- ```
 
34
 
35
- ## Environment Variables
36
 
37
- - `COHERE_API_KEY`: Your Cohere API key
38
- - `QDRANT_URL`: Your Qdrant Cloud URL
39
- - `QDRANT_API_KEY`: Your Qdrant API key
40
- - `QDRANT_COLLECTION_NAME`: Name of the collection to use (default: "rag_embedding")
41
 
42
- ## Usage
43
 
44
- ### Run the ingestion pipeline:
45
- ```bash
46
- cd backend
47
- uv run python main.py
48
- ```
49
-
50
- This will:
51
- 1. Collect all URLs from the target book (https://sanilahmed.github.io/hackathon-ai-book/)
52
- 2. Extract text content from each URL
53
- 3. Chunk the content into fixed-size segments
54
- 4. Generate embeddings using Cohere
55
- 5. Store embeddings with metadata in Qdrant Cloud collection named "rag_embedding"
56
-
57
- ### Run the verification pipeline:
58
- ```bash
59
- cd backend
60
- python -m verify_retrieval.main
61
- ```
62
-
63
- Or with specific options:
64
- ```bash
65
- python -m verify_retrieval.main --query "transformer architecture in NLP" --top-k 10
66
- ```
67
-
68
- The verification system will:
69
- 1. Load vectors and metadata stored in Qdrant from the original ingestion
70
- 2. Implement retrieval functions to query Qdrant using sample keywords or phrases
71
- 3. Validate that retrieved chunks are accurate and relevant
72
- 4. Check that metadata (URL, title, chunk_id) matches source content
73
- 5. Log results and confirm the pipeline executes end-to-end without errors
 
14
  Requires-Dist: openai>=1.0.0
15
  Requires-Dist: pydantic>=2.0.0
16
 
17
+ ---
18
+ title: Backend Deploy
19
+ emoji: 🚀
20
+ colorFrom: blue
21
+ colorTo: purple
22
+ sdk: docker
23
+ pinned: false
24
+ ---
25
 
26
+ # RAG Agent and API Layer
27
 
28
+ This is a FastAPI application that provides a question-answering API using an OpenRouter-hosted agent and Qdrant retrieval for RAG (Retrieval-Augmented Generation) functionality.
29
 
30
+ ## API Endpoints
 
 
 
 
31
 
32
+ - `GET /` - Root endpoint with API information
33
+ - `POST /ask` - Main question-answering endpoint
34
+ - `GET /health` - Health check endpoint
35
+ - `GET /ready` - Readiness check endpoint
36
+ - `/docs` - API documentation (Swagger UI)
37
+ - `/redoc` - API documentation (Redoc)
38
 
39
+ ## Configuration
40
 
41
+ The application requires the following environment variables:
42
+ - `OPENROUTER_API_KEY` - API key for OpenRouter
43
+ - `QDRANT_URL` - URL for Qdrant vector database
44
+ - `QDRANT_API_KEY` - API key for Qdrant database
45
 
46
+ ## Deployment
47
 
48
+ This application is configured for deployment on Hugging Face Spaces using Docker.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
rag_agent_api/README.md CHANGED
@@ -1,17 +1,17 @@
1
  # RAG Agent and API Layer
2
 
3
- A FastAPI-based question-answering system that uses OpenAI Agents and Qdrant retrieval to generate grounded responses based on book content.
4
 
5
  ## Overview
6
 
7
- The RAG Agent and API Layer provides a question-answering API that retrieves relevant content from Qdrant and uses an OpenAI agent to generate accurate, source-grounded responses. The system ensures that all answers are based only on the provided context to prevent hallucinations.
8
 
9
  ## Architecture
10
 
11
  The system consists of several key components:
12
 
13
  - **FastAPI Application**: Main entry point for the question-answering API
14
- - **OpenAI Agent**: Generates responses based on retrieved context
15
  - **Qdrant Retriever**: Retrieves relevant content chunks from Qdrant database
16
  - **Configuration Manager**: Handles environment variables and settings
17
  - **Data Models**: Pydantic models for API requests/responses
@@ -22,7 +22,7 @@ The system consists of several key components:
22
  ### Prerequisites
23
 
24
  - Python 3.9+
25
- - OpenAI API key
26
  - Qdrant Cloud instance with book content embeddings
27
  - Cohere API key (for query embeddings)
28
 
@@ -42,7 +42,7 @@ The system consists of several key components:
42
 
43
  3. Edit `.env` with your API keys and configuration:
44
  ```env
45
- OPENAI_API_KEY=your-openai-api-key-here
46
  QDRANT_URL=your-qdrant-instance-url
47
  QDRANT_API_KEY=your-qdrant-api-key
48
  QDRANT_COLLECTION_NAME=rag_embedding
@@ -103,7 +103,7 @@ Root endpoint with API information.
103
 
104
  ### Environment Variables
105
 
106
- - `OPENAI_API_KEY`: Your OpenAI API key
107
  - `QDRANT_URL`: URL of your Qdrant instance
108
  - `QDRANT_API_KEY`: Your Qdrant API key
109
  - `QDRANT_COLLECTION_NAME`: Name of the collection with book embeddings (default: `rag_embedding`)
@@ -123,8 +123,8 @@ Pydantic models for API request/response schemas.
123
  ### Schemas (`schemas.py`)
124
  Additional schemas for internal data structures.
125
 
126
- ### Agent (`agent.py`)
127
- OpenAI agent implementation with context injection and response validation.
128
 
129
  ### Retrieval (`retrieval.py`)
130
  Qdrant integration for content retrieval with semantic search.
@@ -160,7 +160,7 @@ pytest
160
 
161
  # Run specific test files
162
  pytest tests/test_api.py
163
- pytest tests/test_agent.py
164
  pytest tests/test_retrieval.py
165
  ```
166
 
 
1
  # RAG Agent and API Layer
2
 
3
+ A FastAPI-based question-answering system that uses OpenRouter Agents and Qdrant retrieval to generate grounded responses based on book content.
4
 
5
  ## Overview
6
 
7
+ The RAG Agent and API Layer provides a question-answering API that retrieves relevant content from Qdrant and uses an OpenRouter agent to generate accurate, source-grounded responses. The system ensures that all answers are based only on the provided context to prevent hallucinations.
8
 
9
  ## Architecture
10
 
11
  The system consists of several key components:
12
 
13
  - **FastAPI Application**: Main entry point for the question-answering API
14
+ - **OpenRouter Agent**: Generates responses based on retrieved context
15
  - **Qdrant Retriever**: Retrieves relevant content chunks from Qdrant database
16
  - **Configuration Manager**: Handles environment variables and settings
17
  - **Data Models**: Pydantic models for API requests/responses
 
22
  ### Prerequisites
23
 
24
  - Python 3.9+
25
+ - OpenRouter API key
26
  - Qdrant Cloud instance with book content embeddings
27
  - Cohere API key (for query embeddings)
28
 
 
42
 
43
  3. Edit `.env` with your API keys and configuration:
44
  ```env
45
+ OPENROUTER_API_KEY=your-openrouter-api-key-here
46
  QDRANT_URL=your-qdrant-instance-url
47
  QDRANT_API_KEY=your-qdrant-api-key
48
  QDRANT_COLLECTION_NAME=rag_embedding
 
103
 
104
  ### Environment Variables
105
 
106
+ - `OPENROUTER_API_KEY`: Your OpenRouter API key
107
  - `QDRANT_URL`: URL of your Qdrant instance
108
  - `QDRANT_API_KEY`: Your Qdrant API key
109
  - `QDRANT_COLLECTION_NAME`: Name of the collection with book embeddings (default: `rag_embedding`)
 
123
  ### Schemas (`schemas.py`)
124
  Additional schemas for internal data structures.
125
 
126
+ ### Agent (`openrouter_agent.py`)
127
+ OpenRouter agent implementation with context injection and response validation.
128
 
129
  ### Retrieval (`retrieval.py`)
130
  Qdrant integration for content retrieval with semantic search.
 
160
 
161
  # Run specific test files
162
  pytest tests/test_api.py
163
+ pytest tests/test_openrouter_agent.py
164
  pytest tests/test_retrieval.py
165
  ```
166
 
rag_agent_api/__init__.py CHANGED
@@ -10,7 +10,7 @@ __license__ = "MIT"
10
  # Import main components for easy access
11
  from .main import app
12
  from .config import Config, get_config, validate_config
13
- from .agent import GeminiAgent
14
  from .retrieval import QdrantRetriever
15
 
16
  # Define what gets imported with "from rag_agent_api import *"
@@ -19,6 +19,6 @@ __all__ = [
19
  "Config",
20
  "get_config",
21
  "validate_config",
22
- "GeminiAgent",
23
  "QdrantRetriever"
24
  ]
 
10
  # Import main components for easy access
11
  from .main import app
12
  from .config import Config, get_config, validate_config
13
+ from .openrouter_agent import OpenRouterAgent
14
  from .retrieval import QdrantRetriever
15
 
16
  # Define what gets imported with "from rag_agent_api import *"
 
19
  "Config",
20
  "get_config",
21
  "validate_config",
22
+ "OpenRouterAgent",
23
  "QdrantRetriever"
24
  ]
rag_agent_api/__pycache__/__init__.cpython-313.pyc CHANGED
Binary files a/rag_agent_api/__pycache__/__init__.cpython-313.pyc and b/rag_agent_api/__pycache__/__init__.cpython-313.pyc differ
 
rag_agent_api/__pycache__/config.cpython-313.pyc CHANGED
Binary files a/rag_agent_api/__pycache__/config.cpython-313.pyc and b/rag_agent_api/__pycache__/config.cpython-313.pyc differ
 
rag_agent_api/__pycache__/main.cpython-313.pyc CHANGED
Binary files a/rag_agent_api/__pycache__/main.cpython-313.pyc and b/rag_agent_api/__pycache__/main.cpython-313.pyc differ
 
rag_agent_api/__pycache__/openrouter_agent.cpython-313.pyc ADDED
Binary file (12.7 kB). View file
 
rag_agent_api/config.py CHANGED
@@ -20,8 +20,8 @@ class Config:
20
  def __init__(self):
21
  """Initialize configuration by loading environment variables."""
22
  self.openai_api_key = os.getenv('OPENAI_API_KEY')
23
- self.gemini_api_key = os.getenv('GEMINI_API_KEY')
24
  self.cohere_api_key = os.getenv('COHERE_API_KEY')
 
25
  self.qdrant_url = os.getenv('QDRANT_URL')
26
  self.qdrant_api_key = os.getenv('QDRANT_API_KEY')
27
  self.qdrant_collection_name = os.getenv('QDRANT_COLLECTION_NAME', 'rag_embedding')
@@ -38,6 +38,9 @@ class Config:
38
  """
39
  errors = []
40
 
 
 
 
41
  if not self.cohere_api_key:
42
  errors.append("COHERE_API_KEY environment variable not set")
43
 
 
20
  def __init__(self):
21
  """Initialize configuration by loading environment variables."""
22
  self.openai_api_key = os.getenv('OPENAI_API_KEY')
 
23
  self.cohere_api_key = os.getenv('COHERE_API_KEY')
24
+ self.openrouter_api_key = os.getenv('OPENROUTER_API_KEY')
25
  self.qdrant_url = os.getenv('QDRANT_URL')
26
  self.qdrant_api_key = os.getenv('QDRANT_API_KEY')
27
  self.qdrant_collection_name = os.getenv('QDRANT_COLLECTION_NAME', 'rag_embedding')
 
38
  """
39
  errors = []
40
 
41
+ if not self.openrouter_api_key:
42
+ errors.append("OPENROUTER_API_KEY environment variable not set")
43
+
44
  if not self.cohere_api_key:
45
  errors.append("COHERE_API_KEY environment variable not set")
46
 
rag_agent_api/main.py CHANGED
@@ -13,7 +13,7 @@ from .config import validate_config, get_config
13
  from .models import QueryRequest, APIResponse, ErrorResponse, HealthResponse
14
  from .schemas import AgentResponse, AgentContext
15
  from .utils import setup_logging, generate_response_id, format_timestamp, create_error_response
16
- from .agent import GeminiAgent
17
  from .retrieval import QdrantRetriever
18
 
19
 
@@ -41,7 +41,7 @@ setup_logging(config.log_level)
41
 
42
  # Global instances
43
  retriever: Optional[QdrantRetriever] = None
44
- agent: Optional[GeminiAgent] = None
45
 
46
 
47
  @app.on_event("startup")
@@ -56,10 +56,10 @@ async def startup_event():
56
 
57
  # Initialize agent first (this doesn't require async initialization)
58
  try:
59
- agent = GeminiAgent()
60
- logging.info("Google Gemini agent initialized successfully")
61
  except Exception as e:
62
- logging.error(f"Failed to initialize Google Gemini agent: {e}")
63
  raise
64
 
65
  # Initialize retriever (async operations will be handled in the methods themselves)
@@ -82,22 +82,22 @@ async def health_check() -> HealthResponse:
82
  HealthResponse with status of services
83
  """
84
  # Check if all required components are initialized
85
- gemini_status = "up" if agent else "down"
86
  qdrant_status = "up" if retriever else "down"
87
  agent_status = "up" if agent else "down"
88
 
89
  # Determine overall status
90
  overall_status = "healthy"
91
- if gemini_status == "down" or qdrant_status == "down":
92
  overall_status = "unhealthy"
93
- elif gemini_status == "degraded" or qdrant_status == "degraded":
94
  overall_status = "degraded"
95
 
96
  return HealthResponse(
97
  status=overall_status,
98
  timestamp=format_timestamp(),
99
  services={
100
- "gemini": gemini_status,
101
  "qdrant": qdrant_status,
102
  "agent": agent_status
103
  }
@@ -194,7 +194,7 @@ async def root() -> Dict[str, Any]:
194
  return {
195
  "message": "RAG Agent and API Layer",
196
  "version": "1.0.0",
197
- "description": "Question-answering API using OpenAI Agents and Qdrant retrieval",
198
  "endpoints": {
199
  "POST /ask": "Main question-answering endpoint",
200
  "GET /health": "Health check endpoint",
 
13
  from .models import QueryRequest, APIResponse, ErrorResponse, HealthResponse
14
  from .schemas import AgentResponse, AgentContext
15
  from .utils import setup_logging, generate_response_id, format_timestamp, create_error_response
16
+ from .openrouter_agent import OpenRouterAgent
17
  from .retrieval import QdrantRetriever
18
 
19
 
 
41
 
42
  # Global instances
43
  retriever: Optional[QdrantRetriever] = None
44
+ agent: Optional[OpenRouterAgent] = None
45
 
46
 
47
  @app.on_event("startup")
 
56
 
57
  # Initialize agent first (this doesn't require async initialization)
58
  try:
59
+ agent = OpenRouterAgent()
60
+ logging.info("OpenRouter agent initialized successfully")
61
  except Exception as e:
62
+ logging.error(f"Failed to initialize OpenRouter agent: {e}")
63
  raise
64
 
65
  # Initialize retriever (async operations will be handled in the methods themselves)
 
82
  HealthResponse with status of services
83
  """
84
  # Check if all required components are initialized
85
+ openrouter_status = "up" if agent else "down"
86
  qdrant_status = "up" if retriever else "down"
87
  agent_status = "up" if agent else "down"
88
 
89
  # Determine overall status
90
  overall_status = "healthy"
91
+ if openrouter_status == "down" or qdrant_status == "down":
92
  overall_status = "unhealthy"
93
+ elif openrouter_status == "degraded" or qdrant_status == "degraded":
94
  overall_status = "degraded"
95
 
96
  return HealthResponse(
97
  status=overall_status,
98
  timestamp=format_timestamp(),
99
  services={
100
+ "openrouter": openrouter_status,
101
  "qdrant": qdrant_status,
102
  "agent": agent_status
103
  }
 
194
  return {
195
  "message": "RAG Agent and API Layer",
196
  "version": "1.0.0",
197
+ "description": "Question-answering API using OpenRouter Agents and Qdrant retrieval",
198
  "endpoints": {
199
  "POST /ask": "Main question-answering endpoint",
200
  "GET /health": "Health check endpoint",
rag_agent_api/{agent.py → openrouter_agent.py} RENAMED
@@ -1,44 +1,41 @@
1
  """
2
- Google Gemini Agent module for the RAG Agent and API Layer system.
3
 
4
- This module provides functionality for creating and managing a Google Gemini agent
5
  that generates responses based on retrieved context.
6
  """
7
  import asyncio
8
  import logging
9
  from typing import List, Dict, Any, Optional
10
- import google.generativeai as genai
11
  from .config import get_config
12
  from .schemas import AgentContext, AgentResponse, SourceChunkSchema
13
  from .utils import format_confidence_score
14
 
15
 
16
- class GeminiAgent:
17
  """
18
- A class to manage the Google Gemini agent for generating responses based on context.
19
  """
20
- def __init__(self, model_name: str = "gemini-2.5-flash"):
21
  """
22
- Initialize the Google Gemini agent with configuration.
23
 
24
  Args:
25
- model_name: Name of the Gemini model to use (default: gemini-2.5-flash)
26
  """
27
  config = get_config()
28
- api_key = config.gemini_api_key
29
 
30
  if not api_key:
31
- raise ValueError("GEMINI_API_KEY environment variable not set")
32
 
33
- # Configure the Gemini client
34
- genai.configure(api_key=api_key)
35
-
36
- # Create the generative model instance
37
- self.model = genai.GenerativeModel(model_name)
38
  self.model_name = model_name
 
39
  self.default_temperature = config.default_temperature
40
 
41
- logging.info(f"Gemini agent initialized with model: {model_name}")
42
 
43
  async def generate_response(self, context: AgentContext) -> AgentResponse:
44
  """
@@ -80,23 +77,43 @@ class GeminiAgent:
80
  # Prepare the user message with the query
81
  user_message = self._create_user_message(context)
82
 
83
- # For Google Gemini, we need to format the prompt differently
84
- # Combine system instructions and user query
85
- full_prompt = f"{system_message}\n\n{user_message}"
86
-
87
- # Generate response from Google Gemini
88
- # For async generation, we need to use the appropriate async method
89
- chat = self.model.start_chat()
90
- response = await chat.send_message_async(
91
- full_prompt,
92
- generation_config={
93
- "temperature": context.source_policy if hasattr(context, 'temperature') else self.default_temperature,
94
- "max_output_tokens": 1000
 
 
 
 
95
  }
96
- )
97
 
98
- # Extract the response text
99
- raw_response = response.text if response and hasattr(response, 'text') else str(response)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
  # If the response indicates no information was found, return the exact message
102
  if "I could not find this information in the book" in raw_response:
@@ -134,7 +151,7 @@ class GeminiAgent:
134
  return agent_response
135
 
136
  except Exception as e:
137
- logging.error(f"Error generating response from Google Gemini agent: {e}", exc_info=True)
138
  # Return the specific message when there's an error
139
  return AgentResponse(
140
  raw_response="I could not find this information in the book.",
@@ -197,20 +214,6 @@ QUESTION:
197
 
198
  return "\n".join(formatted_chunks)
199
 
200
- def _create_context_messages(self, context: AgentContext) -> List[Dict[str, str]]:
201
- """
202
- Create context messages from the retrieved chunks.
203
- With the new format, context is now provided in the user message,
204
- so this method returns an empty list to avoid duplication.
205
-
206
- Args:
207
- context: AgentContext containing the query and retrieved context chunks
208
-
209
- Returns:
210
- Empty list since context is now in user message
211
- """
212
- return []
213
-
214
  def _identify_used_sources(self, response: str, chunks: List[SourceChunkSchema]) -> List[str]:
215
  """
216
  Identify which sources were likely used in the response.
@@ -356,8 +359,4 @@ QUESTION:
356
  return True
357
 
358
  # In a more sophisticated implementation, you'd validate against the context more rigorously
359
- return True
360
-
361
-
362
- # Global agent instance (if needed)
363
- # agent_instance = OpenAIAgent()
 
1
  """
2
+ OpenRouter Agent module for the RAG Agent and API Layer system.
3
 
4
+ This module provides functionality for creating and managing an OpenRouter agent
5
  that generates responses based on retrieved context.
6
  """
7
  import asyncio
8
  import logging
9
  from typing import List, Dict, Any, Optional
10
+ import httpx
11
  from .config import get_config
12
  from .schemas import AgentContext, AgentResponse, SourceChunkSchema
13
  from .utils import format_confidence_score
14
 
15
 
16
+ class OpenRouterAgent:
17
  """
18
+ A class to manage the OpenRouter agent for generating responses based on context.
19
  """
20
+ def __init__(self, model_name: str = "arcee-ai/trinity-mini:free"):
21
  """
22
+ Initialize the OpenRouter agent with configuration.
23
 
24
  Args:
25
+ model_name: Name of the OpenRouter model to use (default: arcee-ai/trinity-mini:free)
26
  """
27
  config = get_config()
28
+ api_key = config.openrouter_api_key
29
 
30
  if not api_key:
31
+ raise ValueError("OPENROUTER_API_KEY environment variable not set")
32
 
33
+ self.api_key = api_key
 
 
 
 
34
  self.model_name = model_name
35
+ self.base_url = "https://openrouter.ai/api/v1"
36
  self.default_temperature = config.default_temperature
37
 
38
+ logging.info(f"OpenRouter agent initialized with model: {model_name}")
39
 
40
  async def generate_response(self, context: AgentContext) -> AgentResponse:
41
  """
 
77
  # Prepare the user message with the query
78
  user_message = self._create_user_message(context)
79
 
80
+ # Prepare the payload for OpenRouter API
81
+ payload = {
82
+ "model": self.model_name,
83
+ "messages": [
84
+ {"role": "system", "content": system_message},
85
+ {"role": "user", "content": user_message}
86
+ ],
87
+ "temperature": context.source_policy if hasattr(context, 'temperature') else self.default_temperature,
88
+ "max_tokens": 1000
89
+ }
90
+
91
+ # Make the API call to OpenRouter
92
+ async with httpx.AsyncClient(timeout=30.0) as client:
93
+ headers = {
94
+ "Authorization": f"Bearer {self.api_key}",
95
+ "Content-Type": "application/json"
96
  }
 
97
 
98
+ response = await client.post(
99
+ f"{self.base_url}/chat/completions",
100
+ json=payload,
101
+ headers=headers
102
+ )
103
+
104
+ if response.status_code != 200:
105
+ logging.error(f"OpenRouter API error: {response.status_code} - {response.text}")
106
+ return AgentResponse(
107
+ raw_response="I could not find this information in the book.",
108
+ used_sources=[],
109
+ confidence_score=0.0,
110
+ is_valid=False,
111
+ validation_details=f"API error: {response.status_code}",
112
+ unsupported_claims=[]
113
+ )
114
+
115
+ response_data = response.json()
116
+ raw_response = response_data["choices"][0]["message"]["content"]
117
 
118
  # If the response indicates no information was found, return the exact message
119
  if "I could not find this information in the book" in raw_response:
 
151
  return agent_response
152
 
153
  except Exception as e:
154
+ logging.error(f"Error generating response from OpenRouter agent: {e}", exc_info=True)
155
  # Return the specific message when there's an error
156
  return AgentResponse(
157
  raw_response="I could not find this information in the book.",
 
214
 
215
  return "\n".join(formatted_chunks)
216
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
  def _identify_used_sources(self, response: str, chunks: List[SourceChunkSchema]) -> List[str]:
218
  """
219
  Identify which sources were likely used in the response.
 
359
  return True
360
 
361
  # In a more sophisticated implementation, you'd validate against the context more rigorously
362
+ return True
 
 
 
 
requirements.txt CHANGED
@@ -9,4 +9,4 @@ uvicorn>=0.24.0
9
  openai>=1.0.0
10
  pydantic>=2.0.0
11
  numpy>=1.21.0
12
- google-generativeai>=0.8.0
 
9
  openai>=1.0.0
10
  pydantic>=2.0.0
11
  numpy>=1.21.0
12
+ httpx>=0.27.0
tests/test_integration.py CHANGED
@@ -7,7 +7,7 @@ from fastapi.testclient import TestClient
7
  from unittest.mock import Mock, patch, AsyncMock
8
  from rag_agent_api.main import app, retriever, agent
9
  from rag_agent_api.retrieval import QdrantRetriever
10
- from rag_agent_api.agent import OpenAIAgent
11
  from rag_agent_api.schemas import SourceChunkSchema, AgentResponse, AgentContext
12
 
13
 
@@ -17,13 +17,13 @@ def test_full_query_flow_with_mocked_components():
17
  'QDRANT_URL': 'http://test-qdrant:6333',
18
  'QDRANT_API_KEY': 'test-api-key',
19
  'COHERE_API_KEY': 'test-cohere-key',
20
- 'OPENAI_API_KEY': 'test-openai-key'
21
  }):
22
  with patch('rag_agent_api.main.QdrantRetriever') as mock_retriever_class:
23
- with patch('rag_agent_api.main.OpenAIAgent') as mock_agent_class:
24
  # Create mock instances
25
  mock_retriever = Mock(spec=QdrantRetriever)
26
- mock_agent = Mock(spec=OpenAIAgent)
27
 
28
  # Configure the class mocks to return our instance mocks
29
  mock_retriever_class.return_value = mock_retriever
@@ -84,11 +84,11 @@ async def test_agent_context_creation():
84
  'QDRANT_URL': 'http://test-qdrant:6333',
85
  'QDRANT_API_KEY': 'test-api-key',
86
  'COHERE_API_KEY': 'test-cohere-key',
87
- 'OPENAI_API_KEY': 'test-openai-key'
88
  }):
89
  with patch('rag_agent_api.retrieval.AsyncQdrantClient') as mock_qdrant_client:
90
  with patch('rag_agent_api.retrieval.cohere.Client') as mock_cohere_client:
91
- with patch('rag_agent_api.agent.AsyncOpenAI'):
92
  # Mock the Qdrant client
93
  mock_qdrant_instance = Mock()
94
  mock_qdrant_client.return_value = mock_qdrant_instance
@@ -101,7 +101,7 @@ async def test_agent_context_creation():
101
 
102
  # Initialize components
103
  retriever = QdrantRetriever(collection_name="test_collection")
104
- agent = OpenAIAgent(model_name="gpt-4-test")
105
 
106
  # Create test chunks
107
  test_chunk = SourceChunkSchema(
@@ -145,7 +145,7 @@ def test_health_endpoint_integration():
145
  assert "services" in data
146
 
147
  # Check that services status is included
148
- assert "openai" in data["services"]
149
  assert "qdrant" in data["services"]
150
  assert "agent" in data["services"]
151
 
@@ -157,11 +157,11 @@ async def test_retrieval_and_agent_integration():
157
  'QDRANT_URL': 'http://test-qdrant:6333',
158
  'QDRANT_API_KEY': 'test-api-key',
159
  'COHERE_API_KEY': 'test-cohere-key',
160
- 'OPENAI_API_KEY': 'test-openai-key'
161
  }):
162
  with patch('rag_agent_api.retrieval.AsyncQdrantClient') as mock_qdrant_client:
163
  with patch('rag_agent_api.retrieval.cohere.Client') as mock_cohere_client:
164
- with patch('rag_agent_api.agent.AsyncOpenAI') as mock_openai:
165
  # Mock the Qdrant client
166
  mock_qdrant_instance = Mock()
167
  mock_qdrant_client.return_value = mock_qdrant_instance
@@ -172,18 +172,21 @@ async def test_retrieval_and_agent_integration():
172
  mock_cohere_client.return_value = mock_cohere_instance
173
  mock_cohere_instance.embed.return_value = Mock(embeddings=[[0.1, 0.2, 0.3]])
174
 
175
- # Mock the OpenAI client
176
- mock_openai_instance = Mock()
177
- mock_openai.return_value = mock_openai_instance
178
  mock_completion = Mock()
179
- mock_completion.choices = [Mock()]
180
- mock_completion.choices[0].message = Mock()
181
- mock_completion.choices[0].message.content = "This is a test response"
182
- mock_openai_instance.chat.completions.create = AsyncMock(return_value=mock_completion)
 
 
 
183
 
184
  # Initialize components
185
  test_retriever = QdrantRetriever(collection_name="test_collection")
186
- test_agent = OpenAIAgent(model_name="gpt-4-test")
187
 
188
  # Mock the retrieval result
189
  mock_chunk = SourceChunkSchema(
 
7
  from unittest.mock import Mock, patch, AsyncMock
8
  from rag_agent_api.main import app, retriever, agent
9
  from rag_agent_api.retrieval import QdrantRetriever
10
+ from rag_agent_api.openrouter_agent import OpenRouterAgent
11
  from rag_agent_api.schemas import SourceChunkSchema, AgentResponse, AgentContext
12
 
13
 
 
17
  'QDRANT_URL': 'http://test-qdrant:6333',
18
  'QDRANT_API_KEY': 'test-api-key',
19
  'COHERE_API_KEY': 'test-cohere-key',
20
+ 'OPENROUTER_API_KEY': 'test-openrouter-key'
21
  }):
22
  with patch('rag_agent_api.main.QdrantRetriever') as mock_retriever_class:
23
+ with patch('rag_agent_api.main.OpenRouterAgent') as mock_agent_class:
24
  # Create mock instances
25
  mock_retriever = Mock(spec=QdrantRetriever)
26
+ mock_agent = Mock(spec=OpenRouterAgent)
27
 
28
  # Configure the class mocks to return our instance mocks
29
  mock_retriever_class.return_value = mock_retriever
 
84
  'QDRANT_URL': 'http://test-qdrant:6333',
85
  'QDRANT_API_KEY': 'test-api-key',
86
  'COHERE_API_KEY': 'test-cohere-key',
87
+ 'OPENROUTER_API_KEY': 'test-openrouter-key'
88
  }):
89
  with patch('rag_agent_api.retrieval.AsyncQdrantClient') as mock_qdrant_client:
90
  with patch('rag_agent_api.retrieval.cohere.Client') as mock_cohere_client:
91
+ with patch('rag_agent_api.openrouter_agent.httpx.AsyncClient'):
92
  # Mock the Qdrant client
93
  mock_qdrant_instance = Mock()
94
  mock_qdrant_client.return_value = mock_qdrant_instance
 
101
 
102
  # Initialize components
103
  retriever = QdrantRetriever(collection_name="test_collection")
104
+ agent = OpenRouterAgent(model_name="gpt-4-test")
105
 
106
  # Create test chunks
107
  test_chunk = SourceChunkSchema(
 
145
  assert "services" in data
146
 
147
  # Check that services status is included
148
+ assert "openrouter" in data["services"]
149
  assert "qdrant" in data["services"]
150
  assert "agent" in data["services"]
151
 
 
157
  'QDRANT_URL': 'http://test-qdrant:6333',
158
  'QDRANT_API_KEY': 'test-api-key',
159
  'COHERE_API_KEY': 'test-cohere-key',
160
+ 'OPENROUTER_API_KEY': 'test-openrouter-key'
161
  }):
162
  with patch('rag_agent_api.retrieval.AsyncQdrantClient') as mock_qdrant_client:
163
  with patch('rag_agent_api.retrieval.cohere.Client') as mock_cohere_client:
164
+ with patch('rag_agent_api.openrouter_agent.httpx.AsyncClient') as mock_httpx_client:
165
  # Mock the Qdrant client
166
  mock_qdrant_instance = Mock()
167
  mock_qdrant_client.return_value = mock_qdrant_instance
 
172
  mock_cohere_client.return_value = mock_cohere_instance
173
  mock_cohere_instance.embed.return_value = Mock(embeddings=[[0.1, 0.2, 0.3]])
174
 
175
+ # Mock the httpx client for OpenRouter
176
+ mock_httpx_instance = Mock()
177
+ mock_httpx_client.return_value.__aenter__.return_value = mock_httpx_instance
178
  mock_completion = Mock()
179
+ mock_completion.json.return_value = {
180
+ "choices": [
181
+ {"message": {"content": "This is a test response"}}
182
+ ]
183
+ }
184
+ mock_httpx_instance.post = AsyncMock(return_value=mock_completion)
185
+ mock_httpx_instance.post.return_value.status_code = 200
186
 
187
  # Initialize components
188
  test_retriever = QdrantRetriever(collection_name="test_collection")
189
+ test_agent = OpenRouterAgent(model_name="gpt-4-test")
190
 
191
  # Mock the retrieval result
192
  mock_chunk = SourceChunkSchema(