Spaces:

sanilahmed2019
/

backend-deploy

Sleeping

App Files Files Community

sanilahmed2019 committed on Jan 2

Commit

2ade705

1 Parent(s): f850e5a

Redeploy backend

Browse files

Files changed (16) hide show

.README.md.swp +0 -0
.env +8 -9
.env.example +6 -7
backend.log +0 -0
book_ingestor.egg-info/PKG-INFO +24 -49
rag_agent_api/README.md +9 -9
rag_agent_api/__init__.py +2 -2
rag_agent_api/__pycache__/__init__.cpython-313.pyc +0 -0
rag_agent_api/__pycache__/config.cpython-313.pyc +0 -0
rag_agent_api/__pycache__/main.cpython-313.pyc +0 -0
rag_agent_api/__pycache__/openrouter_agent.cpython-313.pyc +0 -0
rag_agent_api/config.py +4 -1
rag_agent_api/main.py +10 -10
rag_agent_api/{agent.py → openrouter_agent.py} +50 -51
requirements.txt +1 -1
tests/test_integration.py +21 -18

.README.md.swp ADDED Viewed

Binary file (1.02 kB). View file

.env CHANGED Viewed

@@ -1,20 +1,19 @@
-COHERE_API_KEY="Dq2dLJlwDOZwAg4K7XalSEC91kXnucGd52KmkJh7"
-QDRANT_URL="https://72888a6e-0dfc-4620-bf85-0b9025951e0c.us-east4-0.gcp.cloud.qdrant.io:6333"
-QDRANT_API_KEY="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.BDBAtGf7x_XGCu3lO4-kNxgJeVgnSTKUjHeZBT6qJkQ"
-REACT_APP_RAG_API_URL="http://localhost:8000"
 # RAG Agent and API Layer Environment Variables
 # OpenAI API Configuration
-OPENAI_API_KEY="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"
-GEMINI_API_KEY="AIzaSyDM79Xi6rsffqHrwVOlc3FwPnk9pwBh9OI"
 # Qdrant Configuration
-QDRANT_URL="https://72888a6e-0dfc-4620-bf85-0b9025951e0c.us-east4-0.gcp.cloud.qdrant.io:6333"
-QDRANT_API_KEY="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.BDBAtGf7x_XGCu3lO4-kNxgJeVgnSTKUjHeZBT6qJkQ"
 QDRANT_COLLECTION_NAME=rag_embedding
 # Cohere Configuration (for query embeddings)
-COHERE_API_KEY="Dq2dLJlwDOZwAg4K7XalSEC91kXnucGd52KmkJh7"
 # Application Configuration
 DEFAULT_CONTEXT_WINDOW=5

+COHERE_API_KEY=Dq2dLJlwDOZwAg4K7XalSEC91kXnucGd52KmkJh7
+QDRANT_URL=https://72888a6e-0dfc-4620-bf85-0b9025951e0c.us-east4-0.gcp.cloud.qdrant.io:6333
+QDRANT_API_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.BDBAtGf7x_XGCu3lO4-kNxgJeVgnSTKUjHeZBT6qJkQ
+REACT_APP_RAG_API_URL=http://localhost:8000
 # RAG Agent and API Layer Environment Variables
 # OpenRouter API Configuration
+OPENROUTER_API_KEY=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
 # Qdrant Configuration
+QDRANT_URL=https://72888a6e-0dfc-4620-bf85-0b9025951e0c.us-east4-0.gcp.cloud.qdrant.io:6333
+QDRANT_API_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.BDBAtGf7x_XGCu3lO4-kNxgJeVgnSTKUjHeZBT6qJkQ
 QDRANT_COLLECTION_NAME=rag_embedding
 # Cohere Configuration (for query embeddings)
+COHERE_API_KEY=Dq2dLJlwDOZwAg4K7XalSEC91kXnucGd52KmkJh7
 # Application Configuration
 DEFAULT_CONTEXT_WINDOW=5

.env.example CHANGED Viewed

@@ -1,15 +1,14 @@
 # RAG Agent and API Layer Environment Variables
-# OpenAI API Configuration
-OPENAI_API_KEY="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"
-GEMINI_API_KEY="AIzaSyDM79Xi6rsffqHrwVOlc3FwPnk9pwBh9OI"
 # Qdrant Configuration
-QDRANT_URL="https://72888a6e-0dfc-4620-bf85-0b9025951e0c.us-east4-0.gcp.cloud.qdrant.io:6333"
-QDRANT_API_KEY="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.BDBAtGf7x_XGCu3lO4-kNxgJeVgnSTKUjHeZBT6qJkQ"
 QDRANT_COLLECTION_NAME=rag_embedding
-REACT_APP_RAG_API_URL="http://localhost:8000"
 # Cohere Configuration (for query embeddings)
-COHERE_API_KEY="Dq2dLJlwDOZwAg4K7XalSEC91kXnucGd52KmkJh7"
 # Application Configuration
 DEFAULT_CONTEXT_WINDOW=5

 # RAG Agent and API Layer Environment Variables
+# OpenRouter API Configuration
+OPENROUTER_API_KEY=your-openrouter-api-key-here
 # Qdrant Configuration
+QDRANT_URL=your-qdrant-instance-url
+QDRANT_API_KEY=your-qdrant-api-key
 QDRANT_COLLECTION_NAME=rag_embedding
+REACT_APP_RAG_API_URL=http://localhost:8000
 # Cohere Configuration (for query embeddings)
+COHERE_API_KEY=your-cohere-api-key-here
 # Application Configuration
 DEFAULT_CONTEXT_WINDOW=5

backend.log CHANGED Viewed

The diff for this file is too large to render. See raw diff

book_ingestor.egg-info/PKG-INFO CHANGED Viewed

@@ -14,60 +14,35 @@ Requires-Dist: uvicorn>=0.24.0
 Requires-Dist: openai>=1.0.0
 Requires-Dist: pydantic>=2.0.0
-# Book Content Ingestor & RAG Verification
-A system to extract content from Docusaurus-based book websites, chunk and embed it using Cohere, store embeddings in Qdrant Cloud for RAG applications, and verify the retrieval pipeline functionality.
-## Setup
-1. Install dependencies using uv:
-```bash
-cd backend
-uv sync
-```
-2. Create a `.env` file with your API keys:
-```bash
-cp .env.example .env
-# Edit .env with your actual API keys
-```
-## Environment Variables
-- `COHERE_API_KEY`: Your Cohere API key
-- `QDRANT_URL`: Your Qdrant Cloud URL
-- `QDRANT_API_KEY`: Your Qdrant API key
-- `QDRANT_COLLECTION_NAME`: Name of the collection to use (default: "rag_embedding")
-## Usage
-### Run the ingestion pipeline:
-```bash
-cd backend
-uv run python main.py
-```
-This will:
-1. Collect all URLs from the target book (https://sanilahmed.github.io/hackathon-ai-book/)
-2. Extract text content from each URL
-3. Chunk the content into fixed-size segments
-4. Generate embeddings using Cohere
-5. Store embeddings with metadata in Qdrant Cloud collection named "rag_embedding"
-### Run the verification pipeline:
-```bash
-cd backend
-python -m verify_retrieval.main
-```
-Or with specific options:
-```bash
-python -m verify_retrieval.main --query "transformer architecture in NLP" --top-k 10
-```
-The verification system will:
-1. Load vectors and metadata stored in Qdrant from the original ingestion
-2. Implement retrieval functions to query Qdrant using sample keywords or phrases
-3. Validate that retrieved chunks are accurate and relevant
-4. Check that metadata (URL, title, chunk_id) matches source content
-5. Log results and confirm the pipeline executes end-to-end without errors

 Requires-Dist: openai>=1.0.0
 Requires-Dist: pydantic>=2.0.0
+---
+title: Backend Deploy
+emoji: 🚀
+colorFrom: blue
+colorTo: purple
+sdk: docker
+pinned: false
+---
+# RAG Agent and API Layer
+This is a FastAPI application that provides a question-answering API using OpenRouter agents and Qdrant retrieval for RAG (Retrieval-Augmented Generation) functionality.
+## API Endpoints
+- `GET /` - Root endpoint with API information
+- `POST /ask` - Main question-answering endpoint
+- `GET /health` - Health check endpoint
+- `GET /ready` - Readiness check endpoint
+- `/docs` - API documentation (Swagger UI)
+- `/redoc` - API documentation (Redoc)
+## Configuration
+The application requires the following environment variables:
+- `OPENROUTER_API_KEY` - API key for OpenRouter
+- `QDRANT_URL` - URL for Qdrant vector database
+- `QDRANT_API_KEY` - API key for Qdrant database
+## Deployment
+This application is configured for deployment on Hugging Face Spaces using Docker.

rag_agent_api/README.md CHANGED Viewed

@@ -1,17 +1,17 @@
 # RAG Agent and API Layer
-A FastAPI-based question-answering system that uses OpenAI Agents and Qdrant retrieval to generate grounded responses based on book content.
 ## Overview
-The RAG Agent and API Layer provides a question-answering API that retrieves relevant content from Qdrant and uses an OpenAI agent to generate accurate, source-grounded responses. The system ensures that all answers are based only on the provided context to prevent hallucinations.
 ## Architecture
 The system consists of several key components:
 - **FastAPI Application**: Main entry point for the question-answering API
-- **OpenAI Agent**: Generates responses based on retrieved context
 - **Qdrant Retriever**: Retrieves relevant content chunks from Qdrant database
 - **Configuration Manager**: Handles environment variables and settings
 - **Data Models**: Pydantic models for API requests/responses
@@ -22,7 +22,7 @@ The system consists of several key components:
 ### Prerequisites
 - Python 3.9+
-- OpenAI API key
 - Qdrant Cloud instance with book content embeddings
 - Cohere API key (for query embeddings)
@@ -42,7 +42,7 @@ The system consists of several key components:
 3. Edit `.env` with your API keys and configuration:
    ```env
-   OPENAI_API_KEY=your-openai-api-key-here
    QDRANT_URL=your-qdrant-instance-url
    QDRANT_API_KEY=your-qdrant-api-key
    QDRANT_COLLECTION_NAME=rag_embedding
@@ -103,7 +103,7 @@ Root endpoint with API information.
 ### Environment Variables
-- `OPENAI_API_KEY`: Your OpenAI API key
 - `QDRANT_URL`: URL of your Qdrant instance
 - `QDRANT_API_KEY`: Your Qdrant API key
 - `QDRANT_COLLECTION_NAME`: Name of the collection with book embeddings (default: `rag_embedding`)
@@ -123,8 +123,8 @@ Pydantic models for API request/response schemas.
 ### Schemas (`schemas.py`)
 Additional schemas for internal data structures.
-### Agent (`agent.py`)
-OpenAI agent implementation with context injection and response validation.
 ### Retrieval (`retrieval.py`)
 Qdrant integration for content retrieval with semantic search.
@@ -160,7 +160,7 @@ pytest
 # Run specific test files
 pytest tests/test_api.py
-pytest tests/test_agent.py
 pytest tests/test_retrieval.py
 ```

 # RAG Agent and API Layer
+A FastAPI-based question-answering system that uses OpenRouter Agents and Qdrant retrieval to generate grounded responses based on book content.
 ## Overview
+The RAG Agent and API Layer provides a question-answering API that retrieves relevant content from Qdrant and uses an OpenRouter agent to generate accurate, source-grounded responses. The system ensures that all answers are based only on the provided context to prevent hallucinations.
 ## Architecture
 The system consists of several key components:
 - **FastAPI Application**: Main entry point for the question-answering API
+- **OpenRouter Agent**: Generates responses based on retrieved context
 - **Qdrant Retriever**: Retrieves relevant content chunks from Qdrant database
 - **Configuration Manager**: Handles environment variables and settings
 - **Data Models**: Pydantic models for API requests/responses
 ### Prerequisites
 - Python 3.9+
+- OpenRouter API key
 - Qdrant Cloud instance with book content embeddings
 - Cohere API key (for query embeddings)
 3. Edit `.env` with your API keys and configuration:
    ```env
+   OPENROUTER_API_KEY=your-openrouter-api-key-here
    QDRANT_URL=your-qdrant-instance-url
    QDRANT_API_KEY=your-qdrant-api-key
    QDRANT_COLLECTION_NAME=rag_embedding
 ### Environment Variables
+- `OPENROUTER_API_KEY`: Your OpenRouter API key
 - `QDRANT_URL`: URL of your Qdrant instance
 - `QDRANT_API_KEY`: Your Qdrant API key
 - `QDRANT_COLLECTION_NAME`: Name of the collection with book embeddings (default: `rag_embedding`)
 ### Schemas (`schemas.py`)
 Additional schemas for internal data structures.
+### Agent (`openrouter_agent.py`)
+OpenRouter agent implementation with context injection and response validation.
 ### Retrieval (`retrieval.py`)
 Qdrant integration for content retrieval with semantic search.
 # Run specific test files
 pytest tests/test_api.py
+pytest tests/test_openrouter_agent.py
 pytest tests/test_retrieval.py
 ```

rag_agent_api/__init__.py CHANGED Viewed

@@ -10,7 +10,7 @@ __license__ = "MIT"
 # Import main components for easy access
 from .main import app
 from .config import Config, get_config, validate_config
-from .agent import GeminiAgent
 from .retrieval import QdrantRetriever
 # Define what gets imported with "from rag_agent_api import *"
@@ -19,6 +19,6 @@ __all__ = [
     "Config",
     "get_config",
     "validate_config",
-    "GeminiAgent",
     "QdrantRetriever"
 ]

 # Import main components for easy access
 from .main import app
 from .config import Config, get_config, validate_config
+from .openrouter_agent import OpenRouterAgent
 from .retrieval import QdrantRetriever
 # Define what gets imported with "from rag_agent_api import *"
     "Config",
     "get_config",
     "validate_config",
+    "OpenRouterAgent",
     "QdrantRetriever"
 ]

rag_agent_api/__pycache__/__init__.cpython-313.pyc CHANGED Viewed

Binary files a/rag_agent_api/__pycache__/__init__.cpython-313.pyc and b/rag_agent_api/__pycache__/__init__.cpython-313.pyc differ

rag_agent_api/__pycache__/config.cpython-313.pyc CHANGED Viewed

Binary files a/rag_agent_api/__pycache__/config.cpython-313.pyc and b/rag_agent_api/__pycache__/config.cpython-313.pyc differ

rag_agent_api/__pycache__/main.cpython-313.pyc CHANGED Viewed

Binary files a/rag_agent_api/__pycache__/main.cpython-313.pyc and b/rag_agent_api/__pycache__/main.cpython-313.pyc differ

rag_agent_api/__pycache__/openrouter_agent.cpython-313.pyc ADDED Viewed

Binary file (12.7 kB). View file

rag_agent_api/config.py CHANGED Viewed

@@ -20,8 +20,8 @@ class Config:
     def __init__(self):
         """Initialize configuration by loading environment variables."""
         self.openai_api_key = os.getenv('OPENAI_API_KEY')
-        self.gemini_api_key = os.getenv('GEMINI_API_KEY')
         self.cohere_api_key = os.getenv('COHERE_API_KEY')
         self.qdrant_url = os.getenv('QDRANT_URL')
         self.qdrant_api_key = os.getenv('QDRANT_API_KEY')
         self.qdrant_collection_name = os.getenv('QDRANT_COLLECTION_NAME', 'rag_embedding')
@@ -38,6 +38,9 @@ class Config:
         """
         errors = []
         if not self.cohere_api_key:
             errors.append("COHERE_API_KEY environment variable not set")

     def __init__(self):
         """Initialize configuration by loading environment variables."""
         self.openai_api_key = os.getenv('OPENAI_API_KEY')
         self.cohere_api_key = os.getenv('COHERE_API_KEY')
+        self.openrouter_api_key = os.getenv('OPENROUTER_API_KEY')
         self.qdrant_url = os.getenv('QDRANT_URL')
         self.qdrant_api_key = os.getenv('QDRANT_API_KEY')
         self.qdrant_collection_name = os.getenv('QDRANT_COLLECTION_NAME', 'rag_embedding')
         """
         errors = []
+        if not self.openrouter_api_key:
+            errors.append("OPENROUTER_API_KEY environment variable not set")
         if not self.cohere_api_key:
             errors.append("COHERE_API_KEY environment variable not set")

rag_agent_api/main.py CHANGED Viewed

@@ -13,7 +13,7 @@ from .config import validate_config, get_config
 from .models import QueryRequest, APIResponse, ErrorResponse, HealthResponse
 from .schemas import AgentResponse, AgentContext
 from .utils import setup_logging, generate_response_id, format_timestamp, create_error_response
-from .agent import GeminiAgent
 from .retrieval import QdrantRetriever
@@ -41,7 +41,7 @@ setup_logging(config.log_level)
 # Global instances
 retriever: Optional[QdrantRetriever] = None
-agent: Optional[GeminiAgent] = None
 @app.on_event("startup")
@@ -56,10 +56,10 @@ async def startup_event():
     # Initialize agent first (this doesn't require async initialization)
     try:
-        agent = GeminiAgent()
-        logging.info("Google Gemini agent initialized successfully")
     except Exception as e:
-        logging.error(f"Failed to initialize Google Gemini agent: {e}")
         raise
     # Initialize retriever (async operations will be handled in the methods themselves)
@@ -82,22 +82,22 @@ async def health_check() -> HealthResponse:
         HealthResponse with status of services
     """
     # Check if all required components are initialized
-    gemini_status = "up" if agent else "down"
     qdrant_status = "up" if retriever else "down"
     agent_status = "up" if agent else "down"
     # Determine overall status
     overall_status = "healthy"
-    if gemini_status == "down" or qdrant_status == "down":
         overall_status = "unhealthy"
-    elif gemini_status == "degraded" or qdrant_status == "degraded":
         overall_status = "degraded"
     return HealthResponse(
         status=overall_status,
         timestamp=format_timestamp(),
         services={
-            "gemini": gemini_status,
             "qdrant": qdrant_status,
             "agent": agent_status
         }
@@ -194,7 +194,7 @@ async def root() -> Dict[str, Any]:
     return {
         "message": "RAG Agent and API Layer",
         "version": "1.0.0",
-        "description": "Question-answering API using OpenAI Agents and Qdrant retrieval",
         "endpoints": {
             "POST /ask": "Main question-answering endpoint",
             "GET /health": "Health check endpoint",

 from .models import QueryRequest, APIResponse, ErrorResponse, HealthResponse
 from .schemas import AgentResponse, AgentContext
 from .utils import setup_logging, generate_response_id, format_timestamp, create_error_response
+from .openrouter_agent import OpenRouterAgent
 from .retrieval import QdrantRetriever
 # Global instances
 retriever: Optional[QdrantRetriever] = None
+agent: Optional[OpenRouterAgent] = None
 @app.on_event("startup")
     # Initialize agent first (this doesn't require async initialization)
     try:
+        agent = OpenRouterAgent()
+        logging.info("OpenRouter agent initialized successfully")
     except Exception as e:
+        logging.error(f"Failed to initialize OpenRouter agent: {e}")
         raise
     # Initialize retriever (async operations will be handled in the methods themselves)
         HealthResponse with status of services
     """
     # Check if all required components are initialized
+    openrouter_status = "up" if agent else "down"
     qdrant_status = "up" if retriever else "down"
     agent_status = "up" if agent else "down"
     # Determine overall status
     overall_status = "healthy"
+    if openrouter_status == "down" or qdrant_status == "down":
         overall_status = "unhealthy"
+    elif openrouter_status == "degraded" or qdrant_status == "degraded":
         overall_status = "degraded"
     return HealthResponse(
         status=overall_status,
         timestamp=format_timestamp(),
         services={
+            "openrouter": openrouter_status,
             "qdrant": qdrant_status,
             "agent": agent_status
         }
     return {
         "message": "RAG Agent and API Layer",
         "version": "1.0.0",
+        "description": "Question-answering API using OpenRouter Agents and Qdrant retrieval",
         "endpoints": {
             "POST /ask": "Main question-answering endpoint",
             "GET /health": "Health check endpoint",

rag_agent_api/{agent.py → openrouter_agent.py} RENAMED Viewed

@@ -1,44 +1,41 @@
 """
-Google Gemini Agent module for the RAG Agent and API Layer system.
-This module provides functionality for creating and managing a Google Gemini agent
 that generates responses based on retrieved context.
 """
 import asyncio
 import logging
 from typing import List, Dict, Any, Optional
-import google.generativeai as genai
 from .config import get_config
 from .schemas import AgentContext, AgentResponse, SourceChunkSchema
 from .utils import format_confidence_score
-class GeminiAgent:
     """
-    A class to manage the Google Gemini agent for generating responses based on context.
     """
-    def __init__(self, model_name: str = "gemini-2.5-flash"):
         """
-        Initialize the Google Gemini agent with configuration.
         Args:
-            model_name: Name of the Gemini model to use (default: gemini-2.5-flash)
         """
         config = get_config()
-        api_key = config.gemini_api_key
         if not api_key:
-            raise ValueError("GEMINI_API_KEY environment variable not set")
-        # Configure the Gemini client
-        genai.configure(api_key=api_key)
-        # Create the generative model instance
-        self.model = genai.GenerativeModel(model_name)
         self.model_name = model_name
         self.default_temperature = config.default_temperature
-        logging.info(f"Gemini agent initialized with model: {model_name}")
     async def generate_response(self, context: AgentContext) -> AgentResponse:
         """
@@ -80,23 +77,43 @@ class GeminiAgent:
             # Prepare the user message with the query
             user_message = self._create_user_message(context)
-            # For Google Gemini, we need to format the prompt differently
-            # Combine system instructions and user query
-            full_prompt = f"{system_message}\n\n{user_message}"
-            # Generate response from Google Gemini
-            # For async generation, we need to use the appropriate async method
-            chat = self.model.start_chat()
-            response = await chat.send_message_async(
-                full_prompt,
-                generation_config={
-                    "temperature": context.source_policy if hasattr(context, 'temperature') else self.default_temperature,
-                    "max_output_tokens": 1000
                 }
-            )
-            # Extract the response text
-            raw_response = response.text if response and hasattr(response, 'text') else str(response)
             # If the response indicates no information was found, return the exact message
             if "I could not find this information in the book" in raw_response:
@@ -134,7 +151,7 @@ class GeminiAgent:
             return agent_response
         except Exception as e:
-            logging.error(f"Error generating response from Google Gemini agent: {e}", exc_info=True)
             # Return the specific message when there's an error
             return AgentResponse(
                 raw_response="I could not find this information in the book.",
@@ -197,20 +214,6 @@ QUESTION:
         return "\n".join(formatted_chunks)
-    def _create_context_messages(self, context: AgentContext) -> List[Dict[str, str]]:
-        """
-        Create context messages from the retrieved chunks.
-        With the new format, context is now provided in the user message,
-        so this method returns an empty list to avoid duplication.
-        Args:
-            context: AgentContext containing the query and retrieved context chunks
-        Returns:
-            Empty list since context is now in user message
-        """
-        return []
     def _identify_used_sources(self, response: str, chunks: List[SourceChunkSchema]) -> List[str]:
         """
         Identify which sources were likely used in the response.
@@ -356,8 +359,4 @@ QUESTION:
                 return True
         # In a more sophisticated implementation, you'd validate against the context more rigorously
-        return True
-# Global agent instance (if needed)
-# agent_instance = OpenAIAgent()

 """
+OpenRouter Agent module for the RAG Agent and API Layer system.
+This module provides functionality for creating and managing an OpenRouter agent
 that generates responses based on retrieved context.
 """
 import asyncio
 import logging
 from typing import List, Dict, Any, Optional
+import httpx
 from .config import get_config
 from .schemas import AgentContext, AgentResponse, SourceChunkSchema
 from .utils import format_confidence_score
+class OpenRouterAgent:
     """
+    A class to manage the OpenRouter agent for generating responses based on context.
     """
+    def __init__(self, model_name: str = "arcee-ai/trinity-mini:free"):
         """
+        Initialize the OpenRouter agent with configuration.
         Args:
+            model_name: Name of the OpenRouter model to use (default: arcee-ai/trinity-mini:free)
         """
         config = get_config()
+        api_key = config.openrouter_api_key
         if not api_key:
+            raise ValueError("OPENROUTER_API_KEY environment variable not set")
+        self.api_key = api_key
         self.model_name = model_name
+        self.base_url = "https://openrouter.ai/api/v1"
         self.default_temperature = config.default_temperature
+        logging.info(f"OpenRouter agent initialized with model: {model_name}")
     async def generate_response(self, context: AgentContext) -> AgentResponse:
         """
             # Prepare the user message with the query
             user_message = self._create_user_message(context)
+            # Prepare the payload for OpenRouter API
+            payload = {
+                "model": self.model_name,
+                "messages": [
+                    {"role": "system", "content": system_message},
+                    {"role": "user", "content": user_message}
+                ],
+                "temperature": context.source_policy if hasattr(context, 'temperature') else self.default_temperature,
+                "max_tokens": 1000
+            }
+            # Make the API call to OpenRouter
+            async with httpx.AsyncClient(timeout=30.0) as client:
+                headers = {
+                    "Authorization": f"Bearer {self.api_key}",
+                    "Content-Type": "application/json"
                 }
+                response = await client.post(
+                    f"{self.base_url}/chat/completions",
+                    json=payload,
+                    headers=headers
+                )
+                if response.status_code != 200:
+                    logging.error(f"OpenRouter API error: {response.status_code} - {response.text}")
+                    return AgentResponse(
+                        raw_response="I could not find this information in the book.",
+                        used_sources=[],
+                        confidence_score=0.0,
+                        is_valid=False,
+                        validation_details=f"API error: {response.status_code}",
+                        unsupported_claims=[]
+                    )
+                response_data = response.json()
+                raw_response = response_data["choices"][0]["message"]["content"]
             # If the response indicates no information was found, return the exact message
             if "I could not find this information in the book" in raw_response:
             return agent_response
         except Exception as e:
+            logging.error(f"Error generating response from OpenRouter agent: {e}", exc_info=True)
             # Return the specific message when there's an error
             return AgentResponse(
                 raw_response="I could not find this information in the book.",
         return "\n".join(formatted_chunks)
     def _identify_used_sources(self, response: str, chunks: List[SourceChunkSchema]) -> List[str]:
         """
         Identify which sources were likely used in the response.
                 return True
         # In a more sophisticated implementation, you'd validate against the context more rigorously
+        return True

requirements.txt CHANGED Viewed

@@ -9,4 +9,4 @@ uvicorn>=0.24.0
 openai>=1.0.0
 pydantic>=2.0.0
 numpy>=1.21.0
-google-generativeai>=0.8.0

 openai>=1.0.0
 pydantic>=2.0.0
 numpy>=1.21.0
+httpx>=0.27.0

tests/test_integration.py CHANGED Viewed

@@ -7,7 +7,7 @@ from fastapi.testclient import TestClient
 from unittest.mock import Mock, patch, AsyncMock
 from rag_agent_api.main import app, retriever, agent
 from rag_agent_api.retrieval import QdrantRetriever
-from rag_agent_api.agent import OpenAIAgent
 from rag_agent_api.schemas import SourceChunkSchema, AgentResponse, AgentContext
@@ -17,13 +17,13 @@ def test_full_query_flow_with_mocked_components():
         'QDRANT_URL': 'http://test-qdrant:6333',
         'QDRANT_API_KEY': 'test-api-key',
         'COHERE_API_KEY': 'test-cohere-key',
-        'OPENAI_API_KEY': 'test-openai-key'
     }):
         with patch('rag_agent_api.main.QdrantRetriever') as mock_retriever_class:
-            with patch('rag_agent_api.main.OpenAIAgent') as mock_agent_class:
                 # Create mock instances
                 mock_retriever = Mock(spec=QdrantRetriever)
-                mock_agent = Mock(spec=OpenAIAgent)
                 # Configure the class mocks to return our instance mocks
                 mock_retriever_class.return_value = mock_retriever
@@ -84,11 +84,11 @@ async def test_agent_context_creation():
         'QDRANT_URL': 'http://test-qdrant:6333',
         'QDRANT_API_KEY': 'test-api-key',
         'COHERE_API_KEY': 'test-cohere-key',
-        'OPENAI_API_KEY': 'test-openai-key'
     }):
         with patch('rag_agent_api.retrieval.AsyncQdrantClient') as mock_qdrant_client:
             with patch('rag_agent_api.retrieval.cohere.Client') as mock_cohere_client:
-                with patch('rag_agent_api.agent.AsyncOpenAI'):
                     # Mock the Qdrant client
                     mock_qdrant_instance = Mock()
                     mock_qdrant_client.return_value = mock_qdrant_instance
@@ -101,7 +101,7 @@ async def test_agent_context_creation():
                     # Initialize components
                     retriever = QdrantRetriever(collection_name="test_collection")
-                    agent = OpenAIAgent(model_name="gpt-4-test")
                     # Create test chunks
                     test_chunk = SourceChunkSchema(
@@ -145,7 +145,7 @@ def test_health_endpoint_integration():
             assert "services" in data
             # Check that services status is included
-            assert "openai" in data["services"]
             assert "qdrant" in data["services"]
             assert "agent" in data["services"]
@@ -157,11 +157,11 @@ async def test_retrieval_and_agent_integration():
         'QDRANT_URL': 'http://test-qdrant:6333',
         'QDRANT_API_KEY': 'test-api-key',
         'COHERE_API_KEY': 'test-cohere-key',
-        'OPENAI_API_KEY': 'test-openai-key'
     }):
         with patch('rag_agent_api.retrieval.AsyncQdrantClient') as mock_qdrant_client:
             with patch('rag_agent_api.retrieval.cohere.Client') as mock_cohere_client:
-                with patch('rag_agent_api.agent.AsyncOpenAI') as mock_openai:
                     # Mock the Qdrant client
                     mock_qdrant_instance = Mock()
                     mock_qdrant_client.return_value = mock_qdrant_instance
@@ -172,18 +172,21 @@ async def test_retrieval_and_agent_integration():
                     mock_cohere_client.return_value = mock_cohere_instance
                     mock_cohere_instance.embed.return_value = Mock(embeddings=[[0.1, 0.2, 0.3]])
-                    # Mock the OpenAI client
-                    mock_openai_instance = Mock()
-                    mock_openai.return_value = mock_openai_instance
                     mock_completion = Mock()
-                    mock_completion.choices = [Mock()]
-                    mock_completion.choices[0].message = Mock()
-                    mock_completion.choices[0].message.content = "This is a test response"
-                    mock_openai_instance.chat.completions.create = AsyncMock(return_value=mock_completion)
                     # Initialize components
                     test_retriever = QdrantRetriever(collection_name="test_collection")
-                    test_agent = OpenAIAgent(model_name="gpt-4-test")
                     # Mock the retrieval result
                     mock_chunk = SourceChunkSchema(

 from unittest.mock import Mock, patch, AsyncMock
 from rag_agent_api.main import app, retriever, agent
 from rag_agent_api.retrieval import QdrantRetriever
+from rag_agent_api.openrouter_agent import OpenRouterAgent
 from rag_agent_api.schemas import SourceChunkSchema, AgentResponse, AgentContext
         'QDRANT_URL': 'http://test-qdrant:6333',
         'QDRANT_API_KEY': 'test-api-key',
         'COHERE_API_KEY': 'test-cohere-key',
+        'OPENROUTER_API_KEY': 'test-openrouter-key'
     }):
         with patch('rag_agent_api.main.QdrantRetriever') as mock_retriever_class:
+            with patch('rag_agent_api.main.OpenRouterAgent') as mock_agent_class:
                 # Create mock instances
                 mock_retriever = Mock(spec=QdrantRetriever)
+                mock_agent = Mock(spec=OpenRouterAgent)
                 # Configure the class mocks to return our instance mocks
                 mock_retriever_class.return_value = mock_retriever
         'QDRANT_URL': 'http://test-qdrant:6333',
         'QDRANT_API_KEY': 'test-api-key',
         'COHERE_API_KEY': 'test-cohere-key',
+        'OPENROUTER_API_KEY': 'test-openrouter-key'
     }):
         with patch('rag_agent_api.retrieval.AsyncQdrantClient') as mock_qdrant_client:
             with patch('rag_agent_api.retrieval.cohere.Client') as mock_cohere_client:
+                with patch('rag_agent_api.openrouter_agent.httpx.AsyncClient'):
                     # Mock the Qdrant client
                     mock_qdrant_instance = Mock()
                     mock_qdrant_client.return_value = mock_qdrant_instance
                     # Initialize components
                     retriever = QdrantRetriever(collection_name="test_collection")
+                    agent = OpenRouterAgent(model_name="gpt-4-test")
                     # Create test chunks
                     test_chunk = SourceChunkSchema(
             assert "services" in data
             # Check that services status is included
+            assert "openrouter" in data["services"]
             assert "qdrant" in data["services"]
             assert "agent" in data["services"]
         'QDRANT_URL': 'http://test-qdrant:6333',
         'QDRANT_API_KEY': 'test-api-key',
         'COHERE_API_KEY': 'test-cohere-key',
+        'OPENROUTER_API_KEY': 'test-openrouter-key'
     }):
         with patch('rag_agent_api.retrieval.AsyncQdrantClient') as mock_qdrant_client:
             with patch('rag_agent_api.retrieval.cohere.Client') as mock_cohere_client:
+                with patch('rag_agent_api.openrouter_agent.httpx.AsyncClient') as mock_httpx_client:
                     # Mock the Qdrant client
                     mock_qdrant_instance = Mock()
                     mock_qdrant_client.return_value = mock_qdrant_instance
                     mock_cohere_client.return_value = mock_cohere_instance
                     mock_cohere_instance.embed.return_value = Mock(embeddings=[[0.1, 0.2, 0.3]])
+                    # Mock the httpx client for OpenRouter
+                    mock_httpx_instance = Mock()
+                    mock_httpx_client.return_value.__aenter__.return_value = mock_httpx_instance
                     mock_completion = Mock()
+                    mock_completion.json.return_value = {
+                        "choices": [
+                            {"message": {"content": "This is a test response"}}
+                        ]
+                    }
+                    mock_httpx_instance.post = AsyncMock(return_value=mock_completion)
+                    mock_httpx_instance.post.return_value.status_code = 200
                     # Initialize components
                     test_retriever = QdrantRetriever(collection_name="test_collection")
+                    test_agent = OpenRouterAgent(model_name="gpt-4-test")
                     # Mock the retrieval result
                     mock_chunk = SourceChunkSchema(