"""
Gemini AI Agent implementation.
Handles communication with Google's Gemini API and manages conversation context.
"""
import logging
import asyncio
from typing import List, Dict, Any, Optional
import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold
from src.config import Config
from src.utils import sanitize_input, format_response
logger = logging.getLogger(__name__)
class GeminiAgent:
    """
    AI Agent powered by Google's Gemini 1.5 Flash model.

    This agent is designed to handle complex, multi-modal questions including:
    - Text analysis and reasoning
    - Mathematical computations
    - Research tasks requiring web search
    - Data analysis and interpretation
    - Creative problem solving
    """

    def __init__(self, config: Config):
        """
        Initialize the Gemini Agent.

        Args:
            config: Configuration object containing API keys and settings
                (gemini_api_key, model_name, max_tokens, temperature,
                max_history_length).
        """
        self.config = config
        # Rolling Q/A log; trimmed to config.max_history_length entries.
        self.conversation_history: List[Dict[str, str]] = []
        self._initialize_client()

    def _initialize_client(self) -> None:
        """
        Initialize the Gemini API client with safety settings.

        Raises:
            Exception: Re-raises any SDK/configuration error after logging,
                so callers fail fast on a bad API key or model name.
        """
        try:
            genai.configure(api_key=self.config.gemini_api_key)
            # Safety filters are disabled so research/analysis prompts are not
            # silently blocked; responses may still lack candidates (see
            # _extract_text).
            safety_settings = {
                HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
            }
            self.model = genai.GenerativeModel(
                model_name=self.config.model_name,
                safety_settings=safety_settings,
            )
            # Lazy %-style args: message is only formatted if the level is enabled.
            logger.info("Gemini client initialized with model: %s", self.config.model_name)
        except Exception as e:
            logger.error("Failed to initialize Gemini client: %s", e)
            raise

    def _build_system_prompt(self) -> str:
        """
        Build the system prompt that defines the agent's capabilities and role.

        Returns:
            str: System prompt for the agent
        """
        return """You are an advanced AI research assistant specialized in answering complex, multi-faceted questions that may require:
- Deep research and fact-checking
- Mathematical calculations and logical reasoning
- Analysis of multimedia content (images, videos, audio)
- Data interpretation and statistical analysis
- Cross-referencing multiple sources
- Handling ambiguous or challenging queries
Key capabilities:
1. **Research Excellence**: Thoroughly investigate topics using available information
2. **Multi-modal Analysis**: Process and analyze images, videos, and audio content
3. **Mathematical Reasoning**: Perform calculations and logical deductions
4. **Data Analysis**: Interpret tables, charts, and datasets
5. **Fact Verification**: Cross-check information for accuracy
6. **Creative Problem Solving**: Approach unusual questions with innovative methods
Guidelines:
- Provide detailed, well-researched answers
- Show your reasoning process step-by-step
- If information is incomplete, clearly state assumptions
- For complex calculations, break down the steps
- When analyzing media, describe what you observe
- Always strive for accuracy over speed
- If uncertain, express confidence levels
Remember: You excel at handling challenging questions that require deep thinking and research."""

    def _compose_prompt(self, clean_question: str, context: Optional[str]) -> str:
        """
        Assemble the full prompt: system prompt, optional caller context,
        recent conversation history, and the current question.

        Args:
            clean_question: The already-sanitized question text.
            context: Optional additional context supplied by the caller.

        Returns:
            str: The complete prompt to send to the model.
        """
        context_str = ""
        if self.conversation_history:
            context_str = "\n\nPrevious conversation context:\n"
            # Only the last 3 exchanges, with answers truncated to 200 chars,
            # to keep the prompt bounded.
            for entry in self.conversation_history[-3:]:
                context_str += f"Q: {entry['question']}\nA: {entry['answer'][:200]}...\n"
        full_prompt = f"{self._build_system_prompt()}\n\n"
        if context:
            full_prompt += f"Additional Context: {context}\n\n"
        full_prompt += f"{context_str}\nCurrent Question: {clean_question}\n\nResponse:"
        return full_prompt

    @staticmethod
    def _extract_text(response) -> Optional[str]:
        """
        Safely pull the text out of a Gemini response.

        The SDK's `response.text` accessor raises ValueError when the
        candidate was blocked or contains no text part, so a bare attribute
        access cannot be used to detect an empty response.

        Returns:
            The response text, or None when no usable text is available.
        """
        try:
            return response.text or None
        except (ValueError, AttributeError):
            return None

    async def process_question(self, question: str, context: Optional[str] = None) -> str:
        """
        Process a question and generate a response using Gemini.

        Args:
            question: The user's question
            context: Optional additional context

        Returns:
            str: The agent's response, or a user-facing error/apology message
                on failure (this method does not raise).
        """
        try:
            clean_question = sanitize_input(question)
            full_prompt = self._compose_prompt(clean_question, context)
            # generate_content is a blocking HTTP call; run it in a worker
            # thread so the event loop is not stalled while waiting on the API.
            response = await asyncio.to_thread(
                self.model.generate_content,
                full_prompt,
                generation_config=genai.types.GenerationConfig(
                    max_output_tokens=self.config.max_tokens,
                    temperature=self.config.temperature,
                ),
            )
            text = self._extract_text(response)
            if text:
                formatted_response = format_response(text)
                self._update_history(clean_question, formatted_response)
                logger.info("Successfully processed question: %s...", clean_question[:50])
                return formatted_response
            logger.warning("Received empty response from Gemini")
            return "I apologize, but I couldn't generate a response to your question. Please try rephrasing it."
        except Exception as e:
            # Boundary handler: never propagate — return a user-facing message.
            logger.error("Error processing question: %s", e)
            return f"I encountered an error while processing your question: {str(e)}"

    def _update_history(self, question: str, answer: str) -> None:
        """
        Update the conversation history.

        Args:
            question: The user's question
            answer: The agent's response
        """
        self.conversation_history.append({
            'question': question,
            'answer': answer
        })
        # Keep only the most recent exchanges, bounded by config.
        if len(self.conversation_history) > self.config.max_history_length:
            self.conversation_history = self.conversation_history[-self.config.max_history_length:]

    def clear_history(self) -> None:
        """
        Clear the conversation history.
        """
        self.conversation_history.clear()
        logger.info("Conversation history cleared")

    def get_stats(self) -> Dict[str, Any]:
        """
        Get agent statistics.

        Returns:
            Dict containing the model name, current conversation length,
            and the configured max_tokens and temperature.
        """
        return {
            'model': self.config.model_name,
            'conversation_length': len(self.conversation_history),
            'max_tokens': self.config.max_tokens,
            'temperature': self.config.temperature
        }