Spaces:
Sleeping
Sleeping
File size: 7,305 Bytes
7224b0b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 |
"""
Gemini AI Agent implementation.
Handles communication with Google's Gemini API and manages conversation context.
"""
import logging
import asyncio
from typing import List, Dict, Any, Optional
import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold
from src.config import Config
from src.utils import sanitize_input, format_response
logger = logging.getLogger(__name__)
class GeminiAgent:
    """
    AI Agent powered by Google's Gemini model family.

    Wraps the ``google.generativeai`` client: configures safety settings,
    maintains a bounded conversation history, and builds prompts that
    combine a fixed system prompt, optional caller-supplied context, and
    the most recent exchanges.

    This agent is designed to handle complex, multi-modal questions including:
    - Text analysis and reasoning
    - Mathematical computations
    - Research tasks requiring web search
    - Data analysis and interpretation
    - Creative problem solving
    """

    def __init__(self, config: Config):
        """
        Initialize the Gemini Agent.

        Args:
            config: Configuration object containing API keys and settings
                (``gemini_api_key``, ``model_name``, ``max_tokens``,
                ``temperature``, ``max_history_length``).
        """
        self.config = config
        self.conversation_history: List[Dict[str, str]] = []
        self._initialize_client()

    def _initialize_client(self) -> None:
        """
        Initialize the Gemini API client with safety settings.

        Raises:
            Exception: re-raises any SDK error after logging it.
        """
        try:
            genai.configure(api_key=self.config.gemini_api_key)
            # Disable content blocking for research and analysis tasks so the
            # model can discuss sensitive topics encountered while fact-checking.
            safety_settings = {
                HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
            }
            # Initialize the model once; reused for every question.
            self.model = genai.GenerativeModel(
                model_name=self.config.model_name,
                safety_settings=safety_settings,
            )
            logger.info("Gemini client initialized with model: %s", self.config.model_name)
        except Exception as e:
            logger.error("Failed to initialize Gemini client: %s", e)
            raise

    def _build_system_prompt(self) -> str:
        """
        Build the system prompt that defines the agent's capabilities and role.

        Returns:
            str: System prompt for the agent.
        """
        # NOTE: this literal is part of the model-facing contract; its exact
        # wording is preserved verbatim.
        return """You are an advanced AI research assistant specialized in answering complex, multi-faceted questions that may require:
- Deep research and fact-checking
- Mathematical calculations and logical reasoning
- Analysis of multimedia content (images, videos, audio)
- Data interpretation and statistical analysis
- Cross-referencing multiple sources
- Handling ambiguous or challenging queries
Key capabilities:
1. **Research Excellence**: Thoroughly investigate topics using available information
2. **Multi-modal Analysis**: Process and analyze images, videos, and audio content
3. **Mathematical Reasoning**: Perform calculations and logical deductions
4. **Data Analysis**: Interpret tables, charts, and datasets
5. **Fact Verification**: Cross-check information for accuracy
6. **Creative Problem Solving**: Approach unusual questions with innovative methods
Guidelines:
- Provide detailed, well-researched answers
- Show your reasoning process step-by-step
- If information is incomplete, clearly state assumptions
- For complex calculations, break down the steps
- When analyzing media, describe what you observe
- Always strive for accuracy over speed
- If uncertain, express confidence levels
Remember: You excel at handling challenging questions that require deep thinking and research."""

    async def process_question(self, question: str, context: Optional[str] = None) -> str:
        """
        Process a question and generate a response using Gemini.

        Args:
            question: The user's question.
            context: Optional additional context prepended to the prompt.

        Returns:
            str: The agent's formatted response, or an apology/error message
            string on failure (this method never raises).
        """
        try:
            # Sanitize input before it is embedded in the prompt.
            clean_question = sanitize_input(question)
            system_prompt = self._build_system_prompt()

            # Fold up to the last 3 exchanges into the prompt (answers
            # truncated to 200 chars) so follow-up questions can be resolved.
            context_str = ""
            if self.conversation_history:
                context_str = "\n\nPrevious conversation context:\n"
                for entry in self.conversation_history[-3:]:  # Last 3 exchanges
                    context_str += f"Q: {entry['question']}\nA: {entry['answer'][:200]}...\n"

            # Assemble the final prompt: system prompt, optional caller
            # context, recent history, then the current question.
            full_prompt = f"{system_prompt}\n\n"
            if context:
                full_prompt += f"Additional Context: {context}\n\n"
            full_prompt += f"{context_str}\nCurrent Question: {clean_question}\n\nResponse:"

            # BUGFIX: generate_content is a blocking network call; running it
            # directly inside this coroutine would stall the event loop. Run
            # it in a worker thread instead.
            response = await asyncio.to_thread(
                self.model.generate_content,
                full_prompt,
                generation_config=genai.types.GenerationConfig(
                    max_output_tokens=self.config.max_tokens,
                    temperature=self.config.temperature,
                ),
            )

            # Extract and format the response. Accessing .text may raise if
            # the response was blocked; the except below handles that too.
            if response.text:
                formatted_response = format_response(response.text)
                self._update_history(clean_question, formatted_response)
                logger.info("Successfully processed question: %s...", clean_question[:50])
                return formatted_response
            logger.warning("Received empty response from Gemini")
            return "I apologize, but I couldn't generate a response to your question. Please try rephrasing it."
        except Exception as e:
            # Boundary handler: surface the error to the user rather than raise.
            logger.error("Error processing question: %s", e)
            return f"I encountered an error while processing your question: {str(e)}"

    def _update_history(self, question: str, answer: str) -> None:
        """
        Update the conversation history, keeping it bounded.

        Args:
            question: The user's question.
            answer: The agent's response.
        """
        self.conversation_history.append({
            'question': question,
            'answer': answer
        })
        # Keep only the most recent config.max_history_length exchanges.
        if len(self.conversation_history) > self.config.max_history_length:
            self.conversation_history = self.conversation_history[-self.config.max_history_length:]

    def clear_history(self) -> None:
        """
        Clear the conversation history.
        """
        self.conversation_history.clear()
        logger.info("Conversation history cleared")

    def get_stats(self) -> Dict[str, Any]:
        """
        Get agent statistics.

        Returns:
            Dict containing the model name, current history length, and the
            configured token/temperature limits.
        """
        return {
            'model': self.config.model_name,
            'conversation_length': len(self.conversation_history),
            'max_tokens': self.config.max_tokens,
            'temperature': self.config.temperature
        }