Spaces:
Sleeping
Sleeping
File size: 7,305 Bytes
7224b0b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 |
"""
Gemini AI Agent implementation.
Handles communication with Google's Gemini API and manages conversation context.
"""
import logging
import asyncio
from typing import List, Dict, Any, Optional
import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold
from src.config import Config
from src.utils import sanitize_input, format_response
logger = logging.getLogger(__name__)
class GeminiAgent:
    """
    AI Agent powered by Google's Gemini model family.

    Wraps the ``google.generativeai`` client: configures safety settings,
    maintains a bounded conversation history, and builds prompts that
    combine a fixed system prompt, optional caller-supplied context, and
    the most recent exchanges.

    This agent is designed to handle complex, multi-modal questions including:
    - Text analysis and reasoning
    - Mathematical computations
    - Research tasks requiring web search
    - Data analysis and interpretation
    - Creative problem solving
    """

    def __init__(self, config: Config):
        """
        Initialize the Gemini Agent.

        Args:
            config: Configuration object containing API keys and settings
                (``gemini_api_key``, ``model_name``, ``max_tokens``,
                ``temperature``, ``max_history_length``).
        """
        self.config = config
        self.conversation_history: List[Dict[str, str]] = []
        self._initialize_client()

    def _initialize_client(self) -> None:
        """
        Initialize the Gemini API client with safety settings.

        Raises:
            Exception: re-raises any SDK error after logging it.
        """
        try:
            genai.configure(api_key=self.config.gemini_api_key)
            # Disable content blocking for research and analysis tasks so the
            # model can discuss sensitive topics encountered while fact-checking.
            safety_settings = {
                HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
            }
            # Initialize the model once; reused for every question.
            self.model = genai.GenerativeModel(
                model_name=self.config.model_name,
                safety_settings=safety_settings,
            )
            logger.info("Gemini client initialized with model: %s", self.config.model_name)
        except Exception as e:
            logger.error("Failed to initialize Gemini client: %s", e)
            raise

    def _build_system_prompt(self) -> str:
        """
        Build the system prompt that defines the agent's capabilities and role.

        Returns:
            str: System prompt for the agent.
        """
        # NOTE: this literal is part of the model-facing contract; its exact
        # wording is preserved verbatim.
        return """You are an advanced AI research assistant specialized in answering complex, multi-faceted questions that may require:
- Deep research and fact-checking
- Mathematical calculations and logical reasoning
- Analysis of multimedia content (images, videos, audio)
- Data interpretation and statistical analysis
- Cross-referencing multiple sources
- Handling ambiguous or challenging queries
Key capabilities:
1. **Research Excellence**: Thoroughly investigate topics using available information
2. **Multi-modal Analysis**: Process and analyze images, videos, and audio content
3. **Mathematical Reasoning**: Perform calculations and logical deductions
4. **Data Analysis**: Interpret tables, charts, and datasets
5. **Fact Verification**: Cross-check information for accuracy
6. **Creative Problem Solving**: Approach unusual questions with innovative methods
Guidelines:
- Provide detailed, well-researched answers
- Show your reasoning process step-by-step
- If information is incomplete, clearly state assumptions
- For complex calculations, break down the steps
- When analyzing media, describe what you observe
- Always strive for accuracy over speed
- If uncertain, express confidence levels
Remember: You excel at handling challenging questions that require deep thinking and research."""

    async def process_question(self, question: str, context: Optional[str] = None) -> str:
        """
        Process a question and generate a response using Gemini.

        Args:
            question: The user's question.
            context: Optional additional context prepended to the prompt.

        Returns:
            str: The agent's formatted response, or an apology/error message
            string on failure (this method never raises).
        """
        try:
            # Sanitize input before it is embedded in the prompt.
            clean_question = sanitize_input(question)
            system_prompt = self._build_system_prompt()

            # Fold up to the last 3 exchanges into the prompt (answers
            # truncated to 200 chars) so follow-up questions can be resolved.
            context_str = ""
            if self.conversation_history:
                context_str = "\n\nPrevious conversation context:\n"
                for entry in self.conversation_history[-3:]:  # Last 3 exchanges
                    context_str += f"Q: {entry['question']}\nA: {entry['answer'][:200]}...\n"

            # Assemble the final prompt: system prompt, optional caller
            # context, recent history, then the current question.
            full_prompt = f"{system_prompt}\n\n"
            if context:
                full_prompt += f"Additional Context: {context}\n\n"
            full_prompt += f"{context_str}\nCurrent Question: {clean_question}\n\nResponse:"

            # BUGFIX: generate_content is a blocking network call; running it
            # directly inside this coroutine would stall the event loop. Run
            # it in a worker thread instead.
            response = await asyncio.to_thread(
                self.model.generate_content,
                full_prompt,
                generation_config=genai.types.GenerationConfig(
                    max_output_tokens=self.config.max_tokens,
                    temperature=self.config.temperature,
                ),
            )

            # Extract and format the response. Accessing .text may raise if
            # the response was blocked; the except below handles that too.
            if response.text:
                formatted_response = format_response(response.text)
                self._update_history(clean_question, formatted_response)
                logger.info("Successfully processed question: %s...", clean_question[:50])
                return formatted_response
            logger.warning("Received empty response from Gemini")
            return "I apologize, but I couldn't generate a response to your question. Please try rephrasing it."
        except Exception as e:
            # Boundary handler: surface the error to the user rather than raise.
            logger.error("Error processing question: %s", e)
            return f"I encountered an error while processing your question: {str(e)}"

    def _update_history(self, question: str, answer: str) -> None:
        """
        Update the conversation history, keeping it bounded.

        Args:
            question: The user's question.
            answer: The agent's response.
        """
        self.conversation_history.append({
            'question': question,
            'answer': answer
        })
        # Keep only the most recent config.max_history_length exchanges.
        if len(self.conversation_history) > self.config.max_history_length:
            self.conversation_history = self.conversation_history[-self.config.max_history_length:]

    def clear_history(self) -> None:
        """
        Clear the conversation history.
        """
        self.conversation_history.clear()
        logger.info("Conversation history cleared")

    def get_stats(self) -> Dict[str, Any]:
        """
        Get agent statistics.

        Returns:
            Dict containing the model name, current history length, and the
            configured token/temperature limits.
        """
        return {
            'model': self.config.model_name,
            'conversation_length': len(self.conversation_history),
            'max_tokens': self.config.max_tokens,
            'temperature': self.config.temperature
        }