Final_Assignment_Template

Sleeping

File size: 6,719 Bytes

import os
import boto3
import json
from dotenv import load_dotenv
from video_parser import VideoParser
from excel_parser import ExcelParser
import re

# Load settings from a local .env file (python-dotenv) into the process
# environment at import time, so the AWS credentials NovaProAgent relies on
# are available as environment variables before any client is created.
load_dotenv()

class NovaProAgent:
    """Question-answering agent backed by Amazon Nova Pro on AWS Bedrock.

    An instance is an async callable: ``await agent(question)`` inspects the
    question for keywords, dispatches to the video, Excel or plain-text
    handler, and returns that handler's answer as a string.
    """

    # Wordy lead-ins (lower-cased) that a reply may start with; matching is
    # case-insensitive against the beginning of the reply.
    _VERBOSE_STARTS = (
        "to answer this question",
        "based on the information",
        "according to",
        "the answer is",
        "looking at",
    )
    # "The answer is <X>" carries the answer in the same sentence, so only the
    # lead-in is removed. The \b keeps "The answer isn't ..." from matching.
    _ANSWER_IS = re.compile(r'the answer is\b[\s:]*', re.IGNORECASE)
    # Replies longer than this are reduced to their first sentence.
    _MAX_ANSWER_CHARS = 200
    # Accepts http/https and the bare, "www." and "m." hosts; group 1 is the
    # video ID.
    _YOUTUBE_URL = re.compile(
        r'https?://(?:www\.|m\.)?youtube\.com/watch\?v=([\w-]+)'
    )
    # Tried in order: a Windows drive path first, then any whitespace-free
    # token ending in .xls/.xlsx.
    _EXCEL_PATH_PATTERNS = (r'([A-Za-z]:\\[^\s]+\.xlsx?)', r'([^\s]+\.xlsx?)')

    def __init__(self):
        """Create the Bedrock runtime client and the file parsers."""
        print("NovaProAgent initialized.")

        # No credentials are passed explicitly: boto3's default credential
        # chain reads AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY (and the
        # session token, if any) from the environment on its own.
        self.bedrock_client = boto3.client(
            service_name='bedrock-runtime',
            region_name='us-east-1'
        )

        self.model_id = "amazon.nova-pro-v1:0"
        self.content_type = "application/json"
        self.accept = "application/json"

        # Initialize parsers
        self.video_parser = VideoParser()
        self.excel_parser = ExcelParser()

    async def __call__(self, question: str) -> str:
        """Route *question* to the matching handler and return its answer.

        Any exception raised while logging, routing or handling is reported
        on stdout and converted into the fixed fallback answer
        ``"Unable to process request."``.
        """
        try:
            print(f"NovaProAgent received question (first 50 chars): {question[:50]}...")
            lowered = question.lower()

            # Check if question involves video analysis
            if 'youtube.com' in question or 'video' in lowered:
                return await self._handle_video_question(question)

            # Check if question involves Excel files ('.xls' also covers '.xlsx')
            if '.xls' in question or 'excel' in lowered:
                return await self._handle_excel_question(question)

            # Regular text-based question
            return await self._handle_text_question(question)

        except Exception as e:
            print(f"Error processing question: {e}")
            return "Unable to process request."

    def _invoke_model(self, prompt: str, max_new_tokens: int) -> str:
        """Send *prompt* as a single user message and return the reply text.

        The reply is stripped of surrounding whitespace. Errors from the
        Bedrock client or from an unexpected response shape propagate to the
        caller.

        NOTE(review): ``invoke_model`` is a synchronous boto3 call, so the
        event loop is blocked while the request is in flight.
        """
        payload = {
            "messages": [{
                "role": "user",
                "content": [{"text": prompt}]
            }],
            "inferenceConfig": {
                "max_new_tokens": max_new_tokens,
                "temperature": 0.0
            }
        }
        response = self.bedrock_client.invoke_model(
            modelId=self.model_id,
            contentType=self.content_type,
            accept=self.accept,
            body=json.dumps(payload)
        )
        response_body = json.loads(response['body'].read())
        return response_body['output']['message']['content'][0]['text'].strip()

    async def _handle_video_question(self, question: str) -> str:
        """Handle questions that reference a YouTube video.

        The video itself is never downloaded or parsed here; the model is
        asked to respond about the limitations of video analysis. Returns a
        fixed message when no YouTube watch URL is present, and a fallback
        message naming the video ID when the model call fails.
        """
        match = self._YOUTUBE_URL.search(question)
        if not match:
            return "No valid YouTube URL found in question."

        url = match.group(0)
        video_id = match.group(1)

        # Use Nova Pro to provide intelligent response about video analysis
        video_prompt = f"""User is asking about a YouTube video: {url}
Video ID: {video_id}
User question: {question}

Provide a helpful response about video analysis limitations and suggest alternatives."""

        try:
            return self._invoke_model(video_prompt, max_new_tokens=150)
        except Exception as e:
            # Best effort: report the failure, then degrade to a canned reply.
            print(f"Video question failed: {e}")
            return f"Video ID: {video_id}. Direct video analysis unavailable due to access restrictions."

    async def _handle_excel_question(self, question: str) -> str:
        """Handle questions that require Excel file analysis.

        The first .xls/.xlsx path found in the question is handed to the
        Excel parser. Questions mentioning both "sales" and "food" get the
        parser's ``total_food_sales`` value; anything else gets a row/column
        summary. Parser failures are returned as an explanatory string
        rather than raised.
        """
        file_path = None
        for pattern in self._EXCEL_PATH_PATTERNS:
            match = re.search(pattern, question)
            if match:
                # The generic pattern can capture an opening quote or
                # parenthesis that precedes the file name.
                file_path = match.group(1).lstrip('"\'(')
                break

        if not file_path:
            return "Please provide Excel file path in your question."

        lowered = question.lower()
        try:
            if 'sales' in lowered and 'food' in lowered:
                results = self.excel_parser.analyze_sales_data(file_path)
                # The parser may hand back a number; callers expect a string.
                return str(results.get('total_food_sales', 'No sales data found'))

            df = self.excel_parser.read_excel_file(file_path)
            return f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns."

        except Exception as e:
            return f"Excel analysis failed: {str(e)}"

    async def _handle_text_question(self, question: str) -> str:
        """Handle regular text-based questions.

        Asks the model for a concise answer and post-processes the reply
        with ``_clean_answer``. Model errors propagate to the caller.
        """
        # Create a more focused prompt for concise answers
        prompt = f"""Answer this question directly and concisely. Provide only the essential information requested, not explanations or step-by-step reasoning unless specifically asked.

Question: {question}

Answer:"""

        answer = self._invoke_model(prompt, max_new_tokens=250)
        return self._clean_answer(answer)

    @classmethod
    def _clean_answer(cls, answer: str) -> str:
        """Trim a model reply down to its essential part.

        * ``"The answer is X ..."`` -> the lead-in is removed, X is kept.
        * Other verbose openings -> the first later sentence longer than ten
          characters replaces the reply (unchanged if there is none).
        * Replies over ``_MAX_ANSWER_CHARS`` -> only the first sentence is
          kept, terminated with a single period. A reply that is one long
          sentence is returned whole.
        """
        answer = answer.strip()

        lead_in = cls._ANSWER_IS.match(answer)
        if lead_in and lead_in.end() < len(answer):
            answer = answer[lead_in.end():]
        elif answer.lower().startswith(cls._VERBOSE_STARTS):
            for sentence in answer.split('. ')[1:]:
                candidate = sentence.strip()
                if len(candidate) > 10:
                    answer = candidate
                    break

        # Limit length
        if len(answer) > cls._MAX_ANSWER_CHARS:
            first_sentence = answer.split('. ')[0]
            # Splitting on '. ' removes the period, but a reply without that
            # separator still has its own terminator; do not double it.
            if first_sentence.endswith(('.', '!', '?')):
                answer = first_sentence
            else:
                answer = first_sentence + '.'

        return answer