import json
import os
import re

import boto3
from dotenv import load_dotenv

from excel_parser import ExcelParser
from video_parser import VideoParser

# Load variables from a local .env file into the process environment before
# any AWS client is created.
load_dotenv()


class NovaProAgent:
    """Route a question to video, Excel, or Amazon Nova Pro text handling.

    Instances are awaitable callables: ``answer = await agent(question)``.
    Routing is keyword based (see ``__call__``); anything that is not a video
    or Excel question is sent to the Nova Pro model through Bedrock.
    """

    # Only full ``https://www.youtube.com/watch?v=...`` links are recognised.
    _YOUTUBE_URL_RE = re.compile(r'https://www\.youtube\.com/watch\?v=[\w-]+')

    # Tried in order: a Windows drive-letter path first, then any
    # whitespace-free token ending in .xls / .xlsx.
    _EXCEL_PATH_PATTERNS = (
        re.compile(r'([A-Za-z]:\\[^\s]+\.xlsx?)'),
        re.compile(r'([^\s]+\.xlsx?)'),
    )

    # Openers whose first sentence is treated as filler and dropped.
    _PREAMBLE_STARTS = (
        "To answer this question",
        "Based on the information",
        "According to",
        "Looking at",
    )
    # Opener that is followed directly by the answer, so only the opener
    # itself is removed.
    _ANSWER_PREFIX = "The answer is"

    # Answers longer than this are cut down to their first sentence.
    _MAX_ANSWER_CHARS = 200

    def __init__(self, region_name: str = 'us-east-1',
                 model_id: str = "amazon.nova-pro-v1:0"):
        """Create the Bedrock runtime client and the file parsers.

        Args:
            region_name: AWS region for the Bedrock runtime client.
            model_id: Bedrock model identifier used for text questions.
        """
        print("NovaProAgent initialized.")

        # No explicit credentials are passed: boto3 resolves them itself
        # (its default lookup includes the AWS_ACCESS_KEY_ID /
        # AWS_SECRET_ACCESS_KEY environment variables that load_dotenv()
        # may have populated).
        self.bedrock_client = boto3.client(
            service_name='bedrock-runtime',
            region_name=region_name,
        )

        self.model_id = model_id
        self.content_type = "application/json"
        self.accept = "application/json"

        # Initialize parsers
        self.video_parser = VideoParser()
        self.excel_parser = ExcelParser()

    async def __call__(self, question: str) -> str:
        """Answer ``question``, dispatching to the matching handler.

        Routing order: video (mentions ``youtube.com`` or the word "video"),
        then Excel (mentions ``.xls``/``.xlsx`` or the word "excel"), then
        plain text.

        Returns:
            The handler's answer, or ``"Unable to process request."`` if the
            handler raised.
        """
        print(f"NovaProAgent received question (first 50 chars): {question[:50]}...")

        lowered = question.lower()
        try:
            # Check if question involves video analysis
            if 'youtube.com' in question or 'video' in lowered:
                return await self._handle_video_question(question)

            # Check if question involves Excel files ('.xls' also covers '.xlsx')
            if '.xls' in question or 'excel' in lowered:
                return await self._handle_excel_question(question)

            # Regular text-based question
            return await self._handle_text_question(question)

        except Exception as e:
            # Top-level boundary: report the failure and return a fixed
            # fallback string instead of propagating.
            print(f"Error processing question: {e}")
            return "Unable to process request."

    async def _handle_video_question(self, question: str) -> str:
        """Handle questions that require video analysis.

        Returns:
            A summary with the number of analyzed frames, or a message
            describing why the video could not be processed.
        """
        # Extract YouTube URL
        url_match = self._YOUTUBE_URL_RE.search(question)
        if not url_match:
            return "No valid YouTube URL found in question."

        url = url_match.group()

        # NOTE(review): the parser calls below are synchronous and will block
        # the event loop while they run.
        try:
            # Download video using VideoParser
            video_path = self.video_parser.download_youtube_video(url)
            try:
                # Extract frames for analysis
                frames = self.video_parser.analyze_video_frames(
                    video_path, sample_rate=60
                )
            finally:
                # Run cleanup even when frame analysis fails so a completed
                # download is not left behind.
                self.video_parser.cleanup()

            return f"Analyzed {len(frames)} frames from video. Video processing complete."
        except Exception as e:
            return f"Video analysis failed: {str(e)}"

    async def _handle_excel_question(self, question: str) -> str:
        """Handle questions that require Excel file analysis.

        Returns:
            The food-sales total when the question mentions both "sales" and
            "food", otherwise a row/column summary of the workbook; or a
            message describing the failure.
        """
        # Extract file path from question if present
        file_path = None
        for pattern in self._EXCEL_PATH_PATTERNS:
            match = pattern.search(question)
            if match:
                file_path = match.group(1)
                break

        if not file_path:
            return "Please provide Excel file path in your question."

        lowered = question.lower()
        try:
            if 'sales' in lowered and 'food' in lowered:
                results = self.excel_parser.analyze_sales_data(file_path)
                # str() keeps the declared return type even if the parser
                # reports the total as a number.
                return str(results.get('total_food_sales', 'No sales data found'))

            df = self.excel_parser.read_excel_file(file_path)
            return f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns."
        except Exception as e:
            return f"Excel analysis failed: {str(e)}"

    async def _handle_text_question(self, question: str) -> str:
        """Handle regular text-based questions by asking Nova Pro.

        Returns:
            The model's reply after ``_tidy_answer`` post-processing.

        Raises:
            Whatever the Bedrock client or response parsing raises; the
            ``__call__`` boundary converts these into a fallback string.
        """
        # Create a more focused prompt for concise answers
        prompt = f"""Answer this question directly and concisely. Provide only the essential information requested, not explanations or step-by-step reasoning unless specifically asked.

Question: {question}

Answer:"""

        # Prepare the request payload for Nova Pro
        payload = {
            "messages": [
                {
                    "role": "user",
                    "content": [{"text": prompt}],
                }
            ],
            "inferenceConfig": {
                "max_new_tokens": 250,
                "temperature": 0.0,
            },
        }

        # Call Nova Pro model
        # NOTE(review): this is a synchronous network call inside a coroutine.
        response = self.bedrock_client.invoke_model(
            modelId=self.model_id,
            contentType=self.content_type,
            accept=self.accept,
            body=json.dumps(payload),
        )

        # Parse response
        response_body = json.loads(response['body'].read())
        answer = response_body['output']['message']['content'][0]['text']

        return self._tidy_answer(answer)

    @classmethod
    def _tidy_answer(cls, answer: str) -> str:
        """Strip a verbose opener from ``answer`` and cap its length.

        At most one opener rule is applied:

        * ``"The answer is ..."`` - only the opener is removed, because the
          text right after it is the answer itself.
        * Any other known preamble - the first sentence is dropped and the
          first following sentence longer than 10 characters is kept; if
          there is none, the answer is left unchanged.

        Answers longer than ``_MAX_ANSWER_CHARS`` are then reduced to their
        first sentence.
        """
        answer = answer.strip()
        lowered = answer.lower()

        if lowered.startswith(cls._ANSWER_PREFIX.lower()):
            remainder = answer[len(cls._ANSWER_PREFIX):].lstrip(' :')
            # Keep the original text if nothing meaningful follows the opener.
            if remainder.strip('. '):
                answer = remainder
        elif lowered.startswith(tuple(s.lower() for s in cls._PREAMBLE_STARTS)):
            for sentence in answer.split('. ')[1:]:
                candidate = sentence.strip()
                if len(candidate) > 10:
                    answer = candidate
                    break

        # Limit length
        if len(answer) > cls._MAX_ANSWER_CHARS:
            first_sentence = answer.split('. ')[0]
            # Avoid doubling punctuation when the text already ends a sentence.
            if not first_sentence.endswith(('.', '!', '?')):
                first_sentence += '.'
            answer = first_sentence

        return answer