Spaces:
Sleeping
Sleeping
import json
import os
import re

import boto3
from dotenv import load_dotenv

from excel_parser import ExcelParser
from video_parser import VideoParser

# Load variables from a local .env file (if present) into the process
# environment at import time, after all imports have run.
load_dotenv()
class NovaProAgent:
    """Question-answering agent backed by the Amazon Nova Pro model on Bedrock.

    Calling an instance with a question routes it, by simple keyword
    matching, to one of three handlers:

    * video questions  -> download a YouTube video and sample its frames
      through ``VideoParser``;
    * Excel questions  -> load / analyse a workbook through ``ExcelParser``;
    * everything else  -> ask the Nova Pro model and tidy up its reply.

    Every handler returns a plain string; failures are reported as strings
    rather than raised.
    """

    # Only canonical ``https://www.youtube.com/watch?v=...`` URLs are recognised.
    _YOUTUBE_URL_RE = re.compile(r'https://www\.youtube\.com/watch\?v=[\w-]+')

    # Tried in order: a Windows drive path first, then any whitespace-free
    # token ending in .xls / .xlsx.
    _EXCEL_PATH_PATTERNS = (
        re.compile(r'([A-Za-z]:\\[^\s]+\.xlsx?)'),
        re.compile(r'([^\s]+\.xlsx?)'),
    )

    # Passed straight to VideoParser.analyze_video_frames(sample_rate=...).
    # NOTE(review): presumably "every Nth frame" - confirm against VideoParser.
    _FRAME_SAMPLE_RATE = 60

    # Lower-cased openings that mark a reply as "verbose".
    _VERBOSE_STARTS = (
        "to answer this question",
        "based on the information",
        "according to",
        "the answer is",
        "looking at",
    )

    # Replies longer than this are cut down to their first sentence.
    _MAX_ANSWER_CHARS = 200

    def __init__(self, region_name: str = "us-east-1"):
        """Create the Bedrock runtime client and the file parsers.

        Args:
            region_name: AWS region for the Bedrock runtime client. Defaults
                to ``us-east-1``, the value that was previously hard-coded.
        """
        print("NovaProAgent initialized.")

        # boto3 resolves credentials through its default chain, which reads
        # AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY (and AWS_SESSION_TOKEN)
        # from the environment. The previous throw-away S3 client and unused
        # Session object were dead code and have been dropped.
        self.bedrock_client = boto3.client(
            service_name='bedrock-runtime',
            region_name=region_name,
        )
        self.model_id = "amazon.nova-pro-v1:0"
        self.content_type = "application/json"
        self.accept = "application/json"

        # Initialize parsers
        self.video_parser = VideoParser()
        self.excel_parser = ExcelParser()

    async def __call__(self, question: str) -> str:
        """Answer *question*, dispatching to the matching handler.

        Args:
            question: The user's question, possibly containing a YouTube URL
                or an Excel file path.

        Returns:
            The handler's answer, or ``"Unable to process request."`` if any
            exception escapes a handler.
        """
        # The log line promises the first 50 characters, so actually truncate.
        print(f"NovaProAgent received question (first 50 chars): {str(question)[:50]}...")
        try:
            lowered = question.lower()

            # Video analysis takes priority over every other route.
            if 'youtube.com' in question or 'video' in lowered:
                return await self._handle_video_question(question)

            # '.xls' is a substring of '.xlsx', so one test covers both.
            if '.xls' in question or 'excel' in lowered:
                return await self._handle_excel_question(question)

            # Regular text-based question
            return await self._handle_text_question(question)
        except Exception as e:
            # Top-level boundary: report and degrade to a fixed message.
            print(f"Error processing question: {e}")
            return "Unable to process request."

    async def _handle_video_question(self, question: str) -> str:
        """Download the YouTube video referenced in *question* and sample it.

        The question itself is not answered here; the result only reports
        how many frames were analysed.

        Returns:
            A summary string, a "no URL" message when no supported YouTube
            URL is present, or ``"Video analysis failed: ..."`` on error.
        """
        match = self._YOUTUBE_URL_RE.search(question)
        if not match:
            return "No valid YouTube URL found in question."
        url = match.group()

        try:
            try:
                video_path = self.video_parser.download_youtube_video(url)
                frames = self.video_parser.analyze_video_frames(
                    video_path, sample_rate=self._FRAME_SAMPLE_RATE
                )
            finally:
                # Always clean up, including when download or analysis fails;
                # previously a failure skipped cleanup entirely.
                self.video_parser.cleanup()
            return f"Analyzed {len(frames)} frames from video. Video processing complete."
        except Exception as e:
            return f"Video analysis failed: {e}"

    @classmethod
    def _extract_excel_path(cls, question: str):
        """Return the first .xls/.xlsx path mentioned in *question*, or None.

        Leading quote characters are stripped so that a path written as
        ``"sales.xlsx"`` yields ``sales.xlsx`` instead of ``"sales.xlsx``.
        """
        for pattern in cls._EXCEL_PATH_PATTERNS:
            match = pattern.search(question)
            if match:
                return match.group(1).lstrip("\"'`")
        return None

    async def _handle_excel_question(self, question: str) -> str:
        """Answer a question about an Excel workbook named in *question*.

        Questions mentioning both "sales" and "food" return the
        ``total_food_sales`` entry of ``ExcelParser.analyze_sales_data``;
        anything else returns the workbook's row and column counts.

        Returns:
            The answer as a string, a prompt for a file path when none is
            found, or ``"Excel analysis failed: ..."`` on error.
        """
        file_path = self._extract_excel_path(question)
        if not file_path:
            return "Please provide Excel file path in your question."

        try:
            lowered = question.lower()
            if 'sales' in lowered and 'food' in lowered:
                results = self.excel_parser.analyze_sales_data(file_path)
                # The method is declared to return str, so coerce whatever
                # value the parser stored (it may be numeric).
                return str(results.get('total_food_sales', 'No sales data found'))

            df = self.excel_parser.read_excel_file(file_path)
            return f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns."
        except Exception as e:
            return f"Excel analysis failed: {e}"

    @classmethod
    def _clean_answer(cls, answer: str) -> str:
        """Trim a model reply down to a short, direct answer.

        * Surrounding whitespace is removed.
        * If the reply opens with a known verbose phrase, it is replaced by
          the first later sentence longer than 10 characters (if any).
        * If the reply is still longer than ``_MAX_ANSWER_CHARS``, only its
          first sentence is kept.
        """
        answer = answer.strip()

        if answer.lower().startswith(cls._VERBOSE_STARTS):
            for sentence in answer.split('. ')[1:]:
                candidate = sentence.strip()
                if len(candidate) > 10:
                    answer = candidate
                    break

        if len(answer) > cls._MAX_ANSWER_CHARS:
            first = answer.split('. ')[0]
            # Splitting on '. ' removes the period, so restore it - but do
            # not double up when the text already ends in punctuation.
            if not first.endswith(('.', '!', '?')):
                first += '.'
            answer = first

        return answer

    async def _handle_text_question(self, question: str) -> str:
        """Ask the Nova Pro model a plain text question.

        Returns:
            The model's reply after ``_clean_answer`` post-processing.

        Raises:
            Whatever the Bedrock client or response parsing raises; the
            caller (``__call__``) converts these into a fixed message.
        """
        # Create a more focused prompt for concise answers
        prompt = (
            "Answer this question directly and concisely. Provide only the "
            "essential information requested, not explanations or "
            "step-by-step reasoning unless specifically asked.\n"
            f"Question: {question}\n"
            "Answer:"
        )

        # Prepare the request payload for Nova Pro
        payload = {
            "messages": [
                {
                    "role": "user",
                    "content": [{"text": prompt}],
                }
            ],
            "inferenceConfig": {
                "max_new_tokens": 250,
                "temperature": 0.0,
            },
        }

        # Call Nova Pro model
        response = self.bedrock_client.invoke_model(
            modelId=self.model_id,
            contentType=self.content_type,
            accept=self.accept,
            body=json.dumps(payload),
        )

        # Parse response
        response_body = json.loads(response['body'].read())
        answer = response_body['output']['message']['content'][0]['text']

        return self._clean_answer(answer)