Final_Assignment_Template

Sleeping

App Files Files Community

Kackle committed on Jun 29, 2025

Commit

1d4ab9d

verified ·

1 Parent(s): 4e65abc

remove youtube because of bot issues

Browse files

Files changed (1) hide show

video_parser.py +178 -78

video_parser.py CHANGED Viewed

@@ -1,91 +1,191 @@
-import cv2
-import requests
-import tempfile
 import os
-from urllib.parse import urlparse, parse_qs
-import yt_dlp
-class VideoParser:
     def __init__(self):
-        self.temp_dir = tempfile.mkdtemp()
-    def download_youtube_video(self, url: str) -> str:
-        """Download YouTube video and return local path"""
-        ydl_opts = {
-            'format': 'worst[height<=480]/worst',
-            'outtmpl': os.path.join(self.temp_dir, '%(title)s.%(ext)s'),
-            'quiet': True,
-            'no_warnings': True,
-            'extract_flat': False,
-            'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
         }
-        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-            info = ydl.extract_info(url, download=True)
-            return ydl.prepare_filename(info)
-    def analyze_video_frames(self, video_path: str, sample_rate: int = 30):
-        """Analyze video frames for object detection/counting"""
-        cap = cv2.VideoCapture(video_path)
-        frame_count = 0
-        results = []
-        while cap.isOpened():
-            ret, frame = cap.read()
-            if not ret:
                 break
-            if frame_count % sample_rate == 0:
-                # Basic frame analysis - you'd integrate with object detection here
-                results.append({
-                    'frame': frame_count,
-                    'timestamp': frame_count / cap.get(cv2.CAP_PROP_FPS),
-                    'frame_data': frame
-                })
-            frame_count += 1
-        cap.release()
-        return results
-    def extract_audio(self, video_path: str) -> str:
-        """Extract audio from video for speech analysis"""
-        audio_path = video_path.rsplit('.', 1)[0] + '.wav'
-        # Use ffmpeg to extract audio
-        import subprocess
-        subprocess.run([
-            'ffmpeg', '-i', video_path, '-vn', '-acodec', 'pcm_s16le',
-            '-ar', '16000', '-ac', '1', audio_path, '-y'
-        ], capture_output=True)
-        return audio_path
-    def get_youtube_metadata(self, url: str) -> dict:
-        """Extract YouTube video metadata without downloading"""
         try:
-            ydl_opts = {
-                'quiet': True,
-                'no_download': True,
-                'extract_flat': False
-            }
-            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-                info = ydl.extract_info(url, download=False)
-                return {
-                    'title': info.get('title', 'Unknown'),
-                    'description': info.get('description', '')[:500],
-                    'duration': info.get('duration', 0),
-                    'view_count': info.get('view_count', 0),
-                    'upload_date': info.get('upload_date', 'Unknown'),
-                    'uploader': info.get('uploader', 'Unknown')
-                }
         except Exception as e:
-            return {'error': str(e)}
-    def cleanup(self):
-        """Clean up temporary files"""
-        import shutil
-        shutil.rmtree(self.temp_dir, ignore_errors=True)

 import os
+import boto3
+import json
+from dotenv import load_dotenv
+from video_parser import VideoParser
+from excel_parser import ExcelParser
+import re
+load_dotenv()
+class NovaProAgent:
     def __init__(self):
+        print("NovaProAgent initialized.")
+        # Get AWS credentials from environment variables
+        aws_access_key_id = os.getenv('AWS_ACCESS_KEY_ID')
+        aws_secret_access_key = os.getenv('AWS_SECRET_ACCESS_KEY')
+        # Initialize the AWS client
+        boto3.client(
+            's3',
+            aws_access_key_id=aws_access_key_id,
+            aws_secret_access_key=aws_secret_access_key
+        )
+        session = boto3.session.Session()
+        self.bedrock_client = boto3.client(
+            service_name='bedrock-runtime',
+            region_name='us-east-1'
+        )
+        self.model_id = "amazon.nova-pro-v1:0"
+        self.content_type = "application/json"
+        self.accept = "application/json"
+        # Initialize parsers
+        self.video_parser = VideoParser()
+        self.excel_parser = ExcelParser()
+    async def __call__(self, question: str) -> str:
+        print(f"NovaProAgent received question (first 50 chars): {question}...")
+        try:
+            # Check if question involves video analysis
+            if 'youtube.com' in question or 'video' in question.lower():
+                return await self._handle_video_question(question)
+            # Check if question involves Excel files
+            if '.xlsx' in question or '.xls' in question or 'excel' in question.lower():
+                return await self._handle_excel_question(question)
+            # Regular text-based question
+            return await self._handle_text_question(question)
+        except Exception as e:
+            print(f"Error processing question: {e}")
+            return "Unable to process request."
+    async def _handle_video_question(self, question: str) -> str:
+        """Handle questions that require video analysis"""
+        # Extract YouTube URL
+        youtube_url = re.search(r'https://www\.youtube\.com/watch\?v=[\w-]+', question)
+        if not youtube_url:
+            return "No valid YouTube URL found in question."
+        url = youtube_url.group()
+        # Extract video ID for reference
+        video_id = re.search(r'v=([\w-]+)', url).group(1)
+        # Use Nova Pro to provide intelligent response about video analysis
+        video_prompt = f"""User is asking about a YouTube video: {url}
+Video ID: {video_id}
+User question: {question}
+Provide a helpful response about video analysis limitations and suggest alternatives."""
+        payload = {
+            "messages": [{
+                "role": "user",
+                "content": [{"text": video_prompt}]
+            }],
+            "inferenceConfig": {
+                "max_new_tokens": 150,
+                "temperature": 0.0
+            }
         }
+        try:
+            response = self.bedrock_client.invoke_model(
+                modelId=self.model_id,
+                contentType=self.content_type,
+                accept=self.accept,
+                body=json.dumps(payload)
+            )
+            response_body = json.loads(response['body'].read())
+            return response_body['output']['message']['content'][0]['text'].strip()
+        except Exception as e:
+            return f"Video ID: {video_id}. Direct video analysis unavailable due to access restrictions."
+    async def _handle_excel_question(self, question: str) -> str:
+        """Handle questions that require Excel file analysis"""
+        # Extract file path from question if present
+        file_patterns = [r'([A-Za-z]:\\[^\s]+\.xlsx?)', r'([^\s]+\.xlsx?)']
+        file_path = None
+        for pattern in file_patterns:
+            match = re.search(pattern, question)
+            if match:
+                file_path = match.group(1)
                 break
+        if not file_path:
+            return "Please provide Excel file path in your question."
         try:
+            if 'sales' in question.lower() and 'food' in question.lower():
+                results = self.excel_parser.analyze_sales_data(file_path)
+                return results.get('total_food_sales', 'No sales data found')
+            else:
+                df = self.excel_parser.read_excel_file(file_path)
+                return f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns."
         except Exception as e:
+            return f"Excel analysis failed: {str(e)}"
+    async def _handle_text_question(self, question: str) -> str:
+        """Handle regular text-based questions"""
+        # Create a more focused prompt for concise answers
+        prompt = f"""Answer this question directly and concisely. Provide only the essential information requested, not explanations or step-by-step reasoning unless specifically asked.
+Question: {question}
+Answer:"""
+        # Prepare the request payload for Nova Pro
+        payload = {
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [{
+                        "text": prompt
+                    }]
+                }
+            ],
+            "inferenceConfig": {
+                "max_new_tokens": 250,
+                "temperature": 0.0
+            }
+        }
+        # Call Nova Pro model
+        response = self.bedrock_client.invoke_model(
+            modelId=self.model_id,
+            contentType=self.content_type,
+            accept=self.accept,
+            body=json.dumps(payload)
+        )
+        # Parse response
+        response_body = json.loads(response['body'].read())
+        answer = response_body['output']['message']['content'][0]['text']
+        # Clean up the answer
+        answer = answer.strip()
+        # Remove verbose beginnings
+        verbose_starts = [
+            "To answer this question",
+            "Based on the information",
+            "According to",
+            "The answer is",
+            "Looking at"
+        ]
+        for start in verbose_starts:
+            if answer.lower().startswith(start.lower()):
+                sentences = answer.split('. ')
+                for sentence in sentences[1:]:
+                    if len(sentence.strip()) > 10:
+                        answer = sentence.strip()
+                        break
+        # Limit length
+        if len(answer) > 200:
+            sentences = answer.split('. ')
+            answer = sentences[0] + '.'
+        return answer