Final_Assignment_Template

Sleeping

App Files Files Community

Kackle committed on Jun 29, 2025

Commit

d8e49a7

verified ·

1 Parent(s): 1d4ab9d

mistake copy again

Browse files

Files changed (1) hide show

video_parser.py +78 -178

video_parser.py CHANGED Viewed

@@ -1,191 +1,91 @@
 import os
-import boto3
-import json
-from dotenv import load_dotenv
-from video_parser import VideoParser
-from excel_parser import ExcelParser
-import re
-load_dotenv()
-class NovaProAgent:
     def __init__(self):
-        print("NovaProAgent initialized.")
-        # Get AWS credentials from environment variables
-        aws_access_key_id = os.getenv('AWS_ACCESS_KEY_ID')
-        aws_secret_access_key = os.getenv('AWS_SECRET_ACCESS_KEY')
-        # Initialize the AWS client
-        boto3.client(
-            's3',
-            aws_access_key_id=aws_access_key_id,
-            aws_secret_access_key=aws_secret_access_key
-        )
-        session = boto3.session.Session()
-        self.bedrock_client = boto3.client(
-            service_name='bedrock-runtime',
-            region_name='us-east-1'
-        )
-        self.model_id = "amazon.nova-pro-v1:0"
-        self.content_type = "application/json"
-        self.accept = "application/json"
-        # Initialize parsers
-        self.video_parser = VideoParser()
-        self.excel_parser = ExcelParser()
-    async def __call__(self, question: str) -> str:
-        print(f"NovaProAgent received question (first 50 chars): {question}...")
-        try:
-            # Check if question involves video analysis
-            if 'youtube.com' in question or 'video' in question.lower():
-                return await self._handle_video_question(question)
-            # Check if question involves Excel files
-            if '.xlsx' in question or '.xls' in question or 'excel' in question.lower():
-                return await self._handle_excel_question(question)
-            # Regular text-based question
-            return await self._handle_text_question(question)
-        except Exception as e:
-            print(f"Error processing question: {e}")
-            return "Unable to process request."
-    async def _handle_video_question(self, question: str) -> str:
-        """Handle questions that require video analysis"""
-        # Extract YouTube URL
-        youtube_url = re.search(r'https://www\.youtube\.com/watch\?v=[\w-]+', question)
-        if not youtube_url:
-            return "No valid YouTube URL found in question."
-        url = youtube_url.group()
-        # Extract video ID for reference
-        video_id = re.search(r'v=([\w-]+)', url).group(1)
-        # Use Nova Pro to provide intelligent response about video analysis
-        video_prompt = f"""User is asking about a YouTube video: {url}
-Video ID: {video_id}
-User question: {question}
-Provide a helpful response about video analysis limitations and suggest alternatives."""
-        payload = {
-            "messages": [{
-                "role": "user",
-                "content": [{"text": video_prompt}]
-            }],
-            "inferenceConfig": {
-                "max_new_tokens": 150,
-                "temperature": 0.0
-            }
         }
-        try:
-            response = self.bedrock_client.invoke_model(
-                modelId=self.model_id,
-                contentType=self.content_type,
-                accept=self.accept,
-                body=json.dumps(payload)
-            )
-            response_body = json.loads(response['body'].read())
-            return response_body['output']['message']['content'][0]['text'].strip()
-        except Exception as e:
-            return f"Video ID: {video_id}. Direct video analysis unavailable due to access restrictions."
-    async def _handle_excel_question(self, question: str) -> str:
-        """Handle questions that require Excel file analysis"""
-        # Extract file path from question if present
-        file_patterns = [r'([A-Za-z]:\\[^\s]+\.xlsx?)', r'([^\s]+\.xlsx?)']
-        file_path = None
-        for pattern in file_patterns:
-            match = re.search(pattern, question)
-            if match:
-                file_path = match.group(1)
                 break
-        if not file_path:
-            return "Please provide Excel file path in your question."
         try:
-            if 'sales' in question.lower() and 'food' in question.lower():
-                results = self.excel_parser.analyze_sales_data(file_path)
-                return results.get('total_food_sales', 'No sales data found')
-            else:
-                df = self.excel_parser.read_excel_file(file_path)
-                return f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns."
         except Exception as e:
-            return f"Excel analysis failed: {str(e)}"
-    async def _handle_text_question(self, question: str) -> str:
-        """Handle regular text-based questions"""
-        # Create a more focused prompt for concise answers
-        prompt = f"""Answer this question directly and concisely. Provide only the essential information requested, not explanations or step-by-step reasoning unless specifically asked.
-Question: {question}
-Answer:"""
-        # Prepare the request payload for Nova Pro
-        payload = {
-            "messages": [
-                {
-                    "role": "user",
-                    "content": [{
-                        "text": prompt
-                    }]
-                }
-            ],
-            "inferenceConfig": {
-                "max_new_tokens": 250,
-                "temperature": 0.0
-            }
-        }
-        # Call Nova Pro model
-        response = self.bedrock_client.invoke_model(
-            modelId=self.model_id,
-            contentType=self.content_type,
-            accept=self.accept,
-            body=json.dumps(payload)
-        )
-        # Parse response
-        response_body = json.loads(response['body'].read())
-        answer = response_body['output']['message']['content'][0]['text']
-        # Clean up the answer
-        answer = answer.strip()
-        # Remove verbose beginnings
-        verbose_starts = [
-            "To answer this question",
-            "Based on the information",
-            "According to",
-            "The answer is",
-            "Looking at"
-        ]
-        for start in verbose_starts:
-            if answer.lower().startswith(start.lower()):
-                sentences = answer.split('. ')
-                for sentence in sentences[1:]:
-                    if len(sentence.strip()) > 10:
-                        answer = sentence.strip()
-                        break
-        # Limit length
-        if len(answer) > 200:
-            sentences = answer.split('. ')
-            answer = sentences[0] + '.'
-        return answer

+import cv2
+import requests
+import tempfile
 import os
+from urllib.parse import urlparse, parse_qs
+import yt_dlp
+class VideoParser:
     def __init__(self):
+        self.temp_dir = tempfile.mkdtemp()
+    def download_youtube_video(self, url: str) -> str:
+        """Download YouTube video and return local path"""
+        ydl_opts = {
+            'format': 'worst[height<=480]/worst',
+            'outtmpl': os.path.join(self.temp_dir, '%(title)s.%(ext)s'),
+            'quiet': True,
+            'no_warnings': True,
+            'extract_flat': False,
+            'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
         }
+        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+            info = ydl.extract_info(url, download=True)
+            return ydl.prepare_filename(info)
+    def analyze_video_frames(self, video_path: str, sample_rate: int = 30):
+        """Analyze video frames for object detection/counting"""
+        cap = cv2.VideoCapture(video_path)
+        frame_count = 0
+        results = []
+        while cap.isOpened():
+            ret, frame = cap.read()
+            if not ret:
                 break
+            if frame_count % sample_rate == 0:
+                # Basic frame analysis - you'd integrate with object detection here
+                results.append({
+                    'frame': frame_count,
+                    'timestamp': frame_count / cap.get(cv2.CAP_PROP_FPS),
+                    'frame_data': frame
+                })
+            frame_count += 1
+        cap.release()
+        return results
+    def extract_audio(self, video_path: str) -> str:
+        """Extract audio from video for speech analysis"""
+        audio_path = video_path.rsplit('.', 1)[0] + '.wav'
+        # Use ffmpeg to extract audio
+        import subprocess
+        subprocess.run([
+            'ffmpeg', '-i', video_path, '-vn', '-acodec', 'pcm_s16le',
+            '-ar', '16000', '-ac', '1', audio_path, '-y'
+        ], capture_output=True)
+        return audio_path
+    def get_youtube_metadata(self, url: str) -> dict:
+        """Extract YouTube video metadata without downloading"""
         try:
+            ydl_opts = {
+                'quiet': True,
+                'no_download': True,
+                'extract_flat': False
+            }
+            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+                info = ydl.extract_info(url, download=False)
+                return {
+                    'title': info.get('title', 'Unknown'),
+                    'description': info.get('description', '')[:500],
+                    'duration': info.get('duration', 0),
+                    'view_count': info.get('view_count', 0),
+                    'upload_date': info.get('upload_date', 'Unknown'),
+                    'uploader': info.get('uploader', 'Unknown')
+                }
         except Exception as e:
+            return {'error': str(e)}
+    def cleanup(self):
+        """Clean up temporary files"""
+        import shutil
+        shutil.rmtree(self.temp_dir, ignore_errors=True)