Final_Assignment_Template

Sleeping

App Files Files Community

Kackle committed on Jun 29, 2025

Commit

9be08bb

verified ·

1 Parent(s): dccc150

removed placeholders

Browse files

Files changed (1) hide show

excel_parser.py +160 -69

excel_parser.py CHANGED Viewed

@@ -1,80 +1,171 @@
-import pandas as pd
-import openpyxl
-from typing import Dict, List, Any
class ExcelParser:
    """Load Excel workbooks with pandas and run simple sales summaries on them.

    The file-reading methods are best-effort: a failure is printed and then
    reported through a sentinel return value (``None``, ``[]``, ``{}`` or
    ``0.0``) instead of being raised.
    """

    # Whole-word match (with optional plural "s") so that "Steak" is not
    # mistaken for "tea" and "Watermelon" is not mistaken for "water".
    _DRINK_PATTERN = r'\b(?:drink|soda|coffee|juice|water|tea)s?\b'
    # Substrings that identify the column holding the item description.
    _ITEM_COLUMN_HINTS = ('item', 'product', 'name')
    # Substrings that identify the column holding the monetary amount.
    _SALES_COLUMN_HINTS = ('sales', 'price', 'total', 'amount')

    def __init__(self):
        """Create a parser; it keeps no per-instance state."""

    def read_excel_file(self, file_path: str, sheet_name: "str | None" = None) -> "pd.DataFrame | None":
        """Read an Excel file into a DataFrame.

        Args:
            file_path: Path of the workbook to read.
            sheet_name: Sheet to load; when empty or ``None`` pandas' default
                sheet is used.

        Returns:
            The loaded DataFrame, or ``None`` if reading failed for any
            reason (the error is printed).
        """
        try:
            if sheet_name:
                return pd.read_excel(file_path, sheet_name=sheet_name)
            return pd.read_excel(file_path)
        except Exception as e:
            print(f"Error reading Excel file: {e}")
            return None

    def get_sheet_names(self, file_path: str) -> List[str]:
        """Return the sheet names of a workbook, or ``[]`` if it cannot be opened."""
        try:
            # Read-only mode avoids parsing every cell just to list the sheets;
            # it keeps the file handle open, hence the explicit close().
            wb = openpyxl.load_workbook(file_path, read_only=True)
            try:
                return wb.sheetnames
            finally:
                wb.close()
        except Exception as e:
            print(f"Error getting sheet names: {e}")
            return []

    def analyze_sales_data(self, file_path: str) -> Dict[str, Any]:
        """Compute the total sales of food (non-drink) rows in an Excel file.

        Args:
            file_path: Path of the workbook to analyse.

        Returns:
            ``{'total_food_sales': '$1,234.56'}`` when both a label column and
            a sales column can be identified; ``{}`` when the file cannot be
            read or the needed columns are missing.
        """
        df = self.read_excel_file(file_path)
        if df is None:
            return {}
        return self._summarize_food_sales(df)

    def _summarize_food_sales(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Sum the sales column over the rows of *df* that are not drinks."""
        # str() keeps non-string headers (numbers, dates) from breaking .lower().
        headers = [(col, str(col).lower()) for col in df.columns]

        # Prefer a category column when a header is exactly "category";
        # otherwise fall back to the first item/product/name-like column.
        if any(lowered == 'category' for _, lowered in headers):
            label_col = next(col for col, lowered in headers if 'category' in lowered)
        else:
            label_col = next(
                (col for col, lowered in headers
                 if any(hint in lowered for hint in self._ITEM_COLUMN_HINTS)),
                None,
            )
            if label_col is None:
                # Nothing to classify rows by; report "no result" rather than crash.
                return {}

        # astype(str) lets the .str accessor work on non-text columns; missing
        # values never match a drink keyword, so they count as food.
        is_drink = df[label_col].astype(str).str.contains(
            self._DRINK_PATTERN, case=False, regex=True, na=False
        )
        food_items = df[~is_drink]

        results: Dict[str, Any] = {}
        sales_col = next(
            (col for col, lowered in headers
             if any(hint in lowered for hint in self._SALES_COLUMN_HINTS)),
            None,
        )
        if sales_col is not None:
            # Coerce so stray text cells become NaN (ignored by sum) instead of
            # making the currency formatting below fail.
            total = pd.to_numeric(food_items[sales_col], errors='coerce').sum()
            results['total_food_sales'] = f"${float(total):,.2f}"
        return results

    def calculate_totals(self, df: pd.DataFrame, column: str) -> float:
        """Return the sum of *column* in *df*, or ``0.0`` if it cannot be summed."""
        try:
            return df[column].sum()
        except Exception as e:
            print(f"Error calculating totals: {e}")
            return 0.0

    def filter_data(self, df: pd.DataFrame, filters: Dict[str, Any]) -> pd.DataFrame:
        """Return a filtered copy of *df*.

        Args:
            df: Frame to filter; it is not modified.
            filters: Mapping of column name to either a single value (equality
                match) or a list of accepted values. Keys that are not columns
                of *df* are ignored.

        Returns:
            A new DataFrame containing only the rows matching every filter.
        """
        filtered_df = df.copy()
        for column, value in filters.items():
            if column not in filtered_df.columns:
                continue
            if isinstance(value, list):
                mask = filtered_df[column].isin(value)
            else:
                mask = filtered_df[column] == value
            filtered_df = filtered_df[mask]
        return filtered_df

+import os
+import boto3
+import json
+from dotenv import load_dotenv
+from video_parser import VideoParser
+from excel_parser import ExcelParser
+import re
+load_dotenv()
class NovaProAgent:
    """Question-answering agent backed by Amazon Bedrock's Nova Pro model.

    Calling the agent with a question routes it to one of three handlers:
    video analysis, Excel analysis, or a plain text prompt sent to the model.
    """

    # Lead-ins after which the useful content is expected in a later sentence.
    _VERBOSE_STARTS = (
        "To answer this question",
        "Based on the information",
        "According to",
        "Looking at",
    )
    # Lead-in that is immediately followed by the answer itself.
    _DIRECT_PREFIX = "The answer is"
    # Answers longer than this are cut down to their first sentence.
    _MAX_ANSWER_CHARS = 200

    def __init__(self):
        """Create the Bedrock runtime client and the video/Excel parsers."""
        print("NovaProAgent initialized.")
        # No explicit credentials: boto3's default provider chain already reads
        # AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY (and a session token, if
        # any) from the environment.
        self.bedrock_client = boto3.client(
            service_name='bedrock-runtime',
            region_name='us-east-1'
        )
        self.model_id = "amazon.nova-pro-v1:0"
        self.content_type = "application/json"
        self.accept = "application/json"
        # Initialize parsers
        self.video_parser = VideoParser()
        self.excel_parser = ExcelParser()

    async def __call__(self, question: str) -> str:
        """Answer *question*, dispatching on keywords found in it.

        Returns:
            The handler's answer, or ``"Unable to process request."`` if the
            handler raised.
        """
        print(f"NovaProAgent received question (first 50 chars): {question[:50]}...")
        lowered = question.lower()
        try:
            # Check if question involves video analysis
            if 'youtube.com' in lowered or 'video' in lowered:
                return await self._handle_video_question(question)
            # Check if question involves Excel files ('.xls' also covers '.xlsx')
            if '.xls' in lowered or 'excel' in lowered:
                return await self._handle_excel_question(question)
            # Regular text-based question
            return await self._handle_text_question(question)
        except Exception as e:
            print(f"Error processing question: {e}")
            return "Unable to process request."

    async def _handle_video_question(self, question: str) -> str:
        """Download the YouTube video referenced in *question* and sample its frames.

        Returns:
            A summary string with the number of analysed frames, or an error
            message when no URL is found or processing fails.
        """
        youtube_url = re.search(
            r'https?://(?:www\.)?youtube\.com/watch\?v=[\w-]+',
            question,
            re.IGNORECASE,
        )
        if not youtube_url:
            return "No valid YouTube URL found in question."
        url = youtube_url.group()
        try:
            try:
                video_path = self.video_parser.download_youtube_video(url)
                frames = self.video_parser.analyze_video_frames(video_path, sample_rate=60)
            finally:
                # Clean up even when download/analysis fails.
                # NOTE(review): assumes cleanup() is safe to call after a
                # failed download - confirm against VideoParser.
                self.video_parser.cleanup()
            return f"Analyzed {len(frames)} frames from video. Video processing complete."
        except Exception as e:
            return f"Video analysis failed: {e}"

    async def _handle_excel_question(self, question: str) -> str:
        """Locate an Excel path in *question* and analyse that file.

        Questions mentioning both "sales" and "food" get the total food sales;
        any other question gets the row/column counts of the file.

        Returns:
            The analysis result, or a message explaining what went wrong.
        """
        # Windows drive paths first, then any whitespace-free token ending in
        # .xls/.xlsx.
        file_patterns = [r'([A-Za-z]:\\[^\s]+\.xlsx?)', r'([^\s]+\.xlsx?)']
        file_path = None
        for pattern in file_patterns:
            match = re.search(pattern, question, re.IGNORECASE)
            if match:
                # Drop an opening quote/bracket captured along with the path.
                file_path = match.group(1).lstrip('"\'([<')
                break
        if not file_path:
            return "Please provide Excel file path in your question."
        lowered = question.lower()
        try:
            if 'sales' in lowered and 'food' in lowered:
                results = self.excel_parser.analyze_sales_data(file_path)
                return results.get('total_food_sales', 'No sales data found')
            df = self.excel_parser.read_excel_file(file_path)
            if df is None:
                # The parser signals an unreadable file with None rather than
                # raising; report that instead of failing on len(None).
                return f"Excel analysis failed: could not read '{file_path}'."
            return f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns."
        except Exception as e:
            return f"Excel analysis failed: {e}"

    async def _handle_text_question(self, question: str) -> str:
        """Send *question* to Nova Pro and return a cleaned-up, concise answer.

        Raises:
            Exception: Whatever the Bedrock call or response parsing raises;
                ``__call__`` converts these into a generic failure message.
        """
        # Create a more focused prompt for concise answers
        prompt = f"""Answer this question directly and concisely. Provide only the essential information requested, not explanations or step-by-step reasoning unless specifically asked.
Question: {question}
Answer:"""
        # Prepare the request payload for Nova Pro
        payload = {
            "messages": [
                {
                    "role": "user",
                    "content": [{
                        "text": prompt
                    }]
                }
            ],
            "inferenceConfig": {
                "max_new_tokens": 250,
                "temperature": 0.0
            }
        }
        # NOTE: this is a synchronous boto3 call, so it blocks the event loop
        # for the duration of the request.
        response = self.bedrock_client.invoke_model(
            modelId=self.model_id,
            contentType=self.content_type,
            accept=self.accept,
            body=json.dumps(payload)
        )
        response_body = json.loads(response['body'].read())
        answer = response_body['output']['message']['content'][0]['text']
        return self._clean_answer(answer)

    @staticmethod
    def _clean_answer(answer: str) -> str:
        """Strip wordy lead-ins from a model answer and cap its length."""
        answer = answer.strip()
        # For generic lead-ins, skip ahead to the first later sentence that is
        # long enough to carry content.
        for start in NovaProAgent._VERBOSE_STARTS:
            if answer.lower().startswith(start.lower()):
                for sentence in answer.split('. ')[1:]:
                    candidate = sentence.strip()
                    if len(candidate) > 10:
                        answer = candidate
                        break
        # "The answer is X ..." carries the answer right after the lead-in, so
        # drop only the lead-in instead of the whole first sentence.
        prefix = NovaProAgent._DIRECT_PREFIX
        if answer.lower().startswith(prefix.lower()):
            remainder = answer[len(prefix):].lstrip(' :,')
            if remainder:
                answer = remainder
        # Limit length to the first sentence, without doubling end punctuation.
        if len(answer) > NovaProAgent._MAX_ANSWER_CHARS:
            first = answer.split('. ')[0]
            answer = first if first.endswith(('.', '!', '?')) else first + '.'
        return answer