Final_Assignment_Template

Sleeping

App Files Files Community

Kackle committed on Jun 29, 2025

Commit

a3381cd

verified ·

1 Parent(s): 9be08bb

mistake in file replace

Browse files

Files changed (1) hide show

excel_parser.py +69 -160

excel_parser.py CHANGED Viewed

@@ -1,171 +1,80 @@
-import os
-import boto3
-import json
-from dotenv import load_dotenv
-from video_parser import VideoParser
-from excel_parser import ExcelParser
-import re
-load_dotenv()
class NovaProAgent:
    """Answer questions with Amazon Bedrock's Nova Pro model.

    Calling the agent routes the question by keyword: YouTube/video questions
    go to ``VideoParser``, Excel questions go to ``ExcelParser``, and anything
    else is sent to the model as a plain text prompt.
    """

    # Lead-in phrases that mark a wordy answer; matched case-insensitively.
    _VERBOSE_STARTS = (
        "To answer this question",
        "Based on the information",
        "According to",
        "The answer is",
        "Looking at",
    )

    def __init__(self):
        """Create the Bedrock runtime client and the video/Excel parsers."""
        print("NovaProAgent initialized.")
        # No explicit credentials are passed: boto3's default credential
        # chain reads AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY (and a session
        # token, if any) from the environment, so no extra client or session
        # is needed just to supply them.
        self.bedrock_client = boto3.client(
            service_name='bedrock-runtime',
            region_name='us-east-1',
        )
        self.model_id = "amazon.nova-pro-v1:0"
        self.content_type = "application/json"
        self.accept = "application/json"
        # Initialize parsers
        self.video_parser = VideoParser()
        self.excel_parser = ExcelParser()

    async def __call__(self, question: str) -> str:
        """Route *question* to the matching handler and return its answer.

        Any exception raised by a handler is logged and replaced with the
        fixed string ``"Unable to process request."``.
        """
        # Log only a prefix so long questions do not flood the output.
        print(f"NovaProAgent received question (first 50 chars): {question[:50]}...")
        lowered = question.lower()
        try:
            # Video questions are checked first, then Excel, then plain text.
            if 'youtube.com' in question or 'video' in lowered:
                return await self._handle_video_question(question)
            # '.xls' also covers '.xlsx'.
            if '.xls' in question or 'excel' in lowered:
                return await self._handle_excel_question(question)
            return await self._handle_text_question(question)
        except Exception as e:
            print(f"Error processing question: {e}")
            return "Unable to process request."

    async def _handle_video_question(self, question: str) -> str:
        """Download the YouTube video named in *question* and sample its frames.

        Returns a status string; failures are reported in the returned text
        rather than raised.
        """
        youtube_url = re.search(r'https://www\.youtube\.com/watch\?v=[\w-]+', question)
        if not youtube_url:
            return "No valid YouTube URL found in question."
        url = youtube_url.group()
        try:
            try:
                video_path = self.video_parser.download_youtube_video(url)
                frames = self.video_parser.analyze_video_frames(video_path, sample_rate=60)
            finally:
                # Clean up even when download/analysis fails.
                # NOTE(review): assumes VideoParser.cleanup() is safe to call
                # after a failed download - confirm against VideoParser.
                self.video_parser.cleanup()
            return f"Analyzed {len(frames)} frames from video. Video processing complete."
        except Exception as e:
            return f"Video analysis failed: {str(e)}"

    async def _handle_excel_question(self, question: str) -> str:
        """Answer a question about the Excel file whose path appears in *question*.

        Returns the total food sales when the question mentions both "sales"
        and "food"; otherwise returns a row/column summary. Failures are
        reported in the returned text rather than raised.
        """
        # Try a Windows drive path first, then any whitespace-free token
        # ending in .xls/.xlsx.
        file_patterns = [r'([A-Za-z]:\\[^\s]+\.xlsx?)', r'([^\s]+\.xlsx?)']
        file_path = None
        for pattern in file_patterns:
            match = re.search(pattern, question)
            if match:
                file_path = match.group(1)
                break
        if not file_path:
            return "Please provide Excel file path in your question."

        lowered = question.lower()
        try:
            if 'sales' in lowered and 'food' in lowered:
                results = self.excel_parser.analyze_sales_data(file_path)
                return results.get('total_food_sales', 'No sales data found')
            df = self.excel_parser.read_excel_file(file_path)
            if df is None:
                # read_excel_file signals a read failure with None; report it
                # directly instead of tripping over len(None) below.
                return f"Excel analysis failed: could not read {file_path}"
            return f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns."
        except Exception as e:
            return f"Excel analysis failed: {str(e)}"

    async def _handle_text_question(self, question: str) -> str:
        """Send *question* to Nova Pro and return a trimmed, concise answer.

        Errors from the Bedrock call or from parsing its response propagate
        to the caller.
        """
        prompt = f"""Answer this question directly and concisely. Provide only the essential information requested, not explanations or step-by-step reasoning unless specifically asked.
Question: {question}
Answer:"""
        payload = {
            "messages": [
                {
                    "role": "user",
                    "content": [{
                        "text": prompt
                    }]
                }
            ],
            "inferenceConfig": {
                "max_new_tokens": 250,
                "temperature": 0.0
            }
        }
        # NOTE(review): invoke_model is a blocking call inside a coroutine, so
        # the event loop stalls while the request is in flight.
        response = self.bedrock_client.invoke_model(
            modelId=self.model_id,
            contentType=self.content_type,
            accept=self.accept,
            body=json.dumps(payload)
        )
        response_body = json.loads(response['body'].read())
        answer = response_body['output']['message']['content'][0]['text'].strip()

        # If the answer opens with a lead-in phrase, replace it with the first
        # later sentence that is longer than 10 characters.
        for start in self._VERBOSE_STARTS:
            if answer.lower().startswith(start.lower()):
                for sentence in answer.split('. ')[1:]:
                    if len(sentence.strip()) > 10:
                        answer = sentence.strip()
                        break

        # Limit length: keep only the first sentence of an over-long answer,
        # without doubling a period the sentence already ends with.
        if len(answer) > 200:
            first = answer.split('. ')[0]
            answer = first if first.endswith('.') else first + '.'
        return answer

+import pandas as pd
+import openpyxl
+from typing import Dict, List, Any
class ExcelParser:
    """Load Excel workbooks with pandas and run simple analyses on them."""

    # Substrings that mark a row (by category or item name) as a drink.
    _DRINK_KEYWORDS = ('drink', 'soda', 'coffee', 'juice', 'water', 'tea')
    # Substrings used to recognise the item-name and sales-amount columns.
    _ITEM_COLUMN_HINTS = ('item', 'product', 'name')
    _SALES_COLUMN_HINTS = ('sales', 'price', 'total', 'amount')

    def __init__(self):
        pass

    @staticmethod
    def _matching_columns(df, keywords):
        """Return the columns of *df* whose lowercased name contains any keyword.

        Column labels are converted with ``str()`` first so non-string labels
        (e.g. integers) are skipped instead of raising.
        """
        return [
            col for col in df.columns
            if any(word in str(col).lower() for word in keywords)
        ]

    def read_excel_file(self, file_path: str, sheet_name: str = None) -> pd.DataFrame:
        """Read one sheet of an Excel file into a DataFrame.

        Args:
            file_path: Path of the workbook to read.
            sheet_name: Sheet to load; when empty or None the first sheet
                (index 0) is loaded.

        Returns:
            The sheet as a DataFrame, or None if reading failed (the error is
            printed).
        """
        try:
            # Never forward None: pandas would return a dict of all sheets.
            return pd.read_excel(file_path, sheet_name=sheet_name if sheet_name else 0)
        except Exception as e:
            print(f"Error reading Excel file: {e}")
            return None

    def get_sheet_names(self, file_path: str) -> List[str]:
        """Return the sheet names of an Excel file, or [] on failure.

        Errors are printed rather than raised.
        """
        try:
            # Read-only mode avoids loading cell data just to list names; it
            # keeps the file open, so close it explicitly.
            wb = openpyxl.load_workbook(file_path, read_only=True)
            try:
                return list(wb.sheetnames)
            finally:
                wb.close()
        except Exception as e:
            print(f"Error getting sheet names: {e}")
            return []

    def analyze_sales_data(self, file_path: str) -> Dict[str, Any]:
        """Total the sales of non-drink rows in an Excel file.

        Rows are classified with a column whose name contains "category", or
        failing that one whose name contains "item", "product" or "name". A
        row counts as a drink when that column's text contains a drink
        keyword; every other row counts as food.

        Returns:
            ``{'total_food_sales': '$1,234.56'}`` when a sales/price/total/
            amount column exists; an empty dict when the file cannot be read,
            no classifying column is found, or no sales column is found.
        """
        df = self.read_excel_file(file_path)
        if df is None:
            return {}

        # Prefer an explicit category column; fall back to the item name.
        label_cols = (
            self._matching_columns(df, ('category',))
            or self._matching_columns(df, self._ITEM_COLUMN_HINTS)
        )
        if not label_cols:
            # Nothing to tell food from drinks with.
            return {}

        # astype(str) keeps the .str accessor working on non-text columns;
        # missing values become text that matches no drink keyword, so those
        # rows stay counted as food.
        labels = df[label_cols[0]].astype(str).str.lower()
        is_drink = labels.str.contains('|'.join(self._DRINK_KEYWORDS), na=False)
        food_items = df[~is_drink]

        results = {}
        sales_cols = self._matching_columns(df, self._SALES_COLUMN_HINTS)
        if sales_cols:
            # Coerce so text-typed numbers are summed and junk cells ignored.
            amounts = pd.to_numeric(food_items[sales_cols[0]], errors='coerce')
            total_food_sales = float(amounts.sum())
            results['total_food_sales'] = f"${total_food_sales:,.2f}"
        return results

    def calculate_totals(self, df: pd.DataFrame, column: str) -> float:
        """Return the sum of *column* in *df*, or 0.0 if it cannot be computed.

        Errors (such as a missing column) are printed rather than raised.
        """
        try:
            return df[column].sum()
        except Exception as e:
            print(f"Error calculating totals: {e}")
            return 0.0

    def filter_data(self, df: pd.DataFrame, filters: Dict[str, Any]) -> pd.DataFrame:
        """Return a filtered copy of *df*; the input frame is not modified.

        Args:
            df: Frame to filter.
            filters: Maps column name to either a single value (equality
                match) or a list of accepted values. Entries naming a column
                that is not in *df* are ignored.
        """
        filtered_df = df.copy()
        for column, value in filters.items():
            if column not in filtered_df.columns:
                continue
            if isinstance(value, list):
                mask = filtered_df[column].isin(value)
            else:
                mask = filtered_df[column] == value
            filtered_df = filtered_df[mask]
        return filtered_df