Kackle committed on
Commit
9be08bb
·
verified ·
1 Parent(s): dccc150

removed placeholders

Browse files
Files changed (1) hide show
  1. excel_parser.py +160 -69
excel_parser.py CHANGED
@@ -1,80 +1,171 @@
1
- import pandas as pd
2
- import openpyxl
3
- from typing import Dict, List, Any
 
 
 
 
4
 
5
class ExcelParser:
    """Load Excel workbooks and run simple tabular analyses on them.

    The parser keeps no state; every method works only on the file path or
    DataFrame passed to it. Read errors are printed and reported through a
    sentinel return value (``None``, ``[]``, ``{}`` or ``0.0``) rather than
    raised.
    """

    # Substrings that mark a row as a drink; every other row counts as food.
    _DRINK_KEYWORDS = ('drink', 'soda', 'coffee', 'juice', 'water', 'tea')
    # Substrings used to find the column holding item names.
    _LABEL_HINTS = ('item', 'product', 'name')
    # Substrings used to find the column holding monetary values.
    _SALES_HINTS = ('sales', 'price', 'total', 'amount')

    def __init__(self):
        pass

    def read_excel_file(self, file_path: str, sheet_name: str = None) -> pd.DataFrame:
        """Read one sheet of an Excel file into a DataFrame.

        Args:
            file_path: Path of the workbook to read.
            sheet_name: Sheet to load; when falsy, pandas' default sheet
                is read.

        Returns:
            The loaded DataFrame, or ``None`` when reading fails (the error
            is printed).
        """
        try:
            if sheet_name:
                return pd.read_excel(file_path, sheet_name=sheet_name)
            return pd.read_excel(file_path)
        except Exception as e:
            print(f"Error reading Excel file: {e}")
            return None

    def get_sheet_names(self, file_path: str) -> List[str]:
        """Return the names of all sheets in an Excel workbook.

        Args:
            file_path: Path of the workbook to inspect.

        Returns:
            The sheet names, or an empty list when the workbook cannot be
            opened (the error is printed).
        """
        try:
            # Read-only mode avoids loading every cell just to list sheets;
            # such workbooks keep the file open until closed explicitly.
            wb = openpyxl.load_workbook(file_path, read_only=True)
            try:
                return list(wb.sheetnames)
            finally:
                wb.close()
        except Exception as e:
            print(f"Error getting sheet names: {e}")
            return []

    def analyze_sales_data(self, file_path: str) -> Dict[str, Any]:
        """Compute the total sales of food (non-drink) rows in a workbook.

        Args:
            file_path: Path of the workbook to analyze.

        Returns:
            ``{'total_food_sales': '$1,234.56'}`` when both a label column
            and a sales column are found; otherwise an empty dict (also
            when the file cannot be read).
        """
        df = self.read_excel_file(file_path)
        if df is None:
            return {}
        return self._summarize_food_sales(df)

    def _summarize_food_sales(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Sum the sales column over rows whose label is not a drink."""
        # Column labels are not guaranteed to be strings, so normalise once.
        lowered = [(col, str(col).lower()) for col in df.columns]

        # Prefer a column literally named "category"; otherwise fall back to
        # the first column that looks like it holds item names.
        label_col = next(
            (col for col, low in lowered if low == 'category'), None
        )
        if label_col is None:
            label_col = next(
                (col for col, low in lowered
                 if any(hint in low for hint in self._LABEL_HINTS)),
                None,
            )
        if label_col is None:
            return {}

        sales_col = next(
            (col for col, low in lowered
             if any(hint in low for hint in self._SALES_HINTS)),
            None,
        )
        if sales_col is None:
            return {}

        # astype(str) keeps the match working for non-text label columns;
        # missing labels match no drink keyword and so count as food.
        labels = df[label_col].astype(str).str.lower()
        is_drink = labels.str.contains('|'.join(self._DRINK_KEYWORDS), na=False)

        # Coerce so stray text cells are skipped instead of breaking the
        # currency formatting below.
        food_sales = pd.to_numeric(df.loc[~is_drink, sales_col], errors='coerce')
        total_food_sales = food_sales.sum()
        return {'total_food_sales': f"${total_food_sales:,.2f}"}

    def calculate_totals(self, df: pd.DataFrame, column: str) -> float:
        """Return the sum of one DataFrame column.

        Args:
            df: Table to read from.
            column: Name of the column to sum.

        Returns:
            The column sum, or ``0.0`` when summing fails, for example when
            the column does not exist (the error is printed).
        """
        try:
            return df[column].sum()
        except Exception as e:
            print(f"Error calculating totals: {e}")
            return 0.0

    def filter_data(self, df: pd.DataFrame, filters: Dict[str, Any]) -> pd.DataFrame:
        """Return a filtered copy of a DataFrame.

        Args:
            df: Table to filter; it is not modified.
            filters: Maps column name to the required value. A list value
                keeps rows whose cell is in the list; any other value keeps
                rows equal to it. Columns missing from ``df`` are ignored.

        Returns:
            A new DataFrame containing only the rows matching every filter.
        """
        filtered_df = df.copy()

        for column, value in filters.items():
            if column not in filtered_df.columns:
                continue
            if isinstance(value, list):
                mask = filtered_df[column].isin(value)
            else:
                mask = filtered_df[column] == value
            filtered_df = filtered_df[mask]

        return filtered_df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import boto3
3
+ import json
4
+ from dotenv import load_dotenv
5
+ from video_parser import VideoParser
6
+ from excel_parser import ExcelParser
7
+ import re
8
 
9
+ load_dotenv()
10
+
11
class NovaProAgent:
    """Answer questions by routing them to a video, Excel or text handler.

    Text questions are sent to the Amazon Nova Pro model through the
    Bedrock runtime client; video and Excel questions are delegated to
    ``VideoParser`` and ``ExcelParser``.
    """

    # Lead-in phrases that mark a wordy answer worth trimming.
    _VERBOSE_STARTS = (
        "To answer this question",
        "Based on the information",
        "According to",
        "The answer is",
        "Looking at",
    )
    # Answers longer than this are cut down to their first sentence.
    _MAX_ANSWER_CHARS = 200
    # Tried in order: a Windows drive path first, then any token ending in
    # .xls or .xlsx.
    _EXCEL_PATH_PATTERNS = (r'([A-Za-z]:\\[^\s]+\.xlsx?)', r'([^\s]+\.xlsx?)')

    def __init__(self):
        print("NovaProAgent initialized.")

        # No credentials are passed explicitly: boto3 resolves them itself,
        # including from the AWS_* environment variables.
        self.bedrock_client = boto3.client(
            service_name='bedrock-runtime',
            region_name='us-east-1'
        )

        self.model_id = "amazon.nova-pro-v1:0"
        self.content_type = "application/json"
        self.accept = "application/json"

        # Initialize parsers
        self.video_parser = VideoParser()
        self.excel_parser = ExcelParser()

    async def __call__(self, question: str) -> str:
        """Answer a question, choosing the handler from its wording.

        Args:
            question: The user's question.

        Returns:
            The handler's answer, or ``"Unable to process request."`` when
            a handler raises.
        """
        print(f"NovaProAgent received question (first 50 chars): {question[:50]}...")

        try:
            lowered = question.lower()

            # Check if question involves video analysis
            if 'youtube.com' in question or 'video' in lowered:
                return await self._handle_video_question(question)

            # Check if question involves Excel files ('.xls' also covers
            # '.xlsx')
            if '.xls' in question or 'excel' in lowered:
                return await self._handle_excel_question(question)

            # Regular text-based question
            return await self._handle_text_question(question)

        except Exception as e:
            print(f"Error processing question: {e}")
            return "Unable to process request."

    async def _handle_video_question(self, question: str) -> str:
        """Download the YouTube video named in the question and sample it.

        Args:
            question: Text expected to contain a
                ``https://www.youtube.com/watch?v=...`` URL.

        Returns:
            A summary with the number of analyzed frames, or a message
            describing why the video could not be processed.
        """
        youtube_url = re.search(r'https://www\.youtube\.com/watch\?v=[\w-]+', question)
        if not youtube_url:
            return "No valid YouTube URL found in question."

        url = youtube_url.group()

        try:
            video_path = self.video_parser.download_youtube_video(url)
            try:
                frames = self.video_parser.analyze_video_frames(video_path, sample_rate=60)
            finally:
                # Clean up even when frame analysis fails.
                self.video_parser.cleanup()

            return f"Analyzed {len(frames)} frames from video. Video processing complete."

        except Exception as e:
            return f"Video analysis failed: {str(e)}"

    async def _handle_excel_question(self, question: str) -> str:
        """Analyze the Excel file whose path appears in the question.

        Args:
            question: Text expected to contain a path ending in ``.xls``
                or ``.xlsx``.

        Returns:
            The food sales total when the question mentions both "sales"
            and "food", otherwise the file's row and column counts; or a
            message describing what went wrong.
        """
        file_path = None
        for pattern in self._EXCEL_PATH_PATTERNS:
            match = re.search(pattern, question)
            if match:
                file_path = match.group(1)
                break

        if not file_path:
            return "Please provide Excel file path in your question."

        try:
            lowered = question.lower()
            if 'sales' in lowered and 'food' in lowered:
                results = self.excel_parser.analyze_sales_data(file_path)
                return results.get('total_food_sales', 'No sales data found')

            df = self.excel_parser.read_excel_file(file_path)
            # NOTE(review): guards against the parser signalling a read
            # failure by returning None - confirm against ExcelParser.
            if df is None:
                return f"Excel analysis failed: could not read {file_path}"
            return f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns."

        except Exception as e:
            return f"Excel analysis failed: {str(e)}"

    async def _handle_text_question(self, question: str) -> str:
        """Ask the Nova Pro model a question and return a trimmed answer.

        Args:
            question: The question to send to the model.

        Returns:
            The model's answer after whitespace, lead-in and length
            trimming.
        """
        # Prompt asks for a direct answer with no reasoning.
        prompt = (
            "Answer this question directly and concisely. Provide only the "
            "essential information requested, not explanations or "
            "step-by-step reasoning unless specifically asked."
            f"\n\nQuestion: {question}\n\nAnswer:"
        )

        # Prepare the request payload for Nova Pro
        payload = {
            "messages": [
                {
                    "role": "user",
                    "content": [{"text": prompt}],
                }
            ],
            "inferenceConfig": {
                "max_new_tokens": 250,
                "temperature": 0.0,
            },
        }

        # Call Nova Pro model
        response = self.bedrock_client.invoke_model(
            modelId=self.model_id,
            contentType=self.content_type,
            accept=self.accept,
            body=json.dumps(payload)
        )

        # Parse response
        response_body = json.loads(response['body'].read())
        answer = response_body['output']['message']['content'][0]['text']

        return self._clean_answer(answer)

    @classmethod
    def _clean_answer(cls, answer: str) -> str:
        """Strip whitespace, drop a wordy lead-in sentence and cap length."""
        answer = answer.strip()

        # When the answer opens with a known lead-in, replace it with the
        # first later sentence longer than 10 characters.
        for start in cls._VERBOSE_STARTS:
            if not answer.lower().startswith(start.lower()):
                continue
            for sentence in answer.split('. ')[1:]:
                candidate = sentence.strip()
                if len(candidate) > 10:
                    answer = candidate
                    break

        # Limit length: keep only the first sentence of an over-long answer.
        if len(answer) > cls._MAX_ANSWER_CHARS:
            first = answer.split('. ')[0]
            # Avoid a doubled terminator when the sentence already ends
            # with one.
            answer = first if first.endswith(('.', '!', '?')) else first + '.'

        return answer