Kackle committed on
Commit
a3381cd
·
verified ·
1 Parent(s): 9be08bb

mistake in file replace

Browse files
Files changed (1) hide show
  1. excel_parser.py +69 -160
excel_parser.py CHANGED
@@ -1,171 +1,80 @@
1
- import os
2
- import boto3
3
- import json
4
- from dotenv import load_dotenv
5
- from video_parser import VideoParser
6
- from excel_parser import ExcelParser
7
- import re
8
 
9
- load_dotenv()
10
-
11
class NovaProAgent:
    """Question-answering agent backed by the Amazon Nova Pro model on Bedrock.

    Questions that mention a video or an Excel file are routed to the matching
    parser; every other question is sent to the model as a plain text prompt.
    """

    def __init__(self):
        """Create the Bedrock runtime client and the video/Excel parsers."""
        print("NovaProAgent initialized.")

        # No explicit credentials are passed: boto3 resolves
        # AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY from the environment,
        # which load_dotenv() has already populated at import time.
        self.bedrock_client = boto3.client(
            service_name='bedrock-runtime',
            region_name='us-east-1'
        )

        self.model_id = "amazon.nova-pro-v1:0"
        self.content_type = "application/json"
        self.accept = "application/json"

        # Initialize parsers
        self.video_parser = VideoParser()
        self.excel_parser = ExcelParser()

    async def __call__(self, question: str) -> str:
        """Answer ``question``, dispatching on the kind of input it mentions.

        Never raises: any exception from a handler is logged and replaced by
        a generic failure message.
        """
        # Truncate so the log line really shows only the first 50 characters.
        print(f"NovaProAgent received question (first 50 chars): {question[:50]}...")

        try:
            lowered = question.lower()

            # Check if question involves video analysis
            if 'youtube.com' in question or 'video' in lowered:
                return await self._handle_video_question(question)

            # Check if question involves Excel files ('.xls' also covers '.xlsx')
            if '.xls' in question or 'excel' in lowered:
                return await self._handle_excel_question(question)

            # Regular text-based question
            return await self._handle_text_question(question)

        except Exception as e:
            print(f"Error processing question: {e}")
            return "Unable to process request."

    async def _handle_video_question(self, question: str) -> str:
        """Handle questions that require video analysis.

        Returns a short status string; failures are reported in the returned
        text rather than raised.
        """
        # Extract YouTube URL
        youtube_url = re.search(r'https://www\.youtube\.com/watch\?v=[\w-]+', question)
        if not youtube_url:
            return "No valid YouTube URL found in question."

        url = youtube_url.group()

        try:
            # Download video using VideoParser
            video_path = self.video_parser.download_youtube_video(url)

            try:
                # Extract frames for analysis
                frames = self.video_parser.analyze_video_frames(video_path, sample_rate=60)
            finally:
                # Clean up even when frame extraction fails, so a downloaded
                # video is not left behind.
                self.video_parser.cleanup()

            return f"Analyzed {len(frames)} frames from video. Video processing complete."

        except Exception as e:
            return f"Video analysis failed: {str(e)}"

    async def _handle_excel_question(self, question: str) -> str:
        """Handle questions that require Excel file analysis.

        The file path is taken from the question text. Failures are reported
        in the returned text rather than raised.
        """
        # Try a Windows drive-letter path first, then any token ending in
        # .xls/.xlsx.
        file_patterns = [r'([A-Za-z]:\\[^\s]+\.xlsx?)', r'([^\s]+\.xlsx?)']
        file_path = None

        for pattern in file_patterns:
            match = re.search(pattern, question)
            if match:
                file_path = match.group(1)
                break

        if not file_path:
            return "Please provide Excel file path in your question."

        try:
            lowered = question.lower()
            if 'sales' in lowered and 'food' in lowered:
                results = self.excel_parser.analyze_sales_data(file_path)
                return results.get('total_food_sales', 'No sales data found')

            df = self.excel_parser.read_excel_file(file_path)
            if df is None:
                # The parser reports a failed read by returning None.
                return f"Excel analysis failed: could not read {file_path}"
            return f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns."

        except Exception as e:
            return f"Excel analysis failed: {str(e)}"

    async def _handle_text_question(self, question: str) -> str:
        """Handle regular text-based questions by querying the Nova Pro model.

        Exceptions from the Bedrock call or from an unexpected response shape
        propagate to the caller.
        """
        # A focused prompt that asks for a concise answer.
        prompt = (
            "Answer this question directly and concisely. Provide only the "
            "essential information requested, not explanations or "
            "step-by-step reasoning unless specifically asked.\n"
            "\n"
            f"Question: {question}\n"
            "\n"
            "Answer:"
        )

        # Prepare the request payload for Nova Pro
        payload = {
            "messages": [
                {
                    "role": "user",
                    "content": [{
                        "text": prompt
                    }]
                }
            ],
            "inferenceConfig": {
                "max_new_tokens": 250,
                "temperature": 0.0
            }
        }

        # Call Nova Pro model
        response = self.bedrock_client.invoke_model(
            modelId=self.model_id,
            contentType=self.content_type,
            accept=self.accept,
            body=json.dumps(payload)
        )

        # Parse response
        response_body = json.loads(response['body'].read())
        answer = response_body['output']['message']['content'][0]['text']

        return self._clean_answer(answer)

    @staticmethod
    def _clean_answer(answer: str) -> str:
        """Strip boilerplate lead-in sentences and shorten over-long replies."""
        answer = answer.strip()

        # Remove verbose beginnings
        verbose_starts = [
            "To answer this question",
            "Based on the information",
            "According to",
            "The answer is",
            "Looking at"
        ]

        for start in verbose_starts:
            if answer.lower().startswith(start.lower()):
                # Skip the lead-in sentence and keep the first later sentence
                # that has some substance; leave the text alone if none does.
                for sentence in answer.split('. ')[1:]:
                    if len(sentence.strip()) > 10:
                        answer = sentence.strip()
                        break

        # Limit length: keep only the first sentence of a long reply.
        if len(answer) > 200:
            first = answer.split('. ', 1)[0]
            # Re-add the period removed by the split, but do not double up
            # punctuation when the text already ends a sentence.
            answer = first if first.endswith(('.', '!', '?')) else first + '.'

        return answer
 
1
+ import pandas as pd
2
+ import openpyxl
3
+ from typing import Dict, List, Any
 
 
 
 
4
 
5
class ExcelParser:
    """Helpers for loading Excel workbooks and summarising simple sales sheets."""

    def __init__(self):
        pass

    def read_excel_file(self, file_path: str, sheet_name: str = None) -> pd.DataFrame:
        """Read one sheet of an Excel file into a DataFrame.

        Args:
            file_path: Path of the workbook to read.
            sheet_name: Sheet to load; when omitted the default (first) sheet
                is read.

        Returns:
            The loaded DataFrame, or ``None`` when the file cannot be read
            (the error is printed, not raised).
        """
        try:
            # pandas interprets sheet_name=None as "load every sheet into a
            # dict", so an unset name must not be forwarded.
            if sheet_name:
                return pd.read_excel(file_path, sheet_name=sheet_name)
            return pd.read_excel(file_path)
        except Exception as e:
            print(f"Error reading Excel file: {e}")
            return None

    def get_sheet_names(self, file_path: str) -> List[str]:
        """Return the sheet names of a workbook, or ``[]`` if it cannot be opened."""
        try:
            # Read-only mode avoids parsing every cell just to list the sheets;
            # such workbooks keep the file open until close() is called.
            wb = openpyxl.load_workbook(file_path, read_only=True)
            try:
                return list(wb.sheetnames)
            finally:
                wb.close()
        except Exception as e:
            print(f"Error getting sheet names: {e}")
            return []

    def analyze_sales_data(self, file_path: str) -> Dict[str, Any]:
        """Total the sales of all non-drink rows in an Excel sheet.

        Rows are classified through a "category" column when one exists,
        otherwise through an item/product/name column; every row whose label
        does not look like a drink counts as food.

        Returns:
            ``{'total_food_sales': '$1,234.56'}`` when both a label column and
            a sales/price/total/amount column are found, otherwise ``{}``.
        """
        df = self.read_excel_file(file_path)
        if df is None:
            return {}

        results = {}

        drink_keywords = ['drink', 'soda', 'coffee', 'juice', 'water', 'tea']
        # Anchor keywords at a word start so that e.g. "tea" does not match
        # inside "steak".
        drink_pattern = r'\b(?:' + '|'.join(drink_keywords) + ')'

        def first_column_with(words):
            # Headers may be non-strings (e.g. integers), hence str().
            for col in df.columns:
                label = str(col).lower()
                if any(word in label for word in words):
                    return col
            return None

        # Prefer an explicit category column, fall back to the item name.
        label_col = first_column_with(['category'])
        if label_col is None:
            label_col = first_column_with(['item', 'product', 'name'])
        if label_col is None:
            # Nothing to classify rows by.
            return results

        labels = df[label_col].astype(str).str.lower()
        is_drink = labels.str.contains(drink_pattern, regex=True, na=False)
        food_items = df[~is_drink]

        # Find sales/price column
        sales_col = first_column_with(['sales', 'price', 'total', 'amount'])
        if sales_col is not None:
            # Coerce so stray text cells are ignored instead of breaking the
            # numeric formatting below.
            amounts = pd.to_numeric(food_items[sales_col], errors='coerce')
            total_food_sales = float(amounts.sum())
            results['total_food_sales'] = f"${total_food_sales:,.2f}"

        return results

    def calculate_totals(self, df: pd.DataFrame, column: str) -> float:
        """Return the sum of ``column``, or ``0.0`` if it cannot be computed."""
        try:
            return df[column].sum()
        except Exception as e:
            print(f"Error calculating totals: {e}")
            return 0.0

    def filter_data(self, df: pd.DataFrame, filters: Dict[str, Any]) -> pd.DataFrame:
        """Return a copy of ``df`` restricted to rows matching every filter.

        Args:
            df: Frame to filter; it is not modified.
            filters: Maps column name to either a single value (equality
                match) or a list of accepted values. Keys that are not
                columns of ``df`` are ignored.
        """
        filtered_df = df.copy()

        for column, value in filters.items():
            if column not in filtered_df.columns:
                continue
            if isinstance(value, list):
                mask = filtered_df[column].isin(value)
            else:
                mask = filtered_df[column] == value
            filtered_df = filtered_df[mask]

        return filtered_df