Kackle committed on
Commit
5154eab
·
verified ·
1 Parent(s): 29e530e

created excel parser

Browse files
Files changed (1) hide show
  1. excel_parser +80 -0
excel_parser ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import openpyxl
3
+ from typing import Dict, List, Any
4
+
5
class ExcelParser:
    """Read Excel workbooks and run simple sales analyses on their contents.

    The class keeps no state; every method works only on its arguments.
    File-reading methods are best-effort: on failure they print a message
    and return an empty/``None`` result instead of raising.
    """

    # Words that mark a row as a drink; anything not matching is treated as food.
    _DRINK_KEYWORDS = ('drink', 'soda', 'coffee', 'juice', 'water', 'tea')
    # Substrings that identify the column holding item names.
    _ITEM_COLUMN_HINTS = ('item', 'product', 'name')
    # Substrings that identify the column holding monetary amounts.
    _SALES_COLUMN_HINTS = ('sales', 'price', 'total', 'amount')

    def __init__(self):
        """Create a parser; there is nothing to configure."""

    def read_excel_file(self, file_path: str, sheet_name: str = None) -> pd.DataFrame:
        """Read one sheet of an Excel file into a DataFrame.

        Args:
            file_path: Path of the workbook to read.
            sheet_name: Sheet to load. When omitted (or empty) the first
                sheet is read.

        Returns:
            The sheet as a DataFrame, or ``None`` if reading failed (the
            error is printed).
        """
        try:
            # Passing sheet_name=None to pandas would return a dict of every
            # sheet, so fall back to index 0 (the first sheet) instead.
            return pd.read_excel(file_path, sheet_name=sheet_name or 0)
        except Exception as e:
            print(f"Error reading Excel file: {e}")
            return None

    def get_sheet_names(self, file_path: str) -> List[str]:
        """Return the names of all sheets in an Excel file.

        Args:
            file_path: Path of the workbook to inspect.

        Returns:
            The sheet names in workbook order, or an empty list if the
            workbook could not be opened (the error is printed).
        """
        try:
            # Read-only mode avoids loading every cell just to list titles;
            # such workbooks keep the file open and must be closed explicitly.
            wb = openpyxl.load_workbook(file_path, read_only=True)
            try:
                return list(wb.sheetnames)
            finally:
                wb.close()
        except Exception as e:
            print(f"Error getting sheet names: {e}")
            return []

    def analyze_sales_data(self, file_path: str) -> Dict[str, Any]:
        """Compute the total food sales (drinks excluded) from an Excel file.

        Args:
            file_path: Path of the workbook; its first sheet is analysed.

        Returns:
            ``{'total_food_sales': '$1,234.50'}`` when both a
            category/item column and a sales column can be identified,
            otherwise an empty dict (also when the file cannot be read).
        """
        df = self.read_excel_file(file_path)
        if df is None:
            return {}
        return self._summarize_food_sales(df)

    def _summarize_food_sales(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Sum the sales column over all rows that are not drinks."""
        results = {}

        # Column labels are not guaranteed to be strings, so normalise via str().
        names = [(col, str(col).lower()) for col in df.columns]

        # Prefer a column literally called "category"; otherwise classify
        # rows by the first column that looks like an item name.
        label_col = next((col for col, low in names if low == 'category'), None)
        if label_col is None:
            label_col = next(
                (col for col, low in names
                 if any(hint in low for hint in self._ITEM_COLUMN_HINTS)),
                None,
            )
        if label_col is None:
            # No way to tell food from drinks; report nothing rather than
            # a misleading total.
            return results

        # Match whole words only (optionally plural) so that e.g. "Steak"
        # or "Watermelon" are not mistaken for "tea" / "water".
        drink_pattern = (
            r'(?<![a-z])(?:' + '|'.join(self._DRINK_KEYWORDS) + r')s?(?![a-z])'
        )
        is_drink = df[label_col].astype(str).str.contains(
            drink_pattern, case=False, na=False, regex=True
        )
        food_items = df[~is_drink]

        sales_col = next(
            (col for col, low in names
             if any(hint in low for hint in self._SALES_COLUMN_HINTS)),
            None,
        )
        if sales_col is not None:
            # Coerce so that stray text cells are skipped instead of
            # breaking the sum or the currency formatting.
            amounts = pd.to_numeric(food_items[sales_col], errors='coerce')
            total_food_sales = float(amounts.sum())
            results['total_food_sales'] = f"${total_food_sales:,.2f}"

        return results

    def calculate_totals(self, df: pd.DataFrame, column: str) -> float:
        """Return the sum of one column.

        Args:
            df: Data to aggregate.
            column: Label of the column to sum.

        Returns:
            The column sum, or ``0.0`` if it could not be computed (for
            example when the column does not exist; the error is printed).
        """
        try:
            return df[column].sum()
        except Exception as e:
            print(f"Error calculating totals: {e}")
            return 0.0

    def filter_data(self, df: pd.DataFrame, filters: Dict[str, Any]) -> pd.DataFrame:
        """Return the rows of ``df`` that satisfy every filter.

        Args:
            df: Data to filter; it is not modified.
            filters: Mapping of column label to either a single value
                (equality test) or a list/tuple/set of accepted values
                (membership test). Entries for unknown columns are ignored.

        Returns:
            A new DataFrame containing only the matching rows.
        """
        filtered_df = df.copy()

        for column, value in filters.items():
            if column not in filtered_df.columns:
                continue
            if isinstance(value, (list, tuple, set, frozenset)):
                mask = filtered_df[column].isin(list(value))
            else:
                mask = filtered_df[column] == value
            filtered_df = filtered_df[mask]

        return filtered_df