Spaces:
Sleeping
Sleeping
created excel parser
Browse files- excel_parser +80 -0
excel_parser
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import openpyxl
|
| 3 |
+
from typing import Dict, List, Any
|
| 4 |
+
|
| 5 |
+
class ExcelParser:
    """Load Excel workbooks and run simple sales analyses on them.

    All methods are best-effort: I/O or lookup failures are reported with
    ``print`` and signalled through a neutral return value (``None``, ``[]``,
    ``{}`` or ``0.0``) instead of raising.
    """

    # Substrings that mark a category / item label as a drink. Any row whose
    # label contains none of them is counted as food.
    # NOTE(review): matching is by plain substring, so e.g. "steak" contains
    # "tea" and is classified as a drink -- confirm whether that is acceptable.
    DRINK_KEYWORDS = ('drink', 'soda', 'coffee', 'juice', 'water', 'tea')

    def __init__(self):
        pass

    def read_excel_file(self, file_path: str, sheet_name: str = None) -> pd.DataFrame:
        """Read one sheet of an Excel file into a DataFrame.

        Args:
            file_path: Path of the workbook to read.
            sheet_name: Sheet to load. A falsy value selects the first sheet.

        Returns:
            The loaded DataFrame, or ``None`` if reading failed (the error is
            printed).
        """
        try:
            # Fall back to index 0 explicitly: handing ``None`` to pandas
            # would return a dict of every sheet instead of one DataFrame.
            return pd.read_excel(file_path, sheet_name=sheet_name or 0)
        except Exception as e:
            print(f"Error reading Excel file: {e}")
            return None

    def get_sheet_names(self, file_path: str) -> List[str]:
        """Return the names of all sheets in the workbook.

        Args:
            file_path: Path of the workbook to inspect.

        Returns:
            The sheet names in workbook order, or ``[]`` if the workbook
            could not be opened (the error is printed).
        """
        workbook = None
        try:
            # Read-only mode avoids parsing every cell just to list the
            # sheets; it keeps the file open, hence the close() below.
            workbook = openpyxl.load_workbook(file_path, read_only=True)
            return list(workbook.sheetnames)
        except Exception as e:
            print(f"Error getting sheet names: {e}")
            return []
        finally:
            if workbook is not None:
                workbook.close()

    @staticmethod
    def _first_matching_column(df, keywords):
        """Return the first column whose label contains any keyword, else None.

        Matching is case-insensitive; labels are passed through ``str`` so
        non-string column labels (e.g. integers) do not raise.
        """
        for col in df.columns:
            label = str(col).lower()
            if any(word in label for word in keywords):
                return col
        return None

    def analyze_sales_data(self, file_path: str) -> Dict[str, Any]:
        """Compute the total sales of non-drink ("food") rows in a workbook.

        Rows are classified using the first column whose label contains
        "category"; if there is none, the first column whose label contains
        "item", "product" or "name" is used. A row is a drink when its label
        contains one of ``DRINK_KEYWORDS``; all other rows (including rows
        with a missing label) count as food.

        Args:
            file_path: Path of the workbook; its first sheet is analysed.

        Returns:
            ``{'total_food_sales': '$1,234.56'}`` when both a label column and
            a sales-like column ("sales", "price", "total" or "amount") are
            found. ``{}`` when the file cannot be read or no label column
            exists; also ``{}`` when no sales-like column exists.
        """
        df = self.read_excel_file(file_path)
        if df is None:
            return {}

        results = {}

        label_col = self._first_matching_column(df, ['category'])
        if label_col is None:
            label_col = self._first_matching_column(df, ['item', 'product', 'name'])
        if label_col is None:
            # Nothing to classify rows by, so no food total can be derived.
            return results

        # astype(str) keeps the .str accessor usable on non-text columns.
        labels = df[label_col].astype(str).str.lower()
        is_drink = labels.str.contains('|'.join(self.DRINK_KEYWORDS), na=False)
        food_items = df[~is_drink]

        sales_col = self._first_matching_column(
            df, ['sales', 'price', 'total', 'amount']
        )
        if sales_col is not None:
            # Coerce so that numbers stored as text are summed rather than
            # concatenated; unparseable cells become NaN and are skipped.
            amounts = pd.to_numeric(food_items[sales_col], errors='coerce')
            total_food_sales = amounts.sum()
            results['total_food_sales'] = f"${total_food_sales:,.2f}"

        return results

    def calculate_totals(self, df: pd.DataFrame, column: str) -> float:
        """Return the sum of ``df[column]``.

        Returns ``0.0`` (after printing the error) if the column is missing
        or cannot be summed.
        """
        try:
            return df[column].sum()
        except Exception as e:
            print(f"Error calculating totals: {e}")
            return 0.0

    def filter_data(self, df: pd.DataFrame, filters: Dict[str, Any]) -> pd.DataFrame:
        """Return a filtered copy of ``df``; the input is never modified.

        Args:
            df: DataFrame to filter.
            filters: Mapping of column name to the wanted value. A list,
                tuple, set or frozenset keeps rows whose value is any of its
                members; any other value is compared with ``==``. Keys that
                are not columns of ``df`` are ignored.

        Returns:
            A new DataFrame containing the rows that satisfy every filter.
        """
        filtered_df = df.copy()

        for column, value in filters.items():
            if column not in filtered_df.columns:
                continue
            if isinstance(value, (list, tuple, set, frozenset)):
                # isin() accepts any list-like; normalise so sets work too.
                mask = filtered_df[column].isin(list(value))
            else:
                mask = filtered_df[column] == value
            filtered_df = filtered_df[mask]

        return filtered_df
|