Final_Assignment_Template / excel_parser.py
Kackle's picture
mistake in file replace
a3381cd verified
import pandas as pd
import openpyxl
from typing import Dict, List, Any
class ExcelParser:
def __init__(self):
pass
def read_excel_file(self, file_path: str, sheet_name: str = None) -> pd.DataFrame:
"""Read Excel file and return DataFrame"""
try:
if sheet_name:
df = pd.read_excel(file_path, sheet_name=sheet_name)
else:
df = pd.read_excel(file_path)
return df
except Exception as e:
print(f"Error reading Excel file: {e}")
return None
def get_sheet_names(self, file_path: str) -> List[str]:
"""Get all sheet names from Excel file"""
try:
wb = openpyxl.load_workbook(file_path)
return wb.sheetnames
except Exception as e:
print(f"Error getting sheet names: {e}")
return []
def analyze_sales_data(self, file_path: str) -> Dict[str, Any]:
"""Analyze sales data from Excel file"""
df = self.read_excel_file(file_path)
if df is None:
return {}
results = {}
# Look for common column patterns
food_keywords = ['food', 'burger', 'sandwich', 'fries', 'pizza', 'chicken']
drink_keywords = ['drink', 'soda', 'coffee', 'juice', 'water', 'tea']
# Try to identify food vs drink items
if 'category' in df.columns.str.lower():
category_col = [col for col in df.columns if 'category' in col.lower()][0]
food_items = df[~df[category_col].str.lower().str.contains('|'.join(drink_keywords), na=False)]
else:
# Try to identify by item name
item_col = [col for col in df.columns if any(word in col.lower() for word in ['item', 'product', 'name'])][0]
food_items = df[~df[item_col].str.lower().str.contains('|'.join(drink_keywords), na=False)]
# Find sales/price column
sales_cols = [col for col in df.columns if any(word in col.lower() for word in ['sales', 'price', 'total', 'amount'])]
if sales_cols:
sales_col = sales_cols[0]
total_food_sales = food_items[sales_col].sum()
results['total_food_sales'] = f"${total_food_sales:,.2f}"
return results
def calculate_totals(self, df: pd.DataFrame, column: str) -> float:
"""Calculate total for a specific column"""
try:
return df[column].sum()
except Exception as e:
print(f"Error calculating totals: {e}")
return 0.0
def filter_data(self, df: pd.DataFrame, filters: Dict[str, Any]) -> pd.DataFrame:
"""Filter DataFrame based on criteria"""
filtered_df = df.copy()
for column, value in filters.items():
if column in filtered_df.columns:
if isinstance(value, list):
filtered_df = filtered_df[filtered_df[column].isin(value)]
else:
filtered_df = filtered_df[filtered_df[column] == value]
return filtered_df