gaia_unit4_space / tools /excel_tools.py
hawkdev's picture
initial commit
ac299d5
import re
from pathlib import Path
from typing import Optional
import pandas as pd
def excel_food_sales_total_usd(path: str) -> str:
"""
Sum sales for food items excluding drinks from the GAIA-style fast-food workbook.
Heuristic: classify rows using a Category/Type/Menu column; exclude drink/beverage/soda.
"""
p = Path(path)
if not p.exists():
return f"Error: file not found: {path}"
try:
xl = pd.ExcelFile(path)
except Exception as e:
return f"Error opening Excel: {e}"
total = 0.0
notes: list[str] = []
for sheet in xl.sheet_names:
try:
df = pd.read_excel(xl, sheet_name=sheet)
except Exception as e:
notes.append(f"{sheet}: read error {e}")
continue
if df.empty:
continue
cols_lower = {str(c).lower(): c for c in df.columns}
cat_col = None
for key in (
"category",
"type",
"menu category",
"item type",
"group",
):
if key in cols_lower:
cat_col = cols_lower[key]
break
money_col = None
for c in df.columns:
cl = str(c).lower()
if any(
w in cl
for w in ("sales", "revenue", "total", "amount", "usd", "price")
):
if df[c].dtype == object or pd.api.types.is_numeric_dtype(df[c]):
money_col = c
break
if money_col is None:
for c in df.columns:
if pd.api.types.is_numeric_dtype(df[c]):
money_col = c
break
if money_col is None:
notes.append(f"{sheet}: no numeric sales column found")
continue
for _, row in df.iterrows():
val = row[money_col]
if pd.isna(val):
continue
try:
amount = float(val)
except (TypeError, ValueError):
s = str(val).replace("$", "").replace(",", "").strip()
try:
amount = float(s)
except ValueError:
continue
if cat_col is not None:
raw = row[cat_col]
label = str(raw).lower() if pd.notna(raw) else ""
if any(
d in label
for d in (
"drink",
"beverage",
"soda",
"coffee",
"tea",
"juice",
"water",
"shake",
"smoothie",
)
):
continue
total += amount
if total == 0 and not notes:
return "Error: could not aggregate (no matching rows)."
return f"{total:.2f}"