Spaces:
Running
Running
| """ | |
| core/waste_parser.py | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| SPJIMR Waste Analytics β Excel Parser | |
| Reads the daily block-wise waste log (Book2.xlsx format) and returns | |
| clean DataFrames for the analytics and gamification pages. | |
| Data format in Excel: | |
| Row with datetime β date header (every 6 rows) | |
| Row 'Waste type' β column headers | |
| Row 'Wet waste' β per-block wet waste kg | |
| Row 'Dry waste' β per-block dry waste kg | |
| Row 'TOTAL (Kg)' β skip (recalculated here) | |
| Blank row β separator | |
| """ | |
| from __future__ import annotations | |
| import io | |
| import logging | |
| from datetime import datetime, date | |
| from pathlib import Path | |
| from typing import Union | |
| import pandas as pd | |
| logger = logging.getLogger(__name__) | |
| # Canonical block/location names as they appear in the spreadsheet | |
| LOCATIONS = [ | |
| "A&B Block", | |
| "C&D Block", | |
| "L H Hostel", | |
| "Hostel no -25", | |
| "Hostel no -26", | |
| "Hostel no -27", | |
| "Hostel no -28", | |
| "Hostel no -29", | |
| "Hostel no -30", | |
| "cantean", | |
| "MESS", | |
| ] | |
| # Friendly display names | |
| LOCATION_LABELS = { | |
| "A&B Block": "A & B Block", | |
| "C&D Block": "C & D Block", | |
| "L H Hostel": "LH Hostel", | |
| "Hostel no -25": "Hostel 25", | |
| "Hostel no -26": "Hostel 26", | |
| "Hostel no -27": "Hostel 27", | |
| "Hostel no -28": "Hostel 28", | |
| "Hostel no -29": "Hostel 29", | |
| "Hostel no -30": "Hostel 30", | |
| "cantean": "Canteen", | |
| "MESS": "Mess", | |
| } | |
| # Category grouping for gamification scoring | |
| LOCATION_GROUPS = { | |
| "Academic": ["A&B Block", "C&D Block"], | |
| "Hostels": ["L H Hostel", "Hostel no -25", "Hostel no -26", | |
| "Hostel no -27", "Hostel no -28", "Hostel no -29", "Hostel no -30"], | |
| "Dining": ["cantean", "MESS"], | |
| } | |
| def _safe_float(val) -> float: | |
| """Return float or 0.0 for None / formula strings.""" | |
| if val is None: | |
| return 0.0 | |
| if isinstance(val, str) and val.startswith("="): | |
| return 0.0 | |
| try: | |
| return float(val) | |
| except (TypeError, ValueError): | |
| return 0.0 | |
| def parse_waste_excel(source: Union[str, Path, bytes, io.BytesIO]) -> pd.DataFrame: | |
| """ | |
| Parse the waste-log Excel file into a tidy long-form DataFrame. | |
| Parameters | |
| ---------- | |
| source : file path (str/Path), raw bytes, or BytesIO | |
| Returns | |
| ------- | |
| pd.DataFrame with columns: | |
| date datetime.date | |
| location str (one of LOCATIONS) | |
| label str (friendly display name) | |
| group str (Academic | Hostels | Dining) | |
| wet_kg float | |
| dry_kg float | |
| total_kg float | |
| month str (e.g. "Apr 2025") | |
| week int (ISO week number) | |
| """ | |
| if isinstance(source, (str, Path)): | |
| import openpyxl | |
| wb = openpyxl.load_workbook(str(source), data_only=True) | |
| elif isinstance(source, bytes): | |
| import openpyxl | |
| wb = openpyxl.load_workbook(io.BytesIO(source), data_only=True) | |
| else: | |
| import openpyxl | |
| wb = openpyxl.load_workbook(source, data_only=True) | |
| ws = wb.active | |
| rows = list(ws.iter_rows(values_only=True)) | |
| records: list[dict] = [] | |
| current_date: date | None = None | |
| wet_row: dict | None = None | |
| for raw_row in rows: | |
| cell0 = raw_row[0] | |
| # ββ Date header ββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| if isinstance(cell0, datetime): | |
| current_date = cell0.date() | |
| wet_row = None | |
| continue | |
| if not current_date: | |
| continue | |
| if not isinstance(cell0, str): | |
| continue | |
| tag = cell0.strip().lower() | |
| # ββ Wet waste row ββββββββββββββββββββββββββββββββββββββββββββββββ | |
| if tag == "wet waste": | |
| wet_row = {loc: _safe_float(raw_row[i + 1]) for i, loc in enumerate(LOCATIONS)} | |
| continue | |
| # ββ Dry waste row β complete the record βββββββββββββββββββββββββ | |
| if tag == "dry waste" and wet_row is not None: | |
| dry_row = {loc: _safe_float(raw_row[i + 1]) for i, loc in enumerate(LOCATIONS)} | |
| for loc in LOCATIONS: | |
| w = wet_row.get(loc, 0.0) | |
| d = dry_row.get(loc, 0.0) | |
| records.append( | |
| { | |
| "date": current_date, | |
| "location": loc, | |
| "label": LOCATION_LABELS[loc], | |
| "group": next( | |
| (g for g, locs in LOCATION_GROUPS.items() if loc in locs), | |
| "Other", | |
| ), | |
| "wet_kg": w, | |
| "dry_kg": d, | |
| "total_kg": w + d, | |
| } | |
| ) | |
| wet_row = None | |
| continue | |
| if not records: | |
| logger.warning("waste_parser: no records parsed from file.") | |
| return pd.DataFrame() | |
| df = pd.DataFrame(records) | |
| df["date"] = pd.to_datetime(df["date"]) | |
| df["month"] = df["date"].dt.strftime("%b %Y") | |
| df["week"] = df["date"].dt.isocalendar().week.astype(int) | |
| df["day"] = df["date"].dt.day_name() | |
| logger.info( | |
| "waste_parser: parsed %d records | %d locations | %d days", | |
| len(df), df["location"].nunique(), df["date"].nunique(), | |
| ) | |
| return df | |
| # ββ Aggregation helpers used by both analytics and gamification pages βββββββββ | |
| def monthly_summary(df: pd.DataFrame) -> pd.DataFrame: | |
| """Total wet / dry / total per location per month.""" | |
| return ( | |
| df.groupby(["month", "location", "label", "group"], as_index=False) | |
| .agg(wet_kg=("wet_kg", "sum"), dry_kg=("dry_kg", "sum"), total_kg=("total_kg", "sum")) | |
| .sort_values("total_kg", ascending=False) | |
| ) | |
| def daily_totals(df: pd.DataFrame) -> pd.DataFrame: | |
| """Campus-wide daily totals (all locations summed).""" | |
| return ( | |
| df.groupby("date", as_index=False) | |
| .agg(wet_kg=("wet_kg", "sum"), dry_kg=("dry_kg", "sum"), total_kg=("total_kg", "sum")) | |
| .sort_values("date") | |
| ) | |
| def block_weekly(df: pd.DataFrame) -> pd.DataFrame: | |
| """Weekly totals per location.""" | |
| return ( | |
| df.groupby(["week", "location", "label", "group"], as_index=False) | |
| .agg(wet_kg=("wet_kg", "sum"), dry_kg=("dry_kg", "sum"), total_kg=("total_kg", "sum")) | |
| ) | |
| def wet_dry_ratio(df: pd.DataFrame) -> pd.DataFrame: | |
| """Wet-to-dry ratio per location (monthly).""" | |
| m = monthly_summary(df) | |
| m["wet_pct"] = (m["wet_kg"] / m["total_kg"].replace(0, float("nan")) * 100).round(1) | |
| m["dry_pct"] = (m["dry_kg"] / m["total_kg"].replace(0, float("nan")) * 100).round(1) | |
| return m | |