Spaces:

ahanbose
/

WISE_Energy

Running

App Files Files Community

WISE_Energy / src /core /waste_parser.py

ahanbose

Upload 3 files

5a77372 verified 10 days ago

raw

history blame contribute delete

7.11 kB

	"""
	core/waste_parser.py
	────────────────────────────────────────────────────────────────────
	SPJIMR Waste Analytics — Excel Parser
	Reads the daily block-wise waste log (Book2.xlsx format) and returns
	clean DataFrames for the analytics and gamification pages.

	Data format in Excel:
	Row with datetime → date header (every 6 rows)
	Row 'Waste type' → column headers
	Row 'Wet waste' → per-block wet waste kg
	Row 'Dry waste' → per-block dry waste kg
	Row 'TOTAL (Kg)' → skip (recalculated here)
	Blank row → separator
	"""

	from __future__ import annotations
	import io
	import logging
	from datetime import datetime, date
	from pathlib import Path
	from typing import Union

	import pandas as pd

	logger = logging.getLogger(__name__)

	# Canonical block/location names as they appear in the spreadsheet
	LOCATIONS = [
	"A&B Block",
	"C&D Block",
	"L H Hostel",
	"Hostel no -25",
	"Hostel no -26",
	"Hostel no -27",
	"Hostel no -28",
	"Hostel no -29",
	"Hostel no -30",
	"cantean",
	"MESS",
	]

	# Friendly display names
	LOCATION_LABELS = {
	"A&B Block": "A & B Block",
	"C&D Block": "C & D Block",
	"L H Hostel": "LH Hostel",
	"Hostel no -25": "Hostel 25",
	"Hostel no -26": "Hostel 26",
	"Hostel no -27": "Hostel 27",
	"Hostel no -28": "Hostel 28",
	"Hostel no -29": "Hostel 29",
	"Hostel no -30": "Hostel 30",
	"cantean": "Canteen",
	"MESS": "Mess",
	}

	# Category grouping for gamification scoring
	LOCATION_GROUPS = {
	"Academic": ["A&B Block", "C&D Block"],
	"Hostels": ["L H Hostel", "Hostel no -25", "Hostel no -26",
	"Hostel no -27", "Hostel no -28", "Hostel no -29", "Hostel no -30"],
	"Dining": ["cantean", "MESS"],
	}


	def _safe_float(val) -> float:
	"""Return float or 0.0 for None / formula strings."""
	if val is None:
	return 0.0
	if isinstance(val, str) and val.startswith("="):
	return 0.0
	try:
	return float(val)
	except (TypeError, ValueError):
	return 0.0


	def parse_waste_excel(source: Union[str, Path, bytes, io.BytesIO]) -> pd.DataFrame:
	"""
	Parse the waste-log Excel file into a tidy long-form DataFrame.

	Parameters
	----------
	source : file path (str/Path), raw bytes, or BytesIO

	Returns
	-------
	pd.DataFrame with columns:
	date datetime.date
	location str (one of LOCATIONS)
	label str (friendly display name)
	group str (Academic \| Hostels \| Dining)
	wet_kg float
	dry_kg float
	total_kg float
	month str (e.g. "Apr 2025")
	week int (ISO week number)
	"""
	if isinstance(source, (str, Path)):
	import openpyxl
	wb = openpyxl.load_workbook(str(source), data_only=True)
	elif isinstance(source, bytes):
	import openpyxl
	wb = openpyxl.load_workbook(io.BytesIO(source), data_only=True)
	else:
	import openpyxl
	wb = openpyxl.load_workbook(source, data_only=True)

	ws = wb.active
	rows = list(ws.iter_rows(values_only=True))

	records: list[dict] = []
	current_date: date \| None = None
	wet_row: dict \| None = None

	for raw_row in rows:
	cell0 = raw_row[0]

	# ── Date header ──────────────────────────────────────────────────
	if isinstance(cell0, datetime):
	current_date = cell0.date()
	wet_row = None
	continue

	if not current_date:
	continue

	if not isinstance(cell0, str):
	continue

	tag = cell0.strip().lower()

	# ── Wet waste row ────────────────────────────────────────────────
	if tag == "wet waste":
	wet_row = {loc: _safe_float(raw_row[i + 1]) for i, loc in enumerate(LOCATIONS)}
	continue

	# ── Dry waste row — complete the record ─────────────────────────
	if tag == "dry waste" and wet_row is not None:
	dry_row = {loc: _safe_float(raw_row[i + 1]) for i, loc in enumerate(LOCATIONS)}

	for loc in LOCATIONS:
	w = wet_row.get(loc, 0.0)
	d = dry_row.get(loc, 0.0)
	records.append(
	{
	"date": current_date,
	"location": loc,
	"label": LOCATION_LABELS[loc],
	"group": next(
	(g for g, locs in LOCATION_GROUPS.items() if loc in locs),
	"Other",
	),
	"wet_kg": w,
	"dry_kg": d,
	"total_kg": w + d,
	}
	)
	wet_row = None
	continue

	if not records:
	logger.warning("waste_parser: no records parsed from file.")
	return pd.DataFrame()

	df = pd.DataFrame(records)
	df["date"] = pd.to_datetime(df["date"])
	df["month"] = df["date"].dt.strftime("%b %Y")
	df["week"] = df["date"].dt.isocalendar().week.astype(int)
	df["day"] = df["date"].dt.day_name()

	logger.info(
	"waste_parser: parsed %d records \| %d locations \| %d days",
	len(df), df["location"].nunique(), df["date"].nunique(),
	)
	return df


	# ── Aggregation helpers used by both analytics and gamification pages ─────────

	def monthly_summary(df: pd.DataFrame) -> pd.DataFrame:
	"""Total wet / dry / total per location per month."""
	return (
	df.groupby(["month", "location", "label", "group"], as_index=False)
	.agg(wet_kg=("wet_kg", "sum"), dry_kg=("dry_kg", "sum"), total_kg=("total_kg", "sum"))
	.sort_values("total_kg", ascending=False)
	)


	def daily_totals(df: pd.DataFrame) -> pd.DataFrame:
	"""Campus-wide daily totals (all locations summed)."""
	return (
	df.groupby("date", as_index=False)
	.agg(wet_kg=("wet_kg", "sum"), dry_kg=("dry_kg", "sum"), total_kg=("total_kg", "sum"))
	.sort_values("date")
	)


	def block_weekly(df: pd.DataFrame) -> pd.DataFrame:
	"""Weekly totals per location."""
	return (
	df.groupby(["week", "location", "label", "group"], as_index=False)
	.agg(wet_kg=("wet_kg", "sum"), dry_kg=("dry_kg", "sum"), total_kg=("total_kg", "sum"))
	)


	def wet_dry_ratio(df: pd.DataFrame) -> pd.DataFrame:
	"""Wet-to-dry ratio per location (monthly)."""
	m = monthly_summary(df)
	m["wet_pct"] = (m["wet_kg"] / m["total_kg"].replace(0, float("nan")) * 100).round(1)
	m["dry_pct"] = (m["dry_kg"] / m["total_kg"].replace(0, float("nan")) * 100).round(1)
	return m