from __future__ import annotations import re from datetime import UTC, date, datetime, timedelta from typing import Any MONTHS = ( "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December", ) MONTH_BY_NAME = {name: index + 1 for index, name in enumerate(MONTHS)} def unknown_date(source: str = "") -> dict[str, str]: return { "text": "date not listed", "sort_date": "", "precision": "unknown", "source": source, } def date_info(text: str, sort_date: str, precision: str, source: str) -> dict[str, str]: return { "text": text, "sort_date": sort_date, "precision": precision, "source": source, } def parse_iso_datetime(value: str, source: str = "server submission") -> dict[str, str] | None: value = str(value or "").strip() if not value: return None try: parsed = datetime.fromisoformat(value.replace("Z", "+00:00")) except ValueError: return None if parsed.tzinfo is None: parsed = parsed.replace(tzinfo=UTC) parsed = parsed.astimezone(UTC) return date_info( parsed.strftime("%b %-d, %Y"), parsed.date().isoformat(), "second", source, ) def parse_friedman_date_text(value: str, source: str = "Erich Friedman Packing Center") -> dict[str, str] | None: value = str(value or "").strip() if not value: return None slash = re.fullmatch(r"(\d{1,2})/(\d{1,2})/(\d{2}|\d{4})", value) if slash: month = int(slash.group(1)) day = int(slash.group(2)) year = int(slash.group(3)) if year < 100: year += 2000 if year <= 69 else 1900 try: parsed = date(year, month, day) except ValueError: return None return date_info(parsed.strftime("%b %-d, %Y"), parsed.isoformat(), "day", source) month_pattern = "|".join(MONTHS) match = re.search(rf"\b((?:{month_pattern})\s+\d{{4}}|\d{{4}})\b", value) if not match: return None text = match.group(1) parts = text.split() if len(parts) == 2: month = MONTH_BY_NAME[parts[0]] year = int(parts[1]) return date_info(text, f"{year:04d}-{month:02d}-01", "month", source) year = int(parts[0]) return date_info(text, f"{year:04d}-01-01", "year", source) def date_from_reference_text(reference_text: str, source: str = "Erich Friedman Packing Center") -> dict[str, str]: reference_text = str(reference_text or "") month_pattern = "|".join(MONTHS) match = re.search(rf"\bin\s+((?:{month_pattern})\s+\d{{4}}|\d{{4}})\b", reference_text) if match: parsed = parse_friedman_date_text(match.group(1), source) if parsed: return parsed return unknown_date(source) def date_from_friedman_record(record: dict[str, Any], source: str = "Erich Friedman Packing Center") -> dict[str, str]: meta = record.get("friedman_reference") if isinstance(meta, dict): parsed = parse_friedman_date_text(str(meta.get("date") or ""), source) if parsed: return parsed reference_text = str(meta.get("reference_text") or "") if reference_text: return date_from_reference_text(reference_text, source) parsed = parse_friedman_date_text(str(record.get("friedman_date") or record.get("date") or ""), source) if parsed: return parsed return date_from_reference_text(str(record.get("reference_text") or record.get("friedman_reference_text") or ""), source) def date_from_submission(record: dict[str, Any]) -> dict[str, str]: return parse_iso_datetime(str(record.get("submitted_at") or ""), "server submission") or unknown_date("server submission") def display_date_info(info: dict[str, Any] | None) -> str: if not isinstance(info, dict): return "date not listed" return str(info.get("text") or "date not listed") def is_recent_date_info(info: dict[str, Any] | None, *, now: date | None = None, days: int = 7) -> bool: if not isinstance(info, dict): return False if info.get("precision") not in {"day", "second"}: return False sort_date = str(info.get("sort_date") or "") if not sort_date: return False try: parsed = date.fromisoformat(sort_date) except ValueError: return False today = now or datetime.now(UTC).date() return today - timedelta(days=days) <= parsed <= today