Spaces:
Sleeping
Sleeping
File size: 4,566 Bytes
ef637fb | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 | from __future__ import annotations
import re
from datetime import UTC, date, datetime, timedelta
from typing import Any
# Full English month names in calendar order; a name's position in the tuple
# is its month number minus one.
MONTHS = (
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December",
)

# Reverse lookup from a month name to its 1-based calendar number.
MONTH_BY_NAME = {name: number for number, name in enumerate(MONTHS, start=1)}
def unknown_date(source: str = "") -> dict[str, str]:
    """Build the placeholder date record used when no date is available.

    Args:
        source: Label describing where the date was looked for; stored as-is.

    Returns:
        A record whose display text is ``"date not listed"``, whose
        ``sort_date`` is empty, and whose precision is ``"unknown"``.
    """
    placeholder = dict(
        text="date not listed",
        sort_date="",
        precision="unknown",
        source=source,
    )
    return placeholder
def date_info(text: str, sort_date: str, precision: str, source: str) -> dict[str, str]:
    """Bundle the four date fields into a single record.

    Args:
        text: Human-readable form of the date.
        sort_date: Value stored under ``"sort_date"``.
        precision: Value stored under ``"precision"``.
        source: Value stored under ``"source"``.

    Returns:
        A new dict with the keys ``text``, ``sort_date``, ``precision`` and
        ``source``, in that order.
    """
    keys = ("text", "sort_date", "precision", "source")
    values = (text, sort_date, precision, source)
    return dict(zip(keys, values))
def parse_iso_datetime(value: str, source: str = "server submission") -> dict[str, str] | None:
    """Parse an ISO 8601 timestamp into a date record normalised to UTC.

    Args:
        value: Timestamp text. ``Z`` is rewritten to ``+00:00`` before
            parsing, and a timestamp without an offset is taken to be UTC.
        source: Label stored in the returned record.

    Returns:
        A record with ``"second"`` precision whose ``sort_date`` is the UTC
        calendar date, or ``None`` when the value is empty, cannot be parsed,
        or falls outside the representable range once converted to UTC.
    """
    value = str(value or "").strip()
    if not value:
        return None
    try:
        parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
    except ValueError:
        return None
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=UTC)
    try:
        parsed = parsed.astimezone(UTC)
    except OverflowError:
        # An offset can push a timestamp at the very edge of the supported
        # year range past it; treat that like any other unparseable value.
        return None
    # "%-d" (day without zero padding) is a platform-specific strftime
    # extension, so the day number is formatted directly instead.
    text = f"{parsed:%b} {parsed.day}, {parsed:%Y}"
    return date_info(text, parsed.date().isoformat(), "second", source)
def parse_friedman_date_text(value: str, source: str = "Erich Friedman Packing Center") -> dict[str, str] | None:
    """Parse a loosely formatted date string into a date record.

    Three shapes are recognised, tried in this order:

    * ``M/D/YY`` or ``M/D/YYYY`` spanning the whole string -> ``"day"``
      precision. Years below 100 are mapped to 2000-2069 when they are
      ``<= 69`` and to 1970-1999 otherwise.
    * ``<Month name> YYYY`` anywhere in the string -> ``"month"`` precision,
      sorted as the first day of that month.
    * A bare four-digit year anywhere in the string -> ``"year"`` precision,
      sorted as January 1st of that year.

    Args:
        value: Text that may contain a date.
        source: Label stored in the returned record.

    Returns:
        The date record, or ``None`` when the value is empty, matches none of
        the shapes, or names an impossible calendar day.
    """
    value = str(value or "").strip()
    if not value:
        return None

    slash = re.fullmatch(r"(\d{1,2})/(\d{1,2})/(\d{2}|\d{4})", value)
    if slash:
        month, day, year = (int(part) for part in slash.groups())
        if year < 100:
            year += 2000 if year <= 69 else 1900
        try:
            parsed = date(year, month, day)
        except ValueError:
            return None
        # "%-d" (day without zero padding) is a platform-specific strftime
        # extension, so the day number is formatted directly instead.
        text = f"{parsed:%b} {parsed.day}, {parsed:%Y}"
        return date_info(text, parsed.isoformat(), "day", source)

    month_pattern = "|".join(MONTHS)
    match = re.search(rf"\b((?:{month_pattern})\s+\d{{4}}|\d{{4}})\b", value)
    if not match:
        return None
    text = match.group(1)
    parts = text.split()
    if len(parts) == 2:
        # "<Month> <Year>": sort as the first day of that month.
        month = MONTH_BY_NAME[parts[0]]
        year = int(parts[1])
        return date_info(text, f"{year:04d}-{month:02d}-01", "month", source)
    # Bare year: sort as January 1st.
    year = int(parts[0])
    return date_info(text, f"{year:04d}-01-01", "year", source)
def date_from_reference_text(reference_text: str, source: str = "Erich Friedman Packing Center") -> dict[str, str]:
    """Extract a date that follows the word "in" inside free-form reference text.

    Looks for ``in <Month name> YYYY`` or ``in YYYY`` and hands the captured
    date to ``parse_friedman_date_text``.

    Args:
        reference_text: Free-form text to search; falsy values are treated as
            an empty string.
        source: Label stored in the returned record.

    Returns:
        The parsed date record, or the ``unknown_date`` placeholder when no
        such phrase is found or it cannot be parsed.
    """
    haystack = str(reference_text or "")
    month_pattern = "|".join(MONTHS)
    found = re.search(rf"\bin\s+((?:{month_pattern})\s+\d{{4}}|\d{{4}})\b", haystack)
    info = parse_friedman_date_text(found.group(1), source) if found else None
    return info if info else unknown_date(source)
def date_from_friedman_record(record: dict[str, Any], source: str = "Erich Friedman Packing Center") -> dict[str, str]:
    """Work out the best available date for a record.

    Lookup order:

    1. If ``record["friedman_reference"]`` is a dict, try its ``date`` field;
       if that does not parse and it has a non-empty ``reference_text``, the
       result of searching that text is returned (even when it is the
       unknown-date placeholder).
    2. Otherwise try the record's own ``friedman_date`` / ``date`` field.
    3. Finally search the record's ``reference_text`` /
       ``friedman_reference_text``.

    Args:
        record: Mapping to read the date fields from.
        source: Label stored in the returned record.

    Returns:
        A date record; the unknown-date placeholder when nothing parses.
    """
    nested = record.get("friedman_reference")
    if isinstance(nested, dict):
        from_nested = parse_friedman_date_text(str(nested.get("date") or ""), source)
        if from_nested:
            return from_nested
        nested_text = str(nested.get("reference_text") or "")
        if nested_text:
            return date_from_reference_text(nested_text, source)

    flat_date = record.get("friedman_date") or record.get("date") or ""
    from_flat = parse_friedman_date_text(str(flat_date), source)
    if from_flat:
        return from_flat

    flat_text = record.get("reference_text") or record.get("friedman_reference_text") or ""
    return date_from_reference_text(str(flat_text), source)
def date_from_submission(record: dict[str, Any]) -> dict[str, str]:
    """Derive a date record from a record's ``submitted_at`` timestamp.

    Args:
        record: Mapping whose ``submitted_at`` value is parsed with
            ``parse_iso_datetime``.

    Returns:
        The parsed date record, or the unknown-date placeholder when the
        timestamp is missing or unparseable. Both carry the source label
        ``"server submission"``.
    """
    label = "server submission"
    stamp = str(record.get("submitted_at") or "")
    parsed = parse_iso_datetime(stamp, label)
    if parsed:
        return parsed
    return unknown_date(label)
def display_date_info(info: dict[str, Any] | None) -> str:
if not isinstance(info, dict):
return "date not listed"
return str(info.get("text") or "date not listed")
def is_recent_date_info(info: dict[str, Any] | None, *, now: date | None = None, days: int = 7) -> bool:
if not isinstance(info, dict):
return False
if info.get("precision") not in {"day", "second"}:
return False
sort_date = str(info.get("sort_date") or "")
if not sort_date:
return False
try:
parsed = date.fromisoformat(sort_date)
except ValueError:
return False
today = now or datetime.now(UTC).date()
return today - timedelta(days=days) <= parsed <= today
|