File size: 4,566 Bytes
ef637fb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
from __future__ import annotations

import re
from datetime import UTC, date, datetime, timedelta
from typing import Any


# Full English month names in calendar order (index 0 is January).
MONTHS = (
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December",
)

# Lookup from full month name to its 1-based month number.
MONTH_BY_NAME = dict(zip(MONTHS, range(1, len(MONTHS) + 1)))


def unknown_date(source: str = "") -> dict[str, str]:
    """Build the placeholder record used when no date could be determined.

    The record has the display text ``"date not listed"``, an empty
    ``sort_date``, precision ``"unknown"`` and the given *source* label.
    A fresh dict is returned on every call.
    """
    placeholder = dict(text="date not listed", sort_date="", precision="unknown")
    placeholder["source"] = source
    return placeholder


def date_info(text: str, sort_date: str, precision: str, source: str) -> dict[str, str]:
    """Bundle the four date fields into one record dict.

    The keys are ``text``, ``sort_date``, ``precision`` and ``source``, in
    that order; each value is stored exactly as passed in.
    """
    field_names = ("text", "sort_date", "precision", "source")
    field_values = (text, sort_date, precision, source)
    return dict(zip(field_names, field_values))


def parse_iso_datetime(value: str, source: str = "server submission") -> dict[str, str] | None:
    value = str(value or "").strip()
    if not value:
        return None
    try:
        parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
    except ValueError:
        return None
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=UTC)
    parsed = parsed.astimezone(UTC)
    return date_info(
        parsed.strftime("%b %-d, %Y"),
        parsed.date().isoformat(),
        "second",
        source,
    )


def parse_friedman_date_text(value: str, source: str = "Erich Friedman Packing Center") -> dict[str, str] | None:
    value = str(value or "").strip()
    if not value:
        return None

    slash = re.fullmatch(r"(\d{1,2})/(\d{1,2})/(\d{2}|\d{4})", value)
    if slash:
        month = int(slash.group(1))
        day = int(slash.group(2))
        year = int(slash.group(3))
        if year < 100:
            year += 2000 if year <= 69 else 1900
        try:
            parsed = date(year, month, day)
        except ValueError:
            return None
        return date_info(parsed.strftime("%b %-d, %Y"), parsed.isoformat(), "day", source)

    month_pattern = "|".join(MONTHS)
    match = re.search(rf"\b((?:{month_pattern})\s+\d{{4}}|\d{{4}})\b", value)
    if not match:
        return None
    text = match.group(1)
    parts = text.split()
    if len(parts) == 2:
        month = MONTH_BY_NAME[parts[0]]
        year = int(parts[1])
        return date_info(text, f"{year:04d}-{month:02d}-01", "month", source)
    year = int(parts[0])
    return date_info(text, f"{year:04d}-01-01", "year", source)


def date_from_reference_text(reference_text: str, source: str = "Erich Friedman Packing Center") -> dict[str, str]:
    """Extract a date from free-form reference text.

    Looks for the word ``in`` followed by either ``<full month name> YYYY``
    or a four-digit year, and hands the matched date text to
    ``parse_friedman_date_text``. When nothing matches, or the matched text
    does not parse, the ``unknown_date`` placeholder for *source* is returned.
    """
    haystack = str(reference_text or "")
    month_pattern = "|".join(MONTHS)
    found = re.search(rf"\bin\s+((?:{month_pattern})\s+\d{{4}}|\d{{4}})\b", haystack)
    result = parse_friedman_date_text(found.group(1), source) if found else None
    return result or unknown_date(source)


def date_from_friedman_record(record: dict[str, Any], source: str = "Erich Friedman Packing Center") -> dict[str, str]:
    """Work out the date record for *record*, trying several fields in turn.

    Lookup order:

    1. If ``record["friedman_reference"]`` is a dict, its ``date`` entry is
       parsed. Failing that, a non-empty ``reference_text`` entry is scanned
       and that result is returned as-is, even if it is the unknown
       placeholder.
    2. The record's own ``friedman_date`` (or else ``date``) is parsed.
    3. The record's own ``reference_text`` (or else
       ``friedman_reference_text``) is scanned for a date.
    """
    nested = record.get("friedman_reference")
    if isinstance(nested, dict):
        from_nested = parse_friedman_date_text(str(nested.get("date") or ""), source)
        if from_nested:
            return from_nested
        nested_reference = str(nested.get("reference_text") or "")
        if nested_reference:
            return date_from_reference_text(nested_reference, source)

    top_level_date = record.get("friedman_date") or record.get("date") or ""
    from_record = parse_friedman_date_text(str(top_level_date), source)
    if from_record:
        return from_record

    top_level_reference = record.get("reference_text") or record.get("friedman_reference_text") or ""
    return date_from_reference_text(str(top_level_reference), source)


def date_from_submission(record: dict[str, Any]) -> dict[str, str]:
    """Return the date record for a submission's ``submitted_at`` timestamp.

    The timestamp is parsed with ``parse_iso_datetime``; when it is missing
    or unparseable, the ``unknown_date`` placeholder is returned. Either way
    the record's source is ``"server submission"``.
    """
    label = "server submission"
    submitted_at = str(record.get("submitted_at") or "")
    parsed = parse_iso_datetime(submitted_at, label)
    if parsed:
        return parsed
    return unknown_date(label)


def display_date_info(info: dict[str, Any] | None) -> str:
    if not isinstance(info, dict):
        return "date not listed"
    return str(info.get("text") or "date not listed")


def is_recent_date_info(info: dict[str, Any] | None, *, now: date | None = None, days: int = 7) -> bool:
    if not isinstance(info, dict):
        return False
    if info.get("precision") not in {"day", "second"}:
        return False
    sort_date = str(info.get("sort_date") or "")
    if not sort_date:
        return False
    try:
        parsed = date.fromisoformat(sort_date)
    except ValueError:
        return False
    today = now or datetime.now(UTC).date()
    return today - timedelta(days=days) <= parsed <= today