File size: 4,911 Bytes
51f3e0e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
from __future__ import annotations

import re
from dataclasses import dataclass
from typing import Optional


# Lower-case month names mapped to their month number.  "sept" is the only
# abbreviation accepted.  The keys are also spliced into DATE_PATTERN below.
MONTHS = {
    "january": 1,
    "february": 2,
    "march": 3,
    "april": 4,
    "may": 5,
    "june": 6,
    "july": 7,
    "august": 8,
    "september": 9,
    "sept": 9,
    "october": 10,
    "november": 11,
    "december": 12,
}


# Sign applied to a coordinate for each hemisphere letter: south and west
# are negative, north and east positive.
HEMISPHERE_MULTIPLIERS = {
    "n": 1,
    "s": -1,
    "e": 1,
    "w": -1,
}


# A number, an optional degree marker, then a hemisphere written either as a
# single letter or as a full compass word ("45.5N", "30 degrees north").
# Group 1 is the number, group 2 the first letter of the hemisphere.
# The lookahead rejects ordinary words that merely start with n/s/e/w, so
# "10 seconds" or "3 weeks" are no longer read as 10°S / 3°W.
COORD_PATTERN = re.compile(
    r"([-+]?\d+(?:\.\d+)?)\s*(?:°|degrees|deg)?\s*"
    r"(?=(?:north|south|east|west|[nsew])(?![a-z]))([nsew])",
    re.IGNORECASE,
)

# "lat: 12.5" / "longitude = -45" style labels.  Group 1 is filled for a
# latitude label, group 2 for a longitude label (the other group is empty).
# The leading \b keeps the labels from matching inside longer words such as
# "flat 30" or "Babylon 600".
LAT_LON_WORD_PATTERN = re.compile(
    r"\b(?:latitude|lat)\s*[:=]?\s*([-+]?\d+(?:\.\d+)?)"
    r"|\b(?:longitude|lon)\s*[:=]?\s*([-+]?\d+(?:\.\d+)?)",
    re.IGNORECASE,
)

# "<month> <day>[,] <year> [era]", e.g. "March 15, 44 BC".
# Groups: month name, day, year (optionally signed), era marker.
DATE_PATTERN = re.compile(
    r"\b("
    + "|".join(MONTHS)
    + r")\s+(\d{1,2})(?:st|nd|rd|th)?(?:,\s*|\s+)(-?\d{1,4})(?:\s*(BCE|BC|CE|AD))?",
    re.IGNORECASE,
)

# A bare 1-4 digit year with an optional era marker.  Group 1 is the year
# (optionally signed), group 2 the era.
# A lookbehind is used instead of \b so that a leading minus sign is actually
# captured ("-500"); a \b in front of "-?" only matches when the minus follows
# a word character.  The "." guards on both sides skip the integer and
# fractional parts of decimal numbers such as the "45" and "5" in "45.5N".
YEAR_ONLY_PATTERN = re.compile(
    r"(?<![\w.])(-?\d{1,4})(?!\.\d)\s*(BCE|BC|CE|AD)?\b", re.IGNORECASE
)

# 24-hour style times such as "14 hours", "9:30 h".  Groups: hour, minute.
HOUR_PATTERN = re.compile(r"\b(\d{1,2})(?::(\d{2}))?\s*(?:hours?|h)\b", re.IGNORECASE)

# 12-hour clock times introduced by "at"/"around", e.g. "at 3:15 pm".
# Groups: hour, minute.  The am/pm marker is the last two characters of the
# whole match.
SEASONAL_HOUR_PATTERN = re.compile(
    r"\b(?:at|around)\s*(\d{1,2})(?::(\d{2}))?\s*(?:am|pm)\b", re.IGNORECASE
)


@dataclass
class ParsedPrompt:
    """Location, date and time-of-day hints extracted from a free-text prompt.

    Every field defaults to "nothing found".  ``parse_prompt_context`` fills
    in whichever values the module's regex helpers manage to recognise.
    """

    # Coordinates as written in the prompt; values given with an S or W
    # hemisphere letter are stored as negative numbers.
    lat: Optional[float] = None
    lon: Optional[float] = None
    # Calendar date.  Years carrying a BC/BCE marker are stored as negative.
    # month and day stay None when only a bare year was found.
    year: Optional[int] = None
    month: Optional[int] = None
    day: Optional[int] = None
    # Time of day; am/pm times are converted to a 24-hour value.
    hour: Optional[int] = None
    minute: Optional[int] = None
    # Sum of the per-helper confidence scores, capped at 1.0 by
    # parse_prompt_context.
    confidence: float = 0.0
    # parse_prompt_context stores the prompt text here unchanged.
    residual_text: str = ""


def _apply_hemisphere(value: float, hemisphere: str) -> float:
    """Sign *value* according to its hemisphere letter.

    The letter is looked up case-insensitively in ``HEMISPHERE_MULTIPLIERS``;
    a letter that is not listed there leaves the value's sign untouched.
    """
    key = hemisphere.lower()
    if key in HEMISPHERE_MULTIPLIERS:
        return value * HEMISPHERE_MULTIPLIERS[key]
    return value * 1


def _parse_coordinates(text: str) -> tuple[Optional[float], Optional[float], float]:
    """Look for a latitude/longitude pair in *text*.

    Hemisphere notation (``COORD_PATTERN``) is tried first and is only used
    when both a N/S value and an E/W value are present.  Otherwise the
    function falls back to labelled values (``LAT_LON_WORD_PATTERN``), where
    each coordinate is accepted independently.

    Returns:
        ``(lat, lon, confidence)``.  Confidence is 0.5 for a hemisphere pair,
        or 0.2 per labelled coordinate found, and never exceeds 0.6.
    """
    axis_for_letter = {"n": "lat", "s": "lat", "e": "lon", "w": "lon"}

    # Keep only the first value seen on each axis.
    first_on_axis: dict[str, float] = {}
    for number, letter in COORD_PATTERN.findall(text):
        key = letter.lower()
        signed = _apply_hemisphere(float(number), key)
        axis = axis_for_letter.get(key)
        if axis is not None:
            first_on_axis.setdefault(axis, signed)

    if len(first_on_axis) == 2:
        return first_on_axis["lat"], first_on_axis["lon"], 0.5

    # No complete hemisphere pair: fall back to "lat ..."/"lon ..." labels.
    labelled = LAT_LON_WORD_PATTERN.findall(text)
    lat = next((float(lat_text) for lat_text, _ in labelled if lat_text), None)
    lon = next((float(lon_text) for _, lon_text in labelled if lon_text), None)

    score = 0.0
    for found in (lat, lon):
        if found is not None:
            score += 0.2

    return lat, lon, min(score, 0.6)


def _convert_year(raw_year: str, era: Optional[str]) -> int:
    year = int(raw_year)
    if era:
        era = era.upper()
        if era in ("BCE", "BC"):
            return -abs(year)
    return year


def _parse_date(text: str) -> tuple[Optional[int], Optional[int], Optional[int], float]:
    """Extract a calendar date, or failing that a bare year, from *text*.

    Returns:
        ``(year, month, day, confidence)``.  A full "<month> <day>, <year>"
        match scores 0.4.  Otherwise the first standalone number that falls
        in the accepted year range is returned as a year-only result scoring
        0.2.  When nothing matches, all parts are ``None`` and the score 0.0.
    """
    full_date = DATE_PATTERN.search(text)
    if full_date is not None:
        name, day_text, year_text, era_text = full_date.groups()
        return (
            _convert_year(year_text, era_text),
            MONTHS.get(name.lower()),
            int(day_text),
            0.4,
        )

    # Fallback: year-only pattern, restricted to a plausible historical range.
    converted = (
        _convert_year(*found.groups()) for found in YEAR_ONLY_PATTERN.finditer(text)
    )
    first_plausible = next((y for y in converted if -5000 <= y <= 3000), None)
    if first_plausible is None:
        return None, None, None, 0.0
    return first_plausible, None, None, 0.2


def _parse_hour(text: str) -> tuple[Optional[int], Optional[int], float]:
    """Extract a time of day from *text*.

    Two notations are tried in order: 24-hour style ("14 hours", "9:30 h")
    via ``HOUR_PATTERN``, then 12-hour style ("at 3 pm", "around 11:15 am")
    via ``SEASONAL_HOUR_PATTERN``.  A match whose hour exceeds 23 or whose
    minute exceeds 59 (for example the duration in "for 48 hours") is not a
    time of day, so it is skipped and the search continues with later matches.

    Args:
        text: Free-form prompt text.

    Returns:
        ``(hour, minute, confidence)`` with ``hour`` on a 24-hour scale.
        Confidence is 0.2 for the 24-hour notation and 0.15 for am/pm.
        Returns ``(None, None, 0.0)`` when no valid time is found.
    """
    for match in HOUR_PATTERN.finditer(text):
        hour = int(match.group(1))
        minute = int(match.group(2) or 0)
        if hour <= 23 and minute <= 59:
            return hour, minute, 0.2

    for match in SEASONAL_HOUR_PATTERN.finditer(text):
        hour = int(match.group(1))
        minute = int(match.group(2) or 0)
        # The pattern always ends with the am/pm marker, so read it from the
        # tail of the match instead of searching the whole matched text.
        meridiem = match.group(0)[-2:].lower()
        if meridiem == "pm" and hour < 12:
            hour += 12
        elif meridiem == "am" and hour == 12:
            hour = 0  # 12 am is midnight
        if hour <= 23 and minute <= 59:
            return hour, minute, 0.15

    return None, None, 0.0


def parse_prompt_context(prompt: Optional[str]) -> ParsedPrompt:
    """Parse location, date and time hints out of a free-text prompt.

    An empty or missing prompt yields a default ``ParsedPrompt``.  Otherwise
    the coordinate, date and hour helpers each run over the full prompt and
    their confidence scores are added together, capped at 1.0.  The prompt is
    stored unchanged in ``residual_text``.
    """
    if not prompt:
        return ParsedPrompt(residual_text="")

    latitude, longitude, position_score = _parse_coordinates(prompt)
    year, month, day, calendar_score = _parse_date(prompt)
    hour, minute, clock_score = _parse_hour(prompt)

    combined = position_score + calendar_score + clock_score
    if combined > 1.0:
        combined = 1.0

    parsed_fields = {
        "lat": latitude,
        "lon": longitude,
        "year": year,
        "month": month,
        "day": day,
        "hour": hour,
        "minute": minute,
        "confidence": combined,
        "residual_text": prompt,
    }
    return ParsedPrompt(**parsed_fields)