File size: 7,587 Bytes
2deab8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
"""Utility functions shared across renderers.

Mirrors PHP helpers: h(), formatMoneyFigures(), handbook_anchor(), etc.
"""

from __future__ import annotations

import html
import re


def h(s: str) -> str:
    """Return *s* escaped for safe inclusion in HTML (mirrors PHP h()).

    The value is converted with ``str()`` first, so non-string input is
    accepted. ``&``, ``<``, ``>`` and both quote characters are escaped.
    """
    as_text = str(s)
    return html.escape(as_text, quote=True)


def is_assoc(a: list | dict) -> bool:
    """Check if an array is associative (dict-like) vs sequential list."""
    return isinstance(a, dict)


def hb_slug(s: str) -> str:
    """Build an underscore-separated ASCII slug for use in anchors.

    The text is lowercased, every run of characters outside ``a-z0-9`` is
    collapsed into a single ``_``, and leading/trailing underscores are
    removed.

    Args:
        s: Text to turn into a slug.

    Returns:
        The slug; an empty string when *s* contains no ASCII letter or digit.
    """
    lowered = s.lower()
    # Deliberately no re.IGNORECASE: the text is already lowercased, and with
    # that flag the class [a-z] also accepts the non-ASCII letters U+0131 and
    # U+017F, which would then leak into the slug.
    # The "+" already collapses runs (including existing underscores), so no
    # second pass is needed to squeeze repeated separators.
    slug = re.sub(r"[^a-z0-9]+", "_", lowered)
    return slug.strip("_")


def handbook_anchor(prefix: str, text: str, idx: int) -> str:
    """Normalise *text* into an anchor id of the form ``prefix-text-idx``.

    The text is lowercased, every run of characters outside ``a-z0-9``
    becomes a single ``-``, and leading/trailing dashes are removed.

    Args:
        prefix: Leading component of the id; used as given (not normalised).
        text: Human-readable label to derive the middle component from.
        idx: Index appended as the last component.

    Returns:
        The anchor id string.
    """
    # Deliberately no re.IGNORECASE: the text is already lowercased, and with
    # that flag the class [a-z] also accepts the non-ASCII letters U+0131 and
    # U+017F, which would end up inside the id.
    base = re.sub(r"[^a-z0-9]+", "-", text.lower()).strip("-")
    if not base:
        # NOTE(review): with this fallback the result repeats prefix and idx
        # (e.g. "sec-sec-2-2"). Kept as-is because existing ids may depend on
        # it -- confirm against the PHP original before changing.
        base = f"{prefix}-{idx}"
    return f"{prefix}-{base}-{idx}"


def is_truthy(val) -> bool:
    """Interpret a loosely-typed flag value as a boolean.

    Python counterpart of the PHP helper ``handbook_true``.

    Rules, applied in order:

    * ``None`` is false (a missing value must not turn a flag on).
    * ``bool`` values are returned unchanged.
    * ``int`` and ``float`` values are true unless they equal zero.
    * Anything else is converted with ``str()``, trimmed and lowercased;
      it is false only when the result is ``"0"``, ``"false"`` or ``""``.

    Args:
        val: Value to interpret; any type is accepted.

    Returns:
        The boolean interpretation of *val*.
    """
    if val is None:
        # str(None) is "none", which the string rule below would call true.
        return False
    if isinstance(val, bool):
        return val
    if isinstance(val, (int, float)):
        # Floats are handled numerically: str(0.0) is "0.0", which the string
        # rule below would not recognise as zero.
        return val != 0
    return str(val).strip().lower() not in ("0", "false", "")


def format_money_figures(text: str) -> str:
    """Normalize monetary figures in *text* to the "USD X,XXX" format.

    Three passes are applied in order:

    1. ``$X`` becomes ``USD X`` (every dollar amount, whatever its size).
    2. Existing ``USD X`` amounts (any letter case, with or without a space
       before the number) are rewritten with an upper-case prefix, a single
       space and thousands separators.
    3. Bare numbers that have four or more digits, or are already
       comma-grouped, get a ``USD `` prefix unless they are directly followed
       by ``%``, ``/`` or another digit.

    Fraction digits are kept exactly as written. Pass 3 cannot tell money
    from other large numbers, so a bare four-digit number such as a year is
    rewritten as well.

    Args:
        text: Text to normalise. Falsy values are returned unchanged.

    Returns:
        The text with monetary figures rewritten; no ``$`` symbol is emitted
        for any amount that was recognised.
    """
    if not text:
        return text

    # One amount: digit groups optionally separated by commas, then an
    # optional fraction. It always ends on a digit, so sentence punctuation
    # following the amount ("$1,000, then ...") is never swallowed.
    amount = r"\d+(?:,\d+)*(?:\.\d+)?"

    def _as_usd(raw: str) -> str:
        """Render one matched amount as "USD <grouped digits>[.<fraction>]"."""
        whole, _, fraction = raw.replace(",", "").partition(".")
        # Integer grouping avoids the rounding a float round-trip could add.
        grouped = f"{int(whole):,}"
        return f"USD {grouped}.{fraction}" if fraction else f"USD {grouped}"

    def _rewrite_prefixed(m: re.Match) -> str:
        """Replacement for passes 1 and 2 (amount captured in group 1)."""
        try:
            return _as_usd(m.group(1))
        except ValueError:
            # int() refuses absurdly long digit runs; leave those untouched.
            return m.group(0)

    # Pass 1: "$X" -> "USD X".
    text = re.sub(r"\$(" + amount + r")", _rewrite_prefixed, text)

    # Pass 2: normalise existing "USD X" / "usd X" / "USDX".
    text = re.sub(
        r"\bUSD\s*(" + amount + r")",
        _rewrite_prefixed,
        text,
        flags=re.IGNORECASE,
    )

    # Pass 3: prefix bare large numbers. The first alternative consumes
    # amounts that already carry the prefix as a whole, so the scan can never
    # restart inside one (which used to turn "USD 1,000,000" into
    # "USD 1,USD 0"). The "(?<!\d\.)" guard keeps fraction digits such as the
    # "12345" in "0.12345" from being treated as a separate number.
    bare_or_prefixed = (
        r"(USD " + amount + r")"
        r"|(?<!\d)(?<!\d\.)(?<!USD )"
        r"((?:\d{1,3}(?:,\d{3})+)|(?:\d{4,}))(?:\.(\d+))?(?![%\d/])"
    )

    def _rewrite_bare(m: re.Match) -> str:
        """Replacement for pass 3; already-prefixed amounts pass through."""
        if m.group(1) is not None:
            return m.group(0)
        fraction = m.group(3)
        raw = m.group(2) if fraction is None else f"{m.group(2)}.{fraction}"
        try:
            return _as_usd(raw)
        except ValueError:
            return m.group(0)

    return re.sub(bare_or_prefixed, _rewrite_bare, text)


def ensure_program_options_pair(text: str) -> str:
    """Make sure the REGULAR and PRIME program options are mentioned together.

    When exactly one of the whole words REGULAR / PRIME (any letter case)
    occurs in *text*, trailing whitespace is removed and the suffix
    " (REGULAR and PRIME)" is appended. Text that mentions both options or
    neither is returned unchanged, which also makes the function idempotent:
    the appended suffix itself contains both words.

    Args:
        text: Text to check. Falsy values are returned unchanged.

    Returns:
        The original text, or the text with the suffix appended.
    """
    if not text:
        return text

    has_regular, has_prime = (
        re.search(rf"\b{option}\b", text, flags=re.IGNORECASE) is not None
        for option in ("REGULAR", "PRIME")
    )

    # No separate check for an existing "(REGULAR and PRIME)" suffix is
    # needed: that phrase contains both words, so the test below is already
    # false whenever it is present.
    if has_regular != has_prime:
        return text.rstrip() + " (REGULAR and PRIME)"
    return text


def sort_sections_stable(sections: list[dict]) -> list[dict]:
    """Sort *sections* in place by ``sort_order``, then ``id``, keeping ties stable.

    Entries whose ``sort_order`` (or ``id``) is missing or ``None`` sort after
    entries that have a value for that key. Entries that compare equal on
    both keys keep their original relative order, because ``list.sort`` is
    stable.

    The section dicts themselves are not modified.

    Args:
        sections: List of section dicts; it is reordered in place.

    Returns:
        The same list object, now sorted.
    """

    def sort_key(section: dict):
        order = section.get("sort_order")
        ident = section.get("id")
        # The leading 0/1 flag pushes missing values to the end and avoids
        # comparing None with real values.
        return (
            (1, 0) if order is None else (0, order),
            (1, 0) if ident is None else (0, ident),
        )

    # No temporary index key is written into the dicts: the stable sort
    # already preserves insertion order for ties, and writing/removing a
    # helper key would clobber caller data stored under the same name.
    sections.sort(key=sort_key)
    return sections


def get_any(d: dict, keys: list[str]) -> str:
    """Look up *keys* in order and return the first usable value as text.

    A value is usable when it is not ``None``, not a ``dict`` or ``list``,
    and its ``str()`` form is non-empty after trimming whitespace. The
    trimmed text is returned; an empty string means nothing qualified.
    """
    looked_up = (d.get(name) for name in keys)
    trimmed = (
        str(value).strip()
        for value in looked_up
        if value is not None and not isinstance(value, (dict, list))
    )
    # Generators keep the evaluation lazy: conversion stops at the first hit.
    return next((candidate for candidate in trimmed if candidate), "")


def emphasize_keywords(text: str) -> str:
    """HTML-escape *text* and wrap key handbook terms in ``<strong>`` tags.

    The text is escaped here with ``h()``, so pass raw (unescaped) text;
    input that is already escaped would be escaped a second time.

    Emphasised, case-insensitively:

    * ``USD`` amounts (``USD 1,000``) and a standalone ``USD``;
    * dollar-sign amounts (``$20``, ``$1,000.00``);
    * ``GPA (Undergraduate Requirement)`` as one phrase, otherwise ``GPA``;
    * the whole words REGULAR, PRIME, Global, Uganda, Kenya and the phrases
      ``High School grades`` and ``Refund Policy``;
    * the GPA values 2.8, 3.4 and 4.0.

    Everything is matched in a single pass, so one piece of text is wrapped
    at most once and ``<strong>`` tags are never nested.

    Args:
        text: Raw text. Falsy values are returned unchanged.

    Returns:
        Escaped HTML with ``<strong>`` tags around the matched terms.
    """
    if not text:
        return text

    escaped = h(text)

    # Alternatives are tried in this order at each position, so the longer,
    # more specific forms come before the bare words they contain.
    alternatives = (
        # USD amounts like "USD 1,000" or "USD 500".
        r"\bUSD\s+[\d,]+(?:\.\d+)?",
        # Dollar-sign amounts like "$20", "$1,000", "$1,000.00".
        r"\$[\d,]+(?:\.\d+)?",
        # Ends in ")", so it must not be followed by "\b": a word boundary
        # after ")" would require a letter or digit right behind the phrase.
        r"\bGPA\s*\(\s*Undergraduate\s+Requirement\s*\)",
        # Standalone "USD" (not directly followed by digits or a comma).
        r"\bUSD\b(?!\s*[\d,])",
        # Program options, qualification and geo terms, refund policy phrase.
        r"\b(?:REGULAR|PRIME|GPA|High\s+School\s+grades|Global|Uganda|Kenya"
        r"|Refund\s+Policy)\b",
        # Specific GPA values.
        r"\b(?:2\.8|3\.4|4\.0)\b",
    )

    return re.sub(
        "|".join(alternatives),
        r"<strong>\g<0></strong>",
        escaped,
        flags=re.IGNORECASE,
    )


def linkify_urls(text: str) -> str:
    """Turn http/https URLs in *text* into ``<a>`` tags opening in a new tab.

    A URL runs from ``http://`` or ``https://`` up to the next whitespace,
    ``<`` or ``)``. Punctuation at its very end (``. , ; : ! ? ) ' "``) is
    treated as part of the surrounding sentence: it is left out of the link
    and written back unchanged right after the closing ``</a>``.

    Only the URL itself is escaped (with ``h()``); the rest of *text* is
    passed through as-is.

    NOTE(review): because the URL is escaped here, input that is already
    HTML-escaped gets entities inside URLs escaped a second time (for
    example ``&amp;`` in a query string) -- confirm which form callers pass.

    Args:
        text: Text that may contain URLs. Falsy values are returned unchanged.

    Returns:
        The text with each URL replaced by an anchor tag.
    """
    if not text:
        return text

    url_pattern = r'(https?://[^\s<)]+)'
    trailing_punctuation = ".,;:!?)'\""

    def make_link(match):
        matched = match.group(1)
        url = matched.rstrip(trailing_punctuation)
        # Whatever was trimmed off belongs to the sentence, not the link;
        # keep it so the surrounding text is not altered.
        remainder = matched[len(url):]
        safe_url = h(url)
        return (
            f'<a href="{safe_url}" target="_blank" '
            f'rel="noopener noreferrer">{safe_url}</a>{remainder}'
        )

    return re.sub(url_pattern, make_link, text)