Spaces:

internationalscholarsprogram
/

handbook-engine

Sleeping

File size: 40,851 Bytes

2deab8c

"""Renderers — mirrors PHP renderers.php.

Contains functions for rendering:
- Table of Contents (TOC)
- Global section blocks (overview, steps, bullets, tables, doc_v1, etc.)
- University section blocks (overview, benefits, programs)
- Remote image fetching as data URIs
"""

from __future__ import annotations

import base64
import logging
import re
from typing import Any

import httpx

from app.services.utils import (
    emphasize_keywords,
    format_money_figures,
    get_any,
    h,
    hb_slug,
    is_assoc,
    is_truthy,
)

logger = logging.getLogger(__name__)


# =========================================
# Image fetching (with in-memory cache + async batch support)
# =========================================

# Module-level memo of image lookups, keyed by the stripped URL.  The value is
# the finished ``data:`` URI, or "" when the fetch failed or the payload was not
# an image -- failures are stored too, so the same URL is not requested again.
# NOTE(review): nothing in the visible code evicts entries -- confirm that the
# cache is meant to live for the whole process.
_image_cache: dict[str, str] = {}


def _detect_image_mime(data: bytes, content_type: str) -> str:
    """Return the image MIME type of *data*, or "" if it is not recognisably an image.

    The ``Content-Type`` header is consulted first, but only its media-type part
    (the text before any ``;`` parameter), compared case-insensitively.  When the
    header does not declare an ``image/*`` type, the leading magic bytes are
    checked for PNG, JPEG, GIF and WebP.

    Args:
        data: Raw response body.
        content_type: Value of the ``Content-Type`` header ("" when absent).

    Returns:
        A lower-case ``image/...`` string, or "" when neither source matches.
    """
    # Look at the media type only: a parameter such as name="image/x.png" on a
    # non-image type must not short-circuit the magic-byte fallback below.
    media_type = (content_type or "").split(";", 1)[0].strip().lower()
    if media_type.startswith("image/"):
        return media_type

    if data.startswith(b"\x89PNG"):
        return "image/png"
    if data.startswith(b"\xff\xd8\xff"):
        return "image/jpeg"
    if data.startswith(b"GIF8"):
        return "image/gif"
    # WebP is a RIFF container whose form type sits at byte offset 8.
    if data.startswith(b"RIFF") and data[8:12] == b"WEBP":
        return "image/webp"
    return ""


def fetch_image_data_uri(url: str) -> str:
    """Download *url* and return it inlined as a ``data:<mime>;base64,...`` URI.

    Counterpart of the PHP ``fetchImageDataUri`` helper.  Every outcome is
    memoised in ``_image_cache`` under the stripped URL, failures included.

    Returns:
        The data URI, or "" for a blank URL, a non-2xx or empty response, a
        transport error, or a payload that is not recognised as an image.
    """
    url = url.strip()
    if not url:
        return ""

    # Already resolved by prefetch_images() or by an earlier call.
    try:
        return _image_cache[url]
    except KeyError:
        pass

    def _remember(value: str) -> str:
        _image_cache[url] = value
        return value

    try:
        # NOTE(review): verify=False switches off TLS certificate checks --
        # confirm this is still required for the image hosts in use.
        with httpx.Client(verify=False, timeout=12, follow_redirects=True) as client:
            resp = client.get(url)
            usable = 200 <= resp.status_code < 300 and bool(resp.content)
            if not usable:
                logger.warning("Image fetch failed for %s status=%d", url, resp.status_code)
                return _remember("")
            data = resp.content
    except Exception as exc:
        logger.warning("Image fetch error for %s: %s", url, exc)
        return _remember("")

    mime = _detect_image_mime(data, resp.headers.get("content-type", ""))
    if mime.startswith("image/"):
        encoded = base64.b64encode(data).decode("ascii")
        return _remember(f"data:{mime};base64,{encoded}")

    logger.warning("Invalid image mime %s for %s", mime, url)
    return _remember("")


async def prefetch_images(urls: list[str]) -> dict[str, str]:
    """Warm ``_image_cache`` by downloading every not-yet-cached URL concurrently.

    All requests share one async client and are awaited together, so the
    total wait is not the sum of the individual downloads, as it would be
    with one-at-a-time fetching.

    Returns:
        A mapping from each entry of *urls* (exactly as given) to the data URI
        cached for its stripped form, or "" where none is available.
    """
    import asyncio

    def _snapshot() -> dict[str, str]:
        return {u: _image_cache.get(u.strip(), "") for u in urls}

    # De-duplicate, dropping blanks and anything the cache already answers.
    pending: set[str] = set()
    for raw in urls:
        cleaned = raw.strip()
        if cleaned and cleaned not in _image_cache:
            pending.add(cleaned)
    unique_urls = list(pending)
    if not unique_urls:
        return _snapshot()

    async def _download(client: httpx.AsyncClient, url: str) -> tuple[str, str]:
        # Never raises: every failure is reported as (url, "").
        try:
            resp = await client.get(url)
            if not (200 <= resp.status_code < 300) or not resp.content:
                logger.warning("Prefetch image failed for %s status=%d", url, resp.status_code)
                return url, ""
            mime = _detect_image_mime(resp.content, resp.headers.get("content-type", ""))
            if mime.startswith("image/"):
                encoded = base64.b64encode(resp.content).decode("ascii")
                return url, f"data:{mime};base64,{encoded}"
            logger.warning("Prefetch invalid mime %s for %s", mime, url)
            return url, ""
        except Exception as exc:
            logger.warning("Prefetch image error for %s: %s", url, exc)
            return url, ""

    logger.info("Prefetching %d campus images in parallel...", len(unique_urls))
    async with httpx.AsyncClient(verify=False, timeout=15, follow_redirects=True) as client:
        results = await asyncio.gather(*(_download(client, u) for u in unique_urls))

    # Failures are cached as "" alongside the successes.
    _image_cache.update(results)
    fetched = sum(1 for _, data_uri in results if data_uri)

    logger.info("Prefetched %d/%d images successfully", fetched, len(unique_urls))
    return _snapshot()


# =========================================
# Funding extraction
# =========================================

def _extract_university_funding(
    j: dict,
    school_meta: dict | None = None,
) -> tuple[str, list[str]]:
    """Extract the funding heading and line items for a university.

    Sources are tried in order until one yields at least one item:

    1. ``j["funding"]["options"]`` -- dicts with ``name`` and/or ``amount``,
       rendered as ``"name - amount"`` (or whichever of the two is present).
    2. ``j["funding_available"]`` -- a legacy flat list of strings.
    3. A built-in default list chosen by ``school_meta["school_category"]``,
       used only when ``school_meta["status"]`` is ``"in"``.

    Args:
        j: The benefits section JSON; anything that is not a dict is treated
            as empty.
        school_meta: Optional school record supplying ``status`` and
            ``school_category`` for the fallback.

    Returns:
        ``(heading, items)``.  ``heading`` is ``funding.subheading`` when that
        is non-blank, otherwise ``"Funding Available"``.  ``items`` may be empty.
    """

    def _text(value: object) -> str:
        # JSON null must read as "missing", never as the literal text "None".
        return "" if value is None else str(value).strip()

    if not isinstance(j, dict):
        j = {}

    heading = "Funding Available"
    items: list[str] = []

    # 1. Preferred normalized shape
    funding = j.get("funding")
    if isinstance(funding, dict):
        heading = _text(funding.get("subheading")) or heading

        options = funding.get("options")
        if isinstance(options, list):
            for opt in options:
                if not isinstance(opt, dict):
                    continue
                fields = (_text(opt.get("name")), _text(opt.get("amount")))
                label = " - ".join(part for part in fields if part)
                if label:
                    items.append(label)

    # 2. Legacy fallback shape
    if not items:
        legacy = j.get("funding_available")
        if isinstance(legacy, list):
            items = [text for text in map(_text, legacy) if text]

    # 3. School-category fallback
    if not items and isinstance(school_meta, dict):
        defaults = {
            "non_cosigner": (
                "ISP Study Loan - $10,000",
                "Partner 1 (Unsecured Loan) - Up to $50,000 per academic year",
                "Partner 3 (Credit Option) - Up to $15,000",
            ),
            "cosigner": (
                "ISP Study Loan - $10,000",
                "Partner 2 (A Cosigned Loan) - Full Coverage Support",
                "Partner 3 (Credit Option) - Up to $15,000",
            ),
        }
        if _text(school_meta.get("status")).lower() == "in":
            category = _text(school_meta.get("school_category")).lower()
            items = list(defaults.get(category, ()))

    return (heading, items)


# =========================================
# TOC sorting and rendering
# =========================================

def sort_toc(items: list[dict]) -> list[dict]:
    """Sort TOC entries in place by ``sort_order`` (else ``sort``) and return the list.

    Mirrors PHP ``sortHandbookToc``.  Entries with a usable numeric value come
    first in ascending order; every other entry follows.  Within each group,
    and among equal values, the incoming order is kept.

    Entries that are not dicts are tolerated and treated as having no sort
    value, since the TOC renderers skip them rather than fail.  The entries
    themselves are never modified.

    Args:
        items: TOC entries; the list is reordered in place.

    Returns:
        The same list object, now sorted.
    """

    def _key(entry: object) -> tuple[int, float]:
        if isinstance(entry, dict):
            raw = entry.get("sort_order", entry.get("sort"))
            if raw is not None:
                try:
                    value = float(raw)
                except (ValueError, TypeError, OverflowError):
                    value = None
                # NaN compares false against everything and would corrupt the
                # ordering, so it counts as "no sort value" (NaN != NaN).
                if value is not None and value == value:
                    return (0, value)
        return (1, 0.0)

    # list.sort is stable, so ties keep their original relative order without
    # tagging each entry with a temporary index.
    items.sort(key=_key)
    return items


def render_toc(items: list[dict], debug: bool = False, show_pages: bool = True) -> str:
    """Render the Table of Contents as DOMPDF-safe HTML (mirrors PHP renderToc()).

    Entries are ordered with ``sort_toc`` and emitted as one table row each:
    title, dotted leader, page number.

    Args:
        items: TOC entries.  Keys read here: ``title``, ``target`` (falling
            back to ``anchor``), ``level``, ``bold``, ``upper`` and ``page``.
            Non-dict entries and entries without a title are skipped.  A null
            value is treated like a missing key, and a ``level`` that is not
            an integer is treated as 0.
        debug: Accepted for signature compatibility; not used by this function.
        show_pages: When false, the page-number cell is left blank.

    Returns:
        The HTML fragment, starting with the ``HANDBOOK_TOC_V2`` marker comment.
    """

    def _text(value: object) -> str:
        # JSON null must behave like a missing key, not render as "None".
        return "" if value is None else str(value).strip()

    def _level(value: object) -> int:
        # A malformed level (null, "abc", inf) must not abort the whole TOC.
        try:
            parsed = int(value)
        except (TypeError, ValueError, OverflowError):
            parsed = 0
        return max(0, min(3, parsed))

    chunks: list[str] = [
        "<!-- HANDBOOK_TOC_V2 -->",
        '<div class="toc">',
        '<div class="toc-heading">Table of Contents</div>',
        '<table class="toc-table" width="100%" cellspacing="0" cellpadding="0"'
        ' style="border-collapse:collapse; table-layout:fixed; width:100%;">'
        '<colgroup><col /><col width="50" /><col width="48" /></colgroup>',
    ]

    for e in sort_toc(items):
        if not isinstance(e, dict):
            continue
        title = _text(e.get("title"))
        if not title:
            continue

        target = e.get("target")
        if target is None:
            target = e.get("anchor")
        target = _text(target)

        level = _level(e.get("level", 0))
        # Top-level entries are always bold and upper-cased.
        bold = level == 0 or bool(e.get("bold", False))
        upper = level == 0 or bool(e.get("upper", False))

        row_class = "toc-row--major" if level == 0 else "toc-row--sub"
        if level >= 2:
            row_class += " toc-row--deep"

        title_inner = h(title.upper() if upper else title)
        if target:
            title_inner = f'<a href="{h(target)}">{title_inner}</a>'
        if bold:
            title_inner = f"<strong>{title_inner}</strong>"

        page = _text(e.get("page"))
        page_cell = f"<strong>{h(page)}</strong>" if show_pages and page else "&nbsp;"

        if level == 1:
            indent = "padding-left:16px;"
        elif level >= 2:
            indent = "padding-left:30px;"
        else:
            indent = ""

        title_style = (
            "vertical-align:bottom; padding:1px 4px 1px 0; font-size:10px; "
            "line-height:1.15; color:#111;"
            + (" font-weight:700;" if bold else " font-weight:400;")
            + (" text-transform:uppercase; letter-spacing:0.1px;" if upper else "")
            + (f" {indent}" if indent else "")
        )

        chunks.append(f'<tr class="{h(row_class)}">')
        chunks.append(f'<td class="toc-title" style="{title_style}">{title_inner}</td>')
        chunks.append(
            '<td class="toc-dots" style="vertical-align:bottom; border-bottom:1px dotted #777; '
            'height:0.85em; padding:0;">&nbsp;</td>'
        )
        chunks.append(
            '<td class="toc-pagenum" style="vertical-align:bottom; text-align:right; '
            "padding-left:4px; font-size:10px; font-weight:700; line-height:1.15; "
            f'white-space:nowrap; width:48px; color:#111;">{page_cell}</td>'
        )
        chunks.append("</tr>")

    chunks.append("</table></div>")
    return "".join(chunks)


def render_toc_hardcoded(
    items: list[dict],
    debug: bool = False,
    page_start: int = 3,
    page_offset: int = 0,
) -> str:
    """Render the TOC with page numbers assigned here (mirrors PHP renderTocHardcoded).

    After sorting with ``sort_toc``, every dict entry gets its ``page`` key
    rewritten in place:

    * an entry whose ``page`` is an integer string is shown as that number
      plus *page_offset*, and the running counter jumps past it;
    * any other entry receives the next number from the running counter,
      which starts at ``max(1, page_start)``.

    Args:
        items: TOC entries.  Keys read: ``title``, ``target`` (falling back to
            ``anchor``), ``level``, ``bold``, ``upper`` and ``page``.  Non-dict
            entries are ignored; entries without a title are numbered but not
            rendered.
        debug: Accepted for signature compatibility; not used by this function.
        page_start: First page number handed out to entries without one.
        page_offset: Added to every explicitly supplied page number.

    Returns:
        The HTML fragment, starting with the ``HANDBOOK_TOC_HARDCODED`` marker.
    """

    def _text(value: object) -> str:
        # JSON null must behave like a missing key, not render as "None".
        return "" if value is None else str(value).strip()

    def _level(value: object) -> int:
        # A malformed level (null, "abc", inf) must not abort the whole TOC.
        try:
            parsed = int(value)
        except (TypeError, ValueError, OverflowError):
            parsed = 0
        return max(0, min(3, parsed))

    def _explicit_page(value: object) -> int | None:
        # isdigit() also accepts text such as "--5" or "²" that int() rejects;
        # those count as "no page given" instead of raising.
        text = _text(value)
        if text and text.lstrip("-").isdigit():
            try:
                return int(text)
            except ValueError:
                return None
        return None

    sorted_items = sort_toc(items)

    seq = max(1, page_start)
    for item in sorted_items:
        if not isinstance(item, dict):
            continue
        explicit = _explicit_page(item.get("page"))
        if explicit is None:
            item["page"] = str(seq)
            seq += 1
        else:
            display = explicit + page_offset
            item["page"] = str(display)
            if display >= seq:
                seq = display + 1

    chunks: list[str] = [
        "<!-- HANDBOOK_TOC_HARDCODED -->\n",
        '<div class="toc">',
        '<p class="toc-heading">Table of Contents</p>',
        '<table class="toc-table" style="table-layout:fixed;width:100%;">'
        '<colgroup><col /><col width="50" /><col width="48" /></colgroup>',
    ]

    for e in sorted_items:
        if not isinstance(e, dict):
            continue
        title = _text(e.get("title"))
        if not title:
            continue

        target = e.get("target")
        if target is None:
            target = e.get("anchor")
        target = _text(target)

        level = _level(e.get("level", 0))
        # Top-level entries are always bold and upper-cased.
        bold = level == 0 or bool(e.get("bold", False))
        upper = level == 0 or bool(e.get("upper", False))

        row_class = "toc-row--major" if level == 0 else "toc-row--sub"
        if level >= 2:
            row_class += " toc-row--deep"

        title_inner = h(title.upper() if upper else title)
        if target:
            title_inner = f'<a href="{h(target)}">{title_inner}</a>'
        if bold:
            title_inner = f"<strong>{title_inner}</strong>"

        page = _text(e.get("page"))
        page_html = f"<strong>{h(page)}</strong>" if page else "&nbsp;"

        if level == 1:
            indent = "padding-left:16px;"
        elif level >= 2:
            indent = "padding-left:30px;"
        else:
            indent = ""

        title_style = (
            "vertical-align:bottom;padding:1px 4px 1px 0;font-size:10px;"
            "line-height:1.15;color:#111;"
            + ("font-weight:700;" if bold else "font-weight:400;")
            + ("text-transform:uppercase;letter-spacing:0.1px;" if upper else "")
            + indent
        )

        chunks.append(f'<tr class="{h(row_class)}">')
        chunks.append(f'<td class="toc-title" style="{title_style}">{title_inner}</td>')
        chunks.append(
            '<td class="toc-dots" style="vertical-align:bottom;padding:0;">'
            '<span class="toc-dots-inner">&nbsp;</span></td>'
        )
        chunks.append(
            '<td class="toc-pagenum" style="vertical-align:bottom;text-align:right;'
            "padding-left:4px;font-size:10px;font-weight:700;line-height:1.15;"
            f'white-space:nowrap;width:48px;color:#111111;">{page_html}</td>'
        )
        chunks.append("</tr>")

    chunks.append("</table></div>")
    return "".join(chunks)


# =========================================
# table_v3 / table_v4 cell helpers
# =========================================

# Mapping of style names → inline CSS strings for table_v3/v4 cells.
# The key is the value of a cell's "style" field; _parse_v3_cell looks it up
# here and emits the CSS as the cell's style="..." attribute.  A name that is
# not listed produces no style attribute at all.
_V3_STYLE_MAP: dict[str, str] = {
    "band_teal": "text-align:center;font-weight:700;color:#fff;background:#199970;",
    "band_navy": "text-align:center;font-weight:700;color:#fff;background:#0263A3;",
    "bold_amounts": "font-weight:600;",
    "green_center_bold": "text-align:center;font-weight:700;color:#199970;",
    "center_bold_multiline": "text-align:center;font-weight:600;vertical-align:middle;",
    "footer_center_bold": "text-align:center;font-weight:700;background:#f5f5f5;",
    "covered_merged": "vertical-align:top;font-size:9pt;line-height:1.5;",
}


def _parse_v3_cell(cell: Any) -> tuple[str, str, str]:
    """Turn one table_v3/v4 cell into ``(attr_str, style_str, html_content)``.

    A cell is either a plain value or a dict with optional keys ``text``,
    ``colspan``, ``rowspan``, ``style`` (a ``_V3_STYLE_MAP`` name) and
    ``parts`` (a list of ``{"text", "bold"}`` dicts for merged cells).

    Args:
        cell: The raw cell from the section JSON.

    Returns:
        ``attr_str`` -- ``colspan``/``rowspan`` attributes, each emitted only
        when greater than 1, with a leading space (or "").
        ``style_str`` -- `` style="..."`` for a known style name (or "").
        ``html_content`` -- escaped cell text.  When ``parts`` yields at least
        one non-empty part, the parts are joined with ``<br><br>`` and bold
        parts are wrapped in ``<strong>``; otherwise ``text`` is used.
    """

    def _text(value: Any) -> str:
        # JSON null must read as empty, not as the literal text "None".
        return "" if value is None else str(value)

    def _span(value: Any) -> int:
        # isdigit() accepts characters such as "²" that int() rejects; any
        # value that cannot be parsed leaves the span at its default of 1.
        if value is not None and str(value).isdigit():
            try:
                return int(value)
            except (TypeError, ValueError):
                return 1
        return 1

    if not isinstance(cell, dict):
        # A numeric zero is real cell content and must not collapse to blank
        # the way None, "" and other empty values do.
        is_number = isinstance(cell, (int, float)) and not isinstance(cell, bool)
        text = format_money_figures(str(cell)) if (cell or is_number) else ""
        return ("", "", h(text))

    colspan = _span(cell.get("colspan"))
    rowspan = _span(cell.get("rowspan"))
    attr = ""
    if colspan > 1:
        attr += f' colspan="{colspan}"'
    if rowspan > 1:
        attr += f' rowspan="{rowspan}"'

    inline_css = _V3_STYLE_MAP.get(_text(cell.get("style")), "")
    style_str = f' style="{inline_css}"' if inline_css else ""

    # Rich parts within cell (merged cells with multiple text blocks)
    html_parts: list[str] = []
    parts = cell.get("parts")
    if isinstance(parts, list):
        for part in parts:
            if not isinstance(part, dict):
                continue
            part_text = format_money_figures(_text(part.get("text")))
            if not part_text:
                continue
            escaped = h(part_text)
            html_parts.append(f"<strong>{escaped}</strong>" if part.get("bold") else escaped)

    if html_parts:
        content = "<br><br>".join(html_parts)
    else:
        content = h(format_money_figures(_text(cell.get("text"))))

    return (attr, style_str, content)


# =========================================
# Global blocks renderer
# =========================================

def _gb_text(value: Any, default: str = "") -> str:
    """Stringify a JSON value, mapping ``None`` (JSON null) to *default*."""
    return default if value is None else str(value)


def _gb_list(value: Any) -> list:
    """Return *value* when it is a list, otherwise an empty list."""
    return value if isinstance(value, list) else []


def _gb_cell(value: Any) -> str:
    """Escaped, money-formatted text for a table cell or plain paragraph."""
    return h(format_money_figures(_gb_text(value)))


def _gb_col_tag(width: Any) -> str:
    """Build one ``<col>`` tag; only a plain non-negative number becomes a width."""
    text = _gb_text(width).strip()
    # The value comes from section JSON and lands inside a style attribute, so
    # anything that is not a bare number is dropped instead of interpolated.
    if re.fullmatch(r"[0-9]+(?:\.[0-9]+)?", text):
        return f'<col style="width:{text}%">'
    return "<col>"


def _gb_steps(steps: list) -> str:
    """Render the ``steps`` layout: numbered title, body, links and QR image."""
    out: list[str] = []
    step_num = 0
    for step in steps:
        if not isinstance(step, dict):
            continue
        # Every dict step consumes a number, whether or not it has a title.
        step_num += 1
        step_title = _gb_text(step.get("title", step.get("step_title"))).strip()
        body = format_money_figures(
            _gb_text(step.get("body", step.get("description"))).strip()
        )

        out.append('<div class="avoid-break" style="margin:0 0 4px;">')
        if step_title:
            out.append(f'<div class="h3">Step {step_num}: {h(step_title)}</div>')
        if body:
            out.append(f'<p class="p">{emphasize_keywords(body)}</p>')

        links = step.get("links", [])
        if isinstance(links, list) and links:
            out.append('<ul class="ul">')
            for link in links:
                if not isinstance(link, dict):
                    continue
                label = _gb_text(link.get("label"), "Link").strip()
                url = _gb_text(link.get("url")).strip()
                if not url:
                    continue
                out.append(
                    f'<li><a href="{h(url)}" target="_blank" rel="noopener noreferrer">'
                    f"{h(label)}</a></li>"
                )
            out.append("</ul>")

        qr = _gb_text(step.get("qr_url", step.get("qr_image"))).strip()
        if qr:
            out.append(
                f'<img src="{h(qr)}" alt="QR" style="width:60px; height:60px; margin:4px 0;" />'
            )

        out.append("</div>")
    return "".join(out)


def _gb_bullets(entries: list, note_raw: Any) -> str:
    """Render a bullet list followed by an optional note."""
    out = ['<ul class="ul">']
    for entry in entries:
        text = format_money_figures(_gb_text(entry).strip())
        if text:
            out.append(f"<li>{emphasize_keywords(text)}</li>")
    out.append("</ul>")

    note = format_money_figures(_gb_text(note_raw).strip())
    if note:
        out.append(f'<div class="note">{h(note)}</div>')
    return "".join(out)


def _gb_basic_table(cols: list, rows: list) -> str:
    """Render the top-level ``columns``/``rows`` table."""
    out = ['<table class="tbl">']
    if cols:
        out.append("<thead><tr>")
        out.extend(f"<th>{h(_gb_text(c))}</th>" for c in cols)
        out.append("</tr></thead>")
    out.append("<tbody>")

    # Dict rows are read by a slug of the column label ("Total Cost" -> "total_cost").
    col_keys = [re.sub(r"[^a-z0-9]+", "_", _gb_text(c).lower()) for c in cols]
    for row in rows:
        if isinstance(row, dict):
            cells = [row.get(key, "") for key in col_keys]
        elif isinstance(row, list):
            cells = row
        else:
            continue
        out.append("<tr>")
        out.extend(f"<td>{_gb_cell(cell)}</td>" for cell in cells)
        out.append("</tr>")

    out.append("</tbody></table>")
    return "".join(out)


def _gb_table_v2(json_data: dict) -> str:
    """Render the ``table_v2`` layout: base columns plus grouped header columns."""
    base_cols = [c for c in _gb_list(json_data.get("base_columns")) if isinstance(c, dict)]
    groups = _gb_list(json_data.get("header_groups"))
    rows = _gb_list(json_data.get("rows"))

    # (group, raw column list) for every dict group; the raw list length drives colspan.
    grouped = [(g, _gb_list(g.get("columns"))) for g in groups if isinstance(g, dict)]

    all_cols = list(base_cols)
    for _, g_cols in grouped:
        all_cols.extend(c for c in g_cols if isinstance(c, dict))
    col_keys = [_gb_text(c.get("key")) for c in all_cols]

    out = ['<table class="tbl tbl-comparison"><thead>']
    if groups:
        # Two header rows: base columns span both, each group spans its columns.
        out.append("<tr>")
        out.extend(f'<th rowspan="2">{h(_gb_text(c.get("label")))}</th>' for c in base_cols)
        for group, g_cols in grouped:
            span = max(1, len(g_cols))
            out.append(f'<th colspan="{span}">{h(_gb_text(group.get("label")))}</th>')
        out.append("</tr><tr>")
        for _, g_cols in grouped:
            out.extend(
                f'<th>{h(_gb_text(c.get("label")))}</th>' for c in g_cols if isinstance(c, dict)
            )
        out.append("</tr>")
    else:
        out.append("<tr>")
        out.extend(f'<th>{h(_gb_text(c.get("label")))}</th>' for c in all_cols)
        out.append("</tr>")

    out.append("</thead><tbody>")
    for row in rows:
        if not isinstance(row, dict):
            continue
        out.append("<tr>")
        for key in col_keys:
            value = row.get(key, "")
            if isinstance(value, dict):
                value = value.get("text", "")
            out.append(f"<td>{_gb_cell(value)}</td>")
        out.append("</tr>")
    out.append("</tbody></table>")
    return "".join(out)


def _gb_v3_row(cells: list, tag: str) -> str:
    """Render one table_v3/v4 row using *tag* (``th`` or ``td``) for each cell."""
    out = ["<tr>"]
    for cell in cells:
        attr, style, content = _parse_v3_cell(cell)
        out.append(f"<{tag}{attr}{style}>{content}</{tag}>")
    out.append("</tr>")
    return "".join(out)


def _gb_doc_block(block: dict, title_norm: str) -> str:
    """Render one ``doc_v1`` block; block types without a renderer yield ""."""
    btype = _gb_text(block.get("type"))
    raw_text = _gb_text(block.get("text"))

    # Skip heading/subheading blocks that duplicate the section title.
    # NOTE(review): other "heading" blocks have no renderer below and are
    # dropped as well -- confirm that is intended.
    if btype in ("heading", "subheading") and raw_text.strip().lower() == title_norm:
        return ""

    if btype == "paragraph":
        text = format_money_figures(raw_text)
        return f'<p class="p">{emphasize_keywords(text)}</p>' if text.strip() else ""

    if btype == "subheading":
        text = format_money_figures(raw_text)
        return f'<h3 class="h3 keep-with-next">{h(text)}</h3>' if text.strip() else ""

    if btype in ("bullets", "numbered_list"):
        tag = "ul" if btype == "bullets" else "ol"
        out = [f'<{tag} class="{tag}">']
        for item in _gb_list(block.get("items")):
            item_text = format_money_figures(_gb_text(item).strip())
            if item_text:
                out.append(f"<li>{emphasize_keywords(item_text)}</li>")
        out.append(f"</{tag}>")
        return "".join(out)

    if btype == "note":
        text = format_money_figures(raw_text)
        return f'<div class="note">{h(text)}</div>' if text.strip() else ""

    if btype == "note_inline":
        fragments: list[str] = []
        for part in _gb_list(block.get("parts")):
            if not isinstance(part, dict):
                continue
            text = format_money_figures(_gb_text(part.get("text")))
            if not text:
                continue
            escaped = h(text)
            if _gb_text(part.get("style")) == "red_bold":
                escaped = f"<strong>{escaped}</strong>"
            fragments.append(escaped)
        inline = "".join(fragments)
        # Emit the note only when something other than markup is left.
        if re.sub(r"<[^>]+>", "", inline).strip():
            return f'<div class="note">{inline}</div>'
        return ""

    if btype == "table_v1":
        t_cols = _gb_list(block.get("columns"))
        out = ['<table class="tbl">']
        if t_cols:
            out.append("<thead><tr>")
            out.extend(f"<th>{h(_gb_text(c))}</th>" for c in t_cols)
            out.append("</tr></thead>")
        out.append("<tbody>")
        for row in _gb_list(block.get("rows")):
            if not isinstance(row, list):
                continue
            out.append("<tr>")
            out.extend(f"<td>{_gb_cell(cell)}</td>" for cell in row)
            out.append("</tr>")
        out.append("</tbody></table>")
        return "".join(out)

    if btype == "table":
        # Generic table (columns may be objects or strings, rows may be dicts or lists)
        col_labels: list[str] = []
        col_keys: list[str] = []
        for col in _gb_list(block.get("columns")):
            if isinstance(col, dict):
                col_labels.append(_gb_text(col.get("label", col.get("key"))))
                col_keys.append(_gb_text(col.get("key")))
            else:
                col_labels.append(_gb_text(col))
                col_keys.append(re.sub(r"[^a-z0-9]+", "_", _gb_text(col).lower()))
        out = ['<table class="tbl">']
        if col_labels:
            out.append("<thead><tr>")
            out.extend(f"<th>{h(label)}</th>" for label in col_labels)
            out.append("</tr></thead>")
        out.append("<tbody>")
        for row in _gb_list(block.get("rows")):
            if isinstance(row, dict):
                cells = [row.get(key, "") for key in col_keys]
            elif isinstance(row, list):
                cells = row
            else:
                # Rows of any other type still produce an (empty) <tr>.
                cells = []
            out.append("<tr>")
            out.extend(f"<td>{_gb_cell(cell)}</td>" for cell in cells)
            out.append("</tr>")
        out.append("</tbody></table>")
        return "".join(out)

    if btype in ("table_v3", "table_v4"):
        out = ['<table class="tbl">']

        widths = _gb_list(block.get("col_width_pct"))
        if widths:
            out.append("<colgroup>")
            out.extend(_gb_col_tag(w) for w in widths)
            out.append("</colgroup>")

        header_rows = _gb_list(block.get("header_rows"))
        if header_rows:
            out.append("<thead>")
            out.extend(_gb_v3_row(r, "th") for r in header_rows if isinstance(r, list))
            out.append("</thead>")

        out.append("<tbody>")
        out.extend(
            _gb_v3_row(r, "td") for r in _gb_list(block.get("rows")) if isinstance(r, list)
        )
        out.append("</tbody></table>")
        return "".join(out)

    return ""


def render_global_blocks(
    section_key: str,
    section_title: str,
    json_data: dict | list,
    debug: bool = False,
    *,
    universities: list[dict] | None = None,
) -> str:
    """Render a single global section's content as HTML.

    Mirrors PHP renderGlobalBlocks().  After the optional ``<h2>`` title, the
    first matching layout below is rendered and returned:

    1. ``steps`` is a list -> numbered steps.
    2. ``bullets`` is a list, or layout ``bullets_with_note`` with an ``items``
       list -> bullet list plus optional ``note``/``footnote`` (``items`` wins
       when both lists are present).
    3. ``columns`` and ``rows`` are both lists -> basic table.
    4. layout ``table_v2`` -> comparison table with grouped headers.
    5. layout ``doc_v1`` with a ``blocks`` list -> paragraph, subheading,
       bullets, numbered_list, note, note_inline, table_v1, table, table_v3
       and table_v4 blocks.
    6. Otherwise -> a paragraph from ``text`` when that key exists; a warning
       is logged if nothing at all was produced.

    Null JSON values are treated as empty text rather than rendered as "None".

    Args:
        section_key: Section identifier; ``table_of_contents`` suppresses the title.
        section_title: Stored title, used when the JSON carries no ``title``.
        json_data: Section JSON; anything that is not a dict is treated as empty.
        debug: Accepted for signature compatibility; not used by this function.
        universities: Accepted for signature compatibility; not used by this function.

    Returns:
        The HTML fragment for the section (possibly "").
    """
    key_norm = str(section_key or "").lower().strip()

    if not isinstance(json_data, dict):
        json_data = {}

    layout_norm = _gb_text(json_data.get("layout")).lower().strip()

    # ── Section title ──
    # Prefer the JSON-level title (display-ready) over the DB section_title
    title = _gb_text(json_data.get("title")).strip() or str(section_title or "").strip()
    heading_html = ""
    if title and key_norm != "table_of_contents":
        heading_html = f'<h2 class="h2">{h(title)}</h2>'
    title_norm = title.lower()

    # ── Steps ──
    steps = json_data.get("steps")
    if isinstance(steps, list):
        return heading_html + _gb_steps(steps)

    # ── Bullets ──
    has_bullets = isinstance(json_data.get("bullets"), list)
    has_items = isinstance(json_data.get("items"), list)
    if has_bullets or (layout_norm == "bullets_with_note" and has_items):
        entries = json_data.get("items") if has_items else json_data.get("bullets")
        note_raw = json_data.get("note", json_data.get("footnote"))
        return heading_html + _gb_bullets(entries, note_raw)

    # ── Basic table ──
    cols = json_data.get("columns")
    rows = json_data.get("rows")
    if isinstance(cols, list) and isinstance(rows, list):
        return heading_html + _gb_basic_table(cols, rows)

    # ── table_v2 ──
    if layout_norm == "table_v2":
        return heading_html + _gb_table_v2(json_data)

    # ── doc_v1 ──
    blocks = json_data.get("blocks")
    if layout_norm == "doc_v1" and isinstance(blocks, list):
        return heading_html + "".join(
            _gb_doc_block(block, title_norm) for block in blocks if isinstance(block, dict)
        )

    # ── Fallback ──
    html_out = heading_html
    if "text" in json_data:
        html_out += f'<p class="p">{_gb_cell(json_data["text"])}</p>'

    if not html_out.strip():
        logger.warning(
            "Empty section render for key=%s title=%s",
            section_key, section_title,
        )

    return html_out


# =========================================
# University section renderer
# =========================================

def render_university_section(
    uni_name: str,
    sections: list[dict],
    allow_remote: bool,
    is_first_uni: bool,
    include_inactive_programs: bool = False,
    website_url: str = "",
    anchor_id: str | None = None,
    debug: bool = False,
    stats: dict | None = None,
    sort_order: int | None = None,
) -> str:
    """Render a single university section. Mirrors PHP renderUniversitySection.

    The output is one ``<div class="uni">`` wrapper containing, in order:
    the university name (a link when a website is known), a two-column table
    with the summary info and the campus image, the benefits list, the
    programs table, and finally every remaining section rendered through
    ``render_global_blocks``.

    Args:
        uni_name: Display name of the university (HTML-escaped on output).
        sections: Section dicts read through the keys ``section_key``,
            ``section_title`` and ``section_json``. Non-dict entries and
            entries without a section key are ignored. Duplicate
            ``programs`` sections are merged; the input dicts are not
            modified.
        allow_remote: When false the campus image is never fetched and the
            placeholder cell is rendered instead.
        is_first_uni: When false the wrapper gets the ``page-break`` class.
        include_inactive_programs: When false, programs whose
            ``program_active`` / ``is_active`` / ``active`` flag is not
            truthy (per ``is_truthy``) are dropped; a missing flag counts
            as active.
        website_url: Preferred website URL; when blank, the website found in
            the overview section (if any) is used.
        anchor_id: Optional ``id`` attribute for the wrapper.
        debug: Forwarded to ``render_global_blocks`` for the extra sections.
        stats: Optional counter dict, incremented in place under the keys
            ``universities``, ``university_links``, ``images_embedded``,
            ``images_placeholder`` and ``website_rows``.
        sort_order: Optional value for the wrapper's ``data-sort`` attribute.

    Returns:
        The HTML markup for the university section.
    """

    def _text(value: Any) -> str:
        # A JSON null must render as empty text, not as the literal "None".
        return "" if value is None else str(value)

    classes = ["uni"]
    if not is_first_uni:
        classes.append("page-break")
    class_attr = " ".join(classes)

    id_attr = f' id="{h(anchor_id)}"' if anchor_id else ""
    sort_attr = f' data-sort="{h(str(sort_order))}"' if sort_order is not None else ""

    parts: list[str] = [
        f'<div class="{class_attr}"{id_attr}{sort_attr} '
        f'data-section-key="university" data-section-title="{h(uni_name)}">'
    ]

    has_stats = isinstance(stats, dict)
    if has_stats:
        stats["universities"] = stats.get("universities", 0) + 1

    # Build map; merge duplicate "programs" sections. For every other key the
    # last occurrence wins.
    sec_map: dict[str, dict] = {}
    for s in sections:
        if not isinstance(s, dict):
            continue
        k = _text(s.get("section_key"))
        if not k:
            continue
        if k == "programs" and k in sec_map:
            first = sec_map["programs"]
            existing = first.get("section_json", {})
            incoming = s.get("section_json", {})
            if not isinstance(existing, dict):
                existing = {}
            if not isinstance(incoming, dict):
                incoming = {}
            a = existing.get("programs", [])
            b = incoming.get("programs", [])
            if not isinstance(a, list):
                a = []
            if not isinstance(b, list):
                b = []
            # Merge into copies so the caller's section dicts stay untouched
            # (rendering the same input twice must not duplicate programs).
            merged_json = dict(existing)
            merged_json["programs"] = a + b
            sec_map["programs"] = {**first, "section_json": merged_json}
            continue
        sec_map[k] = s

    # Campus image
    img_section = sec_map.get("campus_image") or sec_map.get("image")
    campus_url = ""
    campus_cap = ""
    if img_section:
        j = img_section.get("section_json", {})
        if isinstance(j, dict):
            campus_url = _text(j.get("image_url")).strip()
            campus_cap = _text(j.get("caption")).strip()

    # Overview data + website (the explicit website_url argument wins)
    overview_json: dict | None = None
    resolved_website = (website_url or "").strip()

    if "overview" in sec_map:
        overview_json = sec_map["overview"].get("section_json", {})
        if not isinstance(overview_json, dict):
            overview_json = {}
        site_from_overview = get_any(
            overview_json,
            ["university_website", "university_website_url", "website", "site", "url", "homepage", "web_url"],
        )
        if not resolved_website and site_from_overview:
            resolved_website = site_from_overview

    # 1. University title
    if resolved_website:
        if has_stats:
            stats["university_links"] = stats.get("university_links", 0) + 1
        parts.append(
            f'<div class="uni-name"><a class="uni-name-link" href="{h(resolved_website)}" '
            f'target="_blank" rel="noopener noreferrer">{h(uni_name)}</a></div>'
        )
    else:
        parts.append(f'<div class="uni-name">{h(uni_name)}</div>')

    # 2-3. Two-column: Summary + Campus image
    embedded = fetch_image_data_uri(campus_url) if allow_remote and campus_url else ""
    image_embedded = bool(embedded)
    if image_embedded:
        campus_cell = f'<img class="campus-top-img" src="{h(embedded)}" alt="Campus Image" />'
        if campus_cap:
            campus_cell += f'<div class="campus-top-cap">{h(campus_cap)}</div>'
    else:
        # Same placeholder whether fetching is disabled, no URL is set, or
        # the fetch returned nothing.
        campus_cell = '<div class="campus-placeholder-cell">Campus image unavailable</div>'

    if has_stats:
        counter = "images_embedded" if image_embedded else "images_placeholder"
        stats[counter] = stats.get(counter, 0) + 1

    summary_parts: list[str] = []
    if overview_json is not None:
        j = overview_json
        founded = get_any(j, ["founded", "Founded"])
        total = get_any(j, ["total_students", "Total Students"])
        undergrad = get_any(j, ["undergraduates", "Undergraduate Students", "undergraduate_students"])
        postgrad = get_any(j, ["postgraduate_students", "Postgraduate Students"])
        acc_rate = get_any(j, ["acceptance_rate", "Acceptance Rate"])
        location = get_any(j, ["location", "Location"])
        tuition = get_any(j, [
            "tuition_out_of_state_yearly",
            "Yearly Out of State Tuition Fees",
            "Yearly Out-of-State Tuition Fees",
            "Yearly Tuition Fees",
            "Yearly Out-of-State Tuition Fees:",
        ])

        summary_parts.append('<div class="summary-title">Summary info</div>')
        summary_parts.append('<ul class="summary-ul">')
        simple_rows = [
            ("Founded:", founded),
            ("Total Students:", total),
            ("Undergraduate Students:", undergrad),
            ("Postgraduate Students:", postgrad),
        ]
        for label, value in simple_rows:
            if value:
                summary_parts.append(f'<li><span class="lbl">{label}</span> {h(value)}</li>')
        # Acceptance rate and location share a single list item.
        if acc_rate or location:
            summary_parts.append("<li>")
            if acc_rate:
                summary_parts.append(f'<span class="lbl">Acceptance Rate:</span> {h(acc_rate)} ')
            if location:
                summary_parts.append(f'<span class="lbl">Location:</span> {h(location)}')
            summary_parts.append("</li>")
        if tuition:
            summary_parts.append(
                f'<li><span class="lbl">Yearly Tuition/Out-of-State Tuition:</span> {h(tuition)}</li>'
            )
        summary_parts.append("</ul>")

        if resolved_website:
            if has_stats:
                stats["website_rows"] = stats.get("website_rows", 0) + 1
            summary_parts.append(
                f'<div class="uni-website"><span class="lbl">Website:</span> '
                f'<a href="{h(resolved_website)}" target="_blank" rel="noopener noreferrer">'
                f'{h(resolved_website)}</a></div>'
            )
    summary_cell = "".join(summary_parts)

    parts.append(
        '<table class="school-top-table" cellspacing="0" cellpadding="0"><tr>'
        f'<td class="school-top-summary" style="vertical-align:top;">{summary_cell}</td>'
        f'<td class="school-top-campus" style="vertical-align:top;">{campus_cell}</td>'
        "</tr></table>"
    )

    # 4. Benefits
    if "benefits" in sec_map:
        j = sec_map["benefits"].get("section_json", {})
        if not isinstance(j, dict):
            j = {}
        benefits = j.get("benefits", [])
        if not isinstance(benefits, list):
            benefits = []

        parts.append('<div class="benefits-section">')
        parts.append('<div class="benefits-bar">Benefits for ISP students at this school</div>')
        if benefits:
            parts.append('<ul class="benefits-ul">')
            for b in benefits:
                b_str = _text(b).strip()
                if not b_str:
                    continue
                parts.append(
                    f'<li class="benefit-li"><span class="benefit-bullet">&bull;</span> '
                    f'<span class="benefit-text">{h(b_str)}</span></li>'
                )
            parts.append("</ul>")
        else:
            parts.append('<div class="muted" style="margin:4px 0 6px;">No benefits listed.</div>')
        parts.append("</div>")

    # 5. Programs
    if "programs" in sec_map:
        j = sec_map["programs"].get("section_json", {})
        if not isinstance(j, dict):
            j = {}
        programs = j.get("programs", [])
        if not isinstance(programs, list):
            programs = []

        # Drop malformed entries up front so a list holding only invalid
        # items shows the "No programs listed." message, not an empty table.
        programs = [p for p in programs if isinstance(p, dict)]

        # Filter inactive (a missing flag counts as active)
        if not include_inactive_programs:
            programs = [
                p for p in programs
                if is_truthy(p.get("program_active", p.get("is_active", p.get("active", 1))))
            ]

        parts.append(
            '<div class="qualify">To qualify for The International Scholars Program at '
            f"{h(uni_name)}, you must be willing to study any of the following programs:</div>"
        )

        if programs:
            # The header row must open <thead><tr> to pair with the closing
            # </tr></thead> below.
            parts.append(
                '<table class="programs"><thead><tr>'
                '<th style="width:34%">Program</th>'
                '<th style="width:33%">Designation</th>'
                '<th style="width:33%">Entrance Examination</th></tr></thead><tbody>'
            )

            for p in programs:
                program_name = _text(p.get("program_name")).strip()
                link = _text(p.get("program_link")).strip()
                if not link and isinstance(p.get("program_links"), dict):
                    link = _text(p["program_links"].get("web_link")).strip()

                program_name_html = h(program_name)
                if link:
                    program_name_html = (
                        f'<a href="{h(link)}" target="_blank" rel="noopener noreferrer">'
                        f"{program_name_html}</a>"
                    )

                entrance = _text(p.get("entrance_exam", p.get("entrance_examination")))
                designation = _text(p.get("designation"))
                parts.append(
                    "<tr>"
                    f"<td>{program_name_html}</td>"
                    f"<td>{h(designation)}</td>"
                    f"<td>{h(entrance)}</td>"
                    "</tr>"
                )

            parts.append("</tbody></table>")
        else:
            parts.append('<div class="muted" style="margin:0 0 6px;">No programs listed.</div>')

    # Extra sections: everything not handled above, in input order
    skip_keys = {"campus_image", "image", "overview", "benefits", "programs"}
    for s in sections:
        if not isinstance(s, dict):
            continue
        k = _text(s.get("section_key"))
        if not k or k in skip_keys:
            continue
        title = _text(s.get("section_title"))
        j = s.get("section_json", {})
        if not isinstance(j, dict):
            j = {}
        parts.append(render_global_blocks(k, title, j, debug))

    parts.append("</div>")
    return "".join(parts)