handbook-engine / app /services /renderers.py
internationalscholarsprogram's picture
Initial deploy: ISP Handbook PDF engine
2deab8c verified
"""Renderers — mirrors PHP renderers.php.
Contains functions for rendering:
- Table of Contents (TOC)
- Global section blocks (overview, steps, bullets, tables, doc_v1, etc.)
- University section blocks (overview, benefits, programs)
- Remote image fetching as data URIs
"""
from __future__ import annotations
import base64
import logging
import re
from typing import Any
import httpx
from app.services.utils import (
emphasize_keywords,
format_money_figures,
get_any,
h,
hb_slug,
is_assoc,
is_truthy,
)
# Module-level logger named after this module; used for fetch and render warnings below.
logger = logging.getLogger(__name__)
# =========================================
# Image fetching (with in-memory cache + async batch support)
# =========================================
# Process-wide memo of image URL -> data URI. An empty string records a failed
# fetch so the same URL is not requested again. Nothing in this module evicts entries.
_image_cache: dict[str, str] = {}
def _detect_image_mime(data: bytes, content_type: str) -> str:
"""Detect image MIME type from headers or magic bytes."""
if "image/" in content_type:
return content_type.split(";")[0].strip()
if data[:8].startswith(b"\x89PNG"):
return "image/png"
if data[:3] == b"\xff\xd8\xff":
return "image/jpeg"
if data[:4] == b"GIF8":
return "image/gif"
if data[:4] == b"RIFF" and data[8:12] == b"WEBP":
return "image/webp"
return ""
def fetch_image_data_uri(url: str) -> str:
    """Download a remote image and return it as a ``data:`` URI.

    Mirrors PHP fetchImageDataUri. Results, including failures (stored as
    ""), are memoised in ``_image_cache`` keyed by the stripped URL, so a
    URL is fetched at most once per process.

    Returns:
        ``data:<mime>;base64,<payload>`` on success, otherwise "".
    """
    target = url.strip()
    if not target:
        return ""

    # Cache hit (possibly populated ahead of time by prefetch_images).
    if target in _image_cache:
        return _image_cache[target]

    def _remember(value: str) -> str:
        # Store the outcome and hand it back, so every exit path caches.
        _image_cache[target] = value
        return value

    try:
        # NOTE(review): verify=False disables TLS certificate verification.
        with httpx.Client(verify=False, timeout=12, follow_redirects=True) as client:
            resp = client.get(target)
        succeeded = 200 <= resp.status_code < 300 and bool(resp.content)
        if not succeeded:
            logger.warning("Image fetch failed for %s status=%d", target, resp.status_code)
            return _remember("")
        payload = resp.content
    except Exception as exc:
        logger.warning("Image fetch error for %s: %s", target, exc)
        return _remember("")

    mime = _detect_image_mime(payload, resp.headers.get("content-type", ""))
    if not mime.startswith("image/"):
        logger.warning("Invalid image mime %s for %s", mime, target)
        return _remember("")

    encoded = base64.b64encode(payload).decode("ascii")
    return _remember(f"data:{mime};base64,{encoded}")
async def prefetch_images(urls: list[str]) -> dict[str, str]:
    """Warm ``_image_cache`` by downloading every uncached URL concurrently.

    Fetching the campus images in one ``asyncio.gather`` instead of one by
    one is the point of this function. Failed downloads are cached as ""
    and therefore are not retried later.

    Returns:
        A mapping of each *original* entry of ``urls`` to its cached data
        URI ("" when unavailable).
    """
    import asyncio

    def _snapshot() -> dict[str, str]:
        # Keys are the caller's strings; lookups use the stripped form.
        return {u: _image_cache.get(u.strip(), "") for u in urls}

    stripped = (u.strip() for u in urls)
    pending = list({s for s in stripped if s and s not in _image_cache})
    if not pending:
        return _snapshot()

    async def _fetch_one(client: httpx.AsyncClient, url: str) -> tuple[str, str]:
        # Never raises: any failure is logged and reported as (url, "").
        try:
            resp = await client.get(url)
            if not (200 <= resp.status_code < 300) or not resp.content:
                logger.warning("Prefetch image failed for %s status=%d", url, resp.status_code)
                return url, ""
            mime = _detect_image_mime(resp.content, resp.headers.get("content-type", ""))
            if not mime.startswith("image/"):
                logger.warning("Prefetch invalid mime %s for %s", mime, url)
                return url, ""
            encoded = base64.b64encode(resp.content).decode("ascii")
            return url, f"data:{mime};base64,{encoded}"
        except Exception as exc:
            logger.warning("Prefetch image error for %s: %s", url, exc)
            return url, ""

    logger.info("Prefetching %d campus images in parallel...", len(pending))
    # NOTE(review): verify=False disables TLS certificate verification.
    async with httpx.AsyncClient(verify=False, timeout=15, follow_redirects=True) as client:
        results = await asyncio.gather(*(_fetch_one(client, u) for u in pending))

    _image_cache.update(dict(results))
    ok_count = sum(1 for _, data_uri in results if data_uri)
    logger.info("Prefetched %d/%d images successfully", ok_count, len(pending))
    return _snapshot()
# =========================================
# Funding extraction
# =========================================
def _extract_university_funding(
j: dict,
school_meta: dict | None = None,
) -> tuple[str, list[str]]:
"""Extract funding heading + items from benefits section JSON.
Priority:
1. section_json.funding.options
2. section_json.funding_available
3. fallback from pth_ref_schools.school_category
"""
if not isinstance(j, dict):
j = {}
heading = "Funding Available"
items: list[str] = []
# 1. Preferred normalized shape
funding = j.get("funding", {})
if isinstance(funding, dict):
subheading = str(funding.get("subheading", "")).strip()
if subheading:
heading = subheading
options = funding.get("options", [])
if isinstance(options, list):
for opt in options:
if not isinstance(opt, dict):
continue
name = str(opt.get("name", "")).strip()
amount = str(opt.get("amount", "")).strip()
if name and amount:
items.append(f"{name} - {amount}")
elif name:
items.append(name)
elif amount:
items.append(amount)
# 2. Legacy fallback shape
if not items:
funding_available = j.get("funding_available", [])
if isinstance(funding_available, list):
for item in funding_available:
text = str(item).strip()
if text:
items.append(text)
# 3. School-category fallback
if not items and isinstance(school_meta, dict):
school_category = str(school_meta.get("school_category", "")).strip().lower()
status = str(school_meta.get("status", "")).strip().lower()
if status == "in":
if school_category == "non_cosigner":
items = [
"ISP Study Loan - $10,000",
"Partner 1 (Unsecured Loan) - Up to $50,000 per academic year",
"Partner 3 (Credit Option) - Up to $15,000",
]
elif school_category == "cosigner":
items = [
"ISP Study Loan - $10,000",
"Partner 2 (A Cosigned Loan) - Full Coverage Support",
"Partner 3 (Credit Option) - Up to $15,000",
]
return (heading, items)
# =========================================
# TOC sorting and rendering
# =========================================
def sort_toc(items: list[dict]) -> list[dict]:
    """Sort TOC entries in place by ``sort_order`` (falling back to ``sort``).

    Mirrors PHP sortHandbookToc. Entries with a numeric sort value come
    first in ascending order; everything else (no value, non-numeric or NaN
    value, or an entry that is not a dict) follows. ``list.sort`` is
    stable, so ties and unsortable entries keep their original relative
    order without any bookkeeping key being written into the entries.

    Args:
        items: TOC entries; sorted in place.

    Returns:
        The same list object, for call chaining.
    """
    import math

    def _key(entry) -> tuple[int, float]:
        # The renderers skip non-dict entries, so sorting must tolerate them.
        if isinstance(entry, dict):
            raw = entry.get("sort_order", entry.get("sort"))
            if raw is not None:
                try:
                    number = float(raw)
                except (TypeError, ValueError):
                    number = math.nan
                # NaN compares false against everything and would make the
                # ordering inconsistent; treat it as "no sort value".
                if not math.isnan(number):
                    return (0, number)
        return (1, 0.0)

    items.sort(key=_key)
    return items
def render_toc(items: list[dict], debug: bool = False, show_pages: bool = True) -> str:
    """Render the Table of Contents as a DOMPDF-safe HTML table.

    Mirrors PHP renderToc(). Entries are sorted with ``sort_toc`` (which
    sorts ``items`` in place). Non-dict entries and entries without a title
    are skipped. Level 0 entries are always bold and upper-cased.

    Args:
        items: TOC entries with ``title``, optional ``target``/``anchor``,
            ``level`` (clamped to 0..3), ``bold``, ``upper`` and ``page``.
        debug: Accepted for signature parity; not used by this function.
        show_pages: When false, the page-number column is left blank.

    Returns:
        The TOC markup as a single HTML string.
    """

    def _text(value) -> str:
        # JSON nulls must render as nothing, not as the text "None".
        return "" if value is None else str(value).strip()

    sorted_items = sort_toc(items)
    out = [
        '<!-- HANDBOOK_TOC_V2 -->',
        '<div class="toc">',
        '<div class="toc-heading">Table of Contents</div>',
        '<table class="toc-table" width="100%" cellspacing="0" cellpadding="0"'
        ' style="border-collapse:collapse; table-layout:fixed; width:100%;">'
        '<colgroup><col /><col width="50" /><col width="48" /></colgroup>',
    ]

    for e in sorted_items:
        if not isinstance(e, dict):
            continue
        title = _text(e.get("title", ""))
        if not title:
            continue
        target = _text(e.get("target", e.get("anchor", "")))

        # A missing, null or non-numeric level falls back to a top-level entry
        # instead of aborting the whole render.
        try:
            level = int(e.get("level", 0))
        except (TypeError, ValueError, OverflowError):
            level = 0
        level = max(0, min(3, level))

        bold = bool(e.get("bold", False))
        upper = bool(e.get("upper", False))
        if level == 0:
            bold = True
            upper = True

        row_class = "toc-row--major" if level == 0 else "toc-row--sub"
        if level >= 2:
            row_class += " toc-row--deep"

        title_inner = h(title.upper() if upper else title)
        if target:
            title_inner = f'<a href="{h(target)}">{title_inner}</a>'
        if bold:
            title_inner = f"<strong>{title_inner}</strong>"

        page = _text(e.get("page", ""))
        page_cell = f"<strong>{h(page)}</strong>" if show_pages and page else "&nbsp;"

        if level == 1:
            indent = "padding-left:16px;"
        elif level >= 2:
            indent = "padding-left:30px;"
        else:
            indent = ""

        title_style = (
            "vertical-align:bottom; padding:1px 4px 1px 0; font-size:10px; "
            "line-height:1.15; color:#111;"
            + (" font-weight:700;" if bold else " font-weight:400;")
            + (" text-transform:uppercase; letter-spacing:0.1px;" if upper else "")
            + (f" {indent}" if indent else "")
        )

        out.append(f'<tr class="{h(row_class)}">')
        out.append(f'<td class="toc-title" style="{title_style}">{title_inner}</td>')
        out.append(
            '<td class="toc-dots" style="vertical-align:bottom; border-bottom:1px dotted #777; '
            'height:0.85em; padding:0;">&nbsp;</td>'
        )
        out.append(
            '<td class="toc-pagenum" style="vertical-align:bottom; text-align:right; '
            'padding-left:4px; font-size:10px; font-weight:700; line-height:1.15; '
            f'white-space:nowrap; width:48px; color:#111;">{page_cell}</td>'
        )
        out.append("</tr>")

    out.append("</table></div>")
    return "".join(out)
def render_toc_hardcoded(
    items: list[dict],
    debug: bool = False,
    page_start: int = 3,
    page_offset: int = 0,
) -> str:
    """Render the TOC with page numbers assigned up front.

    Mirrors PHP renderTocHardcoded: sort, assign pages, render. An entry
    whose ``page`` is an (optionally negative) integer keeps it, shifted by
    ``page_offset``. Every other entry receives the next sequential number,
    which starts at ``max(1, page_start)`` and always continues after the
    highest explicit page seen so far. The computed page is written back
    into each entry's ``page`` key, so ``items`` is mutated.

    Args:
        items: TOC entries; sorted in place and updated with ``page``.
        debug: Accepted for signature parity; not used by this function.
        page_start: First page number for entries without an explicit page.
        page_offset: Added to explicit page numbers.

    Returns:
        The TOC markup as a single HTML string.
    """

    def _text(value) -> str:
        # JSON nulls must render as nothing, not as the text "None".
        return "" if value is None else str(value).strip()

    sorted_items = sort_toc(items)

    seq = max(1, page_start)
    for item in sorted_items:
        if not isinstance(item, dict):
            continue
        raw_page = _text(item.get("page", ""))
        # fullmatch only admits strings int() can parse; inputs such as
        # "--5" or a superscript digit get a sequential page instead of
        # raising ValueError.
        if re.fullmatch(r"-?\d+", raw_page):
            display = int(raw_page) + page_offset
            item["page"] = str(display)
            if display >= seq:
                seq = display + 1
        else:
            item["page"] = str(seq)
            seq += 1

    out = [
        "<!-- HANDBOOK_TOC_HARDCODED -->\n",
        '<div class="toc">',
        '<p class="toc-heading">Table of Contents</p>',
        '<table class="toc-table" style="table-layout:fixed;width:100%;">'
        '<colgroup><col /><col width="50" /><col width="48" /></colgroup>',
    ]

    for e in sorted_items:
        if not isinstance(e, dict):
            continue
        title = _text(e.get("title", ""))
        if not title:
            continue
        target = _text(e.get("target", e.get("anchor", "")))

        # A missing, null or non-numeric level falls back to a top-level entry.
        try:
            level = int(e.get("level", 0))
        except (TypeError, ValueError, OverflowError):
            level = 0
        level = max(0, min(3, level))

        bold = bool(e.get("bold", False))
        upper = bool(e.get("upper", False))
        if level == 0:
            bold = True
            upper = True

        row_class = "toc-row--major" if level == 0 else "toc-row--sub"
        if level >= 2:
            row_class += " toc-row--deep"

        title_inner = h(title.upper() if upper else title)
        if target:
            title_inner = f'<a href="{h(target)}">{title_inner}</a>'
        if bold:
            title_inner = f"<strong>{title_inner}</strong>"

        page = _text(e.get("page", ""))
        page_html = f"<strong>{h(page)}</strong>" if page else "&nbsp;"

        if level == 1:
            indent = "padding-left:16px;"
        elif level >= 2:
            indent = "padding-left:30px;"
        else:
            indent = ""

        title_style = (
            "vertical-align:bottom;padding:1px 4px 1px 0;font-size:10px;"
            "line-height:1.15;color:#111;"
            + ("font-weight:700;" if bold else "font-weight:400;")
            + ("text-transform:uppercase;letter-spacing:0.1px;" if upper else "")
            + indent
        )

        out.append(f'<tr class="{h(row_class)}">')
        out.append(f'<td class="toc-title" style="{title_style}">{title_inner}</td>')
        out.append(
            '<td class="toc-dots" style="vertical-align:bottom;padding:0;">'
            '<span class="toc-dots-inner">&nbsp;</span></td>'
        )
        out.append(
            '<td class="toc-pagenum" style="vertical-align:bottom;text-align:right;'
            'padding-left:4px;font-size:10px;font-weight:700;line-height:1.15;'
            f'white-space:nowrap;width:48px;color:#111111;">{page_html}</td>'
        )
        out.append("</tr>")

    out.append("</table></div>")
    return "".join(out)
# =========================================
# table_v3 / table_v4 cell helpers
# =========================================
# Mapping of style names → inline CSS strings for table_v3/v4 cells
_V3_STYLE_MAP: dict[str, str] = {
"band_teal": "text-align:center;font-weight:700;color:#fff;background:#199970;",
"band_navy": "text-align:center;font-weight:700;color:#fff;background:#0263A3;",
"bold_amounts": "font-weight:600;",
"green_center_bold": "text-align:center;font-weight:700;color:#199970;",
"center_bold_multiline": "text-align:center;font-weight:600;vertical-align:middle;",
"footer_center_bold": "text-align:center;font-weight:700;background:#f5f5f5;",
"covered_merged": "vertical-align:top;font-size:9pt;line-height:1.5;",
}
def _parse_v3_cell(cell: Any) -> tuple[str, str, str]:
    """Turn a table_v3/v4 cell into ``(attr_str, style_str, html_content)``.

    A scalar cell becomes escaped, money-formatted text with no attributes;
    numbers (including ``0``) are always rendered, while ``None`` and other
    empty values render as an empty cell. A dict cell may carry:

    * ``text``: plain content
    * ``colspan`` / ``rowspan``: emitted only when greater than 1
    * ``style``: a key of ``_V3_STYLE_MAP``
    * ``parts``: list of ``{"text", "bold"}`` dicts joined by blank lines;
      when it yields nothing, ``text`` is used instead

    Returns:
        ``attr_str`` and ``style_str`` each start with a space (or are
        empty) so they can be spliced directly after the tag name.
    """

    def _text(value: Any) -> str:
        # JSON nulls must render as nothing, not as the text "None".
        return "" if value is None else str(value)

    def _span(value: Any) -> int:
        # isdecimal() admits exactly the digit strings int() can parse;
        # isdigit() would also admit e.g. superscripts and then crash.
        raw = _text(value)
        return int(raw) if raw.isdecimal() else 1

    if not isinstance(cell, dict):
        # A bare truthiness test would blank out a legitimate numeric 0.
        is_number = isinstance(cell, (int, float)) and not isinstance(cell, bool)
        text = format_money_figures(str(cell)) if (is_number or cell) else ""
        return ("", "", h(text))

    colspan = _span(cell.get("colspan"))
    rowspan = _span(cell.get("rowspan"))
    attr = ""
    if colspan > 1:
        attr += f' colspan="{colspan}"'
    if rowspan > 1:
        attr += f' rowspan="{rowspan}"'

    inline_css = _V3_STYLE_MAP.get(_text(cell.get("style", "")), "")
    style_str = f' style="{inline_css}"' if inline_css else ""

    # Rich parts within cell (merged cells with multiple text blocks)
    html_parts: list[str] = []
    parts = cell.get("parts")
    if isinstance(parts, list):
        for part in parts:
            if not isinstance(part, dict):
                continue
            part_text = format_money_figures(_text(part.get("text", "")))
            if not part_text:
                continue
            escaped = h(part_text)
            html_parts.append(f"<strong>{escaped}</strong>" if part.get("bold") else escaped)

    if html_parts:
        content = "<br><br>".join(html_parts)
    else:
        content = h(format_money_figures(_text(cell.get("text", ""))))
    return (attr, style_str, content)
# =========================================
# Global blocks renderer
# =========================================
def _gb_text(value: Any) -> str:
    """Return ``str(value)``, mapping ``None`` (JSON null) to ""."""
    return "" if value is None else str(value)


def _gb_as_list(value: Any) -> list:
    """Return *value* if it is a list, otherwise an empty list."""
    return value if isinstance(value, list) else []


def _gb_header_row(labels: list) -> str:
    """Render a ``<thead>`` with one escaped ``<th>`` per label ("" if none)."""
    if not labels:
        return ""
    cells = "".join(f"<th>{h(_gb_text(lbl))}</th>" for lbl in labels)
    return f"<thead><tr>{cells}</tr></thead>"


def _gb_money_td(value: Any) -> str:
    """Render one body cell: money-formatted, then HTML-escaped."""
    return f"<td>{h(format_money_figures(_gb_text(value)))}</td>"


def _gb_column_key(label: Any) -> str:
    """Derive a row-dict key from a column label (lowercase, non-alnum -> "_")."""
    return re.sub(r"[^a-z0-9]+", "_", _gb_text(label).lower())


def _gb_render_steps(steps: list) -> str:
    """Render numbered "Step N" cards with optional body, links and QR image."""
    out: list[str] = []
    step_num = 0
    for step in steps:
        if not isinstance(step, dict):
            continue
        step_num += 1
        step_title = _gb_text(step.get("title", step.get("step_title", ""))).strip()
        body = format_money_figures(
            _gb_text(step.get("body", step.get("description", ""))).strip()
        )
        out.append('<div class="avoid-break" style="margin:0 0 4px;">')
        if step_title:
            out.append(f'<div class="h3">Step {step_num}: {h(step_title)}</div>')
        if body:
            out.append(f'<p class="p">{emphasize_keywords(body)}</p>')

        links = step.get("links", [])
        if isinstance(links, list) and links:
            out.append('<ul class="ul">')
            for lnk in links:
                if not isinstance(lnk, dict):
                    continue
                raw_label = lnk.get("label")
                # A missing or null label falls back to the generic "Link".
                label = "Link" if raw_label is None else str(raw_label).strip()
                url = _gb_text(lnk.get("url", "")).strip()
                if not url:
                    continue
                out.append(
                    f'<li><a href="{h(url)}" target="_blank" '
                    f'rel="noopener noreferrer">{h(label)}</a></li>'
                )
            out.append("</ul>")

        qr = _gb_text(step.get("qr_url", step.get("qr_image", ""))).strip()
        if qr:
            out.append(
                f'<img src="{h(qr)}" alt="QR" style="width:60px; height:60px; margin:4px 0;" />'
            )
        out.append("</div>")
    return "".join(out)


def _gb_render_list(tag: str, items: Any) -> str:
    """Render ``<ul class="ul">`` / ``<ol class="ol">`` with emphasized items."""
    out = [f'<{tag} class="{tag}">']
    for item in _gb_as_list(items):
        text = format_money_figures(_gb_text(item).strip())
        if text:
            out.append(f"<li>{emphasize_keywords(text)}</li>")
    out.append(f"</{tag}>")
    return "".join(out)


def _gb_render_bullets(json_data: dict, use_items: bool) -> str:
    """Render the section-level bullet list plus its optional note/footnote."""
    source = json_data.get("items") if use_items else json_data.get("bullets")
    out = [_gb_render_list("ul", source)]
    note = format_money_figures(
        _gb_text(json_data.get("note", json_data.get("footnote", ""))).strip()
    )
    if note:
        out.append(f'<div class="note">{h(note)}</div>')
    return "".join(out)


def _gb_render_basic_table(cols: list, rows: list) -> str:
    """Render a plain table; dict rows are looked up by keys derived from labels."""
    out = ['<table class="tbl">', _gb_header_row(cols), "<tbody>"]
    for row in rows:
        if isinstance(row, dict):
            cells = [row.get(_gb_column_key(label), "") for label in cols]
        elif isinstance(row, list):
            cells = row
        else:
            continue
        out.append("<tr>" + "".join(_gb_money_td(c) for c in cells) + "</tr>")
    out.append("</tbody></table>")
    return "".join(out)


def _gb_render_table_v2(json_data: dict) -> str:
    """Render a comparison table with base columns and grouped header columns."""
    base_cols = [c for c in _gb_as_list(json_data.get("base_columns", [])) if isinstance(c, dict)]
    groups = _gb_as_list(json_data.get("header_groups", []))
    group_dicts = [g for g in groups if isinstance(g, dict)]
    rows = _gb_as_list(json_data.get("rows", []))

    def _label(col: dict) -> str:
        return _gb_text(col.get("label", ""))

    def _group_cols(group: dict) -> list:
        return _gb_as_list(group.get("columns", []))

    # Flattened column order: base columns first, then each group's columns.
    all_cols = [{"key": _gb_text(c.get("key", "")), "label": _label(c)} for c in base_cols]
    for group in group_dicts:
        all_cols.extend(
            {"key": _gb_text(c.get("key", "")), "label": _label(c)}
            for c in _group_cols(group)
            if isinstance(c, dict)
        )

    out = ['<table class="tbl tbl-comparison"><thead>']
    if groups:
        # Two header rows: base columns span both, groups span their columns.
        out.append("<tr>")
        out.extend(f'<th rowspan="2">{h(_label(c))}</th>' for c in base_cols)
        for group in group_dicts:
            span = max(1, len(_group_cols(group)))
            out.append(f'<th colspan="{span}">{h(_gb_text(group.get("label", "")))}</th>')
        out.append("</tr><tr>")
        for group in group_dicts:
            out.extend(
                f"<th>{h(_label(c))}</th>" for c in _group_cols(group) if isinstance(c, dict)
            )
        out.append("</tr>")
    else:
        out.append("<tr>")
        out.extend(f'<th>{h(c["label"])}</th>' for c in all_cols)
        out.append("</tr>")
    out.append("</thead><tbody>")

    for row in rows:
        if not isinstance(row, dict):
            continue
        out.append("<tr>")
        for col in all_cols:
            val = row.get(col["key"], "")
            if isinstance(val, dict):
                val = val.get("text", "")
            out.append(_gb_money_td(val))
        out.append("</tr>")
    out.append("</tbody></table>")
    return "".join(out)


def _gb_render_note_inline(block: dict) -> str:
    """Render a note built from styled text parts ("red_bold" -> <strong>)."""
    txt = ""
    for part in _gb_as_list(block.get("parts", [])):
        if not isinstance(part, dict):
            continue
        text = format_money_figures(_gb_text(part.get("text", "")))
        if not text:
            continue
        if _gb_text(part.get("style", "")) == "red_bold":
            txt += f"<strong>{h(text)}</strong>"
        else:
            txt += h(text)
    # Emit the note only if something remains once the tags are stripped.
    if re.sub(r"<[^>]+>", "", txt).strip():
        return f'<div class="note">{txt}</div>'
    return ""


def _gb_render_doc_table(block: dict, *, generic: bool) -> str:
    """Render a doc_v1 ``table_v1`` (list rows) or generic ``table`` block."""
    t_cols = _gb_as_list(block.get("columns", []))
    t_rows = _gb_as_list(block.get("rows", []))

    if not generic:
        out = ['<table class="tbl">', _gb_header_row(t_cols), "<tbody>"]
        for row in t_rows:
            if isinstance(row, list):
                out.append("<tr>" + "".join(_gb_money_td(c) for c in row) + "</tr>")
        out.append("</tbody></table>")
        return "".join(out)

    # Generic table: columns may be objects or strings, rows dicts or lists.
    col_labels: list[str] = []
    col_keys: list[str] = []
    for col in t_cols:
        if isinstance(col, dict):
            col_labels.append(_gb_text(col.get("label", col.get("key", ""))))
            col_keys.append(_gb_text(col.get("key", "")))
        else:
            col_labels.append(_gb_text(col))
            col_keys.append(_gb_column_key(col))

    out = ['<table class="tbl">', _gb_header_row(col_labels), "<tbody>"]
    for row in t_rows:
        # A row of any other type still yields an (empty) <tr>.
        out.append("<tr>")
        if isinstance(row, dict):
            out.extend(_gb_money_td(row.get(k, "")) for k in col_keys)
        elif isinstance(row, list):
            out.extend(_gb_money_td(c) for c in row)
        out.append("</tr>")
    out.append("</tbody></table>")
    return "".join(out)


def _gb_render_table_v3(block: dict) -> str:
    """Render a table_v3/v4 block: optional colgroup, header rows, body rows."""
    t_rows = _gb_as_list(block.get("rows", []))
    h_rows = _gb_as_list(block.get("header_rows", []))
    col_widths = _gb_as_list(block.get("col_width_pct", []))

    def _row(cells: list, tag: str) -> str:
        rendered = []
        for cell in cells:
            c_attr, c_style, c_text = _parse_v3_cell(cell)
            rendered.append(f"<{tag}{c_attr}{c_style}>{c_text}</{tag}>")
        return "<tr>" + "".join(rendered) + "</tr>"

    out = ['<table class="tbl">']
    if col_widths:
        out.append("<colgroup>")
        # The width comes from section JSON; escape it before it lands in an
        # attribute value.
        out.extend(f'<col style="width:{h(_gb_text(w))}%">' for w in col_widths)
        out.append("</colgroup>")
    if h_rows:
        out.append("<thead>")
        out.extend(_row(hr, "th") for hr in h_rows if isinstance(hr, list))
        out.append("</thead>")
    out.append("<tbody>")
    out.extend(_row(r, "td") for r in t_rows if isinstance(r, list))
    out.append("</tbody></table>")
    return "".join(out)


def _gb_render_doc_block(block: dict, title_norm: str) -> str:
    """Render one doc_v1 block; unknown block types render as ""."""
    btype = _gb_text(block.get("type", ""))

    # Skip heading/subheading blocks that duplicate the section title.
    # NOTE(review): a "heading" block that does NOT duplicate the title has no
    # renderer below and is dropped; confirm against the PHP original.
    if btype in ("heading", "subheading"):
        if _gb_text(block.get("text", "")).strip().lower() == title_norm:
            return ""

    if btype in ("paragraph", "subheading", "note"):
        text = format_money_figures(_gb_text(block.get("text", "")))
        if not text.strip():
            return ""
        if btype == "paragraph":
            return f'<p class="p">{emphasize_keywords(text)}</p>'
        if btype == "subheading":
            return f'<h3 class="h3 keep-with-next">{h(text)}</h3>'
        return f'<div class="note">{h(text)}</div>'
    if btype == "bullets":
        return _gb_render_list("ul", block.get("items", []))
    if btype == "numbered_list":
        return _gb_render_list("ol", block.get("items", []))
    if btype == "note_inline":
        return _gb_render_note_inline(block)
    if btype == "table_v1":
        return _gb_render_doc_table(block, generic=False)
    if btype == "table":
        return _gb_render_doc_table(block, generic=True)
    if btype in ("table_v3", "table_v4"):
        return _gb_render_table_v3(block)
    return ""


def render_global_blocks(
    section_key: str,
    section_title: str,
    json_data: dict | list,
    debug: bool = False,
    *,
    universities: list[dict] | None = None,
) -> str:
    """Render a single global section's content as HTML.

    Mirrors PHP renderGlobalBlocks(). After the optional ``<h2>`` title, the
    first matching shape wins and is the only one rendered:

    1. ``steps`` list
    2. ``bullets`` list (or ``items`` with layout "bullets_with_note")
    3. ``columns`` + ``rows`` basic table
    4. layout "table_v2"
    5. layout "doc_v1" with a ``blocks`` list
    6. fallback: a single ``text`` paragraph

    JSON nulls are rendered as empty text rather than the literal "None".

    Args:
        section_key: Section identifier; "table_of_contents" suppresses the title.
        section_title: DB title, used when the JSON has no ``title`` of its own.
        json_data: Section JSON; anything that is not a dict is treated as empty.
        debug: Accepted for signature parity; not used by this function.
        universities: Accepted for signature parity; not used by this function.

    Returns:
        The section markup. A warning is logged when it is empty.
    """
    key_norm = _gb_text(section_key).lower().strip()
    if not isinstance(json_data, dict):
        json_data = {}
    layout_norm = _gb_text(json_data.get("layout", "")).lower().strip()

    # Prefer the JSON-level title (display-ready) over the DB section_title.
    title = _gb_text(json_data.get("title", "")).strip() or _gb_text(section_title).strip()
    html_out = ""
    if title and key_norm != "table_of_contents":
        html_out += f'<h2 class="h2">{h(title)}</h2>'

    steps = json_data.get("steps")
    if isinstance(steps, list):
        return html_out + _gb_render_steps(steps)

    has_bullets = isinstance(json_data.get("bullets"), list)
    has_items = isinstance(json_data.get("items"), list)
    if has_bullets or (layout_norm == "bullets_with_note" and has_items):
        return html_out + _gb_render_bullets(json_data, has_items)

    cols = json_data.get("columns")
    rows = json_data.get("rows")
    if isinstance(cols, list) and isinstance(rows, list):
        return html_out + _gb_render_basic_table(cols, rows)

    if layout_norm == "table_v2":
        return html_out + _gb_render_table_v2(json_data)

    blocks = json_data.get("blocks")
    if layout_norm == "doc_v1" and isinstance(blocks, list):
        title_norm = title.lower()
        return html_out + "".join(
            _gb_render_doc_block(b, title_norm) for b in blocks if isinstance(b, dict)
        )

    # Fallback: a bare text paragraph.
    if "text" in json_data:
        html_out += f'<p class="p">{h(format_money_figures(_gb_text(json_data["text"])))}</p>'
    if not html_out.strip():
        logger.warning(
            "Empty section render for key=%s title=%s",
            section_key, section_title,
        )
    return html_out
# =========================================
# University section renderer
# =========================================
def _uni_text(value: Any) -> str:
    """Return ``str(value)``, mapping ``None`` (JSON null) to ""."""
    return "" if value is None else str(value)


def _uni_section_json(section: dict) -> dict:
    """Return a section's ``section_json`` if it is a dict, else ``{}``."""
    payload = section.get("section_json", {})
    return payload if isinstance(payload, dict) else {}


def _uni_build_section_map(sections: list[dict]) -> dict[str, dict]:
    """Index sections by ``section_key``, concatenating duplicate "programs".

    For any other duplicated key the last section wins. Merged program
    lists are stored in fresh dicts so the caller's section objects are
    never modified, which keeps repeated renders of the same input stable.
    """
    sec_map: dict[str, dict] = {}
    for section in sections:
        if not isinstance(section, dict):
            continue
        key = _uni_text(section.get("section_key", ""))
        if not key:
            continue
        if key == "programs" and key in sec_map:
            existing = _uni_section_json(sec_map["programs"])
            incoming = _uni_section_json(section)
            first = existing.get("programs", [])
            second = incoming.get("programs", [])
            if not isinstance(first, list):
                first = []
            if not isinstance(second, list):
                second = []
            sec_map["programs"] = {
                **sec_map["programs"],
                "section_json": {**existing, "programs": first + second},
            }
            continue
        sec_map[key] = section
    return sec_map


def _uni_summary_list(j: dict) -> str:
    """Render the "Summary info" title and fact list from overview JSON."""
    founded = get_any(j, ["founded", "Founded"])
    total = get_any(j, ["total_students", "Total Students"])
    undergrad = get_any(j, ["undergraduates", "Undergraduate Students", "undergraduate_students"])
    postgrad = get_any(j, ["postgraduate_students", "Postgraduate Students"])
    acc_rate = get_any(j, ["acceptance_rate", "Acceptance Rate"])
    location = get_any(j, ["location", "Location"])
    tuition = get_any(j, [
        "tuition_out_of_state_yearly",
        "Yearly Out of State Tuition Fees",
        "Yearly Out-of-State Tuition Fees",
        "Yearly Tuition Fees",
        "Yearly Out-of-State Tuition Fees:",
    ])

    out = ['<div class="summary-title">Summary info</div>', '<ul class="summary-ul">']
    simple_rows = (
        ("Founded:", founded),
        ("Total Students:", total),
        ("Undergraduate Students:", undergrad),
        ("Postgraduate Students:", postgrad),
    )
    for label, value in simple_rows:
        if value:
            out.append(f'<li><span class="lbl">{label}</span> {h(value)}</li>')
    # Acceptance rate and location share a single list item.
    if acc_rate or location:
        out.append("<li>")
        if acc_rate:
            out.append(f'<span class="lbl">Acceptance Rate:</span> {h(acc_rate)} ')
        if location:
            out.append(f'<span class="lbl">Location:</span> {h(location)}')
        out.append("</li>")
    if tuition:
        out.append(
            f'<li><span class="lbl">Yearly Tuition/Out-of-State Tuition:</span> {h(tuition)}</li>'
        )
    out.append("</ul>")
    return "".join(out)


def _uni_render_benefits(j: dict) -> str:
    """Render the benefits bar and bullet list (or a "none listed" note)."""
    benefits = j.get("benefits", [])
    if not isinstance(benefits, list):
        benefits = []
    out = [
        '<div class="benefits-section">',
        '<div class="benefits-bar">Benefits for ISP students at this school</div>',
    ]
    if benefits:
        out.append('<ul class="benefits-ul">')
        for benefit in benefits:
            text = _uni_text(benefit).strip()
            if not text:
                continue
            out.append(
                '<li class="benefit-li"><span class="benefit-bullet">&bull;</span> '
                f'<span class="benefit-text">{h(text)}</span></li>'
            )
        out.append("</ul>")
    else:
        out.append('<div class="muted" style="margin:4px 0 6px;">No benefits listed.</div>')
    out.append("</div>")
    return "".join(out)


def _uni_render_programs(uni_name: str, j: dict, include_inactive: bool) -> str:
    """Render the qualification sentence and the programs table."""
    programs = j.get("programs", [])
    if not isinstance(programs, list):
        programs = []
    if not include_inactive:
        # A program counts as active unless one of its flags says otherwise.
        programs = [
            p for p in programs
            if isinstance(p, dict)
            and is_truthy(p.get("program_active", p.get("is_active", p.get("active", 1))))
        ]

    out = [
        '<div class="qualify">To qualify for The International Scholars Program at '
        f"{h(uni_name)}, you must be willing to study any of the following programs:</div>"
    ]
    if not programs:
        out.append('<div class="muted" style="margin:0 0 6px;">No programs listed.</div>')
        return "".join(out)

    # The header row must open <thead><tr> before the <th> cells it closes.
    out.append(
        '<table class="programs"><thead><tr>'
        '<th style="width:34%">Program</th>'
        '<th style="width:33%">Designation</th>'
        '<th style="width:33%">Entrance Examination</th></tr></thead><tbody>'
    )
    for program in programs:
        if not isinstance(program, dict):
            continue
        name_html = h(_uni_text(program.get("program_name", "")).strip())
        link = _uni_text(program.get("program_link", "")).strip()
        if not link and isinstance(program.get("program_links"), dict):
            link = _uni_text(program["program_links"].get("web_link", "")).strip()
        if link:
            name_html = (
                f'<a href="{h(link)}" target="_blank" rel="noopener noreferrer">{name_html}</a>'
            )
        entrance = _uni_text(program.get("entrance_exam", program.get("entrance_examination", "")))
        designation = _uni_text(program.get("designation", ""))
        out.append(
            f"<tr><td>{name_html}</td><td>{h(designation)}</td><td>{h(entrance)}</td></tr>"
        )
    out.append("</tbody></table>")
    return "".join(out)


def render_university_section(
    uni_name: str,
    sections: list[dict],
    allow_remote: bool,
    is_first_uni: bool,
    include_inactive_programs: bool = False,
    website_url: str = "",
    anchor_id: str | None = None,
    debug: bool = False,
    stats: dict | None = None,
    sort_order: int | None = None,
) -> str:
    """Render a single university section. Mirrors PHP renderUniversitySection.

    Layout, in order: title (linked when a website is known), a two-column
    table with the summary facts and the campus image, the benefits list,
    the programs table, and finally any remaining sections rendered through
    ``render_global_blocks``.

    Args:
        uni_name: Display name of the university.
        sections: Section rows with ``section_key``, ``section_title`` and
            ``section_json``. They are read, never modified.
        allow_remote: When false, the campus image is not fetched and a
            placeholder is shown.
        is_first_uni: When false, the wrapper gets the "page-break" class.
        include_inactive_programs: Keep programs whose active flag is falsy.
        website_url: Preferred website; the overview JSON is the fallback.
        anchor_id: Optional ``id`` attribute for the wrapper.
        debug: Passed through to ``render_global_blocks``.
        stats: Optional counters dict, incremented in place.
        sort_order: Optional value for the wrapper's ``data-sort`` attribute.

    Returns:
        The university markup as a single HTML string.
    """
    has_stats = isinstance(stats, dict)

    def _bump(counter: str) -> None:
        if has_stats:
            stats[counter] = stats.get(counter, 0) + 1

    class_attr = "uni" if is_first_uni else "uni page-break"
    id_attr = f' id="{h(anchor_id)}"' if anchor_id else ""
    sort_attr = f' data-sort="{h(str(sort_order))}"' if sort_order is not None else ""
    out = [
        f'<div class="{class_attr}"{id_attr}{sort_attr} data-section-key="university" '
        f'data-section-title="{h(uni_name)}">'
    ]
    _bump("universities")

    sec_map = _uni_build_section_map(sections)

    # Campus image
    campus_url = ""
    campus_cap = ""
    img_section = sec_map.get("campus_image") or sec_map.get("image")
    if img_section:
        img_json = _uni_section_json(img_section)
        campus_url = _uni_text(img_json.get("image_url", "")).strip()
        campus_cap = _uni_text(img_json.get("caption", "")).strip()

    # Overview data + website (the explicit website_url wins over the overview).
    overview_json: dict | None = None
    resolved_website = (website_url or "").strip()
    if "overview" in sec_map:
        overview_json = _uni_section_json(sec_map["overview"])
        site_from_overview = get_any(
            overview_json,
            ["university_website", "university_website_url", "website", "site", "url", "homepage", "web_url"],
        )
        if not resolved_website and site_from_overview:
            resolved_website = site_from_overview

    # 1. University title
    if resolved_website:
        _bump("university_links")
        out.append(
            f'<div class="uni-name"><a class="uni-name-link" href="{h(resolved_website)}" '
            f'target="_blank" rel="noopener noreferrer">{h(uni_name)}</a></div>'
        )
    else:
        out.append(f'<div class="uni-name">{h(uni_name)}</div>')

    # 2-3. Two-column: Summary + Campus image
    embedded = fetch_image_data_uri(campus_url) if allow_remote and campus_url else ""
    if embedded:
        campus_cell = f'<img class="campus-top-img" src="{h(embedded)}" alt="Campus Image" />'
        if campus_cap:
            campus_cell += f'<div class="campus-top-cap">{h(campus_cap)}</div>'
    else:
        campus_cell = '<div class="campus-placeholder-cell">Campus image unavailable</div>'
    _bump("images_embedded" if embedded else "images_placeholder")

    summary_cell = ""
    if overview_json is not None:
        summary_cell += _uni_summary_list(overview_json)
        # NOTE(review): nesting reconstructed from a source whose indentation
        # was lost; the website row is assumed to belong to the overview
        # block. Confirm against the PHP original.
        if resolved_website:
            _bump("website_rows")
            summary_cell += (
                f'<div class="uni-website"><span class="lbl">Website:</span> '
                f'<a href="{h(resolved_website)}" target="_blank" rel="noopener noreferrer">'
                f'{h(resolved_website)}</a></div>'
            )

    out.append(
        '<table class="school-top-table" cellspacing="0" cellpadding="0"><tr>'
        f'<td class="school-top-summary" style="vertical-align:top;">{summary_cell}</td>'
        f'<td class="school-top-campus" style="vertical-align:top;">{campus_cell}</td>'
        "</tr></table>"
    )

    # 4. Benefits
    if "benefits" in sec_map:
        out.append(_uni_render_benefits(_uni_section_json(sec_map["benefits"])))

    # 5. Programs
    if "programs" in sec_map:
        out.append(
            _uni_render_programs(
                uni_name,
                _uni_section_json(sec_map["programs"]),
                include_inactive_programs,
            )
        )

    # Extra sections: everything not handled above, in input order.
    skip_keys = {"campus_image", "image", "overview", "benefits", "programs"}
    for section in sections:
        if not isinstance(section, dict):
            continue
        key = _uni_text(section.get("section_key", ""))
        if not key or key in skip_keys:
            continue
        title = _uni_text(section.get("section_title", ""))
        out.append(render_global_blocks(key, title, _uni_section_json(section), debug))

    out.append("</div>")
    return "".join(out)