"""Renderers — mirrors PHP renderers.php. Contains functions for rendering: - Table of Contents (TOC) - Global section blocks (overview, steps, bullets, tables, doc_v1, etc.) - University section blocks (overview, benefits, programs) - Remote image fetching as data URIs """ from __future__ import annotations import base64 import logging import re from typing import Any import httpx from app.services.utils import ( emphasize_keywords, format_money_figures, get_any, h, hb_slug, is_assoc, is_truthy, ) logger = logging.getLogger(__name__) # ========================================= # Image fetching # ========================================= def fetch_image_data_uri(url: str) -> str: """Fetch a remote image and return as data:... URI. Mirrors PHP fetchImageDataUri.""" url = url.strip() if not url: return "" try: with httpx.Client(verify=False, timeout=12, follow_redirects=True) as client: resp = client.get(url) if resp.status_code < 200 or resp.status_code >= 300 or not resp.content: logger.warning("Image fetch failed for %s status=%d", url, resp.status_code) return "" data = resp.content except Exception as exc: logger.warning("Image fetch error for %s: %s", url, exc) return "" # Detect mime type from headers or magic bytes content_type = resp.headers.get("content-type", "") mime = "" if "image/" in content_type: mime = content_type.split(";")[0].strip() else: # Magic byte detection if data[:8].startswith(b"\x89PNG"): mime = "image/png" elif data[:3] == b"\xff\xd8\xff": mime = "image/jpeg" elif data[:4] == b"GIF8": mime = "image/gif" elif data[:4] == b"RIFF" and data[8:12] == b"WEBP": mime = "image/webp" if not mime.startswith("image/"): logger.warning("Invalid image mime %s for %s", mime, url) return "" b64 = base64.b64encode(data).decode("ascii") return f"data:{mime};base64,{b64}" # ========================================= # TOC sorting and rendering # ========================================= def sort_toc(items: list[dict]) -> list[dict]: """Mirrors PHP sortHandbookToc — sort by sort_order/sort, stable fallback.""" for idx, e in enumerate(items): e.setdefault("_i", idx) def key_fn(e: dict): so = e.get("sort_order", e.get("sort")) if so is not None: try: so_num = float(so) return (0, so_num, e.get("_i", 0)) except (ValueError, TypeError): pass return (1, 0.0, e.get("_i", 0)) items.sort(key=key_fn) for e in items: e.pop("_i", None) return items def render_toc(items: list[dict], debug: bool = False, show_pages: bool = True) -> str: """Render Table of Contents HTML (DOMPDF-safe). Mirrors PHP renderToc(). """ sorted_items = sort_toc(items) out = '' out += '
' out += '
Table of Contents
' out += ( '' '' ) for e in sorted_items: if not isinstance(e, dict): continue title = str(e.get("title", "")).strip() target = str(e.get("target", e.get("anchor", ""))).strip() if not title: continue level = max(0, min(3, int(e.get("level", 0)))) bold = bool(e.get("bold", False)) upper = bool(e.get("upper", False)) if level == 0: bold = True upper = True row_class = "toc-row--major" if level == 0 else "toc-row--sub" if level >= 2: row_class += " toc-row--deep" text = title.upper() if upper else title title_inner = h(text) if target: title_inner = f'{title_inner}' if bold: title_inner = f"{title_inner}" page = str(e.get("page", "")).strip() if show_pages and page: page_cell = f"{h(page)}" else: page_cell = " " indent = "" if level == 1: indent = "padding-left:16px;" elif level >= 2: indent = "padding-left:30px;" title_style = ( "vertical-align:bottom; padding:1px 4px 1px 0; font-size:10px; " "line-height:1.15; color:#111;" + (" font-weight:700;" if bold else " font-weight:400;") + (" text-transform:uppercase; letter-spacing:0.1px;" if upper else "") + (f" {indent}" if indent else "") ) out += f'' out += f'' out += '' out += ( f'' ) out += "" out += "
{title_inner} {page_cell}
" return out def render_toc_hardcoded( items: list[dict], debug: bool = False, page_start: int = 3, page_offset: int = 0, ) -> str: """Mirrors PHP renderTocHardcoded — sort, assign sequential pages, render.""" sorted_items = sort_toc(items) seq = max(1, page_start) for item in sorted_items: p = str(item.get("page", "")).strip() if p and p.lstrip("-").isdigit(): display = int(p) + page_offset item["page"] = str(display) if display >= seq: seq = display + 1 else: item["page"] = str(seq) seq += 1 out = "\n" out += '
' out += '

Table of Contents

' out += ( '' '' ) for e in sorted_items: if not isinstance(e, dict): continue title = str(e.get("title", "")).strip() target = str(e.get("target", e.get("anchor", ""))).strip() if not title: continue level = max(0, min(3, int(e.get("level", 0)))) bold = bool(e.get("bold", False)) upper = bool(e.get("upper", False)) if level == 0: bold = True upper = True row_class = "toc-row--major" if level == 0 else "toc-row--sub" if level >= 2: row_class += " toc-row--deep" text = title.upper() if upper else title title_inner = h(text) if target: title_inner = f'{title_inner}' if bold: title_inner = f"{title_inner}" page = str(e.get("page", "")).strip() page_html = f"{h(page)}" if page else " " indent = "" if level == 1: indent = "padding-left:16px;" elif level >= 2: indent = "padding-left:30px;" title_style = ( "vertical-align:bottom;padding:1px 4px 1px 0;font-size:10px;" "line-height:1.15;color:#111;" + ("font-weight:700;" if bold else "font-weight:400;") + ("text-transform:uppercase;letter-spacing:0.1px;" if upper else "") + indent ) out += f'' out += f'' out += '' out += ( f'' ) out += "" out += "
{title_inner} {page_html}
" return out # ========================================= # Global blocks renderer # ========================================= def render_global_blocks( section_key: str, section_title: str, json_data: dict | list, debug: bool = False, *, universities: list[dict] | None = None, ) -> str: """Render a single global section's content. Mirrors PHP renderGlobalBlocks() — handles steps, bullets, tables, doc_v1, table_v2, summary_of_universities, etc. """ html_out = "" key_norm = section_key.lower().strip() if not isinstance(json_data, dict): json_data = {} layout_norm = str(json_data.get("layout", "")).lower().strip() # ── Summary of universities ── if key_norm == "summary_of_universities": unis = universities or [] title = section_title.strip() if title: html_out += f'

{h(title)}

' intro = str(json_data.get("intro", "")).strip() if intro: html_out += f'

{h(format_money_figures(intro))}

' elif layout_norm == "doc_v1" and isinstance(json_data.get("blocks"), list): for b in json_data["blocks"]: if not isinstance(b, dict): continue btype = str(b.get("type", "")) if btype not in ("paragraph", "subheading", "note"): continue t = format_money_figures(str(b.get("text", ""))) if not t.strip(): continue if btype == "subheading": html_out += f'

{h(t)}

' elif btype == "note": html_out += f'
{h(t)}
' else: html_out += f'

{emphasize_keywords(t)}

' # Resolve list from universities or doc_v1 bullets resolved: list[str] = [] if unis: def uni_sort_key(u): so = u.get("sort_order") if isinstance(u, dict) else None if so is not None: try: return (0, float(so)) except (ValueError, TypeError): pass return (1, 0.0) sorted_unis = sorted(unis, key=uni_sort_key) for u in sorted_unis: if not isinstance(u, dict): continue name = str(u.get("university_name", u.get("name", ""))).strip() if name: resolved.append(name) if not resolved and layout_norm == "doc_v1" and isinstance(json_data.get("blocks"), list): for b in json_data["blocks"]: if not isinstance(b, dict) or str(b.get("type", "")) != "bullets": continue items = b.get("items", []) if not isinstance(items, list): continue for it in items: it_str = str(it).strip() if it_str: resolved.append(it_str) # Dedupe seen: set[str] = set() deduped: list[str] = [] for nm in resolved: k = nm.lower().strip() if not k or k in seen: continue seen.add(k) deduped.append(nm) if deduped: html_out += '
    ' for name in deduped: anchor = "university_" + hb_slug(name) html_out += f'
  1. {h(name)}
  2. ' html_out += "
" note = str(json_data.get("note", "")).strip() if note: html_out += f'
{h(format_money_figures(note))}
' return html_out # ── Section title ── title = section_title.strip() if title and key_norm != "table_of_contents": html_out += f'

{h(title)}

' # ── Steps ── steps = json_data.get("steps") if isinstance(steps, list): step_num = 0 for s in steps: if not isinstance(s, dict): continue step_num += 1 step_title = str(s.get("title", s.get("step_title", ""))).strip() body = format_money_figures(str(s.get("body", s.get("description", ""))).strip()) html_out += '
' if step_title: html_out += f'
Step {step_num}: {h(step_title)}
' if body: html_out += f'

{emphasize_keywords(body)}

' links = s.get("links", []) if isinstance(links, list) and links: html_out += '" qr = str(s.get("qr_url", s.get("qr_image", ""))).strip() if qr: html_out += f'QR' html_out += "
" return html_out # ── Bullets ── has_bullets = isinstance(json_data.get("bullets"), list) has_items = isinstance(json_data.get("items"), list) if has_bullets or (layout_norm == "bullets_with_note" and has_items): lst = json_data.get("items") if has_items else json_data.get("bullets") html_out += '" note = format_money_figures(str(json_data.get("note", json_data.get("footnote", ""))).strip()) if note: html_out += f'
{h(note)}
' return html_out # ── Basic table ── cols = json_data.get("columns") rows = json_data.get("rows") if isinstance(cols, list) and isinstance(rows, list): html_out += '' if cols: html_out += "" for c in cols: html_out += f"" html_out += "" html_out += "" for r in rows: if not isinstance(r, (list, dict)): continue html_out += "" if isinstance(r, dict): for col_label in cols: key_guess = re.sub(r"[^a-z0-9]+", "_", str(col_label).lower()) cell = r.get(key_guess, "") html_out += f"" else: for cell in r: html_out += f"" html_out += "" html_out += "
{h(str(c))}
{h(format_money_figures(str(cell)))}{h(format_money_figures(str(cell)))}
" return html_out # ── table_v2 ── if layout_norm == "table_v2": base_cols = json_data.get("base_columns", []) groups = json_data.get("header_groups", []) rows = json_data.get("rows", []) if not isinstance(base_cols, list): base_cols = [] if not isinstance(groups, list): groups = [] if not isinstance(rows, list): rows = [] all_cols: list[dict] = [] for c in base_cols: if isinstance(c, dict): all_cols.append({"key": str(c.get("key", "")), "label": str(c.get("label", ""))}) for g in groups: if not isinstance(g, dict): continue g_cols = g.get("columns", []) if not isinstance(g_cols, list): g_cols = [] for c in g_cols: if isinstance(c, dict): all_cols.append({"key": str(c.get("key", "")), "label": str(c.get("label", ""))}) html_out += '' has_group_row = bool(groups) if has_group_row: html_out += "" for c in base_cols: if isinstance(c, dict): html_out += f'' for g in groups: if not isinstance(g, dict): continue g_cols = g.get("columns", []) if not isinstance(g_cols, list): g_cols = [] span = max(1, len(g_cols)) html_out += f'' html_out += "" for g in groups: if not isinstance(g, dict): continue g_cols = g.get("columns", []) if not isinstance(g_cols, list): g_cols = [] for c in g_cols: if isinstance(c, dict): html_out += f'' html_out += "" else: html_out += "" for c in all_cols: html_out += f'' html_out += "" html_out += "" for r in rows: if not isinstance(r, dict): continue html_out += "" for c in all_cols: k = c.get("key", "") val = r.get(k, "") if isinstance(val, dict): val = val.get("text", "") html_out += f"" html_out += "" html_out += "
{h(str(c.get("label", "")))}{h(str(g.get("label", "")))}
{h(str(c.get("label", "")))}
{h(c.get("label", ""))}
{h(format_money_figures(str(val)))}
" return html_out # ── doc_v1 ── if layout_norm == "doc_v1" and isinstance(json_data.get("blocks"), list): for b in json_data["blocks"]: if not isinstance(b, dict): continue btype = str(b.get("type", "")) if btype == "paragraph": t = format_money_figures(str(b.get("text", ""))) if t.strip(): html_out += f'

{emphasize_keywords(t)}

' elif btype == "subheading": t = format_money_figures(str(b.get("text", ""))) if t.strip(): html_out += f'

{h(t)}

' elif btype == "bullets": items = b.get("items", []) if not isinstance(items, list): items = [] html_out += '" elif btype == "numbered_list": items = b.get("items", []) if not isinstance(items, list): items = [] html_out += '
    ' for it in items: it_str = format_money_figures(str(it).strip()) if it_str: html_out += f"
  1. {emphasize_keywords(it_str)}
  2. " html_out += "
" elif btype == "note": t = format_money_figures(str(b.get("text", ""))) if t.strip(): html_out += f'
{h(t)}
' elif btype == "note_inline": parts = b.get("parts", []) if not isinstance(parts, list): parts = [] txt = "" for p in parts: if not isinstance(p, dict): continue t = format_money_figures(str(p.get("text", ""))) if not t: continue style = str(p.get("style", "")) if style == "red_bold": txt += f"{h(t)}" else: txt += h(t) if re.sub(r"<[^>]+>", "", txt).strip(): html_out += f'
{txt}
' elif btype == "table_v1": t_cols = b.get("columns", []) t_rows = b.get("rows", []) if not isinstance(t_cols, list): t_cols = [] if not isinstance(t_rows, list): t_rows = [] html_out += '' if t_cols: html_out += "" for c in t_cols: html_out += f"" html_out += "" html_out += "" for r in t_rows: if not isinstance(r, list): continue html_out += "" for cell in r: html_out += f"" html_out += "" html_out += "
{h(str(c))}
{h(format_money_figures(str(cell)))}
" elif btype in ("table_v3", "table_v4"): t_rows = b.get("rows", []) if not isinstance(t_rows, list): t_rows = [] html_out += '' for r in t_rows: if not isinstance(r, list): continue html_out += "" for cell in r: colspan = 1 rowspan = 1 text_val = "" if isinstance(cell, dict): text_val = str(cell.get("text", "")) cs = cell.get("colspan") rs = cell.get("rowspan") if cs is not None and str(cs).isdigit(): colspan = int(cs) if rs is not None and str(rs).isdigit(): rowspan = int(rs) else: text_val = str(cell) attr = "" if colspan > 1: attr += f' colspan="{colspan}"' if rowspan > 1: attr += f' rowspan="{rowspan}"' html_out += f"{h(format_money_figures(text_val))}" html_out += "" html_out += "
" return html_out # ── Fallback ── if "text" in json_data: html_out += f'

{h(format_money_figures(str(json_data["text"])))}

' if not html_out.strip(): logger.warning( "Empty section render for key=%s title=%s", section_key, section_title, ) return html_out # ========================================= # University section renderer # ========================================= def render_university_section( uni_name: str, sections: list[dict], allow_remote: bool, is_first_uni: bool, include_inactive_programs: bool = False, website_url: str = "", anchor_id: str | None = None, debug: bool = False, stats: dict | None = None, sort_order: int | None = None, ) -> str: """Render a single university section. Mirrors PHP renderUniversitySection.""" classes = ["uni"] if not is_first_uni: classes.append("page-break") id_attr = f' id="{h(anchor_id)}"' if anchor_id else "" sort_attr = f' data-sort="{h(str(sort_order))}"' if sort_order is not None else "" out = f'
' has_stats = isinstance(stats, dict) if has_stats: stats["universities"] = stats.get("universities", 0) + 1 # Build map; merge duplicate "programs" sections sec_map: dict[str, dict] = {} for s in sections: if not isinstance(s, dict): continue k = str(s.get("section_key", "")) if not k: continue if k == "programs" and k in sec_map: existing = sec_map["programs"].get("section_json", {}) incoming = s.get("section_json", {}) if not isinstance(existing, dict): existing = {} if not isinstance(incoming, dict): incoming = {} a = existing.get("programs", []) b = incoming.get("programs", []) if not isinstance(a, list): a = [] if not isinstance(b, list): b = [] existing["programs"] = a + b sec_map["programs"]["section_json"] = existing continue sec_map[k] = s # Campus image img_section = sec_map.get("campus_image") or sec_map.get("image") campus_url = "" campus_cap = "" if img_section: j = img_section.get("section_json", {}) if isinstance(j, dict): campus_url = str(j.get("image_url", "")).strip() campus_cap = str(j.get("caption", "")).strip() # Overview data + website overview_json: dict | None = None resolved_website = (website_url or "").strip() if "overview" in sec_map: overview_json = sec_map["overview"].get("section_json", {}) if not isinstance(overview_json, dict): overview_json = {} site_from_overview = get_any( overview_json, ["university_website", "university_website_url", "website", "site", "url", "homepage", "web_url"], ) if not resolved_website and site_from_overview: resolved_website = site_from_overview # 1. University title if resolved_website: if has_stats: stats["university_links"] = stats.get("university_links", 0) + 1 out += ( f'
{h(uni_name)}
' ) else: out += f'
{h(uni_name)}
' # 2-3. Two-column: Summary + Campus image image_embedded = False campus_cell = "" if allow_remote and campus_url: embedded = fetch_image_data_uri(campus_url) if embedded: image_embedded = True campus_cell = f'Campus Image' if campus_cap: campus_cell += f'
{h(campus_cap)}
' else: campus_cell = '
Campus image unavailable
' else: campus_cell = '
Campus image unavailable
' if has_stats: if image_embedded: stats["images_embedded"] = stats.get("images_embedded", 0) + 1 else: stats["images_placeholder"] = stats.get("images_placeholder", 0) + 1 summary_cell = "" if overview_json is not None: j = overview_json founded = get_any(j, ["founded", "Founded"]) total = get_any(j, ["total_students", "Total Students"]) undergrad = get_any(j, ["undergraduates", "Undergraduate Students", "undergraduate_students"]) postgrad = get_any(j, ["postgraduate_students", "Postgraduate Students"]) acc_rate = get_any(j, ["acceptance_rate", "Acceptance Rate"]) location = get_any(j, ["location", "Location"]) tuition = get_any(j, [ "tuition_out_of_state_yearly", "Yearly Out of State Tuition Fees", "Yearly Out-of-State Tuition Fees", "Yearly Tuition Fees", "Yearly Out-of-State Tuition Fees:", ]) summary_cell += '
Summary info
' summary_cell += '" if resolved_website: if has_stats: stats["website_rows"] = stats.get("website_rows", 0) + 1 summary_cell += ( f'
Website: ' f'' f'{h(resolved_website)}
' ) out += ( '' f'' f'' "
{summary_cell}{campus_cell}
" ) # 4. Benefits if "benefits" in sec_map: j = sec_map["benefits"].get("section_json", {}) if not isinstance(j, dict): j = {} benefits = j.get("benefits", []) if not isinstance(benefits, list): benefits = [] out += '
' out += '
Benefits for ISP students at this school
' if benefits: out += '" else: out += '
No benefits listed.
' out += "
" # 5. Programs if "programs" in sec_map: j = sec_map["programs"].get("section_json", {}) if not isinstance(j, dict): j = {} programs = j.get("programs", []) if not isinstance(programs, list): programs = [] # Filter inactive if not include_inactive_programs: def _is_active(p: dict) -> bool: flag = p.get("program_active", p.get("is_active", p.get("active", 1))) return is_truthy(flag) programs = [p for p in programs if isinstance(p, dict) and _is_active(p)] out += ( '
To qualify for The International Scholars Program at ' f"{h(uni_name)}, you must be willing to study any of the following programs:
" ) if programs: out += '' out += ( '' '' '' '' '' ) for p in programs: if not isinstance(p, dict): continue program_name = str(p.get("program_name", "")).strip() link = str(p.get("program_link", "")).strip() if not link and isinstance(p.get("program_links"), dict): link = str(p["program_links"].get("web_link", "")).strip() program_name_html = h(program_name) if link: program_name_html = f'{program_name_html}' career = p.get("career_pathways", []) career_html = "" if isinstance(career, list): career_items = [str(x).strip() for x in career if str(x).strip()] if career_items: career_html = '" else: raw = str(career).strip() if raw: lines = [l.strip() for l in re.split(r"[\r\n]+", raw) if l.strip()] if len(lines) > 1: career_html = '" else: career_html = h(raw) if not career_html: career_html = " " entrance = str(p.get("entrance_exam", p.get("entrance_examination", ""))) designation = str(p.get("designation", "")) funding = str(p.get("funding_category", "")) out += ( f"" f"" f"" f"" f"" f"" f"" ) out += "
ProgramDesignationEntrance ExaminationExamples of Career PathwaysFunding Category
{program_name_html}{h(designation)}{h(entrance)}{career_html}{h(funding)}
" else: out += '
No programs listed.
' # Extra sections skip_keys = {"campus_image", "image", "overview", "benefits", "programs"} for s in sections: if not isinstance(s, dict): continue k = str(s.get("section_key", "")) if not k or k in skip_keys: continue title = str(s.get("section_title", "")) j = s.get("section_json", {}) if not isinstance(j, dict): j = {} out += render_global_blocks(k, title, j, debug) out += "
" return out