| """HTML builder — assembles the full ISP Handbook HTML document. |
| |
| Uses Jinja2 templates for HTML generation. Data preparation logic is |
| preserved from the original string-concatenation approach. The output |
| is a self-contained HTML suitable for Playwright Chromium PDF export. |
| """ |
|
|
| from __future__ import annotations |
|
|
| import base64 |
| import logging |
| import mimetypes |
| import os |
| import re |
| from pathlib import Path |
| from typing import Any |
|
|
| from jinja2 import Environment, FileSystemLoader, select_autoescape |
| from markupsafe import Markup |
|
|
| from app.core.config import get_settings |
| from app.core.fonts import font_face_css, select_font_family |
| from app.services.normalizer import normalize_section, normalize_university |
| from app.services.renderers import ( |
| fetch_image_data_uri, |
| render_global_blocks, |
| sort_toc, |
| ) |
| from app.services.utils import ( |
| format_money_figures, |
| get_any, |
| h, |
| handbook_anchor, |
| hb_slug, |
| is_truthy, |
| sort_sections_stable, |
| ) |
|
|
| logger = logging.getLogger(__name__) |
|
|
| |
| _TEMPLATES_DIR = Path(__file__).resolve().parent.parent / "templates" |
|
|
|
|
def _get_jinja_env() -> Environment:
    """Build a fresh Jinja2 environment rooted at ``_TEMPLATES_DIR``.

    Autoescaping is selected for ``html`` templates, and both
    ``trim_blocks`` and ``lstrip_blocks`` are switched on.
    """
    template_loader = FileSystemLoader(str(_TEMPLATES_DIR))
    whitespace_options = {"trim_blocks": True, "lstrip_blocks": True}
    return Environment(
        loader=template_loader,
        autoescape=select_autoescape(["html"]),
        **whitespace_options,
    )
|
|
|
|
def _static_base_url() -> str:
    """Return the absolute ``file://`` URL of the ``static`` directory.

    The directory is located two levels up from this module's resolved
    path (i.e. beside the package directory that contains this module).
    """
    module_path = Path(__file__).resolve()
    static_root = module_path.parent.parent.joinpath("static")
    return static_root.as_uri()
|
|
|
|
def _unused_pdf_override_css(font_stack: str) -> str:
    """Return an empty string (legacy inline PDF override CSS hook).

    Kept for reference only: all styling now lives in
    static/css/print.css for Chromium rendering, so ``font_stack`` is
    accepted but ignored.
    """
    no_override_css = ""
    return no_override_css
|
|
|
|
| |
# CSS class assigned to each known global section, looked up by the
# lower-cased ``section_key``. Keys missing from this table get a class
# derived from the key itself by the caller.
SECTION_CLASS_MAP = dict(
    overview="sec-overview",
    how_program_works_and_qualification_requirements="sec-qualification",
    enrolment_steps="sec-steps",
    withdrawal_late_payment_refund_policy="sec-policy",
    refund_guidelines="sec-refund",
    program_contributions="sec-contributions",
    funding_options_available="sec-funding",
    summary_of_universities="sec-summary",
)

# Lower-cased section keys whose rendered section is flagged with
# ``page_break`` when handed to the template.
PAGE_BREAK_KEYS = {
    "overview",
    "how_program_works_and_qualification_requirements",
    "enrolment_steps",
    "withdrawal_late_payment_refund_policy",
    "refund_guidelines",
    "program_contributions",
    "funding_options_available",
    "summary_of_universities",
}
|
|
|
|
def _none_safe_text(value: Any) -> str:
    """Return ``str(value)``, but map ``None`` to ``""`` instead of ``"None"``."""
    return "" if value is None else str(value)


def _increment_stat(stats: dict[str, Any], key: str) -> None:
    """Add one to ``stats[key]``, treating a missing key as zero."""
    stats[key] = stats.get(key, 0) + 1


def _section_programs(section: dict[str, Any]) -> list[Any]:
    """Return the ``programs`` list of a section, or ``[]`` if malformed."""
    payload = section.get("section_json", {})
    if not isinstance(payload, dict):
        return []
    programs = payload.get("programs", [])
    return programs if isinstance(programs, list) else []


def _merge_program_sections(
    first: dict[str, Any],
    later: dict[str, Any],
) -> dict[str, Any]:
    """Return a copy of ``first`` whose program list also contains ``later``'s.

    Neither input dict is modified, so the caller's raw section data stays
    intact for any other consumer of the same ``sections`` list.
    """
    base_json = first.get("section_json", {})
    if not isinstance(base_json, dict):
        base_json = {}
    merged_json = {
        **base_json,
        "programs": _section_programs(first) + _section_programs(later),
    }
    return {**first, "section_json": merged_json}


def _index_sections_by_key(sections: Any) -> dict[str, dict]:
    """Map ``section_key`` to its section dict.

    Duplicate keys: the last section wins, except for ``"programs"`` where
    the first section is kept and the program lists are concatenated.
    """
    sec_map: dict[str, dict] = {}
    for section in sections:
        if not isinstance(section, dict):
            continue
        key = str(section.get("section_key", ""))
        if not key:
            continue
        if key == "programs" and key in sec_map:
            sec_map[key] = _merge_program_sections(sec_map[key], section)
            continue
        sec_map[key] = section
    return sec_map


def _resolve_campus_image(
    sec_map: dict[str, dict],
    allow_remote: bool,
    stats: dict[str, Any],
) -> tuple[str, str]:
    """Return ``(embedded_image, caption)`` for the campus image section.

    Looks at the ``campus_image`` section, falling back to ``image``.
    ``stats`` is only touched when such a section with a dict payload
    exists: ``images_embedded`` on success, ``images_placeholder`` otherwise.
    """
    img_section = sec_map.get("campus_image") or sec_map.get("image")
    if not img_section:
        return "", ""
    payload = img_section.get("section_json", {})
    if not isinstance(payload, dict):
        return "", ""

    campus_url = _none_safe_text(payload.get("image_url", "")).strip()
    caption = _none_safe_text(payload.get("caption", "")).strip()

    # Only fetch when remote access is allowed and a URL is present.
    embedded = fetch_image_data_uri(campus_url) if allow_remote and campus_url else ""
    if embedded:
        _increment_stat(stats, "images_embedded")
        return embedded, caption
    _increment_stat(stats, "images_placeholder")
    return "", caption


def _career_list_html(items: list[str]) -> str:
    """Render escaped items as a ``<ul class="career-list">`` fragment."""
    rows = "".join(f"<li>{h(item)}</li>" for item in items)
    return f'<ul class="career-list">{rows}</ul>'


def _render_career_html(career: Any) -> str:
    """Render ``career_pathways`` (list or free text) as an HTML fragment.

    A list becomes a bullet list of its non-blank items; multi-line text
    becomes a bullet list of its lines; single-line text is escaped as-is.
    """
    career_html = ""
    if isinstance(career, list):
        stripped = (_none_safe_text(entry).strip() for entry in career)
        career_items = [item for item in stripped if item]
        if career_items:
            career_html = _career_list_html(career_items)
    else:
        raw = _none_safe_text(career).strip()
        if raw:
            lines = [ln.strip() for ln in re.split(r"[\r\n]+", raw) if ln.strip()]
            career_html = _career_list_html(lines) if len(lines) > 1 else h(raw)

    # Fallback literal kept from the original code; presumably it stops the
    # table cell from collapsing when there is nothing to show - TODO confirm.
    return career_html or " "


def _prepare_programs(
    programs_raw: list[Any],
    include_inactive_programs: bool,
) -> list[dict[str, Any]]:
    """Build the template rows for a university's programs.

    Non-dict entries are skipped, inactive programs are dropped unless
    ``include_inactive_programs`` is set, and programs are de-duplicated by
    case-insensitive name (first occurrence wins).
    """
    programs: list[dict[str, Any]] = []
    seen_names: set[str] = set()
    for program in programs_raw:
        if not isinstance(program, dict):
            continue
        if not include_inactive_programs:
            active_flag = program.get(
                "program_active",
                program.get("is_active", program.get("active", 1)),
            )
            if not is_truthy(active_flag):
                continue

        program_name = _none_safe_text(program.get("program_name", "")).strip()
        dedup_key = program_name.lower()
        if dedup_key in seen_names:
            continue
        seen_names.add(dedup_key)

        link = _none_safe_text(program.get("program_link", "")).strip()
        alt_links = program.get("program_links")
        if not link and isinstance(alt_links, dict):
            link = _none_safe_text(alt_links.get("web_link", "")).strip()

        entrance = program.get("entrance_exam", program.get("entrance_examination", ""))
        programs.append({
            "name": program_name,
            "link": link,
            "designation": _none_safe_text(program.get("designation", "")),
            "entrance": _none_safe_text(entrance),
            # Already escaped by the career renderer, hence Markup.
            "career_html": Markup(_render_career_html(program.get("career_pathways", []))),
            "funding": _none_safe_text(program.get("funding_category", "")),
        })
    return programs


def _prepare_university_data(
    uni_raw: dict[str, Any],
    allow_remote: bool,
    include_inactive_programs: bool,
    debug: bool,
    stats: dict[str, Any],
) -> dict[str, Any]:
    """Prepare a single university's template data.

    Extracts overview, campus image, benefits, programs, and extra sections
    from the raw sections list so that the Jinja2 template only has to
    render them.

    Args:
        uni_raw: University record. ``name`` is required; ``sections``,
            ``website``, ``anchor``, ``sort_order`` and ``_is_first`` are
            read when present. The record and its sections are not modified.
        allow_remote: When true, the campus image URL is fetched and
            embedded via ``fetch_image_data_uri``.
        include_inactive_programs: Keep programs whose active flag is falsy.
        debug: Forwarded to ``render_global_blocks`` for extra sections.
        stats: Counter dict updated in place (``universities``,
            ``images_embedded``, ``images_placeholder``,
            ``university_links``, ``website_rows``).

    Returns:
        Dict with keys ``name``, ``anchor``, ``sort_order``, ``website``,
        ``classes``, ``overview``, ``campus_image``, ``campus_caption``,
        ``benefits``, ``programs`` and ``extra_sections``. ``overview``,
        ``benefits`` and ``programs`` are ``None`` when the corresponding
        section is absent.

    Raises:
        KeyError: If ``uni_raw`` has no ``name`` key.
    """
    uni_name = uni_raw["name"]
    sections = uni_raw.get("sections") or []
    is_first = uni_raw.get("_is_first", False)

    _increment_stat(stats, "universities")

    sec_map = _index_sections_by_key(sections)

    campus_image, campus_caption = _resolve_campus_image(sec_map, allow_remote, stats)

    # Website: the university record wins; the overview section is a fallback.
    resolved_website = (uni_raw.get("website") or "").strip()
    overview_data = None
    if "overview" in sec_map:
        overview_json = sec_map["overview"].get("section_json", {})
        if not isinstance(overview_json, dict):
            overview_json = {}

        site_from_overview = get_any(
            overview_json,
            ["university_website", "university_website_url", "website", "site", "url", "homepage", "web_url"],
        )
        if not resolved_website and site_from_overview:
            resolved_website = site_from_overview

        # Each field accepts several historical key spellings.
        overview_fields = {
            "founded": ["founded", "Founded"],
            "total_students": ["total_students", "Total Students"],
            "undergraduates": ["undergraduates", "Undergraduate Students", "undergraduate_students"],
            "postgraduates": ["postgraduate_students", "Postgraduate Students"],
            "acceptance_rate": ["acceptance_rate", "Acceptance Rate"],
            "location": ["location", "Location"],
            "tuition": [
                "tuition_out_of_state_yearly",
                "Yearly Out of State Tuition Fees",
                "Yearly Out-of-State Tuition Fees",
                "Yearly Tuition Fees",
                "Yearly Out-of-State Tuition Fees:",
            ],
        }
        overview_data = {
            field: get_any(overview_json, keys)
            for field, keys in overview_fields.items()
        }

    if resolved_website:
        _increment_stat(stats, "university_links")
        _increment_stat(stats, "website_rows")

    benefits = None
    if "benefits" in sec_map:
        benefits_json = sec_map["benefits"].get("section_json", {})
        if not isinstance(benefits_json, dict):
            benefits_json = {}
        raw_benefits = benefits_json.get("benefits", [])
        if isinstance(raw_benefits, list):
            stripped = (_none_safe_text(b).strip() for b in raw_benefits)
            benefits = [text for text in stripped if text]
        else:
            benefits = []

    programs = None
    if "programs" in sec_map:
        programs = _prepare_programs(
            _section_programs(sec_map["programs"]),
            include_inactive_programs,
        )

    # Every section not handled above is rendered generically, in input order.
    skip_keys = {"campus_image", "image", "overview", "benefits", "programs"}
    extra_sections = []
    for section in sections:
        if not isinstance(section, dict):
            continue
        key = str(section.get("section_key", ""))
        if not key or key in skip_keys:
            continue
        title = _none_safe_text(section.get("section_title", ""))
        payload = section.get("section_json", {})
        if not isinstance(payload, dict):
            payload = {}
        rendered = render_global_blocks(key, title, payload, debug)
        extra_sections.append({"rendered_html": Markup(rendered)})

    # Every university except the first starts on a new page.
    classes = ["uni"]
    if not is_first:
        classes.append("page-break")

    return {
        "name": uni_name,
        "anchor": uni_raw.get("anchor"),
        "sort_order": uni_raw.get("sort_order"),
        "website": resolved_website,
        "classes": classes,
        "overview": overview_data,
        "campus_image": campus_image,
        "campus_caption": campus_caption,
        "benefits": benefits,
        "programs": programs,
        "extra_sections": extra_sections,
    }
|
|
|
|
# Fallback used when no local label image file is available.
# NOTE(review): this is a remote URL on what looks like a development host and
# is used regardless of ``allow_remote``; confirm that is intended for a
# document described as self-contained.
_DEFAULT_LABEL_IMAGE_URL = (
    "https://finsapdev.qhtestingserver.com/MODEL_APIS/handbook/images/label.jpeg"
)

# Global sections that must be present for the handbook to be built.
_REQUIRED_GLOBAL_SECTION_KEYS = (
    "table_of_contents",
    "overview",
    "how_program_works_and_qualification_requirements",
)


def _parse_sort_order(value: Any) -> int | None:
    """Return ``value`` as an int when it looks like an integer, else ``None``.

    Inputs that pass the digit check but are still not valid for ``int()``
    (for example ``"--5"``) yield ``None`` instead of raising.
    """
    if value is None:
        return None
    if not str(value).lstrip("-").isdigit():
        return None
    try:
        return int(value)
    except (TypeError, ValueError):
        return None


def _local_file_uri(path: Any) -> str:
    """Return a ``file://`` URI for an existing local file, else ``""``.

    The path is made absolute first because ``Path.as_uri()`` raises
    ``ValueError`` for relative paths.
    """
    if not path:
        return ""
    path_str = str(path)
    if not os.path.isfile(path_str):
        return ""
    return Path(os.path.abspath(path_str)).as_uri()


def _local_file_data_uri(path: Any) -> str:
    """Return a base64 ``data:`` URI for an existing local file, else ``""``.

    The MIME type is guessed from the file name, defaulting to ``image/jpeg``.
    """
    if not path:
        return ""
    path_str = str(path)
    if not os.path.isfile(path_str):
        return ""
    mime = mimetypes.guess_type(path_str)[0] or "image/jpeg"
    with open(path_str, "rb") as fh:
        payload = base64.b64encode(fh.read()).decode()
    return f"data:{mime};base64,{payload}"


def _collect_active_universities(by_uni: dict[int, dict[str, Any]]) -> list[dict[str, Any]]:
    """Return normalized records for every active university in ``by_uni``."""
    active: list[dict[str, Any]] = []
    for uid, uni in by_uni.items():
        if not isinstance(uni, dict):
            continue
        if not is_truthy(uni.get("is_active", True)):
            continue
        # ``or`` (rather than a .get default) so that explicit nulls do not
        # become the literal text "None".
        name = str(uni.get("university_name") or f"University #{uid}")
        sections = uni.get("sections")
        active.append({
            "id": int(uid),
            "anchor": handbook_anchor("uni", name, int(uid)),
            "name": name,
            "sections": sections if isinstance(sections, list) else [],
            "website": str(uni.get("website") or ""),
            "sort_order": _parse_sort_order(uni.get("sort_order")),
        })
    return active


def _normalize_toc_entries(entries: Any) -> list[dict[str, Any]]:
    """Clean sorted TOC entries for the template.

    Entries that are not dicts or have a blank title are dropped, ``level``
    is clamped to 0..3, and level-0 entries are always bold and upper-cased.
    """
    normalized: list[dict[str, Any]] = []
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        title = str(entry.get("title", "")).strip()
        if not title:
            continue
        level = max(0, min(3, int(entry.get("level", 0))))
        bold = bool(entry.get("bold", False))
        upper = bool(entry.get("upper", False))
        if level == 0:
            bold = True
            upper = True
        normalized.append({
            "title": title,
            "display_title": title.upper() if upper else title,
            "target": str(entry.get("target", entry.get("anchor", ""))).strip(),
            "level": level,
            "bold": bold,
            "upper": upper,
            "page": str(entry.get("page", "")).strip(),
        })
    return normalized


def build_handbook_html(
    globals_data: list[dict[str, Any]],
    by_uni: dict[int, dict[str, Any]],
    images: dict[str, Any],
    allow_remote: bool,
    include_inactive_programs: bool = False,
    debug: bool = False,
) -> str:
    """Build the full handbook HTML document using Jinja2 templates.

    Args:
        globals_data: Global (non-university) section records; each is
            expected to carry ``section_key``, ``section_title``,
            ``section_json`` and optionally ``sort_order``.
        by_uni: University records keyed by university id.
        images: Local image paths under the keys ``coverImage``,
            ``tocImage``, ``headerImage``, ``labelImage`` and
            ``bottomPages`` (a list). Missing or non-existent files are
            skipped; ``None`` is treated as an empty mapping.
        allow_remote: Forwarded to the university preparation steps, which
            use it to decide whether campus images may be fetched.
        include_inactive_programs: Keep programs flagged as inactive.
        debug: Forwarded to the normalizers/renderers and the template.

    Returns:
        The rendered ``handbook.html`` template as a string.

    Raises:
        RuntimeError: If any of the required global sections
            (table of contents, overview, qualification requirements) is
            missing from ``globals_data``.
    """
    env = _get_jinja_env()
    template = env.get_template("handbook.html")

    font_css = font_face_css(select_font_family())
    base_url = _static_base_url()

    stats: dict[str, Any] = {
        "universities": 0,
        "images_embedded": 0,
        "images_placeholder": 0,
        "program_links_total": 0,
        "program_missing_links_total": 0,
        "missing_program_links": {},
        "university_links": 0,
        "website_rows": 0,
    }

    images = images or {}
    # Cover and TOC images are referenced by file URI; header and label
    # images are inlined as data URIs.
    cover_image = _local_file_uri(images.get("coverImage", ""))
    toc_image = _local_file_uri(images.get("tocImage", ""))
    header_image = _local_file_data_uri(images.get("headerImage", ""))
    label_image = (
        _local_file_data_uri(images.get("labelImage", "")) or _DEFAULT_LABEL_IMAGE_URL
    )

    active_universities = _collect_active_universities(by_uni)

    globals_data = sort_sections_stable(globals_data)

    existing_keys = {
        str(g.get("section_key", "")).lower()
        for g in globals_data
        if isinstance(g, dict)
    }
    missing = [k for k in _REQUIRED_GLOBAL_SECTION_KEYS if k not in existing_keys]
    if missing:
        msg = f"Handbook required sections missing: {','.join(missing)}"
        logger.error(msg)
        raise RuntimeError(msg)

    # Split the global sections into: the TOC marker (first occurrence only),
    # the university summary block, and everything else.
    general_sections: list[dict[str, Any]] = []
    summary_block: dict[str, Any] | None = None
    toc_sort_order = None
    toc_title = "Table of Contents"

    for idx, g in enumerate(globals_data):
        if not isinstance(g, dict):
            continue
        key = str(g.get("section_key", "")).lower()
        sort_order = _parse_sort_order(g.get("sort_order"))

        if key == "table_of_contents" and toc_sort_order is None:
            toc_sort_order = sort_order if sort_order is not None else (idx + 1)
            toc_title = str(g.get("section_title", "Table of Contents"))
            continue

        if key == "summary_of_universities":
            summary_block = {
                "anchor": handbook_anchor("summary", "summary-of-universities", idx),
                "data": g,
                "sort_order": sort_order,
            }
            continue

        anchor = handbook_anchor(
            "g", str(g.get("section_title", g.get("section_key", "section"))), idx
        )
        general_sections.append({
            "anchor": anchor,
            "data": g,
            "sort_order": sort_order,
        })

    # Table of contents: general sections, then the summary, then universities.
    toc_items: list[dict[str, Any]] = []
    for gs in general_sections:
        title = str(gs["data"].get("section_title", gs["data"].get("section_key", "Section")))
        toc_items.append({
            "title": title,
            "target": "#" + gs["anchor"],
            "level": 0,
            "bold": True,
            "sort": gs["sort_order"],
        })

    if summary_block:
        title = str(summary_block["data"].get("section_title", "Summary of Universities"))
        toc_items.append({
            "title": title,
            "target": "#" + summary_block["anchor"],
            "level": 0,
            "bold": True,
            "sort": summary_block["sort_order"],
        })

    for u in active_universities:
        toc_items.append({
            "title": u["name"],
            "target": "#" + u["anchor"],
            "level": 1,
            "bold": False,
            "sort": u.get("sort_order"),
        })

    # sort_toc receives a copy; the unsorted list is also given to the template.
    toc_items_sorted = _normalize_toc_entries(sort_toc(list(toc_items)))

    # General sections: both the normalized blocks and the pre-rendered HTML
    # are handed to the template.
    template_sections = []
    for gs in general_sections:
        data = gs["data"]
        section_key = str(data.get("section_key", ""))
        section_title = str(data.get("section_title", ""))
        key_lower = section_key.lower()

        sec_class = SECTION_CLASS_MAP.get(key_lower)
        if sec_class is None:
            sec_class = "sec-" + re.sub(r"[^a-z0-9]+", "-", key_lower)

        section_json = data.get("section_json", {})
        if not isinstance(section_json, dict):
            section_json = {}

        blocks = normalize_section(section_key, section_title, section_json, debug=debug)
        section_html = render_global_blocks(section_key, section_title, section_json, debug)

        if not section_html.strip() and not blocks:
            logger.warning(
                "Empty section render key=%s sort_order=%s",
                data.get("section_key"),
                data.get("sort_order"),
            )

        template_sections.append({
            "anchor": gs["anchor"],
            "data": data,
            "page_break": key_lower in PAGE_BREAK_KEYS,
            "sec_class": sec_class,
            "blocks": blocks,
            "rendered_html": Markup(section_html),
        })

    # Summary of universities (optional).
    summary_template = None
    if summary_block:
        data = summary_block["data"]
        section_key = str(data.get("section_key", ""))
        section_title = str(data.get("section_title", ""))
        section_json = data.get("section_json", {})
        if not isinstance(section_json, dict):
            section_json = {}

        summary_blocks = normalize_section(
            section_key,
            section_title,
            section_json,
            universities=active_universities,
            debug=debug,
        )
        summary_html = render_global_blocks(
            section_key,
            section_title,
            section_json,
            debug,
            universities=active_universities,
        )
        summary_template = {
            "anchor": summary_block["anchor"],
            "data": data,
            "blocks": summary_blocks,
            "rendered_html": Markup(summary_html),
        }

    # Universities are prepared twice: once as template data and once as
    # normalized blocks.
    # NOTE(review): both calls receive the same ``stats`` dict; if
    # normalize_university updates the same counters they are counted
    # twice - verify against its implementation.
    university_template_data = []
    university_block_data = []
    for idx, uni_raw in enumerate(active_universities):
        uni_raw["_is_first"] = (idx == 0)
        university_template_data.append(
            _prepare_university_data(
                uni_raw, allow_remote, include_inactive_programs, debug, stats,
            )
        )
        university_block_data.append(
            normalize_university(
                uni_raw, allow_remote, include_inactive_programs, debug, stats,
            )
        )

    # Trailing full-page images; entries that are not existing files are skipped.
    bottom_pages_urls = []
    raw_bottom = images.get("bottomPages", [])
    if isinstance(raw_bottom, list):
        for img_path in raw_bottom:
            page_uri = _local_file_uri(img_path)
            if page_uri:
                bottom_pages_urls.append(page_uri)

    return template.render(
        font_css=Markup(font_css),
        base_url=base_url,
        extra_css="",
        header_image=header_image,
        label_image=label_image,
        cover_image=cover_image,
        toc_image=toc_image,
        toc_items=toc_items,
        toc_items_sorted=toc_items_sorted,
        toc_title=toc_title,
        toc_sort_order=toc_sort_order,
        general_sections=template_sections,
        summary_block=summary_template,
        universities=university_template_data,
        university_blocks=university_block_data,
        bottom_pages=bottom_pages_urls,
        debug=debug,
        stats=stats,
    )
|
|