Spaces:

internationalscholarsprogram
/

handbook_engine

Running on CPU Upgrade

App Files Files Community

internationalscholarsprogram committed 6 days ago

Commit

93a688a

verified ·

1 Parent(s): 9e6b0ef

Update services: normalizer, html_builder, renderers, utils - fix PDF output to match V5.0 docx

Browse files

Files changed (6) hide show

app/services/html_builder.py +40 -82
app/services/normalizer.py +48 -197
app/services/renderers.py +46 -83
app/services/utils.py +40 -9
app/templates/handbook.html +2 -1
app/templates/partials/university.html +3 -3

app/services/html_builder.py CHANGED Viewed

@@ -78,6 +78,7 @@ SECTION_CLASS_MAP = {
     "program_features_breakdown": "sec-breakdown",
     "funding_options_available": "sec-funding",
     "summary_of_universities": "sec-summary",
 }
 PAGE_BREAK_KEYS = {
@@ -91,6 +92,7 @@ PAGE_BREAK_KEYS = {
     "program_features_breakdown",
     "funding_options_available",
     "summary_of_universities",
 }
@@ -260,39 +262,11 @@ def _prepare_university_data(
             if not link and isinstance(p.get("program_links"), dict):
                 link = str(p["program_links"].get("web_link", "")).strip()
-            # Build career HTML
-            career = p.get("career_pathways", [])
-            career_html = ""
-            if isinstance(career, list):
-                career_items = [str(x).strip() for x in career if str(x).strip()]
-                if career_items:
-                    career_html = '<ul class="career-list">'
-                    for ci in career_items:
-                        career_html += f"<li>{h(ci)}</li>"
-                    career_html += "</ul>"
-            else:
-                raw = str(career).strip()
-                if raw:
-                    import re as _re
-                    lines = [l.strip() for l in _re.split(r"[\r\n]+", raw) if l.strip()]
-                    if len(lines) > 1:
-                        career_html = '<ul class="career-list">'
-                        for line in lines:
-                            career_html += f"<li>{h(line)}</li>"
-                        career_html += "</ul>"
-                    else:
-                        career_html = h(raw)
-            if not career_html:
-                career_html = "&nbsp;"
             programs.append({
                 "name": program_name,
                 "link": link,
                 "designation": str(p.get("designation", "")),
                 "entrance": str(p.get("entrance_exam", p.get("entrance_examination", ""))),
-                "career_html": Markup(career_html),
-                "funding": str(p.get("funding_category", "")),
             })
     # Extra sections
@@ -398,7 +372,7 @@ def build_handbook_html(
         # Fallback to remote URL when local file is unavailable
         label_image = "https://finsapdev.qhtestingserver.com/MODEL_APIS/handbook/images/label.jpeg"
-    # ── Prepare active universities ──
     active_universities: list[dict[str, Any]] = []
     for uid, uni in by_uni.items():
         if not isinstance(uni, dict):
@@ -407,6 +381,9 @@ def build_handbook_html(
             continue
         name = str(uni.get("university_name", f"University #{uid}"))
         anchor = handbook_anchor("uni", name, int(uid))
         active_universities.append({
             "id": int(uid),
             "anchor": anchor,
@@ -414,8 +391,18 @@ def build_handbook_html(
             "sections": uni.get("sections", []) if isinstance(uni.get("sections"), list) else [],
             "website": str(uni.get("website", "")),
             "sort_order": int(uni["sort_order"]) if uni.get("sort_order") is not None and str(uni.get("sort_order", "")).lstrip("-").isdigit() else None,
         })
     # ── Normalise globals ──
     globals_data = sort_sections_stable(globals_data)
@@ -432,7 +419,6 @@ def build_handbook_html(
         raise RuntimeError(msg)
     general_sections: list[dict[str, Any]] = []
-    summary_block: dict[str, Any] | None = None
     toc_sort_order = None
     toc_title = "Table of Contents"
@@ -448,14 +434,6 @@ def build_handbook_html(
             toc_title = str(g.get("section_title", "Table of Contents"))
             continue
-        if key == "summary_of_universities":
-            summary_block = {
-                "anchor": handbook_anchor("summary", "summary-of-universities", idx),
-                "data": g,
-                "sort_order": sort_order,
-            }
-            continue
         section_hits: list[str] = []
         _collect_program_option_inconsistencies(
             g.get("section_json", {}),
@@ -476,7 +454,12 @@ def build_handbook_html(
     # ── Build TOC items ──
     toc_items: list[dict[str, Any]] = []
     for gs in general_sections:
-        title = str(gs["data"].get("section_title", gs["data"].get("section_key", "Section")))
         toc_items.append({
             "title": title,
             "target": "#" + gs["anchor"],
@@ -485,16 +468,6 @@ def build_handbook_html(
             "sort": gs["sort_order"],
         })
-    if summary_block:
-        title = str(summary_block["data"].get("section_title", "Summary of Universities"))
-        toc_items.append({
-            "title": title,
-            "target": "#" + summary_block["anchor"],
-            "level": 0,
-            "bold": True,
-            "sort": summary_block["sort_order"],
-        })
     for u in active_universities:
         toc_items.append({
             "title": u["name"],
@@ -578,44 +551,24 @@ def build_handbook_html(
             "rendered_html": Markup(section_html),
         })
-    # ── Prepare summary block ──
-    summary_template = None
-    if summary_block:
-        data = summary_block["data"]
-        section_json = data.get("section_json", {})
-        if not isinstance(section_json, dict):
-            section_json = {}
-        # Typed blocks for summary
-        summary_blocks = normalize_section(
-            str(data.get("section_key", "")),
-            str(data.get("section_title", "")),
-            section_json,
-            universities=active_universities,
-            debug=debug,
-        )
-        summary_html = render_global_blocks(
-            str(data.get("section_key", "")),
-            str(data.get("section_title", "")),
-            section_json,
-            debug,
-            universities=active_universities,
-        )
-        summary_template = {
-            "anchor": summary_block["anchor"],
-            "data": data,
-            "blocks": summary_blocks,
-            "rendered_html": Markup(summary_html),
-        }
     # ── Prepare university data for templates (both old + new paths) ──
     university_template_data = []
     university_block_data = []
     for idx, uni_raw in enumerate(active_universities):
         uni_raw["_is_first"] = (idx == 0)
         uni_hits: list[str] = []
         _collect_program_option_inconsistencies(
             uni_raw.get("sections", []),
@@ -630,6 +583,11 @@ def build_handbook_html(
         uni_data = _prepare_university_data(
             uni_raw, allow_remote, include_inactive_programs, debug, stats,
         )
         university_template_data.append(uni_data)
         # New block path
         uni_block = normalize_university(
@@ -665,7 +623,7 @@ def build_handbook_html(
         toc_title=toc_title,
         toc_sort_order=toc_sort_order,
         general_sections=template_sections,
-        summary_block=summary_template,
         universities=university_template_data,
         university_blocks=university_block_data,
         bottom_pages=bottom_pages_urls,

     "program_features_breakdown": "sec-breakdown",
     "funding_options_available": "sec-funding",
     "summary_of_universities": "sec-summary",
+    "summary_of_universities_cosigner": "sec-summary-cosigner",
 }
 PAGE_BREAK_KEYS = {
     "program_features_breakdown",
     "funding_options_available",
     "summary_of_universities",
+    "summary_of_universities_cosigner",
 }
             if not link and isinstance(p.get("program_links"), dict):
                 link = str(p["program_links"].get("web_link", "")).strip()
             programs.append({
                 "name": program_name,
                 "link": link,
                 "designation": str(p.get("designation", "")),
                 "entrance": str(p.get("entrance_exam", p.get("entrance_examination", ""))),
             })
     # Extra sections
         # Fallback to remote URL when local file is unavailable
         label_image = "https://finsapdev.qhtestingserver.com/MODEL_APIS/handbook/images/label.jpeg"
+    # ── Prepare active universities (sorted: Tier One first, Tier Two second) ──
     active_universities: list[dict[str, Any]] = []
     for uid, uni in by_uni.items():
         if not isinstance(uni, dict):
             continue
         name = str(uni.get("university_name", f"University #{uid}"))
         anchor = handbook_anchor("uni", name, int(uid))
+        school_category = str(uni.get("school_category", "")).strip()
+        tier = uni.get("tier")
+        tier_label = str(uni.get("tier_label", "")).strip()
         active_universities.append({
             "id": int(uid),
             "anchor": anchor,
             "sections": uni.get("sections", []) if isinstance(uni.get("sections"), list) else [],
             "website": str(uni.get("website", "")),
             "sort_order": int(uni["sort_order"]) if uni.get("sort_order") is not None and str(uni.get("sort_order", "")).lstrip("-").isdigit() else None,
+            "school_category": school_category,
+            "tier": tier,
+            "tier_label": tier_label,
         })
+    # Stable tier ordering: Tier One (non_cosigner) → Tier Two (cosigner) → others, then alphabetical
+    def _tier_sort(u: dict) -> tuple:
+        t = u.get("tier")
+        rank = t if isinstance(t, int) else 99
+        return (rank, (u.get("name") or "").lower(), u.get("id", 0))
+    active_universities.sort(key=_tier_sort)
     # ── Normalise globals ──
     globals_data = sort_sections_stable(globals_data)
         raise RuntimeError(msg)
     general_sections: list[dict[str, Any]] = []
     toc_sort_order = None
     toc_title = "Table of Contents"
             toc_title = str(g.get("section_title", "Table of Contents"))
             continue
         section_hits: list[str] = []
         _collect_program_option_inconsistencies(
             g.get("section_json", {}),
     # ── Build TOC items ──
     toc_items: list[dict[str, Any]] = []
     for gs in general_sections:
+        # Prefer the JSON-level title (display-ready) over the DB section_title
+        gs_json = gs["data"].get("section_json", {})
+        if isinstance(gs_json, dict) and gs_json.get("title", "").strip():
+            title = gs_json["title"].strip()
+        else:
+            title = str(gs["data"].get("section_title", gs["data"].get("section_key", "Section")))
         toc_items.append({
             "title": title,
             "target": "#" + gs["anchor"],
             "sort": gs["sort_order"],
         })
     for u in active_universities:
         toc_items.append({
             "title": u["name"],
             "rendered_html": Markup(section_html),
         })
     # ── Prepare university data for templates (both old + new paths) ──
+    # Group by tier for tier heading insertion in the PDF output
     university_template_data = []
     university_block_data = []
+    # Track which tier labels have already been emitted so each tier divider heading is inserted only once
+    _seen_tier_labels: set[str] = set()
     for idx, uni_raw in enumerate(active_universities):
         uni_raw["_is_first"] = (idx == 0)
+        # Insert tier group heading when tier changes
+        current_tier_label = str(uni_raw.get("tier_label", "")).strip()
+        if current_tier_label and current_tier_label not in _seen_tier_labels:
+            _seen_tier_labels.add(current_tier_label)
+            # Mark this university as starting a new tier group
+            uni_raw["_tier_group_start"] = True
+            uni_raw["_tier_group_label"] = f"{current_tier_label} Schools"
         uni_hits: list[str] = []
         _collect_program_option_inconsistencies(
             uni_raw.get("sections", []),
         uni_data = _prepare_university_data(
             uni_raw, allow_remote, include_inactive_programs, debug, stats,
         )
+        # Carry tier metadata to template data
+        uni_data["tier"] = uni_raw.get("tier")
+        uni_data["tier_label"] = uni_raw.get("tier_label", "")
+        uni_data["tier_group_start"] = uni_raw.get("_tier_group_start", False)
+        uni_data["tier_group_label"] = uni_raw.get("_tier_group_label", "")
         university_template_data.append(uni_data)
         # New block path
         uni_block = normalize_university(
         toc_title=toc_title,
         toc_sort_order=toc_sort_order,
         general_sections=template_sections,
+        summary_block=None,
         universities=university_template_data,
         university_blocks=university_block_data,
         bottom_pages=bottom_pages_urls,

app/services/normalizer.py CHANGED Viewed

@@ -69,15 +69,10 @@ def normalize_section(
     layout_norm = str(section_json.get("layout", "")).lower().strip()
-    # ── Summary of universities ──
-    if key_norm == "summary_of_universities":
-        blocks.extend(_normalize_university_summary(
-            section_title, section_json, layout_norm, universities or [],
-        ))
-        return blocks
     # ── Section heading ──
-    title = section_title.strip()
     if title and key_norm != "table_of_contents":
         blocks.append(RenderBlock(
             block_type="heading_1",
@@ -133,7 +128,7 @@ def normalize_section(
     # ── doc_v1 ──
     if layout_norm == "doc_v1" and isinstance(section_json.get("blocks"), list):
-        blocks.extend(_normalize_doc_v1(section_json["blocks"]))
         return blocks
     # ── Fallback ──
@@ -518,15 +513,28 @@ def _normalize_table_v2(json_data: dict) -> RenderBlock:
     )
-def _normalize_doc_v1(blocks: list) -> list[RenderBlock]:
-    """Normalise doc_v1 blocks into typed RenderBlocks."""
     from markupsafe import Markup
     result: list[RenderBlock] = []
     for b in blocks:
         if not isinstance(b, dict):
             continue
         btype = str(b.get("type", ""))
         if btype == "paragraph":
             t = _normalize_text_content(str(b.get("text", "")))
             if t.strip():
@@ -622,6 +630,35 @@ def _normalize_doc_v1(blocks: list) -> list[RenderBlock]:
                 data={"columns": [str(c) for c in t_cols], "rows": norm_rows, "variant": "standard"},
             ))
         elif btype in ("table_v3", "table_v4"):
             t_rows = b.get("rows", [])
             if not isinstance(t_rows, list):
@@ -652,189 +689,3 @@ def _normalize_doc_v1(blocks: list) -> list[RenderBlock]:
             ))
     return result
-_CLOSING_NOTE_MARKER = "we keep expanding and updating"
-def _is_closing_note(text: str) -> bool:
-    """Return True if text is the 'expanding/updating' closing paragraph."""
-    return _CLOSING_NOTE_MARKER in text.lower()
-def _normalize_university_summary(
-    section_title: str,
-    json_data: dict,
-    layout_norm: str,
-    universities: list[dict],
-) -> list[RenderBlock]:
-    """Normalise the summary_of_universities section.
-    Enforced structure:
-      1. Section heading
-      2. Introductory paragraphs (STEM / OPT etc.) – anything NOT the closing note
-      3. Numbered university list
-      4. Closing note paragraph ("We keep expanding and updating…")
-      5. Optional note field
-    """
-    pre_list_blocks: list[RenderBlock] = []   # intro content → before list
-    closing_blocks: list[RenderBlock] = []    # deferred → after list
-    title = section_title.strip()
-    # ── Collect intro / closing paragraphs ──
-    intro = _normalize_text_content(str(json_data.get("intro", "")).strip())
-    if intro:
-        target = closing_blocks if _is_closing_note(intro) else pre_list_blocks
-        target.append(RenderBlock(
-            block_type="paragraph",
-            css_class="hb-paragraph",
-            data={"text": intro},
-        ))
-    if layout_norm == "doc_v1" and isinstance(json_data.get("blocks"), list):
-        for b in json_data["blocks"]:
-            if not isinstance(b, dict):
-                continue
-            btype = str(b.get("type", ""))
-            if btype not in ("paragraph", "subheading", "note"):
-                continue
-            t = _normalize_text_content(str(b.get("text", "")))
-            if not t.strip():
-                continue
-            if btype == "subheading":
-                pre_list_blocks.append(RenderBlock(
-                    block_type="heading_2", css_class="hb-heading-2", data={"text": t},
-                ))
-            elif btype == "note":
-                target = closing_blocks if _is_closing_note(t) else pre_list_blocks
-                target.append(RenderBlock(
-                    block_type="note", css_class="hb-note", data={"text": t},
-                ))
-            else:
-                target = closing_blocks if _is_closing_note(t) else pre_list_blocks
-                target.append(RenderBlock(
-                    block_type="paragraph", css_class="hb-paragraph", data={"text": t},
-                ))
-    # ── Resolve university list (tier-grouped) ──
-    resolved: list[str] = []
-    tier_one_names: list[str] = []
-    tier_two_names: list[str] = []
-    if universities:
-        def uni_sort_key(u):
-            so = u.get("sort_order") if isinstance(u, dict) else None
-            if so is not None:
-                try:
-                    return (0, float(so))
-                except (ValueError, TypeError):
-                    pass
-            return (1, 0.0)
-        sorted_unis = sorted(universities, key=uni_sort_key)
-        for u in sorted_unis:
-            if isinstance(u, dict):
-                name = str(u.get("university_name", u.get("name", ""))).strip()
-                if name:
-                    resolved.append(name)
-                    # Group by tier for sub-headings
-                    tier = u.get("tier")
-                    if tier == 1:
-                        tier_one_names.append(name)
-                    elif tier == 2:
-                        tier_two_names.append(name)
-                    else:
-                        tier_one_names.append(name)  # default to tier one
-    if not resolved and layout_norm == "doc_v1" and isinstance(json_data.get("blocks"), list):
-        for b in json_data["blocks"]:
-            if not isinstance(b, dict) or str(b.get("type", "")) != "bullets":
-                continue
-            items = b.get("items", [])
-            if isinstance(items, list):
-                for it in items:
-                    it_str = str(it).strip()
-                    if it_str:
-                        resolved.append(it_str)
-    # Dedupe
-    seen: set[str] = set()
-    deduped: list[str] = []
-    for nm in resolved:
-        k = nm.lower().strip()
-        if k and k not in seen:
-            seen.add(k)
-            deduped.append(nm)
-    # ── Assemble in enforced order ──
-    blocks: list[RenderBlock] = []
-    if title:
-        blocks.append(RenderBlock(
-            block_type="heading_1",
-            css_class="hb-heading-1",
-            data={"text": title},
-        ))
-    blocks.extend(pre_list_blocks)
-    # Render university summary grouped by tier when tier data is available
-    if tier_one_names or tier_two_names:
-        # Tier One sub-group
-        if tier_one_names:
-            blocks.append(RenderBlock(
-                block_type="heading_2",
-                css_class="hb-heading-2",
-                data={"text": "Tier One Schools"},
-            ))
-            seen_t1: set[str] = set()
-            deduped_t1 = []
-            for nm in tier_one_names:
-                k = nm.lower().strip()
-                if k and k not in seen_t1:
-                    seen_t1.add(k)
-                    deduped_t1.append(nm)
-            blocks.append(RenderBlock(
-                block_type="university_summary",
-                css_class="hb-university-summary",
-                data={"universities": deduped_t1},
-            ))
-        # Tier Two sub-group (no redundant "Summary of Universities" heading)
-        if tier_two_names:
-            blocks.append(RenderBlock(
-                block_type="heading_2",
-                css_class="hb-heading-2",
-                data={"text": "Tier Two Schools"},
-            ))
-            seen_t2: set[str] = set()
-            deduped_t2 = []
-            for nm in tier_two_names:
-                k = nm.lower().strip()
-                if k and k not in seen_t2:
-                    seen_t2.add(k)
-                    deduped_t2.append(nm)
-            blocks.append(RenderBlock(
-                block_type="university_summary",
-                css_class="hb-university-summary",
-                data={"universities": deduped_t2},
-            ))
-    elif deduped:
-        # Fallback: no tier data available, render flat list (backward compat)
-        blocks.append(RenderBlock(
-            block_type="university_summary",
-            css_class="hb-university-summary",
-            data={"universities": deduped},
-        ))
-    # Closing note always after the list
-    blocks.extend(closing_blocks)
-    note = str(json_data.get("note", "")).strip()
-    if note:
-        blocks.append(RenderBlock(
-            block_type="note",
-            css_class="hb-note",
-            data={"text": _normalize_text_content(note)},
-        ))
-    return blocks

     layout_norm = str(section_json.get("layout", "")).lower().strip()
     # ── Section heading ──
+    # Prefer the JSON-level title (display-ready) over the DB section_title
+    json_title = str(section_json.get("title", "")).strip() if isinstance(section_json, dict) else ""
+    title = json_title or section_title.strip()
     if title and key_norm != "table_of_contents":
         blocks.append(RenderBlock(
             block_type="heading_1",
     # ── doc_v1 ──
     if layout_norm == "doc_v1" and isinstance(section_json.get("blocks"), list):
+        blocks.extend(_normalize_doc_v1(section_json["blocks"], skip_title=title))
         return blocks
     # ── Fallback ──
     )
+def _normalize_doc_v1(blocks: list, *, skip_title: str = "") -> list[RenderBlock]:
+    """Normalise doc_v1 blocks into typed RenderBlocks.
+    Args:
+        skip_title: When set, any leading heading/subheading block whose text
+            matches this title (case-insensitive) is dropped to avoid
+            duplicating the section heading already emitted by the caller.
+    """
     from markupsafe import Markup
+    _skip_norm = skip_title.strip().lower() if skip_title else ""
     result: list[RenderBlock] = []
     for b in blocks:
         if not isinstance(b, dict):
             continue
         btype = str(b.get("type", ""))
+        # Skip heading/subheading blocks that duplicate the section title
+        if _skip_norm and btype in ("heading", "subheading"):
+            block_text = str(b.get("text", "")).strip().lower()
+            if block_text == _skip_norm:
+                continue
         if btype == "paragraph":
             t = _normalize_text_content(str(b.get("text", "")))
             if t.strip():
                 data={"columns": [str(c) for c in t_cols], "rows": norm_rows, "variant": "standard"},
             ))
+        elif btype == "table":
+            # Generic table (columns may be objects or strings, rows may be dicts or lists)
+            t_cols = b.get("columns", [])
+            t_rows = b.get("rows", [])
+            if not isinstance(t_cols, list):
+                t_cols = []
+            if not isinstance(t_rows, list):
+                t_rows = []
+            col_labels = []
+            col_keys = []
+            for c in t_cols:
+                if isinstance(c, dict):
+                    col_labels.append(str(c.get("label", c.get("key", ""))))
+                    col_keys.append(str(c.get("key", "")))
+                else:
+                    col_labels.append(str(c))
+                    col_keys.append(re.sub(r"[^a-z0-9]+", "_", str(c).lower()))
+            norm_rows = []
+            for r in t_rows:
+                if isinstance(r, dict):
+                    norm_rows.append([emphasize_keywords(_normalize_text_content(str(r.get(k, "")))) for k in col_keys])
+                elif isinstance(r, list):
+                    norm_rows.append([emphasize_keywords(_normalize_text_content(str(cell))) for cell in r])
+            result.append(RenderBlock(
+                block_type="table",
+                css_class="hb-table",
+                data={"columns": col_labels, "rows": norm_rows, "variant": "standard"},
+            ))
         elif btype in ("table_v3", "table_v4"):
             t_rows = b.get("rows", [])
             if not isinstance(t_rows, list):
             ))
     return result

app/services/renderers.py CHANGED Viewed

@@ -349,92 +349,13 @@ def render_global_blocks(
     layout_norm = str(json_data.get("layout", "")).lower().strip()
-    # ── Summary of universities ──
-    if key_norm == "summary_of_universities":
-        unis = universities or []
-        title = section_title.strip()
-        if title:
-            html_out += f'<h2 class="h2">{h(title)}</h2>'
-        intro = str(json_data.get("intro", "")).strip()
-        if intro:
-            html_out += f'<p class="p">{h(format_money_figures(intro))}</p>'
-        elif layout_norm == "doc_v1" and isinstance(json_data.get("blocks"), list):
-            for b in json_data["blocks"]:
-                if not isinstance(b, dict):
-                    continue
-                btype = str(b.get("type", ""))
-                if btype not in ("paragraph", "subheading", "note"):
-                    continue
-                t = format_money_figures(str(b.get("text", "")))
-                if not t.strip():
-                    continue
-                if btype == "subheading":
-                    html_out += f'<h3 class="h3">{h(t)}</h3>'
-                elif btype == "note":
-                    html_out += f'<div class="note">{h(t)}</div>'
-                else:
-                    html_out += f'<p class="p">{emphasize_keywords(t)}</p>'
-        # Resolve list from universities or doc_v1 bullets
-        resolved: list[str] = []
-        if unis:
-            def uni_sort_key(u):
-                so = u.get("sort_order") if isinstance(u, dict) else None
-                if so is not None:
-                    try:
-                        return (0, float(so))
-                    except (ValueError, TypeError):
-                        pass
-                return (1, 0.0)
-            sorted_unis = sorted(unis, key=uni_sort_key)
-            for u in sorted_unis:
-                if not isinstance(u, dict):
-                    continue
-                name = str(u.get("university_name", u.get("name", ""))).strip()
-                if name:
-                    resolved.append(name)
-        if not resolved and layout_norm == "doc_v1" and isinstance(json_data.get("blocks"), list):
-            for b in json_data["blocks"]:
-                if not isinstance(b, dict) or str(b.get("type", "")) != "bullets":
-                    continue
-                items = b.get("items", [])
-                if not isinstance(items, list):
-                    continue
-                for it in items:
-                    it_str = str(it).strip()
-                    if it_str:
-                        resolved.append(it_str)
-        # Dedupe
-        seen: set[str] = set()
-        deduped: list[str] = []
-        for nm in resolved:
-            k = nm.lower().strip()
-            if not k or k in seen:
-                continue
-            seen.add(k)
-            deduped.append(nm)
-        if deduped:
-            html_out += '<ol class="ol">'
-            for name in deduped:
-                anchor = "university_" + hb_slug(name)
-                html_out += f'<li><a href="#{h(anchor)}">{h(name)}</a></li>'
-            html_out += "</ol>"
-        note = str(json_data.get("note", "")).strip()
-        if note:
-            html_out += f'<div class="note">{h(format_money_figures(note))}</div>'
-        return html_out
     # ── Section title ──
-    title = section_title.strip()
     if title and key_norm != "table_of_contents":
         html_out += f'<h2 class="h2">{h(title)}</h2>'
     # ── Steps ──
     steps = json_data.get("steps")
@@ -600,6 +521,12 @@ def render_global_blocks(
                 continue
             btype = str(b.get("type", ""))
             if btype == "paragraph":
                 t = format_money_figures(str(b.get("text", "")))
                 if t.strip():
@@ -679,6 +606,42 @@ def render_global_blocks(
                     html_out += "</tr>"
                 html_out += "</tbody></table>"
             elif btype in ("table_v3", "table_v4"):
                 t_rows = b.get("rows", [])
                 h_rows = b.get("header_rows", [])

     layout_norm = str(json_data.get("layout", "")).lower().strip()
     # ── Section title ──
+    # Prefer the JSON-level title (display-ready) over the DB section_title
+    json_title = str(json_data.get("title", "")).strip() if isinstance(json_data, dict) else ""
+    title = json_title or section_title.strip()
     if title and key_norm != "table_of_contents":
         html_out += f'<h2 class="h2">{h(title)}</h2>'
+    _title_norm = title.lower()
     # ── Steps ──
     steps = json_data.get("steps")
                 continue
             btype = str(b.get("type", ""))
+            # Skip heading/subheading blocks that duplicate the section title
+            if btype in ("heading", "subheading"):
+                block_text = str(b.get("text", "")).strip().lower()
+                if block_text == _title_norm:
+                    continue
             if btype == "paragraph":
                 t = format_money_figures(str(b.get("text", "")))
                 if t.strip():
                     html_out += "</tr>"
                 html_out += "</tbody></table>"
+            elif btype == "table":
+                # Generic table (columns may be objects or strings, rows may be dicts or lists)
+                t_cols = b.get("columns", [])
+                t_rows = b.get("rows", [])
+                if not isinstance(t_cols, list):
+                    t_cols = []
+                if not isinstance(t_rows, list):
+                    t_rows = []
+                col_labels = []
+                col_keys = []
+                for c in t_cols:
+                    if isinstance(c, dict):
+                        col_labels.append(str(c.get("label", c.get("key", ""))))
+                        col_keys.append(str(c.get("key", "")))
+                    else:
+                        col_labels.append(str(c))
+                        col_keys.append(re.sub(r"[^a-z0-9]+", "_", str(c).lower()))
+                html_out += '<table class="tbl">'
+                if col_labels:
+                    html_out += "<thead><tr>"
+                    for lbl in col_labels:
+                        html_out += f"<th>{h(lbl)}</th>"
+                    html_out += "</tr></thead>"
+                html_out += "<tbody>"
+                for r in t_rows:
+                    html_out += "<tr>"
+                    if isinstance(r, dict):
+                        for k in col_keys:
+                            cell = r.get(k, "")
+                            html_out += f"<td>{h(format_money_figures(str(cell)))}</td>"
+                    elif isinstance(r, list):
+                        for cell in r:
+                            html_out += f"<td>{h(format_money_figures(str(cell)))}</td>"
+                    html_out += "</tr>"
+                html_out += "</tbody></table>"
             elif btype in ("table_v3", "table_v4"):
                 t_rows = b.get("rows", [])
                 h_rows = b.get("header_rows", [])

app/services/utils.py CHANGED Viewed

@@ -51,20 +51,52 @@ def format_money_figures(text: str) -> str:
     """Normalize all monetary figures to "USD X,XXX" format.
     - Converts existing $X,XXX → USD X,XXX
-    - Normalizes bare large numbers → USD X,XXX
     - Formats with commas
     - Currency type is always USD (no $ symbol)
     """
     if not text:
         return text
-    # Normalize "$X,XXX" → bare number (strip $ symbol)
-    text = re.sub(r'\$([\d,]+(?:\.\d+)?)', lambda m: m.group(1), text)
-    # Normalize "USD X,XXX" → bare number for uniform re-processing
-    text = re.sub(r'\bUSD\s+([\d,]+(?:\.\d+)?)', lambda m: m.group(1), text, flags=re.IGNORECASE)
-    def _format_match(m: re.Match) -> str:
         num_str = m.group(1).replace(",", "")
         dec = m.group(2) if m.group(2) else ""
         try:
@@ -77,10 +109,9 @@ def format_money_figures(text: str) -> str:
             formatted = f"{num:,.0f}"
         return "USD " + formatted
-    # Add "USD " to large numbers (4+ digits or already comma-formatted)
     text = re.sub(
-        r"(?<!\d)((?:\d{1,3}(?:,\d{3})+)|(?:\d{4,}))(?:\.(\d+))?(?![%\d/])",
-        _format_match,
         text,
     )

     """Normalize all monetary figures to "USD X,XXX" format.
     - Converts existing $X,XXX → USD X,XXX
+    - Normalizes bare large numbers (1,000+) → USD X,XXX
     - Formats with commas
     - Currency type is always USD (no $ symbol)
     """
     if not text:
         return text
+    # Step 1: Convert "$X" → "USD X" directly (preserves ALL dollar amounts)
+    def _dollar_to_usd(m: re.Match) -> str:
+        num_str = m.group(1).replace(",", "")
+        try:
+            num = float(num_str)
+        except ValueError:
+            return m.group(0)
+        if "." in m.group(1):
+            dec_part = m.group(1).split(".")[-1]
+            formatted = f"{num:,.{len(dec_part)}f}"
+        elif num == int(num):
+            formatted = f"{int(num):,}"
+        else:
+            formatted = f"{num:,.2f}"
+        return "USD " + formatted
+    text = re.sub(r'\$([\d,]+(?:\.\d+)?)', _dollar_to_usd, text)
+    # Step 2: Normalize existing "USD X,XXX" for consistent comma formatting
+    def _normalize_usd(m: re.Match) -> str:
+        num_str = m.group(1).replace(",", "")
+        try:
+            num = float(num_str)
+        except ValueError:
+            return m.group(0)
+        if "." in m.group(1):
+            dec_part = m.group(1).split(".")[-1]
+            formatted = f"{num:,.{len(dec_part)}f}"
+        elif num == int(num):
+            formatted = f"{int(num):,}"
+        else:
+            formatted = f"{num:,.2f}"
+        return "USD " + formatted
+    text = re.sub(r'\bUSD\s+([\d,]+(?:\.\d+)?)', _normalize_usd, text, flags=re.IGNORECASE)
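+    # Step 3: Add "USD " to bare large numbers (4+ digits or comma-formatted)
+    # whose first digit is not directly preceded by the literal "USD ".
+    # NOTE(review): the lookbehinds only guard the start of a match, so the
+    # digit groups after the first comma of an already-prefixed amount of
+    # 1,000,000 or more (e.g. the "234,567" in "USD 1,234,567") appear to
+    # still match here and get a second "USD " inserted - confirm and fix.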
+    def _format_bare_large(m: re.Match) -> str:
         num_str = m.group(1).replace(",", "")
         dec = m.group(2) if m.group(2) else ""
         try:
             formatted = f"{num:,.0f}"
         return "USD " + formatted
     text = re.sub(
+        r"(?<!\d)(?<!USD )((?:\d{1,3}(?:,\d{3})+)|(?:\d{4,}))(?:\.(\d+))?(?![%\d/])",
+        _format_bare_large,
         text,
     )

app/templates/handbook.html CHANGED Viewed

@@ -95,7 +95,8 @@
         {% for uni in universities %}
         {% if uni.tier_group_start and uni.tier_group_label %}
         <div class="section-block page-break tier-group-heading" data-tier="{{ uni.tier_label | default('') | e }}">
-            <h1 class="h1 hb-heading-1" style="margin-top:0.5em;margin-bottom:0.3em;">{{ uni.tier_group_label | e }}</h1>
         </div>
         {% endif %}
         {% include "partials/university.html" %}

         {% for uni in universities %}
         {% if uni.tier_group_start and uni.tier_group_label %}
         <div class="section-block page-break tier-group-heading" data-tier="{{ uni.tier_label | default('') | e }}">
+            <h1 class="h1 hb-heading-1" style="margin-top:0.5em;margin-bottom:0.3em;">{{ uni.tier_group_label | e }}
+            </h1>
         </div>
         {% endif %}
         {% include "partials/university.html" %}

app/templates/partials/university.html CHANGED Viewed

@@ -83,9 +83,9 @@
     <table class="programs">
         <thead>
             <tr>
-                <th style="width:34%">Program</th>
-                <th style="width:33%">Designation</th>
-                <th style="width:33%">Entrance Examination</th>
             </tr>
         </thead>
         <tbody>

     <table class="programs">
         <thead>
             <tr>
+                <th>Program</th>
+                <th>Designation</th>
+                <th>Entrance Examination</th>
             </tr>
         </thead>
         <tbody>