Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
Update services: normalizer, html_builder, renderers, utils - fix PDF output to match V5.0 docx
Browse files
- app/services/html_builder.py +40 -82
- app/services/normalizer.py +48 -197
- app/services/renderers.py +46 -83
- app/services/utils.py +40 -9
- app/templates/handbook.html +2 -1
- app/templates/partials/university.html +3 -3
app/services/html_builder.py
CHANGED
|
@@ -78,6 +78,7 @@ SECTION_CLASS_MAP = {
|
|
| 78 |
"program_features_breakdown": "sec-breakdown",
|
| 79 |
"funding_options_available": "sec-funding",
|
| 80 |
"summary_of_universities": "sec-summary",
|
|
|
|
| 81 |
}
|
| 82 |
|
| 83 |
PAGE_BREAK_KEYS = {
|
|
@@ -91,6 +92,7 @@ PAGE_BREAK_KEYS = {
|
|
| 91 |
"program_features_breakdown",
|
| 92 |
"funding_options_available",
|
| 93 |
"summary_of_universities",
|
|
|
|
| 94 |
}
|
| 95 |
|
| 96 |
|
|
@@ -260,39 +262,11 @@ def _prepare_university_data(
|
|
| 260 |
if not link and isinstance(p.get("program_links"), dict):
|
| 261 |
link = str(p["program_links"].get("web_link", "")).strip()
|
| 262 |
|
| 263 |
-
# Build career HTML
|
| 264 |
-
career = p.get("career_pathways", [])
|
| 265 |
-
career_html = ""
|
| 266 |
-
if isinstance(career, list):
|
| 267 |
-
career_items = [str(x).strip() for x in career if str(x).strip()]
|
| 268 |
-
if career_items:
|
| 269 |
-
career_html = '<ul class="career-list">'
|
| 270 |
-
for ci in career_items:
|
| 271 |
-
career_html += f"<li>{h(ci)}</li>"
|
| 272 |
-
career_html += "</ul>"
|
| 273 |
-
else:
|
| 274 |
-
raw = str(career).strip()
|
| 275 |
-
if raw:
|
| 276 |
-
import re as _re
|
| 277 |
-
lines = [l.strip() for l in _re.split(r"[\r\n]+", raw) if l.strip()]
|
| 278 |
-
if len(lines) > 1:
|
| 279 |
-
career_html = '<ul class="career-list">'
|
| 280 |
-
for line in lines:
|
| 281 |
-
career_html += f"<li>{h(line)}</li>"
|
| 282 |
-
career_html += "</ul>"
|
| 283 |
-
else:
|
| 284 |
-
career_html = h(raw)
|
| 285 |
-
|
| 286 |
-
if not career_html:
|
| 287 |
-
career_html = " "
|
| 288 |
-
|
| 289 |
programs.append({
|
| 290 |
"name": program_name,
|
| 291 |
"link": link,
|
| 292 |
"designation": str(p.get("designation", "")),
|
| 293 |
"entrance": str(p.get("entrance_exam", p.get("entrance_examination", ""))),
|
| 294 |
-
"career_html": Markup(career_html),
|
| 295 |
-
"funding": str(p.get("funding_category", "")),
|
| 296 |
})
|
| 297 |
|
| 298 |
# Extra sections
|
|
@@ -398,7 +372,7 @@ def build_handbook_html(
|
|
| 398 |
# Fallback to remote URL when local file is unavailable
|
| 399 |
label_image = "https://finsapdev.qhtestingserver.com/MODEL_APIS/handbook/images/label.jpeg"
|
| 400 |
|
| 401 |
-
# ── Prepare active universities ──
|
| 402 |
active_universities: list[dict[str, Any]] = []
|
| 403 |
for uid, uni in by_uni.items():
|
| 404 |
if not isinstance(uni, dict):
|
|
@@ -407,6 +381,9 @@ def build_handbook_html(
|
|
| 407 |
continue
|
| 408 |
name = str(uni.get("university_name", f"University #{uid}"))
|
| 409 |
anchor = handbook_anchor("uni", name, int(uid))
|
|
|
|
|
|
|
|
|
|
| 410 |
active_universities.append({
|
| 411 |
"id": int(uid),
|
| 412 |
"anchor": anchor,
|
|
@@ -414,8 +391,18 @@ def build_handbook_html(
|
|
| 414 |
"sections": uni.get("sections", []) if isinstance(uni.get("sections"), list) else [],
|
| 415 |
"website": str(uni.get("website", "")),
|
| 416 |
"sort_order": int(uni["sort_order"]) if uni.get("sort_order") is not None and str(uni.get("sort_order", "")).lstrip("-").isdigit() else None,
|
|
|
|
|
|
|
|
|
|
| 417 |
})
|
| 418 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 419 |
# ── Normalise globals ──
|
| 420 |
globals_data = sort_sections_stable(globals_data)
|
| 421 |
|
|
@@ -432,7 +419,6 @@ def build_handbook_html(
|
|
| 432 |
raise RuntimeError(msg)
|
| 433 |
|
| 434 |
general_sections: list[dict[str, Any]] = []
|
| 435 |
-
summary_block: dict[str, Any] | None = None
|
| 436 |
toc_sort_order = None
|
| 437 |
toc_title = "Table of Contents"
|
| 438 |
|
|
@@ -448,14 +434,6 @@ def build_handbook_html(
|
|
| 448 |
toc_title = str(g.get("section_title", "Table of Contents"))
|
| 449 |
continue
|
| 450 |
|
| 451 |
-
if key == "summary_of_universities":
|
| 452 |
-
summary_block = {
|
| 453 |
-
"anchor": handbook_anchor("summary", "summary-of-universities", idx),
|
| 454 |
-
"data": g,
|
| 455 |
-
"sort_order": sort_order,
|
| 456 |
-
}
|
| 457 |
-
continue
|
| 458 |
-
|
| 459 |
section_hits: list[str] = []
|
| 460 |
_collect_program_option_inconsistencies(
|
| 461 |
g.get("section_json", {}),
|
|
@@ -476,7 +454,12 @@ def build_handbook_html(
|
|
| 476 |
# ── Build TOC items ──
|
| 477 |
toc_items: list[dict[str, Any]] = []
|
| 478 |
for gs in general_sections:
|
| 479 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 480 |
toc_items.append({
|
| 481 |
"title": title,
|
| 482 |
"target": "#" + gs["anchor"],
|
|
@@ -485,16 +468,6 @@ def build_handbook_html(
|
|
| 485 |
"sort": gs["sort_order"],
|
| 486 |
})
|
| 487 |
|
| 488 |
-
if summary_block:
|
| 489 |
-
title = str(summary_block["data"].get("section_title", "Summary of Universities"))
|
| 490 |
-
toc_items.append({
|
| 491 |
-
"title": title,
|
| 492 |
-
"target": "#" + summary_block["anchor"],
|
| 493 |
-
"level": 0,
|
| 494 |
-
"bold": True,
|
| 495 |
-
"sort": summary_block["sort_order"],
|
| 496 |
-
})
|
| 497 |
-
|
| 498 |
for u in active_universities:
|
| 499 |
toc_items.append({
|
| 500 |
"title": u["name"],
|
|
@@ -578,44 +551,24 @@ def build_handbook_html(
|
|
| 578 |
"rendered_html": Markup(section_html),
|
| 579 |
})
|
| 580 |
|
| 581 |
-
# ── Prepare summary block ──
|
| 582 |
-
summary_template = None
|
| 583 |
-
if summary_block:
|
| 584 |
-
data = summary_block["data"]
|
| 585 |
-
section_json = data.get("section_json", {})
|
| 586 |
-
if not isinstance(section_json, dict):
|
| 587 |
-
section_json = {}
|
| 588 |
-
|
| 589 |
-
# Typed blocks for summary
|
| 590 |
-
summary_blocks = normalize_section(
|
| 591 |
-
str(data.get("section_key", "")),
|
| 592 |
-
str(data.get("section_title", "")),
|
| 593 |
-
section_json,
|
| 594 |
-
universities=active_universities,
|
| 595 |
-
debug=debug,
|
| 596 |
-
)
|
| 597 |
-
|
| 598 |
-
summary_html = render_global_blocks(
|
| 599 |
-
str(data.get("section_key", "")),
|
| 600 |
-
str(data.get("section_title", "")),
|
| 601 |
-
section_json,
|
| 602 |
-
debug,
|
| 603 |
-
universities=active_universities,
|
| 604 |
-
)
|
| 605 |
-
|
| 606 |
-
summary_template = {
|
| 607 |
-
"anchor": summary_block["anchor"],
|
| 608 |
-
"data": data,
|
| 609 |
-
"blocks": summary_blocks,
|
| 610 |
-
"rendered_html": Markup(summary_html),
|
| 611 |
-
}
|
| 612 |
-
|
| 613 |
# ── Prepare university data for templates (both old + new paths) ──
|
|
|
|
| 614 |
university_template_data = []
|
| 615 |
university_block_data = []
|
|
|
|
|
|
|
|
|
|
| 616 |
for idx, uni_raw in enumerate(active_universities):
|
| 617 |
uni_raw["_is_first"] = (idx == 0)
|
| 618 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 619 |
uni_hits: list[str] = []
|
| 620 |
_collect_program_option_inconsistencies(
|
| 621 |
uni_raw.get("sections", []),
|
|
@@ -630,6 +583,11 @@ def build_handbook_html(
|
|
| 630 |
uni_data = _prepare_university_data(
|
| 631 |
uni_raw, allow_remote, include_inactive_programs, debug, stats,
|
| 632 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 633 |
university_template_data.append(uni_data)
|
| 634 |
# New block path
|
| 635 |
uni_block = normalize_university(
|
|
@@ -665,7 +623,7 @@ def build_handbook_html(
|
|
| 665 |
toc_title=toc_title,
|
| 666 |
toc_sort_order=toc_sort_order,
|
| 667 |
general_sections=template_sections,
|
| 668 |
-
summary_block=
|
| 669 |
universities=university_template_data,
|
| 670 |
university_blocks=university_block_data,
|
| 671 |
bottom_pages=bottom_pages_urls,
|
|
|
|
| 78 |
"program_features_breakdown": "sec-breakdown",
|
| 79 |
"funding_options_available": "sec-funding",
|
| 80 |
"summary_of_universities": "sec-summary",
|
| 81 |
+
"summary_of_universities_cosigner": "sec-summary-cosigner",
|
| 82 |
}
|
| 83 |
|
| 84 |
PAGE_BREAK_KEYS = {
|
|
|
|
| 92 |
"program_features_breakdown",
|
| 93 |
"funding_options_available",
|
| 94 |
"summary_of_universities",
|
| 95 |
+
"summary_of_universities_cosigner",
|
| 96 |
}
|
| 97 |
|
| 98 |
|
|
|
|
| 262 |
if not link and isinstance(p.get("program_links"), dict):
|
| 263 |
link = str(p["program_links"].get("web_link", "")).strip()
|
| 264 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
programs.append({
|
| 266 |
"name": program_name,
|
| 267 |
"link": link,
|
| 268 |
"designation": str(p.get("designation", "")),
|
| 269 |
"entrance": str(p.get("entrance_exam", p.get("entrance_examination", ""))),
|
|
|
|
|
|
|
| 270 |
})
|
| 271 |
|
| 272 |
# Extra sections
|
|
|
|
| 372 |
# Fallback to remote URL when local file is unavailable
|
| 373 |
label_image = "https://finsapdev.qhtestingserver.com/MODEL_APIS/handbook/images/label.jpeg"
|
| 374 |
|
| 375 |
+
# ── Prepare active universities (sorted: Tier One first, Tier Two second) ──
|
| 376 |
active_universities: list[dict[str, Any]] = []
|
| 377 |
for uid, uni in by_uni.items():
|
| 378 |
if not isinstance(uni, dict):
|
|
|
|
| 381 |
continue
|
| 382 |
name = str(uni.get("university_name", f"University #{uid}"))
|
| 383 |
anchor = handbook_anchor("uni", name, int(uid))
|
| 384 |
+
school_category = str(uni.get("school_category", "")).strip()
|
| 385 |
+
tier = uni.get("tier")
|
| 386 |
+
tier_label = str(uni.get("tier_label", "")).strip()
|
| 387 |
active_universities.append({
|
| 388 |
"id": int(uid),
|
| 389 |
"anchor": anchor,
|
|
|
|
| 391 |
"sections": uni.get("sections", []) if isinstance(uni.get("sections"), list) else [],
|
| 392 |
"website": str(uni.get("website", "")),
|
| 393 |
"sort_order": int(uni["sort_order"]) if uni.get("sort_order") is not None and str(uni.get("sort_order", "")).lstrip("-").isdigit() else None,
|
| 394 |
+
"school_category": school_category,
|
| 395 |
+
"tier": tier,
|
| 396 |
+
"tier_label": tier_label,
|
| 397 |
})
|
| 398 |
|
| 399 |
+
# Stable tier ordering: Tier One (non_cosigner) → Tier Two (cosigner) → others, then alphabetical
|
| 400 |
+
def _tier_sort(u: dict) -> tuple:
|
| 401 |
+
t = u.get("tier")
|
| 402 |
+
rank = t if isinstance(t, int) else 99
|
| 403 |
+
return (rank, (u.get("name") or "").lower(), u.get("id", 0))
|
| 404 |
+
active_universities.sort(key=_tier_sort)
|
| 405 |
+
|
| 406 |
# ── Normalise globals ──
|
| 407 |
globals_data = sort_sections_stable(globals_data)
|
| 408 |
|
|
|
|
| 419 |
raise RuntimeError(msg)
|
| 420 |
|
| 421 |
general_sections: list[dict[str, Any]] = []
|
|
|
|
| 422 |
toc_sort_order = None
|
| 423 |
toc_title = "Table of Contents"
|
| 424 |
|
|
|
|
| 434 |
toc_title = str(g.get("section_title", "Table of Contents"))
|
| 435 |
continue
|
| 436 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 437 |
section_hits: list[str] = []
|
| 438 |
_collect_program_option_inconsistencies(
|
| 439 |
g.get("section_json", {}),
|
|
|
|
| 454 |
# ── Build TOC items ──
|
| 455 |
toc_items: list[dict[str, Any]] = []
|
| 456 |
for gs in general_sections:
|
| 457 |
+
# Prefer the JSON-level title (display-ready) over the DB section_title
|
| 458 |
+
gs_json = gs["data"].get("section_json", {})
|
| 459 |
+
if isinstance(gs_json, dict) and gs_json.get("title", "").strip():
|
| 460 |
+
title = gs_json["title"].strip()
|
| 461 |
+
else:
|
| 462 |
+
title = str(gs["data"].get("section_title", gs["data"].get("section_key", "Section")))
|
| 463 |
toc_items.append({
|
| 464 |
"title": title,
|
| 465 |
"target": "#" + gs["anchor"],
|
|
|
|
| 468 |
"sort": gs["sort_order"],
|
| 469 |
})
|
| 470 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 471 |
for u in active_universities:
|
| 472 |
toc_items.append({
|
| 473 |
"title": u["name"],
|
|
|
|
| 551 |
"rendered_html": Markup(section_html),
|
| 552 |
})
|
| 553 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 554 |
# ── Prepare university data for templates (both old + new paths) ──
|
| 555 |
+
# Group by tier for tier heading insertion in the PDF output
|
| 556 |
university_template_data = []
|
| 557 |
university_block_data = []
|
| 558 |
+
# Track which tier label was last emitted so we can insert tier divider headings
|
| 559 |
+
_seen_tier_labels: set[str] = set()
|
| 560 |
+
|
| 561 |
for idx, uni_raw in enumerate(active_universities):
|
| 562 |
uni_raw["_is_first"] = (idx == 0)
|
| 563 |
|
| 564 |
+
# Insert tier group heading when tier changes
|
| 565 |
+
current_tier_label = str(uni_raw.get("tier_label", "")).strip()
|
| 566 |
+
if current_tier_label and current_tier_label not in _seen_tier_labels:
|
| 567 |
+
_seen_tier_labels.add(current_tier_label)
|
| 568 |
+
# Mark this university as starting a new tier group
|
| 569 |
+
uni_raw["_tier_group_start"] = True
|
| 570 |
+
uni_raw["_tier_group_label"] = f"{current_tier_label} Schools"
|
| 571 |
+
|
| 572 |
uni_hits: list[str] = []
|
| 573 |
_collect_program_option_inconsistencies(
|
| 574 |
uni_raw.get("sections", []),
|
|
|
|
| 583 |
uni_data = _prepare_university_data(
|
| 584 |
uni_raw, allow_remote, include_inactive_programs, debug, stats,
|
| 585 |
)
|
| 586 |
+
# Carry tier metadata to template data
|
| 587 |
+
uni_data["tier"] = uni_raw.get("tier")
|
| 588 |
+
uni_data["tier_label"] = uni_raw.get("tier_label", "")
|
| 589 |
+
uni_data["tier_group_start"] = uni_raw.get("_tier_group_start", False)
|
| 590 |
+
uni_data["tier_group_label"] = uni_raw.get("_tier_group_label", "")
|
| 591 |
university_template_data.append(uni_data)
|
| 592 |
# New block path
|
| 593 |
uni_block = normalize_university(
|
|
|
|
| 623 |
toc_title=toc_title,
|
| 624 |
toc_sort_order=toc_sort_order,
|
| 625 |
general_sections=template_sections,
|
| 626 |
+
summary_block=None,
|
| 627 |
universities=university_template_data,
|
| 628 |
university_blocks=university_block_data,
|
| 629 |
bottom_pages=bottom_pages_urls,
|
app/services/normalizer.py
CHANGED
|
@@ -69,15 +69,10 @@ def normalize_section(
|
|
| 69 |
|
| 70 |
layout_norm = str(section_json.get("layout", "")).lower().strip()
|
| 71 |
|
| 72 |
-
# ── Summary of universities ──
|
| 73 |
-
if key_norm == "summary_of_universities":
|
| 74 |
-
blocks.extend(_normalize_university_summary(
|
| 75 |
-
section_title, section_json, layout_norm, universities or [],
|
| 76 |
-
))
|
| 77 |
-
return blocks
|
| 78 |
-
|
| 79 |
# ── Section heading ──
|
| 80 |
-
|
|
|
|
|
|
|
| 81 |
if title and key_norm != "table_of_contents":
|
| 82 |
blocks.append(RenderBlock(
|
| 83 |
block_type="heading_1",
|
|
@@ -133,7 +128,7 @@ def normalize_section(
|
|
| 133 |
|
| 134 |
# ── doc_v1 ──
|
| 135 |
if layout_norm == "doc_v1" and isinstance(section_json.get("blocks"), list):
|
| 136 |
-
blocks.extend(_normalize_doc_v1(section_json["blocks"]))
|
| 137 |
return blocks
|
| 138 |
|
| 139 |
# ── Fallback ──
|
|
@@ -518,15 +513,28 @@ def _normalize_table_v2(json_data: dict) -> RenderBlock:
|
|
| 518 |
)
|
| 519 |
|
| 520 |
|
| 521 |
-
def _normalize_doc_v1(blocks: list) -> list[RenderBlock]:
|
| 522 |
-
"""Normalise doc_v1 blocks into typed RenderBlocks.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 523 |
from markupsafe import Markup
|
|
|
|
| 524 |
result: list[RenderBlock] = []
|
| 525 |
for b in blocks:
|
| 526 |
if not isinstance(b, dict):
|
| 527 |
continue
|
| 528 |
btype = str(b.get("type", ""))
|
| 529 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 530 |
if btype == "paragraph":
|
| 531 |
t = _normalize_text_content(str(b.get("text", "")))
|
| 532 |
if t.strip():
|
|
@@ -622,6 +630,35 @@ def _normalize_doc_v1(blocks: list) -> list[RenderBlock]:
|
|
| 622 |
data={"columns": [str(c) for c in t_cols], "rows": norm_rows, "variant": "standard"},
|
| 623 |
))
|
| 624 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 625 |
elif btype in ("table_v3", "table_v4"):
|
| 626 |
t_rows = b.get("rows", [])
|
| 627 |
if not isinstance(t_rows, list):
|
|
@@ -652,189 +689,3 @@ def _normalize_doc_v1(blocks: list) -> list[RenderBlock]:
|
|
| 652 |
))
|
| 653 |
|
| 654 |
return result
|
| 655 |
-
|
| 656 |
-
|
| 657 |
-
_CLOSING_NOTE_MARKER = "we keep expanding and updating"
|
| 658 |
-
|
| 659 |
-
|
| 660 |
-
def _is_closing_note(text: str) -> bool:
|
| 661 |
-
"""Return True if text is the 'expanding/updating' closing paragraph."""
|
| 662 |
-
return _CLOSING_NOTE_MARKER in text.lower()
|
| 663 |
-
|
| 664 |
-
|
| 665 |
-
def _normalize_university_summary(
|
| 666 |
-
section_title: str,
|
| 667 |
-
json_data: dict,
|
| 668 |
-
layout_norm: str,
|
| 669 |
-
universities: list[dict],
|
| 670 |
-
) -> list[RenderBlock]:
|
| 671 |
-
"""Normalise the summary_of_universities section.
|
| 672 |
-
|
| 673 |
-
Enforced structure:
|
| 674 |
-
1. Section heading
|
| 675 |
-
2. Introductory paragraphs (STEM / OPT etc.) — anything NOT the closing note
|
| 676 |
-
3. Numbered university list
|
| 677 |
-
4. Closing note paragraph ("We keep expanding and updatingβ¦")
|
| 678 |
-
5. Optional note field
|
| 679 |
-
"""
|
| 680 |
-
pre_list_blocks: list[RenderBlock] = [] # intro content — before list
|
| 681 |
-
closing_blocks: list[RenderBlock] = [] # deferred — after list
|
| 682 |
-
|
| 683 |
-
title = section_title.strip()
|
| 684 |
-
|
| 685 |
-
# ── Collect intro / closing paragraphs ──
|
| 686 |
-
intro = _normalize_text_content(str(json_data.get("intro", "")).strip())
|
| 687 |
-
if intro:
|
| 688 |
-
target = closing_blocks if _is_closing_note(intro) else pre_list_blocks
|
| 689 |
-
target.append(RenderBlock(
|
| 690 |
-
block_type="paragraph",
|
| 691 |
-
css_class="hb-paragraph",
|
| 692 |
-
data={"text": intro},
|
| 693 |
-
))
|
| 694 |
-
|
| 695 |
-
if layout_norm == "doc_v1" and isinstance(json_data.get("blocks"), list):
|
| 696 |
-
for b in json_data["blocks"]:
|
| 697 |
-
if not isinstance(b, dict):
|
| 698 |
-
continue
|
| 699 |
-
btype = str(b.get("type", ""))
|
| 700 |
-
if btype not in ("paragraph", "subheading", "note"):
|
| 701 |
-
continue
|
| 702 |
-
t = _normalize_text_content(str(b.get("text", "")))
|
| 703 |
-
if not t.strip():
|
| 704 |
-
continue
|
| 705 |
-
if btype == "subheading":
|
| 706 |
-
pre_list_blocks.append(RenderBlock(
|
| 707 |
-
block_type="heading_2", css_class="hb-heading-2", data={"text": t},
|
| 708 |
-
))
|
| 709 |
-
elif btype == "note":
|
| 710 |
-
target = closing_blocks if _is_closing_note(t) else pre_list_blocks
|
| 711 |
-
target.append(RenderBlock(
|
| 712 |
-
block_type="note", css_class="hb-note", data={"text": t},
|
| 713 |
-
))
|
| 714 |
-
else:
|
| 715 |
-
target = closing_blocks if _is_closing_note(t) else pre_list_blocks
|
| 716 |
-
target.append(RenderBlock(
|
| 717 |
-
block_type="paragraph", css_class="hb-paragraph", data={"text": t},
|
| 718 |
-
))
|
| 719 |
-
|
| 720 |
-
# ── Resolve university list (tier-grouped) ──
|
| 721 |
-
resolved: list[str] = []
|
| 722 |
-
tier_one_names: list[str] = []
|
| 723 |
-
tier_two_names: list[str] = []
|
| 724 |
-
if universities:
|
| 725 |
-
def uni_sort_key(u):
|
| 726 |
-
so = u.get("sort_order") if isinstance(u, dict) else None
|
| 727 |
-
if so is not None:
|
| 728 |
-
try:
|
| 729 |
-
return (0, float(so))
|
| 730 |
-
except (ValueError, TypeError):
|
| 731 |
-
pass
|
| 732 |
-
return (1, 0.0)
|
| 733 |
-
|
| 734 |
-
sorted_unis = sorted(universities, key=uni_sort_key)
|
| 735 |
-
for u in sorted_unis:
|
| 736 |
-
if isinstance(u, dict):
|
| 737 |
-
name = str(u.get("university_name", u.get("name", ""))).strip()
|
| 738 |
-
if name:
|
| 739 |
-
resolved.append(name)
|
| 740 |
-
# Group by tier for sub-headings
|
| 741 |
-
tier = u.get("tier")
|
| 742 |
-
if tier == 1:
|
| 743 |
-
tier_one_names.append(name)
|
| 744 |
-
elif tier == 2:
|
| 745 |
-
tier_two_names.append(name)
|
| 746 |
-
else:
|
| 747 |
-
tier_one_names.append(name) # default to tier one
|
| 748 |
-
|
| 749 |
-
if not resolved and layout_norm == "doc_v1" and isinstance(json_data.get("blocks"), list):
|
| 750 |
-
for b in json_data["blocks"]:
|
| 751 |
-
if not isinstance(b, dict) or str(b.get("type", "")) != "bullets":
|
| 752 |
-
continue
|
| 753 |
-
items = b.get("items", [])
|
| 754 |
-
if isinstance(items, list):
|
| 755 |
-
for it in items:
|
| 756 |
-
it_str = str(it).strip()
|
| 757 |
-
if it_str:
|
| 758 |
-
resolved.append(it_str)
|
| 759 |
-
|
| 760 |
-
# Dedupe
|
| 761 |
-
seen: set[str] = set()
|
| 762 |
-
deduped: list[str] = []
|
| 763 |
-
for nm in resolved:
|
| 764 |
-
k = nm.lower().strip()
|
| 765 |
-
if k and k not in seen:
|
| 766 |
-
seen.add(k)
|
| 767 |
-
deduped.append(nm)
|
| 768 |
-
|
| 769 |
-
# ── Assemble in enforced order ──
|
| 770 |
-
blocks: list[RenderBlock] = []
|
| 771 |
-
|
| 772 |
-
if title:
|
| 773 |
-
blocks.append(RenderBlock(
|
| 774 |
-
block_type="heading_1",
|
| 775 |
-
css_class="hb-heading-1",
|
| 776 |
-
data={"text": title},
|
| 777 |
-
))
|
| 778 |
-
|
| 779 |
-
blocks.extend(pre_list_blocks)
|
| 780 |
-
|
| 781 |
-
# Render university summary grouped by tier when tier data is available
|
| 782 |
-
if tier_one_names or tier_two_names:
|
| 783 |
-
# Tier One sub-group
|
| 784 |
-
if tier_one_names:
|
| 785 |
-
blocks.append(RenderBlock(
|
| 786 |
-
block_type="heading_2",
|
| 787 |
-
css_class="hb-heading-2",
|
| 788 |
-
data={"text": "Tier One Schools"},
|
| 789 |
-
))
|
| 790 |
-
seen_t1: set[str] = set()
|
| 791 |
-
deduped_t1 = []
|
| 792 |
-
for nm in tier_one_names:
|
| 793 |
-
k = nm.lower().strip()
|
| 794 |
-
if k and k not in seen_t1:
|
| 795 |
-
seen_t1.add(k)
|
| 796 |
-
deduped_t1.append(nm)
|
| 797 |
-
blocks.append(RenderBlock(
|
| 798 |
-
block_type="university_summary",
|
| 799 |
-
css_class="hb-university-summary",
|
| 800 |
-
data={"universities": deduped_t1},
|
| 801 |
-
))
|
| 802 |
-
# Tier Two sub-group (no redundant "Summary of Universities" heading)
|
| 803 |
-
if tier_two_names:
|
| 804 |
-
blocks.append(RenderBlock(
|
| 805 |
-
block_type="heading_2",
|
| 806 |
-
css_class="hb-heading-2",
|
| 807 |
-
data={"text": "Tier Two Schools"},
|
| 808 |
-
))
|
| 809 |
-
seen_t2: set[str] = set()
|
| 810 |
-
deduped_t2 = []
|
| 811 |
-
for nm in tier_two_names:
|
| 812 |
-
k = nm.lower().strip()
|
| 813 |
-
if k and k not in seen_t2:
|
| 814 |
-
seen_t2.add(k)
|
| 815 |
-
deduped_t2.append(nm)
|
| 816 |
-
blocks.append(RenderBlock(
|
| 817 |
-
block_type="university_summary",
|
| 818 |
-
css_class="hb-university-summary",
|
| 819 |
-
data={"universities": deduped_t2},
|
| 820 |
-
))
|
| 821 |
-
elif deduped:
|
| 822 |
-
# Fallback: no tier data available, render flat list (backward compat)
|
| 823 |
-
blocks.append(RenderBlock(
|
| 824 |
-
block_type="university_summary",
|
| 825 |
-
css_class="hb-university-summary",
|
| 826 |
-
data={"universities": deduped},
|
| 827 |
-
))
|
| 828 |
-
|
| 829 |
-
# Closing note always after the list
|
| 830 |
-
blocks.extend(closing_blocks)
|
| 831 |
-
|
| 832 |
-
note = str(json_data.get("note", "")).strip()
|
| 833 |
-
if note:
|
| 834 |
-
blocks.append(RenderBlock(
|
| 835 |
-
block_type="note",
|
| 836 |
-
css_class="hb-note",
|
| 837 |
-
data={"text": _normalize_text_content(note)},
|
| 838 |
-
))
|
| 839 |
-
|
| 840 |
-
return blocks
|
|
|
|
| 69 |
|
| 70 |
layout_norm = str(section_json.get("layout", "")).lower().strip()
|
| 71 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
# ── Section heading ──
|
| 73 |
+
# Prefer the JSON-level title (display-ready) over the DB section_title
|
| 74 |
+
json_title = str(section_json.get("title", "")).strip() if isinstance(section_json, dict) else ""
|
| 75 |
+
title = json_title or section_title.strip()
|
| 76 |
if title and key_norm != "table_of_contents":
|
| 77 |
blocks.append(RenderBlock(
|
| 78 |
block_type="heading_1",
|
|
|
|
| 128 |
|
| 129 |
# ── doc_v1 ──
|
| 130 |
if layout_norm == "doc_v1" and isinstance(section_json.get("blocks"), list):
|
| 131 |
+
blocks.extend(_normalize_doc_v1(section_json["blocks"], skip_title=title))
|
| 132 |
return blocks
|
| 133 |
|
| 134 |
# ── Fallback ──
|
|
|
|
| 513 |
)
|
| 514 |
|
| 515 |
|
| 516 |
+
def _normalize_doc_v1(blocks: list, *, skip_title: str = "") -> list[RenderBlock]:
|
| 517 |
+
"""Normalise doc_v1 blocks into typed RenderBlocks.
|
| 518 |
+
|
| 519 |
+
Args:
|
| 520 |
+
skip_title: When set, any leading heading/subheading block whose text
|
| 521 |
+
matches this title (case-insensitive) is dropped to avoid
|
| 522 |
+
duplicating the section heading already emitted by the caller.
|
| 523 |
+
"""
|
| 524 |
from markupsafe import Markup
|
| 525 |
+
_skip_norm = skip_title.strip().lower() if skip_title else ""
|
| 526 |
result: list[RenderBlock] = []
|
| 527 |
for b in blocks:
|
| 528 |
if not isinstance(b, dict):
|
| 529 |
continue
|
| 530 |
btype = str(b.get("type", ""))
|
| 531 |
|
| 532 |
+
# Skip heading/subheading blocks that duplicate the section title
|
| 533 |
+
if _skip_norm and btype in ("heading", "subheading"):
|
| 534 |
+
block_text = str(b.get("text", "")).strip().lower()
|
| 535 |
+
if block_text == _skip_norm:
|
| 536 |
+
continue
|
| 537 |
+
|
| 538 |
if btype == "paragraph":
|
| 539 |
t = _normalize_text_content(str(b.get("text", "")))
|
| 540 |
if t.strip():
|
|
|
|
| 630 |
data={"columns": [str(c) for c in t_cols], "rows": norm_rows, "variant": "standard"},
|
| 631 |
))
|
| 632 |
|
| 633 |
+
elif btype == "table":
|
| 634 |
+
# Generic table (columns may be objects or strings, rows may be dicts or lists)
|
| 635 |
+
t_cols = b.get("columns", [])
|
| 636 |
+
t_rows = b.get("rows", [])
|
| 637 |
+
if not isinstance(t_cols, list):
|
| 638 |
+
t_cols = []
|
| 639 |
+
if not isinstance(t_rows, list):
|
| 640 |
+
t_rows = []
|
| 641 |
+
col_labels = []
|
| 642 |
+
col_keys = []
|
| 643 |
+
for c in t_cols:
|
| 644 |
+
if isinstance(c, dict):
|
| 645 |
+
col_labels.append(str(c.get("label", c.get("key", ""))))
|
| 646 |
+
col_keys.append(str(c.get("key", "")))
|
| 647 |
+
else:
|
| 648 |
+
col_labels.append(str(c))
|
| 649 |
+
col_keys.append(re.sub(r"[^a-z0-9]+", "_", str(c).lower()))
|
| 650 |
+
norm_rows = []
|
| 651 |
+
for r in t_rows:
|
| 652 |
+
if isinstance(r, dict):
|
| 653 |
+
norm_rows.append([emphasize_keywords(_normalize_text_content(str(r.get(k, "")))) for k in col_keys])
|
| 654 |
+
elif isinstance(r, list):
|
| 655 |
+
norm_rows.append([emphasize_keywords(_normalize_text_content(str(cell))) for cell in r])
|
| 656 |
+
result.append(RenderBlock(
|
| 657 |
+
block_type="table",
|
| 658 |
+
css_class="hb-table",
|
| 659 |
+
data={"columns": col_labels, "rows": norm_rows, "variant": "standard"},
|
| 660 |
+
))
|
| 661 |
+
|
| 662 |
elif btype in ("table_v3", "table_v4"):
|
| 663 |
t_rows = b.get("rows", [])
|
| 664 |
if not isinstance(t_rows, list):
|
|
|
|
| 689 |
))
|
| 690 |
|
| 691 |
return result
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/services/renderers.py
CHANGED
|
@@ -349,92 +349,13 @@ def render_global_blocks(
|
|
| 349 |
|
| 350 |
layout_norm = str(json_data.get("layout", "")).lower().strip()
|
| 351 |
|
| 352 |
-
# ── Summary of universities ──
|
| 353 |
-
if key_norm == "summary_of_universities":
|
| 354 |
-
unis = universities or []
|
| 355 |
-
title = section_title.strip()
|
| 356 |
-
if title:
|
| 357 |
-
html_out += f'<h2 class="h2">{h(title)}</h2>'
|
| 358 |
-
|
| 359 |
-
intro = str(json_data.get("intro", "")).strip()
|
| 360 |
-
if intro:
|
| 361 |
-
html_out += f'<p class="p">{h(format_money_figures(intro))}</p>'
|
| 362 |
-
elif layout_norm == "doc_v1" and isinstance(json_data.get("blocks"), list):
|
| 363 |
-
for b in json_data["blocks"]:
|
| 364 |
-
if not isinstance(b, dict):
|
| 365 |
-
continue
|
| 366 |
-
btype = str(b.get("type", ""))
|
| 367 |
-
if btype not in ("paragraph", "subheading", "note"):
|
| 368 |
-
continue
|
| 369 |
-
t = format_money_figures(str(b.get("text", "")))
|
| 370 |
-
if not t.strip():
|
| 371 |
-
continue
|
| 372 |
-
if btype == "subheading":
|
| 373 |
-
html_out += f'<h3 class="h3">{h(t)}</h3>'
|
| 374 |
-
elif btype == "note":
|
| 375 |
-
html_out += f'<div class="note">{h(t)}</div>'
|
| 376 |
-
else:
|
| 377 |
-
html_out += f'<p class="p">{emphasize_keywords(t)}</p>'
|
| 378 |
-
|
| 379 |
-
# Resolve list from universities or doc_v1 bullets
|
| 380 |
-
resolved: list[str] = []
|
| 381 |
-
if unis:
|
| 382 |
-
def uni_sort_key(u):
|
| 383 |
-
so = u.get("sort_order") if isinstance(u, dict) else None
|
| 384 |
-
if so is not None:
|
| 385 |
-
try:
|
| 386 |
-
return (0, float(so))
|
| 387 |
-
except (ValueError, TypeError):
|
| 388 |
-
pass
|
| 389 |
-
return (1, 0.0)
|
| 390 |
-
|
| 391 |
-
sorted_unis = sorted(unis, key=uni_sort_key)
|
| 392 |
-
for u in sorted_unis:
|
| 393 |
-
if not isinstance(u, dict):
|
| 394 |
-
continue
|
| 395 |
-
name = str(u.get("university_name", u.get("name", ""))).strip()
|
| 396 |
-
if name:
|
| 397 |
-
resolved.append(name)
|
| 398 |
-
|
| 399 |
-
if not resolved and layout_norm == "doc_v1" and isinstance(json_data.get("blocks"), list):
|
| 400 |
-
for b in json_data["blocks"]:
|
| 401 |
-
if not isinstance(b, dict) or str(b.get("type", "")) != "bullets":
|
| 402 |
-
continue
|
| 403 |
-
items = b.get("items", [])
|
| 404 |
-
if not isinstance(items, list):
|
| 405 |
-
continue
|
| 406 |
-
for it in items:
|
| 407 |
-
it_str = str(it).strip()
|
| 408 |
-
if it_str:
|
| 409 |
-
resolved.append(it_str)
|
| 410 |
-
|
| 411 |
-
# Dedupe
|
| 412 |
-
seen: set[str] = set()
|
| 413 |
-
deduped: list[str] = []
|
| 414 |
-
for nm in resolved:
|
| 415 |
-
k = nm.lower().strip()
|
| 416 |
-
if not k or k in seen:
|
| 417 |
-
continue
|
| 418 |
-
seen.add(k)
|
| 419 |
-
deduped.append(nm)
|
| 420 |
-
|
| 421 |
-
if deduped:
|
| 422 |
-
html_out += '<ol class="ol">'
|
| 423 |
-
for name in deduped:
|
| 424 |
-
anchor = "university_" + hb_slug(name)
|
| 425 |
-
html_out += f'<li><a href="#{h(anchor)}">{h(name)}</a></li>'
|
| 426 |
-
html_out += "</ol>"
|
| 427 |
-
|
| 428 |
-
note = str(json_data.get("note", "")).strip()
|
| 429 |
-
if note:
|
| 430 |
-
html_out += f'<div class="note">{h(format_money_figures(note))}</div>'
|
| 431 |
-
|
| 432 |
-
return html_out
|
| 433 |
-
|
| 434 |
# ── Section title ──
|
| 435 |
-
|
|
|
|
|
|
|
| 436 |
if title and key_norm != "table_of_contents":
|
| 437 |
html_out += f'<h2 class="h2">{h(title)}</h2>'
|
|
|
|
| 438 |
|
| 439 |
# ── Steps ──
|
| 440 |
steps = json_data.get("steps")
|
|
@@ -600,6 +521,12 @@ def render_global_blocks(
|
|
| 600 |
continue
|
| 601 |
btype = str(b.get("type", ""))
|
| 602 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 603 |
if btype == "paragraph":
|
| 604 |
t = format_money_figures(str(b.get("text", "")))
|
| 605 |
if t.strip():
|
|
@@ -679,6 +606,42 @@ def render_global_blocks(
|
|
| 679 |
html_out += "</tr>"
|
| 680 |
html_out += "</tbody></table>"
|
| 681 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 682 |
elif btype in ("table_v3", "table_v4"):
|
| 683 |
t_rows = b.get("rows", [])
|
| 684 |
h_rows = b.get("header_rows", [])
|
|
|
|
| 349 |
|
| 350 |
layout_norm = str(json_data.get("layout", "")).lower().strip()
|
| 351 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 352 |
# ── Section title ──
|
| 353 |
+
# Prefer the JSON-level title (display-ready) over the DB section_title
|
| 354 |
+
json_title = str(json_data.get("title", "")).strip() if isinstance(json_data, dict) else ""
|
| 355 |
+
title = json_title or section_title.strip()
|
| 356 |
if title and key_norm != "table_of_contents":
|
| 357 |
html_out += f'<h2 class="h2">{h(title)}</h2>'
|
| 358 |
+
_title_norm = title.lower()
|
| 359 |
|
| 360 |
# ── Steps ──
|
| 361 |
steps = json_data.get("steps")
|
|
|
|
| 521 |
continue
|
| 522 |
btype = str(b.get("type", ""))
|
| 523 |
|
| 524 |
+
# Skip heading/subheading blocks that duplicate the section title
|
| 525 |
+
if btype in ("heading", "subheading"):
|
| 526 |
+
block_text = str(b.get("text", "")).strip().lower()
|
| 527 |
+
if block_text == _title_norm:
|
| 528 |
+
continue
|
| 529 |
+
|
| 530 |
if btype == "paragraph":
|
| 531 |
t = format_money_figures(str(b.get("text", "")))
|
| 532 |
if t.strip():
|
|
|
|
| 606 |
html_out += "</tr>"
|
| 607 |
html_out += "</tbody></table>"
|
| 608 |
|
| 609 |
+
elif btype == "table":
|
| 610 |
+
# Generic table (columns may be objects or strings, rows may be dicts or lists)
|
| 611 |
+
t_cols = b.get("columns", [])
|
| 612 |
+
t_rows = b.get("rows", [])
|
| 613 |
+
if not isinstance(t_cols, list):
|
| 614 |
+
t_cols = []
|
| 615 |
+
if not isinstance(t_rows, list):
|
| 616 |
+
t_rows = []
|
| 617 |
+
col_labels = []
|
| 618 |
+
col_keys = []
|
| 619 |
+
for c in t_cols:
|
| 620 |
+
if isinstance(c, dict):
|
| 621 |
+
col_labels.append(str(c.get("label", c.get("key", ""))))
|
| 622 |
+
col_keys.append(str(c.get("key", "")))
|
| 623 |
+
else:
|
| 624 |
+
col_labels.append(str(c))
|
| 625 |
+
col_keys.append(re.sub(r"[^a-z0-9]+", "_", str(c).lower()))
|
| 626 |
+
html_out += '<table class="tbl">'
|
| 627 |
+
if col_labels:
|
| 628 |
+
html_out += "<thead><tr>"
|
| 629 |
+
for lbl in col_labels:
|
| 630 |
+
html_out += f"<th>{h(lbl)}</th>"
|
| 631 |
+
html_out += "</tr></thead>"
|
| 632 |
+
html_out += "<tbody>"
|
| 633 |
+
for r in t_rows:
|
| 634 |
+
html_out += "<tr>"
|
| 635 |
+
if isinstance(r, dict):
|
| 636 |
+
for k in col_keys:
|
| 637 |
+
cell = r.get(k, "")
|
| 638 |
+
html_out += f"<td>{h(format_money_figures(str(cell)))}</td>"
|
| 639 |
+
elif isinstance(r, list):
|
| 640 |
+
for cell in r:
|
| 641 |
+
html_out += f"<td>{h(format_money_figures(str(cell)))}</td>"
|
| 642 |
+
html_out += "</tr>"
|
| 643 |
+
html_out += "</tbody></table>"
|
| 644 |
+
|
| 645 |
elif btype in ("table_v3", "table_v4"):
|
| 646 |
t_rows = b.get("rows", [])
|
| 647 |
h_rows = b.get("header_rows", [])
|
app/services/utils.py
CHANGED
|
@@ -51,20 +51,52 @@ def format_money_figures(text: str) -> str:
|
|
| 51 |
"""Normalize all monetary figures to "USD X,XXX" format.
|
| 52 |
|
| 53 |
- Converts existing $X,XXX → USD X,XXX
|
| 54 |
-
- Normalizes bare large numbers → USD X,XXX
|
| 55 |
- Formats with commas
|
| 56 |
- Currency type is always USD (no $ symbol)
|
| 57 |
"""
|
| 58 |
if not text:
|
| 59 |
return text
|
| 60 |
|
| 61 |
-
#
|
| 62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
|
| 64 |
-
|
| 65 |
-
text = re.sub(r'\bUSD\s+([\d,]+(?:\.\d+)?)', lambda m: m.group(1), text, flags=re.IGNORECASE)
|
| 66 |
|
| 67 |
-
|
|
|
|
|
|
|
| 68 |
num_str = m.group(1).replace(",", "")
|
| 69 |
dec = m.group(2) if m.group(2) else ""
|
| 70 |
try:
|
|
@@ -77,10 +109,9 @@ def format_money_figures(text: str) -> str:
|
|
| 77 |
formatted = f"{num:,.0f}"
|
| 78 |
return "USD " + formatted
|
| 79 |
|
| 80 |
-
# Add "USD " to large numbers (4+ digits or already comma-formatted)
|
| 81 |
text = re.sub(
|
| 82 |
-
r"(?<!\d)((?:\d{1,3}(?:,\d{3})+)|(?:\d{4,}))(?:\.(\d+))?(?![%\d/])",
|
| 83 |
-
|
| 84 |
text,
|
| 85 |
)
|
| 86 |
|
|
|
|
| 51 |
"""Normalize all monetary figures to "USD X,XXX" format.
|
| 52 |
|
| 53 |
- Converts existing $X,XXX → USD X,XXX
|
| 54 |
+
- Normalizes bare large numbers (1,000+) → USD X,XXX
|
| 55 |
- Formats with commas
|
| 56 |
- Currency type is always USD (no $ symbol)
|
| 57 |
"""
|
| 58 |
if not text:
|
| 59 |
return text
|
| 60 |
|
| 61 |
+
# Step 1: Convert "$X" → "USD X" directly (preserves ALL dollar amounts)
|
| 62 |
+
def _dollar_to_usd(m: re.Match) -> str:
|
| 63 |
+
num_str = m.group(1).replace(",", "")
|
| 64 |
+
try:
|
| 65 |
+
num = float(num_str)
|
| 66 |
+
except ValueError:
|
| 67 |
+
return m.group(0)
|
| 68 |
+
if "." in m.group(1):
|
| 69 |
+
dec_part = m.group(1).split(".")[-1]
|
| 70 |
+
formatted = f"{num:,.{len(dec_part)}f}"
|
| 71 |
+
elif num == int(num):
|
| 72 |
+
formatted = f"{int(num):,}"
|
| 73 |
+
else:
|
| 74 |
+
formatted = f"{num:,.2f}"
|
| 75 |
+
return "USD " + formatted
|
| 76 |
+
|
| 77 |
+
text = re.sub(r'\$([\d,]+(?:\.\d+)?)', _dollar_to_usd, text)
|
| 78 |
+
|
| 79 |
+
# Step 2: Normalize existing "USD X,XXX" for consistent comma formatting
|
| 80 |
+
def _normalize_usd(m: re.Match) -> str:
|
| 81 |
+
num_str = m.group(1).replace(",", "")
|
| 82 |
+
try:
|
| 83 |
+
num = float(num_str)
|
| 84 |
+
except ValueError:
|
| 85 |
+
return m.group(0)
|
| 86 |
+
if "." in m.group(1):
|
| 87 |
+
dec_part = m.group(1).split(".")[-1]
|
| 88 |
+
formatted = f"{num:,.{len(dec_part)}f}"
|
| 89 |
+
elif num == int(num):
|
| 90 |
+
formatted = f"{int(num):,}"
|
| 91 |
+
else:
|
| 92 |
+
formatted = f"{num:,.2f}"
|
| 93 |
+
return "USD " + formatted
|
| 94 |
|
| 95 |
+
text = re.sub(r'\bUSD\s+([\d,]+(?:\.\d+)?)', _normalize_usd, text, flags=re.IGNORECASE)
|
|
|
|
| 96 |
|
| 97 |
+
# Step 3: Add "USD " to bare large numbers (4+ digits or comma-formatted)
|
| 98 |
+
# that aren't already preceded by "USD "
|
| 99 |
+
def _format_bare_large(m: re.Match) -> str:
|
| 100 |
num_str = m.group(1).replace(",", "")
|
| 101 |
dec = m.group(2) if m.group(2) else ""
|
| 102 |
try:
|
|
|
|
| 109 |
formatted = f"{num:,.0f}"
|
| 110 |
return "USD " + formatted
|
| 111 |
|
|
|
|
| 112 |
text = re.sub(
|
| 113 |
+
r"(?<!\d)(?<!USD )((?:\d{1,3}(?:,\d{3})+)|(?:\d{4,}))(?:\.(\d+))?(?![%\d/])",
|
| 114 |
+
_format_bare_large,
|
| 115 |
text,
|
| 116 |
)
|
| 117 |
|
app/templates/handbook.html
CHANGED
|
@@ -95,7 +95,8 @@
|
|
| 95 |
{% for uni in universities %}
|
| 96 |
{% if uni.tier_group_start and uni.tier_group_label %}
|
| 97 |
<div class="section-block page-break tier-group-heading" data-tier="{{ uni.tier_label | default('') | e }}">
|
| 98 |
-
<h1 class="h1 hb-heading-1" style="margin-top:0.5em;margin-bottom:0.3em;">{{ uni.tier_group_label | e }}
|
|
|
|
| 99 |
</div>
|
| 100 |
{% endif %}
|
| 101 |
{% include "partials/university.html" %}
|
|
|
|
| 95 |
{% for uni in universities %}
|
| 96 |
{% if uni.tier_group_start and uni.tier_group_label %}
|
| 97 |
<div class="section-block page-break tier-group-heading" data-tier="{{ uni.tier_label | default('') | e }}">
|
| 98 |
+
<h1 class="h1 hb-heading-1" style="margin-top:0.5em;margin-bottom:0.3em;">{{ uni.tier_group_label | e }}
|
| 99 |
+
</h1>
|
| 100 |
</div>
|
| 101 |
{% endif %}
|
| 102 |
{% include "partials/university.html" %}
|
app/templates/partials/university.html
CHANGED
|
@@ -83,9 +83,9 @@
|
|
| 83 |
<table class="programs">
|
| 84 |
<thead>
|
| 85 |
<tr>
|
| 86 |
-
<th
|
| 87 |
-
<th
|
| 88 |
-
<th
|
| 89 |
</tr>
|
| 90 |
</thead>
|
| 91 |
<tbody>
|
|
|
|
| 83 |
<table class="programs">
|
| 84 |
<thead>
|
| 85 |
<tr>
|
| 86 |
+
<th>Program</th>
|
| 87 |
+
<th>Designation</th>
|
| 88 |
+
<th>Entrance Examination</th>
|
| 89 |
</tr>
|
| 90 |
</thead>
|
| 91 |
<tbody>
|