"""Normalization layer — converts raw MySQL handbook content into typed render blocks.

Each section_json from the database is parsed into a list of RenderBlock
objects.  Every block has a `block_type` that maps 1-to-1 to a Jinja partial
and a CSS class.  This prevents ad-hoc interpretation of raw JSON throughout
the rendering pipeline.

Block types (from theme.BLOCK_TYPES):
    heading_1, heading_2, paragraph, bullet_list, note, table,
    enrollment_steps, school_profile, university_summary, toc, cover,
    full_page_image
"""

from __future__ import annotations

import re
from urllib.parse import quote_plus
from dataclasses import dataclass, field
from typing import Any

from app.services.renderers import _extract_university_funding
from app.services.utils import (
    ensure_program_options_pair,
    emphasize_keywords,
    format_money_figures,
    get_any,
    h,
    hb_slug,
    is_assoc,
    is_truthy,
    linkify_urls,
)
from app.services.renderers import fetch_image_data_uri


# ───────────────────────────────────────────────────────────────
# Block data-classes
# ───────────────────────────────────────────────────────────────

@dataclass
class RenderBlock:
    """Base typed render block."""

    # Block type name; one of the types listed in the module docstring.
    block_type: str
    # Space-separated CSS classes attached to the block.
    css_class: str = ""
    # Payload for the block; its keys depend on ``block_type``.
    data: dict[str, Any] = field(default_factory=dict)


# ───────────────────────────────────────────────────────────────
# Small text helpers
# ───────────────────────────────────────────────────────────────

def _to_text(value: Any) -> str:
    """Return ``str(value)``, mapping ``None`` to the empty string.

    A JSON ``null`` would otherwise be stringified to the literal text
    "None" and end up printed in the handbook.
    """
    return "" if value is None else str(value)


def _first_text(mapping: dict, *keys: str, default: str = "") -> str:
    """Return the text stored under the first usable key of *mapping*.

    Keys are tried in order; a key that is missing or holds ``None`` is
    skipped.  *default* is returned when no key yields a value.  The text is
    returned unstripped so call sites decide about whitespace.
    """
    for key in keys:
        value = mapping.get(key)
        if value is not None:
            return str(value)
    return default


def _clean_items(items: list) -> list[str]:
    """Stringify, strip and normalise list entries, dropping blank ones."""
    texts = (_to_text(item).strip() for item in items)
    return [_normalize_text_content(text) for text in texts if text]


def _column_spec(col: dict) -> dict[str, str]:
    """Return a ``{"key", "label"}`` dict with both values coerced to text."""
    return {"key": _first_text(col, "key"), "label": _first_text(col, "label")}


def _span_value(raw: Any) -> int:
    """Parse a colspan/rowspan value, falling back to 1.

    ``str.isdecimal`` is used rather than ``str.isdigit``: the latter also
    accepts characters such as "²" that ``int()`` rejects with ValueError.
    """
    text = str(raw)
    return int(text) if text.isdecimal() else 1


# ───────────────────────────────────────────────────────────────
# Section → blocks
# ───────────────────────────────────────────────────────────────

def normalize_section(
    section_key: str,
    section_title: str,
    section_json: dict | list,
    *,
    universities: list[dict] | None = None,
    debug: bool = False,
) -> list[RenderBlock]:
    """Convert a single global section payload into a list of RenderBlocks.

    This is the single translation point between the database schema and the
    rendering layer.

    The payload shape is detected in this order, and the first match wins:
    ``steps`` list, bullets, basic table (``columns`` + ``rows``),
    ``table_v2`` layout, ``doc_v1`` layout, and finally a plain ``text``
    fallback.

    Args:
        section_key: Section identifier; compared lower-cased and stripped.
        section_title: Title from the database, used when the JSON payload
            has no ``title`` of its own.
        section_json: Parsed section payload.  Anything that is not a dict
            is treated as an empty payload.
        universities: Not used by this function.
        debug: Not used by this function.

    Returns:
        The render blocks for the section.  A ``heading_1`` block comes
        first whenever a title is available and the section is not the
        table of contents.
    """
    from markupsafe import Markup

    blocks: list[RenderBlock] = []
    key_norm = section_key.lower().strip()

    if not isinstance(section_json, dict):
        section_json = {}

    layout_norm = str(section_json.get("layout", "")).lower().strip()

    # ── Section heading ──
    # Prefer the JSON-level title (display-ready) over the DB section_title
    json_title = _first_text(section_json, "title").strip()
    title = json_title or section_title.strip()
    if title and key_norm != "table_of_contents":
        blocks.append(RenderBlock(
            block_type="heading_1",
            css_class="hb-heading-1",
            data={"text": title},
        ))

    # ── Steps → enrollment_steps ──
    steps = section_json.get("steps")
    if isinstance(steps, list):
        blocks.append(RenderBlock(
            block_type="enrollment_steps",
            css_class="hb-enrollment-steps",
            data={"steps": _normalize_steps(steps)},
        ))
        return blocks

    # ── Bullets ──
    has_bullets = isinstance(section_json.get("bullets"), list)
    has_items = isinstance(section_json.get("items"), list)
    if has_bullets or (layout_norm == "bullets_with_note" and has_items):
        # "items" takes precedence over "bullets" when both are lists.
        lst = section_json.get("items") if has_items else section_json.get("bullets")
        items = _clean_items(lst)
        html_items = [Markup(emphasize_keywords(it)) for it in items]
        blocks.append(RenderBlock(
            block_type="bullet_list",
            css_class="hb-bullet-list",
            data={"entries": html_items, "html_entries": True},
        ))
        # A null "note" no longer hides "footnote" or prints as "None".
        note = _normalize_text_content(
            _first_text(section_json, "note", "footnote").strip()
        )
        if note:
            blocks.append(RenderBlock(
                block_type="note",
                css_class="hb-note",
                data={"text": note},
            ))
        return blocks

    # ── Basic table ──
    cols = section_json.get("columns")
    rows = section_json.get("rows")
    if isinstance(cols, list) and isinstance(rows, list):
        blocks.append(_normalize_basic_table(cols, rows))
        return blocks

    # ── table_v2 ──
    if layout_norm == "table_v2":
        blocks.append(_normalize_table_v2(section_json))
        return blocks

    # ── doc_v1 ──
    if layout_norm == "doc_v1" and isinstance(section_json.get("blocks"), list):
        blocks.extend(_normalize_doc_v1(section_json["blocks"], skip_title=title))

        # Post-process breakdown section for Relocation Cost layout
        if key_norm == "program_features_breakdown":
            blocks = _postprocess_breakdown(blocks, section_json["blocks"])

        # Post-process Tier 2 section for sub-bullet styling
        if key_norm == "summary_of_universities_cosigner":
            blocks = _postprocess_tier2(blocks)

        return blocks

    # ── Fallback ──
    if "text" in section_json:
        text = _normalize_text_content(_to_text(section_json["text"]))
        if text.strip():
            blocks.append(RenderBlock(
                block_type="paragraph",
                css_class="hb-paragraph",
                data={
                    "text": text,
                    "html": Markup(emphasize_keywords(text)),
                },
            ))

    return blocks


def _normalize_text_content(text: str) -> str:
    """Apply global handbook text normalization in a single place."""
    return ensure_program_options_pair(format_money_figures(text))


# ───────────────────────────────────────────────────────────────
# University profile normalisation
# ───────────────────────────────────────────────────────────────

def normalize_university(
    uni_raw: dict[str, Any],
    allow_remote: bool,
    include_inactive_programs: bool,
    debug: bool,
    stats: dict[str, Any],
) -> RenderBlock:
    """Convert raw university data into a school_profile RenderBlock.

    Args:
        uni_raw: Raw university record.  ``name`` is required; ``sections``,
            ``website``, ``anchor``, ``sort_order``, ``school_category`` and
            ``is_active`` are read when present.  The record is not modified.
        allow_remote: When true, the campus image URL is fetched and embedded
            through ``fetch_image_data_uri``.
        include_inactive_programs: When false, programs whose activity flag
            is falsy are dropped.
        debug: Forwarded to ``normalize_section`` for the extra sections.
        stats: Counter dict updated in place (``universities``,
            ``images_embedded``, ``images_placeholder``, ``university_links``,
            ``website_rows``).

    Returns:
        A ``school_profile`` block carrying the assembled profile data.

    Raises:
        KeyError: If ``uni_raw`` has no ``name`` key.
    """
    uni_name = uni_raw["name"]
    sections = uni_raw.get("sections") or []
    stats["universities"] = stats.get("universities", 0) + 1

    # Build section map; merge duplicate "programs" sections
    sec_map: dict[str, dict] = {}
    for s in sections:
        if not isinstance(s, dict):
            continue
        k = str(s.get("section_key", ""))
        if not k:
            continue
        if k == "programs" and k in sec_map:
            existing = sec_map["programs"].get("section_json", {})
            incoming = s.get("section_json", {})
            if not isinstance(existing, dict):
                existing = {}
            if not isinstance(incoming, dict):
                incoming = {}
            a = existing.get("programs", [])
            b = incoming.get("programs", [])
            if not isinstance(a, list):
                a = []
            if not isinstance(b, list):
                b = []
            # Store the merge in fresh dicts: writing into the first section
            # would mutate the caller's uni_raw, so a second call on the same
            # record would append the duplicate programs again.
            merged_json = {**existing, "programs": a + b}
            sec_map["programs"] = {**sec_map["programs"], "section_json": merged_json}
            continue
        sec_map[k] = s

    # Campus image
    img_section = sec_map.get("campus_image") or sec_map.get("image")
    campus_image = ""
    campus_caption = ""
    if img_section:
        j = img_section.get("section_json", {})
        if isinstance(j, dict):
            campus_url = _first_text(j, "image_url").strip()
            campus_caption = _first_text(j, "caption").strip()
            if allow_remote and campus_url:
                embedded = fetch_image_data_uri(campus_url)
                if embedded:
                    campus_image = embedded
                    stats["images_embedded"] = stats.get("images_embedded", 0) + 1
                else:
                    stats["images_placeholder"] = stats.get("images_placeholder", 0) + 1
            else:
                # NOTE(review): the nesting of this ``else`` was reconstructed
                # from an unindented copy; it is assumed to pair with the
                # ``allow_remote`` check — confirm.
                stats["images_placeholder"] = stats.get("images_placeholder", 0) + 1

    # Overview and website
    resolved_website = (uni_raw.get("website") or "").strip()
    overview_data = None
    if "overview" in sec_map:
        overview_json = sec_map["overview"].get("section_json", {})
        if not isinstance(overview_json, dict):
            overview_json = {}
        site_from_overview = get_any(
            overview_json,
            ["university_website", "university_website_url", "website",
             "site", "url", "homepage", "web_url"],
        )
        # The record-level website wins over the one found in the overview.
        if not resolved_website and site_from_overview:
            resolved_website = site_from_overview
        overview_data = {
            "founded": get_any(overview_json, ["founded", "Founded"]),
            "total_students": get_any(overview_json, ["total_students", "Total Students"]),
            "undergraduates": get_any(overview_json, [
                "undergraduates", "Undergraduate Students", "undergraduate_students",
            ]),
            "postgraduates": get_any(overview_json, [
                "postgraduate_students", "Postgraduate Students",
            ]),
            "acceptance_rate": get_any(overview_json, ["acceptance_rate", "Acceptance Rate"]),
            "location": get_any(overview_json, ["location", "Location"]),
            "tuition": format_money_figures(str(get_any(overview_json, [
                "tuition_out_of_state_yearly",
                "Yearly Out of State Tuition Fees",
                "Yearly Out-of-State Tuition Fees",
                "Yearly Tuition Fees",
                "Yearly Out-of-State Tuition Fees:",
            ]) or "")) or None,
        }

    if resolved_website:
        stats["university_links"] = stats.get("university_links", 0) + 1
        stats["website_rows"] = stats.get("website_rows", 0) + 1

    # Benefits + Funding
    benefits: list[str] = []
    funding_heading = "Funding Available"
    funding_items: list[str] = []
    if "benefits" in sec_map:
        j = sec_map["benefits"].get("section_json", {})
        if not isinstance(j, dict):
            j = {}
        raw_benefits = j.get("benefits", [])
        benefits = _clean_items(raw_benefits) if isinstance(raw_benefits, list) else []
        funding_heading, funding_items = _extract_university_funding(
            j,
            {
                "school_category": uni_raw.get("school_category"),
                "status": "in" if is_truthy(uni_raw.get("is_active", True)) else "out",
            },
        )
        # Normalize money formatting in funding items
        funding_items = [_normalize_text_content(item) for item in funding_items]

    # Programs
    programs = None
    if "programs" in sec_map:
        j = sec_map["programs"].get("section_json", {})
        if not isinstance(j, dict):
            j = {}
        programs_raw = j.get("programs", [])
        if not isinstance(programs_raw, list):
            programs_raw = []
        if not include_inactive_programs:
            programs_raw = [
                p for p in programs_raw
                if isinstance(p, dict) and is_truthy(
                    p.get("program_active", p.get("is_active", p.get("active", 1)))
                )
            ]
        programs = []
        seen_names: set[str] = set()
        for p in programs_raw:
            if not isinstance(p, dict):
                continue
            program_name = _normalize_text_content(_first_text(p, "program_name").strip())
            # Deduplicate by lowercase program name
            key = program_name.lower()
            if key in seen_names:
                continue
            seen_names.add(key)
            link = _first_text(p, "program_link").strip()
            if not link and isinstance(p.get("program_links"), dict):
                link = _first_text(p["program_links"], "web_link").strip()
            programs.append({
                "name": program_name,
                "link": link,
                "designation": _normalize_text_content(_first_text(p, "designation")),
                "entrance": _normalize_text_content(
                    _first_text(p, "entrance_exam", "entrance_examination")
                ),
            })

    # Extra sections (rendered via global blocks normalizer)
    skip_keys = {"campus_image", "image", "overview", "benefits", "programs"}
    extra_blocks: list[list[RenderBlock]] = []
    for s in sections:
        if not isinstance(s, dict):
            continue
        k = str(s.get("section_key", ""))
        if not k or k in skip_keys:
            continue
        title = _first_text(s, "section_title")
        j = s.get("section_json", {})
        if not isinstance(j, dict):
            j = {}
        extra_blocks.append(normalize_section(k, title, j, debug=debug))

    classes = ["hb-school-profile", "page-break"]

    return RenderBlock(
        block_type="school_profile",
        css_class=" ".join(classes),
        data={
            "name": uni_name,
            "anchor": uni_raw.get("anchor"),
            "sort_order": uni_raw.get("sort_order"),
            "website": resolved_website,
            "overview": overview_data,
            "campus_image": campus_image,
            "campus_caption": campus_caption,
            "benefits": benefits,
            "funding_heading": funding_heading,
            "funding_items": funding_items,
            "programs": programs,
            "extra_blocks": extra_blocks,
        },
    )


# ───────────────────────────────────────────────────────────────
# Internal helpers
# ───────────────────────────────────────────────────────────────

def _normalize_steps(steps: list) -> list[dict]:
    """Normalise enrollment steps into structured dicts.

    Non-dict entries are skipped and do not consume a step number.  Step 1
    and step 2 receive special handling:

    - Step 1: the first Telegram URL found in the body becomes
      ``telegram_url``, a QR-code URL is generated for it when the step
      supplies none, and the URL plus its boilerplate description are
      removed from the body.
    - Step 2: scheme-less internationalscholarsprogram.com links get an
      ``https://`` prefix, and a link to the site is appended when the step
      has none.

    Returns:
        One dict per step with the keys ``number``, ``title``, ``body``,
        ``body_html``, ``links``, ``plain_links``, ``qr_url`` and
        ``telegram_url``.
    """
    from markupsafe import Markup

    result = []
    step_num = 0
    for s in steps:
        if not isinstance(s, dict):
            continue
        step_num += 1
        step_title = _first_text(s, "title", "step_title").strip()
        body = _normalize_text_content(_first_text(s, "body", "description").strip())

        links = []
        # Nothing in this function adds to plain_links; the key is still
        # emitted so the shape of the step dict is unchanged for consumers.
        plain_links: list[str] = []
        raw_links = s.get("links", [])
        if isinstance(raw_links, list):
            for lnk in raw_links:
                if not isinstance(lnk, dict):
                    continue
                label = _first_text(lnk, "label", default="Link").strip()
                url = _first_text(lnk, "url").strip()
                if not url:
                    continue
                low_label = label.lower()
                low_url = url.lower()
                is_telegram = "telegram" in low_label or "t.me" in low_url
                if (
                    step_num == 2
                    and "internationalscholarsprogram.com" in low_url
                    and not re.match(r"^https?://", url, flags=re.IGNORECASE)
                ):
                    url = "https://" + url
                # All links (including Telegram) are rendered as clickable anchors.
                # For Telegram use the full URL as visible label so readers can see/type it.
                link_label = url if is_telegram else label
                links.append({"label": link_label, "url": url})

        if step_num == 2 and not any(
            "internationalscholarsprogram.com" in str(link.get("url", "")).lower()
            for link in links
        ):
            links.append({
                "label": "www.internationalscholarsprogram.com",
                "url": "https://www.internationalscholarsprogram.com",
            })

        qr = _first_text(s, "qr_url", "qr_image").strip()
        telegram_url = ""
        if step_num == 1:
            m = re.search(
                r"(https?://(?:t\.me|telegram\.me)/[^\s<)]+)",
                body,
                flags=re.IGNORECASE,
            )
            telegram_ref = m.group(1) if m else ""
            if telegram_ref:
                telegram_url = telegram_ref
                if not qr:
                    qr = (
                        "https://api.qrserver.com/v1/create-qr-code/?size=160x160&data="
                        + quote_plus(telegram_ref)
                    )
                # Strip the raw telegram URL and the follow-up description from body
                # NOTE(review): nesting reconstructed from an unindented copy;
                # assumed to run only when a Telegram URL was found — confirm.
                body = re.sub(
                    r"https?://(?:t\.me|telegram\.me)/[^\s<)]+",
                    "",
                    body,
                    flags=re.IGNORECASE,
                )
                body = re.sub(
                    r"This telegram group will help you interact with program administrators "
                    r"and other prospective students where you can ask any questions you may "
                    r"have about the program\.?",
                    "",
                    body,
                    flags=re.IGNORECASE,
                )
                body = re.sub(r"\n{2,}", "\n", body).strip()

        # Pre-format body with bold emphasis on REGULAR, PRIME, $ amounts
        body_html = Markup(emphasize_keywords(body)) if body else ""

        result.append({
            "number": step_num,
            "title": step_title,
            "body": body,
            "body_html": body_html,
            "links": links,
            "plain_links": plain_links,
            "qr_url": qr,
            "telegram_url": telegram_url,
        })
    return result


def _normalize_basic_table(cols: list, rows: list) -> RenderBlock:
    """Normalise a basic table (columns + rows).

    Dict rows are read by column: each column label is lower-cased and every
    run of characters outside ``a-z0-9`` becomes ``_`` to form the lookup
    key.  List rows are taken positionally.  Rows of any other type are
    skipped.  Each cell is normalised, keyword-emphasised and linkified.
    """
    def render_cell(cell: Any) -> Any:
        # Normalize text, emphasize keywords, then linkify URLs for clickable links
        return linkify_urls(emphasize_keywords(_normalize_text_content(_to_text(cell))))

    norm_rows = []
    for r in rows:
        if isinstance(r, dict):
            row = []
            for col_label in cols:
                key_guess = re.sub(r"[^a-z0-9]+", "_", str(col_label).lower())
                row.append(render_cell(r.get(key_guess, "")))
            norm_rows.append(row)
        elif isinstance(r, list):
            norm_rows.append([render_cell(cell) for cell in r])

    return RenderBlock(
        block_type="table",
        css_class="hb-table",
        data={
            "columns": [str(c) for c in cols],
            "rows": norm_rows,
            "variant": "standard",
        },
    )


def _normalize_table_v2(json_data: dict) -> RenderBlock:
    """Normalise table_v2 (comparison table with header groups).

    Columns come from ``base_columns`` followed by the ``columns`` of every
    entry in ``header_groups``.  Each row becomes a dict keyed by column key;
    a cell given as a dict contributes its ``text`` value.  Column keys and
    labels are coerced to text everywhere so they agree with the row keys.
    """
    base_cols = json_data.get("base_columns", [])
    groups = json_data.get("header_groups", [])
    rows = json_data.get("rows", [])
    if not isinstance(base_cols, list):
        base_cols = []
    if not isinstance(groups, list):
        groups = []
    if not isinstance(rows, list):
        rows = []

    base_specs = [_column_spec(c) for c in base_cols if isinstance(c, dict)]

    group_specs = []
    for g in groups:
        if not isinstance(g, dict):
            continue
        g_cols = g.get("columns", [])
        if not isinstance(g_cols, list):
            g_cols = []
        group_specs.append({
            "label": _first_text(g, "label"),
            "columns": [_column_spec(c) for c in g_cols if isinstance(c, dict)],
        })

    all_cols: list[dict] = list(base_specs)
    for spec in group_specs:
        all_cols.extend(spec["columns"])

    norm_rows = []
    for r in rows:
        if not isinstance(r, dict):
            continue
        row = {}
        for c in all_cols:
            k = c["key"]
            val = r.get(k, "")
            if isinstance(val, dict):
                val = val.get("text", "")
            row[k] = emphasize_keywords(_normalize_text_content(_to_text(val)))
        norm_rows.append(row)

    return RenderBlock(
        block_type="table",
        css_class="hb-table hb-table-comparison",
        data={
            "base_columns": base_specs,
            "header_groups": group_specs,
            "all_columns": all_cols,
            "rows": norm_rows,
            "variant": "comparison",
        },
    )


# ───────────────────────────────────────────────────────────────
# Breakdown section post-processor
# ───────────────────────────────────────────────────────────────

def _postprocess_breakdown(
    blocks: list[RenderBlock],
    raw_blocks: list,
) -> list[RenderBlock]:
    """Rewrite the breakdown section to match the reference layout.

    - "Relocation Cost" becomes a banner heading with page-break-before
    - The relocation table gets a merged right cell (rowspan) with the
      cost-coverage note moved inside it
    - "ISP FINANCING" becomes an inline note with mixed bold/italic
    - "NB: CREDIT FACILITY" is styled green
    - Dollar amounts in parentheticals keep their original $ format

    Args:
        blocks: Normalised blocks of the section.
        raw_blocks: The section's raw doc_v1 blocks, consulted for the
            un-normalised relocation table and the paragraph after it.

    Returns:
        A new block list; ``blocks`` itself is not modified.
    """
    # Find raw blocks for the relocation cost table (pre-normalised, $ intact)
    raw_reloc_table = None
    raw_note_after_table = None
    found_reloc = False
    for rb in raw_blocks:
        if not isinstance(rb, dict):
            continue
        if rb.get("type") == "subheading" and "relocation" in str(rb.get("text", "")).lower():
            found_reloc = True
            continue
        if found_reloc and rb.get("type") == "table_v1" and raw_reloc_table is None:
            raw_reloc_table = rb
            continue
        if (
            found_reloc
            and raw_reloc_table
            and rb.get("type") == "paragraph"
            and raw_note_after_table is None
        ):
            raw_note_after_table = rb
            break

    result: list[RenderBlock] = []
    i = 0
    while i < len(blocks):
        blk = blocks[i]

        # ── Detect "Relocation Cost" heading ──
        if (blk.block_type == "heading_2"
                and "relocation" in blk.data.get("text", "").lower()):
            # Banner heading with page break
            result.append(RenderBlock(
                block_type="heading_2",
                css_class="hb-heading-2 hb-banner-heading page-break",
                data={"text": blk.data["text"]},
            ))
            i += 1

            # Replace the next table with spanning variant that has merged cell
            if i < len(blocks) and blocks[i].block_type == "table" and raw_reloc_table:
                raw_rows = raw_reloc_table.get("rows", [])
                # Build the note text for the merged right cell
                note_text = ""
                if raw_note_after_table:
                    note_text = _first_text(raw_note_after_table, "text")
                spanning_rows = _build_relocation_spanning_rows(raw_rows, note_text)
                result.append(RenderBlock(
                    block_type="table",
                    css_class="hb-table hb-relocation-table",
                    data={"rows": spanning_rows, "variant": "spanning"},
                ))
                i += 1  # skip the original table

                # Skip the paragraph that was moved into the merged cell
                if (i < len(blocks)
                        and blocks[i].block_type == "paragraph"
                        and note_text):
                    i += 1
            continue

        # ── "ISP FINANCING" heading → inline note with mixed formatting ──
        if (blk.block_type == "heading_2"
                and "isp financing" in blk.data.get("text", "").lower()):
            # Next block should be the interest rate paragraph
            rate_text = ""
            if i + 1 < len(blocks) and blocks[i + 1].block_type == "paragraph":
                rate_text = blocks[i + 1].data.get("text", "")
            italic_part = (
                " (" + _extract_rate_italic(rate_text) + "): " if rate_text else ""
            )
            result.append(RenderBlock(
                block_type="note",
                css_class="hb-note hb-isp-financing",
                data={
                    "parts": [
                        {"text": "ISP FINANCING", "style": "bold"},
                        {"text": italic_part, "style": "italic"},
                        {"text": _extract_rate_amount(rate_text), "style": "bold"},
                    ],
                    "inline": True,
                },
            ))
            i += 1  # skip the heading
            if rate_text:
                i += 1  # skip the paragraph
            continue

        # ── "NB: CREDIT FACILITY" note → green styling ──
        if (blk.block_type == "note"
                and "credit facility" in blk.data.get("text", "").lower()):
            result.append(RenderBlock(
                block_type="note",
                css_class="hb-note hb-credit-note",
                data=blk.data,
            ))
            i += 1
            continue

        result.append(blk)
        i += 1

    return result


def _build_relocation_spanning_rows(
    raw_rows: list,
    note_text: str,
) -> list[list[dict]]:
    """Build spanning rows for the relocation cost table.

    Row 0: normal 2-column (consultation fees | Covered in the contribution)
    Rows 1-7: left cell per row, right cell merged (rowspan) with italic note
    Rows 8+: left cell only, empty right

    When ``note_text`` is empty there is nothing to merge, so every row after
    row 0 gets its own empty right cell.  Otherwise rows 1-7 would be emitted
    without a right cell and without a rowspan cell covering them.

    Args:
        raw_rows: Raw table rows.  Entries that are not lists are ignored.
        note_text: Text placed in the merged right cell; may be empty.

    Returns:
        Rows of ``{"text", "colspan", "rowspan"}`` cell dicts, or an empty
        list when there are no usable rows.
    """
    from markupsafe import Markup

    if not isinstance(raw_rows, list):
        return []
    table_rows = [r for r in raw_rows if isinstance(r, list)]
    if not table_rows:
        return []

    def cell_text(row: list, index: int) -> str:
        return _to_text(row[index]) if len(row) > index else ""

    rows: list[list[dict]] = []

    # Row 0 — has "Covered in the contribution"
    first = table_rows[0]
    rows.append([
        {"text": Markup(emphasize_keywords(cell_text(first, 0))), "colspan": 1, "rowspan": 1},
        # NOTE(review): the reviewed copy wrapped this value in two empty
        # string literals, which looks like markup lost from the file —
        # confirm against the original.
        {"text": Markup(h(cell_text(first, 1))), "colspan": 1, "rowspan": 1},
    ])

    # Rows 1-7: items with dollar amounts that get the merged right cell
    # These are the visa/fee/rent/ticket rows (have parenthetical dollar amounts)
    merged_start = 1
    # Visa Integrity through Air ticket; no merged range without a note.
    merged_end = min(8, len(table_rows)) if note_text else merged_start

    for idx in range(merged_start, len(table_rows)):
        left = {
            "text": Markup(emphasize_keywords(cell_text(table_rows[idx], 0))),
            "colspan": 1,
            "rowspan": 1,
        }
        if idx == merged_start and note_text:
            # First merged row gets the rowspan cell
            span_count = merged_end - merged_start
            # NOTE(review): the three replacement/suffix literals below held
            # raw line breaks in the reviewed copy and are reproduced as
            # "\n" escapes.  They are presumably line-break markup lost from
            # the file — confirm against the original before relying on them.
            note_html = note_text.replace("\n\n", "\n\n")
            right = {
                "text": Markup(h(note_html).replace("<br/><br/>", "\n\n") + "\n"),
                "colspan": 1,
                "rowspan": span_count,
            }
            rows.append([left, right])
        elif idx < merged_end:
            # Subsequent merged rows — no right cell (covered by rowspan)
            rows.append([left])
        else:
            # Remaining rows — empty right cell
            rows.append([
                left,
                {"text": "", "colspan": 1, "rowspan": 1},
            ])

    return rows


def _extract_rate_italic(text: str) -> str:
    """Extract the italic portion: 'Interest rate of 12% – 15% Market Rate PA'.

    The match ends at "Market Rate" (plus a following "PA" when present) or
    at a standalone "PA".  "PA" must be a whole word: the match is
    case-insensitive, so a bare "PA" would otherwise stop inside words such
    as "repayment".
    """
    # Text is like: "Interest rate of 12% – 15% Market Rate: UP TO USD 10,000"
    m = re.match(
        r"(Interest rate.*?(?:Market Rate(?:\s+PA\b)?|\bPA\b))",
        text,
        re.IGNORECASE,
    )
    if m:
        return m.group(1).rstrip(": ")
    # Fallback: everything before the colon
    if ":" in text:
        return text.split(":")[0].strip()
    return text


def _extract_rate_amount(text: str) -> str:
    """Extract the amount portion: 'UP TO USD 10,000'.

    Falls back to whatever follows the first colon, and to the empty string
    when neither "UP TO" nor a colon is present.
    """
    m = re.search(r"(UP TO.*)", text, re.IGNORECASE)
    if m:
        return m.group(1).strip()
    if ":" in text:
        return text.split(":", 1)[1].strip()
    return ""


# ───────────────────────────────────────────────────────────────
# Tier 2 (cosigner) section post-processor
# ───────────────────────────────────────────────────────────────

def _postprocess_tier2(blocks: list[RenderBlock]) -> list[RenderBlock]:
    """Style the Tier 2 section to match the reference layout.

    - Every bullet_list that directly follows another bullet_list (the
      sub-bullets under Sources of Funds) gets checkmark styling instead of
      arrows.

    Returns a new list; restyled lists are new blocks that share the
    original block's ``data``.
    """
    result: list[RenderBlock] = []
    prev_was_bullet = False
    for blk in blocks:
        if blk.block_type != "bullet_list":
            prev_was_bullet = False
            result.append(blk)
            continue
        if prev_was_bullet:
            # This is the sub-bullet list → use checkmark class
            result.append(RenderBlock(
                block_type="bullet_list",
                css_class="hb-bullet-list hb-sub-bullets",
                data=blk.data,
            ))
        else:
            result.append(blk)
        prev_was_bullet = True
    return result


def _normalize_doc_v1(blocks: list, *, skip_title: str = "") -> list[RenderBlock]:
    """Normalise doc_v1 blocks into typed RenderBlocks.

    Handled block types: ``paragraph``, ``subheading``, ``bullets``,
    ``numbered_list``, ``note``, ``note_inline``, ``table_v1``, ``table``,
    ``table_v3`` and ``table_v4``.  Entries that are not dicts, or whose type
    is anything else, produce no output.

    Args:
        blocks: Raw doc_v1 block list.
        skip_title: When set, any heading/subheading block whose text matches
            this title (case-insensitive) is dropped to avoid duplicating the
            section heading already emitted by the caller.

    Returns:
        The render blocks, in input order.
    """
    from markupsafe import Markup

    def render_cell(cell: Any) -> Any:
        return emphasize_keywords(_normalize_text_content(_to_text(cell)))

    _skip_norm = skip_title.strip().lower() if skip_title else ""
    result: list[RenderBlock] = []

    for b in blocks:
        if not isinstance(b, dict):
            continue
        btype = str(b.get("type", ""))

        # Skip heading/subheading blocks that duplicate the section title
        if _skip_norm and btype in ("heading", "subheading"):
            block_text = _first_text(b, "text").strip().lower()
            if block_text == _skip_norm:
                continue

        if btype == "paragraph":
            t = _normalize_text_content(_first_text(b, "text"))
            if t.strip():
                result.append(RenderBlock(
                    block_type="paragraph",
                    css_class="hb-paragraph",
                    data={
                        "text": t,
                        "html": Markup(emphasize_keywords(t)),
                    },
                ))

        elif btype == "subheading":
            t = _normalize_text_content(_first_text(b, "text"))
            if t.strip():
                result.append(RenderBlock(
                    block_type="heading_2",
                    css_class="hb-heading-2",
                    data={"text": t},
                ))

        elif btype == "bullets":
            items = b.get("items", [])
            normalized = _clean_items(items) if isinstance(items, list) else []
            if normalized:
                html_items = [Markup(emphasize_keywords(it)) for it in normalized]
                result.append(RenderBlock(
                    block_type="bullet_list",
                    css_class="hb-bullet-list",
                    data={"entries": html_items, "html_entries": True},
                ))

        elif btype == "numbered_list":
            items = b.get("items", [])
            normalized = _clean_items(items) if isinstance(items, list) else []
            if normalized:
                html_items = [Markup(emphasize_keywords(it)) for it in normalized]
                result.append(RenderBlock(
                    block_type="bullet_list",
                    css_class="hb-bullet-list hb-numbered-list",
                    data={"entries": html_items, "ordered": True, "html_entries": True},
                ))

        elif btype == "note":
            t = _normalize_text_content(_first_text(b, "text"))
            if t.strip():
                result.append(RenderBlock(
                    block_type="note",
                    css_class="hb-note",
                    data={"text": t},
                ))

        elif btype == "note_inline":
            parts = b.get("parts", [])
            if not isinstance(parts, list):
                parts = []
            normalized_parts = []
            for p in parts:
                if not isinstance(p, dict):
                    continue
                t = _normalize_text_content(_first_text(p, "text"))
                if t:
                    normalized_parts.append({
                        "text": t,
                        "style": _first_text(p, "style"),
                    })
            if normalized_parts:
                result.append(RenderBlock(
                    block_type="note",
                    css_class="hb-note",
                    data={"parts": normalized_parts, "inline": True},
                ))

        elif btype == "table_v1":
            t_cols = b.get("columns", [])
            t_rows = b.get("rows", [])
            if not isinstance(t_cols, list):
                t_cols = []
            if not isinstance(t_rows, list):
                t_rows = []
            norm_rows = [
                [render_cell(cell) for cell in r]
                for r in t_rows
                if isinstance(r, list)
            ]
            result.append(RenderBlock(
                block_type="table",
                css_class="hb-table",
                data={
                    "columns": [str(c) for c in t_cols],
                    "rows": norm_rows,
                    "variant": "standard",
                },
            ))

        elif btype == "table":
            # Generic table (columns may be objects or strings, rows may be dicts or lists)
            t_cols = b.get("columns", [])
            t_rows = b.get("rows", [])
            if not isinstance(t_cols, list):
                t_cols = []
            if not isinstance(t_rows, list):
                t_rows = []
            col_labels = []
            col_keys = []
            for c in t_cols:
                if isinstance(c, dict):
                    col_labels.append(str(c.get("label", c.get("key", ""))))
                    col_keys.append(str(c.get("key", "")))
                else:
                    col_labels.append(str(c))
                    col_keys.append(re.sub(r"[^a-z0-9]+", "_", str(c).lower()))
            norm_rows = []
            for r in t_rows:
                if isinstance(r, dict):
                    norm_rows.append([render_cell(r.get(k, "")) for k in col_keys])
                elif isinstance(r, list):
                    norm_rows.append([render_cell(cell) for cell in r])
            result.append(RenderBlock(
                block_type="table",
                css_class="hb-table",
                data={"columns": col_labels, "rows": norm_rows, "variant": "standard"},
            ))

        elif btype in ("table_v3", "table_v4"):
            t_rows = b.get("rows", [])
            if not isinstance(t_rows, list):
                t_rows = []
            norm_rows = []
            for r in t_rows:
                if not isinstance(r, list):
                    continue
                norm_row = []
                for cell in r:
                    if isinstance(cell, dict):
                        norm_row.append({
                            "text": render_cell(cell.get("text", "")),
                            "colspan": _span_value(cell.get("colspan", "")),
                            "rowspan": _span_value(cell.get("rowspan", "")),
                        })
                    else:
                        norm_row.append({
                            "text": render_cell(cell),
                            "colspan": 1,
                            "rowspan": 1,
                        })
                norm_rows.append(norm_row)
            result.append(RenderBlock(
                block_type="table",
                css_class="hb-table",
                data={"rows": norm_rows, "variant": "spanning"},
            ))

    return result