Spaces:
Sleeping
Sleeping
| import logging | |
| from typing import Dict | |
| from bs4 import BeautifulSoup | |
| from bs4.element import NavigableString, Tag | |
| from .templateify_base import TemplateifyHelpers | |
| logger = logging.getLogger(__name__) | |
class TemplateifySpecialEventsService:
    """Turn a rendered "special events" email into a Mustache-style template.

    ``convert`` parses the HTML, replaces concrete content with ``{{TOKEN}}``
    placeholders (triple braces where the description says the value is HTML),
    wraps repeating and optional regions in ``{{#NAME}}``/``{{/NAME}}``
    markers, and returns the rewritten markup together with the tokens
    collected by ``TemplateifyHelpers``.

    Every section is located by CSS class; a section that is absent from the
    input is skipped.
    """

    # (container selector, child tag, token, token description, log message)
    # for sections whose only dynamic content is the text of one child element.
    # Processed in this order, right after the header image.
    _SINGLE_TEXT_SECTIONS = (
        (
            ".event_title",
            "h1",
            "{{EVENT_TITLE}}",
            "Event title / headline",
            "Tokenized event title",
        ),
        (
            ".event_date",
            "p",
            "{{EVENT_DATE}}",
            "Event date/time line (e.g. Sunday · Feb 9 · 6:30pm ET)",
            "Tokenized event date",
        ),
        (
            ".event_intro",
            "p",
            "{{{INTRO_TEXT}}}",
            "Introduction text (can include HTML, unescaped)",
            "Tokenized intro text",
        ),
    )

    # Section selector -> (flag name, optional heading selector).
    # With a heading selector only that heading is wrapped in the flag;
    # with ``None`` the whole section element is wrapped.
    _SECTION_CONDITIONALS = {
        ".top_stories": ("HAS_TOP_STORIES", None),
        ".wallet_watch": ("HAS_WALLET_WATCH", None),
        ".highlighted_markets": ("HAS_MARKET_CATEGORIES", None),
    }

    def __init__(self, settings=None) -> None:
        """Create the service; ``settings`` is accepted but not used here."""
        pass

    def convert(self, html: str) -> Dict[str, object]:
        """Tokenize *html* and return the template plus its token registry.

        Args:
            html: Markup of a rendered special-events email.

        Returns:
            A dict with ``"html"`` (the serialized, tokenized document) and
            ``"tokens"`` (``TemplateifyHelpers.tokens`` after all sections
            have been processed).
        """
        soup = BeautifulSoup(html, "html.parser")
        h = TemplateifyHelpers(soup)

        # Order matters: tokens are registered in the order sections are
        # visited, and the section conditionals must wrap the sections only
        # after their contents have been tokenized.
        self._tokenize_header_image(soup, h)
        for section_spec in self._SINGLE_TEXT_SECTIONS:
            self._tokenize_single_text(soup, h, *section_spec)
        self._tokenize_cta(soup, h)
        self._tokenize_top_stories(soup, h)
        self._tokenize_wallet_watch(h)
        self._tokenize_market_categories(soup, h)
        self._tokenize_outro(soup, h)
        self._wrap_section_conditionals(soup)

        h.wrap_images_in_anchors()
        return {"html": str(soup), "tokens": h.tokens}

    # -- small DOM helpers ---------------------------------------------------

    def _set_text(self, node, token: str) -> None:
        """Replace all children of *node* with the literal *token* text."""
        node.clear()
        node.append(token)

    def _wrap_in_section(self, node, name: str):
        """Put ``{{#name}}`` before and ``{{/name}}`` after *node*.

        Returns the closing text node so callers can anchor further
        insertions directly behind it.
        """
        closing = NavigableString("{{/" + name + "}}")
        node.insert_before(NavigableString("{{#" + name + "}}"))
        node.insert_after(closing)
        return closing

    # -- per-section tokenizers ----------------------------------------------

    def _tokenize_header_image(self, soup, h) -> None:
        """Point the ``.header_image`` element's ``src`` at ``{{HEADER_IMAGE}}``."""
        image = soup.select_one(".header_image")
        if image is None:
            return
        image["src"] = "{{HEADER_IMAGE}}"
        h.register("{{HEADER_IMAGE}}", "Header image URL (defaults to Polygraph logo)")
        logger.info("Tokenized header image")

    def _tokenize_single_text(
        self, soup, h, selector, child_tag, token, description, log_message
    ) -> None:
        """Replace the first *child_tag* inside *selector* with *token*."""
        container = soup.select_one(selector)
        if container is None:
            return
        target = container.find(child_tag)
        if target is None:
            return
        self._set_text(target, token)
        h.register(token, description)
        logger.info(log_message)

    def _tokenize_cta(self, soup, h) -> None:
        """Tokenize both the URL and the label of the ``.event_cta_link`` button."""
        link = soup.select_one(".event_cta_link")
        if link is None:
            return
        link["href"] = "{{EVENT_CTA_URL}}"
        h.register("{{EVENT_CTA_URL}}", "CTA button URL")
        self._set_text(link, "{{EVENT_CTA_TEXT}}")
        h.register("{{EVENT_CTA_TEXT}}", "CTA button text (e.g. Trade what happens live)")
        logger.info("Tokenized CTA button")

    def _tokenize_top_stories(self, soup, h) -> None:
        """Tokenize the Top Stories heading and hand the story box to ``loopify``."""
        section = soup.select_one(".top_stories")
        if section is None:
            return

        heading = section.select_one(".top_stories_heading")
        if heading is not None:
            self._set_text(heading, "{{TOP_STORIES_HEADING}}")
            h.register("{{TOP_STORIES_HEADING}}", "Top Stories section title (customizable)")

        def transform(node: Tag) -> None:
            # The whole card is a link; its target becomes the story URL.
            card_link = node.select_one(".top_stories_card_link")
            if card_link is not None:
                card_link["href"] = "{{URL}}"
                h.register("{{URL}}", "Story URL")

            # The image is optional: the wrapper is only rendered when the
            # TOP_STORIES_IMAGE value is present.
            image_wrapper = node.select_one(".top_stories_image_wrapper")
            if image_wrapper is not None:
                self._wrap_in_section(image_wrapper, "TOP_STORIES_IMAGE")
                image = image_wrapper.find("img")
                if image is not None:
                    image["src"] = "{{TOP_STORIES_IMAGE}}"
                    h.register("{{TOP_STORIES_IMAGE}}", "Top story image URL (optional)")

            title = node.select_one(".top_stories_title")
            if title is not None:
                self._set_text(title, "{{TOP_STORIES_TITLE}}")
                h.register("{{TOP_STORIES_TITLE}}", "Top story title")

            description = node.select_one(".top_stories_description")
            if description is not None:
                self._set_text(description, "{{{TOP_STORIES_DESCRIPTION}}}")
                h.register("{{{TOP_STORIES_DESCRIPTION}}}", "Top story description (HTML)")

            # The Read More button is a <span> (not an <a>); the <table>
            # around it is what gets shown or hidden.
            read_more = node.select_one(".top_stories_link")
            if read_more is not None:
                button_table = read_more.find_parent("table")
                if button_table is not None:
                    self._wrap_in_section(button_table, "TOP_STORIES_LINK_URL")
                    h.register(
                        "{{TOP_STORIES_LINK_URL}}",
                        "Show Read More button (optional — omit to hide)",
                    )

        # NOTE(review): the loop is set up only when the section exists;
        # confirm this matches the intended nesting of the loopify call.
        h.loopify(
            ".top_stories .top_stories_box",
            "TOP_STORIES",
            transform,
            "Top Stories block.",
        )

    def _tokenize_wallet_watch(self, h) -> None:
        """Hand the Wallet Watch box to ``loopify`` with its tokenizing transform."""

        def transform(node: Tag) -> None:
            card_link = node.select_one(".wallet_watch_card_link")
            if card_link is not None:
                card_link["href"] = "{{URL}}"
                h.register("{{URL}}", "Wallet Watch URL")

            image = node.find("img")
            if image is not None:
                image["src"] = "{{WALLET_WATCH_IMAGE}}"
                h.register("{{WALLET_WATCH_IMAGE}}", "Wallet Watch image URL")

            # First <p> is the title, second <p> the description; a box with
            # fewer paragraphs simply gets fewer tokens.
            text_tokens = (
                ("{{WALLET_WATCH_TITLE}}", "Wallet Watch title"),
                ("{{WALLET_WATCH_DESCRIPTION}}", "Wallet Watch description"),
            )
            for paragraph, (token, description) in zip(node.find_all("p"), text_tokens):
                self._set_text(paragraph, token)
                h.register(token, description)

        h.loopify(
            ".wallet_watch .wallet_watch_box",
            "WALLET_WATCH",
            transform,
            "Wallet Watch block.",
        )

    def _tokenize_market_categories(self, soup, h) -> None:
        """Build the nested ``MARKET_CATEGORIES`` / ``MARKETS`` loops.

        Keeps the first ``.market_row`` as the row template, removes the
        others, and emits::

            {{#MARKET_CATEGORIES}} [hr] header {{#MARKETS}} row {{/MARKETS}}{{/MARKET_CATEGORIES}}
        """
        section = soup.select_one(".highlighted_markets")
        if section is None:
            return

        category_header = section.select_one(".market_category_header")
        market_rows = section.select(".market_row")
        if category_header is None or not market_rows:
            return

        category_cell = category_header.find("td")
        if category_cell is not None:
            self._set_text(category_cell, "{{CATEGORY_NAME}}")
            h.register("{{CATEGORY_NAME}}", "Category section name")

        template_row, *extra_rows = market_rows
        for row in extra_rows:
            # A row may be wrapped in an <a>; remove the wrapper with it.
            wrapper = row.find_parent("a")
            if wrapper is not None:
                wrapper.decompose()
            else:
                row.decompose()

        row_anchor = template_row.find_parent("a")
        if row_anchor is not None:
            row_anchor["href"] = "{{URL}}"
            h.register("{{URL}}", "Market URL")

        title = template_row.select_one(".market_title")
        if title is not None:
            self._set_text(title, "{{MARKET_TITLE}}")
            h.register("{{MARKET_TITLE}}", "Market title")

        volume = template_row.select_one(".market_volume")
        if volume is not None:
            self._set_text(volume, "{{MARKET_VOLUME}}")
            h.register("{{MARKET_VOLUME}}", "Market volume (e.g. $1.4M vol)")

        image_wrapper = template_row.select_one(".market_image_wrapper")
        if image_wrapper is not None:
            image = image_wrapper.find("img")
            if image is not None:
                image["src"] = "{{MARKET_IMAGE}}"
                h.register("{{MARKET_IMAGE}}", "Market thumbnail image URL")

        # The "Trade Now" link in the right-aligned cell shares the row URL.
        trade_button = template_row.select_one("td[style*='text-align:right'] a")
        if trade_button is not None:
            trade_button["href"] = "{{URL}}"

        # Inner loop: the row together with its <a> wrapper when it has one.
        repeated = row_anchor if row_anchor is not None else template_row
        markets_close = self._wrap_in_section(repeated, "MARKETS")

        # Outer loop: starts at the <hr> sibling preceding the header (if
        # any) and ends immediately after the inner loop's closing marker.
        divider = category_header.find_previous_sibling("hr")
        block_start = divider if divider is not None else category_header
        block_start.insert_before(NavigableString("{{#MARKET_CATEGORIES}}"))
        markets_close.insert_after(NavigableString("{{/MARKET_CATEGORIES}}"))
        logger.info("Tokenized market categories with nested market loop")

    def _tokenize_outro(self, soup, h) -> None:
        """Collapse the outro paragraphs into a single ``{{{OUTRO_TEXT}}}`` one."""
        container = soup.select_one(".outro_text")
        if container is None:
            return
        paragraphs = container.find_all("p")
        if not paragraphs:
            return

        first, *extras = paragraphs
        self._set_text(first, "{{{OUTRO_TEXT}}}")
        h.register("{{{OUTRO_TEXT}}}", "Outro text (can include HTML, unescaped)")
        for extra in extras:
            extra.decompose()
        logger.info("Tokenized outro text")

    def _wrap_section_conditionals(self, soup) -> None:
        """Wrap each optional section (or its heading) in its ``HAS_*`` flag."""
        for selector, (flag_name, heading_selector) in self._SECTION_CONDITIONALS.items():
            section = soup.select_one(selector)
            if section is None:
                continue

            if heading_selector:
                heading = section.select_one(heading_selector)
                if heading is not None and heading.parent is not None:
                    self._wrap_in_section(heading, flag_name)
            else:
                self._wrap_in_section(section, flag_name)
            logger.info("Added {{#%s}} conditional for %s", flag_name, selector)