Spaces:
Sleeping
Sleeping
| """ | |
| Medium Article HTML Renderer | |
| Renders article data to beautiful HTML matching Medium's styling. | |
| Based on Freedium's medium-parser/core.py template rendering. | |
| """ | |
| import html | |
| from typing import Dict, List, Any, Optional | |
| import logging | |
| # Import centralized image URL utilities | |
| from src.utils import MEDIUM_IMAGE_DEFAULT_WIDTH | |
| logger = logging.getLogger("HTMLRenderer") | |
# Base HTML template for a standalone page.
# Filled with str.format(title=..., content=...): literal CSS braces are doubled
# ({{ }}) so that only {title} and {content} act as placeholders. Values are
# inserted verbatim, so the caller must pass an already-escaped title.
BASE_TEMPLATE = """<!DOCTYPE html>
<html lang="en" class="dark">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{title} | Medium Scraper</title>
<style>
@import url('https://fonts.googleapis.com/css2?family=Playfair+Display:wght@400;700&family=Inter:wght@300;400;600&family=JetBrains+Mono:wght@400;600&display=swap');
:root {{
    --bg-color: #121212;
    --text-color: #e5e5e5;
    --accent-color: #6366f1;
    --code-bg: #1e1e1e;
}}
body {{
    background-color: var(--bg-color);
    color: var(--text-color);
    font-family: 'Inter', sans-serif;
    line-height: 1.6;
    margin: 0;
    padding: 0;
}}
/* Container for PDF and Web consistency */
.container {{
    max-width: 100%;
    margin: 0 auto;
    padding: 40px;
}}
/* Typography */
h1, h2, h3, h4 {{
    font-family: 'Playfair Display', serif;
    color: #ffffff;
    margin-top: 2em;
    margin-bottom: 0.5em;
    line-height: 1.25;
}}
h1 {{ font-size: 2.5rem; border-bottom: 1px solid rgba(255,255,255,0.1); padding-bottom: 20px; }}
h2 {{ font-size: 1.8rem; }}
h3 {{ font-size: 1.5rem; }}
p {{ margin-bottom: 1.5em; font-size: 1.1rem; }}
a {{ color: var(--accent-color); text-decoration: none; }}
a:hover {{ text-decoration: underline; }}
/* Code Blocks */
pre {{
    background: var(--code-bg);
    padding: 20px;
    border-radius: 8px;
    overflow-x: auto;
    border: 1px solid rgba(255,255,255,0.1);
    margin: 2em 0;
}}
code {{
    font-family: 'JetBrains Mono', monospace;
    font-size: 0.9em;
    color: #efefef;
}}
p code {{
    background: rgba(255,255,255,0.1);
    padding: 2px 6px;
    border-radius: 4px;
}}
/* Blockquotes */
blockquote {{
    border-left: 4px solid var(--accent-color);
    margin: 2em 0;
    padding-left: 20px;
    font-style: italic;
    color: #a1a1aa;
}}
/* Images */
img {{
    max-width: 100%;
    height: auto;
    border-radius: 8px;
    margin: 2em auto;
    display: block;
}}
/* Lists */
ul, ol {{ margin: 1.5em 0; padding-left: 2em; }}
li {{ margin-bottom: 0.5em; }}
/* Tables */
table {{
    width: 100%;
    border-collapse: collapse;
    margin: 2em 0;
}}
th, td {{
    padding: 12px;
    border-bottom: 1px solid rgba(255,255,255,0.1);
    text-align: left;
}}
th {{ font-weight: 600; color: #fff; }}
/* Author Card */
.author-card {{
    background: rgba(255,255,255,0.05);
    padding: 20px;
    border-radius: 12px;
    margin-bottom: 40px;
    display: flex;
    align-items: center;
    gap: 20px;
}}
.author-card img {{ margin: 0; width: 64px; height: 64px; border-radius: 50%; }}
/* Print Overrides */
@media print {{
    body {{ background: white; color: black; }}
    /* h4 and th are white on screen; without this override they print white-on-white */
    h1, h2, h3, h4, th {{ color: black; }}
    pre {{ background: #f5f5f5; border: 1px solid #ddd; color: black; }}
    code {{ color: black; }}
    a {{ color: #000; text-decoration: underline; }}
    .container {{ padding: 0; }}
}}
</style>
</head>
<body class="bg-gray-900 text-gray-100">
{content}
</body>
</html>"""
# Article content template.
# Filled with str.format(); placeholders: {url}, {preview_image}, {title},
# {subtitle_html}, {author_card}, {content}, {tags_html}. Values are inserted
# verbatim, so plain-text values must be HTML-escaped by the caller.
ARTICLE_TEMPLATE = """
<div class="container w-full pt-20 mx-auto text-gray-100 break-words bg-gray-800 max-w-none">
    <div class="w-full px-4 text-xl leading-normal md:px-6" style="font-family:Georgia,serif;">
        <div class="font-sans">
            <p class="pb-3 text-base font-bold text-green-500 md:text-sm">
                <a href="{url}#bypass" class="text-sm font-bold text-green-500 no-underline md:text-sm hover:underline">&lt; Go to the original</a>
            </p>
            {preview_image}
            <h1 class="pt-6 pb-2 font-sans text-3xl font-bold text-gray-100 break-normal md:text-4xl">{title}</h1>
            {subtitle_html}
        </div>
        {author_card}
        <div class="mt-8 main-content">
            {content}
        </div>
        <div class="flex flex-wrap gap-2 mt-5">
            {tags_html}
        </div>
        <div class="container w-full pt-12 mx-auto"></div>
    </div>
</div>
"""
# Author card template.
# Filled with str.format(); placeholders: {username}, {image_id}, {name},
# {collection_html}, {reading_time}, {free_access}. str.format() performs no
# escaping, so every value must already be safe to embed in HTML.
AUTHOR_CARD_TEMPLATE = """
<div class="m-2 mt-5 bg-gray-700 border border-gray-600">
<div class="flex items-center p-4 space-x-4">
<div class="flex-shrink-0">
<a href="https://medium.com/@{username}" target="_blank" class="relative block">
<img src="https://miro.medium.com/v2/resize:fill:88:88/{image_id}"
alt="{name}" loading="eager" referrerpolicy="no-referrer"
class="rounded-full h-11 w-11">
</a>
</div>
<div class="flex-grow">
<a href="https://medium.com/@{username}" target="_blank"
class="block font-semibold text-white">{name}</a>
<button class="px-3 py-1 mt-1 text-sm text-white bg-green-600 rounded-lg">
<a href="https://medium.com/@{username}" target="_blank" class="text-sm text-white">Follow</a>
</button>
</div>
</div>
<div class="px-4 pb-2">
<div class="flex flex-wrap items-center space-x-2 text-sm text-gray-400">
{collection_html}
<span>~{reading_time} min read</span>
<span>·</span>
<span class="text-yellow-400">Free: {free_access}</span>
</div>
</div>
</div>
"""
def escape_html(text: str) -> str:
    """Return *text* with HTML special characters replaced by entities.

    Any falsy input (``None``, ``""``, ``0`` ...) yields an empty string;
    everything else is passed through ``str()`` and then ``html.escape``.
    """
    return html.escape(str(text)) if text else ""
def render_markup(text: str, markups: List[Dict]) -> str:
    """Apply inline markups (bold, italic, code, links) to *text*.

    Args:
        text: Raw, unescaped paragraph text.
        markups: Dicts with ``type`` (``STRONG``, ``EM``, ``CODE`` or ``A``),
            ``start`` and ``end`` offsets into *text*, and for links ``href``
            or ``anchorType == "USER"`` plus ``userId``. Other types are
            ignored.

    Returns:
        An HTML fragment in which the text is escaped and the tags inserted at
        the requested offsets. Properly nested markups yield properly nested
        tags; partially overlapping markups are emitted in offset order as-is.
    """
    if not text or not markups:
        return html.escape(str(text)) if text else ""

    length = len(text)
    # Each event is (position, phase, tiebreak, tiebreak, tag). Phase 0 = closing,
    # phase 1 = opening, so that at a shared offset tags close before new ones open.
    events = []
    for index, markup in enumerate(markups):
        start = markup.get("start", 0)
        end = markup.get("end", length)
        start = 0 if start is None else max(0, min(start, length))
        end = length if end is None else max(0, min(end, length))
        if start >= end:
            # Empty or inverted range: nothing to wrap.
            continue

        markup_type = markup.get("type", "")
        if markup_type == "STRONG":
            open_tag, close_tag = "<strong>", "</strong>"
        elif markup_type == "EM":
            open_tag, close_tag = "<em>", "</em>"
        elif markup_type == "CODE":
            open_tag, close_tag = '<code class="p-1.5 bg-gray-600 rounded">', "</code>"
        elif markup_type == "A":
            if markup.get("anchorType") == "USER":
                raw_href = f"https://medium.com/u/{markup.get('userId') or ''}"
            else:
                raw_href = markup.get("href") or ""
            href = html.escape(str(raw_href))
            target = "" if href.startswith("#") else ' target="_blank"'
            open_tag = f'<a href="{href}"{target} class="underline text-blue-400">'
            close_tag = "</a>"
        else:
            continue

        # Openings at one offset: the widest span first, then input order.
        events.append((start, 1, -end, index, open_tag))
        # Closings at one offset: the innermost span (latest start, latest
        # opened) first, i.e. the reverse of the opening order.
        events.append((end, 0, -start, -index, close_tag))

    events.sort(key=lambda event: event[:4])

    # Offsets refer to the raw text, so slice the raw text and escape each
    # slice; slicing an already-escaped string would shift every offset that
    # follows an escaped character.
    parts = []
    cursor = 0
    for position, _phase, _key_a, _key_b, tag in events:
        if position > cursor:
            parts.append(html.escape(text[cursor:position]))
            cursor = position
        parts.append(tag)
    parts.append(html.escape(text[cursor:]))
    return "".join(parts)
def render_paragraph(paragraph: Dict, is_code: bool = False) -> str:
    """Render a single paragraph dict to an HTML fragment.

    Args:
        paragraph: Dict with ``type`` (defaults to ``"P"``), ``text``,
            ``markups`` and, depending on the type, ``metadata``,
            ``codeBlockMetadata``, ``iframe`` or ``mixtapeMetadata``.
        is_code: When true, the text is escaped without applying markups.

    Returns:
        The HTML for the paragraph. An ``IFRAME`` without a source renders as
        an empty string; unknown types are logged and rendered as a plain
        ``<p>``.
    """
    para_type = paragraph.get("type", "P")
    text = paragraph.get("text", "")
    markups = paragraph.get("markups", [])

    formatted_text = escape_html(text) if is_code else render_markup(text, markups)

    heading_styles = {
        "H2": ("h2", "pt-12 font-bold font-sans break-normal text-gray-100 text-2xl"),
        "H3": ("h3", "pt-12 font-bold font-sans break-normal text-gray-100 text-2xl"),
        "H4": ("h4", "pt-8 font-bold font-sans break-normal text-gray-100 text-xl"),
    }
    if para_type in heading_styles:
        tag, css_class = heading_styles[para_type]
        return f'<{tag} class="{css_class}">{formatted_text}</{tag}>'

    if para_type == "P":
        css_class = "leading-8 mt-7"
        if paragraph.get("hasDropCap"):
            css_class += " first-letter:text-7xl first-letter:float-left first-letter:mr-2"
        return f'<p class="{css_class}">{formatted_text}</p>'

    if para_type == "IMG":
        metadata = paragraph.get("metadata") or {}
        # The id and alt text come from article data; escape both before
        # placing them inside attributes.
        image_id = escape_html(metadata.get("id", ""))
        alt = escape_html(metadata.get("alt", ""))
        img_html = f'''
<div class="mt-7">
    <img loading="eager" alt="{alt}" class="pt-5 m-auto"
         referrerpolicy="no-referrer"
         src="https://miro.medium.com/v2/resize:fit:1400/{image_id}">
</div>
'''
        if formatted_text:
            img_html += f'<figcaption class="mt-3 text-sm text-center text-gray-400">{formatted_text}</figcaption>'
        return img_html

    if para_type == "PRE":
        code_meta = paragraph.get("codeBlockMetadata") or {}
        lang = escape_html(code_meta.get("lang") or "")
        lang_class = f"language-{lang}" if lang else "nohighlight"
        return (
            '<pre class="flex flex-col mt-7 border border-gray-700">'
            f'<code class="p-2 bg-gray-900 overflow-x-auto {lang_class}">{escape_html(text)}</code></pre>'
        )

    if para_type == "BQ":
        return f'''
<blockquote style="box-shadow: inset 3px 0 0 0 rgb(209 207 239);" class="px-5 pt-3 pb-3 mt-5">
    <p class="font-italic">{formatted_text}</p>
</blockquote>
'''

    if para_type == "PQ":
        return f'<blockquote class="ml-5 text-2xl text-gray-300 mt-7"><p>{formatted_text}</p></blockquote>'

    if para_type in ("ULI", "OLI"):
        # The enclosing <ul>/<ol> is added by the caller that groups list items.
        return f'<li class="mt-3">{formatted_text}</li>'

    if para_type == "IFRAME":
        iframe_data = paragraph.get("iframe") or {}
        media_resource = iframe_data.get("mediaResource") or {}
        src = media_resource.get("iframeSrc", "")
        if not src:
            return ""
        width = escape_html(media_resource.get("iframeWidth") or "100%")
        height = escape_html(media_resource.get("iframeHeight") or "400")
        return f'''
<div class="mt-7">
    <iframe class="w-full" src="{escape_html(src)}"
            width="{width}" height="{height}"
            allowfullscreen frameborder="0"></iframe>
</div>
'''

    if para_type == "MIXTAPE_EMBED":
        mixtape = paragraph.get("mixtapeMetadata") or {}
        href = escape_html(mixtape.get("href", ""))
        thumbnail = escape_html(mixtape.get("thumbnailImageId", ""))
        # The first line of the text is used as the embed title, the second
        # as its description.
        lines = text.split("\n") if text else []
        embed_title = escape_html(lines[0]) if lines else ""
        embed_desc = escape_html(lines[1]) if len(lines) > 1 else ""
        return f'''
<div class="items-center p-2 overflow-hidden border border-gray-600 mt-7">
    <a rel="noopener follow" href="{href}" target="_blank">
        <div class="flex flex-row justify-between p-2 overflow-hidden">
            <div class="flex flex-col justify-center p-2">
                <h2 class="text-base font-bold text-gray-100">{embed_title}</h2>
                <div class="block mt-2">
                    <h3 class="text-sm text-gray-400">{embed_desc}</h3>
                </div>
            </div>
            <div class="relative flex h-40 flex-row w-60">
                <div class="absolute inset-0 bg-center bg-cover"
                     style="background-image: url('https://miro.medium.com/v2/resize:fit:800/{thumbnail}');">
                </div>
            </div>
        </div>
    </a>
</div>
'''

    logger.warning("Unknown paragraph type: %s", para_type)
    return f'<p class="mt-7">{formatted_text}</p>'
def render_paragraphs(paragraphs: List[Dict], title: str = "", subtitle: str = "", preview_image_id: str = "") -> str:
    """Render a list of paragraph dicts to HTML content.

    Within the first four paragraphs, headings that closely match *title*,
    ``H4``/``P`` paragraphs that closely match *subtitle*, and an image whose
    id equals *preview_image_id* are dropped. Consecutive ``ULI`` / ``OLI``
    items are wrapped in a single ``<ul>`` / ``<ol>``, and consecutive ``PRE``
    paragraphs are merged into one code block that uses the language of the
    first paragraph of the run.

    Args:
        paragraphs: Paragraph dicts as accepted by ``render_paragraph``.
        title: Raw (unescaped) article title used for duplicate detection.
        subtitle: Raw (unescaped) subtitle used for duplicate detection.
        preview_image_id: Id of the preview image.

    Returns:
        The rendered fragments joined by newlines, or ``""`` for no input.
    """
    if not paragraphs:
        return ""

    list_wrappers = {
        "ULI": ('<ul class="pl-8 mt-2 list-disc">', "</ul>"),
        "OLI": ('<ol class="pl-8 mt-2 list-decimal">', "</ol>"),
    }

    out_parts = []
    total = len(paragraphs)
    i = 0
    while i < total:
        para = paragraphs[i]
        para_type = para.get("type", "")
        para_text = para.get("text", "")

        # Skip duplicate title/subtitle/preview image in the first 4 paragraphs.
        if i < 4:
            if para_type in ("H2", "H3", "H4") and title and _similarity(para_text, title) > 0.8:
                i += 1
                continue
            if para_type in ("H4", "P") and subtitle and _similarity(para_text, subtitle) > 0.8:
                i += 1
                continue
            if para_type == "IMG":
                metadata = para.get("metadata") or {}
                if metadata.get("id") == preview_image_id:
                    i += 1
                    continue

        # Grouped elements: collect the run of consecutive same-type paragraphs.
        if para_type in list_wrappers or para_type == "PRE":
            run_end = i
            while run_end < total and paragraphs[run_end].get("type") == para_type:
                run_end += 1
            run = paragraphs[i:run_end]
            i = run_end

            if para_type == "PRE":
                code_meta = para.get("codeBlockMetadata") or {}
                # The language ends up inside a class attribute; escape it.
                lang = escape_html(code_meta.get("lang") or "")
                lang_class = f"language-{lang}" if lang else "nohighlight"
                joined_code = "\n".join(escape_html(item.get("text", "")) for item in run)
                out_parts.append(
                    '<pre class="flex flex-col mt-7 border border-gray-700">'
                    f'<code class="p-2 bg-gray-900 overflow-x-auto {lang_class}">{joined_code}</code></pre>'
                )
            else:
                open_tag, close_tag = list_wrappers[para_type]
                items = "".join(render_paragraph(item) for item in run)
                out_parts.append(f"{open_tag}{items}{close_tag}")
            continue

        # Regular paragraph
        out_parts.append(render_paragraph(para))
        i += 1

    return "\n".join(out_parts)
| def _similarity(s1: str, s2: str) -> float: | |
| """Calculate similarity ratio between two strings.""" | |
| if not s1 or not s2: | |
| return 0.0 | |
| s1, s2 = s1.lower(), s2.lower() | |
| if s1 == s2: | |
| return 1.0 | |
| # Simple character overlap | |
| common = len(set(s1) & set(s2)) | |
| total = len(set(s1) | set(s2)) | |
| return common / total if total > 0 else 0.0 | |
def render_article_html(article_data: Dict[str, Any]) -> str:
    """Render article data to HTML content (not a full page).

    Args:
        article_data: Dict read with ``.get`` for the keys ``title``,
            ``subtitle``, ``url``, ``author`` (dict or plain name string),
            ``publication`` / ``collection`` (dict or plain name string),
            ``readingTime``, ``isLocked``, ``previewImageId``, ``tags``,
            ``paragraphs`` and ``markdownContent``.

    Returns:
        HTML string for the article content.
    """
    raw_title = article_data.get("title") or "Untitled"
    title = escape_html(raw_title)
    subtitle = article_data.get("subtitle", "")
    url = escape_html(article_data.get("url", ""))

    # Author info
    author = article_data.get("author") or {}
    if isinstance(author, str):
        author = {"name": author}
    elif not isinstance(author, dict):
        author = {}
    author_name = escape_html(author.get("name") or "Unknown")
    author_username = escape_html(author.get("username", ""))
    author_image = escape_html(author.get("imageId") or "1*dmbNkD5D-u45r44go_cf0g.png")

    # Collection/publication
    collection = article_data.get("publication") or article_data.get("collection") or {}
    if isinstance(collection, str):
        collection = {"name": collection}
    collection_html = ""
    if isinstance(collection, dict) and collection.get("name"):
        collection_slug = escape_html(collection.get("slug", ""))
        collection_name = escape_html(collection.get("name", ""))
        collection_html = f'''
<a href="https://medium.com/{collection_slug}" target="_blank" class="flex items-center space-x-1">
    <p>{collection_name}</p>
</a>
<span>·</span>
'''

    # Reading time
    reading_time = article_data.get("readingTime", 5)
    if reading_time is None:
        reading_time = 5
    if isinstance(reading_time, float):
        reading_time = int(reading_time)
    # html.escape(str(...)) rather than escape_html(): a reading time of 0
    # must still be displayed.
    reading_time = html.escape(str(reading_time))

    # Free access
    free_access = "No" if article_data.get("isLocked", False) else "Yes"

    # Preview image
    preview_image_id = article_data.get("previewImageId", "")
    preview_image_html = ""
    if preview_image_id:
        preview_image_html = f'''
<img alt="Preview image" style="max-height: 65vh; width: auto; margin: auto"
     loading="eager" referrerpolicy="no-referrer"
     src="https://miro.medium.com/v2/resize:fit:1400/{escape_html(preview_image_id)}">
'''

    # Subtitle
    subtitle_html = ""
    if subtitle:
        subtitle_html = f'<h2 class="pt-1 font-sans font-medium text-gray-400 break-normal text-1xl">{escape_html(subtitle)}</h2>'

    # Tags (at most 10); each is either a dict or a plain slug.
    tag_parts = []
    for tag in (article_data.get("tags") or [])[:10]:
        if isinstance(tag, dict):
            tag_slug = tag.get("normalizedTagSlug") or ""
            tag_display = tag.get("displayTitle") or tag_slug
        else:
            tag_slug = str(tag)
            tag_display = tag_slug
        if not tag_slug:
            # No slug means no usable link target or label.
            continue
        slug_html = escape_html(tag_slug)
        tag_parts.append(f'''
<a title="{escape_html(tag_display)}" target="_blank" href="https://medium.com/tag/{slug_html}">
    <span class="px-2 py-1 text-xs text-green-400 bg-green-900 rounded-full">#{slug_html}</span>
</a>
''')
    tags_html = "".join(tag_parts)

    # Author card
    author_card = AUTHOR_CARD_TEMPLATE.format(
        username=author_username,
        image_id=author_image,
        name=author_name,
        collection_html=collection_html,
        reading_time=reading_time,
        free_access=free_access,
    )

    # Content - try paragraphs first, fall back to markdown.
    paragraphs = article_data.get("paragraphs") or []
    markdown_content = article_data.get("markdownContent", "")

    # If there are no paragraphs, or the paragraphs look like raw markdown,
    # render through the markdown converter instead.
    use_markdown_renderer = False
    if not paragraphs:
        use_markdown_renderer = True
    elif markdown_content and _is_likely_markdown(paragraphs):
        logger.info("Detected raw markdown in paragraphs - switching to Markdown Renderer")
        use_markdown_renderer = True

    if use_markdown_renderer:
        # NOTE(review): the markdown converter's output is embedded as-is;
        # confirm that markdownContent cannot carry untrusted raw HTML.
        content_html = _markdown_to_html(markdown_content)
    else:
        # Duplicate detection compares against raw paragraph text, so pass the
        # raw title rather than the HTML-escaped one.
        content_html = render_paragraphs(paragraphs, raw_title, subtitle, preview_image_id)

    # Build article HTML
    return ARTICLE_TEMPLATE.format(
        url=url,
        preview_image=preview_image_html,
        title=title,
        subtitle_html=subtitle_html,
        author_card=author_card,
        content=content_html,
        tags_html=tags_html,
    )
| def _is_likely_markdown(paragraphs: List[Dict]) -> bool: | |
| """ | |
| Detect if paragraphs are actually just containers for raw markdown. | |
| This happens when the scraper falls back to dumping markdown tokens into the text field. | |
| """ | |
| if not paragraphs: | |
| return False | |
| # Check the first few paragraphs for tell-tale markdown syntax | |
| # that shouldn't appear in clean text | |
| sample_text = "\n".join([p.get("text", "") for p in paragraphs[:8]]) | |
| triggers = [ | |
| "#### ", # Headers | |
| " | |
def render_full_page(article_data: Dict[str, Any]) -> str:
    """Render article data to a complete standalone HTML page.

    Args:
        article_data: Article dict; ``title`` is read here and the whole dict
            is forwarded to ``render_article_html``.

    Returns:
        Complete HTML page string built from ``BASE_TEMPLATE``.
    """
    # Fall back to "Untitled" for a missing, None or empty title so that the
    # <title> element is never left blank.
    title = escape_html(article_data.get("title") or "Untitled")
    content = render_article_html(article_data)
    return BASE_TEMPLATE.format(title=title, content=content)
| import markdown as md_lib | |
def _markdown_to_html(markdown_text: str) -> str:
    """Convert markdown to HTML with the ``markdown`` library and add CSS classes.

    Args:
        markdown_text: Markdown source; a falsy value yields ``""``.

    Returns:
        The converted HTML, with bare opening tags for headings, paragraphs,
        lists, blockquotes and ``<pre>`` replaced by class-carrying versions.
    """
    if not markdown_text:
        return ""

    # 'extra' adds tables, fenced code blocks, etc.
    # NOTE(review): the result is embedded into the page as-is; confirm that
    # the markdown source cannot carry untrusted raw HTML.
    html_content = md_lib.markdown(
        markdown_text,
        extensions=['extra', 'codehilite', 'nl2br', 'sane_lists', 'fenced_code'],
        output_format='html5'
    )

    # Only bare *opening* tags are rewritten. Matching 'blockquote>' without
    # the leading '<' would also hit '</blockquote>' and produce a closing tag
    # with attributes.
    styled_tags = (
        ('<h1>', '<h1 class="pt-12 font-bold text-3xl">'),
        ('<h2>', '<h2 class="pt-12 font-bold text-2xl">'),
        ('<h3>', '<h3 class="pt-8 font-bold text-xl">'),
        ('<h4>', '<h4 class="pt-6 font-bold text-lg">'),
        ('<p>', '<p class="mt-4 leading-8">'),
        ('<ul>', '<ul class="pl-8 mt-2 list-disc">'),
        ('<ol>', '<ol class="pl-8 mt-2 list-decimal">'),
        ('<li>', '<li class="ml-4 mt-1">'),
        ('<blockquote>', '<blockquote class="px-5 py-3 mt-5 border-l-4 border-gray-500">'),
        ('<pre>', '<pre class="mt-7 border border-gray-700 bg-gray-900 p-4 rounded overflow-x-auto">'),
    )
    for bare_tag, styled_tag in styled_tags:
        html_content = html_content.replace(bare_tag, styled_tag)
    return html_content