Spaces:

droov
/

polygrapher

Sleeping

App Files Community

dhruv575 committed on Feb 12

Commit

a13876a

1 Parent(s): 252f87c

Plethora of updates

Browse files

Files changed (5) hide show

app/email_new_converter.py +15 -5
app/main.py +113 -52
app/polygraph-email.html +13 -59
app/post_process.py +5 -56
app/templateify_new_service.py +42 -76

app/email_new_converter.py CHANGED Viewed

@@ -238,6 +238,7 @@ class EmailNewConverter:
             is_top_comment_image = False
             is_top_news_image = False
             is_last_word_image = False
             img_alt = img.get("alt", "").lower()
             # Check if image is inside a top_news_box table
@@ -251,7 +252,13 @@ class EmailNewConverter:
             if parent is not None:
                 is_last_word_image = True
                 logger.info(f"Identified last word image (should preserve aspect ratio): {src[:50]}...")
             # Check if image is inside a top_comment_image anchor tag
             parent = img.find_parent("a", class_="top_comment_image")
             if parent is not None:
@@ -276,6 +283,10 @@ class EmailNewConverter:
             elif img_alt == "last word image":
                 is_last_word_image = True
                 logger.info(f"Identified last word image by alt text (should preserve aspect ratio): {src[:50]}...")
             else:
                 # Check if image is in footer section by looking for footer comment in HTML structure
                 # Get the HTML string representation of parent elements to check for footer comments
@@ -294,8 +305,8 @@ class EmailNewConverter:
                     parent = parent.find_parent()
                     depth += 1
-            # Combine checks - if it's footer, top comment, top news, or last word, don't make square
-            is_non_square_image = is_footer_image or is_top_comment_image or is_top_news_image or is_last_word_image
             # Check if image is already on Cloudinary
             is_cloudinary = "res.cloudinary.com" in src
@@ -349,7 +360,7 @@ class EmailNewConverter:
                 # For non-square images (footer, top comment, top news, last word), preserve aspect ratio - no transformations
                 if is_non_square_image:
-                    image_type = "footer" if is_footer_image else ("top comment" if is_top_comment_image else ("last word" if is_last_word_image else "top news"))
                     logger.info(f"{image_type.capitalize()} image processed (preserving aspect ratio): {src[:50]}...")
                     img["src"] = cloudinary_url
                 else:
@@ -564,7 +575,6 @@ class EmailNewConverter:
     max-width: 100% !important;
   }
   .top_news_box,
-  .story_box,
   .news_box,
   .ending_box,
   .new_box,

             is_top_comment_image = False
             is_top_news_image = False
             is_last_word_image = False
+            is_whale_image = False
             img_alt = img.get("alt", "").lower()
             # Check if image is inside a top_news_box table
             if parent is not None:
                 is_last_word_image = True
                 logger.info(f"Identified last word image (should preserve aspect ratio): {src[:50]}...")
+            # Check if image is inside a whale_box table and whale_image_wrapper div
+            parent = img.find_parent("div", class_="whale_image_wrapper")
+            if parent is not None:
+                is_whale_image = True
+                logger.info(f"Identified whale image (should preserve aspect ratio): {src[:50]}...")
             # Check if image is inside a top_comment_image anchor tag
             parent = img.find_parent("a", class_="top_comment_image")
             if parent is not None:
             elif img_alt == "last word image":
                 is_last_word_image = True
                 logger.info(f"Identified last word image by alt text (should preserve aspect ratio): {src[:50]}...")
+            # Whale images can be identified by alt text
+            elif img_alt == "whale image":
+                is_whale_image = True
+                logger.info(f"Identified whale image by alt text (should preserve aspect ratio): {src[:50]}...")
             else:
                 # Check if image is in footer section by looking for footer comment in HTML structure
                 # Get the HTML string representation of parent elements to check for footer comments
                     parent = parent.find_parent()
                     depth += 1
+            # Combine checks - if it's footer, top comment, top news, last word, or whale, don't make square
+            is_non_square_image = is_footer_image or is_top_comment_image or is_top_news_image or is_last_word_image or is_whale_image
             # Check if image is already on Cloudinary
             is_cloudinary = "res.cloudinary.com" in src
                 # For non-square images (footer, top comment, top news, last word), preserve aspect ratio - no transformations
                 if is_non_square_image:
+                    image_type = "footer" if is_footer_image else ("top comment" if is_top_comment_image else ("last word" if is_last_word_image else ("whale" if is_whale_image else "top news")))
                     logger.info(f"{image_type.capitalize()} image processed (preserving aspect ratio): {src[:50]}...")
                     img["src"] = cloudinary_url
                 else:
     max-width: 100% !important;
   }
   .top_news_box,
   .news_box,
   .ending_box,
   .new_box,

app/main.py CHANGED Viewed

@@ -304,32 +304,32 @@ async def post_process_email(
 async def post_note_to_substack(
     request: dict = Body(...),
 ) -> dict:
-    """Post a note with link attachment to Substack.
     Request body:
-        hostname: Substack hostname (e.g., "your-publication.substack.com") (required)
         sid: Substack session ID cookie value (required)
-        url: URL for the link attachment (required)
         bodyJson: ProseMirror document structure for the note content (required)
     """
     try:
         hostname = request.get("hostname", "").strip()
         sid = request.get("sid", "").strip()
         url = request.get("url", "").strip()
         body_json = request.get("bodyJson")
         if not hostname:
             raise HTTPException(status_code=400, detail="Missing 'hostname' in request body")
         if not sid:
             raise HTTPException(status_code=400, detail="Missing 'sid' in request body")
-        if not url:
-            raise HTTPException(status_code=400, detail="Missing 'url' in request body")
         if not body_json:
             raise HTTPException(status_code=400, detail="Missing 'bodyJson' in request body")
         base_url = f"https://{hostname}"
         cookies = {"substack.sid": sid}
         # Headers to make request look like it's coming from a browser
         headers = {
             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
@@ -339,54 +339,95 @@ async def post_note_to_substack(
             "Origin": base_url,
             "Referer": f"{base_url}/",
         }
-        # Step 1: Create attachment
-        # Use curl_cffi to impersonate Chrome for better TLS fingerprint matching
-        logger.info(f"Creating Substack attachment for URL: {url}")
-        attachment_response = curl_requests.post(
-            f"{base_url}/api/v1/comment/attachment",
-            json={"url": url, "type": "link"},
-            headers=headers,
-            cookies=cookies,
-            timeout=30,
-            impersonate="chrome110",  # Impersonate Chrome 110 for better compatibility
-        )
-        if not attachment_response.ok:
-            error_text = attachment_response.text
-            logger.error(f"Substack attachment creation failed: {attachment_response.status_code} - {error_text}")
-            raise HTTPException(
-                status_code=attachment_response.status_code,
-                detail=f"Failed to create attachment: {error_text[:200]}"
             )
-        attachment_data = attachment_response.json()
-        attachment_id = attachment_data.get("id")
-        if not attachment_id:
-            logger.error(f"No attachment ID in response: {attachment_data}")
-            raise HTTPException(status_code=500, detail="No attachment ID returned from Substack")
-        logger.info(f"Attachment created with ID: {attachment_id}")
         # Step 2: Publish note
-        # Use curl_cffi to impersonate Chrome for better TLS fingerprint matching
         logger.info("Publishing note to Substack")
         feed_response = curl_requests.post(
             f"{base_url}/api/v1/comment/feed",
-            json={
-                "bodyJson": body_json,
-                "attachmentIds": [attachment_id],
-                "tabId": "for-you",
-                "surface": "feed",
-                "replyMinimumRole": "everyone",
-            },
             headers=headers,
             cookies=cookies,
             timeout=30,
-            impersonate="chrome110",  # Impersonate Chrome 110 for better compatibility
         )
         if not feed_response.ok:
             error_text = feed_response.text
             logger.error(f"Substack note publishing failed: {feed_response.status_code} - {error_text}")
@@ -394,16 +435,16 @@ async def post_note_to_substack(
                 status_code=feed_response.status_code,
                 detail=f"Failed to publish note: {error_text[:200]}"
             )
         feed_data = feed_response.json()
         logger.info("Note published successfully to Substack")
         return {
             "success": True,
-            "attachmentId": attachment_id,
             "noteId": feed_data.get("id"),
         }
     except HTTPException:
         raise
     except (requests.RequestException, curl_requests.RequestException) as e:
@@ -414,6 +455,26 @@ async def post_note_to_substack(
         raise HTTPException(status_code=500, detail=f"Error posting to Substack: {str(e)}")
 @app.post("/api/menu-data/fetch")
 async def fetch_menu_data(
     request: dict = Body(...),

 async def post_note_to_substack(
     request: dict = Body(...),
 ) -> dict:
+    """Post a note with optional link attachment to Substack.
     Request body:
+        hostname: Substack hostname (e.g., "substack.com") (required)
         sid: Substack session ID cookie value (required)
+        url: URL for the link attachment (optional - if empty, no link attachment is created)
+        imageUrl: External image URL to upload and attach to the note (optional)
         bodyJson: ProseMirror document structure for the note content (required)
     """
     try:
         hostname = request.get("hostname", "").strip()
         sid = request.get("sid", "").strip()
         url = request.get("url", "").strip()
+        image_url = request.get("imageUrl", "").strip()
         body_json = request.get("bodyJson")
         if not hostname:
             raise HTTPException(status_code=400, detail="Missing 'hostname' in request body")
         if not sid:
             raise HTTPException(status_code=400, detail="Missing 'sid' in request body")
         if not body_json:
             raise HTTPException(status_code=400, detail="Missing 'bodyJson' in request body")
         base_url = f"https://{hostname}"
         cookies = {"substack.sid": sid}
         # Headers to make request look like it's coming from a browser
         headers = {
             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
             "Origin": base_url,
             "Referer": f"{base_url}/",
         }
+        attachment_ids = []
+        # Step 1a: If image URL provided, upload it to Substack and create image attachment
+        if image_url:
+            try:
+                logger.info(f"Downloading image from: {image_url}")
+                img_response = curl_requests.get(
+                    image_url,
+                    timeout=30,
+                    impersonate="chrome110",
+                )
+                if img_response.ok:
+                    import base64
+                    img_bytes = img_response.content
+                    content_type = img_response.headers.get("content-type", "image/jpeg")
+                    # Only use the main content type (strip charset etc.)
+                    content_type = content_type.split(";")[0].strip()
+                    b64_data = base64.b64encode(img_bytes).decode("utf-8")
+                    data_uri = f"data:{content_type};base64,{b64_data}"
+                    logger.info("Uploading image to Substack")
+                    img_upload_response = curl_requests.post(
+                        f"{base_url}/api/v1/image",
+                        json={"image": data_uri},
+                        headers=headers,
+                        cookies=cookies,
+                        timeout=30,
+                        impersonate="chrome110",
+                    )
+                    if img_upload_response.ok:
+                        img_data = img_upload_response.json()
+                        cdn_url = img_data.get("url", "")
+                        logger.info(f"Image uploaded to Substack CDN: {cdn_url}")
+                        # Patch the bodyJson to use the CDN URL instead of external URL
+                        if cdn_url:
+                            _patch_image_src(body_json, image_url, cdn_url, img_data)
+                    else:
+                        logger.warning(f"Image upload failed ({img_upload_response.status_code}), note will be posted without image")
+                else:
+                    logger.warning(f"Image download failed ({img_response.status_code}), note will be posted without image")
+            except Exception as img_err:
+                logger.warning(f"Image processing failed: {img_err}, note will be posted without image")
+        # Step 1b: Create link attachment if URL provided
+        if url:
+            logger.info(f"Creating Substack attachment for URL: {url}")
+            attachment_response = curl_requests.post(
+                f"{base_url}/api/v1/comment/attachment",
+                json={"url": url, "type": "link"},
+                headers=headers,
+                cookies=cookies,
+                timeout=30,
+                impersonate="chrome110",
             )
+            if attachment_response.ok:
+                attachment_data = attachment_response.json()
+                attachment_id = attachment_data.get("id")
+                if attachment_id:
+                    attachment_ids.append(attachment_id)
+                    logger.info(f"Attachment created with ID: {attachment_id}")
+                else:
+                    logger.warning("No attachment ID in response, continuing without link attachment")
+            else:
+                error_text = attachment_response.text
+                logger.warning(f"Attachment creation failed ({attachment_response.status_code}), continuing without link attachment: {error_text[:200]}")
         # Step 2: Publish note
         logger.info("Publishing note to Substack")
+        feed_payload = {
+            "bodyJson": body_json,
+            "tabId": "for-you",
+            "surface": "feed",
+            "replyMinimumRole": "everyone",
+        }
+        if attachment_ids:
+            feed_payload["attachmentIds"] = attachment_ids
         feed_response = curl_requests.post(
             f"{base_url}/api/v1/comment/feed",
+            json=feed_payload,
             headers=headers,
             cookies=cookies,
             timeout=30,
+            impersonate="chrome110",
         )
         if not feed_response.ok:
             error_text = feed_response.text
             logger.error(f"Substack note publishing failed: {feed_response.status_code} - {error_text}")
                 status_code=feed_response.status_code,
                 detail=f"Failed to publish note: {error_text[:200]}"
             )
         feed_data = feed_response.json()
         logger.info("Note published successfully to Substack")
         return {
             "success": True,
+            "attachmentIds": attachment_ids,
             "noteId": feed_data.get("id"),
         }
     except HTTPException:
         raise
     except (requests.RequestException, curl_requests.RequestException) as e:
         raise HTTPException(status_code=500, detail=f"Error posting to Substack: {str(e)}")
+def _patch_image_src(body_json: dict, original_src: str, cdn_url: str, img_data: dict):
+    """Walk the ProseMirror document and replace external image src with Substack CDN URL."""
+    if not isinstance(body_json, dict):
+        return
+    if body_json.get("type") == "image2":
+        attrs = body_json.get("attrs", {})
+        if attrs.get("src") == original_src or not attrs.get("src"):
+            attrs["src"] = cdn_url
+            if img_data.get("imageWidth"):
+                attrs["width"] = img_data["imageWidth"]
+            if img_data.get("imageHeight"):
+                attrs["height"] = img_data["imageHeight"]
+            if img_data.get("bytes"):
+                attrs["bytes"] = img_data["bytes"]
+            if img_data.get("contentType"):
+                attrs["type"] = img_data["contentType"]
+    for child in body_json.get("content", []):
+        _patch_image_src(child, original_src, cdn_url, img_data)
 @app.post("/api/menu-data/fetch")
 async def fetch_menu_data(
     request: dict = Body(...),

app/polygraph-email.html CHANGED Viewed

@@ -99,7 +99,7 @@
                         color:#111827;
                         padding-bottom:8px;
                       ">
-                        Top Stories
                       </td>
                     </tr>
                   </table>
@@ -113,9 +113,11 @@
                   ">
                     <tr>
                       <td style="padding:16px;">
-                        <a href="https://polymarket.com" style="text-decoration:none; color:inherit; display:block;">
-                          <img src="https://yourcdn.com/polygraph/top-news.png" alt="Top news image" style="display:block; width:100%; height:auto; border-radius:8px; margin-bottom:12px;" />
-                        </a>
                         <a href="https://polymarket.com" style="text-decoration:none; color:inherit; display:block;">
                           <p style="
                             margin:0 0 8px 0;
@@ -234,61 +236,6 @@
                     </tr>
                   </table>
-                  <!-- STORY CARD 1 -->
-                  <table class="story_box" role="presentation" width="100%" cellpadding="0" cellspacing="0" border="0" style="
-                    margin-bottom:16px;
-                    border-radius:14px;
-                    border:1px solid #2E5CFF;
-                    background-color:#FFFFFF;
-                  ">
-                    <tr>
-                      <td style="padding:16px;">
-                        <p style="
-                          margin:0 0 8px 0;
-                          font-family:'Open Sauce One', -apple-system, BlinkMacSystemFont, 'Segoe UI', Arial, sans-serif;
-                          font-size:16px;
-                          line-height:24px;
-                          font-weight:700;
-                          color:#2E5CFF;
-                        ">
-                          Shutdown End in Sight?
-                        </p>
-                        <p style="
-                          margin:0 0 16px 0;
-                          font-family:'SF Pro Text', -apple-system, BlinkMacSystemFont, 'Segoe UI', Arial, sans-serif;
-                          font-size:17px;
-                          line-height:22px;
-                          letter-spacing:-0.43px;
-                          color:#212121;
-                        ">
-                          With new bipartisan talks in the Senate, Polymarket traders are shortening
-                          their estimate for the length of the government shutdown.
-                        </p>
-                        <table role="presentation" cellpadding="0" cellspacing="0" border="0" width="100%">
-                          <tr>
-                            <td align="right">
-                              <a href="https://polymarket.com"
-                                 style="
-                                   display:inline-block;
-                                   text-align:center;
-                                   text-decoration:none;
-                                   font-family:'SF Pro Text', -apple-system, BlinkMacSystemFont, 'Segoe UI', Arial, sans-serif;
-                                   font-size:15px;
-                                   line-height:20px;
-                                   letter-spacing:-0.23px;
-                                   color:#2E5CFF;
-                                   border-radius:8px;
-                                   border:1px solid #2E5CFF;
-                                   padding:8px 14px;
-                                 ">
-                                Check odds →
-                              </a>
-                            </td>
-                          </tr>
-                        </table>
-                      </td>
-                    </tr>
-                  </table>
                   </div>
                   <!-- SECTION: WALLET WATCH -->
@@ -946,6 +893,13 @@
                         </table>
                       </td>
                     </tr>
                   </table>
                   <!-- SEE ALL WHALE MOVES -->

                         color:#111827;
                         padding-bottom:8px;
                       ">
+                        Top News
                       </td>
                     </tr>
                   </table>
                   ">
                     <tr>
                       <td style="padding:16px;">
+                        <div class="top_news_image_wrapper">
+                          <a href="https://polymarket.com" style="text-decoration:none; color:inherit; display:block;">
+                            <img src="https://yourcdn.com/polygraph/top-news.png" alt="Top news image" style="display:block; width:100%; height:auto; border-radius:8px; margin-bottom:12px;" />
+                          </a>
+                        </div>
                         <a href="https://polymarket.com" style="text-decoration:none; color:inherit; display:block;">
                           <p style="
                             margin:0 0 8px 0;
                     </tr>
                   </table>
                   </div>
                   <!-- SECTION: WALLET WATCH -->
                         </table>
                       </td>
                     </tr>
+                    <tr>
+                      <td style="padding:0 12px 12px 12px;">
+                        <div class="whale_image_wrapper">
+                          <img src="https://yourcdn.com/polygraph/whale-move.png" alt="Whale move image" style="display:block; width:100%; height:auto; border-radius:8px;" />
+                        </div>
+                      </td>
+                    </tr>
                   </table>
                   <!-- SEE ALL WHALE MOVES -->

app/post_process.py CHANGED Viewed

@@ -107,8 +107,7 @@ def append_signup_modal_to_selective_links(html_content: str) -> str:
     - Header image link (alt="Polygraph by Polymarket")
     - Footer logo link (alt="Polymarket")
     - Top news box links (class="top_news_box")
-    - Top stories box links (class="story_box")
     This should be called BEFORE dubification, so the dubified links
     will point to URLs that already have the signup modal parameter.
     """
@@ -195,37 +194,6 @@ def append_signup_modal_to_selective_links(html_content: str) -> str:
                         link["href"] = urlunparse(new_parsed)
                         modified_count += 1
-    # Find story_box links
-    story_boxes = soup.find_all("table", class_="story_box")
-    for box in story_boxes:
-        # Find the wrapper anchor or links inside
-        box_link = box.find_parent("a")
-        if box_link and box_link.get("href"):
-            href = box_link["href"]
-            if "polymarket.com" in href:
-                parsed = urlparse(href)
-                query_params = parse_qs(parsed.query, keep_blank_values=True)
-                query_params['modal'] = ['signup']
-                query_params['td'] = ['9']
-                new_query = urlencode(query_params, doseq=True)
-                new_parsed = parsed._replace(query=new_query)
-                box_link["href"] = urlunparse(new_parsed)
-                modified_count += 1
-        # Also check for "Read more" or "Check odds" link inside the box
-        action_link = box.find("a", string=re.compile("Read more|Check odds"))
-        if action_link and action_link.get("href"):
-            href = action_link["href"]
-            if "polymarket.com" in href:
-                parsed = urlparse(href)
-                query_params = parse_qs(parsed.query, keep_blank_values=True)
-                query_params['modal'] = ['signup']
-                query_params['td'] = ['9']
-                new_query = urlencode(query_params, doseq=True)
-                new_parsed = parsed._replace(query=new_query)
-                action_link["href"] = urlunparse(new_parsed)
-                modified_count += 1
     print(f"  Appended signup modal to {modified_count} selective link(s)")
     return str(soup)
@@ -364,20 +332,6 @@ def fix_header_centering(html_content: str) -> str:
     return html_content
-def fix_story_box_headlines(html_content: str) -> str:
-    """Add margin-top:0 to story box h4 elements to prevent empty line above headlines."""
-    # Match h4 in story boxes that have margin-bottom:8px but no margin-top:0
-    def fix_h4(match):
-        full = match.group(0)
-        if 'margin-top:0' in full:
-            return full
-        # Insert margin-top:0 after margin-bottom:8px
-        return full.replace("margin-bottom:8px'", "margin-bottom:8px; margin-top:0'")
-    html_content = re.sub(r"<h4 style='[^']*margin-bottom:8px'[^>]*>", fix_h4, html_content)
-    return html_content
 def fix_unsubscribe_link(html_content: str) -> str:
     """Fix unsubscribe link href to use Customer.io merge tag.
@@ -708,21 +662,16 @@ def minify_html(html_content: str) -> str:
     if 'margin:0 auto' in html_content:
         print(f"  Fixed header centering (added margin:0 auto)")
-    # Step 4: Fix story box headlines (add margin-top:0 to prevent empty line)
-    html_content = fix_story_box_headlines(html_content)
-    if "margin-top:0" in html_content:
-        print(f"  Fixed story box headlines (added margin-top:0)")
-    # Step 5: Fix unsubscribe link href (update from # to {% manage_subscription_preferences_url %})
     html_content = fix_unsubscribe_link(html_content)
-    # Step 6: Fix Cloudinary image URLs (add transformations to market images)
     html_content = fix_cloudinary_image_transformations(html_content)
-    # Step 7: Fix section header alignment for mobile
     html_content = fix_section_header_alignment(html_content)
-    # Step 8: Add spaces before links (ensures proper spacing in email clients)
     html_content = add_link_spacing(html_content)
     final_size = len(html_content)

     - Header image link (alt="Polygraph by Polymarket")
     - Footer logo link (alt="Polymarket")
     - Top news box links (class="top_news_box")
     This should be called BEFORE dubification, so the dubified links
     will point to URLs that already have the signup modal parameter.
     """
                         link["href"] = urlunparse(new_parsed)
                         modified_count += 1
     print(f"  Appended signup modal to {modified_count} selective link(s)")
     return str(soup)
     return html_content
 def fix_unsubscribe_link(html_content: str) -> str:
     """Fix unsubscribe link href to use Customer.io merge tag.
     if 'margin:0 auto' in html_content:
         print(f"  Fixed header centering (added margin:0 auto)")
+    # Step 4: Fix unsubscribe link href (update from # to {% manage_subscription_preferences_url %})
     html_content = fix_unsubscribe_link(html_content)
+    # Step 5: Fix Cloudinary image URLs (add transformations to market images)
     html_content = fix_cloudinary_image_transformations(html_content)
+    # Step 6: Fix section header alignment for mobile
     html_content = fix_section_header_alignment(html_content)
+    # Step 7: Add spaces before links (ensures proper spacing in email clients)
     html_content = add_link_spacing(html_content)
     final_size = len(html_content)

app/templateify_new_service.py CHANGED Viewed

@@ -27,7 +27,6 @@ class TemplateifyNewService:
                 a:has(.markets_box),
                 a:has(.sports_box),
                 a:has(.whale_box),
-                a:has(.story_box),
                 a:has(.top_news_box),
                 a:has(.top_comment_image),
                 a:has(.last_word_box),
@@ -268,80 +267,40 @@ class TemplateifyNewService:
                     logger.info("Wrapped footer logo in anchor tag with hardcoded URL")
                 break
-        def top_story_transform(node: Tag) -> None:
-            # Find and preserve the "Check odds" link BEFORE wrapping in parent anchor
-            # Look for anchor tags within the table structure
-            inner_link = node.find("a")
-            # Now wrap entire story_box table in an anchor tag if not already wrapped
-            if not node.find_parent("a"):
-                link = soup.new_tag("a", href="{{URL}}", target="_blank", rel="noopener noreferrer")
-                link["class"] = ["market-link"]
-                node.wrap(link)
-            # Update the href on the wrapper link
-            parent_link = node.find_parent("a")
-            if parent_link:
-                parent_link["href"] = "{{URL}}"
-                if "class" not in parent_link.attrs:
-                    parent_link["class"] = []
-                if "market-link" not in parent_link["class"]:
-                    parent_link["class"].append("market-link")
-            # Find headline (first p with blue color) and body (second p)
-            paragraphs = node.find_all("p")
-            if len(paragraphs) >= 1:
-                # First paragraph is the headline (has blue color #2E5CFF)
-                paragraphs[0].clear()
-                paragraphs[0].append("{{HEADLINE}}")
-                register("{{HEADLINE}}", "Top story headline")
-            if len(paragraphs) >= 2:
-                # Second paragraph is the body
-                paragraphs[1].clear()
-                paragraphs[1].append("{{BODY}}")
-                register("{{BODY}}", "Top story body")
-            # Convert the "Check odds" link to a span to avoid nested anchors (invalid HTML)
-            # The whole story_box is already wrapped in an anchor, so we can't have another anchor inside
-            if inner_link:
-                # Get the text content and original styling
-                link_text = inner_link.get_text()
-                original_style = inner_link.get("style", "")
-                # Create a new span element to replace the anchor
-                span = soup.new_tag("span")
-                span.string = link_text
-                # Preserve the original styling from the link
-                if original_style:
-                    span["style"] = original_style
-                # Add any classes that were on the link
-                if inner_link.get("class"):
-                    span["class"] = inner_link["class"]
-                # Replace the anchor with the span
-                inner_link.replace_with(span)
-                logger.info("Converted 'Check odds' link to styled span to avoid nested anchors")
         def top_news_transform(node: Tag) -> None:
             # Image, title, description, and "Read more" button all link to the same URL
             # Quick Links remain independent with their own URLs
             # Initialize anchor variables
             img_anchor = None
             title_anchor = None
             desc_anchor = None
-            # Find and tokenize the image (wrapped in anchor)
-            img = node.find("img")
-            if img:
-                img["src"] = "{{TOP_NEWS_IMAGE}}"
-                register("{{TOP_NEWS_IMAGE}}", "Top news image URL")
-                # Find the anchor wrapping the image and tokenize its href
-                img_anchor = img.find_parent("a")
-                if img_anchor:
-                    img_anchor["href"] = "{{URL}}"
             # Find headline and description paragraphs (both wrapped in anchors)
             paragraphs = node.find_all("p")
@@ -441,13 +400,6 @@ class TemplateifyNewService:
             "Top news block.",
         )
-        loopify(
-            ".top_stories .story_box",
-            "TOP_STORIES",
-            top_story_transform,
-            "Top stories block.",
-        )
         def wallet_watch_transform(node: Tag) -> None:
             # Wrap entire wallet_watch_box table in an anchor tag if not already wrapped
             if not node.find_parent("a"):
@@ -867,6 +819,20 @@ class TemplateifyNewService:
                         for span in spans[1:]:
                             span.decompose()
             # Register WALLET_URL token (the main box link now points to wallet)
             register("{{WALLET_URL}}", "Whale wallet/profile URL")
@@ -888,7 +854,7 @@ class TemplateifyNewService:
             ".markets": ("HAS_HOT_MARKETS", ".wrap_h", "a.see_all"),
             ".sports": ("HAS_SPORTS_EVENTS", ".wrap_h", "a.see_all"),
             ".whales": ("HAS_WHALE_MOVES", "h2", "a.see_all"),  # Whale moves doesn't have wrap_h, just h2
-            # Note: .top_stories doesn't need a conditional wrapper - TOP_NEWS and TOP_STORIES loops are already conditional
             ".comments": ("HAS_TOP_COMMENTS", ".wrap_h", None),  # Comments doesn't have "See all" link
             ".last_word": ("HAS_LAST_WORD", "h2", None),  # Last word has h2 heading, no "See all" link
         }

                 a:has(.markets_box),
                 a:has(.sports_box),
                 a:has(.whale_box),
                 a:has(.top_news_box),
                 a:has(.top_comment_image),
                 a:has(.last_word_box),
                     logger.info("Wrapped footer logo in anchor tag with hardcoded URL")
                 break
         def top_news_transform(node: Tag) -> None:
             # Image, title, description, and "Read more" button all link to the same URL
             # Quick Links remain independent with their own URLs
             # Initialize anchor variables
             img_anchor = None
             title_anchor = None
             desc_anchor = None
+            # Find and tokenize the image, wrap in conditional
+            img_wrapper = node.find("div", class_="top_news_image_wrapper")
+            if img_wrapper:
+                # Wrap the entire wrapper div in conditional
+                opening_tag = NavigableString("{{#TOP_NEWS_IMAGE}}")
+                closing_tag = NavigableString("{{/TOP_NEWS_IMAGE}}")
+                img_wrapper.insert_before(opening_tag)
+                img_wrapper.insert_after(closing_tag)
+                img = img_wrapper.find("img")
+                if img:
+                    img["src"] = "{{TOP_NEWS_IMAGE}}"
+                    register("{{TOP_NEWS_IMAGE}}", "Top news image URL (optional)")
+                    img_anchor = img.find_parent("a")
+                    if img_anchor:
+                        img_anchor["href"] = "{{URL}}"
+            else:
+                # Fallback: find img directly if wrapper doesn't exist (this branch inserts no TOP_NEWS_IMAGE conditional markers)
+                img = node.find("img")
+                if img:
+                    img["src"] = "{{TOP_NEWS_IMAGE}}"
+                    register("{{TOP_NEWS_IMAGE}}", "Top news image URL (optional)")
+                    img_anchor = img.find_parent("a")
+                    if img_anchor:
+                        img_anchor["href"] = "{{URL}}"
             # Find headline and description paragraphs (both wrapped in anchors)
             paragraphs = node.find_all("p")
             "Top news block.",
         )
         def wallet_watch_transform(node: Tag) -> None:
             # Wrap entire wallet_watch_box table in an anchor tag if not already wrapped
             if not node.find_parent("a"):
                         for span in spans[1:]:
                             span.decompose()
+            # Find and tokenize the optional whale image, wrap in conditional
+            whale_img_wrapper = node.find("div", class_="whale_image_wrapper")
+            if whale_img_wrapper:
+                # Wrap the entire wrapper div in conditional
+                opening_tag = NavigableString("{{#WHALE_IMAGE}}")
+                closing_tag = NavigableString("{{/WHALE_IMAGE}}")
+                whale_img_wrapper.insert_before(opening_tag)
+                whale_img_wrapper.insert_after(closing_tag)
+                whale_img = whale_img_wrapper.find("img")
+                if whale_img:
+                    whale_img["src"] = "{{WHALE_IMAGE}}"
+                    register("{{WHALE_IMAGE}}", "Whale move image URL (optional)")
             # Register WALLET_URL token (the main box link now points to wallet)
             register("{{WALLET_URL}}", "Whale wallet/profile URL")
             ".markets": ("HAS_HOT_MARKETS", ".wrap_h", "a.see_all"),
             ".sports": ("HAS_SPORTS_EVENTS", ".wrap_h", "a.see_all"),
             ".whales": ("HAS_WHALE_MOVES", "h2", "a.see_all"),  # Whale moves doesn't have wrap_h, just h2
+            # Note: .top_stories doesn't need a conditional wrapper - TOP_NEWS loop is already conditional
             ".comments": ("HAS_TOP_COMMENTS", ".wrap_h", None),  # Comments doesn't have "See all" link
             ".last_word": ("HAS_LAST_WORD", "h2", None),  # Last word has h2 heading, no "See all" link
         }