dhruv575 committed on
Commit
a13876a
·
1 Parent(s): 252f87c

Plethora of updates

Browse files
app/email_new_converter.py CHANGED
@@ -238,6 +238,7 @@ class EmailNewConverter:
238
  is_top_comment_image = False
239
  is_top_news_image = False
240
  is_last_word_image = False
 
241
  img_alt = img.get("alt", "").lower()
242
 
243
  # Check if image is inside a top_news_box table
@@ -251,7 +252,13 @@ class EmailNewConverter:
251
  if parent is not None:
252
  is_last_word_image = True
253
  logger.info(f"Identified last word image (should preserve aspect ratio): {src[:50]}...")
254
-
 
 
 
 
 
 
255
  # Check if image is inside a top_comment_image anchor tag
256
  parent = img.find_parent("a", class_="top_comment_image")
257
  if parent is not None:
@@ -276,6 +283,10 @@ class EmailNewConverter:
276
  elif img_alt == "last word image":
277
  is_last_word_image = True
278
  logger.info(f"Identified last word image by alt text (should preserve aspect ratio): {src[:50]}...")
 
 
 
 
279
  else:
280
  # Check if image is in footer section by looking for footer comment in HTML structure
281
  # Get the HTML string representation of parent elements to check for footer comments
@@ -294,8 +305,8 @@ class EmailNewConverter:
294
  parent = parent.find_parent()
295
  depth += 1
296
 
297
- # Combine checks - if it's footer, top comment, top news, or last word, don't make square
298
- is_non_square_image = is_footer_image or is_top_comment_image or is_top_news_image or is_last_word_image
299
 
300
  # Check if image is already on Cloudinary
301
  is_cloudinary = "res.cloudinary.com" in src
@@ -349,7 +360,7 @@ class EmailNewConverter:
349
 
350
  # For non-square images (footer, top comment, top news, last word), preserve aspect ratio - no transformations
351
  if is_non_square_image:
352
- image_type = "footer" if is_footer_image else ("top comment" if is_top_comment_image else ("last word" if is_last_word_image else "top news"))
353
  logger.info(f"{image_type.capitalize()} image processed (preserving aspect ratio): {src[:50]}...")
354
  img["src"] = cloudinary_url
355
  else:
@@ -564,7 +575,6 @@ class EmailNewConverter:
564
  max-width: 100% !important;
565
  }
566
  .top_news_box,
567
- .story_box,
568
  .news_box,
569
  .ending_box,
570
  .new_box,
 
238
  is_top_comment_image = False
239
  is_top_news_image = False
240
  is_last_word_image = False
241
+ is_whale_image = False
242
  img_alt = img.get("alt", "").lower()
243
 
244
  # Check if image is inside a top_news_box table
 
252
  if parent is not None:
253
  is_last_word_image = True
254
  logger.info(f"Identified last word image (should preserve aspect ratio): {src[:50]}...")
255
+
256
+ # Check if image is inside a whale_image_wrapper div
257
+ parent = img.find_parent("div", class_="whale_image_wrapper")
258
+ if parent is not None:
259
+ is_whale_image = True
260
+ logger.info(f"Identified whale image (should preserve aspect ratio): {src[:50]}...")
261
+
262
  # Check if image is inside a top_comment_image anchor tag
263
  parent = img.find_parent("a", class_="top_comment_image")
264
  if parent is not None:
 
283
  elif img_alt == "last word image":
284
  is_last_word_image = True
285
  logger.info(f"Identified last word image by alt text (should preserve aspect ratio): {src[:50]}...")
286
+ # Whale images can be identified by alt text
287
+ elif img_alt == "whale image":
288
+ is_whale_image = True
289
+ logger.info(f"Identified whale image by alt text (should preserve aspect ratio): {src[:50]}...")
290
  else:
291
  # Check if image is in footer section by looking for footer comment in HTML structure
292
  # Get the HTML string representation of parent elements to check for footer comments
 
305
  parent = parent.find_parent()
306
  depth += 1
307
 
308
+ # Combine checks - if it's footer, top comment, top news, last word, or whale, don't make square
309
+ is_non_square_image = is_footer_image or is_top_comment_image or is_top_news_image or is_last_word_image or is_whale_image
310
 
311
  # Check if image is already on Cloudinary
312
  is_cloudinary = "res.cloudinary.com" in src
 
360
 
361
  # For non-square images (footer, top comment, top news, last word, whale), preserve aspect ratio - no transformations
362
  if is_non_square_image:
363
+ image_type = "footer" if is_footer_image else ("top comment" if is_top_comment_image else ("last word" if is_last_word_image else ("whale" if is_whale_image else "top news")))
364
  logger.info(f"{image_type.capitalize()} image processed (preserving aspect ratio): {src[:50]}...")
365
  img["src"] = cloudinary_url
366
  else:
 
575
  max-width: 100% !important;
576
  }
577
  .top_news_box,
 
578
  .news_box,
579
  .ending_box,
580
  .new_box,
app/main.py CHANGED
@@ -304,32 +304,32 @@ async def post_process_email(
304
  async def post_note_to_substack(
305
  request: dict = Body(...),
306
  ) -> dict:
307
- """Post a note with link attachment to Substack.
308
-
309
  Request body:
310
- hostname: Substack hostname (e.g., "your-publication.substack.com") (required)
311
  sid: Substack session ID cookie value (required)
312
- url: URL for the link attachment (required)
 
313
  bodyJson: ProseMirror document structure for the note content (required)
314
  """
315
  try:
316
  hostname = request.get("hostname", "").strip()
317
  sid = request.get("sid", "").strip()
318
  url = request.get("url", "").strip()
 
319
  body_json = request.get("bodyJson")
320
-
321
  if not hostname:
322
  raise HTTPException(status_code=400, detail="Missing 'hostname' in request body")
323
  if not sid:
324
  raise HTTPException(status_code=400, detail="Missing 'sid' in request body")
325
- if not url:
326
- raise HTTPException(status_code=400, detail="Missing 'url' in request body")
327
  if not body_json:
328
  raise HTTPException(status_code=400, detail="Missing 'bodyJson' in request body")
329
-
330
  base_url = f"https://{hostname}"
331
  cookies = {"substack.sid": sid}
332
-
333
  # Headers to make request look like it's coming from a browser
334
  headers = {
335
  "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
@@ -339,54 +339,95 @@ async def post_note_to_substack(
339
  "Origin": base_url,
340
  "Referer": f"{base_url}/",
341
  }
342
-
343
- # Step 1: Create attachment
344
- # Use curl_cffi to impersonate Chrome for better TLS fingerprint matching
345
- logger.info(f"Creating Substack attachment for URL: {url}")
346
- attachment_response = curl_requests.post(
347
- f"{base_url}/api/v1/comment/attachment",
348
- json={"url": url, "type": "link"},
349
- headers=headers,
350
- cookies=cookies,
351
- timeout=30,
352
- impersonate="chrome110", # Impersonate Chrome 110 for better compatibility
353
- )
354
-
355
- if not attachment_response.ok:
356
- error_text = attachment_response.text
357
- logger.error(f"Substack attachment creation failed: {attachment_response.status_code} - {error_text}")
358
- raise HTTPException(
359
- status_code=attachment_response.status_code,
360
- detail=f"Failed to create attachment: {error_text[:200]}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361
  )
362
-
363
- attachment_data = attachment_response.json()
364
- attachment_id = attachment_data.get("id")
365
-
366
- if not attachment_id:
367
- logger.error(f"No attachment ID in response: {attachment_data}")
368
- raise HTTPException(status_code=500, detail="No attachment ID returned from Substack")
369
-
370
- logger.info(f"Attachment created with ID: {attachment_id}")
371
-
 
 
 
372
  # Step 2: Publish note
373
- # Use curl_cffi to impersonate Chrome for better TLS fingerprint matching
374
  logger.info("Publishing note to Substack")
 
 
 
 
 
 
 
 
 
375
  feed_response = curl_requests.post(
376
  f"{base_url}/api/v1/comment/feed",
377
- json={
378
- "bodyJson": body_json,
379
- "attachmentIds": [attachment_id],
380
- "tabId": "for-you",
381
- "surface": "feed",
382
- "replyMinimumRole": "everyone",
383
- },
384
  headers=headers,
385
  cookies=cookies,
386
  timeout=30,
387
- impersonate="chrome110", # Impersonate Chrome 110 for better compatibility
388
  )
389
-
390
  if not feed_response.ok:
391
  error_text = feed_response.text
392
  logger.error(f"Substack note publishing failed: {feed_response.status_code} - {error_text}")
@@ -394,16 +435,16 @@ async def post_note_to_substack(
394
  status_code=feed_response.status_code,
395
  detail=f"Failed to publish note: {error_text[:200]}"
396
  )
397
-
398
  feed_data = feed_response.json()
399
  logger.info("Note published successfully to Substack")
400
-
401
  return {
402
  "success": True,
403
- "attachmentId": attachment_id,
404
  "noteId": feed_data.get("id"),
405
  }
406
-
407
  except HTTPException:
408
  raise
409
  except (requests.RequestException, curl_requests.RequestException) as e:
@@ -414,6 +455,26 @@ async def post_note_to_substack(
414
  raise HTTPException(status_code=500, detail=f"Error posting to Substack: {str(e)}")
415
 
416
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
417
  @app.post("/api/menu-data/fetch")
418
  async def fetch_menu_data(
419
  request: dict = Body(...),
 
304
  async def post_note_to_substack(
305
  request: dict = Body(...),
306
  ) -> dict:
307
+ """Post a note with optional link attachment to Substack.
308
+
309
  Request body:
310
+ hostname: Substack hostname (e.g., "substack.com") (required)
311
  sid: Substack session ID cookie value (required)
312
+ url: URL for the link attachment (optional - if empty, no link attachment is created)
313
+ imageUrl: External image URL to upload and attach to the note (optional)
314
  bodyJson: ProseMirror document structure for the note content (required)
315
  """
316
  try:
317
  hostname = request.get("hostname", "").strip()
318
  sid = request.get("sid", "").strip()
319
  url = request.get("url", "").strip()
320
+ image_url = request.get("imageUrl", "").strip()
321
  body_json = request.get("bodyJson")
322
+
323
  if not hostname:
324
  raise HTTPException(status_code=400, detail="Missing 'hostname' in request body")
325
  if not sid:
326
  raise HTTPException(status_code=400, detail="Missing 'sid' in request body")
 
 
327
  if not body_json:
328
  raise HTTPException(status_code=400, detail="Missing 'bodyJson' in request body")
329
+
330
  base_url = f"https://{hostname}"
331
  cookies = {"substack.sid": sid}
332
+
333
  # Headers to make request look like it's coming from a browser
334
  headers = {
335
  "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
 
339
  "Origin": base_url,
340
  "Referer": f"{base_url}/",
341
  }
342
+
343
+ attachment_ids = []
344
+
345
+ # Step 1a: If image URL provided, upload it to Substack and create image attachment
346
+ if image_url:
347
+ try:
348
+ logger.info(f"Downloading image from: {image_url}")
349
+ img_response = curl_requests.get(
350
+ image_url,
351
+ timeout=30,
352
+ impersonate="chrome110",
353
+ )
354
+ if img_response.ok:
355
+ import base64
356
+ img_bytes = img_response.content
357
+ content_type = img_response.headers.get("content-type", "image/jpeg")
358
+ # Only use the main content type (strip charset etc.)
359
+ content_type = content_type.split(";")[0].strip()
360
+ b64_data = base64.b64encode(img_bytes).decode("utf-8")
361
+ data_uri = f"data:{content_type};base64,{b64_data}"
362
+
363
+ logger.info("Uploading image to Substack")
364
+ img_upload_response = curl_requests.post(
365
+ f"{base_url}/api/v1/image",
366
+ json={"image": data_uri},
367
+ headers=headers,
368
+ cookies=cookies,
369
+ timeout=30,
370
+ impersonate="chrome110",
371
+ )
372
+ if img_upload_response.ok:
373
+ img_data = img_upload_response.json()
374
+ cdn_url = img_data.get("url", "")
375
+ logger.info(f"Image uploaded to Substack CDN: {cdn_url}")
376
+
377
+ # Patch the bodyJson to use the CDN URL instead of external URL
378
+ if cdn_url:
379
+ _patch_image_src(body_json, image_url, cdn_url, img_data)
380
+ else:
381
+ logger.warning(f"Image upload failed ({img_upload_response.status_code}), note will be posted without image")
382
+ else:
383
+ logger.warning(f"Image download failed ({img_response.status_code}), note will be posted without image")
384
+ except Exception as img_err:
385
+ logger.warning(f"Image processing failed: {img_err}, note will be posted without image")
386
+
387
+ # Step 1b: Create link attachment if URL provided
388
+ if url:
389
+ logger.info(f"Creating Substack attachment for URL: {url}")
390
+ attachment_response = curl_requests.post(
391
+ f"{base_url}/api/v1/comment/attachment",
392
+ json={"url": url, "type": "link"},
393
+ headers=headers,
394
+ cookies=cookies,
395
+ timeout=30,
396
+ impersonate="chrome110",
397
  )
398
+
399
+ if attachment_response.ok:
400
+ attachment_data = attachment_response.json()
401
+ attachment_id = attachment_data.get("id")
402
+ if attachment_id:
403
+ attachment_ids.append(attachment_id)
404
+ logger.info(f"Attachment created with ID: {attachment_id}")
405
+ else:
406
+ logger.warning("No attachment ID in response, continuing without link attachment")
407
+ else:
408
+ error_text = attachment_response.text
409
+ logger.warning(f"Attachment creation failed ({attachment_response.status_code}), continuing without link attachment: {error_text[:200]}")
410
+
411
  # Step 2: Publish note
 
412
  logger.info("Publishing note to Substack")
413
+ feed_payload = {
414
+ "bodyJson": body_json,
415
+ "tabId": "for-you",
416
+ "surface": "feed",
417
+ "replyMinimumRole": "everyone",
418
+ }
419
+ if attachment_ids:
420
+ feed_payload["attachmentIds"] = attachment_ids
421
+
422
  feed_response = curl_requests.post(
423
  f"{base_url}/api/v1/comment/feed",
424
+ json=feed_payload,
 
 
 
 
 
 
425
  headers=headers,
426
  cookies=cookies,
427
  timeout=30,
428
+ impersonate="chrome110",
429
  )
430
+
431
  if not feed_response.ok:
432
  error_text = feed_response.text
433
  logger.error(f"Substack note publishing failed: {feed_response.status_code} - {error_text}")
 
435
  status_code=feed_response.status_code,
436
  detail=f"Failed to publish note: {error_text[:200]}"
437
  )
438
+
439
  feed_data = feed_response.json()
440
  logger.info("Note published successfully to Substack")
441
+
442
  return {
443
  "success": True,
444
+ "attachmentIds": attachment_ids,
445
  "noteId": feed_data.get("id"),
446
  }
447
+
448
  except HTTPException:
449
  raise
450
  except (requests.RequestException, curl_requests.RequestException) as e:
 
455
  raise HTTPException(status_code=500, detail=f"Error posting to Substack: {str(e)}")
456
 
457
 
458
+ def _patch_image_src(body_json: dict, original_src: str, cdn_url: str, img_data: dict):
459
+ """Walk the ProseMirror document and replace external image src with Substack CDN URL."""
460
+ if not isinstance(body_json, dict):
461
+ return
462
+ if body_json.get("type") == "image2":
463
+ attrs = body_json.get("attrs", {})
464
+ if attrs.get("src") == original_src or not attrs.get("src"):
465
+ attrs["src"] = cdn_url
466
+ if img_data.get("imageWidth"):
467
+ attrs["width"] = img_data["imageWidth"]
468
+ if img_data.get("imageHeight"):
469
+ attrs["height"] = img_data["imageHeight"]
470
+ if img_data.get("bytes"):
471
+ attrs["bytes"] = img_data["bytes"]
472
+ if img_data.get("contentType"):
473
+ attrs["type"] = img_data["contentType"]
474
+ for child in body_json.get("content", []):
475
+ _patch_image_src(child, original_src, cdn_url, img_data)
476
+
477
+
478
  @app.post("/api/menu-data/fetch")
479
  async def fetch_menu_data(
480
  request: dict = Body(...),
app/polygraph-email.html CHANGED
@@ -99,7 +99,7 @@
99
  color:#111827;
100
  padding-bottom:8px;
101
  ">
102
- Top Stories
103
  </td>
104
  </tr>
105
  </table>
@@ -113,9 +113,11 @@
113
  ">
114
  <tr>
115
  <td style="padding:16px;">
116
- <a href="https://polymarket.com" style="text-decoration:none; color:inherit; display:block;">
117
- <img src="https://yourcdn.com/polygraph/top-news.png" alt="Top news image" style="display:block; width:100%; height:auto; border-radius:8px; margin-bottom:12px;" />
118
- </a>
 
 
119
  <a href="https://polymarket.com" style="text-decoration:none; color:inherit; display:block;">
120
  <p style="
121
  margin:0 0 8px 0;
@@ -234,61 +236,6 @@
234
  </tr>
235
  </table>
236
 
237
- <!-- STORY CARD 1 -->
238
- <table class="story_box" role="presentation" width="100%" cellpadding="0" cellspacing="0" border="0" style="
239
- margin-bottom:16px;
240
- border-radius:14px;
241
- border:1px solid #2E5CFF;
242
- background-color:#FFFFFF;
243
- ">
244
- <tr>
245
- <td style="padding:16px;">
246
- <p style="
247
- margin:0 0 8px 0;
248
- font-family:'Open Sauce One', -apple-system, BlinkMacSystemFont, 'Segoe UI', Arial, sans-serif;
249
- font-size:16px;
250
- line-height:24px;
251
- font-weight:700;
252
- color:#2E5CFF;
253
- ">
254
- Shutdown End in Sight?
255
- </p>
256
- <p style="
257
- margin:0 0 16px 0;
258
- font-family:'SF Pro Text', -apple-system, BlinkMacSystemFont, 'Segoe UI', Arial, sans-serif;
259
- font-size:17px;
260
- line-height:22px;
261
- letter-spacing:-0.43px;
262
- color:#212121;
263
- ">
264
- With new bipartisan talks in the Senate, Polymarket traders are shortening
265
- their estimate for the length of the government shutdown.
266
- </p>
267
- <table role="presentation" cellpadding="0" cellspacing="0" border="0" width="100%">
268
- <tr>
269
- <td align="right">
270
- <a href="https://polymarket.com"
271
- style="
272
- display:inline-block;
273
- text-align:center;
274
- text-decoration:none;
275
- font-family:'SF Pro Text', -apple-system, BlinkMacSystemFont, 'Segoe UI', Arial, sans-serif;
276
- font-size:15px;
277
- line-height:20px;
278
- letter-spacing:-0.23px;
279
- color:#2E5CFF;
280
- border-radius:8px;
281
- border:1px solid #2E5CFF;
282
- padding:8px 14px;
283
- ">
284
- Check odds →
285
- </a>
286
- </td>
287
- </tr>
288
- </table>
289
- </td>
290
- </tr>
291
- </table>
292
  </div>
293
 
294
  <!-- SECTION: WALLET WATCH -->
@@ -946,6 +893,13 @@
946
  </table>
947
  </td>
948
  </tr>
 
 
 
 
 
 
 
949
  </table>
950
 
951
  <!-- SEE ALL WHALE MOVES -->
 
99
  color:#111827;
100
  padding-bottom:8px;
101
  ">
102
+ Top News
103
  </td>
104
  </tr>
105
  </table>
 
113
  ">
114
  <tr>
115
  <td style="padding:16px;">
116
+ <div class="top_news_image_wrapper">
117
+ <a href="https://polymarket.com" style="text-decoration:none; color:inherit; display:block;">
118
+ <img src="https://yourcdn.com/polygraph/top-news.png" alt="Top news image" style="display:block; width:100%; height:auto; border-radius:8px; margin-bottom:12px;" />
119
+ </a>
120
+ </div>
121
  <a href="https://polymarket.com" style="text-decoration:none; color:inherit; display:block;">
122
  <p style="
123
  margin:0 0 8px 0;
 
236
  </tr>
237
  </table>
238
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
  </div>
240
 
241
  <!-- SECTION: WALLET WATCH -->
 
893
  </table>
894
  </td>
895
  </tr>
896
+ <tr>
897
+ <td style="padding:0 12px 12px 12px;">
898
+ <div class="whale_image_wrapper">
899
+ <img src="https://yourcdn.com/polygraph/whale-move.png" alt="Whale move image" style="display:block; width:100%; height:auto; border-radius:8px;" />
900
+ </div>
901
+ </td>
902
+ </tr>
903
  </table>
904
 
905
  <!-- SEE ALL WHALE MOVES -->
app/post_process.py CHANGED
@@ -107,8 +107,7 @@ def append_signup_modal_to_selective_links(html_content: str) -> str:
107
  - Header image link (alt="Polygraph by Polymarket")
108
  - Footer logo link (alt="Polymarket")
109
  - Top news box links (class="top_news_box")
110
- - Top stories box links (class="story_box")
111
-
112
  This should be called BEFORE dubification, so the dubified links
113
  will point to URLs that already have the signup modal parameter.
114
  """
@@ -195,37 +194,6 @@ def append_signup_modal_to_selective_links(html_content: str) -> str:
195
  link["href"] = urlunparse(new_parsed)
196
  modified_count += 1
197
 
198
- # Find story_box links
199
- story_boxes = soup.find_all("table", class_="story_box")
200
- for box in story_boxes:
201
- # Find the wrapper anchor or links inside
202
- box_link = box.find_parent("a")
203
- if box_link and box_link.get("href"):
204
- href = box_link["href"]
205
- if "polymarket.com" in href:
206
- parsed = urlparse(href)
207
- query_params = parse_qs(parsed.query, keep_blank_values=True)
208
- query_params['modal'] = ['signup']
209
- query_params['td'] = ['9']
210
- new_query = urlencode(query_params, doseq=True)
211
- new_parsed = parsed._replace(query=new_query)
212
- box_link["href"] = urlunparse(new_parsed)
213
- modified_count += 1
214
-
215
- # Also check for "Read more" or "Check odds" link inside the box
216
- action_link = box.find("a", string=re.compile("Read more|Check odds"))
217
- if action_link and action_link.get("href"):
218
- href = action_link["href"]
219
- if "polymarket.com" in href:
220
- parsed = urlparse(href)
221
- query_params = parse_qs(parsed.query, keep_blank_values=True)
222
- query_params['modal'] = ['signup']
223
- query_params['td'] = ['9']
224
- new_query = urlencode(query_params, doseq=True)
225
- new_parsed = parsed._replace(query=new_query)
226
- action_link["href"] = urlunparse(new_parsed)
227
- modified_count += 1
228
-
229
  print(f" Appended signup modal to {modified_count} selective link(s)")
230
  return str(soup)
231
 
@@ -364,20 +332,6 @@ def fix_header_centering(html_content: str) -> str:
364
  return html_content
365
 
366
 
367
- def fix_story_box_headlines(html_content: str) -> str:
368
- """Add margin-top:0 to story box h4 elements to prevent empty line above headlines."""
369
- # Match h4 in story boxes that have margin-bottom:8px but no margin-top:0
370
- def fix_h4(match):
371
- full = match.group(0)
372
- if 'margin-top:0' in full:
373
- return full
374
- # Insert margin-top:0 after margin-bottom:8px
375
- return full.replace("margin-bottom:8px'", "margin-bottom:8px; margin-top:0'")
376
-
377
- html_content = re.sub(r"<h4 style='[^']*margin-bottom:8px'[^>]*>", fix_h4, html_content)
378
- return html_content
379
-
380
-
381
  def fix_unsubscribe_link(html_content: str) -> str:
382
  """Fix unsubscribe link href to use Customer.io merge tag.
383
 
@@ -708,21 +662,16 @@ def minify_html(html_content: str) -> str:
708
  if 'margin:0 auto' in html_content:
709
  print(f" Fixed header centering (added margin:0 auto)")
710
 
711
- # Step 4: Fix story box headlines (add margin-top:0 to prevent empty line)
712
- html_content = fix_story_box_headlines(html_content)
713
- if "margin-top:0" in html_content:
714
- print(f" Fixed story box headlines (added margin-top:0)")
715
-
716
- # Step 5: Fix unsubscribe link href (update from # to {% manage_subscription_preferences_url %})
717
  html_content = fix_unsubscribe_link(html_content)
718
 
719
- # Step 6: Fix Cloudinary image URLs (add transformations to market images)
720
  html_content = fix_cloudinary_image_transformations(html_content)
721
 
722
- # Step 7: Fix section header alignment for mobile
723
  html_content = fix_section_header_alignment(html_content)
724
 
725
- # Step 8: Add spaces before links (ensures proper spacing in email clients)
726
  html_content = add_link_spacing(html_content)
727
 
728
  final_size = len(html_content)
 
107
  - Header image link (alt="Polygraph by Polymarket")
108
  - Footer logo link (alt="Polymarket")
109
  - Top news box links (class="top_news_box")
110
+
 
111
  This should be called BEFORE dubification, so the dubified links
112
  will point to URLs that already have the signup modal parameter.
113
  """
 
194
  link["href"] = urlunparse(new_parsed)
195
  modified_count += 1
196
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
  print(f" Appended signup modal to {modified_count} selective link(s)")
198
  return str(soup)
199
 
 
332
  return html_content
333
 
334
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
335
  def fix_unsubscribe_link(html_content: str) -> str:
336
  """Fix unsubscribe link href to use Customer.io merge tag.
337
 
 
662
  if 'margin:0 auto' in html_content:
663
  print(f" Fixed header centering (added margin:0 auto)")
664
 
665
+ # Step 4: Fix unsubscribe link href (update from # to {% manage_subscription_preferences_url %})
 
 
 
 
 
666
  html_content = fix_unsubscribe_link(html_content)
667
 
668
+ # Step 5: Fix Cloudinary image URLs (add transformations to market images)
669
  html_content = fix_cloudinary_image_transformations(html_content)
670
 
671
+ # Step 6: Fix section header alignment for mobile
672
  html_content = fix_section_header_alignment(html_content)
673
 
674
+ # Step 7: Add spaces before links (ensures proper spacing in email clients)
675
  html_content = add_link_spacing(html_content)
676
 
677
  final_size = len(html_content)
app/templateify_new_service.py CHANGED
@@ -27,7 +27,6 @@ class TemplateifyNewService:
27
  a:has(.markets_box),
28
  a:has(.sports_box),
29
  a:has(.whale_box),
30
- a:has(.story_box),
31
  a:has(.top_news_box),
32
  a:has(.top_comment_image),
33
  a:has(.last_word_box),
@@ -268,80 +267,40 @@ class TemplateifyNewService:
268
  logger.info("Wrapped footer logo in anchor tag with hardcoded URL")
269
  break
270
 
271
- def top_story_transform(node: Tag) -> None:
272
- # Find and preserve the "Check odds" link BEFORE wrapping in parent anchor
273
- # Look for anchor tags within the table structure
274
- inner_link = node.find("a")
275
-
276
- # Now wrap entire story_box table in an anchor tag if not already wrapped
277
- if not node.find_parent("a"):
278
- link = soup.new_tag("a", href="{{URL}}", target="_blank", rel="noopener noreferrer")
279
- link["class"] = ["market-link"]
280
- node.wrap(link)
281
- # Update the href on the wrapper link
282
- parent_link = node.find_parent("a")
283
- if parent_link:
284
- parent_link["href"] = "{{URL}}"
285
- if "class" not in parent_link.attrs:
286
- parent_link["class"] = []
287
- if "market-link" not in parent_link["class"]:
288
- parent_link["class"].append("market-link")
289
-
290
- # Find headline (first p with blue color) and body (second p)
291
- paragraphs = node.find_all("p")
292
- if len(paragraphs) >= 1:
293
- # First paragraph is the headline (has blue color #2E5CFF)
294
- paragraphs[0].clear()
295
- paragraphs[0].append("{{HEADLINE}}")
296
- register("{{HEADLINE}}", "Top story headline")
297
- if len(paragraphs) >= 2:
298
- # Second paragraph is the body
299
- paragraphs[1].clear()
300
- paragraphs[1].append("{{BODY}}")
301
- register("{{BODY}}", "Top story body")
302
-
303
- # Convert the "Check odds" link to a span to avoid nested anchors (invalid HTML)
304
- # The whole story_box is already wrapped in an anchor, so we can't have another anchor inside
305
- if inner_link:
306
- # Get the text content and original styling
307
- link_text = inner_link.get_text()
308
- original_style = inner_link.get("style", "")
309
-
310
- # Create a new span element to replace the anchor
311
- span = soup.new_tag("span")
312
- span.string = link_text
313
-
314
- # Preserve the original styling from the link
315
- if original_style:
316
- span["style"] = original_style
317
-
318
- # Add any classes that were on the link
319
- if inner_link.get("class"):
320
- span["class"] = inner_link["class"]
321
-
322
- # Replace the anchor with the span
323
- inner_link.replace_with(span)
324
-
325
- logger.info("Converted 'Check odds' link to styled span to avoid nested anchors")
326
-
327
  def top_news_transform(node: Tag) -> None:
328
  # Image, title, description, and "Read more" button all link to the same URL
329
  # Quick Links remain independent with their own URLs
330
-
331
  # Initialize anchor variables
332
  img_anchor = None
333
  title_anchor = None
334
  desc_anchor = None
335
-
336
- # Find and tokenize the image (wrapped in anchor)
337
- img = node.find("img")
338
- if img:
339
- img["src"] = "{{TOP_NEWS_IMAGE}}"
340
- register("{{TOP_NEWS_IMAGE}}", "Top news image URL")
341
- # Find the anchor wrapping the image and tokenize its href
342
- img_anchor = img.find_parent("a")
343
- if img_anchor:
344
- img_anchor["href"] = "{{URL}}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
345
 
346
  # Find headline and description paragraphs (both wrapped in anchors)
347
  paragraphs = node.find_all("p")
@@ -441,13 +400,6 @@ class TemplateifyNewService:
441
  "Top news block.",
442
  )
443
 
444
- loopify(
445
- ".top_stories .story_box",
446
- "TOP_STORIES",
447
- top_story_transform,
448
- "Top stories block.",
449
- )
450
-
451
  def wallet_watch_transform(node: Tag) -> None:
452
  # Wrap entire wallet_watch_box table in an anchor tag if not already wrapped
453
  if not node.find_parent("a"):
@@ -867,6 +819,20 @@ class TemplateifyNewService:
867
  for span in spans[1:]:
868
  span.decompose()
869
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
870
  # Register WALLET_URL token (the main box link now points to wallet)
871
  register("{{WALLET_URL}}", "Whale wallet/profile URL")
872
 
@@ -888,7 +854,7 @@ class TemplateifyNewService:
888
  ".markets": ("HAS_HOT_MARKETS", ".wrap_h", "a.see_all"),
889
  ".sports": ("HAS_SPORTS_EVENTS", ".wrap_h", "a.see_all"),
890
  ".whales": ("HAS_WHALE_MOVES", "h2", "a.see_all"), # Whale moves doesn't have wrap_h, just h2
891
- # Note: .top_stories doesn't need a conditional wrapper - TOP_NEWS and TOP_STORIES loops are already conditional
892
  ".comments": ("HAS_TOP_COMMENTS", ".wrap_h", None), # Comments doesn't have "See all" link
893
  ".last_word": ("HAS_LAST_WORD", "h2", None), # Last word has h2 heading, no "See all" link
894
  }
 
27
  a:has(.markets_box),
28
  a:has(.sports_box),
29
  a:has(.whale_box),
 
30
  a:has(.top_news_box),
31
  a:has(.top_comment_image),
32
  a:has(.last_word_box),
 
267
  logger.info("Wrapped footer logo in anchor tag with hardcoded URL")
268
  break
269
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
  def top_news_transform(node: Tag) -> None:
271
  # Image, title, description, and "Read more" button all link to the same URL
272
  # Quick Links remain independent with their own URLs
273
+
274
  # Initialize anchor variables
275
  img_anchor = None
276
  title_anchor = None
277
  desc_anchor = None
278
+
279
+ # Find and tokenize the image, wrap in conditional
280
+ img_wrapper = node.find("div", class_="top_news_image_wrapper")
281
+ if img_wrapper:
282
+ # Wrap the entire wrapper div in conditional
283
+ opening_tag = NavigableString("{{#TOP_NEWS_IMAGE}}")
284
+ closing_tag = NavigableString("{{/TOP_NEWS_IMAGE}}")
285
+ img_wrapper.insert_before(opening_tag)
286
+ img_wrapper.insert_after(closing_tag)
287
+
288
+ img = img_wrapper.find("img")
289
+ if img:
290
+ img["src"] = "{{TOP_NEWS_IMAGE}}"
291
+ register("{{TOP_NEWS_IMAGE}}", "Top news image URL (optional)")
292
+ img_anchor = img.find_parent("a")
293
+ if img_anchor:
294
+ img_anchor["href"] = "{{URL}}"
295
+ else:
296
+ # Fallback: find img directly if wrapper doesn't exist
297
+ img = node.find("img")
298
+ if img:
299
+ img["src"] = "{{TOP_NEWS_IMAGE}}"
300
+ register("{{TOP_NEWS_IMAGE}}", "Top news image URL (optional)")
301
+ img_anchor = img.find_parent("a")
302
+ if img_anchor:
303
+ img_anchor["href"] = "{{URL}}"
304
 
305
  # Find headline and description paragraphs (both wrapped in anchors)
306
  paragraphs = node.find_all("p")
 
400
  "Top news block.",
401
  )
402
 
 
 
 
 
 
 
 
403
  def wallet_watch_transform(node: Tag) -> None:
404
  # Wrap entire wallet_watch_box table in an anchor tag if not already wrapped
405
  if not node.find_parent("a"):
 
819
  for span in spans[1:]:
820
  span.decompose()
821
 
822
+ # Find and tokenize the optional whale image, wrap in conditional
823
+ whale_img_wrapper = node.find("div", class_="whale_image_wrapper")
824
+ if whale_img_wrapper:
825
+ # Wrap the entire wrapper div in conditional
826
+ opening_tag = NavigableString("{{#WHALE_IMAGE}}")
827
+ closing_tag = NavigableString("{{/WHALE_IMAGE}}")
828
+ whale_img_wrapper.insert_before(opening_tag)
829
+ whale_img_wrapper.insert_after(closing_tag)
830
+
831
+ whale_img = whale_img_wrapper.find("img")
832
+ if whale_img:
833
+ whale_img["src"] = "{{WHALE_IMAGE}}"
834
+ register("{{WHALE_IMAGE}}", "Whale move image URL (optional)")
835
+
836
  # Register WALLET_URL token (the main box link now points to wallet)
837
  register("{{WALLET_URL}}", "Whale wallet/profile URL")
838
 
 
854
  ".markets": ("HAS_HOT_MARKETS", ".wrap_h", "a.see_all"),
855
  ".sports": ("HAS_SPORTS_EVENTS", ".wrap_h", "a.see_all"),
856
  ".whales": ("HAS_WHALE_MOVES", "h2", "a.see_all"), # Whale moves doesn't have wrap_h, just h2
857
+ # Note: .top_stories doesn't need a conditional wrapper - TOP_NEWS loop is already conditional
858
  ".comments": ("HAS_TOP_COMMENTS", ".wrap_h", None), # Comments doesn't have "See all" link
859
  ".last_word": ("HAS_LAST_WORD", "h2", None), # Last word has h2 heading, no "See all" link
860
  }