Spaces:

T0X1N
/

Medium-MCP

Sleeping

Nikhil Pravin Pise committed on Dec 25, 2025

Commit

60742a2

1 Parent(s): f41a2fa

fix: Upgrade Medium images to high resolution (1400px) across entire app

- Added centralized upgrade_medium_image_url() and get_medium_image_url() to utils.py
- Set MEDIUM_IMAGE_DEFAULT_WIDTH = 1400 as the single source of truth
- Updated parser.py to automatically upgrade search/tag result images
- Updated paragraph_parser.py get_image_url() to use 1400px default
- Updated html_renderer.py image sizes (700->1400px, 320->800px)
- Updated app.py to use centralized image upgrade from utils.py

Fixes low-resolution image thumbnails in search results and article previews

Files changed (5) hide show

app.py +4 -1
src/html_renderer.py +6 -3
src/paragraph_parser.py +6 -3
src/parser.py +8 -0
src/utils.py +81 -0

app.py CHANGED Viewed

@@ -54,6 +54,7 @@ load_dotenv(os.path.join(os.path.dirname(__file__), ".env"))
 from src.service import ScraperService
 # Import renderer for explicit usage
 from src.html_renderer import render_full_page, BASE_TEMPLATE as RENDERER_TEMPLATE
 from src.config import MCPConfig
 from elevenlabs_voices import ELEVENLABS_VOICES, VOICE_CATEGORIES, get_voice_id
 # Import Gemini for Analyst (backup)
@@ -709,7 +710,9 @@ def render_cards(results, query: str = ""):
         url = art.get('url', '#')
         author = art.get('author', 'Unknown')
         if isinstance(author, dict): author = author.get('name', 'Unknown')
-        img = art.get('imageUrl', '') or 'https://miro.medium.com/max/1400/1*jfdwtvU6V6g99q3G7gq7dQ.png'
         html += f"""
         <a href='{url}' target='_blank' class='aether-card'>

 from src.service import ScraperService
 # Import renderer for explicit usage
 from src.html_renderer import render_full_page, BASE_TEMPLATE as RENDERER_TEMPLATE
+from src.utils import upgrade_medium_image_url
 from src.config import MCPConfig
 from elevenlabs_voices import ELEVENLABS_VOICES, VOICE_CATEGORIES, get_voice_id
 # Import Gemini for Analyst (backup)
         url = art.get('url', '#')
         author = art.get('author', 'Unknown')
         if isinstance(author, dict): author = author.get('name', 'Unknown')
+        # Ensure high-resolution image (upgrade any low-res URLs)
+        raw_img = art.get('imageUrl', '') or 'https://miro.medium.com/v2/resize:fit:1400/1*jfdwtvU6V6g99q3G7gq7dQ.png'
+        img = upgrade_medium_image_url(raw_img, target_width=1400)
         html += f"""
         <a href='{url}' target='_blank' class='aether-card'>

src/html_renderer.py CHANGED Viewed

@@ -9,6 +9,9 @@ import html
 from typing import Dict, List, Any, Optional
 import logging
 logger = logging.getLogger("HTMLRenderer")
 # Base HTML template for standalone page
@@ -295,7 +298,7 @@ def render_paragraph(paragraph: Dict, is_code: bool = False) -> str:
         <div class="mt-7">
             <img loading="eager" alt="{alt}" class="pt-5 m-auto"
                  referrerpolicy="no-referrer"
-                 src="https://miro.medium.com/v2/resize:fit:700/{image_id}">
         </div>
         '''
         if caption:
@@ -363,7 +366,7 @@ def render_paragraph(paragraph: Dict, is_code: bool = False) -> str:
                     </div>
                     <div class="relative flex h-40 flew-row w-60">
                         <div class="absolute inset-0 bg-center bg-cover"
-                             style="background-image: url('https://miro.medium.com/v2/resize:fit:320/{thumbnail}');">
                         </div>
                     </div>
                 </div>
@@ -504,7 +507,7 @@ def render_article_html(article_data: Dict[str, Any]) -> str:
         preview_image_html = f'''
         <img alt="Preview image" style="max-height: 65vh; width: auto; margin: auto"
              loading="eager" referrerpolicy="no-referrer"
-             src="https://miro.medium.com/v2/resize:fit:700/{preview_image_id}">
         '''
     # Subtitle

 from typing import Dict, List, Any, Optional
 import logging
+# Import centralized image URL utilities
+from src.utils import MEDIUM_IMAGE_DEFAULT_WIDTH
 logger = logging.getLogger("HTMLRenderer")
 # Base HTML template for standalone page
         <div class="mt-7">
             <img loading="eager" alt="{alt}" class="pt-5 m-auto"
                  referrerpolicy="no-referrer"
+                 src="https://miro.medium.com/v2/resize:fit:1400/{image_id}">
         </div>
         '''
         if caption:
                     </div>
                     <div class="relative flex h-40 flew-row w-60">
                         <div class="absolute inset-0 bg-center bg-cover"
+                             style="background-image: url('https://miro.medium.com/v2/resize:fit:800/{thumbnail}');">
                         </div>
                     </div>
                 </div>
         preview_image_html = f'''
         <img alt="Preview image" style="max-height: 65vh; width: auto; margin: auto"
              loading="eager" referrerpolicy="no-referrer"
+             src="https://miro.medium.com/v2/resize:fit:1400/{preview_image_id}">
         '''
     # Subtitle

src/paragraph_parser.py CHANGED Viewed

@@ -12,6 +12,9 @@ import logging
 import re
 from typing import Dict, List, Optional, Tuple
 try:
     import tld
     HAS_TLD = True
@@ -123,9 +126,9 @@ class MarkupProcessor:
         return text
-def get_image_url(image_id: str, width: int = 700) -> str:
-    """Build Medium image URL from image ID."""
-    return f"https://miro.medium.com/v2/resize:fit:{width}/{image_id}"
 def parse_paragraphs_to_markdown(

 import re
 from typing import Dict, List, Optional, Tuple
+# Import centralized image URL utilities
+from src.utils import get_medium_image_url, MEDIUM_IMAGE_DEFAULT_WIDTH
 try:
     import tld
     HAS_TLD = True
         return text
+def get_image_url(image_id: str, width: int = MEDIUM_IMAGE_DEFAULT_WIDTH) -> str:
+    """Build Medium image URL from image ID. Uses high-res by default."""
+    return get_medium_image_url(image_id, width)
 def parse_paragraphs_to_markdown(

src/parser.py CHANGED Viewed

@@ -1,8 +1,13 @@
 from bs4 import BeautifulSoup
 from typing import Dict, List, Optional, Any
 from markdownify import markdownify as md
 from urllib.parse import urljoin
 def extract_search_results(soup: BeautifulSoup, base_url: str) -> List[Dict[str, Any]]:
     """
     Extracts article metadata from search result cards.
@@ -133,6 +138,9 @@ def _extract_from_card(card, base_url: str) -> Dict[str, Any]:
         if img_tag and img_tag.get("src"):
             image_url = img_tag["src"]
     return {
         "url": url,
         "title": title,

+import re
 from bs4 import BeautifulSoup
 from typing import Dict, List, Optional, Any
 from markdownify import markdownify as md
 from urllib.parse import urljoin
+# Import centralized image URL utilities from utils
+from src.utils import upgrade_medium_image_url, get_medium_image_url, MEDIUM_IMAGE_DEFAULT_WIDTH
 def extract_search_results(soup: BeautifulSoup, base_url: str) -> List[Dict[str, Any]]:
     """
     Extracts article metadata from search result cards.
         if img_tag and img_tag.get("src"):
             image_url = img_tag["src"]
+    # Upgrade image URL to high resolution
+    image_url = upgrade_medium_image_url(image_url, target_width=1400)
     return {
         "url": url,
         "title": title,

src/utils.py CHANGED Viewed

@@ -194,6 +194,87 @@ def make_absolute_url(url: str, base_url: str) -> str:
     return urljoin(base_url, url)
 # =============================================================================
 # HASH UTILITIES
 # =============================================================================

     return urljoin(base_url, url)
+# Default high resolution width for Medium images
+MEDIUM_IMAGE_DEFAULT_WIDTH = 1400
+def upgrade_medium_image_url(url: str, target_width: int = MEDIUM_IMAGE_DEFAULT_WIDTH) -> str:
+    """
+    Upgrades a Medium image URL to a higher resolution.
+    Medium uses CDN URLs like:
+    - https://miro.medium.com/v2/resize:fit:320/{image_id}
+    - https://miro.medium.com/v2/resize:fill:88:88/{image_id}
+    - https://miro.medium.com/max/320/{image_id} (older format)
+    This function replaces the resize parameters with a higher resolution.
+    Args:
+        url: The original image URL
+        target_width: Target width in pixels (default 1400 for high-res)
+    Returns:
+        Upgraded URL with higher resolution, or original if not a Medium image
+    """
+    if not url:
+        return url
+    # Check if it's a Medium CDN URL
+    if "miro.medium.com" not in url:
+        return url
+    # Pattern 1: v2/resize:fit:WIDTH or v2/resize:fill:WIDTH:HEIGHT
+    pattern_v2 = r"(miro\.medium\.com/v2/resize:)(fit|fill):(\d+)(?::(\d+))?"
+    match = re.search(pattern_v2, url)
+    if match:
+        # Replace with high-res fit format
+        new_url = re.sub(pattern_v2, f"miro.medium.com/v2/resize:fit:{target_width}", url)
+        return new_url
+    # Pattern 2: older format max/WIDTH
+    pattern_max = r"(miro\.medium\.com/max/)(\d+)"
+    match = re.search(pattern_max, url)
+    if match:
+        new_url = re.sub(pattern_max, f"miro.medium.com/v2/resize:fit:{target_width}", url)
+        return new_url
+    # Pattern 3: freeze format with dimensions
+    # Example: freeze/fit/320/240/...
+    pattern_freeze = r"(miro\.medium\.com/freeze/)(fit|fill)/(\d+)/(\d+)"
+    match = re.search(pattern_freeze, url)
+    if match:
+        new_url = re.sub(pattern_freeze, f"miro.medium.com/v2/resize:fit:{target_width}", url)
+        return new_url
+    # If we have a Medium URL but can't parse the format, try to extract image ID
+    # and construct a new URL
+    # Pattern: Look for the image ID (usually contains *)
+    pattern_id = r"miro\.medium\.com/.*?/([01]\*[a-zA-Z0-9_-]+\.[a-zA-Z]+)"
+    match = re.search(pattern_id, url)
+    if match:
+        image_id = match.group(1)
+        return f"https://miro.medium.com/v2/resize:fit:{target_width}/{image_id}"
+    # Return original if we can't upgrade
+    return url
+def get_medium_image_url(image_id: str, width: int = MEDIUM_IMAGE_DEFAULT_WIDTH) -> str:
+    """
+    Build a high-resolution Medium image URL from an image ID.
+    Args:
+        image_id: The Medium image ID (e.g., "1*abc123.png")
+        width: Target width in pixels (default 1400 for high-res)
+    Returns:
+        Full Medium CDN URL for the image
+    """
+    if not image_id:
+        return ""
+    return f"https://miro.medium.com/v2/resize:fit:{width}/{image_id}"
 # =============================================================================
 # HASH UTILITIES
 # =============================================================================