Medium-MCP / src /vision.py
Nikhil Pravin Pise
feat: Migrate to google-genai SDK + fix remaining low-res images
545358b
import os
import base64
from google import genai
from google.genai import types
from typing import Optional, Dict, Any
def scrape_with_vision(screenshot_bytes: bytes, url: str) -> Optional[Dict[str, Any]]:
    """
    Transcribe a screenshot of an article into Markdown using Gemini Vision.

    The screenshot is sent, together with a transcription prompt, to the
    'gemini-2.0-flash-exp' model through the google.genai SDK.

    Args:
        screenshot_bytes: Raw image bytes of the page screenshot. They are
            submitted to the model with the MIME type "image/png".
        url: Address of the article; it is copied unchanged into the result.

    Returns:
        A dict with the keys "url", "title", "author", "markdownContent" and
        "source" (always "vision"), or None when GEMINI_API_KEY is not set,
        the screenshot is empty, the model returns no text, or any error
        occurs while calling the API.
    """
    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        return None

    # Nothing to transcribe: avoid a pointless (and failing) API round trip.
    if not screenshot_bytes:
        return None

    try:
        client = genai.Client(api_key=api_key)

        prompt = """
        You are a visual web scraper.
        Your task is to transcribe the article in this screenshot into Markdown format.

        Rules:
        1. Extract the Title, Author, and Full Content.
        2. Preserve all headers, code blocks, and formatting.
        3. Ignore ads, sidebars, and navigation menus.
        4. If the content is truncated or behind a paywall in the image, transcribe what is visible and add a note.
        5. Output ONLY the markdown.
        """

        # Create image part for new SDK
        image_part = types.Part.from_bytes(
            data=screenshot_bytes,
            mime_type="image/png"
        )

        response = client.models.generate_content(
            model='gemini-2.0-flash-exp',
            contents=[prompt, image_part]
        )

        # response.text may be None when the model produced no text parts
        # (for example a blocked response). Report that explicitly instead
        # of failing later with an opaque AttributeError.
        markdown = response.text
        if not markdown or not markdown.strip():
            print("Vision Scraping Failed: model returned no text")
            return None

        # Basic parsing of the markdown to get the title if possible: use the
        # first non-empty level-1 heading.
        title = "Unknown Title"
        for line in markdown.split("\n"):
            if line.startswith("# "):
                # Drop only the leading marker. str.replace would also remove
                # any "# " inside the heading ("# C# Tutorial" -> "CTutorial").
                heading = line[2:].strip()
                if heading:
                    title = heading
                    break

        return {
            "url": url,
            "title": title,
            "author": {"name": "Unknown (Vision Extracted)"},
            "markdownContent": markdown,
            "source": "vision"
        }
    except Exception as e:
        # Best-effort helper: any SDK or network failure is reported and
        # turned into None rather than propagated to the caller.
        print(f"Vision Scraping Failed: {e}")
        return None
def extract_chart_data(image_url: str) -> Optional[str]:
    """
    Download a chart image and convert it to CSV data using Gemini Vision.

    The image is fetched with httpx and sent, together with an extraction
    prompt, to the 'gemini-2.0-flash-exp' model through the google.genai SDK.

    Args:
        image_url: Address of the chart image to download.

    Returns:
        The model's reply with surrounding whitespace stripped (the prompt
        asks for CSV only), or None when GEMINI_API_KEY is not set, the URL
        is empty, the download does not end in HTTP 200, the body is empty,
        the model returns no text, or any error occurs.
    """
    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        return None

    if not image_url:
        return None

    # Image formats accepted here; anything else falls back to the
    # historical default of "image/png".
    # NOTE(review): list taken from the Gemini image-understanding docs -
    # confirm against the API version in use.
    supported_mime_types = (
        "image/png",
        "image/jpeg",
        "image/webp",
        "image/heic",
        "image/heif",
    )

    try:
        import httpx

        # Download image. Redirects are followed because httpx does not
        # follow them by default and a redirected image URL would otherwise
        # be treated as a failure.
        # NOTE(review): image_url is fetched as given; if it can come from
        # untrusted input, the caller should restrict allowed hosts.
        with httpx.Client(follow_redirects=True) as http_client:
            resp = http_client.get(image_url)

        if resp.status_code != 200:
            return None

        image_bytes = resp.content
        if not image_bytes:
            return None

        # Label the bytes with the type the server reported instead of
        # always claiming PNG, so JPEG/WebP charts are not mislabeled.
        content_type = resp.headers.get("content-type", "")
        mime_type = content_type.split(";", 1)[0].strip().lower()
        if mime_type == "image/jpg":
            # Common non-standard spelling of the JPEG media type.
            mime_type = "image/jpeg"
        if mime_type not in supported_mime_types:
            mime_type = "image/png"

        client = genai.Client(api_key=api_key)

        prompt = """
        Analyze this chart image.
        Extract the underlying data and output it as a CSV string.
        Do not include any other text, just the CSV.
        """

        # Create image part for new SDK
        image_part = types.Part.from_bytes(
            data=image_bytes,
            mime_type=mime_type
        )

        response = client.models.generate_content(
            model='gemini-2.0-flash-exp',
            contents=[prompt, image_part]
        )

        # response.text may be None when the model produced no text parts;
        # report that explicitly instead of failing on .strip().
        csv_text = response.text
        if csv_text is None:
            print("Chart Extraction Failed: model returned no text")
            return None

        return csv_text.strip()
    except Exception as e:
        # Best-effort helper: download, SDK and import failures are reported
        # and turned into None rather than propagated to the caller.
        print(f"Chart Extraction Failed: {e}")
        return None