Spaces:
Sleeping
Sleeping
File size: 4,813 Bytes
a4c5c78 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
"""
Enhanced Website Capturer - Full Page Screenshot
Captures complete page height while maintaining fixed width
"""
import asyncio
from pathlib import Path
from typing import Dict, Tuple
import logging
logger = logging.getLogger(__name__)
async def _capture_viewport(
    browser,
    website_url: str,
    output_dir: str,
    label: str,
    width: int,
    initial_height: int,
) -> str:
    """
    Capture one full-page screenshot at a fixed viewport width.

    Args:
        browser: Launched Playwright browser used to open the page
        website_url: URL of the website to capture
        output_dir: Directory to save the screenshot in (must already exist)
        label: Name of the capture ("desktop" or "mobile"); used in the
            progress messages and as the file name prefix
        width: Viewport width in pixels
        initial_height: Viewport height used while loading, before the real
            document height has been measured
    Returns:
        Path of the saved PNG file
    """
    print(f" 📱 Capturing {label} ({width}px width, full height)...")
    page = await browser.new_page(viewport={"width": width, "height": initial_height})
    await page.goto(website_url, wait_until="networkidle")

    # Measure the rendered document, then grow the viewport to match so the
    # screenshot covers the complete page height.
    full_height = await page.evaluate("() => document.documentElement.scrollHeight")
    print(f" ℹ️ {label.capitalize()} full height: {full_height}px")
    await page.set_viewport_size({"width": width, "height": full_height})

    # Kept as plain string formatting (not pathlib) so the returned path is
    # spelled exactly as callers passed output_dir, e.g. "./reports/...".
    shot_path = f"{output_dir}/{label}_{width}x{full_height}.png"
    await page.screenshot(path=shot_path, full_page=True)
    print(f" ✓ Saved: {shot_path}")

    # Only reached on success; if anything above raises, the caller's
    # browser.close() disposes of the page together with the browser.
    await page.close()
    return shot_path


async def capture_website_fullpage(
    website_url: str,
    output_dir: str = "./reports",
    desktop_width: int = 1440,
    mobile_width: int = 375
) -> Dict[str, str]:
    """
    Capture full-page screenshots of website at multiple viewports.

    If PLAYWRIGHT_BROWSERS_PATH is not set in the environment, it is set to
    ".playwright-browsers" under the current working directory before
    Playwright is imported. The output directory is created if missing.

    Args:
        website_url: URL of the website to capture
        output_dir: Directory to save screenshots
        desktop_width: Desktop viewport width (height auto-calculated)
        mobile_width: Mobile viewport width (height auto-calculated)
    Returns:
        Dictionary with paths to captured screenshots, keyed by
        "desktop" and "mobile"
    Raises:
        ImportError: If playwright cannot be imported, even after the
            fallback site-packages paths have been added to sys.path
    """
    import os

    # Ensure local browser path is set
    if "PLAYWRIGHT_BROWSERS_PATH" not in os.environ:
        os.environ["PLAYWRIGHT_BROWSERS_PATH"] = os.path.join(os.getcwd(), ".playwright-browsers")

    try:
        from playwright.async_api import async_playwright
    except ImportError:
        import sys

        # Common paths for HF Spaces
        sys.path.append("/usr/local/lib/python3.10/site-packages")
        sys.path.append("/home/user/.local/lib/python3.10/site-packages")
        from playwright.async_api import async_playwright

    Path(output_dir).mkdir(parents=True, exist_ok=True)

    # (label, viewport width, viewport height used while the page loads)
    viewports = (
        ("desktop", desktop_width, 1080),
        ("mobile", mobile_width, 812),
    )

    screenshots: Dict[str, str] = {}
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            for label, width, initial_height in viewports:
                screenshots[label] = await _capture_viewport(
                    browser, website_url, output_dir, label, width, initial_height
                )
        finally:
            # Always release the browser, including when a capture fails.
            await browser.close()
    return screenshots
def capture_website_sync(
    website_url: str,
    output_dir: str = "./reports",
    desktop_width: int = 1440,
    mobile_width: int = 375
) -> Dict[str, str]:
    """
    Blocking entry point for the full-page screenshot capture.

    Builds the capture_website_fullpage coroutine and runs it to completion
    with asyncio.run.

    Args:
        website_url: URL of the website to capture
        output_dir: Directory to save screenshots
        desktop_width: Desktop viewport width
        mobile_width: Mobile viewport width
    Returns:
        Dictionary with paths to captured screenshots
    """
    capture_job = capture_website_fullpage(
        website_url=website_url,
        output_dir=output_dir,
        desktop_width=desktop_width,
        mobile_width=mobile_width,
    )
    return asyncio.run(capture_job)
if __name__ == "__main__":
    # Manual smoke test: capture the URL given on the command line.
    import sys

    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python website_capturer_fullpage.py <url> [output_dir]")
        sys.exit(1)

    target_url = cli_args[0]
    # The output directory is optional and falls back to ./reports.
    target_dir = cli_args[1] if len(cli_args) > 1 else "./reports"

    print(f"Capturing website: {target_url}")
    saved = capture_website_sync(target_url, target_dir)

    print("\nScreenshots saved:")
    for viewport_name, file_path in saved.items():
        print(f" {viewport_name}: {file_path}")
|