riazmo committed on
Commit
a4c5c78
·
verified ·
1 Parent(s): d401095

Upload website_capturer.py

Browse files
Files changed (1) hide show
  1. utils/website_capturer.py +138 -0
utils/website_capturer.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Enhanced Website Capturer - Full Page Screenshot
3
+ Captures complete page height while maintaining fixed width
4
+ """
5
+
6
+ import asyncio
7
+ from pathlib import Path
8
+ from typing import Dict, Tuple
9
+ import logging
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
async def _capture_viewport(
    browser,
    website_url: str,
    output_dir: str,
    label: str,
    width: int,
    initial_height: int,
) -> str:
    """Capture one full-page screenshot at a fixed width and return its path.

    Args:
        browser: Launched Playwright browser object used to open the page.
        website_url: URL of the website to capture.
        output_dir: Directory the PNG is written to (must already exist).
        label: Short viewport name ("desktop" or "mobile"); used in the
            progress messages and as the file-name prefix.
        width: Viewport width in pixels.
        initial_height: Viewport height used for the initial page load,
            before the real page height is measured.

    Returns:
        Path of the saved screenshot, named ``<label>_<width>x<height>.png``.
    """
    import os

    print(f" 📱 Capturing {label} ({width}px width, full height)...")
    page = await browser.new_page(viewport={"width": width, "height": initial_height})
    try:
        await page.goto(website_url, wait_until="networkidle")

        # Measure the rendered document so the viewport can be stretched to it.
        full_height = await page.evaluate("() => document.documentElement.scrollHeight")
        print(f" ℹ️ {label.capitalize()} full height: {full_height}px")

        # Resize to the measured height, then capture the whole page.
        await page.set_viewport_size({"width": width, "height": full_height})
        screenshot_path = os.path.join(output_dir, f"{label}_{width}x{full_height}.png")
        await page.screenshot(path=screenshot_path, full_page=True)
        print(f" ✓ Saved: {screenshot_path}")
    finally:
        # Close the page even when navigation or the screenshot fails.
        await page.close()

    return screenshot_path


async def capture_website_fullpage(
    website_url: str,
    output_dir: str = "./reports",
    desktop_width: int = 1440,
    mobile_width: int = 375
) -> Dict[str, str]:
    """
    Capture full-page screenshots of website at multiple viewports.

    Args:
        website_url: URL of the website to capture
        output_dir: Directory to save screenshots (created if missing)
        desktop_width: Desktop viewport width (height auto-calculated)
        mobile_width: Mobile viewport width (height auto-calculated)

    Returns:
        Dictionary with paths to captured screenshots, keyed by
        "desktop" and "mobile"

    Raises:
        ImportError: If playwright cannot be imported, even after the
            fallback site-packages directories are added to sys.path.
    """
    import os

    # Ensure local browser path is set (an existing value is left untouched).
    if "PLAYWRIGHT_BROWSERS_PATH" not in os.environ:
        os.environ["PLAYWRIGHT_BROWSERS_PATH"] = os.path.join(os.getcwd(), ".playwright-browsers")

    try:
        from playwright.async_api import async_playwright
    except ImportError:
        import sys

        # Common paths for HF Spaces. Best-effort fallback: the retry below
        # re-raises ImportError if playwright is still not importable.
        for candidate in (
            "/usr/local/lib/python3.10/site-packages",
            "/home/user/.local/lib/python3.10/site-packages",
        ):
            if candidate not in sys.path:
                sys.path.append(candidate)
        from playwright.async_api import async_playwright

    Path(output_dir).mkdir(parents=True, exist_ok=True)
    screenshots: Dict[str, str] = {}

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)

        try:
            # Desktop first, then mobile; each starts from a conventional
            # initial height before being stretched to the full page.
            screenshots["desktop"] = await _capture_viewport(
                browser, website_url, output_dir, "desktop", desktop_width, 1080
            )
            screenshots["mobile"] = await _capture_viewport(
                browser, website_url, output_dir, "mobile", mobile_width, 812
            )
        finally:
            await browser.close()

    return screenshots
93
+
94
+
95
def capture_website_sync(
    website_url: str,
    output_dir: str = "./reports",
    desktop_width: int = 1440,
    mobile_width: int = 375
) -> Dict[str, str]:
    """
    Blocking entry point for the full-page screenshot capture.

    Runs the async capture coroutine to completion with asyncio.run and
    hands back its result.

    Args:
        website_url: URL of the website to capture
        output_dir: Directory to save screenshots
        desktop_width: Desktop viewport width
        mobile_width: Mobile viewport width

    Returns:
        Dictionary with paths to captured screenshots
    """
    capture_job = capture_website_fullpage(
        website_url,
        output_dir,
        desktop_width,
        mobile_width,
    )
    captured_paths = asyncio.run(capture_job)
    return captured_paths
121
+
122
+
123
if __name__ == "__main__":
    # Manual test entry point: capture the URL given on the command line and
    # list the screenshot files that were written.
    import sys

    if len(sys.argv) < 2:
        # Derive the script name from argv so the usage line always matches
        # the file that was actually invoked.
        print(f"Usage: python {Path(sys.argv[0]).name} <url> [output_dir]")
        sys.exit(1)

    url = sys.argv[1]
    # The output directory is optional and falls back to ./reports.
    output = sys.argv[2] if len(sys.argv) > 2 else "./reports"

    print(f"Capturing website: {url}")
    result = capture_website_sync(url, output)
    print("\nScreenshots saved:")
    for key, path in result.items():
        print(f" {key}: {path}")