File size: 3,830 Bytes
db4810d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import asyncio
import os
import sys

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from browser_use.browser.browser import Browser, BrowserConfig
from browser_use.browser.context import BrowserContext


def _print_element_info(elem: dict, *, verbose: bool = False) -> None:
	"""Print one element record produced by the in-page `getElementInfo` helper.

	Args:
		elem: Mapping with the keys `tag`, `id`, `className`, `position`, `rect`
			and `zIndex`; when `verbose` is true it must also carry
			`visibility`, `display` and `opacity`.
		verbose: Also print the visibility-related CSS properties.
	"""
	print(f'\nElement: {elem["tag"]}#{elem["id"]} .{elem["className"]}')
	print(f'Position: {elem["position"]}')
	print(f'Rect: {elem["rect"]}')
	print(f'Z-Index: {elem["zIndex"]}')
	if verbose:
		print(f'Visibility: {elem["visibility"]}')
		print(f'Display: {elem["display"]}')
		print(f'Opacity: {elem["opacity"]}')


async def analyze_page_structure(url: str) -> None:
	"""Open `url` in a visible browser and print debugging details about the page.

	The following is written to stdout, in order:

	1. The viewport size and current scroll offsets.
	2. Every element whose `id` or `class` contains "cookie" or "consent",
	   with its geometry and visibility-related CSS properties.
	3. Every element whose computed `position` is `fixed` or `sticky`.
	4. Whatever `ctx.get_page_structure()` returns for the page.

	The function then waits for the user to press Enter and closes the browser.

	Args:
		url: Address of the page to load and inspect.

	Note:
		Errors raised while navigating or evaluating scripts are not caught
		here; the browser is still closed via the `finally` clause.
	"""
	browser = Browser(
		config=BrowserConfig(
			headless=False,  # Set to True if you don't need to see the browser
		)
	)

	context = BrowserContext(browser=browser)

	try:
		async with context as ctx:
			# Navigate to the URL and wait until network activity settles
			page = await ctx.get_current_page()
			await page.goto(url)
			await page.wait_for_load_state('networkidle')

			# Get viewport dimensions
			viewport_info = await page.evaluate("""() => {
				return {
					viewport: {
						width: window.innerWidth,
						height: window.innerHeight,
						scrollX: window.scrollX,
						scrollY: window.scrollY
					}
				}
			}""")

			viewport = viewport_info['viewport']
			print('\nViewport Information:')
			print(f'Width: {viewport["width"]}')
			print(f'Height: {viewport["height"]}')
			print(f'ScrollX: {viewport["scrollX"]}')
			print(f'ScrollY: {viewport["scrollY"]}')

			# Enhanced debug information for cookie consent and fixed position elements
			debug_info = await page.evaluate("""() => {
				function getElementInfo(element) {
					const rect = element.getBoundingClientRect();
					const style = window.getComputedStyle(element);
					return {
						tag: element.tagName.toLowerCase(),
						id: element.id,
						// Read the attribute rather than element.className: on SVG
						// elements className is an SVGAnimatedString object, not a string.
						className: element.getAttribute('class') || '',
						position: style.position,
						rect: {
							top: rect.top,
							right: rect.right,
							bottom: rect.bottom,
							left: rect.left,
							width: rect.width,
							height: rect.height
						},
						isFixed: style.position === 'fixed',
						isSticky: style.position === 'sticky',
						zIndex: style.zIndex,
						visibility: style.visibility,
						display: style.display,
						opacity: style.opacity
					};
				}

				// Find cookie-related elements
				const cookieElements = Array.from(document.querySelectorAll('[id*="cookie"], [id*="consent"], [class*="cookie"], [class*="consent"]'));
				const fixedElements = Array.from(document.querySelectorAll('*')).filter(el => {
					const style = window.getComputedStyle(el);
					return style.position === 'fixed' || style.position === 'sticky';
				});

				return {
					cookieElements: cookieElements.map(getElementInfo),
					fixedElements: fixedElements.map(getElementInfo)
				};
			}""")

			print('\nCookie-related Elements:')
			for elem in debug_info['cookieElements']:
				_print_element_info(elem, verbose=True)

			print('\nFixed/Sticky Position Elements:')
			for elem in debug_info['fixedElements']:
				_print_element_info(elem)

			print(f'\nPage Structure for {url}:\n')
			structure = await ctx.get_page_structure()
			print(structure)

			# NOTE: input() blocks the event loop while it waits; it is used here
			# to keep the browser window open until the user is done inspecting.
			input('Press Enter to close the browser...')
	finally:
		await browser.close()


if __name__ == '__main__':
	# Edit this list to analyze different pages. The pages are processed one
	# after another, each with its own asyncio.run() call.
	urls = [
		'https://www.mlb.com/yankees/stats/',
		'https://immobilienscout24.de',
		# Listed once: a repeated entry would only re-run the same analysis.
		'https://www.zeiss.com/career/en/job-search.html?page=1',
		'https://reddit.com',
	]
	for url in urls:
		asyncio.run(analyze_page_structure(url))