use / browser_use /dom /tests /debug_page_structure.py
Boobs00's picture
Upload folder using huggingface_hub
db4810d verified
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from browser_use.browser.browser import Browser, BrowserConfig
from browser_use.browser.context import BrowserContext
# JavaScript evaluated in the page: reports the window's inner size and
# current scroll offsets, nested under a single "viewport" key.
_VIEWPORT_JS = """() => {
    return {
        viewport: {
            width: window.innerWidth,
            height: window.innerHeight,
            scrollX: window.scrollX,
            scrollY: window.scrollY
        }
    }
}"""

# JavaScript evaluated in the page: collects layout/visibility details for
# (a) elements whose id or class contains "cookie" or "consent" and
# (b) every element whose computed position is "fixed" or "sticky".
_DEBUG_INFO_JS = """() => {
    function getElementInfo(element) {
        const rect = element.getBoundingClientRect();
        const style = window.getComputedStyle(element);
        return {
            tag: element.tagName.toLowerCase(),
            id: element.id,
            // On SVG elements `className` is an SVGAnimatedString object,
            // not a string; the class attribute is a plain string everywhere.
            className: element.getAttribute('class') || '',
            position: style.position,
            rect: {
                top: rect.top,
                right: rect.right,
                bottom: rect.bottom,
                left: rect.left,
                width: rect.width,
                height: rect.height
            },
            isFixed: style.position === 'fixed',
            isSticky: style.position === 'sticky',
            zIndex: style.zIndex,
            visibility: style.visibility,
            display: style.display,
            opacity: style.opacity
        };
    }
    // Find cookie-related elements
    const cookieElements = Array.from(document.querySelectorAll('[id*="cookie"], [id*="consent"], [class*="cookie"], [class*="consent"]'));
    const fixedElements = Array.from(document.querySelectorAll('*')).filter(el => {
        const style = window.getComputedStyle(el);
        return style.position === 'fixed' || style.position === 'sticky';
    });
    return {
        cookieElements: cookieElements.map(getElementInfo),
        fixedElements: fixedElements.map(getElementInfo)
    };
}"""


def _print_element_summary(elem: dict) -> None:
    """Print one element's tag/id/class, CSS position, bounding rect and z-index."""
    print(f'\nElement: {elem["tag"]}#{elem["id"]} .{elem["className"]}')
    print(f'Position: {elem["position"]}')
    print(f'Rect: {elem["rect"]}')
    print(f'Z-Index: {elem["zIndex"]}')


def _print_element_visibility(elem: dict) -> None:
    """Print the computed visibility, display and opacity of one element."""
    print(f'Visibility: {elem["visibility"]}')
    print(f'Display: {elem["display"]}')
    print(f'Opacity: {elem["opacity"]}')


async def analyze_page_structure(url: str) -> None:
    """Analyze and print the structure of a webpage with enhanced debugging.

    Opens a visible (non-headless) browser, navigates to ``url``, waits for
    the ``networkidle`` load state and then prints, in order:

    1. the viewport size and scroll offsets,
    2. details of elements whose id/class contains "cookie" or "consent",
    3. details of all fixed- or sticky-positioned elements,
    4. the value returned by the context's ``get_page_structure()``.

    The function then waits for the user to press Enter so the page can be
    inspected manually. The browser is closed afterwards, including when an
    error is raised along the way.

    Args:
        url: Address of the page to load and analyze.
    """
    browser = Browser(
        config=BrowserConfig(
            headless=False,  # Set to True if you don't need to see the browser
        )
    )
    context = BrowserContext(browser=browser)
    try:
        async with context as ctx:
            # Navigate to the URL
            page = await ctx.get_current_page()
            await page.goto(url)
            await page.wait_for_load_state('networkidle')

            # Get viewport dimensions
            viewport_info = await page.evaluate(_VIEWPORT_JS)
            viewport = viewport_info['viewport']
            print('\nViewport Information:')
            print(f'Width: {viewport["width"]}')
            print(f'Height: {viewport["height"]}')
            print(f'ScrollX: {viewport["scrollX"]}')
            print(f'ScrollY: {viewport["scrollY"]}')

            # Enhanced debug information for cookie consent and fixed position elements
            debug_info = await page.evaluate(_DEBUG_INFO_JS)

            print('\nCookie-related Elements:')
            for elem in debug_info['cookieElements']:
                _print_element_summary(elem)
                _print_element_visibility(elem)

            print('\nFixed/Sticky Position Elements:')
            for elem in debug_info['fixedElements']:
                _print_element_summary(elem)

            print(f'\nPage Structure for {url}:\n')
            structure = await ctx.get_page_structure()
            print(structure)

            # Synchronous prompt: keeps the browser open until Enter is pressed.
            input('Press Enter to close the browser...')
    finally:
        await browser.close()
if __name__ == '__main__':
    # Edit this list to analyze different pages. Each URL is analyzed in turn
    # by a separate asyncio.run() call. Every entry is listed once so that no
    # page is opened and analyzed twice.
    urls = [
        'https://www.mlb.com/yankees/stats/',
        'https://immobilienscout24.de',
        'https://www.zeiss.com/career/en/job-search.html?page=1',
        'https://reddit.com',
    ]
    for url in urls:
        asyncio.run(analyze_page_structure(url))