Spaces:
Sleeping
Sleeping
| """ | |
| Debug test for iframe scrolling issue where DOM tree only shows top elements after scrolling. | |
| This test verifies that after scrolling inside an iframe, the selector_map correctly | |
| contains lower input elements like City, State, Zip Code, etc. | |
| """ | |
| import asyncio | |
| import sys | |
| from pathlib import Path | |
| # Add parent directory to path to import browser_use modules | |
| sys.path.insert(0, str(Path(__file__).parent.parent.parent)) | |
| from browser_use.agent.service import Agent | |
| from browser_use.agent.views import ActionModel | |
| from browser_use.browser import BrowserProfile, BrowserSession | |
| from browser_use.browser.events import BrowserStateRequestEvent | |
| # Import the mock LLM helper from conftest | |
| from tests.ci.conftest import create_mock_llm | |
| async def debug_iframe_scrolling(): | |
| """Debug iframe scrolling and DOM visibility issue.""" | |
| print('Starting iframe scrolling debug test...') | |
| # Create the sequence of actions for the mock LLM | |
| # We need to format these as the LLM would return them | |
| actions = [ | |
| # First action: Navigate to the test URL | |
| """ | |
| { | |
| "thinking": "Navigating to the iframe test page", | |
| "evaluation_previous_goal": null, | |
| "memory": "Starting test", | |
| "next_goal": "Navigate to the iframe test page", | |
| "action": [ | |
| { | |
| "navigate": { | |
| "url": "https://browser-use.github.io/stress-tests/challenges/iframe-inception-level1.html", | |
| "new_tab": false | |
| } | |
| } | |
| ] | |
| } | |
| """, | |
| # Second action: Input text in the first name field (to verify we can interact) | |
| """ | |
| { | |
| "thinking": "Inputting text in the first name field to test interaction", | |
| "evaluation_previous_goal": "Successfully navigated to the page", | |
| "memory": "Page loaded with nested iframes", | |
| "next_goal": "Type text in the first name field", | |
| "action": [ | |
| { | |
| "input_text": { | |
| "index": 1, | |
| "text": "TestName" | |
| } | |
| } | |
| ] | |
| } | |
| """, | |
| # Third action: Scroll the iframe (element_index=2 should be the iframe) | |
| """ | |
| { | |
| "thinking": "Scrolling inside the iframe to reveal lower form elements", | |
| "evaluation_previous_goal": "Successfully typed in first name field", | |
| "memory": "Typed TestName in first field", | |
| "next_goal": "Scroll inside the innermost iframe to see more form fields", | |
| "action": [ | |
| { | |
| "scroll": { | |
| "down": true, | |
| "num_pages": 1.0, | |
| "index": 2 | |
| } | |
| } | |
| ] | |
| } | |
| """, | |
| # Fourth action: Done | |
| """ | |
| { | |
| "thinking": "Completed scrolling, ready to inspect DOM", | |
| "evaluation_previous_goal": "Successfully scrolled inside iframe", | |
| "memory": "Scrolled to reveal lower form fields", | |
| "next_goal": "Task completed", | |
| "action": [ | |
| { | |
| "done": { | |
| "text": "Scrolling completed", | |
| "success": true | |
| } | |
| } | |
| ] | |
| } | |
| """, | |
| ] | |
| # Create mock LLM with our action sequence | |
| mock_llm = create_mock_llm(actions=actions) | |
| # Create browser session with headless=False so we can see what's happening | |
| browser_session = BrowserSession( | |
| browser_profile=BrowserProfile( | |
| headless=False, # Set to False to see the browser | |
| user_data_dir=None, # Use temporary directory | |
| keep_alive=True, | |
| enable_default_extensions=True, | |
| cross_origin_iframes=True, # Enable cross-origin iframe support | |
| ) | |
| ) | |
| try: | |
| # Start the browser session | |
| await browser_session.start() | |
| print('Browser session started') | |
| # Create an agent with the mock LLM | |
| agent = Agent( | |
| task='Navigate to the iframe test page and scroll inside the iframe', | |
| llm=mock_llm, | |
| browser_session=browser_session, | |
| ) | |
| # Helper function to capture and analyze DOM state | |
| async def capture_dom_state(label: str) -> dict: | |
| """Capture DOM state and return analysis""" | |
| print(f'\n📸 Capturing DOM state: {label}') | |
| state_event = browser_session.event_bus.dispatch( | |
| BrowserStateRequestEvent(include_dom=True, include_screenshot=False, include_recent_events=False) | |
| ) | |
| browser_state = await state_event.event_result() | |
| if browser_state and browser_state.dom_state and browser_state.dom_state.selector_map: | |
| selector_map = browser_state.dom_state.selector_map | |
| element_count = len(selector_map) | |
| # Check for specific elements | |
| found_elements = {} | |
| expected_checks = [ | |
| ('First Name', ['firstName', 'first name']), | |
| ('Last Name', ['lastName', 'last name']), | |
| ('Email', ['email']), | |
| ('City', ['city']), | |
| ('State', ['state']), | |
| ('Zip', ['zip', 'zipCode']), | |
| ] | |
| for name, keywords in expected_checks: | |
| for index, element in selector_map.items(): | |
| element_str = str(element).lower() | |
| if any(kw.lower() in element_str for kw in keywords): | |
| found_elements[name] = True | |
| break | |
| return { | |
| 'label': label, | |
| 'total_elements': element_count, | |
| 'found_elements': found_elements, | |
| 'selector_map': selector_map, | |
| } | |
| return {'label': label, 'error': 'No DOM state available'} | |
| # Capture initial state before any actions | |
| print('\n' + '=' * 80) | |
| print('PHASE 1: INITIAL PAGE LOAD') | |
| print('=' * 80) | |
| # Navigate to the page first | |
| from browser_use.tools.service import Tools | |
| tools = Tools() | |
| # Create the action model for navigation | |
| goto_action = ActionModel.model_validate_json(actions[0]) | |
| await tools.act(goto_action, browser_session) | |
| await asyncio.sleep(2) # Wait for page to fully load | |
| initial_state = await capture_dom_state('INITIAL (after page load)') | |
| # Now run the rest of the actions via the agent | |
| print('\n' + '=' * 80) | |
| print('PHASE 2: EXECUTING ACTIONS') | |
| print('=' * 80) | |
| # Create new agent with remaining actions | |
| remaining_actions = actions[1:] # Skip the navigation we already did | |
| mock_llm_remaining = create_mock_llm(actions=remaining_actions) | |
| agent = Agent( | |
| task='Input text and scroll inside the iframe', | |
| llm=mock_llm_remaining, | |
| browser_session=browser_session, | |
| ) | |
| # Hook into agent actions to capture state after each one | |
| states = [] | |
| original_act = tools.act | |
| async def wrapped_act(action, session): | |
| result = await original_act(action, session) | |
| # Capture state after each action | |
| action_type = 'unknown' | |
| if hasattr(action, 'input_text') and action.input_text: | |
| action_type = 'input_text' | |
| await asyncio.sleep(1) # Give time for DOM to update | |
| state = await capture_dom_state('AFTER INPUT_TEXT') | |
| states.append(state) | |
| elif hasattr(action, 'scroll') and action.scroll: | |
| action_type = 'scroll' | |
| await asyncio.sleep(2) # Give more time after scroll | |
| state = await capture_dom_state('AFTER SCROLL') | |
| states.append(state) | |
| return result | |
| tools.act = wrapped_act | |
| # Run the agent with remaining actions | |
| result = await agent.run() | |
| print(f'\nAgent completed with result: {result}') | |
| # Analyze all captured states | |
| print('\n' + '=' * 80) | |
| print('PHASE 3: ANALYSIS OF DOM STATES') | |
| print('=' * 80) | |
| all_states = [initial_state] + states | |
| for state in all_states: | |
| if 'error' in state: | |
| print(f'\n❌ {state["label"]}: {state["error"]}') | |
| else: | |
| print(f'\n📊 {state["label"]}:') | |
| print(f' Total elements: {state["total_elements"]}') | |
| print(' Found elements:') | |
| for elem_name, found in state['found_elements'].items(): | |
| status = '✓' if found else '✗' | |
| print(f' {status} {elem_name}') | |
| # Compare states | |
| print('\n' + '=' * 80) | |
| print('COMPARISON SUMMARY') | |
| print('=' * 80) | |
| if len(all_states) >= 3: | |
| initial = all_states[0] | |
| after_input = all_states[1] if len(all_states) > 1 else None | |
| after_scroll = all_states[2] if len(all_states) > 2 else None | |
| print('\nElement count changes:') | |
| print(f' Initial: {initial.get("total_elements", 0)} elements') | |
| if after_input: | |
| print(f' After input_text: {after_input.get("total_elements", 0)} elements') | |
| if after_scroll: | |
| print(f' After scroll: {after_scroll.get("total_elements", 0)} elements') | |
| # Check if lower form fields appear after scroll | |
| if after_scroll and 'found_elements' in after_scroll: | |
| lower_fields = ['City', 'State', 'Zip'] | |
| missing_fields = [f for f in lower_fields if not after_scroll['found_elements'].get(f, False)] | |
| if missing_fields: | |
| print('\n⚠️ BUG CONFIRMED: Lower form fields missing after scroll:') | |
| for field in missing_fields: | |
| print(f' ✗ {field}') | |
| print('\nThis confirms that scrolling inside iframes does not update the DOM tree properly.') | |
| else: | |
| print('\n✅ SUCCESS: All lower form fields are visible after scrolling!') | |
| # Show first few elements from final state for debugging | |
| if states and 'selector_map' in states[-1]: | |
| print('\n' + '=' * 80) | |
| print('DEBUG: First 5 elements in final selector_map') | |
| print('=' * 80) | |
| final_map = states[-1]['selector_map'] | |
| for i, (index, element) in enumerate(list(final_map.items())[:5]): | |
| elem_preview = str(element)[:150] | |
| print(f'\n [{index}]: {elem_preview}...') | |
| # Keep browser open for manual inspection if needed | |
| print('\n' + '=' * 80) | |
| print('Test complete. Browser will remain open for 10 seconds for inspection...') | |
| print('=' * 80) | |
| await asyncio.sleep(10) | |
| finally: | |
| # Clean up | |
| print('\nCleaning up...') | |
| await browser_session.kill() | |
| await browser_session.event_bus.stop(clear=True, timeout=5) | |
| print('Browser session closed') | |
| if __name__ == '__main__': | |
| # Run the debug test | |
| asyncio.run(debug_iframe_scrolling()) | |