""" Debug test for iframe scrolling issue where DOM tree only shows top elements after scrolling. This test verifies that after scrolling inside an iframe, the selector_map correctly contains lower input elements like City, State, Zip Code, etc. """ import asyncio import sys from pathlib import Path # Add parent directory to path to import browser_use modules sys.path.insert(0, str(Path(__file__).parent.parent.parent)) from browser_use.agent.service import Agent from browser_use.agent.views import ActionModel from browser_use.browser import BrowserProfile, BrowserSession from browser_use.browser.events import BrowserStateRequestEvent # Import the mock LLM helper from conftest from tests.ci.conftest import create_mock_llm async def debug_iframe_scrolling(): """Debug iframe scrolling and DOM visibility issue.""" print('Starting iframe scrolling debug test...') # Create the sequence of actions for the mock LLM # We need to format these as the LLM would return them actions = [ # First action: Navigate to the test URL """ { "thinking": "Navigating to the iframe test page", "evaluation_previous_goal": null, "memory": "Starting test", "next_goal": "Navigate to the iframe test page", "action": [ { "navigate": { "url": "https://browser-use.github.io/stress-tests/challenges/iframe-inception-level1.html", "new_tab": false } } ] } """, # Second action: Input text in the first name field (to verify we can interact) """ { "thinking": "Inputting text in the first name field to test interaction", "evaluation_previous_goal": "Successfully navigated to the page", "memory": "Page loaded with nested iframes", "next_goal": "Type text in the first name field", "action": [ { "input_text": { "index": 1, "text": "TestName" } } ] } """, # Third action: Scroll the iframe (element_index=2 should be the iframe) """ { "thinking": "Scrolling inside the iframe to reveal lower form elements", "evaluation_previous_goal": "Successfully typed in first name field", "memory": "Typed TestName in first field", "next_goal": "Scroll inside the innermost iframe to see more form fields", "action": [ { "scroll": { "down": true, "num_pages": 1.0, "index": 2 } } ] } """, # Fourth action: Done """ { "thinking": "Completed scrolling, ready to inspect DOM", "evaluation_previous_goal": "Successfully scrolled inside iframe", "memory": "Scrolled to reveal lower form fields", "next_goal": "Task completed", "action": [ { "done": { "text": "Scrolling completed", "success": true } } ] } """, ] # Create mock LLM with our action sequence mock_llm = create_mock_llm(actions=actions) # Create browser session with headless=False so we can see what's happening browser_session = BrowserSession( browser_profile=BrowserProfile( headless=False, # Set to False to see the browser user_data_dir=None, # Use temporary directory keep_alive=True, enable_default_extensions=True, cross_origin_iframes=True, # Enable cross-origin iframe support ) ) try: # Start the browser session await browser_session.start() print('Browser session started') # Create an agent with the mock LLM agent = Agent( task='Navigate to the iframe test page and scroll inside the iframe', llm=mock_llm, browser_session=browser_session, ) # Helper function to capture and analyze DOM state async def capture_dom_state(label: str) -> dict: """Capture DOM state and return analysis""" print(f'\nšŸ“ø Capturing DOM state: {label}') state_event = browser_session.event_bus.dispatch( BrowserStateRequestEvent(include_dom=True, include_screenshot=False, include_recent_events=False) ) browser_state = await state_event.event_result() if browser_state and browser_state.dom_state and browser_state.dom_state.selector_map: selector_map = browser_state.dom_state.selector_map element_count = len(selector_map) # Check for specific elements found_elements = {} expected_checks = [ ('First Name', ['firstName', 'first name']), ('Last Name', ['lastName', 'last name']), ('Email', ['email']), ('City', ['city']), ('State', ['state']), ('Zip', ['zip', 'zipCode']), ] for name, keywords in expected_checks: for index, element in selector_map.items(): element_str = str(element).lower() if any(kw.lower() in element_str for kw in keywords): found_elements[name] = True break return { 'label': label, 'total_elements': element_count, 'found_elements': found_elements, 'selector_map': selector_map, } return {'label': label, 'error': 'No DOM state available'} # Capture initial state before any actions print('\n' + '=' * 80) print('PHASE 1: INITIAL PAGE LOAD') print('=' * 80) # Navigate to the page first from browser_use.tools.service import Tools tools = Tools() # Create the action model for navigation goto_action = ActionModel.model_validate_json(actions[0]) await tools.act(goto_action, browser_session) await asyncio.sleep(2) # Wait for page to fully load initial_state = await capture_dom_state('INITIAL (after page load)') # Now run the rest of the actions via the agent print('\n' + '=' * 80) print('PHASE 2: EXECUTING ACTIONS') print('=' * 80) # Create new agent with remaining actions remaining_actions = actions[1:] # Skip the navigation we already did mock_llm_remaining = create_mock_llm(actions=remaining_actions) agent = Agent( task='Input text and scroll inside the iframe', llm=mock_llm_remaining, browser_session=browser_session, ) # Hook into agent actions to capture state after each one states = [] original_act = tools.act async def wrapped_act(action, session): result = await original_act(action, session) # Capture state after each action action_type = 'unknown' if hasattr(action, 'input_text') and action.input_text: action_type = 'input_text' await asyncio.sleep(1) # Give time for DOM to update state = await capture_dom_state('AFTER INPUT_TEXT') states.append(state) elif hasattr(action, 'scroll') and action.scroll: action_type = 'scroll' await asyncio.sleep(2) # Give more time after scroll state = await capture_dom_state('AFTER SCROLL') states.append(state) return result tools.act = wrapped_act # Run the agent with remaining actions result = await agent.run() print(f'\nAgent completed with result: {result}') # Analyze all captured states print('\n' + '=' * 80) print('PHASE 3: ANALYSIS OF DOM STATES') print('=' * 80) all_states = [initial_state] + states for state in all_states: if 'error' in state: print(f'\nāŒ {state["label"]}: {state["error"]}') else: print(f'\nšŸ“Š {state["label"]}:') print(f' Total elements: {state["total_elements"]}') print(' Found elements:') for elem_name, found in state['found_elements'].items(): status = 'āœ“' if found else 'āœ—' print(f' {status} {elem_name}') # Compare states print('\n' + '=' * 80) print('COMPARISON SUMMARY') print('=' * 80) if len(all_states) >= 3: initial = all_states[0] after_input = all_states[1] if len(all_states) > 1 else None after_scroll = all_states[2] if len(all_states) > 2 else None print('\nElement count changes:') print(f' Initial: {initial.get("total_elements", 0)} elements') if after_input: print(f' After input_text: {after_input.get("total_elements", 0)} elements') if after_scroll: print(f' After scroll: {after_scroll.get("total_elements", 0)} elements') # Check if lower form fields appear after scroll if after_scroll and 'found_elements' in after_scroll: lower_fields = ['City', 'State', 'Zip'] missing_fields = [f for f in lower_fields if not after_scroll['found_elements'].get(f, False)] if missing_fields: print('\nāš ļø BUG CONFIRMED: Lower form fields missing after scroll:') for field in missing_fields: print(f' āœ— {field}') print('\nThis confirms that scrolling inside iframes does not update the DOM tree properly.') else: print('\nāœ… SUCCESS: All lower form fields are visible after scrolling!') # Show first few elements from final state for debugging if states and 'selector_map' in states[-1]: print('\n' + '=' * 80) print('DEBUG: First 5 elements in final selector_map') print('=' * 80) final_map = states[-1]['selector_map'] for i, (index, element) in enumerate(list(final_map.items())[:5]): elem_preview = str(element)[:150] print(f'\n [{index}]: {elem_preview}...') # Keep browser open for manual inspection if needed print('\n' + '=' * 80) print('Test complete. Browser will remain open for 10 seconds for inspection...') print('=' * 80) await asyncio.sleep(10) finally: # Clean up print('\nCleaning up...') await browser_session.kill() await browser_session.event_bus.stop(clear=True, timeout=5) print('Browser session closed') if __name__ == '__main__': # Run the debug test asyncio.run(debug_iframe_scrolling())