import unittest

from app.agents.browser_dom import extract_dom_snapshot, build_visual_dom_extract_script
class BrowserDomTests(unittest.TestCase):
    """Unit tests for the DOM helpers imported from ``app.agents.browser_dom``."""

    def test_extract_dom_snapshot_keeps_visible_text_and_absolute_links(self):
        """Check the text and links returned for a small static HTML page.

        The heading and paragraph text must appear in the returned text, the
        inline ``<script>`` content must not, and only the absolute ``href``
        must be reported as a link.
        """
        # NOTE(review): the indentation inside this literal was lost in the
        # source this was recovered from; this assumes extract_dom_snapshot is
        # insensitive to whitespace between tags -- confirm.
        text, links = extract_dom_snapshot(
            """
            <html>
            <head>
            <style>.hidden { display:none; }</style>
            <script>console.log("ignore me")</script>
            </head>
            <body>
            <h1>Donald Trump</h1>
            <p>Born June 14, 1946</p>
            <a href="https://en.wikipedia.org/wiki/Donald_Trump">Wikipedia</a>
            <a href="/relative/path">Relative</a>
            </body>
            </html>
            """,
            max_chars=200,
            max_links=10,
        )

        self.assertIn("Donald Trump", text)
        self.assertIn("Born June 14, 1946", text)
        # Text inside <script> is not visible page content.
        self.assertNotIn("ignore me", text)
        # The "/relative/path" anchor is expected to be left out.
        self.assertEqual(links, ["https://en.wikipedia.org/wiki/Donald_Trump"])

    def test_build_visual_dom_extract_script_uses_dump_dom(self):
        """Check that the generated script text contains the expected fragments.

        The script built for a URL must mention ``--dump-dom``, the URL itself
        and the ``DomSnapshotParser`` name.
        """
        script = build_visual_dom_extract_script("https://example.com")

        self.assertIn("--dump-dom", script)
        self.assertIn("https://example.com", script)
        self.assertIn("DomSnapshotParser", script)
if __name__ == "__main__":
    # Run the tests in this module when the file is executed directly.
    unittest.main()