Trisha Tomy committed on
Commit ·
7af9344
1
Parent(s): e9a2867
trying fixes for loading
Browse files
src/proxy_lite/browser/browser.py
CHANGED
|
@@ -15,7 +15,7 @@ from tenacity import before_sleep_log, retry, stop_after_delay, wait_exponential
|
|
| 15 |
from proxy_lite.browser.bounding_boxes import POI, BoundingBox, Point, annotate_bounding_boxes
|
| 16 |
from proxy_lite.logger import logger
|
| 17 |
|
| 18 |
-
import base64 #
|
| 19 |
|
| 20 |
SELF_CONTAINED_TAGS = [
|
| 21 |
# many of these are non-interactive but keeping them anyway
|
|
@@ -144,7 +144,7 @@ class BrowserSession:
|
|
| 144 |
# re-run for cases of mid-run redirects
|
| 145 |
@retry(
|
| 146 |
wait=wait_exponential(multiplier=1, min=1, max=10),
|
| 147 |
-
stop=stop_after_delay(5),
|
| 148 |
reraise=True,
|
| 149 |
before_sleep=before_sleep_log(logger, logging.ERROR),
|
| 150 |
)
|
|
@@ -187,29 +187,31 @@ class BrowserSession:
|
|
| 187 |
)
|
| 188 |
async def update_poi(self) -> None:
|
| 189 |
try:
|
| 190 |
-
# Step 1: Wait for
|
| 191 |
-
logger.debug("Attempting wait_for_load_state('
|
| 192 |
-
await self.current_page.wait_for_load_state("
|
| 193 |
-
logger.debug("wait_for_load_state('
|
| 194 |
-
|
| 195 |
-
# Step 2: Wait for the
|
| 196 |
-
# This is a
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
#
|
| 203 |
-
#
|
| 204 |
-
#
|
| 205 |
-
#
|
| 206 |
-
#
|
|
|
|
|
|
|
| 207 |
|
| 208 |
except PlaywrightTimeoutError as e:
|
| 209 |
# --- START TEMPORARY DEBUGGING CODE ---
|
| 210 |
# This block captures state specifically when a Playwright timeout occurs
|
| 211 |
current_url = self.current_page.url if self.current_page else "N/A"
|
| 212 |
-
logger.error(f"DEBUGGING: Playwright Timeout (180s) during page readiness check at URL: {current_url}")
|
| 213 |
|
| 214 |
html_content = None
|
| 215 |
try:
|
|
@@ -222,18 +224,9 @@ class BrowserSession:
|
|
| 222 |
screenshot_b64 = "N/A"
|
| 223 |
try:
|
| 224 |
if self.current_page:
|
| 225 |
-
# Capture screenshot at lower quality (e.g., 50) to keep log size manageable.
|
| 226 |
-
# Higher quality might make logs too large for some platforms.
|
| 227 |
screenshot_bytes = await self.current_page.screenshot(type="jpeg", quality=50)
|
| 228 |
screenshot_b64 = base64.b64encode(screenshot_bytes).decode("utf-8")
|
| 229 |
-
# Log only a very short snippet of base64 string to confirm it's there
|
| 230 |
logger.error(f"DEBUGGING: Base64 Screenshot (truncated) when timeout occurred:\ndata:image/jpeg;base64,{screenshot_b64[:100]}... (full string is much longer)")
|
| 231 |
-
|
| 232 |
-
# If you want to view the full screenshot locally during development, you can save it:
|
| 233 |
-
# with open("debug_timeout_full_screenshot.jpeg", "wb") as f:
|
| 234 |
-
# f.write(screenshot_bytes)
|
| 235 |
-
# logger.error("DEBUGGING: Full screenshot saved to debug_timeout_full_screenshot.jpeg for local inspection.")
|
| 236 |
-
|
| 237 |
except Exception as ss_e:
|
| 238 |
logger.error(f"DEBUGGING: Could not take screenshot for debug: {ss_e}")
|
| 239 |
|
|
@@ -451,17 +444,17 @@ class BrowserSession:
|
|
| 451 |
await self.current_page.keyboard.press("Backspace")
|
| 452 |
|
| 453 |
|
| 454 |
-
if __name__ == "__main__":
|
| 455 |
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
|
| 467 |
-
|
|
|
|
| 15 |
from proxy_lite.browser.bounding_boxes import POI, BoundingBox, Point, annotate_bounding_boxes
|
| 16 |
from proxy_lite.logger import logger
|
| 17 |
|
| 18 |
+
import base64 # Make sure this import is present!
|
| 19 |
|
| 20 |
SELF_CONTAINED_TAGS = [
|
| 21 |
# many of these are non-interactive but keeping them anyway
|
|
|
|
| 144 |
# re-run for cases of mid-run redirects
|
| 145 |
@retry(
|
| 146 |
wait=wait_exponential(multiplier=1, min=1, max=10),
|
| 147 |
+
stop=stop_after_delay(5),
|
| 148 |
reraise=True,
|
| 149 |
before_sleep=before_sleep_log(logger, logging.ERROR),
|
| 150 |
)
|
|
|
|
| 187 |
)
|
| 188 |
async def update_poi(self) -> None:
|
| 189 |
try:
|
| 190 |
+
# Step 1: Wait for DOMContentLoaded. This ensures the basic HTML structure is parsed.
|
| 191 |
+
logger.debug("Attempting wait_for_load_state('domcontentloaded')...")
|
| 192 |
+
await self.current_page.wait_for_load_state("domcontentloaded", timeout=180000)
|
| 193 |
+
logger.debug("wait_for_load_state('domcontentloaded') completed.")
|
| 194 |
+
|
| 195 |
+
# Step 2: Wait for the specific text "Account Forecasting" to be visible on the page.
|
| 196 |
+
# This is a strong indicator that the core content for the task has loaded.
|
| 197 |
+
target_text = "Account Forecasting"
|
| 198 |
+
logger.debug(f"Attempting to wait for text: '{target_text}' to be visible...")
|
| 199 |
+
await self.current_page.wait_for_selector(f"text={target_text}", timeout=180000, state="visible")
|
| 200 |
+
logger.debug(f"Text '{target_text}' became visible.")
|
| 201 |
+
|
| 202 |
+
# Optional: You can still add a wait for network idle *after* the text is visible
|
| 203 |
+
# if the page still isn't interactive immediately, but prioritize the text.
|
| 204 |
+
# try:
|
| 205 |
+
# await self.current_page.wait_for_load_state("networkidle", timeout=60000) # Shorter timeout here
|
| 206 |
+
# logger.debug("wait_for_load_state('networkidle') completed after text appeared.")
|
| 207 |
+
# except PlaywrightTimeoutError:
|
| 208 |
+
# logger.warning("Network idle state not reached after text appeared, but proceeding.")
|
| 209 |
|
| 210 |
except PlaywrightTimeoutError as e:
|
| 211 |
# --- START TEMPORARY DEBUGGING CODE ---
|
| 212 |
# This block captures state specifically when a Playwright timeout occurs
|
| 213 |
current_url = self.current_page.url if self.current_page else "N/A"
|
| 214 |
+
logger.error(f"DEBUGGING: Playwright Timeout (180s) during page readiness check for text '{target_text}' at URL: {current_url}")
|
| 215 |
|
| 216 |
html_content = None
|
| 217 |
try:
|
|
|
|
| 224 |
screenshot_b64 = "N/A"
|
| 225 |
try:
|
| 226 |
if self.current_page:
|
|
|
|
|
|
|
| 227 |
screenshot_bytes = await self.current_page.screenshot(type="jpeg", quality=50)
|
| 228 |
screenshot_b64 = base64.b64encode(screenshot_bytes).decode("utf-8")
|
|
|
|
| 229 |
logger.error(f"DEBUGGING: Base64 Screenshot (truncated) when timeout occurred:\ndata:image/jpeg;base64,{screenshot_b64[:100]}... (full string is much longer)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
except Exception as ss_e:
|
| 231 |
logger.error(f"DEBUGGING: Could not take screenshot for debug: {ss_e}")
|
| 232 |
|
|
|
|
| 444 |
await self.current_page.keyboard.press("Backspace")
|
| 445 |
|
| 446 |
|
| 447 |
+
if __name__ == "__main__":
|
| 448 |
|
| 449 |
+
async def dummy_test():
|
| 450 |
+
async with BrowserSession(headless=False) as s:
|
| 451 |
+
page = await s.context.new_page()
|
| 452 |
+
await page.goto("http://google.co.uk")
|
| 453 |
+
await asyncio.sleep(5)
|
| 454 |
+
await page.screenshot(path="example.png")
|
| 455 |
+
await s.update_poi()
|
| 456 |
+
_, annotated_image = await s.screenshot()
|
| 457 |
+
with open("output.png", "wb") as f:
|
| 458 |
+
f.write(annotated_image)
|
| 459 |
|
| 460 |
+
asyncio.run(dummy_test())
|