AIstudioProxyAPI / browser_utils /debug_utils.py
peijun1's picture
Deploy AI Studio Proxy API to Hugging Face Spaces
a5784e9
Raw
History Blame Contribute Delete
28.5 kB
"""
Debug utilities for comprehensive error snapshots and logging.
This module provides enhanced debugging capabilities with:
- Date-based directory structure (errors_py/YYYY-MM-DD/)
- Multiple artifact types (screenshot, DOM dump, console logs, network state)
- Human-readable Texas timestamps
- Complete metadata capture
Purpose: Fix headless mode debugging and client disconnect issues
"""
import asyncio
import json
import logging
import os
import traceback
from asyncio import Lock, Queue
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Optional, Protocol, Tuple, Union
from playwright.async_api import Locator
from playwright.async_api import Page as AsyncPage
class SupportsSizeQuery(Protocol):
"""Protocol for queue-like objects that support qsize()."""
def qsize(self) -> int:
"""Return the approximate size of the queue."""
...
class SupportsLockQuery(Protocol):
"""Protocol for lock-like objects that support locked()."""
def locked(self) -> bool:
"""Return True if the lock is currently held."""
...
logger = logging.getLogger("AIStudioProxyServer")
def get_local_timestamp() -> Tuple[str, str]:
"""
Get current timestamp in both ISO format and human-readable local time.
Uses the system's local timezone automatically.
Returns:
Tuple[str, str]: (iso_format, human_readable_format)
Example: ("2025-12-20T00:53:35.440", "2025-12-20 00:53:35.440 JST")
"""
# Get current local time
local_now = datetime.now().astimezone()
# ISO format (without timezone suffix for directory naming)
iso_format = local_now.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3]
# Human-readable format with timezone abbreviation
human_format = (
local_now.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3] + " " + local_now.strftime("%Z")
)
return iso_format, human_format
# Keep alias for backwards compatibility
get_texas_timestamp = get_local_timestamp
async def capture_dom_structure(page: AsyncPage) -> str:
"""
Capture human-readable DOM tree structure.
Unlike raw HTML, this provides a clean, indented tree view showing:
- Element hierarchy
- IDs and classes
- Important attributes (disabled, value, etc.)
Args:
page: Playwright page instance
Returns:
str: Human-readable DOM tree structure
"""
try:
dom_tree = await page.evaluate("""() => {
function getTreeStructure(element, indent = '', depth = 0, maxDepth = 15) {
// Prevent infinite recursion
if (depth > maxDepth) {
return indent + '... (max depth reached)\\n';
}
let result = indent + element.tagName;
// Add ID
if (element.id) {
result += `#${element.id}`;
}
// Add classes
if (element.className && typeof element.className === 'string') {
const classes = element.className.trim().split(/\\s+/).filter(c => c);
if (classes.length > 0) {
result += '.' + classes.join('.');
}
}
// Add important attributes
const importantAttrs = ['aria-label', 'type', 'role', 'data-test-id'];
for (const attr of importantAttrs) {
const val = element.getAttribute(attr);
if (val) {
result += ` [${attr}="${val}"]`;
}
}
// Add state attributes
if (element.disabled !== undefined) {
result += ` [disabled=${element.disabled}]`;
}
if (element.hasAttribute('aria-disabled')) {
result += ` [aria-disabled=${element.getAttribute('aria-disabled')}]`;
}
// Add value for input elements (truncated)
if (element.value && typeof element.value === 'string') {
const truncated = element.value.substring(0, 50);
const suffix = element.value.length > 50 ? '...' : '';
result += ` value="${truncated}${suffix}"`;
}
result += '\\n';
// Recurse for children
for (let child of element.children) {
result += getTreeStructure(child, indent + ' ', depth + 1, maxDepth);
}
return result;
}
return getTreeStructure(document.body);
}""")
return dom_tree
except asyncio.CancelledError:
raise
except Exception as e:
logger.error(f"Failed to capture DOM structure: {e}")
return f"Error capturing DOM structure: {str(e)}\n"
async def capture_system_context(
req_id: str = "unknown", error_name: str = "unknown"
) -> Dict[str, Any]:
"""
Captures the current system global state for debugging context.
This function gathers critical system metrics, application state (locks, queues),
and configuration details to help diagnose issues like client disconnects
or headless mode failures.
Args:
req_id: Request ID associated with the context
error_name: Name of the error triggering the capture
Returns:
Dict containing comprehensive system context
"""
# Import server locally to avoid circular dependency
import platform
import sys
from api_utils.server_state import state
iso_time, texas_time = get_texas_timestamp()
# Helper to safely get queue size
def get_qsize(q: Optional[Union[Queue[Any], SupportsSizeQuery]]) -> int:
try:
return q.qsize() if q else -1
except (NotImplementedError, AttributeError):
return -1 # Some queue types (like multiprocessing.Queue on macOS) might not support qsize
# Helper to safely check lock state
def is_locked(lock: Optional[Union[Lock, SupportsLockQuery]]) -> bool:
try:
return lock.locked() if lock else False
except AttributeError:
return False
# Helper to sanitize proxy settings
def _sanitize_proxy(settings: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
if not settings:
return None
safe_settings = settings.copy()
server_val = safe_settings.get("server")
if isinstance(server_val, str) and "@" in server_val:
# Redact credentials in http://user:pass@host format
try:
parts = server_val.split("@")
scheme_creds = parts[0].split("://")
if len(scheme_creds) == 2:
safe_settings["server"] = f"{scheme_creds[0]}://***:***@{parts[1]}"
except Exception:
safe_settings["server"] = "REDACTED"
return safe_settings
context: Dict[str, Any] = {
"meta": {
"timestamp_iso": iso_time,
"timestamp_texas": texas_time,
"req_id": req_id,
"error_name": error_name,
},
"system": {
"platform": platform.platform(),
"python_version": sys.version.split()[0],
"pid": os.getpid(),
},
"application_state": {
"flags": {
"is_playwright_ready": state.is_playwright_ready,
"is_browser_connected": state.is_browser_connected,
"is_page_ready": state.is_page_ready,
"is_initializing": state.is_initializing,
},
"queues": {
"request_queue_size": get_qsize(state.request_queue),
"stream_queue_active": state.STREAM_QUEUE is not None,
},
"locks": {
"processing_lock_locked": is_locked(state.processing_lock),
"model_switching_lock_locked": is_locked(state.model_switching_lock),
},
"active_model": {
"current_id": state.current_ai_studio_model_id,
"excluded_count": len(state.excluded_model_ids),
},
},
"browser_state": {
"connected": (
state.browser_instance.is_connected()
if state.browser_instance
else False
),
"page_available": (
not state.page_instance.is_closed() if state.page_instance else False
),
},
"configuration": {
"launch_args": {
"headless": os.environ.get("HEADLESS", "unknown"),
"debug_logs": os.environ.get("DEBUG_LOGS_ENABLED", "unknown"),
},
"proxy_settings": _sanitize_proxy(state.PLAYWRIGHT_PROXY_SETTINGS),
},
"recent_activity": {
"console_logs_count": len(state.console_logs),
"network_requests_count": len(state.network_log.get("requests", [])),
},
}
# Add snippets of recent logs (last 5)
if state.console_logs:
context["recent_activity"]["last_console_logs"] = state.console_logs[-5:]
# Filter for errors/warnings
console_errors = [
log
for log in state.console_logs
if str(log.get("type", "")).lower() in ("error", "warning")
]
if console_errors:
context["recent_activity"]["recent_console_errors"] = console_errors[-5:]
# Add failed network requests summary
if state.network_log and "responses" in state.network_log:
failed_responses = [
resp
for resp in state.network_log["responses"]
if isinstance(resp.get("status"), int) and resp.get("status") >= 400
]
if failed_responses:
context["recent_activity"]["failed_network_responses"] = failed_responses[
-5:
]
# Add current page details if available
if state.page_instance and not state.page_instance.is_closed():
try:
context["browser_state"]["current_url"] = state.page_instance.url
except Exception:
context["browser_state"]["current_url"] = "error_getting_url"
return context
async def capture_playwright_state(
page: AsyncPage, locators: Optional[Dict[str, Locator]] = None
) -> Dict[str, Any]:
"""
Capture current Playwright page and element states.
Args:
page: Playwright page instance
locators: Optional dict of named locators to inspect
Example: {"submit_button": loc, "input_field": loc}
Returns:
Dict containing page state and locator states
"""
state: Dict[str, Any] = {
"page": {
"url": page.url,
"title": "",
"viewport": page.viewport_size,
},
"locators": {},
"storage": {
"cookies_count": 0,
"localStorage_keys": [],
},
}
try:
state["page"]["title"] = await page.title()
except asyncio.CancelledError:
raise
except Exception as e:
logger.warning(f"Failed to get page title: {e}")
state["page"]["title"] = f"Error: {e}"
# Capture locator states
if locators:
for name, locator in locators.items():
loc_state: Dict[str, Any] = {
"exists": False,
"count": 0,
"visible": False,
"enabled": False,
"value": None,
}
try:
loc_state["count"] = await locator.count()
loc_state["exists"] = loc_state["count"] > 0
if loc_state["exists"]:
# Check visibility with short timeout
try:
loc_state["visible"] = await locator.is_visible(timeout=1000)
except asyncio.CancelledError:
raise
except Exception:
loc_state["visible"] = False
# Check enabled state
try:
loc_state["enabled"] = await locator.is_enabled(timeout=1000)
except asyncio.CancelledError:
raise
except Exception:
loc_state["enabled"] = False
# Try to get value (for input elements)
try:
value = await locator.input_value(timeout=1000)
if value:
# Truncate long values
loc_state["value"] = (
value[:100] + "..." if len(value) > 100 else value
)
except asyncio.CancelledError:
raise
except Exception:
pass # Not an input element or error
except asyncio.CancelledError:
raise
except Exception as e:
logger.warning(f"Failed to capture state for locator '{name}': {e}")
loc_state["error"] = str(e)
state["locators"][name] = loc_state
# Capture storage info
try:
cookies = await page.context.cookies()
state["storage"]["cookies_count"] = len(cookies)
except asyncio.CancelledError:
raise
except Exception as e:
logger.warning(f"Failed to get cookies: {e}")
try:
localStorage_keys = await page.evaluate("() => Object.keys(localStorage)")
state["storage"]["localStorage_keys"] = localStorage_keys
except asyncio.CancelledError:
raise
except Exception as e:
logger.warning(f"Failed to get localStorage keys: {e}")
return state
async def save_comprehensive_snapshot(
page: AsyncPage,
error_name: str,
req_id: str,
error_stage: str = "",
additional_context: Optional[Dict[str, Any]] = None,
locators: Optional[Dict[str, Locator]] = None,
error_exception: Optional[Exception] = None,
) -> str:
"""
Save comprehensive error snapshot with all debugging artifacts.
Directory structure:
errors_py/YYYY-MM-DD/HH-MM-SS_reqid_errorname/
├── screenshot.png
├── dom_dump.html
├── dom_structure.txt
├── console_logs.txt
├── network_requests.json
├── playwright_state.json
└── metadata.json
Args:
page: Playwright page instance
error_name: Base error name (e.g., "stream_post_button_check_disconnect")
req_id: Request ID
error_stage: Description of error stage (e.g., "Post-stream response button state check")
additional_context: Extra context to include in metadata
locators: Dict of named locators to capture states for
error_exception: Exception object (if available)
Returns:
str: Path to snapshot directory
"""
# Set request context for grid logging
from logging_utils import set_request_id
set_request_id(req_id if req_id else "DEBUG")
# Check page availability
if not page or page.is_closed():
logger.warning(f"Cannot save snapshot ({error_name}), page is unavailable.")
return ""
logger.info(f"Saving comprehensive error snapshot ({error_name})...")
# Get timestamps
iso_timestamp, human_timestamp = get_texas_timestamp()
time_component = iso_timestamp.split("T")[1].replace(":", "-").replace(".", "-")
# Create date-based directory structure
date_str = iso_timestamp.split("T")[0] # YYYY-MM-DD
base_error_dir = Path(__file__).parent.parent / "errors_py"
date_dir = base_error_dir / date_str
snapshot_dir_name = f"{time_component}_{req_id}_{error_name}"
snapshot_dir = date_dir / snapshot_dir_name
try:
# Create directory structure
snapshot_dir.mkdir(parents=True, exist_ok=True)
logger.info(f"Created snapshot directory: {snapshot_dir}")
# === 1. Screenshot ===
screenshot_path = snapshot_dir / "screenshot.png"
try:
await page.screenshot(
path=str(screenshot_path), full_page=True, timeout=15000
)
logger.info("Screenshot saved")
except asyncio.CancelledError:
raise
except Exception as ss_err:
logger.error(f"Screenshot failed: {ss_err}")
# === 2. HTML Dump ===
html_path = snapshot_dir / "dom_dump.html"
try:
content = await page.content()
with open(html_path, "w", encoding="utf-8") as f:
f.write(content)
logger.info("HTML dump saved")
except asyncio.CancelledError:
raise
except Exception as html_err:
logger.error(f"HTML dump failed: {html_err}")
# === 3. DOM Structure (Human-Readable) ===
dom_structure_path = snapshot_dir / "dom_structure.txt"
try:
dom_tree = await capture_dom_structure(page)
with open(dom_structure_path, "w", encoding="utf-8") as f:
f.write(dom_tree)
logger.info("DOM structure saved")
except asyncio.CancelledError:
raise
except Exception as dom_err:
logger.error(f"DOM structure failed: {dom_err}")
# === 4. Console Logs ===
console_logs_path = snapshot_dir / "console_logs.txt"
try:
# Get console logs from global state
from api_utils.server_state import state
console_logs = state.console_logs
if console_logs:
with open(console_logs_path, "w", encoding="utf-8") as f:
f.write("=== Browser Console Logs ===\n\n")
for log_entry in console_logs:
timestamp = log_entry.get("timestamp", "N/A")
log_type = log_entry.get("type", "log")
text = log_entry.get("text", "")
location = log_entry.get("location", "")
f.write(f"[{timestamp}] [{log_type.upper()}] {text}\n")
if location:
f.write(f" Location: {location}\n")
f.write("\n")
logger.info(f"Console logs saved ({len(console_logs)} entries)")
else:
with open(console_logs_path, "w", encoding="utf-8") as f:
f.write("No console logs captured.\n")
logger.info("No console logs available")
except Exception as console_err:
logger.error(f"Console logs failed: {console_err}")
# === 5. Network Requests ===
network_path = snapshot_dir / "network_requests.json"
try:
from api_utils.server_state import state
network_log = state.network_log
with open(network_path, "w", encoding="utf-8") as f:
json.dump(network_log, f, indent=2, ensure_ascii=False)
req_count = len(network_log.get("requests", []))
resp_count = len(network_log.get("responses", []))
logger.info(f"Network log saved ({req_count} reqs, {resp_count} resps)")
except Exception as net_err:
logger.error(f"Network log failed: {net_err}")
# === 6. Playwright State ===
playwright_state_path = snapshot_dir / "playwright_state.json"
try:
pw_state = await capture_playwright_state(page, locators)
with open(playwright_state_path, "w", encoding="utf-8") as f:
json.dump(pw_state, f, indent=2, ensure_ascii=False)
logger.info("Playwright state saved")
except asyncio.CancelledError:
raise
except Exception as pw_err:
logger.error(f"Playwright state failed: {pw_err}")
# === 7. System Context (LLM Context) ===
context_path = snapshot_dir / "llm.json"
try:
system_context = await capture_system_context(req_id, error_name)
with open(context_path, "w", encoding="utf-8") as f:
json.dump(system_context, f, indent=2, ensure_ascii=False)
logger.info("LLM context saved")
except asyncio.CancelledError:
raise
except Exception as ctx_err:
logger.error(f"LLM context failed: {ctx_err}")
# === 8. Metadata ===
metadata_path = snapshot_dir / "metadata.json"
try:
# Build metadata
metadata = {
"req_id": req_id,
"error_name": error_name,
"error_stage": error_stage,
"timestamp": {
"iso": iso_timestamp,
"human": human_timestamp,
},
"headless_mode": os.environ.get("HEADLESS", "true").lower() == "true",
"launch_mode": os.environ.get("LAUNCH_MODE", "unknown"),
"environment": {
"RESPONSE_COMPLETION_TIMEOUT": os.environ.get(
"RESPONSE_COMPLETION_TIMEOUT", "300000"
),
"DEBUG_LOGS_ENABLED": os.environ.get(
"DEBUG_LOGS_ENABLED", "false"
).lower()
== "true",
"DEFAULT_MODEL": os.environ.get("DEFAULT_MODEL", "unknown"),
},
}
# Add exception info if available
if error_exception:
metadata["exception"] = {
"type": type(error_exception).__name__,
"message": str(error_exception),
"traceback": traceback.format_exc(),
}
# Add additional context
if additional_context:
metadata["additional_context"] = additional_context
with open(metadata_path, "w", encoding="utf-8") as f:
json.dump(metadata, f, indent=2, ensure_ascii=False)
logger.info("Metadata saved")
except Exception as meta_err:
logger.error(f"Metadata failed: {meta_err}")
# === 9. Human-Readable SUMMARY.txt ===
summary_path = snapshot_dir / "SUMMARY.txt"
try:
summary_lines = [
"=" * 70,
"ERROR SNAPSHOT SUMMARY",
"=" * 70,
"",
f"Timestamp: {human_timestamp}",
f"Request ID: {req_id}",
f"Error Name: {error_name}",
f"Error Stage: {error_stage or 'N/A'}",
"Snapshot Type: COMPREHENSIVE",
"",
]
# Exception info
if error_exception:
summary_lines.extend(
[
"-" * 70,
"EXCEPTION",
"-" * 70,
f"Type: {type(error_exception).__name__}",
f"Message: {error_exception}",
"",
]
)
# Application state summary
summary_lines.extend(
[
"-" * 70,
"QUICK REFERENCE",
"-" * 70,
f"Headless Mode: {os.environ.get('HEADLESS', 'true')}",
f"Default Model: {os.environ.get('DEFAULT_MODEL', 'unknown')}",
"",
"-" * 70,
"FILES IN THIS SNAPSHOT",
"-" * 70,
" SUMMARY.txt - This file (start here!)",
" screenshot.png - Full page screenshot",
" dom_dump.html - Complete HTML source",
" dom_structure.txt - Human-readable DOM tree",
" console_logs.txt - Browser console output",
" network_requests.json - Network activity",
" playwright_state.json - Page/locator states",
" llm.json - System context for AI analysis",
" metadata.json - Full error details",
"",
"-" * 70,
"DEBUGGING TIPS",
"-" * 70,
"1. Check screenshot.png for visual state",
"2. Search dom_structure.txt for element issues",
"3. Review console_logs.txt for JS errors",
"4. Check network_requests.json for API failures",
"",
"=" * 70,
]
)
with open(summary_path, "w", encoding="utf-8") as f:
f.write("\n".join(summary_lines))
logger.info("SUMMARY.txt saved")
except Exception as summary_err:
logger.error(f"SUMMARY.txt failed: {summary_err}")
logger.info(f"Comprehensive snapshot complete: {snapshot_dir.name}")
return str(snapshot_dir)
except asyncio.CancelledError:
raise
except Exception as e:
logger.error(f"Failed to create snapshot directory: {e}", exc_info=True)
return ""
async def save_error_snapshot_enhanced(
error_name: str = "error",
error_exception: Optional[Exception] = None,
error_stage: str = "",
additional_context: Optional[Dict[str, Any]] = None,
locators: Optional[Dict[str, Locator]] = None,
) -> None:
"""
Enhanced error snapshot function supporting rich context capture.
This function bridges the simple interface with the comprehensive snapshot system,
allowing callers to pass exception objects and locator states for debugging.
Args:
error_name: Error name with optional req_id suffix (e.g., "clear_chat_error_hbfu521")
error_exception: The exception that triggered the snapshot (optional)
error_stage: Description of the error stage (optional)
additional_context: Extra context dict to include in metadata (optional)
locators: Dict of named locators to capture states for (optional)
"""
from api_utils.server_state import state
# Parse req_id from error_name if present (format: "error_name_req_id")
name_parts = error_name.split("_")
req_id = (
name_parts[-1]
if len(name_parts) > 1 and len(name_parts[-1]) == 7
else "unknown"
)
base_error_name = error_name if req_id == "unknown" else "_".join(name_parts[:-1])
page_to_snapshot = state.page_instance
if (
not hasattr(state, "browser_instance")
or not state.browser_instance
or not state.browser_instance.is_connected()
or not page_to_snapshot
or page_to_snapshot.is_closed()
):
logger.warning(
f"[{req_id}] Browser/page unavailable ({base_error_name}), saving minimal snapshot..."
)
# Fallback to minimal snapshot
from browser_utils.operations_modules.errors import save_minimal_snapshot
await save_minimal_snapshot(
error_name=base_error_name,
req_id=req_id,
error_exception=error_exception,
additional_context=additional_context,
)
return
# Merge additional context with exception info
merged_context = additional_context.copy() if additional_context else {}
# Add exception details to context if provided
if error_exception:
merged_context["exception_type"] = type(error_exception).__name__
merged_context["exception_message"] = str(error_exception)
# Include args if available (some exceptions store useful info here)
if hasattr(error_exception, "args") and error_exception.args:
merged_context["exception_args"] = [
str(a) for a in error_exception.args[:3]
]
# Call comprehensive snapshot with all context
await save_comprehensive_snapshot(
page=page_to_snapshot,
error_name=base_error_name,
req_id=req_id,
error_stage=error_stage or "Enhanced snapshot call",
additional_context=merged_context if merged_context else None,
locators=locators,
error_exception=error_exception,
)