Spaces:

peijun1
/

AIstudioProxyAPI

Paused

App Files Files Community

AIstudioProxyAPI / browser_utils /debug_utils.py

peijun1

Deploy AI Studio Proxy API to Hugging Face Spaces

a5784e9 18 days ago

Raw

History Blame Contribute Delete

28.5 kB

	"""
	Debug utilities for comprehensive error snapshots and logging.

	This module provides enhanced debugging capabilities with:
	- Date-based directory structure (errors_py/YYYY-MM-DD/)
	- Multiple artifact types (screenshot, DOM dump, console logs, network state)
	- Human-readable Texas timestamps
	- Complete metadata capture

	Purpose: Fix headless mode debugging and client disconnect issues
	"""

	import asyncio
	import json
	import logging
	import os
	import traceback
	from asyncio import Lock, Queue
	from datetime import datetime
	from pathlib import Path
	from typing import Any, Dict, Optional, Protocol, Tuple, Union

	from playwright.async_api import Locator
	from playwright.async_api import Page as AsyncPage


	class SupportsSizeQuery(Protocol):
	"""Protocol for queue-like objects that support qsize()."""

	def qsize(self) -> int:
	"""Return the approximate size of the queue."""
	...


	class SupportsLockQuery(Protocol):
	"""Protocol for lock-like objects that support locked()."""

	def locked(self) -> bool:
	"""Return True if the lock is currently held."""
	...


	logger = logging.getLogger("AIStudioProxyServer")


	def get_local_timestamp() -> Tuple[str, str]:
	"""
	Get current timestamp in both ISO format and human-readable local time.

	Uses the system's local timezone automatically.

	Returns:
	Tuple[str, str]: (iso_format, human_readable_format)
	Example: ("2025-12-20T00:53:35.440", "2025-12-20 00:53:35.440 JST")
	"""
	# Get current local time
	local_now = datetime.now().astimezone()

	# ISO format (without timezone suffix for directory naming)
	iso_format = local_now.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3]

	# Human-readable format with timezone abbreviation
	human_format = (
	local_now.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3] + " " + local_now.strftime("%Z")
	)

	return iso_format, human_format


	# Keep alias for backwards compatibility
	get_texas_timestamp = get_local_timestamp


	async def capture_dom_structure(page: AsyncPage) -> str:
	"""
	Capture human-readable DOM tree structure.

	Unlike raw HTML, this provides a clean, indented tree view showing:
	- Element hierarchy
	- IDs and classes
	- Important attributes (disabled, value, etc.)

	Args:
	page: Playwright page instance

	Returns:
	str: Human-readable DOM tree structure
	"""
	try:
	dom_tree = await page.evaluate("""() => {
	function getTreeStructure(element, indent = '', depth = 0, maxDepth = 15) {
	// Prevent infinite recursion
	if (depth > maxDepth) {
	return indent + '... (max depth reached)\\n';
	}

	let result = indent + element.tagName;

	// Add ID
	if (element.id) {
	result += `#${element.id}`;
	}

	// Add classes
	if (element.className && typeof element.className === 'string') {
	const classes = element.className.trim().split(/\\s+/).filter(c => c);
	if (classes.length > 0) {
	result += '.' + classes.join('.');
	}
	}

	// Add important attributes
	const importantAttrs = ['aria-label', 'type', 'role', 'data-test-id'];
	for (const attr of importantAttrs) {
	const val = element.getAttribute(attr);
	if (val) {
	result += ` [${attr}="${val}"]`;
	}
	}

	// Add state attributes
	if (element.disabled !== undefined) {
	result += ` [disabled=${element.disabled}]`;
	}
	if (element.hasAttribute('aria-disabled')) {
	result += ` [aria-disabled=${element.getAttribute('aria-disabled')}]`;
	}

	// Add value for input elements (truncated)
	if (element.value && typeof element.value === 'string') {
	const truncated = element.value.substring(0, 50);
	const suffix = element.value.length > 50 ? '...' : '';
	result += ` value="${truncated}${suffix}"`;
	}

	result += '\\n';

	// Recurse for children
	for (let child of element.children) {
	result += getTreeStructure(child, indent + ' ', depth + 1, maxDepth);
	}

	return result;
	}

	return getTreeStructure(document.body);
	}""")

	return dom_tree
	except asyncio.CancelledError:
	raise
	except Exception as e:
	logger.error(f"Failed to capture DOM structure: {e}")
	return f"Error capturing DOM structure: {str(e)}\n"


	async def capture_system_context(
	req_id: str = "unknown", error_name: str = "unknown"
	) -> Dict[str, Any]:
	"""
	Captures the current system global state for debugging context.

	This function gathers critical system metrics, application state (locks, queues),
	and configuration details to help diagnose issues like client disconnects
	or headless mode failures.

	Args:
	req_id: Request ID associated with the context
	error_name: Name of the error triggering the capture

	Returns:
	Dict containing comprehensive system context
	"""
	# Import server locally to avoid circular dependency
	import platform
	import sys

	from api_utils.server_state import state

	iso_time, texas_time = get_texas_timestamp()

	# Helper to safely get queue size
	def get_qsize(q: Optional[Union[Queue[Any], SupportsSizeQuery]]) -> int:
	try:
	return q.qsize() if q else -1
	except (NotImplementedError, AttributeError):
	return -1 # Some queue types (like multiprocessing.Queue on macOS) might not support qsize

	# Helper to safely check lock state
	def is_locked(lock: Optional[Union[Lock, SupportsLockQuery]]) -> bool:
	try:
	return lock.locked() if lock else False
	except AttributeError:
	return False

	# Helper to sanitize proxy settings
	def _sanitize_proxy(settings: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
	if not settings:
	return None
	safe_settings = settings.copy()
	server_val = safe_settings.get("server")
	if isinstance(server_val, str) and "@" in server_val:
	# Redact credentials in http://user:pass@host format
	try:
	parts = server_val.split("@")
	scheme_creds = parts[0].split("://")
	if len(scheme_creds) == 2:
	safe_settings["server"] = f"{scheme_creds[0]}://*:*@{parts[1]}"
	except Exception:
	safe_settings["server"] = "REDACTED"
	return safe_settings

	context: Dict[str, Any] = {
	"meta": {
	"timestamp_iso": iso_time,
	"timestamp_texas": texas_time,
	"req_id": req_id,
	"error_name": error_name,
	},
	"system": {
	"platform": platform.platform(),
	"python_version": sys.version.split()[0],
	"pid": os.getpid(),
	},
	"application_state": {
	"flags": {
	"is_playwright_ready": state.is_playwright_ready,
	"is_browser_connected": state.is_browser_connected,
	"is_page_ready": state.is_page_ready,
	"is_initializing": state.is_initializing,
	},
	"queues": {
	"request_queue_size": get_qsize(state.request_queue),
	"stream_queue_active": state.STREAM_QUEUE is not None,
	},
	"locks": {
	"processing_lock_locked": is_locked(state.processing_lock),
	"model_switching_lock_locked": is_locked(state.model_switching_lock),
	},
	"active_model": {
	"current_id": state.current_ai_studio_model_id,
	"excluded_count": len(state.excluded_model_ids),
	},
	},
	"browser_state": {
	"connected": (
	state.browser_instance.is_connected()
	if state.browser_instance
	else False
	),
	"page_available": (
	not state.page_instance.is_closed() if state.page_instance else False
	),
	},
	"configuration": {
	"launch_args": {
	"headless": os.environ.get("HEADLESS", "unknown"),
	"debug_logs": os.environ.get("DEBUG_LOGS_ENABLED", "unknown"),
	},
	"proxy_settings": _sanitize_proxy(state.PLAYWRIGHT_PROXY_SETTINGS),
	},
	"recent_activity": {
	"console_logs_count": len(state.console_logs),
	"network_requests_count": len(state.network_log.get("requests", [])),
	},
	}

	# Add snippets of recent logs (last 5)
	if state.console_logs:
	context["recent_activity"]["last_console_logs"] = state.console_logs[-5:]

	# Filter for errors/warnings
	console_errors = [
	log
	for log in state.console_logs
	if str(log.get("type", "")).lower() in ("error", "warning")
	]
	if console_errors:
	context["recent_activity"]["recent_console_errors"] = console_errors[-5:]

	# Add failed network requests summary
	if state.network_log and "responses" in state.network_log:
	failed_responses = [
	resp
	for resp in state.network_log["responses"]
	if isinstance(resp.get("status"), int) and resp.get("status") >= 400
	]
	if failed_responses:
	context["recent_activity"]["failed_network_responses"] = failed_responses[
	-5:
	]

	# Add current page details if available
	if state.page_instance and not state.page_instance.is_closed():
	try:
	context["browser_state"]["current_url"] = state.page_instance.url
	except Exception:
	context["browser_state"]["current_url"] = "error_getting_url"

	return context


	async def capture_playwright_state(
	page: AsyncPage, locators: Optional[Dict[str, Locator]] = None
	) -> Dict[str, Any]:
	"""
	Capture current Playwright page and element states.

	Args:
	page: Playwright page instance
	locators: Optional dict of named locators to inspect
	Example: {"submit_button": loc, "input_field": loc}

	Returns:
	Dict containing page state and locator states
	"""
	state: Dict[str, Any] = {
	"page": {
	"url": page.url,
	"title": "",
	"viewport": page.viewport_size,
	},
	"locators": {},
	"storage": {
	"cookies_count": 0,
	"localStorage_keys": [],
	},
	}

	try:
	state["page"]["title"] = await page.title()
	except asyncio.CancelledError:
	raise
	except Exception as e:
	logger.warning(f"Failed to get page title: {e}")
	state["page"]["title"] = f"Error: {e}"

	# Capture locator states
	if locators:
	for name, locator in locators.items():
	loc_state: Dict[str, Any] = {
	"exists": False,
	"count": 0,
	"visible": False,
	"enabled": False,
	"value": None,
	}

	try:
	loc_state["count"] = await locator.count()
	loc_state["exists"] = loc_state["count"] > 0

	if loc_state["exists"]:
	# Check visibility with short timeout
	try:
	loc_state["visible"] = await locator.is_visible(timeout=1000)
	except asyncio.CancelledError:
	raise
	except Exception:
	loc_state["visible"] = False

	# Check enabled state
	try:
	loc_state["enabled"] = await locator.is_enabled(timeout=1000)
	except asyncio.CancelledError:
	raise
	except Exception:
	loc_state["enabled"] = False

	# Try to get value (for input elements)
	try:
	value = await locator.input_value(timeout=1000)
	if value:
	# Truncate long values
	loc_state["value"] = (
	value[:100] + "..." if len(value) > 100 else value
	)
	except asyncio.CancelledError:
	raise
	except Exception:
	pass # Not an input element or error

	except asyncio.CancelledError:
	raise
	except Exception as e:
	logger.warning(f"Failed to capture state for locator '{name}': {e}")
	loc_state["error"] = str(e)

	state["locators"][name] = loc_state

	# Capture storage info
	try:
	cookies = await page.context.cookies()
	state["storage"]["cookies_count"] = len(cookies)
	except asyncio.CancelledError:
	raise
	except Exception as e:
	logger.warning(f"Failed to get cookies: {e}")

	try:
	localStorage_keys = await page.evaluate("() => Object.keys(localStorage)")
	state["storage"]["localStorage_keys"] = localStorage_keys
	except asyncio.CancelledError:
	raise
	except Exception as e:
	logger.warning(f"Failed to get localStorage keys: {e}")

	return state


	async def save_comprehensive_snapshot(
	page: AsyncPage,
	error_name: str,
	req_id: str,
	error_stage: str = "",
	additional_context: Optional[Dict[str, Any]] = None,
	locators: Optional[Dict[str, Locator]] = None,
	error_exception: Optional[Exception] = None,
	) -> str:
	"""
	Save comprehensive error snapshot with all debugging artifacts.

	Directory structure:
	errors_py/YYYY-MM-DD/HH-MM-SS_reqid_errorname/
	├── screenshot.png
	├── dom_dump.html
	├── dom_structure.txt
	├── console_logs.txt
	├── network_requests.json
	├── playwright_state.json
	└── metadata.json

	Args:
	page: Playwright page instance
	error_name: Base error name (e.g., "stream_post_button_check_disconnect")
	req_id: Request ID
	error_stage: Description of error stage (e.g., "Post-stream response button state check")
	additional_context: Extra context to include in metadata
	locators: Dict of named locators to capture states for
	error_exception: Exception object (if available)

	Returns:
	str: Path to snapshot directory
	"""
	# Set request context for grid logging
	from logging_utils import set_request_id

	set_request_id(req_id if req_id else "DEBUG")

	# Check page availability
	if not page or page.is_closed():
	logger.warning(f"Cannot save snapshot ({error_name}), page is unavailable.")
	return ""

	logger.info(f"Saving comprehensive error snapshot ({error_name})...")

	# Get timestamps
	iso_timestamp, human_timestamp = get_texas_timestamp()
	time_component = iso_timestamp.split("T")[1].replace(":", "-").replace(".", "-")

	# Create date-based directory structure
	date_str = iso_timestamp.split("T")[0] # YYYY-MM-DD
	base_error_dir = Path(__file__).parent.parent / "errors_py"
	date_dir = base_error_dir / date_str
	snapshot_dir_name = f"{time_component}_{req_id}_{error_name}"
	snapshot_dir = date_dir / snapshot_dir_name

	try:
	# Create directory structure
	snapshot_dir.mkdir(parents=True, exist_ok=True)
	logger.info(f"Created snapshot directory: {snapshot_dir}")

	# === 1. Screenshot ===
	screenshot_path = snapshot_dir / "screenshot.png"
	try:
	await page.screenshot(
	path=str(screenshot_path), full_page=True, timeout=15000
	)
	logger.info("Screenshot saved")
	except asyncio.CancelledError:
	raise
	except Exception as ss_err:
	logger.error(f"Screenshot failed: {ss_err}")

	# === 2. HTML Dump ===
	html_path = snapshot_dir / "dom_dump.html"
	try:
	content = await page.content()
	with open(html_path, "w", encoding="utf-8") as f:
	f.write(content)
	logger.info("HTML dump saved")
	except asyncio.CancelledError:
	raise
	except Exception as html_err:
	logger.error(f"HTML dump failed: {html_err}")

	# === 3. DOM Structure (Human-Readable) ===
	dom_structure_path = snapshot_dir / "dom_structure.txt"
	try:
	dom_tree = await capture_dom_structure(page)
	with open(dom_structure_path, "w", encoding="utf-8") as f:
	f.write(dom_tree)
	logger.info("DOM structure saved")
	except asyncio.CancelledError:
	raise
	except Exception as dom_err:
	logger.error(f"DOM structure failed: {dom_err}")

	# === 4. Console Logs ===
	console_logs_path = snapshot_dir / "console_logs.txt"
	try:
	# Get console logs from global state
	from api_utils.server_state import state

	console_logs = state.console_logs

	if console_logs:
	with open(console_logs_path, "w", encoding="utf-8") as f:
	f.write("=== Browser Console Logs ===\n\n")
	for log_entry in console_logs:
	timestamp = log_entry.get("timestamp", "N/A")
	log_type = log_entry.get("type", "log")
	text = log_entry.get("text", "")
	location = log_entry.get("location", "")

	f.write(f"[{timestamp}] [{log_type.upper()}] {text}\n")
	if location:
	f.write(f" Location: {location}\n")
	f.write("\n")

	logger.info(f"Console logs saved ({len(console_logs)} entries)")
	else:
	with open(console_logs_path, "w", encoding="utf-8") as f:
	f.write("No console logs captured.\n")
	logger.info("No console logs available")
	except Exception as console_err:
	logger.error(f"Console logs failed: {console_err}")

	# === 5. Network Requests ===
	network_path = snapshot_dir / "network_requests.json"
	try:
	from api_utils.server_state import state

	network_log = state.network_log

	with open(network_path, "w", encoding="utf-8") as f:
	json.dump(network_log, f, indent=2, ensure_ascii=False)

	req_count = len(network_log.get("requests", []))
	resp_count = len(network_log.get("responses", []))
	logger.info(f"Network log saved ({req_count} reqs, {resp_count} resps)")
	except Exception as net_err:
	logger.error(f"Network log failed: {net_err}")

	# === 6. Playwright State ===
	playwright_state_path = snapshot_dir / "playwright_state.json"
	try:
	pw_state = await capture_playwright_state(page, locators)
	with open(playwright_state_path, "w", encoding="utf-8") as f:
	json.dump(pw_state, f, indent=2, ensure_ascii=False)
	logger.info("Playwright state saved")
	except asyncio.CancelledError:
	raise
	except Exception as pw_err:
	logger.error(f"Playwright state failed: {pw_err}")

	# === 7. System Context (LLM Context) ===
	context_path = snapshot_dir / "llm.json"
	try:
	system_context = await capture_system_context(req_id, error_name)
	with open(context_path, "w", encoding="utf-8") as f:
	json.dump(system_context, f, indent=2, ensure_ascii=False)
	logger.info("LLM context saved")
	except asyncio.CancelledError:
	raise
	except Exception as ctx_err:
	logger.error(f"LLM context failed: {ctx_err}")

	# === 8. Metadata ===
	metadata_path = snapshot_dir / "metadata.json"
	try:
	# Build metadata
	metadata = {
	"req_id": req_id,
	"error_name": error_name,
	"error_stage": error_stage,
	"timestamp": {
	"iso": iso_timestamp,
	"human": human_timestamp,
	},
	"headless_mode": os.environ.get("HEADLESS", "true").lower() == "true",
	"launch_mode": os.environ.get("LAUNCH_MODE", "unknown"),
	"environment": {
	"RESPONSE_COMPLETION_TIMEOUT": os.environ.get(
	"RESPONSE_COMPLETION_TIMEOUT", "300000"
	),
	"DEBUG_LOGS_ENABLED": os.environ.get(
	"DEBUG_LOGS_ENABLED", "false"
	).lower()
	== "true",
	"DEFAULT_MODEL": os.environ.get("DEFAULT_MODEL", "unknown"),
	},
	}

	# Add exception info if available
	if error_exception:
	metadata["exception"] = {
	"type": type(error_exception).__name__,
	"message": str(error_exception),
	"traceback": traceback.format_exc(),
	}

	# Add additional context
	if additional_context:
	metadata["additional_context"] = additional_context

	with open(metadata_path, "w", encoding="utf-8") as f:
	json.dump(metadata, f, indent=2, ensure_ascii=False)
	logger.info("Metadata saved")
	except Exception as meta_err:
	logger.error(f"Metadata failed: {meta_err}")

	# === 9. Human-Readable SUMMARY.txt ===
	summary_path = snapshot_dir / "SUMMARY.txt"
	try:
	summary_lines = [
	"=" * 70,
	"ERROR SNAPSHOT SUMMARY",
	"=" * 70,
	"",
	f"Timestamp: {human_timestamp}",
	f"Request ID: {req_id}",
	f"Error Name: {error_name}",
	f"Error Stage: {error_stage or 'N/A'}",
	"Snapshot Type: COMPREHENSIVE",
	"",
	]

	# Exception info
	if error_exception:
	summary_lines.extend(
	[
	"-" * 70,
	"EXCEPTION",
	"-" * 70,
	f"Type: {type(error_exception).__name__}",
	f"Message: {error_exception}",
	"",
	]
	)

	# Application state summary
	summary_lines.extend(
	[
	"-" * 70,
	"QUICK REFERENCE",
	"-" * 70,
	f"Headless Mode: {os.environ.get('HEADLESS', 'true')}",
	f"Default Model: {os.environ.get('DEFAULT_MODEL', 'unknown')}",
	"",
	"-" * 70,
	"FILES IN THIS SNAPSHOT",
	"-" * 70,
	" SUMMARY.txt - This file (start here!)",
	" screenshot.png - Full page screenshot",
	" dom_dump.html - Complete HTML source",
	" dom_structure.txt - Human-readable DOM tree",
	" console_logs.txt - Browser console output",
	" network_requests.json - Network activity",
	" playwright_state.json - Page/locator states",
	" llm.json - System context for AI analysis",
	" metadata.json - Full error details",
	"",
	"-" * 70,
	"DEBUGGING TIPS",
	"-" * 70,
	"1. Check screenshot.png for visual state",
	"2. Search dom_structure.txt for element issues",
	"3. Review console_logs.txt for JS errors",
	"4. Check network_requests.json for API failures",
	"",
	"=" * 70,
	]
	)

	with open(summary_path, "w", encoding="utf-8") as f:
	f.write("\n".join(summary_lines))
	logger.info("SUMMARY.txt saved")
	except Exception as summary_err:
	logger.error(f"SUMMARY.txt failed: {summary_err}")

	logger.info(f"Comprehensive snapshot complete: {snapshot_dir.name}")
	return str(snapshot_dir)

	except asyncio.CancelledError:
	raise
	except Exception as e:
	logger.error(f"Failed to create snapshot directory: {e}", exc_info=True)
	return ""


	async def save_error_snapshot_enhanced(
	error_name: str = "error",
	error_exception: Optional[Exception] = None,
	error_stage: str = "",
	additional_context: Optional[Dict[str, Any]] = None,
	locators: Optional[Dict[str, Locator]] = None,
	) -> None:
	"""
	Enhanced error snapshot function supporting rich context capture.

	This function bridges the simple interface with the comprehensive snapshot system,
	allowing callers to pass exception objects and locator states for debugging.

	Args:
	error_name: Error name with optional req_id suffix (e.g., "clear_chat_error_hbfu521")
	error_exception: The exception that triggered the snapshot (optional)
	error_stage: Description of the error stage (optional)
	additional_context: Extra context dict to include in metadata (optional)
	locators: Dict of named locators to capture states for (optional)
	"""
	from api_utils.server_state import state

	# Parse req_id from error_name if present (format: "error_name_req_id")
	name_parts = error_name.split("_")
	req_id = (
	name_parts[-1]
	if len(name_parts) > 1 and len(name_parts[-1]) == 7
	else "unknown"
	)
	base_error_name = error_name if req_id == "unknown" else "_".join(name_parts[:-1])

	page_to_snapshot = state.page_instance

	if (
	not hasattr(state, "browser_instance")
	or not state.browser_instance
	or not state.browser_instance.is_connected()
	or not page_to_snapshot
	or page_to_snapshot.is_closed()
	):
	logger.warning(
	f"[{req_id}] Browser/page unavailable ({base_error_name}), saving minimal snapshot..."
	)
	# Fallback to minimal snapshot
	from browser_utils.operations_modules.errors import save_minimal_snapshot

	await save_minimal_snapshot(
	error_name=base_error_name,
	req_id=req_id,
	error_exception=error_exception,
	additional_context=additional_context,
	)
	return

	# Merge additional context with exception info
	merged_context = additional_context.copy() if additional_context else {}

	# Add exception details to context if provided
	if error_exception:
	merged_context["exception_type"] = type(error_exception).__name__
	merged_context["exception_message"] = str(error_exception)
	# Include args if available (some exceptions store useful info here)
	if hasattr(error_exception, "args") and error_exception.args:
	merged_context["exception_args"] = [
	str(a) for a in error_exception.args[:3]
	]

	# Call comprehensive snapshot with all context
	await save_comprehensive_snapshot(
	page=page_to_snapshot,
	error_name=base_error_name,
	req_id=req_id,
	error_stage=error_stage or "Enhanced snapshot call",
	additional_context=merged_context if merged_context else None,
	locators=locators,
	error_exception=error_exception,
	)