Spaces:
Running
Running
| from typing import Dict, Any, Union | |
| import os | |
| from openspace.grounding.core.session import BaseSession | |
| from openspace.grounding.core.types import BackendType, SessionStatus, SessionConfig | |
| from openspace.utils.logging import Logger | |
| from .transport.connector import GUIConnector | |
| from .transport.local_connector import LocalGUIConnector | |
| from .tool import GUIAgentTool | |
| from .config import build_llm_config | |
| logger = Logger.get_logger(__name__) | |
| class GUISession(BaseSession): | |
| """ | |
| Session for GUI desktop environment. | |
| Manages connection and tools for GUI automation. | |
| """ | |
| def __init__( | |
| self, | |
| connector: Union[GUIConnector, LocalGUIConnector], | |
| session_id: str, | |
| backend_type: BackendType.GUI, | |
| config: SessionConfig, | |
| auto_connect: bool = True, | |
| auto_initialize: bool = True, | |
| ): | |
| """ | |
| Initialize GUI session. | |
| Args: | |
| connector: GUI HTTP connector | |
| session_id: Unique session identifier | |
| backend_type: Backend type (GUI) | |
| config: Session configuration | |
| auto_connect: Auto-connect on context enter | |
| auto_initialize: Auto-initialize on context enter | |
| """ | |
| super().__init__( | |
| connector=connector, | |
| session_id=session_id, | |
| backend_type=backend_type, | |
| auto_connect=auto_connect, | |
| auto_initialize=auto_initialize, | |
| ) | |
| self.config = config | |
| self.gui_connector = connector | |
| async def initialize(self) -> Dict[str, Any]: | |
| """ | |
| Initialize session: connect and discover tools. | |
| Returns: | |
| Session information dict | |
| """ | |
| logger.info(f"Initializing GUI session: {self.session_id}") | |
| # Ensure connected | |
| if not self.connector.is_connected: | |
| await self.connect() | |
| # Create LLM client if configured | |
| llm_client = None | |
| user_llm_config = self.config.connection_params.get("llm_config") | |
| # Build complete LLM config with auto-detection | |
| # If user provides llm_config, merge with auto-detected values | |
| # If user doesn't provide llm_config, try to auto-build one if ANTHROPIC_API_KEY exists | |
| if user_llm_config or os.environ.get("ANTHROPIC_API_KEY"): | |
| llm_config = build_llm_config(user_llm_config) | |
| if llm_config.get("type") == "anthropic": | |
| # Check if API key is available | |
| if not llm_config.get("api_key"): | |
| logger.warning( | |
| "Anthropic API key not found. Skipping LLM client initialization. " | |
| "Set ANTHROPIC_API_KEY environment variable or provide api_key in llm_config." | |
| ) | |
| else: | |
| try: | |
| from .anthropic_client import AnthropicGUIClient | |
| # Detect actual screen size from screenshot (most accurate) | |
| # PyAutoGUI may report logical resolution, but we need the actual screenshot size | |
| try: | |
| screenshot_bytes = await self.gui_connector.get_screenshot() | |
| if screenshot_bytes: | |
| from PIL import Image | |
| import io | |
| img = Image.open(io.BytesIO(screenshot_bytes)) | |
| actual_screen_size = img.size | |
| logger.info(f"Auto-detected screen size from screenshot: {actual_screen_size}") | |
| screen_size = actual_screen_size | |
| else: | |
| raise RuntimeError("Could not get screenshot") | |
| except Exception as e: | |
| # Fallback to pyautogui detection | |
| actual_screen_size = await self.gui_connector.get_screen_size() | |
| if actual_screen_size: | |
| logger.info(f"Auto-detected screen size from pyautogui: {actual_screen_size}") | |
| screen_size = actual_screen_size | |
| else: | |
| # Final fallback to configured value | |
| screen_size = llm_config.get("screen_size", (1920, 1080)) | |
| logger.warning(f"Could not auto-detect screen size, using configured: {screen_size}") | |
| # Detect PyAutoGUI working size (logical pixels) | |
| pyautogui_size = await self.gui_connector.get_screen_size() | |
| if pyautogui_size: | |
| logger.info(f"PyAutoGUI working size (logical): {pyautogui_size}") | |
| else: | |
| # If we can't detect PyAutoGUI size, assume it's the same as screen size | |
| pyautogui_size = screen_size | |
| logger.warning(f"Could not detect PyAutoGUI size, assuming same as screen: {pyautogui_size}") | |
| llm_client = AnthropicGUIClient( | |
| model=llm_config["model"], | |
| platform=llm_config["platform"], | |
| api_key=llm_config["api_key"], | |
| provider=llm_config["provider"], | |
| screen_size=screen_size, | |
| pyautogui_size=pyautogui_size, | |
| max_tokens=llm_config["max_tokens"], | |
| only_n_most_recent_images=llm_config["only_n_most_recent_images"], | |
| ) | |
| logger.info( | |
| f"Initialized Anthropic LLM client - " | |
| f"Model: {llm_config['model']}, Platform: {llm_config['platform']}" | |
| ) | |
| except Exception as e: | |
| logger.warning(f"Failed to initialize Anthropic client: {e}") | |
| # Get recording_manager from connection_params if available | |
| recording_manager = self.config.connection_params.get("recording_manager") | |
| # Create GUI Agent Tool | |
| self.tools = [ | |
| GUIAgentTool( | |
| connector=self.gui_connector, | |
| llm_client=llm_client, | |
| recording_manager=recording_manager | |
| ) | |
| ] | |
| logger.info(f"Initialized GUI session with {len(self.tools)} tool(s)") | |
| # Return session info | |
| session_info = { | |
| "session_id": self.session_id, | |
| "backend_type": self.backend_type.value, | |
| "vm_ip": self.gui_connector.vm_ip, | |
| "server_port": self.gui_connector.server_port, | |
| "num_tools": len(self.tools), | |
| "tools": [tool.name for tool in self.tools], | |
| "llm_client": "anthropic" if llm_client else "none", | |
| } | |
| return session_info | |
| async def connect(self) -> None: | |
| """Connect to GUI desktop environment""" | |
| if self.connector.is_connected: | |
| return | |
| self.status = SessionStatus.CONNECTING | |
| logger.info(f"Connecting to desktop_env at {self.gui_connector.base_url}") | |
| await self.connector.connect() | |
| self.status = SessionStatus.CONNECTED | |
| logger.info("Connected to desktop environment") | |
| async def disconnect(self) -> None: | |
| """Disconnect from GUI desktop environment""" | |
| if not self.connector.is_connected: | |
| return | |
| logger.info("Disconnecting from desktop environment") | |
| await self.connector.disconnect() | |
| self.status = SessionStatus.DISCONNECTED | |
| logger.info("Disconnected from desktop environment") | |
| def is_connected(self) -> bool: | |
| """Check if session is connected""" | |
| return self.connector.is_connected |