Spaces:
Build error
Build error
Enhancement: Add VNC desktop environment integration
Browse files- computer_agent.py +109 -226
computer_agent.py
CHANGED
|
@@ -1,3 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import asyncio
|
| 2 |
import json
|
| 3 |
import base64
|
|
@@ -18,20 +24,6 @@ from playwright.async_api import async_playwright, Browser, BrowserContext, Page
|
|
| 18 |
import requests
|
| 19 |
from huggingface_hub import hf_hub_download, login
|
| 20 |
|
| 21 |
-
# Optional imports for GUI automation
|
| 22 |
-
PYAUTOGUI_AVAILABLE = False
|
| 23 |
-
try:
|
| 24 |
-
# Set DISPLAY before importing pyautogui
|
| 25 |
-
if 'DISPLAY' not in os.environ:
|
| 26 |
-
os.environ['DISPLAY'] = ':99'
|
| 27 |
-
import pyautogui
|
| 28 |
-
PYAUTOGUI_AVAILABLE = True
|
| 29 |
-
except ImportError:
|
| 30 |
-
print("Warning: pyautogui not available, GUI automation disabled")
|
| 31 |
-
except Exception as e:
|
| 32 |
-
print(f"Warning: pyautogui import failed: {e}, GUI automation disabled")
|
| 33 |
-
PYAUTOGUI_AVAILABLE = False
|
| 34 |
-
|
| 35 |
# Setup logging
|
| 36 |
logging.basicConfig(level=logging.INFO)
|
| 37 |
logger = logging.getLogger(__name__)
|
|
@@ -45,17 +37,19 @@ class AgentState:
|
|
| 45 |
is_running: bool = False
|
| 46 |
screenshot_count: int = 0
|
| 47 |
action_history: List[str] = None
|
|
|
|
| 48 |
|
| 49 |
def __post_init__(self):
|
| 50 |
if self.action_history is None:
|
| 51 |
self.action_history = []
|
| 52 |
|
| 53 |
class ComputerUsingAgent:
|
| 54 |
-
"""Computer-Using Agent
|
| 55 |
|
| 56 |
def __init__(self):
|
| 57 |
self.state = AgentState()
|
| 58 |
self.setup_logging()
|
|
|
|
| 59 |
|
| 60 |
def setup_logging(self):
|
| 61 |
"""Setup logging configuration"""
|
|
@@ -124,7 +118,7 @@ class ComputerUsingAgent:
|
|
| 124 |
url = 'https://' + url
|
| 125 |
|
| 126 |
await self.state.page.goto(url, wait_until='networkidle', timeout=30000)
|
| 127 |
-
await self.state.page.wait_for_timeout(2000)
|
| 128 |
|
| 129 |
# Get page title and URL
|
| 130 |
title = await self.state.page.title()
|
|
@@ -164,142 +158,42 @@ class ComputerUsingAgent:
|
|
| 164 |
logger.error(f"Failed to take screenshot: {str(e)}")
|
| 165 |
return ""
|
| 166 |
|
| 167 |
-
async def
|
| 168 |
-
"""
|
| 169 |
-
if not self.state.page:
|
| 170 |
-
return {"success": False, "message": "Browser not initialized"}
|
| 171 |
-
|
| 172 |
try:
|
| 173 |
-
#
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
return {"success": True, "message": f"Successfully clicked element: {selector}"}
|
| 180 |
-
|
| 181 |
-
except Exception as e:
|
| 182 |
-
logger.error(f"Failed to click element {selector}: {str(e)}")
|
| 183 |
-
return {"success": False, "message": f"Failed to click element: {str(e)}"}
|
| 184 |
-
|
| 185 |
-
async def type_text(self, selector: str, text: str) -> Dict[str, Any]:
|
| 186 |
-
"""Type text into an input field"""
|
| 187 |
-
if not self.state.page:
|
| 188 |
-
return {"success": False, "message": "Browser not initialized"}
|
| 189 |
-
|
| 190 |
-
try:
|
| 191 |
-
# Wait for element, clear it, and type
|
| 192 |
-
await self.state.page.wait_for_selector(selector, timeout=10000)
|
| 193 |
-
await self.state.page.click(selector) # Focus the element
|
| 194 |
-
await self.state.page.keyboard.press('Control+a') # Select all
|
| 195 |
-
await self.state.page.keyboard.type(text)
|
| 196 |
-
|
| 197 |
-
self.state.action_history.append(f"Typed text into {selector}: {text[:50]}...")
|
| 198 |
-
|
| 199 |
-
return {"success": True, "message": f"Successfully typed text into {selector}"}
|
| 200 |
-
|
| 201 |
-
except Exception as e:
|
| 202 |
-
logger.error(f"Failed to type text into {selector}: {str(e)}")
|
| 203 |
-
return {"success": False, "message": f"Failed to type text: {str(e)}"}
|
| 204 |
-
|
| 205 |
-
async def scroll_page(self, direction: str = "down", amount: int = 500) -> Dict[str, Any]:
|
| 206 |
-
"""Scroll the page"""
|
| 207 |
-
if not self.state.page:
|
| 208 |
-
return {"success": False, "message": "Browser not initialized"}
|
| 209 |
-
|
| 210 |
-
try:
|
| 211 |
-
if direction.lower() == "down":
|
| 212 |
-
await self.state.page.evaluate(f"window.scrollBy(0, {amount})")
|
| 213 |
-
elif direction.lower() == "up":
|
| 214 |
-
await self.state.page.evaluate(f"window.scrollBy(0, -{amount})")
|
| 215 |
-
|
| 216 |
-
self.state.action_history.append(f"Scrolled {direction} by {amount}px")
|
| 217 |
-
|
| 218 |
-
return {"success": True, "message": f"Successfully scrolled {direction}"}
|
| 219 |
-
|
| 220 |
-
except Exception as e:
|
| 221 |
-
logger.error(f"Failed to scroll: {str(e)}")
|
| 222 |
-
return {"success": False, "message": f"Failed to scroll: {str(e)}"}
|
| 223 |
-
|
| 224 |
-
async def get_page_content(self) -> Dict[str, Any]:
|
| 225 |
-
"""Get page content including text and structure"""
|
| 226 |
-
if not self.state.page:
|
| 227 |
-
return {"success": False, "message": "Browser not initialized"}
|
| 228 |
-
|
| 229 |
-
try:
|
| 230 |
-
# Get page title
|
| 231 |
-
title = await self.state.page.title()
|
| 232 |
-
|
| 233 |
-
# Get page text content
|
| 234 |
-
text_content = await self.state.page.evaluate("document.body.innerText")
|
| 235 |
-
|
| 236 |
-
# Get page HTML (first 5000 characters to avoid too much data)
|
| 237 |
-
html_content = await self.state.page.content()
|
| 238 |
-
html_content = html_content[:5000] if len(html_content) > 5000 else html_content
|
| 239 |
-
|
| 240 |
-
# Get links
|
| 241 |
-
links = await self.state.page.evaluate("""
|
| 242 |
-
Array.from(document.querySelectorAll('a')).map(a => ({
|
| 243 |
-
href: a.href,
|
| 244 |
-
text: a.textContent.trim(),
|
| 245 |
-
title: a.title
|
| 246 |
-
})).slice(0, 20)
|
| 247 |
-
""")
|
| 248 |
-
|
| 249 |
-
# Get form elements
|
| 250 |
-
forms = await self.state.page.evaluate("""
|
| 251 |
-
Array.from(document.querySelectorAll('form')).map(form => ({
|
| 252 |
-
action: form.action,
|
| 253 |
-
method: form.method,
|
| 254 |
-
inputs: Array.from(form.querySelectorAll('input, textarea, select')).map(input => ({
|
| 255 |
-
type: input.type,
|
| 256 |
-
name: input.name,
|
| 257 |
-
placeholder: input.placeholder,
|
| 258 |
-
required: input.required
|
| 259 |
-
}))
|
| 260 |
-
}))
|
| 261 |
-
""")
|
| 262 |
-
|
| 263 |
-
self.state.action_history.append("Extracted page content")
|
| 264 |
|
| 265 |
return {
|
| 266 |
-
"
|
| 267 |
-
"
|
| 268 |
-
"
|
| 269 |
-
"
|
| 270 |
-
"links": links,
|
| 271 |
-
"forms": forms
|
| 272 |
}
|
| 273 |
|
| 274 |
except Exception as e:
|
| 275 |
-
|
| 276 |
-
|
|
|
|
|
|
|
|
|
|
| 277 |
|
| 278 |
-
async def close_browser(self):
|
| 279 |
-
"""Close browser and cleanup"""
|
| 280 |
-
try:
|
| 281 |
-
if self.state.page:
|
| 282 |
-
await self.state.page.close()
|
| 283 |
-
if self.state.context:
|
| 284 |
-
await self.state.context.close()
|
| 285 |
-
if self.state.browser:
|
| 286 |
-
await self.state.browser.close()
|
| 287 |
-
|
| 288 |
-
self.state.is_running = False
|
| 289 |
-
logger.info("Browser closed successfully")
|
| 290 |
-
|
| 291 |
-
except Exception as e:
|
| 292 |
-
logger.error(f"Error closing browser: {str(e)}")
|
| 293 |
-
|
| 294 |
def get_status(self) -> Dict[str, Any]:
|
| 295 |
-
"""Get current agent status"""
|
|
|
|
|
|
|
| 296 |
return {
|
| 297 |
"is_running": self.state.is_running,
|
| 298 |
"browser_initialized": self.state.browser is not None,
|
| 299 |
"page_loaded": self.state.page is not None,
|
| 300 |
"screenshot_count": self.state.screenshot_count,
|
| 301 |
-
"action_history": self.state.action_history[-10:],
|
| 302 |
-
"current_url": self.state.page.url if self.state.page else "None"
|
|
|
|
| 303 |
}
|
| 304 |
|
| 305 |
# Global agent instance
|
|
@@ -327,41 +221,13 @@ def process_action(action_type: str, **kwargs):
|
|
| 327 |
else:
|
| 328 |
return "Failed to take screenshot"
|
| 329 |
|
| 330 |
-
elif action_type == "click":
|
| 331 |
-
selector = kwargs.get("selector", "")
|
| 332 |
-
if not selector:
|
| 333 |
-
return "CSS selector is required"
|
| 334 |
-
result = asyncio.run(agent.click_element(selector))
|
| 335 |
-
return result["message"]
|
| 336 |
-
|
| 337 |
-
elif action_type == "type":
|
| 338 |
-
selector = kwargs.get("selector", "")
|
| 339 |
-
text = kwargs.get("text", "")
|
| 340 |
-
if not selector or not text:
|
| 341 |
-
return "Selector and text are required"
|
| 342 |
-
result = asyncio.run(agent.type_text(selector, text))
|
| 343 |
-
return result["message"]
|
| 344 |
-
|
| 345 |
-
elif action_type == "scroll":
|
| 346 |
-
direction = kwargs.get("direction", "down")
|
| 347 |
-
amount = kwargs.get("amount", 500)
|
| 348 |
-
result = asyncio.run(agent.scroll_page(direction, amount))
|
| 349 |
-
return result["message"]
|
| 350 |
-
|
| 351 |
-
elif action_type == "content":
|
| 352 |
-
result = asyncio.run(agent.get_page_content())
|
| 353 |
-
if result["success"]:
|
| 354 |
-
return f"Page: {result['title']}\n\nContent: {result['text_content'][:500]}..."
|
| 355 |
-
else:
|
| 356 |
-
return result["message"]
|
| 357 |
-
|
| 358 |
elif action_type == "status":
|
| 359 |
status = agent.get_status()
|
| 360 |
return json.dumps(status, indent=2)
|
| 361 |
|
| 362 |
-
elif action_type == "
|
| 363 |
-
asyncio.run(agent.
|
| 364 |
-
return
|
| 365 |
|
| 366 |
else:
|
| 367 |
return f"Unknown action: {action_type}"
|
|
@@ -371,53 +237,80 @@ def process_action(action_type: str, **kwargs):
|
|
| 371 |
return f"Error: {str(e)}"
|
| 372 |
|
| 373 |
def gradio_interface():
|
| 374 |
-
"""Create Gradio interface
|
| 375 |
|
| 376 |
-
with gr.Blocks(title="Computer-Using Agent", theme=gr.themes.Soft()) as interface:
|
| 377 |
-
gr.Markdown("# Computer-Using Agent")
|
| 378 |
-
gr.Markdown("π€ **AI-powered browser automation
|
| 379 |
|
| 380 |
-
with gr.Tab("
|
| 381 |
with gr.Row():
|
| 382 |
initialize_btn = gr.Button("Initialize Browser", variant="primary")
|
| 383 |
-
|
| 384 |
-
status_btn = gr.Button("Get Status")
|
| 385 |
|
| 386 |
-
status_display = gr.Textbox(label="Status", lines=
|
| 387 |
|
| 388 |
with gr.Row():
|
| 389 |
url_input = gr.Textbox(label="URL", placeholder="https://example.com")
|
| 390 |
navigate_btn = gr.Button("Navigate", variant="primary")
|
| 391 |
|
| 392 |
navigation_status = gr.Textbox(label="Navigation Status")
|
| 393 |
-
|
| 394 |
-
with gr.Tab("Screenshot & Content"):
|
| 395 |
with gr.Row():
|
| 396 |
screenshot_btn = gr.Button("Take Screenshot", variant="primary")
|
| 397 |
-
content_btn = gr.Button("Get Page Content", variant="secondary")
|
| 398 |
|
| 399 |
screenshot_output = gr.Image(label="Current Screenshot")
|
| 400 |
-
|
| 401 |
|
| 402 |
-
with gr.Tab("
|
| 403 |
with gr.Row():
|
| 404 |
-
|
| 405 |
-
|
| 406 |
|
| 407 |
-
|
| 408 |
-
text_input = gr.Textbox(label="Text to Type", placeholder="Enter text here...")
|
| 409 |
-
type_btn = gr.Button("Type Text", variant="primary")
|
| 410 |
|
| 411 |
with gr.Row():
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 417 |
|
| 418 |
-
with gr.Tab("
|
| 419 |
-
|
| 420 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 421 |
|
| 422 |
# Event handlers
|
| 423 |
initialize_btn.click(
|
|
@@ -425,11 +318,6 @@ def gradio_interface():
|
|
| 425 |
outputs=status_display
|
| 426 |
)
|
| 427 |
|
| 428 |
-
close_btn.click(
|
| 429 |
-
fn=lambda: process_action("close"),
|
| 430 |
-
outputs=status_display
|
| 431 |
-
)
|
| 432 |
-
|
| 433 |
status_btn.click(
|
| 434 |
fn=lambda: process_action("status"),
|
| 435 |
outputs=status_display
|
|
@@ -443,45 +331,40 @@ def gradio_interface():
|
|
| 443 |
|
| 444 |
screenshot_btn.click(
|
| 445 |
fn=lambda: process_action("screenshot"),
|
| 446 |
-
outputs=[
|
| 447 |
)
|
| 448 |
|
| 449 |
-
|
| 450 |
-
fn=lambda: process_action("
|
| 451 |
-
outputs=
|
| 452 |
)
|
| 453 |
|
| 454 |
-
|
| 455 |
-
fn=lambda
|
| 456 |
-
|
| 457 |
-
outputs=interaction_status
|
| 458 |
)
|
| 459 |
|
| 460 |
-
|
| 461 |
-
fn=lambda
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
outputs=
|
| 470 |
-
)
|
| 471 |
-
|
| 472 |
-
refresh_history_btn.click(
|
| 473 |
-
fn=lambda: process_action("status"),
|
| 474 |
-
outputs=action_history
|
| 475 |
)
|
| 476 |
|
| 477 |
return interface
|
| 478 |
|
| 479 |
if __name__ == "__main__":
|
| 480 |
-
# Create and launch Gradio interface
|
| 481 |
interface = gradio_interface()
|
| 482 |
interface.launch(
|
| 483 |
server_name="0.0.0.0",
|
| 484 |
server_port=7860,
|
| 485 |
share=False,
|
| 486 |
-
debug=True
|
|
|
|
| 487 |
)
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Enhanced Computer-Using Agent with VNC Integration
|
| 4 |
+
Combines browser automation with full desktop environment access
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
import asyncio
|
| 8 |
import json
|
| 9 |
import base64
|
|
|
|
| 24 |
import requests
|
| 25 |
from huggingface_hub import hf_hub_download, login
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
# Setup logging
|
| 28 |
logging.basicConfig(level=logging.INFO)
|
| 29 |
logger = logging.getLogger(__name__)
|
|
|
|
| 37 |
is_running: bool = False
|
| 38 |
screenshot_count: int = 0
|
| 39 |
action_history: List[str] = None
|
| 40 |
+
vnc_port: int = 5901
|
| 41 |
|
| 42 |
def __post_init__(self):
|
| 43 |
if self.action_history is None:
|
| 44 |
self.action_history = []
|
| 45 |
|
| 46 |
class ComputerUsingAgent:
|
| 47 |
+
"""Enhanced Computer-Using Agent with VNC Integration"""
|
| 48 |
|
| 49 |
def __init__(self):
|
| 50 |
self.state = AgentState()
|
| 51 |
self.setup_logging()
|
| 52 |
+
self.vnc_url = f"http://localhost:{self.state.vnc_port}/vnc.html"
|
| 53 |
|
| 54 |
def setup_logging(self):
|
| 55 |
"""Setup logging configuration"""
|
|
|
|
| 118 |
url = 'https://' + url
|
| 119 |
|
| 120 |
await self.state.page.goto(url, wait_until='networkidle', timeout=30000)
|
| 121 |
+
await self.state.page.wait_for_timeout(2000)
|
| 122 |
|
| 123 |
# Get page title and URL
|
| 124 |
title = await self.state.page.title()
|
|
|
|
| 158 |
logger.error(f"Failed to take screenshot: {str(e)}")
|
| 159 |
return ""
|
| 160 |
|
| 161 |
+
async def get_vnc_status(self) -> Dict[str, Any]:
    """Report whether something is accepting TCP connections on the VNC port.

    Attempts a TCP connection to ``localhost`` on ``self.state.vnc_port``.
    The probe is a blocking socket call, so it is bounded by a short
    timeout to keep a filtered/unresponsive port from stalling the caller.

    Returns:
        When the probe completes, a dict with the keys ``vnc_running``
        (bool), ``vnc_port``, ``vnc_url`` and ``status`` (human-readable
        text). If the probe itself raises, a dict with ``vnc_running``
        set to ``False``, ``vnc_port`` and ``error`` (the exception text).
    """
    # Kept as a function-local import, as in the original block.
    import socket

    try:
        # The context manager closes the socket even if connect_ex raises
        # (e.g. on a malformed port value), so no descriptor is leaked.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            # Without a timeout a silently dropped SYN can block for the
            # OS default connect timeout; on timeout connect_ex returns a
            # non-zero error code, which is reported as "not running".
            sock.settimeout(2.0)
            result = sock.connect_ex(('localhost', self.state.vnc_port))
        vnc_running = result == 0

        return {
            "vnc_running": vnc_running,
            "vnc_port": self.state.vnc_port,
            "vnc_url": self.vnc_url,
            "status": "VNC Server Active" if vnc_running else "VNC Server Not Available"
        }

    except Exception as e:
        # Best-effort status probe: report the failure instead of raising.
        return {
            "vnc_running": False,
            "vnc_port": self.state.vnc_port,
            "error": str(e)
        }
|
| 184 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
def get_status(self) -> Dict[str, Any]:
    """Return a snapshot of the agent state together with the VNC probe result.

    The VNC probe (``self.get_vnc_status``) is a coroutine. ``asyncio.run``
    refuses to start when the calling thread already runs an event loop, so
    in that situation the coroutine is driven on a short-lived worker
    thread instead; otherwise it is run directly.

    Returns:
        A dict with ``is_running``, ``browser_initialized``,
        ``page_loaded``, ``screenshot_count``, ``action_history`` (the last
        ten entries), ``current_url`` (the string ``"None"`` when no page
        is set) and ``vnc_info`` (whatever ``get_vnc_status`` returned).
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No event loop in this thread: asyncio.run is safe to use.
        vnc_status = asyncio.run(self.get_vnc_status())
    else:
        # Called from inside a running loop: asyncio.run would raise
        # RuntimeError here, so run the coroutine in its own thread/loop.
        from concurrent.futures import ThreadPoolExecutor

        with ThreadPoolExecutor(max_workers=1) as pool:
            vnc_status = pool.submit(asyncio.run, self.get_vnc_status()).result()

    return {
        "is_running": self.state.is_running,
        "browser_initialized": self.state.browser is not None,
        "page_loaded": self.state.page is not None,
        "screenshot_count": self.state.screenshot_count,
        "action_history": self.state.action_history[-10:],
        "current_url": self.state.page.url if self.state.page else "None",
        "vnc_info": vnc_status
    }
|
| 198 |
|
| 199 |
# Global agent instance
|
|
|
|
| 221 |
else:
|
| 222 |
return "Failed to take screenshot"
|
| 223 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
elif action_type == "status":
|
| 225 |
status = agent.get_status()
|
| 226 |
return json.dumps(status, indent=2)
|
| 227 |
|
| 228 |
+
elif action_type == "vnc_status":
|
| 229 |
+
vnc_status = asyncio.run(agent.get_vnc_status())
|
| 230 |
+
return json.dumps(vnc_status, indent=2)
|
| 231 |
|
| 232 |
else:
|
| 233 |
return f"Unknown action: {action_type}"
|
|
|
|
| 237 |
return f"Error: {str(e)}"
|
| 238 |
|
| 239 |
def gradio_interface():
|
| 240 |
+
"""Create enhanced Gradio interface with VNC integration"""
|
| 241 |
|
| 242 |
+
with gr.Blocks(title="Enhanced Computer-Using Agent with VNC", theme=gr.themes.Soft()) as interface:
|
| 243 |
+
gr.Markdown("# 🖥️ Enhanced Computer-Using Agent with VNC")
|
| 244 |
+
gr.Markdown("🤖 **AI-powered browser automation with full desktop environment access**")
|
| 245 |
|
| 246 |
+
with gr.Tab("π Browser Automation"):
|
| 247 |
with gr.Row():
|
| 248 |
initialize_btn = gr.Button("Initialize Browser", variant="primary")
|
| 249 |
+
status_btn = gr.Button("Get Status", variant="secondary")
|
|
|
|
| 250 |
|
| 251 |
+
status_display = gr.Textbox(label="Agent Status", lines=8)
|
| 252 |
|
| 253 |
with gr.Row():
|
| 254 |
url_input = gr.Textbox(label="URL", placeholder="https://example.com")
|
| 255 |
navigate_btn = gr.Button("Navigate", variant="primary")
|
| 256 |
|
| 257 |
navigation_status = gr.Textbox(label="Navigation Status")
|
| 258 |
+
|
|
|
|
| 259 |
with gr.Row():
|
| 260 |
screenshot_btn = gr.Button("Take Screenshot", variant="primary")
|
|
|
|
| 261 |
|
| 262 |
screenshot_output = gr.Image(label="Current Screenshot")
|
| 263 |
+
screenshot_status = gr.Textbox(label="Screenshot Status")
|
| 264 |
|
| 265 |
+
with gr.Tab("🖥️ VNC Desktop"):
|
| 266 |
with gr.Row():
|
| 267 |
+
vnc_status_btn = gr.Button("Check VNC Status", variant="primary")
|
| 268 |
+
open_vnc_btn = gr.Button("Open VNC Viewer", variant="secondary")
|
| 269 |
|
| 270 |
+
vnc_status_display = gr.Textbox(label="VNC Status", lines=6)
|
|
|
|
|
|
|
| 271 |
|
| 272 |
with gr.Row():
|
| 273 |
+
gr.HTML("""
|
| 274 |
+
<div style="text-align: center; padding: 20px; background-color: #f0f0f0; border-radius: 10px;">
|
| 275 |
+
<h3>π VNC Web Access</h3>
|
| 276 |
+
<p>Click the button above to open the VNC web viewer in a new tab</p>
|
| 277 |
+
<p><strong>Port:</strong> 5901 | <strong>Password:</strong> computer-agent</p>
|
| 278 |
+
</div>
|
| 279 |
+
""")
|
| 280 |
+
|
| 281 |
+
# VNC viewer iframe (placeholder - will be populated dynamically)
|
| 282 |
+
vnc_viewer = gr.HTML("""
|
| 283 |
+
<div style="width: 100%; height: 600px; border: 2px solid #ccc; border-radius: 10px; background-color: #f9f9f9;">
|
| 284 |
+
<div style="display: flex; align-items: center; justify-content: center; height: 100%; color: #666;">
|
| 285 |
+
<div style="text-align: center;">
|
| 286 |
+
<h4>🖥️ VNC Desktop Environment</h4>
|
| 287 |
+
<p>Desktop environment will be accessible here once VNC server is running</p>
|
| 288 |
+
<p><em>Use the "Open VNC Viewer" button to access full desktop</em></p>
|
| 289 |
+
</div>
|
| 290 |
+
</div>
|
| 291 |
+
</div>
|
| 292 |
+
""")
|
| 293 |
|
| 294 |
+
with gr.Tab("π System Info"):
|
| 295 |
+
with gr.Row():
|
| 296 |
+
system_info_btn = gr.Button("Get System Info", variant="primary")
|
| 297 |
+
|
| 298 |
+
system_info_display = gr.Textbox(label="System Information", lines=10)
|
| 299 |
+
|
| 300 |
+
with gr.Row():
|
| 301 |
+
gr.HTML("""
|
| 302 |
+
<div style="background-color: #e8f5e8; padding: 20px; border-radius: 10px; margin-top: 20px;">
|
| 303 |
+
<h4>π Features Available</h4>
|
| 304 |
+
<ul>
|
| 305 |
+
<li>✅ Browser Automation with Playwright</li>
|
| 306 |
+
<li>✅ Screenshot Capture</li>
|
| 307 |
+
<li>✅ VNC Desktop Environment (XFCE4)</li>
|
| 308 |
+
<li>✅ Web-based VNC Access</li>
|
| 309 |
+
<li>✅ Real-time Status Monitoring</li>
|
| 310 |
+
<li>✅ Action History Tracking</li>
|
| 311 |
+
</ul>
|
| 312 |
+
</div>
|
| 313 |
+
""")
|
| 314 |
|
| 315 |
# Event handlers
|
| 316 |
initialize_btn.click(
|
|
|
|
| 318 |
outputs=status_display
|
| 319 |
)
|
| 320 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 321 |
status_btn.click(
|
| 322 |
fn=lambda: process_action("status"),
|
| 323 |
outputs=status_display
|
|
|
|
| 331 |
|
| 332 |
screenshot_btn.click(
|
| 333 |
fn=lambda: process_action("screenshot"),
|
| 334 |
+
outputs=[screenshot_status, screenshot_output]
|
| 335 |
)
|
| 336 |
|
| 337 |
+
vnc_status_btn.click(
|
| 338 |
+
fn=lambda: process_action("vnc_status"),
|
| 339 |
+
outputs=vnc_status_display
|
| 340 |
)
|
| 341 |
|
| 342 |
+
open_vnc_btn.click(
|
| 343 |
+
fn=lambda: f"window.open('{agent.vnc_url}', '_blank')",
|
| 344 |
+
outputs=gr.HTML()
|
|
|
|
| 345 |
)
|
| 346 |
|
| 347 |
+
system_info_btn.click(
|
| 348 |
+
fn=lambda: json.dumps({
|
| 349 |
+
"platform": "Hugging Face Spaces",
|
| 350 |
+
"docker": True,
|
| 351 |
+
"vnc_enabled": True,
|
| 352 |
+
"desktop_env": "XFCE4",
|
| 353 |
+
"python_version": "3.10",
|
| 354 |
+
"features": ["browser_automation", "vnc_desktop", "web_interface"]
|
| 355 |
+
}, indent=2),
|
| 356 |
+
outputs=system_info_display
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 357 |
)
|
| 358 |
|
| 359 |
return interface
|
| 360 |
|
| 361 |
if __name__ == "__main__":
|
| 362 |
+
# Create and launch enhanced Gradio interface
|
| 363 |
interface = gradio_interface()
|
| 364 |
interface.launch(
|
| 365 |
server_name="0.0.0.0",
|
| 366 |
server_port=7860,
|
| 367 |
share=False,
|
| 368 |
+
debug=True,
|
| 369 |
+
show_error=True
|
| 370 |
)
|