"""Gradio front end for a browser-accessible X11 desktop with AI-agent controls.

The module builds a two-tab UI: a "Desktop" tab intended to host the noVNC
viewer, and an "Agent Control" tab whose buttons call the agent's HTTP API
(``/agent/execute``, ``/agent/status``, ``/agent/screenshot``).
"""

import os
import subprocess
import threading
import time

import gradio as gr
import requests

# Environment variables
VNC_PORT = os.getenv("VNC_PORT", "5901")
NO_VNC_PORT = os.getenv("NO_VNC_PORT", "6080")
DESKTOP_ENV = os.getenv("DESKTOP_ENV", "xfce")

# The desktop environment is started by the container entrypoint script,
# so nothing is launched from this module.


# Agent API functions

def _agent_api_url() -> str:
    """Return the agent API base URL, read from AGENT_API_URL on every call."""
    return os.getenv("AGENT_API_URL", "http://localhost:8000")


def _format_task_result(result: dict) -> str:
    """Render the JSON body of a successful ``/agent/execute`` reply as text.

    ``task``, ``success`` and ``message`` are required keys (a missing one
    raises ``KeyError``); ``steps_executed``, ``confidence`` and
    ``verification`` are optional and only rendered when present.
    """
    parts = [
        f"Task: {result['task']}\n\n",
        f"Success: {result['success']}\n",
        f"Message: {result['message']}\n\n",
    ]

    steps = result.get("steps_executed")
    if steps:
        parts.append(f"Steps Executed ({len(steps)}):\n")
        parts.extend(f"{i}. {step}\n" for i, step in enumerate(steps, 1))
        parts.append("\n")

    # Compare against None rather than truthiness so that a reported
    # confidence of 0.0 is still shown instead of being silently dropped.
    confidence = result.get("confidence")
    if confidence is not None:
        parts.append(f"Confidence: {confidence:.1%}\n")

    verification = result.get("verification")
    if verification:
        parts.append(
            f"Verification: {verification.get('reasoning', 'N/A')}\n"
        )

    return "".join(parts)


def execute_task(task: str) -> str:
    """Execute a natural-language task via the agent API.

    Args:
        task: The task description typed by the user.

    Returns:
        A human-readable report of the agent's reply, or an error message.
        Errors are returned as text (never raised) because the value is shown
        directly in the "Agent Response" textbox.
    """
    # Do not tie up a request (and its 5 minute timeout) on an empty task.
    if not task or not task.strip():
        return "Please enter a task description."

    try:
        response = requests.post(
            f"{_agent_api_url()}/agent/execute",
            json={"task": task},
            timeout=300,  # 5 minute timeout for complex tasks
        )
        if response.status_code != 200:
            return f"API Error {response.status_code}: {response.text}"
        return _format_task_result(response.json())
    except requests.exceptions.RequestException as exc:
        return (
            "Connection Error: Could not connect to agent API. "
            f"\n{exc}"
        )
    except Exception as exc:  # UI boundary: report instead of crashing
        return f"Unexpected Error: {exc}"


def get_agent_status() -> str:
    """Fetch the agent status from the API and format it for display.

    Returns:
        A multi-line status summary, or a "Status Error ..." message when the
        request fails or the reply lacks the required ``status``/``display``
        keys.
    """
    try:
        response = requests.get(f"{_agent_api_url()}/agent/status", timeout=10)
        if response.status_code != 200:
            return f"Status Error {response.status_code}: {response.text}"

        status = response.json()
        # Tolerate a null/missing active window so that an otherwise valid
        # status reply is not turned into an error.
        active_window = status.get("active_window") or {}
        return "".join(
            [
                f"Agent Status: {status['status'].upper()}\n",
                f"Current Task: {status.get('current_task', 'None')}\n",
                f"Display: {status['display']}\n",
                f"Active Window: {active_window.get('name', 'None')}\n",
                f"Memory Items: {status.get('memory_items', 0)}\n",
            ]
        )
    except Exception as exc:  # UI boundary: report instead of crashing
        return f"Status Error: {exc}"


def take_screenshot() -> str:
    """Ask the agent API to capture a screenshot.

    Returns:
        A confirmation containing the timestamp reported by the API, or a
        "Screenshot Error ..." message on failure.
    """
    try:
        response = requests.post(
            f"{_agent_api_url()}/agent/screenshot", timeout=30
        )
        if response.status_code != 200:
            return f"Screenshot Error {response.status_code}: {response.text}"

        result = response.json()
        return (
            f"Screenshot captured at {result['timestamp']}\n\n"
            "Screenshot available in agent logs and can be viewed in the Desktop tab."
        )
    except Exception as exc:  # UI boundary: report instead of crashing
        return f"Screenshot Error: {exc}"


# Create the Gradio interface with VNC viewer and agent control
with gr.Blocks(title="X11 Desktop Environment with AI Agent") as demo:
    gr.Markdown("""
    # 🖥️ X11 Desktop Environment + 🤖 AI Agent

    Access a full Linux desktop environment with XFCE, GIMP, Firefox, LibreOffice, and control it with an advanced AI agent that thinks, acts, and verifies its work!

    **Features:**
    - Multiple desktop environments (XFCE, LXQt, MATE, Openbox)
    - Pre-installed applications (GIMP, Firefox, LibreOffice)
    - Secure WSS connection for VNC streaming
    - Browser-based access via noVNC
    - **🤖 AI Agent** - Natural language control with reasoning and verification
    """)

    with gr.Tabs():
        # Desktop Tab
        with gr.TabItem("🖥️ Desktop"):
            with gr.Row():
                with gr.Column(scale=4):
                    # Embed the noVNC viewer in an iframe
                    # NOTE(review): the markup below is blank in this revision,
                    # so this column renders no viewer. The iframe element
                    # pointing at the noVNC endpoint appears to be missing and
                    # should be restored once the correct URL is confirmed.
                    vnc_viewer = gr.HTML(f""" """)

                with gr.Column(scale=1):
                    gr.Markdown("""
                    ### 📋 Connection Info

                    **VNC Port:** {vnc_port}

                    **noVNC Port:** {novnc_port}

                    **Desktop:** {desktop}

                    ### 🎯 Quick Start

                    1. The desktop loads automatically
                    2. Use your mouse and keyboard
                    3. Access apps from the menu

                    ### 📦 Installed Apps

                    - **Graphics:** GIMP
                    - **Browser:** Firefox
                    - **Office:** LibreOffice
                    - **Editor:** VS Code
                    - **Terminal:** XFCE Terminal
                    """.format(
                        vnc_port=VNC_PORT,
                        novnc_port=NO_VNC_PORT,
                        desktop=DESKTOP_ENV.upper(),
                    ))

        # Agent Control Tab
        with gr.TabItem("🤖 Agent Control"):
            gr.Markdown("""
            ### 🧠 Advanced AI Agent Control

            The AI agent can understand natural language commands, break them down into steps, execute them, and verify the results using computer vision.

            **Agent Capabilities:**
            - Launch applications (GIMP, Firefox, Terminal, File Manager, LibreOffice)
            - Navigate websites
            - Create files and folders
            - Run terminal commands
            - Take screenshots
            - Complex multi-step tasks with verification
            """)

            with gr.Row():
                with gr.Column():
                    task_input = gr.Textbox(
                        label="Task Description",
                        placeholder="e.g., 'Open GIMP and create a new 1024x768 image, then take a screenshot'",
                        lines=3,
                    )
                    execute_btn = gr.Button("🚀 Execute Task", variant="primary")
                    status_btn = gr.Button("📊 Agent Status")
                    screenshot_btn = gr.Button("📸 Take Screenshot")

                    gr.Examples(
                        examples=[
                            "Open Firefox and navigate to https://github.com",
                            "Launch GIMP and create a new 1920x1080 image",
                            "Open terminal and run 'ls -la'",
                            "Create a new folder called 'projects' on the desktop",
                            "Take a screenshot and show me what you see",
                            "Open LibreOffice Writer and create a new document",
                        ],
                        inputs=task_input,
                    )

                with gr.Column():
                    output_display = gr.Textbox(
                        label="Agent Response",
                        lines=15,
                        interactive=False,
                    )

                    # Status display
                    status_display = gr.Textbox(
                        label="Agent Status",
                        lines=5,
                        interactive=False,
                    )

            # Wire up the buttons
            execute_btn.click(
                fn=execute_task,
                inputs=[task_input],
                outputs=[output_display],
            )
            status_btn.click(
                fn=get_agent_status,
                outputs=[status_display],
            )
            screenshot_btn.click(
                fn=take_screenshot,
                outputs=[output_display],
            )

    gr.Markdown("""
    ---

    **Tips:**
    - The agent uses advanced reasoning to break down complex tasks into steps
    - It verifies results using computer vision analysis
    - For best desktop experience, use fullscreen mode
    - The desktop supports copy/paste between your local machine and the remote desktop
    - Agent commands can be simple ("Open GIMP") or complex ("Create a new image, add text, and save it")

    ### 📱 Running on Android

    You can run this full desktop environment on your Android phone using Termux!
    Check out the [Termux Guide](docs/termux_guide.md) for detailed instructions.
    """)


if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
    )