Spaces:

fariasultanacodes
/

x11-desktop

Paused

App Files Files Community

x11-desktop / app.py

3v324v23

Fix: Remove circular dependency where app.py tries to start desktop script

15a4dd9 about 2 months ago

raw

history blame contribute delete

9.03 kB

	import gradio as gr
	import subprocess
	import os
	import time
	import threading

	# Connection settings, each overridable through the process environment;
	# values stay strings, exactly as read.
	VNC_PORT = os.environ.get("VNC_PORT", "5901")
	NO_VNC_PORT = os.environ.get("NO_VNC_PORT", "6080")
	DESKTOP_ENV = os.environ.get("DESKTOP_ENV", "xfce")

	# The desktop environment is started by the container entrypoint script,
	# not by this module.

	# Agent API functions
	import requests

	def execute_task(task: str):
	    """Send a natural-language task to the agent API and format the reply.

	    The task is POSTed as JSON to ``{AGENT_API_URL}/agent/execute``, where
	    ``AGENT_API_URL`` is read from the environment and defaults to
	    ``http://localhost:8000``.

	    Args:
	        task: Free-text description of what the agent should do.

	    Returns:
	        A human-readable string. On HTTP 200 it lists the task, success
	        flag and message, followed by the executed steps, confidence and
	        verification reasoning when the response carries them. For a blank
	        task, a non-200 status, or a failure while sending the request or
	        handling the response, it is an error message instead.
	    """
	    # Reject blank input up front instead of tying up the agent (and the
	    # caller, for up to the 5 minute timeout) with an empty task.
	    if not task or not task.strip():
	        return "Error: Please enter a task description."

	    try:
	        api_url = os.getenv("AGENT_API_URL", "http://localhost:8000")
	        response = requests.post(
	            f"{api_url}/agent/execute",
	            json={"task": task},
	            timeout=300,  # 5 minute timeout for complex tasks
	        )

	        if response.status_code != 200:
	            return f"API Error {response.status_code}: {response.text}"

	        result = response.json()

	        # Collect the display text piece by piece and join once at the end.
	        parts = [
	            f"Task: {result['task']}\n\n",
	            f"Success: {result['success']}\n",
	            f"Message: {result['message']}\n\n",
	        ]

	        steps = result.get('steps_executed')
	        if steps:
	            parts.append(f"Steps Executed ({len(steps)}):\n")
	            parts.extend(f"{i}. {step}\n" for i, step in enumerate(steps, 1))
	            parts.append("\n")

	        # Compare against None rather than truthiness so that a reported
	        # confidence of 0 is still shown.
	        confidence = result.get('confidence')
	        if confidence is not None:
	            parts.append(f"Confidence: {confidence:.1%}\n")

	        verification = result.get('verification')
	        if verification:
	            parts.append(f"Verification: {verification.get('reasoning', 'N/A')}\n")

	        return "".join(parts)

	    except requests.exceptions.RequestException as e:
	        return f"Connection Error: Could not connect to agent API. {str(e)}"
	    except Exception as e:
	        # Covers malformed payloads (missing keys, wrong types) as well.
	        return f"Unexpected Error: {str(e)}"

	def get_agent_status():
	    """Query the agent API for its status and render it as text.

	    Issues a GET to ``{AGENT_API_URL}/agent/status`` (``AGENT_API_URL``
	    defaults to ``http://localhost:8000``) with a 10 second timeout.

	    Returns:
	        A multi-line summary (status, current task, display, active window
	        name, memory item count) on HTTP 200; otherwise a string starting
	        with "Status Error". Any exception raised along the way is caught
	        and reported in that same form.
	    """
	    try:
	        base_url = os.getenv("AGENT_API_URL", "http://localhost:8000")
	        reply = requests.get(f"{base_url}/agent/status", timeout=10)

	        # Handle the failure case first so the happy path reads straight down.
	        if reply.status_code != 200:
	            return f"Status Error {reply.status_code}: {reply.text}"

	        info = reply.json()
	        summary_lines = [
	            f"Agent Status: {info['status'].upper()}\n",
	            f"Current Task: {info.get('current_task', 'None')}\n",
	            f"Display: {info['display']}\n",
	            f"Active Window: {info['active_window']['name']}\n",
	            f"Memory Items: {info.get('memory_items', 0)}\n",
	        ]
	        return "".join(summary_lines)

	    except Exception as err:
	        return f"Status Error: {err!s}"

	def take_screenshot():
	    """Ask the agent API to capture a screenshot and report the outcome.

	    Issues a POST to ``{AGENT_API_URL}/agent/screenshot`` (``AGENT_API_URL``
	    defaults to ``http://localhost:8000``) with a 30 second timeout.

	    Returns:
	        A confirmation string containing the ``timestamp`` field of the
	        response on HTTP 200; otherwise a string starting with
	        "Screenshot Error". Any exception is caught and reported that way.
	    """
	    try:
	        base_url = os.getenv("AGENT_API_URL", "http://localhost:8000")
	        reply = requests.post(f"{base_url}/agent/screenshot", timeout=30)

	        if reply.status_code != 200:
	            return f"Screenshot Error {reply.status_code}: {reply.text}"

	        payload = reply.json()
	        captured_line = f"Screenshot captured at {payload['timestamp']}\n\n"
	        hint_line = "Screenshot available in agent logs and can be viewed in the Desktop tab."
	        return captured_line + hint_line
	    except Exception as err:
	        return f"Screenshot Error: {err!s}"

	# Build the Gradio UI: an intro banner, a "Desktop" tab embedding the noVNC
	# viewer, an "Agent Control" tab wired to the agent API helpers defined
	# earlier in this file, and a footer with usage tips.
	with gr.Blocks(title="X11 Desktop Environment with AI Agent") as demo:
	gr.Markdown("""
	# 🖥️ X11 Desktop Environment + 🤖 AI Agent

	Access a full Linux desktop environment with XFCE, GIMP, Firefox, LibreOffice, and control it with an advanced AI agent that thinks, acts, and verifies its work!

	Features:
	- Multiple desktop environments (XFCE, LXQt, MATE, Openbox)
	- Pre-installed applications (GIMP, Firefox, LibreOffice)
	- Secure WSS connection for VNC streaming
	- Browser-based access via noVNC
	- 🤖 AI Agent - Natural language control with reasoning and verification
	""")

	with gr.Tabs():
	# Desktop tab: the remote desktop viewer plus a side panel of connection info
	with gr.TabItem("🖥️ Desktop"):
	with gr.Row():
	with gr.Column(scale=4):
	# Embed the noVNC viewer in an iframe. The src is a host-relative URL, so
	# the browser requests /vnc.html from the same host that serves this page.
	# NOTE(review): presumably something in the container routes that path to
	# noVNC; that is not visible in this file, so confirm against the deployment.
	# NOTE(review): the f-prefix on the literal is inert; it has no placeholders.
	vnc_viewer = gr.HTML(f"""
	<iframe
	src="/vnc.html?autoconnect=true&resize=scale&quality=9"
	width="100%"
	height="800px"
	style="border: 2px solid #ddd; border-radius: 8px;"
	allow="clipboard-read; clipboard-write"
	></iframe>
	""")

	with gr.Column(scale=1):
	# Side panel text. str.format() fills the {vnc_port}, {novnc_port} and
	# {desktop} placeholders from the module-level settings, so any literal
	# braces added to this text would have to be doubled.
	gr.Markdown("""
	### 📋 Connection Info

	VNC Port: {vnc_port}
	noVNC Port: {novnc_port}
	Desktop: {desktop}

	### 🎯 Quick Start

	1. The desktop loads automatically
	2. Use your mouse and keyboard
	3. Access apps from the menu

	### 📦 Installed Apps

	- Graphics: GIMP
	- Browser: Firefox
	- Office: LibreOffice
	- Editor: VS Code
	- Terminal: XFCE Terminal
	""".format(
	vnc_port=VNC_PORT,
	novnc_port=NO_VNC_PORT,
	desktop=DESKTOP_ENV.upper()
	))

	# Agent Control tab: submit tasks, query status, and request screenshots
	with gr.TabItem("🤖 Agent Control"):
	gr.Markdown("""
	### 🧠 Advanced AI Agent Control

	The AI agent can understand natural language commands, break them down into steps, execute them, and verify the results using computer vision.

	Agent Capabilities:
	- Launch applications (GIMP, Firefox, Terminal, File Manager, LibreOffice)
	- Navigate websites
	- Create files and folders
	- Run terminal commands
	- Take screenshots
	- Complex multi-step tasks with verification
	""")

	with gr.Row():
	with gr.Column():
	# First column: task entry, the three action buttons, and example prompts
	task_input = gr.Textbox(
	label="Task Description",
	placeholder="e.g., 'Open GIMP and create a new 1024x768 image, then take a screenshot'",
	lines=3
	)
	execute_btn = gr.Button("🚀 Execute Task", variant="primary")
	status_btn = gr.Button("📊 Agent Status")
	screenshot_btn = gr.Button("📸 Take Screenshot")

	# Example prompts bound to task_input
	gr.Examples(
	examples=[
	"Open Firefox and navigate to https://github.com",
	"Launch GIMP and create a new 1920x1080 image",
	"Open terminal and run 'ls -la'",
	"Create a new folder called 'projects' on the desktop",
	"Take a screenshot and show me what you see",
	"Open LibreOffice Writer and create a new document"
	],
	inputs=task_input
	)

	with gr.Column():
	# Second column: read-only result boxes (interactive=False)
	output_display = gr.Textbox(
	label="Agent Response",
	lines=15,
	interactive=False
	)

	# Status display, filled only by the "Agent Status" button
	status_display = gr.Textbox(
	label="Agent Status",
	lines=5,
	interactive=False
	)

	# Wire up the buttons
	# execute_task receives the task text; its returned string fills output_display
	execute_btn.click(
	fn=execute_task,
	inputs=[task_input],
	outputs=[output_display]
	)

	# No inputs: get_agent_status takes no arguments
	status_btn.click(
	fn=get_agent_status,
	outputs=[status_display]
	)

	# Writes to the same output_display as execute_btn, so a screenshot result
	# replaces whatever task response was showing there
	screenshot_btn.click(
	fn=take_screenshot,
	outputs=[output_display]
	)


	# Footer: usage tips and a pointer to the Termux guide
	gr.Markdown("""
	---
	Tips:
	- The agent uses advanced reasoning to break down complex tasks into steps
	- It verifies results using computer vision analysis
	- For best desktop experience, use fullscreen mode
	- The desktop supports copy/paste between your local machine and the remote desktop
	- Agent commands can be simple ("Open GIMP") or complex ("Create a new image, add text, and save it")

	### 📱 Running on Android

	You can run this full desktop environment on your Android phone using Termux!
	Check out the [Termux Guide](docs/termux_guide.md) for detailed instructions.

	""")



	if __name__ == "__main__":
	    # Launch settings when run as a script: listen on 0.0.0.0, fixed port
	    # 7860, and no Gradio share link.
	    launch_options = {
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	        "share": False,
	    }
	    demo.launch(**launch_options)