|
|
import gradio as gr |
|
|
import subprocess |
|
|
import os |
|
|
import time |
|
|
import threading |
|
|
|
|
|
|
|
|
# Connection/desktop settings. Each value is read from the environment
# variable of the same name and falls back to the default shown here.
# All three are kept as strings.
VNC_PORT = os.environ.get("VNC_PORT", "5901")
NO_VNC_PORT = os.environ.get("NO_VNC_PORT", "6080")
DESKTOP_ENV = os.environ.get("DESKTOP_ENV", "xfce")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import requests |
|
|
|
|
|
def execute_task(task: str):
    """Send a natural-language task to the agent API and format its reply.

    The task is POSTed as JSON (``{"task": task}``) to
    ``{AGENT_API_URL}/agent/execute`` with a 300-second timeout. The base URL
    is read from the ``AGENT_API_URL`` environment variable and defaults to
    ``http://localhost:8000``.

    Args:
        task: Task description entered by the user.

    Returns:
        A human-readable string. On HTTP 200 it contains the task, success
        flag and message from the JSON reply, followed by the executed steps,
        confidence and verification reasoning when the reply includes them.
        Otherwise it is an "Input Error", "API Error", "Connection Error" or
        "Unexpected Error" message. Errors raised while calling the API or
        formatting the reply are reported in the returned string rather than
        raised.
    """
    # Reject blank input up front rather than issuing a request (with a
    # five-minute timeout) that carries no instruction for the agent.
    if not task or not task.strip():
        return "Input Error: Please enter a task description."

    try:
        api_url = os.getenv("AGENT_API_URL", "http://localhost:8000")
        response = requests.post(
            f"{api_url}/agent/execute",
            json={"task": task},
            timeout=300,
        )

        if response.status_code != 200:
            return f"API Error {response.status_code}: {response.text}"

        result = response.json()

        # Collect the pieces and join once instead of growing a string.
        parts = [
            f"Task: {result['task']}\n\n",
            f"Success: {result['success']}\n",
            f"Message: {result['message']}\n\n",
        ]

        steps = result.get('steps_executed')
        if steps:
            parts.append(f"Steps Executed ({len(steps)}):\n")
            parts.extend(f"{i}. {step}\n" for i, step in enumerate(steps, 1))
            parts.append("\n")

        # Compare against None so that a legitimate confidence of 0.0 is
        # still reported instead of being silently dropped as falsy.
        confidence = result.get('confidence')
        if confidence is not None:
            parts.append(f"Confidence: {confidence:.1%}\n")

        verification = result.get('verification')
        if verification:
            parts.append(
                f"Verification: {verification.get('reasoning', 'N/A')}\n"
            )

        return "".join(parts)

    except requests.exceptions.RequestException as e:
        return f"Connection Error: Could not connect to agent API. {str(e)}"
    except Exception as e:
        # Last-resort boundary for the UI callback: a malformed reply (e.g. a
        # missing key) is shown to the user instead of crashing the handler.
        return f"Unexpected Error: {str(e)}"
|
|
|
|
|
def get_agent_status():
    """Fetch the agent's status from the API and render it as text.

    Issues a GET to ``{AGENT_API_URL}/agent/status`` (base URL defaults to
    ``http://localhost:8000``) with a 10-second timeout.

    Returns:
        On HTTP 200, a five-line summary (status, current task, display,
        active window name, memory item count), each line newline-terminated.
        For any other status code, or if any exception is raised, a
        "Status Error" message string.
    """
    try:
        base_url = os.getenv("AGENT_API_URL", "http://localhost:8000")
        reply = requests.get(f"{base_url}/agent/status", timeout=10)

        # Handle the failure case first so the happy path stays flat.
        if reply.status_code != 200:
            return f"Status Error {reply.status_code}: {reply.text}"

        info = reply.json()
        report_lines = [
            f"Agent Status: {info['status'].upper()}",
            f"Current Task: {info.get('current_task', 'None')}",
            f"Display: {info['display']}",
            f"Active Window: {info['active_window']['name']}",
            f"Memory Items: {info.get('memory_items', 0)}",
        ]
        return "\n".join(report_lines) + "\n"

    except Exception as exc:
        return f"Status Error: {str(exc)}"
|
|
|
|
|
def take_screenshot():
    """Ask the agent API to capture a screenshot and report the outcome.

    Issues a POST to ``{AGENT_API_URL}/agent/screenshot`` (base URL defaults
    to ``http://localhost:8000``) with a 30-second timeout.

    Returns:
        On HTTP 200, a confirmation message containing the ``timestamp``
        field of the JSON reply. For any other status code, or if any
        exception is raised, a "Screenshot Error" message string.
    """
    try:
        base_url = os.getenv("AGENT_API_URL", "http://localhost:8000")
        reply = requests.post(f"{base_url}/agent/screenshot", timeout=30)

        if reply.status_code != 200:
            return f"Screenshot Error {reply.status_code}: {reply.text}"

        payload = reply.json()
        return (
            f"Screenshot captured at {payload['timestamp']}\n\n"
            "Screenshot available in agent logs and can be viewed in the Desktop tab."
        )
    except Exception as err:
        return f"Screenshot Error: {str(err)}"
|
|
|
|
|
|
|
|
# Gradio UI definition. Two tabs: "Desktop" embeds the noVNC viewer next to
# the connection details, and "Agent Control" sends tasks to the agent API
# through execute_task / get_agent_status / take_screenshot.
with gr.Blocks(title="X11 Desktop Environment with AI Agent") as demo:
    gr.Markdown("""
    # 🖥️ X11 Desktop Environment + 🤖 AI Agent

    Access a full Linux desktop environment with XFCE, GIMP, Firefox, LibreOffice, and control it with an advanced AI agent that thinks, acts, and verifies its work!

    **Features:**
    - Multiple desktop environments (XFCE, LXQt, MATE, Openbox)
    - Pre-installed applications (GIMP, Firefox, LibreOffice)
    - Secure WSS connection for VNC streaming
    - Browser-based access via noVNC
    - **🤖 AI Agent** - Natural language control with reasoning and verification
    """)

    with gr.Tabs():

        # --- Desktop tab: embedded viewer plus connection details ---
        with gr.TabItem("🖥️ Desktop"):
            with gr.Row():
                with gr.Column(scale=4):

                    # The iframe loads /vnc.html from the same origin as
                    # this app; the markup has no placeholders, so a plain
                    # (non-f) string is used.
                    vnc_viewer = gr.HTML("""
                    <iframe
                        src="/vnc.html?autoconnect=true&resize=scale&quality=9"
                        width="100%"
                        height="800px"
                        style="border: 2px solid #ddd; border-radius: 8px;"
                        allow="clipboard-read; clipboard-write"
                    ></iframe>
                    """)

                with gr.Column(scale=1):
                    gr.Markdown(f"""
                    ### 📋 Connection Info

                    **VNC Port:** {VNC_PORT}
                    **noVNC Port:** {NO_VNC_PORT}
                    **Desktop:** {DESKTOP_ENV.upper()}

                    ### 🎯 Quick Start

                    1. The desktop loads automatically
                    2. Use your mouse and keyboard
                    3. Access apps from the menu

                    ### 📦 Installed Apps

                    - **Graphics:** GIMP
                    - **Browser:** Firefox
                    - **Office:** LibreOffice
                    - **Editor:** VS Code
                    - **Terminal:** XFCE Terminal
                    """)

        # --- Agent Control tab: task input, action buttons and outputs ---
        with gr.TabItem("🤖 Agent Control"):
            gr.Markdown("""
            ### 🧠 Advanced AI Agent Control

            The AI agent can understand natural language commands, break them down into steps, execute them, and verify the results using computer vision.

            **Agent Capabilities:**
            - Launch applications (GIMP, Firefox, Terminal, File Manager, LibreOffice)
            - Navigate websites
            - Create files and folders
            - Run terminal commands
            - Take screenshots
            - Complex multi-step tasks with verification
            """)

            with gr.Row():
                with gr.Column():
                    task_input = gr.Textbox(
                        label="Task Description",
                        placeholder="e.g., 'Open GIMP and create a new 1024x768 image, then take a screenshot'",
                        lines=3,
                    )
                    execute_btn = gr.Button("🚀 Execute Task", variant="primary")
                    status_btn = gr.Button("📊 Agent Status")
                    screenshot_btn = gr.Button("📸 Take Screenshot")

                    # Clicking an example fills the task textbox.
                    gr.Examples(
                        examples=[
                            "Open Firefox and navigate to https://github.com",
                            "Launch GIMP and create a new 1920x1080 image",
                            "Open terminal and run 'ls -la'",
                            "Create a new folder called 'projects' on the desktop",
                            "Take a screenshot and show me what you see",
                            "Open LibreOffice Writer and create a new document",
                        ],
                        inputs=task_input,
                    )

                with gr.Column():
                    output_display = gr.Textbox(
                        label="Agent Response",
                        lines=15,
                        interactive=False,
                    )

                    status_display = gr.Textbox(
                        label="Agent Status",
                        lines=5,
                        interactive=False,
                    )

            # Event wiring: task execution and screenshots write to the
            # response box, status queries write to the status box.
            execute_btn.click(
                fn=execute_task,
                inputs=[task_input],
                outputs=[output_display],
            )

            status_btn.click(
                fn=get_agent_status,
                outputs=[status_display],
            )

            screenshot_btn.click(
                fn=take_screenshot,
                outputs=[output_display],
            )

    # Footer shown below the tabs.
    gr.Markdown("""
    ---
    **Tips:**
    - The agent uses advanced reasoning to break down complex tasks into steps
    - It verifies results using computer vision analysis
    - For best desktop experience, use fullscreen mode
    - The desktop supports copy/paste between your local machine and the remote desktop
    - Agent commands can be simple ("Open GIMP") or complex ("Create a new image, add text, and save it")

    ### 📱 Running on Android

    You can run this full desktop environment on your Android phone using Termux!
    Check out the [Termux Guide](docs/termux_guide.md) for detailed instructions.

    """)
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Start the Gradio server only when this file is run as a script:
    # bind to all interfaces on port 7860 with link sharing turned off.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    demo.launch(**launch_options)
|
|
|