Add app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,714 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
app.py — Enhanced Open Computer Agent v2.0
|
| 3 |
+
==========================================
|
| 4 |
+
Powered by smolagents + E2B + Playwright + Multi-Model Router + Memory + SoM + Voice
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import json
|
| 9 |
+
import time
|
| 10 |
+
import uuid
|
| 11 |
+
import shutil
|
| 12 |
+
import base64
|
| 13 |
+
from io import BytesIO
|
| 14 |
+
from threading import Timer
|
| 15 |
+
from typing import Any, Dict, List, Optional, Generator
|
| 16 |
+
from datetime import datetime
|
| 17 |
+
|
| 18 |
+
import gradio as gr
|
| 19 |
+
from dotenv import load_dotenv
|
| 20 |
+
from e2b_desktop import Sandbox
|
| 21 |
+
from gradio_modal import Modal
|
| 22 |
+
from huggingface_hub import login, upload_folder
|
| 23 |
+
from PIL import Image
|
| 24 |
+
from smolagents import CodeAgent
|
| 25 |
+
from smolagents.gradio_ui import GradioUI, stream_to_gradio
|
| 26 |
+
|
| 27 |
+
# Our enhanced modules
|
| 28 |
+
from core_agent import (
|
| 29 |
+
AgentConfig,
|
| 30 |
+
IntelligenceRouter,
|
| 31 |
+
HierarchicalPlanner,
|
| 32 |
+
VerifierAgent,
|
| 33 |
+
AgentMemory,
|
| 34 |
+
SoMPreprocessor,
|
| 35 |
+
SessionRecorder,
|
| 36 |
+
HITLCheckpoint,
|
| 37 |
+
CostTracker,
|
| 38 |
+
ModelCall,
|
| 39 |
+
Subtask,
|
| 40 |
+
)
|
| 41 |
+
from mcp_tools import (
|
| 42 |
+
BrowserMCP,
|
| 43 |
+
CodeExecutionMCP,
|
| 44 |
+
FileSystemMCP,
|
| 45 |
+
HFHubMCP,
|
| 46 |
+
make_browser_tools,
|
| 47 |
+
make_code_tools,
|
| 48 |
+
make_fs_tools,
|
| 49 |
+
make_hf_tools,
|
| 50 |
+
)
|
| 51 |
+
from voice_interface import VoiceInterface
|
| 52 |
+
from eval_harness import EvaluationHarness, DEFAULT_BENCHMARKS
|
| 53 |
+
|
| 54 |
+
load_dotenv(override=True)
|
| 55 |
+
|
| 56 |
+
# =============================================================================
|
| 57 |
+
# Config & Globals
|
| 58 |
+
# =============================================================================
|
| 59 |
+
|
| 60 |
+
# API key handed to every E2B sandbox / code-execution component below.
E2B_API_KEY = os.environ.get("E2B_API_KEY")

# Live desktop sandboxes keyed by session id, and their bookkeeping
# timestamps ("created_at" / "last_accessed", epoch seconds).
SANDBOXES: Dict[str, Sandbox] = {}
SANDBOX_METADATA: Dict[str, Dict[str, float]] = {}

# Seconds; used both as the sandbox timeout and as the idle/age limit.
SANDBOX_TIMEOUT = 600

# Desktop resolution requested for each sandbox, in pixels.
WIDTH, HEIGHT = 1024, 768

# Scratch directory holding one sub-folder per interaction.
TMP_DIR = "./tmp/"
os.makedirs(TMP_DIR, exist_ok=True)

# Either environment variable may carry the Hugging Face token.
hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_API_KEY")
if hf_token:
    login(token=hf_token)

# Global enhanced components (lazy init per session)
SESSION_COMPONENTS: Dict[str, Dict[str, Any]] = {}
|
| 75 |
+
|
| 76 |
+
# =============================================================================
|
| 77 |
+
# CSS & HTML Templates
|
| 78 |
+
# =============================================================================
|
| 79 |
+
|
| 80 |
+
# Stylesheet passed to gr.Blocks. The <<WIDTH>> / <<HEIGHT>> placeholders are
# substituted below with the sandbox resolution plus a small margin, so the
# iframe and the fallback image share the same box.
# The plan-checklist glyphs had been mangled by an encoding round-trip (one of
# them was even split across two lines, which is not a valid CSS string); they
# are restored here as single-line UTF-8 literals.
custom_css = """
.modal-container { margin: var(--size-16) auto !important; }
.sandbox-container { position: relative; width: 910px; overflow: hidden; margin: auto; height: 800px; }
.sandbox-frame { display: none; position: absolute; top: 0; left: 0; width: 910px; height: 800px; pointer-events: none; }
.sandbox-iframe, .bsod-image { position: absolute; width: <<WIDTH>>px; height: <<HEIGHT>>px; border: 4px solid #444444; transform-origin: 0 0; }
.primary-color-label label span { font-weight: bold; color: var(--color-accent); }
.status-bar { display: flex; flex-direction: row; align-items: center; z-index: 100; }
.status-indicator { width: 15px; height: 15px; border-radius: 50%; }
.status-text { font-size: 16px; font-weight: bold; padding-left: 8px; text-shadow: none; }
.status-interactive { background-color: #2ecc71; animation: blink 2s infinite; }
.status-view-only { background-color: #e74c3c; }
.status-error { background-color: #e74c3c; animation: blink-error 1s infinite; }
@keyframes blink-error { 0% { background-color: rgba(231, 76, 60, 1); } 50% { background-color: rgba(231, 76, 60, 0.4); } 100% { background-color: rgba(231, 76, 60, 1); } }
@keyframes blink { 0% { background-color: rgba(46, 204, 113, 1); } 50% { background-color: rgba(46, 204, 113, 0.4); } 100% { background-color: rgba(46, 204, 113, 1); } }
#chatbot { height: 1000px !important; }
#chatbot .role { max-width: 95%; }
#chatbot .bubble-wrap { overflow-y: visible; }
.logo-container { display: flex; flex-direction: column; align-items: flex-start; width: 100%; box-sizing: border-box; gap: 5px; }
.logo-item { display: flex; align-items: center; padding: 0 30px; gap: 10px; text-decoration: none !important; color: #f59e0b; font-size: 17px; }
.logo-item:hover { color: #935f06 !important; }
.thought-stream { font-family: monospace; font-size: 13px; background: #1a1a2e; color: #a0c4ff; padding: 10px; border-radius: 8px; max-height: 300px; overflow-y: auto; white-space: pre-wrap; }
.plan-checklist { background: #16213e; padding: 10px; border-radius: 8px; }
.plan-checklist li { list-style: none; margin: 4px 0; }
.plan-checklist li.done::before { content: "✅ "; }
.plan-checklist li.pending::before { content: "⬜ "; }
.plan-checklist li.running::before { content: "🔄 "; }
.plan-checklist li.failed::before { content: "❌ "; }
.cost-badge { font-family: monospace; background: #0f3460; color: #e94560; padding: 4px 8px; border-radius: 4px; font-size: 12px; }
""".replace("<<WIDTH>>", str(WIDTH + 15)).replace("<<HEIGHT>>", str(HEIGHT + 10))
|
| 109 |
+
|
| 110 |
+
# Static footer crediting the open-source projects the app is built on.
# The heading is opened and closed with the same tag (<h3> ... </h3>); the
# previous markup closed it with </h2>, leaving the element unbalanced.
footer_html = """
<h3 style="text-align: center; margin-top:50px;"><i>Powered by open source:</i></h3>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css">
<div class="logo-container">
<a class="logo-item" href="https://github.com/huggingface/smolagents"><i class="fa fa-github"></i>smolagents</a>
<a class="logo-item" href="https://huggingface.co/Qwen/Qwen2.5-VL-72B-Instruct"><i class="fa fa-github"></i>Qwen2.5-VL</a>
<a class="logo-item" href="https://github.com/e2b-dev/desktop"><i class="fa fa-github"></i>E2B Desktop</a>
<a class="logo-item" href="https://playwright.dev"><i class="fa fa-github"></i>Playwright</a>
</div>
"""
|
| 120 |
+
|
| 121 |
+
# HTML shell around the sandbox stream. It is rendered with str.format and
# expects exactly three fields: {stream_url}, {status_class}, {status_text}.
# Keep literal braces out of this template (or double them), otherwise
# str.format will treat them as fields.
# The title separator is an em dash; it had been mangled into a stray Greek
# letter by an encoding round-trip.
sandbox_html_template = """
<style>@import url('https://fonts.googleapis.com/css2?family=Oxanium:wght@200..800&display=swap');</style>
<h1 style="color:var(--color-accent);margin:0;">Open Computer Agent v2.0 — <i>Enhanced</i></h1>
<div class="sandbox-container" style="margin:0;">
<div class="status-bar">
<div class="status-indicator {status_class}"></div>
<div class="status-text">{status_text}</div>
</div>
<iframe id="sandbox-iframe" src="{stream_url}" class="sandbox-iframe" style="display:block;" allowfullscreen></iframe>
<img src="https://huggingface.co/datasets/mfarre/servedfiles/resolve/main/blue_screen_of_death.gif" class="bsod-image" style="display:none;"/>
<img src="https://huggingface.co/datasets/m-ric/images/resolve/main/HUD_thom.png" class="sandbox-frame" />
</div>
"""
|
| 134 |
+
|
| 135 |
+
# Client-side script handed to gr.Blocks(js=...). It:
#   * forces the dark theme (body class + "__theme" query parameter),
#   * polls every 5 s for the hidden #sandbox-creation-time element and swaps
#     the stream iframe for the fallback image once data-timeout has elapsed,
#   * restores the iframe when a button labelled "Let's go" or "Run" is clicked.
custom_js = """function() {
    document.body.classList.add('dark');
    const checkSandboxTimeout = function() {
        const timeElement = document.getElementById('sandbox-creation-time');
        if (timeElement) {
            const creationTime = parseFloat(timeElement.getAttribute('data-time'));
            const timeoutValue = parseFloat(timeElement.getAttribute('data-timeout'));
            const currentTime = Math.floor(Date.now() / 1000);
            const elapsedTime = currentTime - creationTime;
            if (elapsedTime >= timeoutValue) { showBSOD('Error'); return; }
        }
        setTimeout(checkSandboxTimeout, 5000);
    };
    const showBSOD = function(statusText = 'Error') {
        const iframe = document.getElementById('sandbox-iframe');
        const bsod = document.querySelector('.bsod-image');
        if (iframe && bsod) { iframe.style.display = 'none'; bsod.style.display = 'block'; }
    };
    const resetBSOD = function() {
        const iframe = document.getElementById('sandbox-iframe');
        const bsod = document.querySelector('.bsod-image');
        if (iframe && bsod && bsod.style.display === 'block') {
            iframe.style.display = 'block'; bsod.style.display = 'none'; return true;
        }
        return false;
    };
    checkSandboxTimeout();
    document.addEventListener('click', function(e) {
        if (e.target.tagName === 'BUTTON') {
            if (e.target.innerText.includes("Let's go") || e.target.innerText.includes("Run")) { resetBSOD(); }
        }
    });
    const params = new URLSearchParams(window.location.search);
    if (!params.has('__theme')) { params.set('__theme', 'dark'); window.location.search = params.toString(); }
}"""
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
# =============================================================================
|
| 173 |
+
# Sandbox Lifecycle
|
| 174 |
+
# =============================================================================
|
| 175 |
+
|
| 176 |
+
def upload_to_hf_and_remove(folder_path: str) -> str:
    """Push a local folder to the logs dataset, then delete it from disk.

    The folder is uploaded to the "smolagents/computer-agent-logs" dataset
    repository under a path equal to the folder's own base name. The local
    copy is removed only after the upload call has returned.

    Args:
        folder_path: Directory to upload and then remove.

    Returns:
        Whatever ``upload_folder`` returned for the upload.

    Raises:
        Exception: Any error from the upload or the removal is printed and
            re-raised unchanged.
    """
    try:
        target_name = os.path.basename(os.path.normpath(folder_path))
        commit_url = upload_folder(
            folder_path=folder_path,
            repo_id="smolagents/computer-agent-logs",
            repo_type="dataset",
            path_in_repo=target_name,
            ignore_patterns=[".git/*", ".gitignore"],
        )
        shutil.rmtree(folder_path)
    except Exception as exc:
        print(f"Upload error: {exc}")
        raise
    return commit_url
|
| 189 |
+
|
| 190 |
+
|
| 191 |
+
def cleanup_sandboxes() -> None:
    """Kill and unregister every sandbox idle for longer than SANDBOX_TIMEOUT.

    A session counts as idle when its "last_accessed" timestamp in
    SANDBOX_METADATA is more than SANDBOX_TIMEOUT seconds old.

    Each clean-up step is isolated: a failed log upload no longer prevents the
    sandbox from being killed, and a failed kill no longer leaves the session
    registered forever. Previously either failure skipped the removal and the
    same session was retried on every call. Metadata entries that have no
    matching sandbox are dropped as well.
    """
    now = time.time()
    # Snapshot the expired ids first; the dicts are mutated in the loop below.
    expired = [
        sid
        for sid, meta in SANDBOX_METADATA.items()
        if now - meta["last_accessed"] > SANDBOX_TIMEOUT
    ]
    for sid in expired:
        sandbox = SANDBOXES.pop(sid, None)
        SANDBOX_METADATA.pop(sid, None)
        if sandbox is None:
            # Orphaned bookkeeping entry: nothing to upload or kill.
            continue

        data_dir = os.path.join(TMP_DIR, sid)
        if os.path.exists(data_dir):
            try:
                upload_to_hf_and_remove(data_dir)
            except Exception as e:
                print(f"Cleanup error for {sid}: {e}")

        try:
            sandbox.kill()
        except Exception as e:
            print(f"Cleanup error for {sid}: {e}")
        else:
            print(f"Cleaned up sandbox {sid}")
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
def get_or_create_sandbox(session_uuid: str) -> Sandbox:
    """Return the session's desktop sandbox, creating a fresh one when needed.

    An existing sandbox is reused while it is younger than SANDBOX_TIMEOUT
    seconds (measured from "created_at"); reuse refreshes "last_accessed".
    Otherwise any old sandbox is killed and unregistered, and a new one is
    created, its stream started with authentication, and a Firefox policy
    file installed to suppress first-run pages.

    Args:
        session_uuid: Key identifying the session in SANDBOXES.

    Returns:
        The ready-to-use sandbox for this session.

    Raises:
        Exception: Whatever sandbox creation or set-up raises. A sandbox that
            was created but failed set-up is killed before re-raising, so it
            is not leaked.
    """
    now = time.time()
    existing = SANDBOXES.get(session_uuid)
    meta = SANDBOX_METADATA.get(session_uuid)
    if existing is not None and meta is not None:
        if now - meta["created_at"] < SANDBOX_TIMEOUT:
            meta["last_accessed"] = now
            return existing

    # Unregister the old sandbox *before* trying to replace it, so a failed
    # re-creation cannot leave a dead sandbox in the registry.
    stale = SANDBOXES.pop(session_uuid, None)
    SANDBOX_METADATA.pop(session_uuid, None)
    if stale is not None:
        try:
            stale.kill()
        except Exception:
            # Best effort: the old sandbox is being discarded either way.
            pass

    desktop = Sandbox(
        api_key=E2B_API_KEY,
        resolution=(WIDTH, HEIGHT),
        dpi=96,
        timeout=SANDBOX_TIMEOUT,
        template="k0wmnzir0zuzye6dndlw",
    )
    try:
        desktop.stream.start(require_auth=True)
        setup_cmd = """sudo mkdir -p /usr/lib/firefox-esr/distribution && echo '{"policies":{"OverrideFirstRunPage":"","OverridePostUpdatePage":"","DisableProfileImport":true,"DontCheckDefaultBrowser":true}}' | sudo tee /usr/lib/firefox-esr/distribution/policies.json > /dev/null"""
        desktop.commands.run(setup_cmd)
    except Exception:
        # The sandbox exists remotely but is not registered yet; kill it so a
        # failed set-up does not leak a running machine.
        try:
            desktop.kill()
        except Exception:
            pass
        raise

    SANDBOXES[session_uuid] = desktop
    SANDBOX_METADATA[session_uuid] = {"created_at": now, "last_accessed": now}
    return desktop
|
| 231 |
+
|
| 232 |
+
|
| 233 |
+
def update_html(interactive_mode: bool, session_uuid: str) -> str:
    """Render the sandbox viewer HTML for a session.

    Args:
        interactive_mode: When true the stream URL is used as-is and the
            status reads "Interactive"; otherwise "&view_only=true" is
            appended and the status reads "Agent running...".
        session_uuid: Session whose sandbox (created on demand) is shown.

    Returns:
        The filled-in ``sandbox_html_template`` followed by a hidden
        ``#sandbox-creation-time`` element carrying the sandbox creation time
        and SANDBOX_TIMEOUT as data attributes.
    """
    sandbox = get_or_create_sandbox(session_uuid)
    key = sandbox.stream.get_auth_key()
    url = sandbox.stream.get_url(auth_key=key)

    if interactive_mode:
        stream_url = url
        status_class, status_text = "status-interactive", "Interactive"
    else:
        stream_url = f"{url}&view_only=true"
        status_class, status_text = "status-view-only", "Agent running..."

    # Falls back to "now" if the session has no metadata entry.
    created = SANDBOX_METADATA.get(session_uuid, {}).get("created_at", time.time())

    page = sandbox_html_template.format(
        stream_url=stream_url,
        status_class=status_class,
        status_text=status_text,
    )
    marker = f'<div id="sandbox-creation-time" style="display:none;" data-time="{created}" data-timeout="{SANDBOX_TIMEOUT}"></div>'
    return page + marker
|
| 246 |
+
|
| 247 |
+
|
| 248 |
+
# =============================================================================
|
| 249 |
+
# Enhanced Agent Factory
|
| 250 |
+
# =============================================================================
|
| 251 |
+
|
| 252 |
+
def build_session_components(session_uuid: str, data_dir: str) -> Dict[str, Any]:
    """Initialize all enhanced components for a session.

    Builds a fresh set of components, stores it in SESSION_COMPONENTS under
    ``session_uuid`` (replacing any previous set), and returns it.

    The browser, code-execution and HF Hub MCP wrappers are optional: if one
    fails to construct, the failure is printed and the entry is set to None,
    so callers must check for None before use. Previously these failures were
    swallowed without any trace.

    Args:
        session_uuid: Session key; also used for the memory persist directory.
        data_dir: Directory given to the session recorder and the file-system
            MCP wrapper.

    Returns:
        Dict with keys "config", "router", "planner", "verifier", "memory",
        "som", "hitl", "tracker", "recorder", "voice", "browser_mcp",
        "code_mcp", "fs_mcp" and "hf_mcp".
    """
    cfg = AgentConfig(hf_token=hf_token, cost_budget_usd=2.0)

    # Core intelligence
    router = IntelligenceRouter(hf_token=hf_token)
    planner = HierarchicalPlanner(router)
    verifier = VerifierAgent(router)
    memory = AgentMemory(persist_dir=f"./memory_db/{session_uuid}")
    som = SoMPreprocessor(use_icon_detection=False)
    hitl = HITLCheckpoint(auto_approve=False)
    tracker = CostTracker()
    recorder = SessionRecorder(session_uuid, output_dir=data_dir)
    voice = VoiceInterface(hf_token=hf_token)

    # MCP tools (each optional one degrades to None on failure)
    try:
        browser_mcp = BrowserMCP(headless=True)
    except Exception as e:
        print(f"BrowserMCP unavailable for {session_uuid}: {e}")
        browser_mcp = None
    try:
        code_mcp = CodeExecutionMCP(api_key=E2B_API_KEY)
    except Exception as e:
        print(f"CodeExecutionMCP unavailable for {session_uuid}: {e}")
        code_mcp = None
    fs_mcp = FileSystemMCP(base_dir=data_dir)
    try:
        hf_mcp = HFHubMCP(token=hf_token)
    except Exception as e:
        print(f"HFHubMCP unavailable for {session_uuid}: {e}")
        hf_mcp = None

    components = {
        "config": cfg,
        "router": router,
        "planner": planner,
        "verifier": verifier,
        "memory": memory,
        "som": som,
        "hitl": hitl,
        "tracker": tracker,
        "recorder": recorder,
        "voice": voice,
        "browser_mcp": browser_mcp,
        "code_mcp": code_mcp,
        "fs_mcp": fs_mcp,
        "hf_mcp": hf_mcp,
    }
    SESSION_COMPONENTS[session_uuid] = components
    return components
|
| 300 |
+
|
| 301 |
+
|
| 302 |
+
# =============================================================================
|
| 303 |
+
# Streaming Agent Runner with Plan + Thought Visibility
|
| 304 |
+
# =============================================================================
|
| 305 |
+
|
| 306 |
+
def _persist_run(data_dir: str, payload: Dict[str, Any]) -> Optional[str]:
    """Write *payload* to metadata.json in *data_dir* and upload the folder.

    Returns None on success, or the error text if writing or uploading failed,
    so a storage problem can be reported without masking the run's outcome.
    """
    try:
        with open(os.path.join(data_dir, "metadata.json"), "w") as f:
            json.dump(payload, f, default=str)
        upload_to_hf_and_remove(data_dir)
    except Exception as e:
        return str(e)
    return None


def run_enhanced_agent(
    task_input: str,
    session_uuid: str,
    use_planner: bool = True,
    use_verifier: bool = True,
    use_som: bool = False,
    use_browser_mcp: bool = True,
    consent_storage: bool = True,
) -> Generator[List[gr.ChatMessage], None, None]:
    """Yields chat messages with real-time thought streaming.

    Runs one task through optional planning, agent execution, optional
    verification and a cost summary, yielding a copy of the growing message
    list after every update.

    Args:
        task_input: Natural-language task to perform.
        session_uuid: Session whose sandbox and components are used.
        use_planner: Ask the planner for a subtask plan before executing.
        use_verifier: After execution, verify each planned subtask (only has
            an effect when a plan was produced).
        use_som: Run the SoM preprocessor on the initial screenshot and show
            the annotated image.
        use_browser_mcp: Start the browser MCP wrapper and add its tools to
            the agent.
        consent_storage: Write metadata.json and upload the run folder.

    Yields:
        The full message list so far (a shallow copy each time).

    Notes:
        Errors raised from tool injection onwards are reported as a
        "Run failed" chat message rather than propagated, and the browser MCP
        wrapper is always closed afterwards. Storage failures are reported as
        a warning and never turn a finished run into a failed one.
    """
    interaction_id = f"{session_uuid}_{int(time.time())}"
    data_dir = os.path.join(TMP_DIR, interaction_id)
    os.makedirs(data_dir, exist_ok=True)

    desktop = get_or_create_sandbox(session_uuid)
    comps = build_session_components(session_uuid, data_dir)
    tracker: CostTracker = comps["tracker"]
    planner: HierarchicalPlanner = comps["planner"]
    verifier: VerifierAgent = comps["verifier"]
    memory: AgentMemory = comps["memory"]
    hitl: HITLCheckpoint = comps["hitl"]
    router: IntelligenceRouter = comps["router"]
    som: SoMPreprocessor = comps["som"]
    # May be None when the wrapper could not be constructed.
    browser_mcp: Optional[BrowserMCP] = comps["browser_mcp"]

    tracker.start_task(interaction_id)

    messages: List[gr.ChatMessage] = [gr.ChatMessage(role="user", content=task_input)]
    yield messages.copy()

    # ---- PLANNING PHASE ----
    plan = None
    if use_planner:
        messages.append(gr.ChatMessage(
            role="assistant",
            content=f"🧠 **Planning...** Breaking down: *{task_input}*",
        ))
        yield messages.copy()

        # Retrieve similar past tasks
        similar = memory.retrieve_similar(task_input, n_results=2)
        context = ""
        if similar:
            context = "Previous successful strategies:\n" + "\n".join(
                f"- {s.get('strategy_summary', '')}" for s in similar
            )

        plan = planner.plan(task_input, context=context)
        plan_md = "📋 **Plan**\n" + "".join(
            f"- ⬜ [{st.strategy}] {st.description}\n" for st in plan.subtasks
        )
        messages.append(gr.ChatMessage(role="assistant", content=plan_md))
        yield messages.copy()

    # ---- EXECUTION PHASE ----
    # For v2, we bridge the existing E2BVisionAgent with MCP tools.
    # We instantiate the original vision agent but inject browser MCP tools.
    from e2bqwen import E2BVisionAgent, QwenVLAPIModel

    # Use router for model selection; fallback to QwenVLAPIModel for compatibility
    # In a full rewrite we'd use router directly, but here we compose.
    vision_model = QwenVLAPIModel(model_id="Qwen/Qwen2.5-VL-72B-Instruct", hf_token=hf_token)

    agent = E2BVisionAgent(
        model=vision_model,
        data_dir=data_dir,
        desktop=desktop,
        max_steps=100,
        verbosity_level=2,
        use_v1_prompt=True,
    )

    # Everything from here on runs under try/finally so that a browser started
    # below is closed even if the screenshot or SoM step fails.
    step_count = 0
    try:
        # Inject MCP browser tools if enabled
        if use_browser_mcp:
            try:
                if browser_mcp is None:
                    raise RuntimeError("BrowserMCP could not be initialised")
                browser_mcp.start()
                for name, fn in make_browser_tools(browser_mcp).items():
                    agent.tools[name] = fn
                messages.append(gr.ChatMessage(
                    role="assistant",
                    content="🌐 **Playwright MCP connected.** Browser automation ready.",
                ))
                yield messages.copy()
            except Exception as e:
                messages.append(gr.ChatMessage(
                    role="assistant",
                    content=f"⚠️ Playwright MCP unavailable: {e}. Using vision-only fallback.",
                ))
                yield messages.copy()

        # Inject HF Hub tools (optional; the agent works without them)
        hf_mcp = comps["hf_mcp"]
        if hf_mcp is not None:
            try:
                for name, fn in make_hf_tools(hf_mcp).items():
                    agent.tools[name] = fn
            except Exception as e:
                print(f"HF Hub tools unavailable: {e}")

        # Take initial screenshot
        screenshot_bytes = desktop.screenshot(format="bytes")
        initial_screenshot = Image.open(BytesIO(screenshot_bytes))

        # SoM preprocessing on initial screenshot (optional)
        if use_som:
            annotated, _ = som.preprocess(initial_screenshot)
            annotated_path = os.path.join(data_dir, "som_initial.png")
            annotated.save(annotated_path)
            messages.append(gr.ChatMessage(
                role="assistant",
                content={"path": annotated_path, "mime_type": "image/png"},
            ))
            yield messages.copy()

        # Execute task with streaming
        for msg in stream_to_gradio(
            agent, task=task_input, task_images=[initial_screenshot], reset_agent_memory=False,
        ):
            step_count += 1

            # Thought streaming: inject router cost status
            if step_count % 5 == 0:
                cost_report = router.get_cost_report()
                cost_text = f"💰 Cost: ${cost_report['spent_usd']:.4f} / ${cost_report['budget_usd']:.2f} | Calls: {cost_report['calls']}"
                messages.append(gr.ChatMessage(role="assistant", content=cost_text))
                yield messages.copy()

            # Append screenshots
            if hasattr(agent, "last_marked_screenshot") and msg.content == "-----":
                messages.append(gr.ChatMessage(
                    role="assistant",
                    content={"path": agent.last_marked_screenshot.to_string(), "mime_type": "image/png"},
                ))

            messages.append(msg)
            yield messages.copy()

            # HITL check every step
            if hasattr(agent, "memory") and agent.memory.steps:
                last_step = agent.memory.steps[-1]
                tool_calls = getattr(last_step, "tool_calls", None)
                if tool_calls:
                    approved, reason = hitl.check_action(str(tool_calls[0]))
                    if not approved:
                        messages.append(gr.ChatMessage(
                            role="assistant",
                            content=f"🛑 **HITL Checkpoint:** {reason}\nPlease approve or modify the action.",
                        ))
                        yield messages.copy()
                        # In a real implementation we'd pause here for user input
                        # For now, auto-continue after logging
                        time.sleep(0.5)

        # ---- VERIFICATION PHASE ----
        if use_verifier and plan:
            messages.append(gr.ChatMessage(role="assistant", content="🔍 **Verifying task completion...**"))
            yield messages.copy()

            final_screenshot_bytes = desktop.screenshot(format="bytes")
            final_screenshot = Image.open(BytesIO(final_screenshot_bytes))
            trace = [str(s) for s in agent.memory.steps[-20:]]
            for st in plan.subtasks:
                result = verifier.verify(st, trace, final_screenshot)
                status_icon = "✅" if result.get("success") else "❌"
                messages.append(gr.ChatMessage(
                    role="assistant",
                    content=f"{status_icon} **{st.description}** — {result.get('reason', '')}",
                ))
                yield messages.copy()

        # Final summary. Not every step object is guaranteed to expose
        # `observations`, so read it defensively.
        if agent.memory.steps:
            final_output = getattr(agent.memory.steps[-1], "observations", "Task completed.")
        else:
            final_output = "Task completed."
        # NOTE(review): the run is always recorded as successful, even when the
        # verifier reported failed subtasks above — confirm this is intended.
        memory.add_task(
            task=task_input,
            strategy_summary=f"Completed in {step_count} steps. Final: {str(final_output)[:200]}",
            success=True,
            domain=plan.subtasks[0].strategy if plan and plan.subtasks else "general",
        )

        # Cost report
        report = tracker.get_task_report(interaction_id)
        cost_summary = (
            f"📊 **Task Complete**\n"
            f"- Steps: {step_count}\n"
            f"- Cost: ${report['total_cost_usd']:.4f}\n"
            f"- Tokens: {report['total_tokens']}\n"
            f"- Avg latency: {report['avg_latency_ms']}ms"
        )
        messages.append(gr.ChatMessage(role="assistant", content=cost_summary))
        yield messages.copy()

        if consent_storage:
            from e2bqwen import get_agent_summary_erase_images

            summary = get_agent_summary_erase_images(agent)
            storage_error = _persist_run(
                data_dir,
                {"status": "completed", "summary": summary, "cost_report": report},
            )
            if storage_error is not None:
                # The task itself finished; only the trace upload failed.
                messages.append(gr.ChatMessage(
                    role="assistant",
                    content=f"⚠️ Could not store the run trace: {storage_error}",
                ))
                yield messages.copy()

    except Exception as e:
        error_msg = f"Error: {str(e)}"
        messages.append(gr.ChatMessage(role="assistant", content=f"💥 **Run failed:**\n{error_msg}"))
        yield messages.copy()
        if consent_storage:
            # Best effort: a storage error must not escape the error handler.
            _persist_run(data_dir, {"status": "failed", "error": error_msg})
    finally:
        if browser_mcp:
            try:
                browser_mcp.close()
            except Exception as e:
                print(f"Browser MCP close error: {e}")
|
| 525 |
+
|
| 526 |
+
|
| 527 |
+
# =============================================================================
|
| 528 |
+
# Gradio UI
|
| 529 |
+
# =============================================================================
|
| 530 |
+
|
| 531 |
+
theme = gr.themes.Default(font=["Oxanium", "sans-serif"], primary_hue="amber", secondary_hue="blue")
|
| 532 |
+
|
| 533 |
+
with gr.Blocks(theme=theme, css=custom_css, js=custom_js, title="Computer Agent v2.0") as demo:
|
| 534 |
+
session_uuid_state = gr.State(None)
|
| 535 |
+
|
| 536 |
+
with gr.Row():
|
| 537 |
+
# Main sandbox view
|
| 538 |
+
sandbox_html = gr.HTML(
|
| 539 |
+
value=sandbox_html_template.format(stream_url="", status_class="status-interactive", status_text="Interactive"),
|
| 540 |
+
label="Desktop",
|
| 541 |
+
)
|
| 542 |
+
|
| 543 |
+
with gr.Sidebar(position="left"):
|
| 544 |
+
with Modal(visible=True) as modal:
|
| 545 |
+
gr.Markdown("""
|
| 546 |
+
### π₯οΈ Open Computer Agent v2.0
|
| 547 |
+
Welcome to the **enhanced** computer agent powered by:
|
| 548 |
+
- **Multi-Model Router** (auto-selects cheapest capable model)
|
| 549 |
+
- **Playwright MCP** (semantic browser control)
|
| 550 |
+
- **Hierarchical Planner** + **Verifier**
|
| 551 |
+
- **Set-of-Marks Vision** + **Long-Term Memory**
|
| 552 |
+
- **Voice I/O** + **Human-in-the-Loop**
|
| 553 |
+
- **Cost Dashboard** + **Session Recording**
|
| 554 |
+
|
| 555 |
+
π Type a task, hit **Run**, and watch the agent think, plan, and execute.
|
| 556 |
+
""")
|
| 557 |
+
|
| 558 |
+
task_input = gr.Textbox(
|
| 559 |
+
value="Find me pictures of cute puppies",
|
| 560 |
+
label="Enter your task:",
|
| 561 |
+
elem_classes="primary-color-label",
|
| 562 |
+
)
|
| 563 |
+
|
| 564 |
+
with gr.Row():
|
| 565 |
+
run_btn = gr.Button("π Let's go!", variant="primary")
|
| 566 |
+
voice_input = gr.Audio(sources=["microphone"], type="numpy", label="Or speak your task")
|
| 567 |
+
|
| 568 |
+
gr.Examples(
|
| 569 |
+
examples=[
|
| 570 |
+
"Use Google Maps to find the Hugging Face HQ in Paris",
|
| 571 |
+
"Go to Wikipedia and find what happened on April 4th",
|
| 572 |
+
"Find train travel time from Bern to Basel on Google Maps",
|
| 573 |
+
"Go to Hugging Face Spaces, find flux.1 schnell, generate an image of a GPU",
|
| 574 |
+
"Search HF Hub for top text-to-video models and list them",
|
| 575 |
+
"Open GitHub trending and find the top Python repo today",
|
| 576 |
+
],
|
| 577 |
+
inputs=task_input,
|
| 578 |
+
label="Example Tasks",
|
| 579 |
+
examples_per_page=6,
|
| 580 |
+
)
|
| 581 |
+
|
| 582 |
+
with gr.Accordion("βοΈ Advanced Options", open=False):
|
| 583 |
+
use_planner_cb = gr.Checkbox(label="Use Hierarchical Planner", value=True)
|
| 584 |
+
use_verifier_cb = gr.Checkbox(label="Use Verifier", value=True)
|
| 585 |
+
use_som_cb = gr.Checkbox(label="Use Set-of-Marks Vision", value=False)
|
| 586 |
+
use_browser_cb = gr.Checkbox(label="Use Playwright Browser MCP", value=True)
|
| 587 |
+
consent_storage_cb = gr.Checkbox(label="Store task & agent trace?", value=True)
|
| 588 |
+
auto_approve_cb = gr.Checkbox(label="Auto-approve all actions (disable HITL)", value=False)
|
| 589 |
+
|
| 590 |
+
session_state = gr.State({})
|
| 591 |
+
stored_messages = gr.State([])
|
| 592 |
+
|
| 593 |
+
# Cost display
|
| 594 |
+
cost_display = gr.HTML(value='<span class="cost-badge">Cost: $0.0000 / $2.00</span>', label="Cost Tracker")
|
| 595 |
+
|
| 596 |
+
gr.Markdown("""
|
| 597 |
+
- **Data**: Uncheck storage to opt-out. No personal data please.
|
| 598 |
+
- **Captcha**: VMs may get flagged. Interrupt and solve manually if needed.
|
| 599 |
+
- **HITL**: Sensitive actions pause for approval unless auto-approve is on.
|
| 600 |
+
- **Restart**: Refresh the page if the agent seems stuck.
|
| 601 |
+
""")
|
| 602 |
+
|
| 603 |
+
footer = gr.HTML(value=footer_html)
|
| 604 |
+
|
| 605 |
+
# Thought stream + logs
|
| 606 |
+
with gr.Row():
|
| 607 |
+
with gr.Column(scale=1):
|
| 608 |
+
plan_display = gr.Markdown(label="π Plan", value="*Plan will appear here...*")
|
| 609 |
+
with gr.Column(scale=2):
|
| 610 |
+
chatbot_display = gr.Chatbot(
|
| 611 |
+
elem_id="chatbot",
|
| 612 |
+
label="Agent's Execution Logs",
|
| 613 |
+
type="messages",
|
| 614 |
+
avatar_images=(
|
| 615 |
+
None,
|
| 616 |
+
"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/mascot_smol.png",
|
| 617 |
+
),
|
| 618 |
+
resizable=True,
|
| 619 |
+
)
|
| 620 |
+
|
| 621 |
+
stop_btn = gr.Button("π Stop the agent!", variant="huggingface")
|
| 622 |
+
|
| 623 |
+
# ---- Event Wiring ----
|
| 624 |
+
|
| 625 |
+
def clear_and_set_view_only(task_input, session_uuid):
    """Return the sandbox HTML for the session with the interactive flag off.

    Args:
        task_input: Current task text. Accepted because the click event
            passes it, but not used; despite the name, nothing is cleared.
        session_uuid: Identifier of the session whose sandbox view to render.

    Returns:
        Whatever ``update_html(False, session_uuid)`` produces.
    """
    view_only_html = update_html(False, session_uuid)
    return view_only_html
|
| 627 |
+
|
| 628 |
+
def set_interactive(session_uuid):
    """Return the sandbox HTML for the session with the interactive flag on.

    Args:
        session_uuid: Identifier of the session whose sandbox view to render.

    Returns:
        Whatever ``update_html(True, session_uuid)`` produces.
    """
    interactive_html = update_html(True, session_uuid)
    return interactive_html
|
| 630 |
+
|
| 631 |
+
def reactivate_stop():
    """Return a stop button in its default state (original label and variant).

    Used as the last step of the run chain, so a button that was switched to
    "Stopping agent..." goes back to its normal look.
    """
    default_stop_button = gr.Button(value="π Stop the agent!", variant="huggingface")
    return default_stop_button
|
| 633 |
+
|
| 634 |
+
def update_cost_display():
    """Render the cost badge from the spend recorded across all sessions.

    Sums ``cost_so_far_usd`` over the ``"router"`` entry of every value in
    ``SESSION_COMPONENTS``. Sessions without a router add nothing to the
    total.

    Returns:
        str: A ``<span class="cost-badge">`` snippet showing the total with
        four decimals next to the fixed ``$2.00`` figure.
    """
    total = 0.0
    for comps in SESSION_COMPONENTS.values():
        # Look the router up without a fallback object: ``dict.get`` evaluates
        # its default eagerly, so the earlier ``IntelligenceRouter(...)``
        # default was constructed for every session on every refresh, even
        # when a router was already present.
        # Assumes a freshly built router reports zero cost, which makes
        # skipping equivalent to the old fallback — TODO confirm.
        router = comps.get("router")
        if router is not None:
            total += router.cost_so_far_usd
    return f'<span class="cost-badge">Cost: ${total:.4f} / $2.00</span>'
|
| 640 |
+
|
| 641 |
+
def process_voice(audio_tuple, session_uuid):
    """Turn a recorded audio clip into text for the task box.

    Args:
        audio_tuple: Value delivered by the audio component, or ``None`` when
            there is no recording.
        session_uuid: Identifier used to look up (or build) the session's
            components.

    Returns:
        str: ``""`` when there is no audio; otherwise the result of the
        session's voice interface, or ``"[Voice error: ...]"`` if that call
        raises.
    """
    if audio_tuple is None:
        return ""

    # Reuse the session's components when present; otherwise build a set
    # rooted at the session's directory under TMP_DIR.
    session_comps = SESSION_COMPONENTS.get(session_uuid) or build_session_components(
        session_uuid, os.path.join(TMP_DIR, session_uuid)
    )
    voice: VoiceInterface = session_comps["voice"]

    try:
        transcript = voice.process_gradio_audio(audio_tuple)
    except Exception as exc:
        # Surface the failure as text instead of raising into the UI.
        return f"[Voice error: {exc}]"
    else:
        return transcript
|
| 655 |
+
|
| 656 |
+
def interrupt_agent(session_state):
    """Ask the session's agent to stop and return the updated stop button.

    Args:
        session_state: Mapping that may hold the running agent under the
            ``"agent"`` key.

    Returns:
        A "Stopping agent..." button when an interrupt was issued; otherwise
        the stop button in its default state.
    """
    # NOTE(review): in the visible event wiring, session_state is passed only
    # to this handler — confirm that something actually stores the agent in it.
    agent = session_state.get("agent")
    can_interrupt = (
        bool(agent)
        and hasattr(agent, "interrupt_switch")
        and not agent.interrupt_switch
    )
    if not can_interrupt:
        # No agent, no interrupt support, or an interrupt already requested.
        return gr.Button("π Stop the agent!", variant="huggingface")

    agent.interrupt()
    return gr.Button("Stopping agent...", variant="secondary")
|
| 662 |
+
|
| 663 |
+
# Voice -> textbox
|
| 664 |
+
# When a recording stops, transcribe it into the task textbox.
voice_inputs = [voice_input, session_uuid_state]
voice_input.stop_recording(fn=process_voice, inputs=voice_inputs, outputs=[task_input])

# Run button chain, built one step at a time. Each step is scheduled after
# the previous one, and run_event ends up bound to the final step.
# NOTE(review): auto_approve_cb is not among these inputs — confirm that
# run_enhanced_agent obtains the auto-approve setting some other way.
agent_inputs = [
    task_input,
    session_uuid_state,
    use_planner_cb,
    use_verifier_cb,
    use_som_cb,
    use_browser_cb,
    consent_storage_cb,
]

# 1. Switch the sandbox view with the interactive flag off.
run_event = run_btn.click(
    fn=clear_and_set_view_only,
    inputs=[task_input, session_uuid_state],
    outputs=[sandbox_html],
)
# 2. Run the agent, sending its output to the chatbot.
run_event = run_event.then(
    fn=run_enhanced_agent,
    inputs=agent_inputs,
    outputs=[chatbot_display],
)
# 3. Switch the sandbox view back with the interactive flag on.
run_event = run_event.then(
    fn=set_interactive,
    inputs=[session_uuid_state],
    outputs=[sandbox_html],
)
# 4. Refresh the cost badge.
run_event = run_event.then(fn=update_cost_display, outputs=[cost_display])
# 5. Put the stop button back in its default state.
run_event = run_event.then(fn=reactivate_stop, outputs=[stop_btn])

# Stop button: request an interrupt and show the resulting button state.
# NOTE(review): session_state is not an input or output of the agent run
# above — confirm the agent is reachable through it.
stop_btn.click(fn=interrupt_agent, inputs=[session_state], outputs=[stop_btn])
|
| 696 |
+
|
| 697 |
+
# Init session
|
| 698 |
+
def _initialize_session(interactive, browser_uuid):
    """Return the sandbox HTML and the session id it was rendered for.

    Args:
        interactive: Flag forwarded to ``update_html``.
        browser_uuid: Session id supplied by the browser, or a falsy value
            when none is available.

    Returns:
        tuple: ``(sandbox_html_value, session_id)``. Both elements refer to
        the same id.
    """
    # Resolve the id exactly once. Previously the fallback
    # ``str(uuid.uuid4())`` was evaluated separately for each output, so
    # without a browser id the sandbox was rendered for one random id while a
    # different one was stored in the session state.
    session_id = browser_uuid or str(uuid.uuid4())
    return update_html(interactive, session_id), session_id


# One hidden flag shared by both steps, so the value written on load is the
# value the next step reads. Previously each step created its own throwaway
# checkbox.
_interactive_flag = gr.Checkbox(value=True, visible=False)

demo.load(
    # Dummy step: sets the hidden flag so the chained step has something to
    # run after.
    fn=lambda: True,
    outputs=[_interactive_flag],
).then(
    fn=_initialize_session,
    # Reuse the id kept in the browser's localStorage, creating and saving
    # one on first visit.
    # NOTE(review): the handler takes two parameters but only one input
    # component is listed — confirm how the value returned by this js
    # reaches ``browser_uuid``.
    js="() => localStorage.getItem('gradio-session-uuid') || (() => { const id = self.crypto.randomUUID(); localStorage.setItem('gradio-session-uuid', id); return id })()",
    inputs=[_interactive_flag],
    outputs=[sandbox_html, session_uuid_state],
)
|
| 710 |
+
|
| 711 |
+
|
| 712 |
+
if __name__ == "__main__":
    # Schedule sandbox cleanup 60 seconds after startup.
    # NOTE(review): assuming Timer is threading.Timer, it fires only once —
    # confirm cleanup_sandboxes re-schedules itself if periodic cleanup is
    # intended.
    cleanup_timer = Timer(60, cleanup_sandboxes)
    # Run as a daemon thread so a pending timer cannot keep the interpreter
    # alive after the server has shut down.
    cleanup_timer.daemon = True
    cleanup_timer.start()

    # Listen on all interfaces on port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)
|