Spaces:
Running
Running
A-Mahla
committed on
Little fixes (#22)
Browse files
* Little fix
* CHG text
* FIX pre-commit
- cua2-core/src/cua2_core/app.py +1 -0
- cua2-core/src/cua2_core/models/models.py +25 -0
- cua2-core/src/cua2_core/routes/routes.py +26 -0
- cua2-core/src/cua2_core/services/agent_service.py +117 -3
- cua2-core/src/cua2_core/services/agent_utils/get_model.py +0 -2
- cua2-core/src/cua2_core/services/archival_service.py +6 -7
- cua2-front/src/components/WelcomeScreen.tsx +98 -4
- cua2-front/src/components/sandbox/SandboxViewer.tsx +5 -2
- cua2-front/src/components/sandbox/completionview/CompletionView.tsx +86 -2
- cua2-front/src/components/steps/FinalStepCard.tsx +7 -7
- cua2-front/src/components/steps/StepCard.tsx +10 -9
- cua2-front/src/services/api.ts +22 -0
- cua2-front/src/services/jsonExporter.ts +2 -3
- cua2-front/src/stores/agentStore.ts +60 -1
- cua2-front/src/types/agent.ts +1 -0
cua2-core/src/cua2_core/app.py
CHANGED
|
@@ -39,6 +39,7 @@ async def lifespan(app: FastAPI):
|
|
| 39 |
yield
|
| 40 |
|
| 41 |
print("Shutting down services...")
|
|
|
|
| 42 |
await sandbox_service.cleanup_sandboxes()
|
| 43 |
print("Services shut down successfully")
|
| 44 |
|
|
|
|
| 39 |
yield
|
| 40 |
|
| 41 |
print("Shutting down services...")
|
| 42 |
+
await agent_service.cleanup()
|
| 43 |
await sandbox_service.cleanup_sandboxes()
|
| 44 |
print("Services shut down successfully")
|
| 45 |
|
cua2-core/src/cua2_core/models/models.py
CHANGED
|
@@ -140,6 +140,7 @@ class AgentTraceMetadata(BaseModel):
|
|
| 140 |
Literal["success", "stopped", "max_steps_reached", "error", "sandbox_timeout"]
|
| 141 |
| None
|
| 142 |
) = None
|
|
|
|
| 143 |
|
| 144 |
|
| 145 |
class AgentTrace(BaseModel):
|
|
@@ -248,6 +249,14 @@ class StopTask(BaseModel):
|
|
| 248 |
traceId: str
|
| 249 |
|
| 250 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
##################### Agent Service ########################
|
| 252 |
|
| 253 |
|
|
@@ -314,6 +323,7 @@ class ActiveTask(BaseModel):
|
|
| 314 |
"success", "stopped", "max_steps_reached", "error", "sandbox_timeout"
|
| 315 |
]
|
| 316 |
| None = None,
|
|
|
|
| 317 |
):
|
| 318 |
"""Update trace metadata"""
|
| 319 |
with self._file_lock:
|
|
@@ -329,6 +339,8 @@ class ActiveTask(BaseModel):
|
|
| 329 |
self.traceMetadata.completed = completed
|
| 330 |
if final_state is not None:
|
| 331 |
self.traceMetadata.final_state = final_state
|
|
|
|
|
|
|
| 332 |
|
| 333 |
|
| 334 |
#################### API Routes Models ########################
|
|
@@ -369,6 +381,19 @@ class UpdateStepResponse(BaseModel):
|
|
| 369 |
message: str
|
| 370 |
|
| 371 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 372 |
class AvailableModelsResponse(BaseModel):
|
| 373 |
"""Response for available models"""
|
| 374 |
|
|
|
|
| 140 |
Literal["success", "stopped", "max_steps_reached", "error", "sandbox_timeout"]
|
| 141 |
| None
|
| 142 |
) = None
|
| 143 |
+
user_evaluation: Literal["success", "failed", "not_evaluated"] = "not_evaluated"
|
| 144 |
|
| 145 |
|
| 146 |
class AgentTrace(BaseModel):
|
|
|
|
| 249 |
traceId: str
|
| 250 |
|
| 251 |
|
| 252 |
+
class TraceEvaluation(BaseModel):
|
| 253 |
+
"""Trace evaluation message"""
|
| 254 |
+
|
| 255 |
+
event_type: Literal["trace_evaluation"]
|
| 256 |
+
traceId: str
|
| 257 |
+
user_evaluation: Literal["success", "failed", "not_evaluated"]
|
| 258 |
+
|
| 259 |
+
|
| 260 |
##################### Agent Service ########################
|
| 261 |
|
| 262 |
|
|
|
|
| 323 |
"success", "stopped", "max_steps_reached", "error", "sandbox_timeout"
|
| 324 |
]
|
| 325 |
| None = None,
|
| 326 |
+
user_evaluation: Literal["success", "failed", "not_evaluated"] | None = None,
|
| 327 |
):
|
| 328 |
"""Update trace metadata"""
|
| 329 |
with self._file_lock:
|
|
|
|
| 339 |
self.traceMetadata.completed = completed
|
| 340 |
if final_state is not None:
|
| 341 |
self.traceMetadata.final_state = final_state
|
| 342 |
+
if user_evaluation is not None:
|
| 343 |
+
self.traceMetadata.user_evaluation = user_evaluation
|
| 344 |
|
| 345 |
|
| 346 |
#################### API Routes Models ########################
|
|
|
|
| 381 |
message: str
|
| 382 |
|
| 383 |
|
| 384 |
+
class UpdateTraceEvaluationRequest(BaseModel):
|
| 385 |
+
"""Request model for updating trace evaluation"""
|
| 386 |
+
|
| 387 |
+
user_evaluation: Literal["success", "failed", "not_evaluated"]
|
| 388 |
+
|
| 389 |
+
|
| 390 |
+
class UpdateTraceEvaluationResponse(BaseModel):
|
| 391 |
+
"""Response model for trace evaluation update"""
|
| 392 |
+
|
| 393 |
+
success: bool
|
| 394 |
+
message: str
|
| 395 |
+
|
| 396 |
+
|
| 397 |
class AvailableModelsResponse(BaseModel):
|
| 398 |
"""Response for available models"""
|
| 399 |
|
cua2-core/src/cua2_core/routes/routes.py
CHANGED
|
@@ -8,6 +8,8 @@ from cua2_core.models.models import (
|
|
| 8 |
HealthResponse,
|
| 9 |
UpdateStepRequest,
|
| 10 |
UpdateStepResponse,
|
|
|
|
|
|
|
| 11 |
)
|
| 12 |
from cua2_core.services.agent_service import AgentService
|
| 13 |
from cua2_core.services.agent_utils.get_model import AVAILABLE_MODELS
|
|
@@ -93,3 +95,27 @@ async def update_trace_step(
|
|
| 93 |
raise HTTPException(status_code=404, detail=str(e))
|
| 94 |
except Exception as e:
|
| 95 |
raise HTTPException(status_code=400, detail=str(e))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
HealthResponse,
|
| 9 |
UpdateStepRequest,
|
| 10 |
UpdateStepResponse,
|
| 11 |
+
UpdateTraceEvaluationRequest,
|
| 12 |
+
UpdateTraceEvaluationResponse,
|
| 13 |
)
|
| 14 |
from cua2_core.services.agent_service import AgentService
|
| 15 |
from cua2_core.services.agent_utils.get_model import AVAILABLE_MODELS
|
|
|
|
| 95 |
raise HTTPException(status_code=404, detail=str(e))
|
| 96 |
except Exception as e:
|
| 97 |
raise HTTPException(status_code=400, detail=str(e))
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
@router.patch(
|
| 101 |
+
"/traces/{trace_id}/evaluation", response_model=UpdateTraceEvaluationResponse
|
| 102 |
+
)
|
| 103 |
+
async def update_trace_evaluation(
|
| 104 |
+
trace_id: str,
|
| 105 |
+
request: UpdateTraceEvaluationRequest,
|
| 106 |
+
agent_service: AgentService = Depends(get_agent_service),
|
| 107 |
+
):
|
| 108 |
+
"""Update the user evaluation for a trace (overall task feedback)"""
|
| 109 |
+
try:
|
| 110 |
+
agent_service.update_trace_evaluation(
|
| 111 |
+
trace_id=trace_id,
|
| 112 |
+
user_evaluation=request.user_evaluation,
|
| 113 |
+
)
|
| 114 |
+
return UpdateTraceEvaluationResponse(
|
| 115 |
+
success=True,
|
| 116 |
+
message="Trace evaluation updated successfully",
|
| 117 |
+
)
|
| 118 |
+
except FileNotFoundError as e:
|
| 119 |
+
raise HTTPException(status_code=404, detail=str(e))
|
| 120 |
+
except Exception as e:
|
| 121 |
+
raise HTTPException(status_code=400, detail=str(e))
|
cua2-core/src/cua2_core/services/agent_service.py
CHANGED
|
@@ -1,11 +1,12 @@
|
|
| 1 |
import asyncio
|
| 2 |
import base64
|
|
|
|
| 3 |
import json
|
| 4 |
import logging
|
| 5 |
import os
|
| 6 |
import time
|
| 7 |
from io import BytesIO
|
| 8 |
-
from typing import Callable, Literal
|
| 9 |
from uuid import uuid4
|
| 10 |
|
| 11 |
from cua2_core.models.models import (
|
|
@@ -52,6 +53,7 @@ class AgentService:
|
|
| 52 |
self.last_screenshot: dict[str, AgentImage | None] = {}
|
| 53 |
self._lock = asyncio.Lock()
|
| 54 |
self.max_sandboxes = int(600 / num_workers)
|
|
|
|
| 55 |
|
| 56 |
# Initialize archival service in dedicated process
|
| 57 |
self.archival_service = ArchivalService(
|
|
@@ -61,8 +63,41 @@ class AgentService:
|
|
| 61 |
archive_interval_minutes=30,
|
| 62 |
folder_age_threshold_minutes=30,
|
| 63 |
)
|
| 64 |
-
# Start the archival service process
|
| 65 |
-
self.archival_service.start()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
def _update_archival_active_tasks(self):
|
| 68 |
"""
|
|
@@ -243,6 +278,7 @@ class AgentService:
|
|
| 243 |
|
| 244 |
self.active_tasks[message_id].update_trace_metadata(
|
| 245 |
final_state=final_state,
|
|
|
|
| 246 |
)
|
| 247 |
|
| 248 |
if message_id in self.active_tasks:
|
|
@@ -475,6 +511,58 @@ class AgentService:
|
|
| 475 |
except (ValueError, KeyError, TypeError) as e:
|
| 476 |
raise ValueError(f"Error processing step update: {e}")
|
| 477 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 478 |
async def stop_task(self, trace_id: str):
|
| 479 |
"""Stop a task"""
|
| 480 |
if trace_id in self.active_tasks:
|
|
@@ -518,3 +606,29 @@ class AgentService:
|
|
| 518 |
|
| 519 |
except Exception as e:
|
| 520 |
logger.error(f"Error cleaning up task {message_id}: {e}", exc_info=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import asyncio
|
| 2 |
import base64
|
| 3 |
+
import fcntl
|
| 4 |
import json
|
| 5 |
import logging
|
| 6 |
import os
|
| 7 |
import time
|
| 8 |
from io import BytesIO
|
| 9 |
+
from typing import IO, Callable, Literal
|
| 10 |
from uuid import uuid4
|
| 11 |
|
| 12 |
from cua2_core.models.models import (
|
|
|
|
| 53 |
self.last_screenshot: dict[str, AgentImage | None] = {}
|
| 54 |
self._lock = asyncio.Lock()
|
| 55 |
self.max_sandboxes = int(600 / num_workers)
|
| 56 |
+
self._archival_lock_file: IO[str] | None = None
|
| 57 |
|
| 58 |
# Initialize archival service in dedicated process
|
| 59 |
self.archival_service = ArchivalService(
|
|
|
|
| 63 |
archive_interval_minutes=30,
|
| 64 |
folder_age_threshold_minutes=30,
|
| 65 |
)
|
| 66 |
+
# Start the archival service process only on one worker
|
| 67 |
+
if self._should_start_archival_service():
|
| 68 |
+
self.archival_service.start()
|
| 69 |
+
logger.info(f"Started archival service in worker PID {os.getpid()}")
|
| 70 |
+
else:
|
| 71 |
+
logger.info(
|
| 72 |
+
f"Skipping archival service start in worker PID {os.getpid()} (already running in another worker)"
|
| 73 |
+
)
|
| 74 |
+
|
| 75 |
+
def _should_start_archival_service(self) -> bool:
|
| 76 |
+
"""
|
| 77 |
+
Determine if this worker should start the archival service.
|
| 78 |
+
Uses file-based locking to ensure only one worker across all processes
|
| 79 |
+
starts the archival service.
|
| 80 |
+
|
| 81 |
+
Returns:
|
| 82 |
+
True if this worker should start the archival service, False otherwise
|
| 83 |
+
"""
|
| 84 |
+
lock_file_path = "/tmp/cua2_archival_service.lock"
|
| 85 |
+
|
| 86 |
+
try:
|
| 87 |
+
self._archival_lock_file = open(lock_file_path, "w")
|
| 88 |
+
fcntl.flock(
|
| 89 |
+
self._archival_lock_file.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB
|
| 90 |
+
)
|
| 91 |
+
|
| 92 |
+
self._archival_lock_file.write(str(os.getpid()))
|
| 93 |
+
self._archival_lock_file.flush()
|
| 94 |
+
return True
|
| 95 |
+
|
| 96 |
+
except (IOError, OSError):
|
| 97 |
+
if self._archival_lock_file:
|
| 98 |
+
self._archival_lock_file.close()
|
| 99 |
+
self._archival_lock_file = None
|
| 100 |
+
return False
|
| 101 |
|
| 102 |
def _update_archival_active_tasks(self):
|
| 103 |
"""
|
|
|
|
| 278 |
|
| 279 |
self.active_tasks[message_id].update_trace_metadata(
|
| 280 |
final_state=final_state,
|
| 281 |
+
completed=True,
|
| 282 |
)
|
| 283 |
|
| 284 |
if message_id in self.active_tasks:
|
|
|
|
| 511 |
except (ValueError, KeyError, TypeError) as e:
|
| 512 |
raise ValueError(f"Error processing step update: {e}")
|
| 513 |
|
| 514 |
+
def update_trace_evaluation(
|
| 515 |
+
self,
|
| 516 |
+
trace_id: str,
|
| 517 |
+
user_evaluation: Literal["success", "failed", "not_evaluated"],
|
| 518 |
+
):
|
| 519 |
+
"""
|
| 520 |
+
Update the user evaluation for a trace
|
| 521 |
+
|
| 522 |
+
Args:
|
| 523 |
+
trace_id: The trace ID
|
| 524 |
+
user_evaluation: The evaluation value to set
|
| 525 |
+
|
| 526 |
+
Raises:
|
| 527 |
+
FileNotFoundError: If trace not found
|
| 528 |
+
"""
|
| 529 |
+
# Try to find in active tasks first
|
| 530 |
+
active_task = self.active_tasks.get(trace_id)
|
| 531 |
+
|
| 532 |
+
if active_task:
|
| 533 |
+
# Task is still active
|
| 534 |
+
active_task.update_trace_metadata(user_evaluation=user_evaluation)
|
| 535 |
+
else:
|
| 536 |
+
# Task is not active, try to load from file
|
| 537 |
+
data_dir = "data"
|
| 538 |
+
trace_dirs = [
|
| 539 |
+
d for d in os.listdir(data_dir) if d.startswith(f"trace-{trace_id}")
|
| 540 |
+
]
|
| 541 |
+
|
| 542 |
+
if not trace_dirs:
|
| 543 |
+
raise FileNotFoundError("Trace not found")
|
| 544 |
+
|
| 545 |
+
trace_path = os.path.join(data_dir, trace_dirs[0])
|
| 546 |
+
tasks_file = os.path.join(trace_path, "tasks.json")
|
| 547 |
+
|
| 548 |
+
if not os.path.exists(tasks_file):
|
| 549 |
+
raise FileNotFoundError("Trace data not found")
|
| 550 |
+
|
| 551 |
+
try:
|
| 552 |
+
# Load the trace data
|
| 553 |
+
with open(tasks_file, "r") as f:
|
| 554 |
+
task_data = json.load(f)
|
| 555 |
+
|
| 556 |
+
# Update the user_evaluation
|
| 557 |
+
task_data["traceMetadata"]["user_evaluation"] = user_evaluation
|
| 558 |
+
|
| 559 |
+
# Save the updated data
|
| 560 |
+
with open(tasks_file, "w") as f:
|
| 561 |
+
json.dump(task_data, f, indent=2)
|
| 562 |
+
|
| 563 |
+
except (KeyError, TypeError) as e:
|
| 564 |
+
raise ValueError(f"Error processing trace evaluation update: {e}")
|
| 565 |
+
|
| 566 |
async def stop_task(self, trace_id: str):
|
| 567 |
"""Stop a task"""
|
| 568 |
if trace_id in self.active_tasks:
|
|
|
|
| 606 |
|
| 607 |
except Exception as e:
|
| 608 |
logger.error(f"Error cleaning up task {message_id}: {e}", exc_info=True)
|
| 609 |
+
|
| 610 |
+
async def cleanup(self):
|
| 611 |
+
"""
|
| 612 |
+
Cleanup method called during service shutdown.
|
| 613 |
+
Stops the archival service and releases the lock file.
|
| 614 |
+
"""
|
| 615 |
+
try:
|
| 616 |
+
# Stop the archival service if it's running
|
| 617 |
+
if self.archival_service.is_alive():
|
| 618 |
+
logger.info("Stopping archival service...")
|
| 619 |
+
self.archival_service.stop()
|
| 620 |
+
logger.info("Archival service stopped")
|
| 621 |
+
|
| 622 |
+
# Release the lock file if we hold it
|
| 623 |
+
if self._archival_lock_file:
|
| 624 |
+
try:
|
| 625 |
+
fcntl.flock(self._archival_lock_file.fileno(), fcntl.LOCK_UN)
|
| 626 |
+
self._archival_lock_file.close()
|
| 627 |
+
logger.info("Released archival service lock")
|
| 628 |
+
except Exception as e:
|
| 629 |
+
logger.warning(f"Error releasing archival lock: {e}")
|
| 630 |
+
finally:
|
| 631 |
+
self._archival_lock_file = None
|
| 632 |
+
|
| 633 |
+
except Exception as e:
|
| 634 |
+
logger.error(f"Error during AgentService cleanup: {e}", exc_info=True)
|
cua2-core/src/cua2_core/services/agent_utils/get_model.py
CHANGED
|
@@ -3,9 +3,7 @@ from smolagents import InferenceClientModel, Model
|
|
| 3 |
# Available model IDs
|
| 4 |
AVAILABLE_MODELS = [
|
| 5 |
"Qwen/Qwen3-VL-8B-Instruct",
|
| 6 |
-
"Qwen/Qwen3-VL-8B-Thinking",
|
| 7 |
"Qwen/Qwen3-VL-30B-A3B-Instruct",
|
| 8 |
-
"Qwen/Qwen3-VL-30B-A3B-Thinking",
|
| 9 |
]
|
| 10 |
|
| 11 |
|
|
|
|
| 3 |
# Available model IDs
|
| 4 |
AVAILABLE_MODELS = [
|
| 5 |
"Qwen/Qwen3-VL-8B-Instruct",
|
|
|
|
| 6 |
"Qwen/Qwen3-VL-30B-A3B-Instruct",
|
|
|
|
| 7 |
]
|
| 8 |
|
| 9 |
|
cua2-core/src/cua2_core/services/archival_service.py
CHANGED
|
@@ -298,9 +298,12 @@ def _process_old_folders(
|
|
| 298 |
f"Successfully verified {archive_path.name} in HuggingFace repo"
|
| 299 |
)
|
| 300 |
|
| 301 |
-
# Delete the local folder
|
| 302 |
-
|
| 303 |
-
|
|
|
|
|
|
|
|
|
|
| 304 |
|
| 305 |
# Delete the local archive
|
| 306 |
archive_path.unlink(missing_ok=True)
|
|
@@ -403,10 +406,6 @@ def _verify_file_in_repo(hf_dataset_repo: str, hf_token: str, filename: str) ->
|
|
| 403 |
filename=filename,
|
| 404 |
repo_type="dataset",
|
| 405 |
token=hf_token,
|
| 406 |
-
local_dir_use_symlinks=False,
|
| 407 |
-
# Just check if file exists without actually downloading
|
| 408 |
-
cache_dir=None,
|
| 409 |
-
local_files_only=False,
|
| 410 |
)
|
| 411 |
|
| 412 |
logger.info(f"Verified {filename} exists in repo")
|
|
|
|
| 298 |
f"Successfully verified {archive_path.name} in HuggingFace repo"
|
| 299 |
)
|
| 300 |
|
| 301 |
+
# Delete the local folder (check if it still exists to avoid race conditions)
|
| 302 |
+
if folder.exists():
|
| 303 |
+
shutil.rmtree(folder)
|
| 304 |
+
logger.info(f"Deleted local folder: {folder_name}")
|
| 305 |
+
else:
|
| 306 |
+
logger.warning(f"Folder {folder_name} already deleted, skipping")
|
| 307 |
|
| 308 |
# Delete the local archive
|
| 309 |
archive_path.unlink(missing_ok=True)
|
|
|
|
| 406 |
filename=filename,
|
| 407 |
repo_type="dataset",
|
| 408 |
token=hf_token,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 409 |
)
|
| 410 |
|
| 411 |
logger.info(f"Verified {filename} exists in repo")
|
cua2-front/src/components/WelcomeScreen.tsx
CHANGED
|
@@ -150,13 +150,15 @@ export const WelcomeScreen: React.FC<WelcomeScreenProps> = ({ onStartTask, isCon
|
|
| 150 |
Computer Use Agent
|
| 151 |
</Typography>
|
| 152 |
|
| 153 |
-
{/* Powered by smolagents */}
|
| 154 |
<Box
|
| 155 |
sx={{
|
| 156 |
display: 'flex',
|
| 157 |
alignItems: 'center',
|
| 158 |
gap: 1,
|
| 159 |
mb: 2,
|
|
|
|
|
|
|
| 160 |
}}
|
| 161 |
>
|
| 162 |
<Typography
|
|
@@ -168,6 +170,8 @@ export const WelcomeScreen: React.FC<WelcomeScreenProps> = ({ onStartTask, isCon
|
|
| 168 |
>
|
| 169 |
Powered by
|
| 170 |
</Typography>
|
|
|
|
|
|
|
| 171 |
<Box
|
| 172 |
component="a"
|
| 173 |
href="https://github.com/huggingface/smolagents"
|
|
@@ -239,6 +243,61 @@ export const WelcomeScreen: React.FC<WelcomeScreenProps> = ({ onStartTask, isCon
|
|
| 239 |
</Typography>
|
| 240 |
</Box>
|
| 241 |
</Box>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 242 |
</Box>
|
| 243 |
|
| 244 |
{/* Subtitle */}
|
|
@@ -259,12 +318,29 @@ export const WelcomeScreen: React.FC<WelcomeScreenProps> = ({ onStartTask, isCon
|
|
| 259 |
sx={{
|
| 260 |
color: 'text.secondary',
|
| 261 |
maxWidth: '650px',
|
| 262 |
-
mb:
|
| 263 |
lineHeight: 1.7,
|
| 264 |
}}
|
| 265 |
>
|
| 266 |
-
|
| 267 |
-
Built by
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 268 |
</Typography>
|
| 269 |
|
| 270 |
{/* Task Input Section */}
|
|
@@ -418,6 +494,24 @@ export const WelcomeScreen: React.FC<WelcomeScreenProps> = ({ onStartTask, isCon
|
|
| 418 |
</Box>
|
| 419 |
</Paper>
|
| 420 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 421 |
{/* Connection status hint */}
|
| 422 |
{!isConnected && (
|
| 423 |
<Typography
|
|
|
|
| 150 |
Computer Use Agent
|
| 151 |
</Typography>
|
| 152 |
|
| 153 |
+
{/* Powered by smolagents and E2B */}
|
| 154 |
<Box
|
| 155 |
sx={{
|
| 156 |
display: 'flex',
|
| 157 |
alignItems: 'center',
|
| 158 |
gap: 1,
|
| 159 |
mb: 2,
|
| 160 |
+
flexWrap: 'wrap',
|
| 161 |
+
justifyContent: 'center',
|
| 162 |
}}
|
| 163 |
>
|
| 164 |
<Typography
|
|
|
|
| 170 |
>
|
| 171 |
Powered by
|
| 172 |
</Typography>
|
| 173 |
+
|
| 174 |
+
{/* smolagents link */}
|
| 175 |
<Box
|
| 176 |
component="a"
|
| 177 |
href="https://github.com/huggingface/smolagents"
|
|
|
|
| 243 |
</Typography>
|
| 244 |
</Box>
|
| 245 |
</Box>
|
| 246 |
+
|
| 247 |
+
{/* Separator */}
|
| 248 |
+
<Typography
|
| 249 |
+
variant="body2"
|
| 250 |
+
sx={{
|
| 251 |
+
color: 'text.secondary',
|
| 252 |
+
mx: 0.5,
|
| 253 |
+
}}
|
| 254 |
+
>
|
| 255 |
+
&
|
| 256 |
+
</Typography>
|
| 257 |
+
|
| 258 |
+
{/* E2B link */}
|
| 259 |
+
<Box
|
| 260 |
+
component="a"
|
| 261 |
+
href="https://e2b.dev/"
|
| 262 |
+
target="_blank"
|
| 263 |
+
rel="noopener noreferrer"
|
| 264 |
+
sx={{
|
| 265 |
+
display: 'flex',
|
| 266 |
+
alignItems: 'center',
|
| 267 |
+
gap: 0.75,
|
| 268 |
+
textDecoration: 'none',
|
| 269 |
+
transition: 'all 0.2s ease',
|
| 270 |
+
'&:hover': {
|
| 271 |
+
'& .e2b-text': {
|
| 272 |
+
textDecoration: 'underline',
|
| 273 |
+
},
|
| 274 |
+
},
|
| 275 |
+
}}
|
| 276 |
+
>
|
| 277 |
+
{/* E2B Logo */}
|
| 278 |
+
<Box
|
| 279 |
+
component="img"
|
| 280 |
+
src="https://avatars.githubusercontent.com/u/129434473?s=200&v=4"
|
| 281 |
+
alt="E2B"
|
| 282 |
+
sx={{
|
| 283 |
+
width: 24,
|
| 284 |
+
height: 24,
|
| 285 |
+
flexShrink: 0,
|
| 286 |
+
borderRadius: '50%',
|
| 287 |
+
}}
|
| 288 |
+
/>
|
| 289 |
+
|
| 290 |
+
<Typography
|
| 291 |
+
className="e2b-text"
|
| 292 |
+
sx={{
|
| 293 |
+
color: 'primary.main',
|
| 294 |
+
fontWeight: 700,
|
| 295 |
+
fontSize: '1rem',
|
| 296 |
+
}}
|
| 297 |
+
>
|
| 298 |
+
E2B
|
| 299 |
+
</Typography>
|
| 300 |
+
</Box>
|
| 301 |
</Box>
|
| 302 |
|
| 303 |
{/* Subtitle */}
|
|
|
|
| 318 |
sx={{
|
| 319 |
color: 'text.secondary',
|
| 320 |
maxWidth: '650px',
|
| 321 |
+
mb: 3,
|
| 322 |
lineHeight: 1.7,
|
| 323 |
}}
|
| 324 |
>
|
| 325 |
+
Experience the future of AI automation as agents operate computers in real time to complete complex on-screen tasks (GUI agents).
|
| 326 |
+
Built by{' '}
|
| 327 |
+
<Box
|
| 328 |
+
component="a"
|
| 329 |
+
href="https://huggingface.co"
|
| 330 |
+
target="_blank"
|
| 331 |
+
rel="noopener noreferrer"
|
| 332 |
+
sx={{
|
| 333 |
+
color: 'primary.main',
|
| 334 |
+
textDecoration: 'none',
|
| 335 |
+
fontWeight: 700,
|
| 336 |
+
'&:hover': {
|
| 337 |
+
textDecoration: 'underline',
|
| 338 |
+
},
|
| 339 |
+
}}
|
| 340 |
+
>
|
| 341 |
+
Hugging Face
|
| 342 |
+
</Box>
|
| 343 |
+
, this platform provides intuitive <strong>visualization and annotation tools</strong>, enabling <strong>manual preferential data annotation</strong> for advanced agentic AI research.
|
| 344 |
</Typography>
|
| 345 |
|
| 346 |
{/* Task Input Section */}
|
|
|
|
| 494 |
</Box>
|
| 495 |
</Paper>
|
| 496 |
|
| 497 |
+
{/* Research Notice */}
|
| 498 |
+
<Typography
|
| 499 |
+
variant="body2"
|
| 500 |
+
sx={{
|
| 501 |
+
color: 'text.secondary',
|
| 502 |
+
maxWidth: '700px',
|
| 503 |
+
mt: 3,
|
| 504 |
+
mb: 2,
|
| 505 |
+
lineHeight: 1.6,
|
| 506 |
+
fontStyle: 'italic',
|
| 507 |
+
opacity: 0.8,
|
| 508 |
+
textAlign: 'center',
|
| 509 |
+
}}
|
| 510 |
+
>
|
| 511 |
+
Please be aware that by using the demo, you agree that the traces are stored for research purposes.
|
| 512 |
+
<strong>Please do not write any personal information.</strong>
|
| 513 |
+
</Typography>
|
| 514 |
+
|
| 515 |
{/* Connection status hint */}
|
| 516 |
{!isConnected && (
|
| 517 |
<Typography
|
cua2-front/src/components/sandbox/SandboxViewer.tsx
CHANGED
|
@@ -110,8 +110,11 @@ export const SandboxViewer: React.FC<SandboxViewerProps> = ({
|
|
| 110 |
|
| 111 |
// Handler to go back to home
|
| 112 |
const handleBackToHome = () => {
|
| 113 |
-
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
| 115 |
};
|
| 116 |
|
| 117 |
// Handler to go back to live mode
|
|
|
|
| 110 |
|
| 111 |
// Handler to go back to home
|
| 112 |
const handleBackToHome = () => {
|
| 113 |
+
// Reset frontend state
|
| 114 |
+
useAgentStore.getState().resetAgent();
|
| 115 |
+
|
| 116 |
+
// Reload the page to reconnect websocket
|
| 117 |
+
window.location.href = '/';
|
| 118 |
};
|
| 119 |
|
| 120 |
// Handler to go back to live mode
|
cua2-front/src/components/sandbox/completionview/CompletionView.tsx
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
|
|
|
|
|
| 1 |
import { AgentStep, AgentTrace, FinalStep } from '@/types/agent';
|
| 2 |
import AccessTimeIcon from '@mui/icons-material/AccessTime';
|
| 3 |
import AddIcon from '@mui/icons-material/Add';
|
|
@@ -11,8 +13,10 @@ import InputIcon from '@mui/icons-material/Input';
|
|
| 11 |
import OutputIcon from '@mui/icons-material/Output';
|
| 12 |
import SmartToyIcon from '@mui/icons-material/SmartToy';
|
| 13 |
import StopCircleIcon from '@mui/icons-material/StopCircle';
|
| 14 |
-
import
|
| 15 |
-
import
|
|
|
|
|
|
|
| 16 |
import { DownloadGifButton } from './DownloadGifButton';
|
| 17 |
import { DownloadJsonButton } from './DownloadJsonButton';
|
| 18 |
|
|
@@ -42,6 +46,30 @@ export const CompletionView: React.FC<CompletionViewProps> = ({
|
|
| 42 |
onDownloadJson,
|
| 43 |
onBackToHome,
|
| 44 |
}) => {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
const getStatusConfig = () => {
|
| 46 |
switch (finalStep.type) {
|
| 47 |
case 'success':
|
|
@@ -227,6 +255,62 @@ export const CompletionView: React.FC<CompletionViewProps> = ({
|
|
| 227 |
</Box>
|
| 228 |
)}
|
| 229 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
{/* Divider before metrics */}
|
| 231 |
<Divider sx={{ my: 2 }} />
|
| 232 |
|
|
|
|
| 1 |
+
import { updateTraceEvaluation } from '@/services/api';
|
| 2 |
+
import { useAgentStore } from '@/stores/agentStore';
|
| 3 |
import { AgentStep, AgentTrace, FinalStep } from '@/types/agent';
|
| 4 |
import AccessTimeIcon from '@mui/icons-material/AccessTime';
|
| 5 |
import AddIcon from '@mui/icons-material/Add';
|
|
|
|
| 13 |
import OutputIcon from '@mui/icons-material/Output';
|
| 14 |
import SmartToyIcon from '@mui/icons-material/SmartToy';
|
| 15 |
import StopCircleIcon from '@mui/icons-material/StopCircle';
|
| 16 |
+
import ThumbDownIcon from '@mui/icons-material/ThumbDown';
|
| 17 |
+
import ThumbUpIcon from '@mui/icons-material/ThumbUp';
|
| 18 |
+
import { Alert, Box, Button, Divider, IconButton, Paper, Tooltip, Typography } from '@mui/material';
|
| 19 |
+
import React, { useState } from 'react';
|
| 20 |
import { DownloadGifButton } from './DownloadGifButton';
|
| 21 |
import { DownloadJsonButton } from './DownloadJsonButton';
|
| 22 |
|
|
|
|
| 46 |
onDownloadJson,
|
| 47 |
onBackToHome,
|
| 48 |
}) => {
|
| 49 |
+
const updateTraceEvaluationInStore = useAgentStore((state) => state.updateTraceEvaluation);
|
| 50 |
+
const [evaluation, setEvaluation] = useState<'success' | 'failed' | 'not_evaluated'>(
|
| 51 |
+
finalStep.metadata.user_evaluation || 'not_evaluated'
|
| 52 |
+
);
|
| 53 |
+
const [isVoting, setIsVoting] = useState(false);
|
| 54 |
+
|
| 55 |
+
const handleTraceEvaluation = async (vote: 'success' | 'failed') => {
|
| 56 |
+
if (isVoting || !trace?.id) return;
|
| 57 |
+
|
| 58 |
+
const newEvaluation = evaluation === vote ? 'not_evaluated' : vote;
|
| 59 |
+
setIsVoting(true);
|
| 60 |
+
|
| 61 |
+
try {
|
| 62 |
+
await updateTraceEvaluation(trace.id, newEvaluation);
|
| 63 |
+
setEvaluation(newEvaluation);
|
| 64 |
+
// Update the store so the evaluation is reflected in JSON export
|
| 65 |
+
updateTraceEvaluationInStore(newEvaluation);
|
| 66 |
+
} catch (error) {
|
| 67 |
+
console.error('Failed to update trace evaluation:', error);
|
| 68 |
+
} finally {
|
| 69 |
+
setIsVoting(false);
|
| 70 |
+
}
|
| 71 |
+
};
|
| 72 |
+
|
| 73 |
const getStatusConfig = () => {
|
| 74 |
switch (finalStep.type) {
|
| 75 |
case 'success':
|
|
|
|
| 255 |
</Box>
|
| 256 |
)}
|
| 257 |
|
| 258 |
+
{/* Trace Evaluation */}
|
| 259 |
+
<Box sx={{ mb: 2 }}>
|
| 260 |
+
<Box sx={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between' }}>
|
| 261 |
+
<Typography
|
| 262 |
+
variant="caption"
|
| 263 |
+
sx={{
|
| 264 |
+
fontWeight: 700,
|
| 265 |
+
color: 'text.secondary',
|
| 266 |
+
fontSize: '0.7rem',
|
| 267 |
+
textTransform: 'uppercase',
|
| 268 |
+
letterSpacing: '0.5px',
|
| 269 |
+
}}
|
| 270 |
+
>
|
| 271 |
+
Was this task completed successfully?
|
| 272 |
+
</Typography>
|
| 273 |
+
|
| 274 |
+
{/* Evaluation buttons */}
|
| 275 |
+
<Box sx={{ display: 'flex', gap: 1 }}>
|
| 276 |
+
<Tooltip title={evaluation === 'success' ? 'Remove success rating' : 'Mark as successful'}>
|
| 277 |
+
<IconButton
|
| 278 |
+
size="small"
|
| 279 |
+
onClick={() => handleTraceEvaluation('success')}
|
| 280 |
+
disabled={isVoting}
|
| 281 |
+
sx={{
|
| 282 |
+
padding: '4px',
|
| 283 |
+
color: evaluation === 'success' ? 'success.main' : 'action.disabled',
|
| 284 |
+
'&:hover': {
|
| 285 |
+
color: 'success.main',
|
| 286 |
+
backgroundColor: (theme) => theme.palette.mode === 'dark' ? 'rgba(102, 187, 106, 0.1)' : 'rgba(102, 187, 106, 0.08)',
|
| 287 |
+
},
|
| 288 |
+
}}
|
| 289 |
+
>
|
| 290 |
+
<ThumbUpIcon sx={{ fontSize: 18 }} />
|
| 291 |
+
</IconButton>
|
| 292 |
+
</Tooltip>
|
| 293 |
+
<Tooltip title={evaluation === 'failed' ? 'Remove failure rating' : 'Mark as failed'}>
|
| 294 |
+
<IconButton
|
| 295 |
+
size="small"
|
| 296 |
+
onClick={() => handleTraceEvaluation('failed')}
|
| 297 |
+
disabled={isVoting}
|
| 298 |
+
sx={{
|
| 299 |
+
padding: '4px',
|
| 300 |
+
color: evaluation === 'failed' ? 'error.main' : 'action.disabled',
|
| 301 |
+
'&:hover': {
|
| 302 |
+
color: 'error.main',
|
| 303 |
+
backgroundColor: (theme) => theme.palette.mode === 'dark' ? 'rgba(244, 67, 54, 0.1)' : 'rgba(244, 67, 54, 0.08)',
|
| 304 |
+
},
|
| 305 |
+
}}
|
| 306 |
+
>
|
| 307 |
+
<ThumbDownIcon sx={{ fontSize: 18 }} />
|
| 308 |
+
</IconButton>
|
| 309 |
+
</Tooltip>
|
| 310 |
+
</Box>
|
| 311 |
+
</Box>
|
| 312 |
+
</Box>
|
| 313 |
+
|
| 314 |
{/* Divider before metrics */}
|
| 315 |
<Divider sx={{ my: 2 }} />
|
| 316 |
|
cua2-front/src/components/steps/FinalStepCard.tsx
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
|
|
| 1 |
import { FinalStep } from '@/types/agent';
|
| 2 |
-
import React from 'react';
|
| 3 |
-
import { Card, CardContent, Box, Typography } from '@mui/material';
|
| 4 |
import CheckIcon from '@mui/icons-material/Check';
|
| 5 |
import CloseIcon from '@mui/icons-material/Close';
|
| 6 |
-
import StopCircleIcon from '@mui/icons-material/StopCircle';
|
| 7 |
import HourglassEmptyIcon from '@mui/icons-material/HourglassEmpty';
|
| 8 |
-
import AccessTimeIcon from '@mui/icons-material/AccessTime';
|
| 9 |
-
import { useAgentStore } from '@/stores/agentStore';
|
|
|
|
| 10 |
|
| 11 |
interface FinalStepCardProps {
|
| 12 |
finalStep: FinalStep;
|
|
@@ -74,8 +74,8 @@ export const FinalStepCard: React.FC<FinalStepCardProps> = ({ finalStep, isActiv
|
|
| 74 |
cursor: 'pointer',
|
| 75 |
boxShadow: isActive
|
| 76 |
? (theme) => `0 2px 8px ${theme.palette.mode === 'dark'
|
| 77 |
-
|
| 78 |
-
|
| 79 |
: 'none',
|
| 80 |
'&:hover': {
|
| 81 |
borderColor: (theme) => `${theme.palette[statusConfig.color].main} !important`,
|
|
|
|
| 1 |
+
import { useAgentStore } from '@/stores/agentStore';
|
| 2 |
import { FinalStep } from '@/types/agent';
|
| 3 |
+
import AccessTimeIcon from '@mui/icons-material/AccessTime';
|
|
|
|
| 4 |
import CheckIcon from '@mui/icons-material/Check';
|
| 5 |
import CloseIcon from '@mui/icons-material/Close';
|
|
|
|
| 6 |
import HourglassEmptyIcon from '@mui/icons-material/HourglassEmpty';
|
| 7 |
+
import StopCircleIcon from '@mui/icons-material/StopCircle';
|
| 8 |
+
import { Box, Card, CardContent, Typography } from '@mui/material';
|
| 9 |
+
import React from 'react';
|
| 10 |
|
| 11 |
interface FinalStepCardProps {
|
| 12 |
finalStep: FinalStep;
|
|
|
|
| 74 |
cursor: 'pointer',
|
| 75 |
boxShadow: isActive
|
| 76 |
? (theme) => `0 2px 8px ${theme.palette.mode === 'dark'
|
| 77 |
+
? `rgba(${statusConfig.color === 'success' ? '102, 187, 106' : statusConfig.color === 'error' ? '244, 67, 54' : '255, 152, 0'}, 0.3)`
|
| 78 |
+
: `rgba(${statusConfig.color === 'success' ? '102, 187, 106' : statusConfig.color === 'error' ? '244, 67, 54' : '255, 152, 0'}, 0.2)`}`
|
| 79 |
: 'none',
|
| 80 |
'&:hover': {
|
| 81 |
borderColor: (theme) => `${theme.palette[statusConfig.color].main} !important`,
|
cua2-front/src/components/steps/StepCard.tsx
CHANGED
|
@@ -1,16 +1,14 @@
|
|
|
|
|
|
|
|
| 1 |
import { AgentStep } from '@/types/agent';
|
| 2 |
-
import React, { useState } from 'react';
|
| 3 |
-
import { Card, CardContent, Box, Typography, Divider, Chip, Paper, Accordion, AccordionSummary, AccordionDetails, IconButton, Tooltip } from '@mui/material';
|
| 4 |
-
import ThoughtBubbleIcon from '@mui/icons-material/Psychology';
|
| 5 |
-
import BoltIcon from '@mui/icons-material/Bolt';
|
| 6 |
import AccessTimeIcon from '@mui/icons-material/AccessTime';
|
|
|
|
| 7 |
import InputIcon from '@mui/icons-material/Input';
|
| 8 |
import OutputIcon from '@mui/icons-material/Output';
|
| 9 |
-
import ExpandMoreIcon from '@mui/icons-material/ExpandMore';
|
| 10 |
-
import ThumbUpIcon from '@mui/icons-material/ThumbUp';
|
| 11 |
import ThumbDownIcon from '@mui/icons-material/ThumbDown';
|
| 12 |
-
import
|
| 13 |
-
import {
|
|
|
|
| 14 |
|
| 15 |
interface StepCardProps {
|
| 16 |
step: AgentStep;
|
|
@@ -21,6 +19,7 @@ interface StepCardProps {
|
|
| 21 |
|
| 22 |
export const StepCard: React.FC<StepCardProps> = ({ step, index, isLatest = false, isActive = false }) => {
|
| 23 |
const setSelectedStepIndex = useAgentStore((state) => state.setSelectedStepIndex);
|
|
|
|
| 24 |
const [thoughtExpanded, setThoughtExpanded] = useState(false);
|
| 25 |
const [evaluation, setEvaluation] = useState<'like' | 'dislike' | 'neutral'>(step.step_evaluation || 'neutral');
|
| 26 |
const [isVoting, setIsVoting] = useState(false);
|
|
@@ -44,6 +43,8 @@ export const StepCard: React.FC<StepCardProps> = ({ step, index, isLatest = fals
|
|
| 44 |
try {
|
| 45 |
await updateStepEvaluation(step.traceId, step.stepId, newEvaluation);
|
| 46 |
setEvaluation(newEvaluation);
|
|
|
|
|
|
|
| 47 |
} catch (error) {
|
| 48 |
console.error('Failed to update step evaluation:', error);
|
| 49 |
} finally {
|
|
@@ -206,7 +207,7 @@ export const StepCard: React.FC<StepCardProps> = ({ step, index, isLatest = fals
|
|
| 206 |
</Tooltip>
|
| 207 |
</Box>
|
| 208 |
</Box>
|
| 209 |
-
<Box component="ul" sx={{ listStyle: 'none', p: 0, m: 0}}>
|
| 210 |
{step.actions.map((action, actionIndex) => (
|
| 211 |
<Box
|
| 212 |
key={actionIndex}
|
|
|
|
| 1 |
+
import { updateStepEvaluation } from '@/services/api';
|
| 2 |
+
import { useAgentStore } from '@/stores/agentStore';
|
| 3 |
import { AgentStep } from '@/types/agent';
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
import AccessTimeIcon from '@mui/icons-material/AccessTime';
|
| 5 |
+
import ExpandMoreIcon from '@mui/icons-material/ExpandMore';
|
| 6 |
import InputIcon from '@mui/icons-material/Input';
|
| 7 |
import OutputIcon from '@mui/icons-material/Output';
|
|
|
|
|
|
|
| 8 |
import ThumbDownIcon from '@mui/icons-material/ThumbDown';
|
| 9 |
+
import ThumbUpIcon from '@mui/icons-material/ThumbUp';
|
| 10 |
+
import { Accordion, AccordionDetails, AccordionSummary, Box, Card, CardContent, Chip, IconButton, Tooltip, Typography } from '@mui/material';
|
| 11 |
+
import React, { useState } from 'react';
|
| 12 |
|
| 13 |
interface StepCardProps {
|
| 14 |
step: AgentStep;
|
|
|
|
| 19 |
|
| 20 |
export const StepCard: React.FC<StepCardProps> = ({ step, index, isLatest = false, isActive = false }) => {
|
| 21 |
const setSelectedStepIndex = useAgentStore((state) => state.setSelectedStepIndex);
|
| 22 |
+
const updateStepEvaluationInStore = useAgentStore((state) => state.updateStepEvaluation);
|
| 23 |
const [thoughtExpanded, setThoughtExpanded] = useState(false);
|
| 24 |
const [evaluation, setEvaluation] = useState<'like' | 'dislike' | 'neutral'>(step.step_evaluation || 'neutral');
|
| 25 |
const [isVoting, setIsVoting] = useState(false);
|
|
|
|
| 43 |
try {
|
| 44 |
await updateStepEvaluation(step.traceId, step.stepId, newEvaluation);
|
| 45 |
setEvaluation(newEvaluation);
|
| 46 |
+
// Update the store so the evaluation is reflected in JSON export
|
| 47 |
+
updateStepEvaluationInStore(step.stepId, newEvaluation);
|
| 48 |
} catch (error) {
|
| 49 |
console.error('Failed to update step evaluation:', error);
|
| 50 |
} finally {
|
|
|
|
| 207 |
</Tooltip>
|
| 208 |
</Box>
|
| 209 |
</Box>
|
| 210 |
+
<Box component="ul" sx={{ listStyle: 'none', p: 0, m: 0 }}>
|
| 211 |
{step.actions.map((action, actionIndex) => (
|
| 212 |
<Box
|
| 213 |
key={actionIndex}
|
cua2-front/src/services/api.ts
CHANGED
|
@@ -54,3 +54,25 @@ export async function updateStepEvaluation(
|
|
| 54 |
throw new Error('Failed to update step evaluation');
|
| 55 |
}
|
| 56 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
throw new Error('Failed to update step evaluation');
|
| 55 |
}
|
| 56 |
}
|
| 57 |
+
|
| 58 |
+
/**
|
| 59 |
+
* Update trace evaluation (overall task feedback)
|
| 60 |
+
*/
|
| 61 |
+
export async function updateTraceEvaluation(
|
| 62 |
+
traceId: string,
|
| 63 |
+
evaluation: 'success' | 'failed' | 'not_evaluated'
|
| 64 |
+
): Promise<void> {
|
| 65 |
+
const response = await fetch(`${getApiBaseUrl()}/traces/${traceId}/evaluation`, {
|
| 66 |
+
method: 'PATCH',
|
| 67 |
+
headers: {
|
| 68 |
+
'Content-Type': 'application/json',
|
| 69 |
+
},
|
| 70 |
+
body: JSON.stringify({
|
| 71 |
+
user_evaluation: evaluation,
|
| 72 |
+
}),
|
| 73 |
+
});
|
| 74 |
+
|
| 75 |
+
if (!response.ok) {
|
| 76 |
+
throw new Error('Failed to update trace evaluation');
|
| 77 |
+
}
|
| 78 |
+
}
|
cua2-front/src/services/jsonExporter.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
import {
|
| 2 |
|
| 3 |
/**
|
| 4 |
* Extract final answer from steps
|
|
@@ -60,14 +60,13 @@ export const exportTraceToJson = (
|
|
| 60 |
traceId: step.traceId,
|
| 61 |
stepId: step.stepId,
|
| 62 |
error: step.error,
|
|
|
|
| 63 |
thought: step.thought,
|
| 64 |
actions: step.actions,
|
| 65 |
duration: step.duration,
|
| 66 |
inputTokensUsed: step.inputTokensUsed,
|
| 67 |
outputTokensUsed: step.outputTokensUsed,
|
| 68 |
step_evaluation: step.step_evaluation,
|
| 69 |
-
// Don't include base64 image to reduce JSON size
|
| 70 |
-
hasImage: !!step.image,
|
| 71 |
})),
|
| 72 |
exportedAt: new Date().toISOString(),
|
| 73 |
};
|
|
|
|
| 1 |
+
import { AgentStep, AgentTrace, AgentTraceMetadata, FinalStep } from '@/types/agent';
|
| 2 |
|
| 3 |
/**
|
| 4 |
* Extract final answer from steps
|
|
|
|
| 60 |
traceId: step.traceId,
|
| 61 |
stepId: step.stepId,
|
| 62 |
error: step.error,
|
| 63 |
+
image: step.image, // Include full base64 image
|
| 64 |
thought: step.thought,
|
| 65 |
actions: step.actions,
|
| 66 |
duration: step.duration,
|
| 67 |
inputTokensUsed: step.inputTokensUsed,
|
| 68 |
outputTokensUsed: step.outputTokensUsed,
|
| 69 |
step_evaluation: step.step_evaluation,
|
|
|
|
|
|
|
| 70 |
})),
|
| 71 |
exportedAt: new Date().toISOString(),
|
| 72 |
};
|
cua2-front/src/stores/agentStore.ts
CHANGED
|
@@ -22,6 +22,8 @@ interface AgentState {
|
|
| 22 |
setTrace: (trace: AgentTrace | undefined) => void;
|
| 23 |
setTraceId: (traceId: string | null) => void;
|
| 24 |
updateTraceWithStep: (step: AgentStep, metadata: AgentTraceMetadata) => void;
|
|
|
|
|
|
|
| 25 |
completeTrace: (metadata: AgentTraceMetadata, finalState?: 'success' | 'stopped' | 'max_steps_reached' | 'error' | 'sandbox_timeout') => void;
|
| 26 |
setIsAgentProcessing: (processing: boolean) => void;
|
| 27 |
setIsConnectingToE2B: (connecting: boolean) => void;
|
|
@@ -97,6 +99,59 @@ export const useAgentStore = create<AgentState>()(
|
|
| 97 |
'updateTraceWithStep'
|
| 98 |
),
|
| 99 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
// Complete the trace
|
| 101 |
completeTrace: (metadata, finalState?: 'success' | 'stopped' | 'max_steps_reached' | 'error' | 'sandbox_timeout') =>
|
| 102 |
set(
|
|
@@ -196,12 +251,16 @@ export const useAgentStore = create<AgentState>()(
|
|
| 196 |
numberOfSteps: state.trace.steps?.length || 0,
|
| 197 |
maxSteps: 200,
|
| 198 |
completed: false,
|
|
|
|
|
|
|
| 199 |
};
|
| 200 |
|
| 201 |
// Ensure maxSteps is not 0
|
| 202 |
-
const finalMetadata = {
|
| 203 |
...metadata,
|
| 204 |
maxSteps: metadata.maxSteps > 0 ? metadata.maxSteps : 200,
|
|
|
|
|
|
|
| 205 |
};
|
| 206 |
|
| 207 |
const finalStep: FinalStep = {
|
|
|
|
| 22 |
setTrace: (trace: AgentTrace | undefined) => void;
|
| 23 |
setTraceId: (traceId: string | null) => void;
|
| 24 |
updateTraceWithStep: (step: AgentStep, metadata: AgentTraceMetadata) => void;
|
| 25 |
+
updateStepEvaluation: (stepId: string, evaluation: 'like' | 'dislike' | 'neutral') => void;
|
| 26 |
+
updateTraceEvaluation: (evaluation: 'success' | 'failed' | 'not_evaluated') => void;
|
| 27 |
completeTrace: (metadata: AgentTraceMetadata, finalState?: 'success' | 'stopped' | 'max_steps_reached' | 'error' | 'sandbox_timeout') => void;
|
| 28 |
setIsAgentProcessing: (processing: boolean) => void;
|
| 29 |
setIsConnectingToE2B: (connecting: boolean) => void;
|
|
|
|
| 99 |
'updateTraceWithStep'
|
| 100 |
),
|
| 101 |
|
| 102 |
+
// Update step evaluation in the store
|
| 103 |
+
updateStepEvaluation: (stepId, evaluation) =>
|
| 104 |
+
set(
|
| 105 |
+
(state) => {
|
| 106 |
+
if (!state.trace || !state.trace.steps) return state;
|
| 107 |
+
|
| 108 |
+
const updatedSteps = state.trace.steps.map((step) =>
|
| 109 |
+
step.stepId === stepId
|
| 110 |
+
? { ...step, step_evaluation: evaluation }
|
| 111 |
+
: step
|
| 112 |
+
);
|
| 113 |
+
|
| 114 |
+
return {
|
| 115 |
+
trace: {
|
| 116 |
+
...state.trace,
|
| 117 |
+
steps: updatedSteps,
|
| 118 |
+
},
|
| 119 |
+
};
|
| 120 |
+
},
|
| 121 |
+
false,
|
| 122 |
+
'updateStepEvaluation'
|
| 123 |
+
),
|
| 124 |
+
|
| 125 |
+
// Update trace evaluation in the store
|
| 126 |
+
updateTraceEvaluation: (evaluation) =>
|
| 127 |
+
set(
|
| 128 |
+
(state) => {
|
| 129 |
+
if (!state.trace || !state.trace.traceMetadata) return state;
|
| 130 |
+
|
| 131 |
+
const updatedMetadata = {
|
| 132 |
+
...state.trace.traceMetadata,
|
| 133 |
+
user_evaluation: evaluation,
|
| 134 |
+
};
|
| 135 |
+
|
| 136 |
+
return {
|
| 137 |
+
trace: {
|
| 138 |
+
...state.trace,
|
| 139 |
+
traceMetadata: updatedMetadata,
|
| 140 |
+
},
|
| 141 |
+
// Also update finalStep metadata if it exists
|
| 142 |
+
finalStep: state.finalStep ? {
|
| 143 |
+
...state.finalStep,
|
| 144 |
+
metadata: {
|
| 145 |
+
...state.finalStep.metadata,
|
| 146 |
+
user_evaluation: evaluation,
|
| 147 |
+
},
|
| 148 |
+
} : state.finalStep,
|
| 149 |
+
};
|
| 150 |
+
},
|
| 151 |
+
false,
|
| 152 |
+
'updateTraceEvaluation'
|
| 153 |
+
),
|
| 154 |
+
|
| 155 |
// Complete the trace
|
| 156 |
completeTrace: (metadata, finalState?: 'success' | 'stopped' | 'max_steps_reached' | 'error' | 'sandbox_timeout') =>
|
| 157 |
set(
|
|
|
|
| 251 |
numberOfSteps: state.trace.steps?.length || 0,
|
| 252 |
maxSteps: 200,
|
| 253 |
completed: false,
|
| 254 |
+
final_state: null,
|
| 255 |
+
user_evaluation: 'not_evaluated' as const,
|
| 256 |
};
|
| 257 |
|
| 258 |
// Ensure maxSteps is not 0
|
| 259 |
+
const finalMetadata: AgentTraceMetadata = {
|
| 260 |
...metadata,
|
| 261 |
maxSteps: metadata.maxSteps > 0 ? metadata.maxSteps : 200,
|
| 262 |
+
final_state: metadata.final_state || null,
|
| 263 |
+
user_evaluation: metadata.user_evaluation || 'not_evaluated',
|
| 264 |
};
|
| 265 |
|
| 266 |
const finalStep: FinalStep = {
|
cua2-front/src/types/agent.ts
CHANGED
|
@@ -36,6 +36,7 @@ export interface AgentTraceMetadata {
|
|
| 36 |
maxSteps: number;
|
| 37 |
completed: boolean;
|
| 38 |
final_state: 'success' | 'stopped' | 'max_steps_reached' | 'error' | 'sandbox_timeout' | null;
|
|
|
|
| 39 |
}
|
| 40 |
|
| 41 |
export interface FinalStep {
|
|
|
|
| 36 |
maxSteps: number;
|
| 37 |
completed: boolean;
|
| 38 |
final_state: 'success' | 'stopped' | 'max_steps_reached' | 'error' | 'sandbox_timeout' | null;
|
| 39 |
+
user_evaluation?: 'success' | 'failed' | 'not_evaluated';
|
| 40 |
}
|
| 41 |
|
| 42 |
export interface FinalStep {
|