Spaces:

smolagents
/

computer-use-agent

Running

App Files Community

A-Mahla committed on Nov 14, 2025

Commit

3593cc3

unverified ·

1 Parent(s): f9fd9fa

Little fixes (#22)

Browse files

* Little fix

* CHG text

* FIX pre-commit

Files changed (15) hide show

cua2-core/src/cua2_core/app.py +1 -0
cua2-core/src/cua2_core/models/models.py +25 -0
cua2-core/src/cua2_core/routes/routes.py +26 -0
cua2-core/src/cua2_core/services/agent_service.py +117 -3
cua2-core/src/cua2_core/services/agent_utils/get_model.py +0 -2
cua2-core/src/cua2_core/services/archival_service.py +6 -7
cua2-front/src/components/WelcomeScreen.tsx +98 -4
cua2-front/src/components/sandbox/SandboxViewer.tsx +5 -2
cua2-front/src/components/sandbox/completionview/CompletionView.tsx +86 -2
cua2-front/src/components/steps/FinalStepCard.tsx +7 -7
cua2-front/src/components/steps/StepCard.tsx +10 -9
cua2-front/src/services/api.ts +22 -0
cua2-front/src/services/jsonExporter.ts +2 -3
cua2-front/src/stores/agentStore.ts +60 -1
cua2-front/src/types/agent.ts +1 -0

cua2-core/src/cua2_core/app.py CHANGED Viewed

@@ -39,6 +39,7 @@ async def lifespan(app: FastAPI):
     yield
     print("Shutting down services...")
     await sandbox_service.cleanup_sandboxes()
     print("Services shut down successfully")

     yield
     print("Shutting down services...")
+    await agent_service.cleanup()
     await sandbox_service.cleanup_sandboxes()
     print("Services shut down successfully")

cua2-core/src/cua2_core/models/models.py CHANGED Viewed

@@ -140,6 +140,7 @@ class AgentTraceMetadata(BaseModel):
         Literal["success", "stopped", "max_steps_reached", "error", "sandbox_timeout"]
         | None
     ) = None
 class AgentTrace(BaseModel):
@@ -248,6 +249,14 @@ class StopTask(BaseModel):
     traceId: str
 ##################### Agent Service ########################
@@ -314,6 +323,7 @@ class ActiveTask(BaseModel):
             "success", "stopped", "max_steps_reached", "error", "sandbox_timeout"
         ]
         | None = None,
     ):
         """Update trace metadata"""
         with self._file_lock:
@@ -329,6 +339,8 @@ class ActiveTask(BaseModel):
                 self.traceMetadata.completed = completed
             if final_state is not None:
                 self.traceMetadata.final_state = final_state
 #################### API Routes Models ########################
@@ -369,6 +381,19 @@ class UpdateStepResponse(BaseModel):
     message: str
 class AvailableModelsResponse(BaseModel):
     """Response for available models"""

         Literal["success", "stopped", "max_steps_reached", "error", "sandbox_timeout"]
         | None
     ) = None
+    user_evaluation: Literal["success", "failed", "not_evaluated"] = "not_evaluated"
 class AgentTrace(BaseModel):
     traceId: str
+class TraceEvaluation(BaseModel):
+    """Trace evaluation message"""
+    # Only the literal value "trace_evaluation" is accepted for this field.
+    event_type: Literal["trace_evaluation"]
+    # ID of the trace the evaluation applies to.
+    traceId: str
+    # Overall user verdict: "success", "failed" or "not_evaluated".
+    user_evaluation: Literal["success", "failed", "not_evaluated"]
 ##################### Agent Service ########################
             "success", "stopped", "max_steps_reached", "error", "sandbox_timeout"
         ]
         | None = None,
+        user_evaluation: Literal["success", "failed", "not_evaluated"] | None = None,
     ):
         """Update trace metadata"""
         with self._file_lock:
                 self.traceMetadata.completed = completed
             if final_state is not None:
                 self.traceMetadata.final_state = final_state
+            if user_evaluation is not None:
+                self.traceMetadata.user_evaluation = user_evaluation
 #################### API Routes Models ########################
     message: str
+class UpdateTraceEvaluationRequest(BaseModel):
+    """Request model for updating trace evaluation"""
+    # New overall user verdict for the trace; "not_evaluated" is an accepted
+    # value, so a request can also set the verdict back to unevaluated.
+    user_evaluation: Literal["success", "failed", "not_evaluated"]
+class UpdateTraceEvaluationResponse(BaseModel):
+    """Response model for trace evaluation update"""
+    # Whether the update was applied.
+    success: bool
+    # Human-readable status text returned to the client.
+    message: str
 class AvailableModelsResponse(BaseModel):
     """Response for available models"""

cua2-core/src/cua2_core/routes/routes.py CHANGED Viewed

@@ -8,6 +8,8 @@ from cua2_core.models.models import (
     HealthResponse,
     UpdateStepRequest,
     UpdateStepResponse,
 )
 from cua2_core.services.agent_service import AgentService
 from cua2_core.services.agent_utils.get_model import AVAILABLE_MODELS
@@ -93,3 +95,27 @@ async def update_trace_step(
         raise HTTPException(status_code=404, detail=str(e))
     except Exception as e:
         raise HTTPException(status_code=400, detail=str(e))

     HealthResponse,
     UpdateStepRequest,
     UpdateStepResponse,
+    UpdateTraceEvaluationRequest,
+    UpdateTraceEvaluationResponse,
 )
 from cua2_core.services.agent_service import AgentService
 from cua2_core.services.agent_utils.get_model import AVAILABLE_MODELS
         raise HTTPException(status_code=404, detail=str(e))
     except Exception as e:
         raise HTTPException(status_code=400, detail=str(e))
+@router.patch(
+    "/traces/{trace_id}/evaluation", response_model=UpdateTraceEvaluationResponse
+)
+async def update_trace_evaluation(
+    trace_id: str,
+    request: UpdateTraceEvaluationRequest,
+    agent_service: AgentService = Depends(get_agent_service),
+):
+    """Update the user evaluation for a trace (overall task feedback)"""
+    try:
+        agent_service.update_trace_evaluation(
+            trace_id=trace_id,
+            user_evaluation=request.user_evaluation,
+        )
+        return UpdateTraceEvaluationResponse(
+            success=True,
+            message="Trace evaluation updated successfully",
+        )
+    except FileNotFoundError as e:
+        raise HTTPException(status_code=404, detail=str(e))
+    except Exception as e:
+        raise HTTPException(status_code=400, detail=str(e))

cua2-core/src/cua2_core/services/agent_service.py CHANGED Viewed

@@ -1,11 +1,12 @@
 import asyncio
 import base64
 import json
 import logging
 import os
 import time
 from io import BytesIO
-from typing import Callable, Literal
 from uuid import uuid4
 from cua2_core.models.models import (
@@ -52,6 +53,7 @@ class AgentService:
         self.last_screenshot: dict[str, AgentImage | None] = {}
         self._lock = asyncio.Lock()
         self.max_sandboxes = int(600 / num_workers)
         # Initialize archival service in dedicated process
         self.archival_service = ArchivalService(
@@ -61,8 +63,41 @@ class AgentService:
             archive_interval_minutes=30,
             folder_age_threshold_minutes=30,
         )
-        # Start the archival service process
-        self.archival_service.start()
     def _update_archival_active_tasks(self):
         """
@@ -243,6 +278,7 @@ class AgentService:
             self.active_tasks[message_id].update_trace_metadata(
                 final_state=final_state,
             )
             if message_id in self.active_tasks:
@@ -475,6 +511,58 @@ class AgentService:
             except (ValueError, KeyError, TypeError) as e:
                 raise ValueError(f"Error processing step update: {e}")
     async def stop_task(self, trace_id: str):
         """Stop a task"""
         if trace_id in self.active_tasks:
@@ -518,3 +606,29 @@ class AgentService:
             except Exception as e:
                 logger.error(f"Error cleaning up task {message_id}: {e}", exc_info=True)

 import asyncio
 import base64
+import fcntl
 import json
 import logging
 import os
 import time
 from io import BytesIO
+from typing import IO, Callable, Literal
 from uuid import uuid4
 from cua2_core.models.models import (
         self.last_screenshot: dict[str, AgentImage | None] = {}
         self._lock = asyncio.Lock()
         self.max_sandboxes = int(600 / num_workers)
+        self._archival_lock_file: IO[str] | None = None
         # Initialize archival service in dedicated process
         self.archival_service = ArchivalService(
             archive_interval_minutes=30,
             folder_age_threshold_minutes=30,
         )
+        # Start the archival service process only on one worker
+        if self._should_start_archival_service():
+            self.archival_service.start()
+            logger.info(f"Started archival service in worker PID {os.getpid()}")
+        else:
+            logger.info(
+                f"Skipping archival service start in worker PID {os.getpid()} (already running in another worker)"
+            )
+    def _should_start_archival_service(self) -> bool:
+        """
+        Determine if this worker should start the archival service.
+        Uses file-based locking to ensure only one worker across all processes
+        starts the archival service.
+        Returns:
+            True if this worker should start the archival service, False otherwise
+        """
+        lock_file_path = "/tmp/cua2_archival_service.lock"
+        try:
+            self._archival_lock_file = open(lock_file_path, "w")
+            fcntl.flock(
+                self._archival_lock_file.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB
+            )
+            self._archival_lock_file.write(str(os.getpid()))
+            self._archival_lock_file.flush()
+            return True
+        except (IOError, OSError):
+            if self._archival_lock_file:
+                self._archival_lock_file.close()
+                self._archival_lock_file = None
+            return False
     def _update_archival_active_tasks(self):
         """
             self.active_tasks[message_id].update_trace_metadata(
                 final_state=final_state,
+                completed=True,
             )
             if message_id in self.active_tasks:
             except (ValueError, KeyError, TypeError) as e:
                 raise ValueError(f"Error processing step update: {e}")
+    def update_trace_evaluation(
+        self,
+        trace_id: str,
+        user_evaluation: Literal["success", "failed", "not_evaluated"],
+    ):
+        """
+        Update the user evaluation for a trace
+        Args:
+            trace_id: The trace ID
+            user_evaluation: The evaluation value to set
+        Raises:
+            FileNotFoundError: If trace not found
+        """
+        # Try to find in active tasks first
+        active_task = self.active_tasks.get(trace_id)
+        if active_task:
+            # Task is still active
+            active_task.update_trace_metadata(user_evaluation=user_evaluation)
+        else:
+            # Task is not active, try to load from file
+            data_dir = "data"
+            trace_dirs = [
+                d for d in os.listdir(data_dir) if d.startswith(f"trace-{trace_id}")
+            ]
+            if not trace_dirs:
+                raise FileNotFoundError("Trace not found")
+            trace_path = os.path.join(data_dir, trace_dirs[0])
+            tasks_file = os.path.join(trace_path, "tasks.json")
+            if not os.path.exists(tasks_file):
+                raise FileNotFoundError("Trace data not found")
+            try:
+                # Load the trace data
+                with open(tasks_file, "r") as f:
+                    task_data = json.load(f)
+                # Update the user_evaluation
+                task_data["traceMetadata"]["user_evaluation"] = user_evaluation
+                # Save the updated data
+                with open(tasks_file, "w") as f:
+                    json.dump(task_data, f, indent=2)
+            except (KeyError, TypeError) as e:
+                raise ValueError(f"Error processing trace evaluation update: {e}")
     async def stop_task(self, trace_id: str):
         """Stop a task"""
         if trace_id in self.active_tasks:
             except Exception as e:
                 logger.error(f"Error cleaning up task {message_id}: {e}", exc_info=True)
+    async def cleanup(self):
+        """
+        Cleanup method called during service shutdown.
+        Stops the archival service and releases the lock file.
+        """
+        try:
+            # Stop the archival service if it's running
+            if self.archival_service.is_alive():
+                logger.info("Stopping archival service...")
+                self.archival_service.stop()
+                logger.info("Archival service stopped")
+            # Release the lock file if we hold it
+            if self._archival_lock_file:
+                try:
+                    fcntl.flock(self._archival_lock_file.fileno(), fcntl.LOCK_UN)
+                    self._archival_lock_file.close()
+                    logger.info("Released archival service lock")
+                except Exception as e:
+                    logger.warning(f"Error releasing archival lock: {e}")
+                finally:
+                    self._archival_lock_file = None
+        except Exception as e:
+            logger.error(f"Error during AgentService cleanup: {e}", exc_info=True)

cua2-core/src/cua2_core/services/agent_utils/get_model.py CHANGED Viewed

@@ -3,9 +3,7 @@ from smolagents import InferenceClientModel, Model
 # Available model IDs
 AVAILABLE_MODELS = [
     "Qwen/Qwen3-VL-8B-Instruct",
-    "Qwen/Qwen3-VL-8B-Thinking",
     "Qwen/Qwen3-VL-30B-A3B-Instruct",
-    "Qwen/Qwen3-VL-30B-A3B-Thinking",
 ]

 # Available model IDs
 AVAILABLE_MODELS = [
     "Qwen/Qwen3-VL-8B-Instruct",
     "Qwen/Qwen3-VL-30B-A3B-Instruct",
 ]

cua2-core/src/cua2_core/services/archival_service.py CHANGED Viewed

@@ -298,9 +298,12 @@ def _process_old_folders(
                     f"Successfully verified {archive_path.name} in HuggingFace repo"
                 )
-                # Delete the local folder
-                shutil.rmtree(folder)
-                logger.info(f"Deleted local folder: {folder_name}")
                 # Delete the local archive
                 archive_path.unlink(missing_ok=True)
@@ -403,10 +406,6 @@ def _verify_file_in_repo(hf_dataset_repo: str, hf_token: str, filename: str) ->
             filename=filename,
             repo_type="dataset",
             token=hf_token,
-            local_dir_use_symlinks=False,
-            # Just check if file exists without actually downloading
-            cache_dir=None,
-            local_files_only=False,
         )
         logger.info(f"Verified {filename} exists in repo")

                     f"Successfully verified {archive_path.name} in HuggingFace repo"
                 )
+                # Delete the local folder (check if it still exists to avoid race conditions)
+                if folder.exists():
+                    shutil.rmtree(folder)
+                    logger.info(f"Deleted local folder: {folder_name}")
+                else:
+                    logger.warning(f"Folder {folder_name} already deleted, skipping")
                 # Delete the local archive
                 archive_path.unlink(missing_ok=True)
             filename=filename,
             repo_type="dataset",
             token=hf_token,
         )
         logger.info(f"Verified {filename} exists in repo")

cua2-front/src/components/WelcomeScreen.tsx CHANGED Viewed

@@ -150,13 +150,15 @@ export const WelcomeScreen: React.FC<WelcomeScreenProps> = ({ onStartTask, isCon
           Computer Use Agent
         </Typography>
-        {/* Powered by smolagents */}
         <Box
           sx={{
             display: 'flex',
             alignItems: 'center',
             gap: 1,
             mb: 2,
           }}
         >
           <Typography
@@ -168,6 +170,8 @@ export const WelcomeScreen: React.FC<WelcomeScreenProps> = ({ onStartTask, isCon
           >
             Powered by
           </Typography>
           <Box
             component="a"
             href="https://github.com/huggingface/smolagents"
@@ -239,6 +243,61 @@ export const WelcomeScreen: React.FC<WelcomeScreenProps> = ({ onStartTask, isCon
               </Typography>
             </Box>
           </Box>
         </Box>
         {/* Subtitle */}
@@ -259,12 +318,29 @@ export const WelcomeScreen: React.FC<WelcomeScreenProps> = ({ onStartTask, isCon
           sx={{
             color: 'text.secondary',
             maxWidth: '650px',
-            mb: 6,
             lineHeight: 1.7,
           }}
         >
-          Watch in real-time as AI agents write and execute Python code to complete tasks.
-          Built by Hugging Face, <strong>smolagents</strong> is LLM-agnostic and uses <strong>30% fewer steps</strong> than traditional agents.
         </Typography>
         {/* Task Input Section */}
@@ -418,6 +494,24 @@ export const WelcomeScreen: React.FC<WelcomeScreenProps> = ({ onStartTask, isCon
           </Box>
         </Paper>
         {/* Connection status hint */}
         {!isConnected && (
           <Typography

           Computer Use Agent
         </Typography>
+        {/* Powered by smolagents and E2B */}
         <Box
           sx={{
             display: 'flex',
             alignItems: 'center',
             gap: 1,
             mb: 2,
+            flexWrap: 'wrap',
+            justifyContent: 'center',
           }}
         >
           <Typography
           >
             Powered by
           </Typography>
+          {/* smolagents link */}
           <Box
             component="a"
             href="https://github.com/huggingface/smolagents"
               </Typography>
             </Box>
           </Box>
+          {/* Separator */}
+          <Typography
+            variant="body2"
+            sx={{
+              color: 'text.secondary',
+              mx: 0.5,
+            }}
+          >
+            &
+          </Typography>
+          {/* E2B link */}
+          <Box
+            component="a"
+            href="https://e2b.dev/"
+            target="_blank"
+            rel="noopener noreferrer"
+            sx={{
+              display: 'flex',
+              alignItems: 'center',
+              gap: 0.75,
+              textDecoration: 'none',
+              transition: 'all 0.2s ease',
+              '&:hover': {
+                '& .e2b-text': {
+                  textDecoration: 'underline',
+                },
+              },
+            }}
+          >
+            {/* E2B Logo */}
+            <Box
+              component="img"
+              src="https://avatars.githubusercontent.com/u/129434473?s=200&v=4"
+              alt="E2B"
+              sx={{
+                width: 24,
+                height: 24,
+                flexShrink: 0,
+                borderRadius: '50%',
+              }}
+            />
+            <Typography
+              className="e2b-text"
+              sx={{
+                color: 'primary.main',
+                fontWeight: 700,
+                fontSize: '1rem',
+              }}
+            >
+              E2B
+            </Typography>
+          </Box>
         </Box>
         {/* Subtitle */}
           sx={{
             color: 'text.secondary',
             maxWidth: '650px',
+            mb: 3,
             lineHeight: 1.7,
           }}
         >
+          Experience the future of AI automation as agents operate computers in real time to complete complex on-screen tasks (GUI agents).
+          Built by{' '}
+          <Box
+            component="a"
+            href="https://huggingface.co"
+            target="_blank"
+            rel="noopener noreferrer"
+            sx={{
+              color: 'primary.main',
+              textDecoration: 'none',
+              fontWeight: 700,
+              '&:hover': {
+                textDecoration: 'underline',
+              },
+            }}
+          >
+            Hugging Face
+          </Box>
+          , this platform provides intuitive <strong>visualization and annotation tools</strong>, enabling <strong>manual preferential data annotation</strong> for advanced agentic AI research.
         </Typography>
         {/* Task Input Section */}
           </Box>
         </Paper>
+        {/* Research Notice */}
+        <Typography
+          variant="body2"
+          sx={{
+            color: 'text.secondary',
+            maxWidth: '700px',
+            mt: 3,
+            mb: 2,
+            lineHeight: 1.6,
+            fontStyle: 'italic',
+            opacity: 0.8,
+            textAlign: 'center',
+          }}
+        >
+          Please be aware that by using the demo, you agree that the traces are stored for research purposes.
+          <strong>Please do not write any personal information.</strong>
+        </Typography>
         {/* Connection status hint */}
         {!isConnected && (
           <Typography

cua2-front/src/components/sandbox/SandboxViewer.tsx CHANGED Viewed

@@ -110,8 +110,11 @@ export const SandboxViewer: React.FC<SandboxViewerProps> = ({
   // Handler to go back to home
   const handleBackToHome = () => {
-    resetAgent();
-    navigate('/');
   };
   // Handler to go back to live mode

   // Handler to go back to home
   const handleBackToHome = () => {
+    // Reset frontend state
+    useAgentStore.getState().resetAgent();
+    // Reload the page to reconnect websocket
+    window.location.href = '/';
   };
   // Handler to go back to live mode

cua2-front/src/components/sandbox/completionview/CompletionView.tsx CHANGED Viewed

@@ -1,3 +1,5 @@
 import { AgentStep, AgentTrace, FinalStep } from '@/types/agent';
 import AccessTimeIcon from '@mui/icons-material/AccessTime';
 import AddIcon from '@mui/icons-material/Add';
@@ -11,8 +13,10 @@ import InputIcon from '@mui/icons-material/Input';
 import OutputIcon from '@mui/icons-material/Output';
 import SmartToyIcon from '@mui/icons-material/SmartToy';
 import StopCircleIcon from '@mui/icons-material/StopCircle';
-import { Alert, Box, Button, Divider, Paper, Typography } from '@mui/material';
-import React from 'react';
 import { DownloadGifButton } from './DownloadGifButton';
 import { DownloadJsonButton } from './DownloadJsonButton';
@@ -42,6 +46,30 @@ export const CompletionView: React.FC<CompletionViewProps> = ({
   onDownloadJson,
   onBackToHome,
 }) => {
   const getStatusConfig = () => {
     switch (finalStep.type) {
       case 'success':
@@ -227,6 +255,62 @@ export const CompletionView: React.FC<CompletionViewProps> = ({
           </Box>
         )}
         {/* Divider before metrics */}
         <Divider sx={{ my: 2 }} />

+import { updateTraceEvaluation } from '@/services/api';
+import { useAgentStore } from '@/stores/agentStore';
 import { AgentStep, AgentTrace, FinalStep } from '@/types/agent';
 import AccessTimeIcon from '@mui/icons-material/AccessTime';
 import AddIcon from '@mui/icons-material/Add';
 import OutputIcon from '@mui/icons-material/Output';
 import SmartToyIcon from '@mui/icons-material/SmartToy';
 import StopCircleIcon from '@mui/icons-material/StopCircle';
+import ThumbDownIcon from '@mui/icons-material/ThumbDown';
+import ThumbUpIcon from '@mui/icons-material/ThumbUp';
+import { Alert, Box, Button, Divider, IconButton, Paper, Tooltip, Typography } from '@mui/material';
+import React, { useState } from 'react';
 import { DownloadGifButton } from './DownloadGifButton';
 import { DownloadJsonButton } from './DownloadJsonButton';
   onDownloadJson,
   onBackToHome,
 }) => {
+  const updateTraceEvaluationInStore = useAgentStore((state) => state.updateTraceEvaluation);
+  const [evaluation, setEvaluation] = useState<'success' | 'failed' | 'not_evaluated'>(
+    finalStep.metadata.user_evaluation || 'not_evaluated'
+  );
+  const [isVoting, setIsVoting] = useState(false);
+  const handleTraceEvaluation = async (vote: 'success' | 'failed') => {
+    if (isVoting || !trace?.id) return;
+    const newEvaluation = evaluation === vote ? 'not_evaluated' : vote;
+    setIsVoting(true);
+    try {
+      await updateTraceEvaluation(trace.id, newEvaluation);
+      setEvaluation(newEvaluation);
+      // Update the store so the evaluation is reflected in JSON export
+      updateTraceEvaluationInStore(newEvaluation);
+    } catch (error) {
+      console.error('Failed to update trace evaluation:', error);
+    } finally {
+      setIsVoting(false);
+    }
+  };
   const getStatusConfig = () => {
     switch (finalStep.type) {
       case 'success':
           </Box>
         )}
+        {/* Trace Evaluation */}
+        <Box sx={{ mb: 2 }}>
+          <Box sx={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between' }}>
+            <Typography
+              variant="caption"
+              sx={{
+                fontWeight: 700,
+                color: 'text.secondary',
+                fontSize: '0.7rem',
+                textTransform: 'uppercase',
+                letterSpacing: '0.5px',
+              }}
+            >
+              Was this task completed successfully?
+            </Typography>
+            {/* Evaluation buttons */}
+            <Box sx={{ display: 'flex', gap: 1 }}>
+              <Tooltip title={evaluation === 'success' ? 'Remove success rating' : 'Mark as successful'}>
+                <IconButton
+                  size="small"
+                  onClick={() => handleTraceEvaluation('success')}
+                  disabled={isVoting}
+                  sx={{
+                    padding: '4px',
+                    color: evaluation === 'success' ? 'success.main' : 'action.disabled',
+                    '&:hover': {
+                      color: 'success.main',
+                      backgroundColor: (theme) => theme.palette.mode === 'dark' ? 'rgba(102, 187, 106, 0.1)' : 'rgba(102, 187, 106, 0.08)',
+                    },
+                  }}
+                >
+                  <ThumbUpIcon sx={{ fontSize: 18 }} />
+                </IconButton>
+              </Tooltip>
+              <Tooltip title={evaluation === 'failed' ? 'Remove failure rating' : 'Mark as failed'}>
+                <IconButton
+                  size="small"
+                  onClick={() => handleTraceEvaluation('failed')}
+                  disabled={isVoting}
+                  sx={{
+                    padding: '4px',
+                    color: evaluation === 'failed' ? 'error.main' : 'action.disabled',
+                    '&:hover': {
+                      color: 'error.main',
+                      backgroundColor: (theme) => theme.palette.mode === 'dark' ? 'rgba(244, 67, 54, 0.1)' : 'rgba(244, 67, 54, 0.08)',
+                    },
+                  }}
+                >
+                  <ThumbDownIcon sx={{ fontSize: 18 }} />
+                </IconButton>
+              </Tooltip>
+            </Box>
+          </Box>
+        </Box>
         {/* Divider before metrics */}
         <Divider sx={{ my: 2 }} />

cua2-front/src/components/steps/FinalStepCard.tsx CHANGED Viewed

@@ -1,12 +1,12 @@
 import { FinalStep } from '@/types/agent';
-import React from 'react';
-import { Card, CardContent, Box, Typography } from '@mui/material';
 import CheckIcon from '@mui/icons-material/Check';
 import CloseIcon from '@mui/icons-material/Close';
-import StopCircleIcon from '@mui/icons-material/StopCircle';
 import HourglassEmptyIcon from '@mui/icons-material/HourglassEmpty';
-import AccessTimeIcon from '@mui/icons-material/AccessTime';
-import { useAgentStore } from '@/stores/agentStore';
 interface FinalStepCardProps {
   finalStep: FinalStep;
@@ -74,8 +74,8 @@ export const FinalStepCard: React.FC<FinalStepCardProps> = ({ finalStep, isActiv
         cursor: 'pointer',
         boxShadow: isActive
           ? (theme) => `0 2px 8px ${theme.palette.mode === 'dark'
-              ? `rgba(${statusConfig.color === 'success' ? '102, 187, 106' : statusConfig.color === 'error' ? '244, 67, 54' : '255, 152, 0'}, 0.3)`
-              : `rgba(${statusConfig.color === 'success' ? '102, 187, 106' : statusConfig.color === 'error' ? '244, 67, 54' : '255, 152, 0'}, 0.2)`}`
           : 'none',
         '&:hover': {
           borderColor: (theme) => `${theme.palette[statusConfig.color].main} !important`,

+import { useAgentStore } from '@/stores/agentStore';
 import { FinalStep } from '@/types/agent';
+import AccessTimeIcon from '@mui/icons-material/AccessTime';
 import CheckIcon from '@mui/icons-material/Check';
 import CloseIcon from '@mui/icons-material/Close';
 import HourglassEmptyIcon from '@mui/icons-material/HourglassEmpty';
+import StopCircleIcon from '@mui/icons-material/StopCircle';
+import { Box, Card, CardContent, Typography } from '@mui/material';
+import React from 'react';
 interface FinalStepCardProps {
   finalStep: FinalStep;
         cursor: 'pointer',
         boxShadow: isActive
           ? (theme) => `0 2px 8px ${theme.palette.mode === 'dark'
+            ? `rgba(${statusConfig.color === 'success' ? '102, 187, 106' : statusConfig.color === 'error' ? '244, 67, 54' : '255, 152, 0'}, 0.3)`
+            : `rgba(${statusConfig.color === 'success' ? '102, 187, 106' : statusConfig.color === 'error' ? '244, 67, 54' : '255, 152, 0'}, 0.2)`}`
           : 'none',
         '&:hover': {
           borderColor: (theme) => `${theme.palette[statusConfig.color].main} !important`,

cua2-front/src/components/steps/StepCard.tsx CHANGED Viewed

@@ -1,16 +1,14 @@
 import { AgentStep } from '@/types/agent';
-import React, { useState } from 'react';
-import { Card, CardContent, Box, Typography, Divider, Chip, Paper, Accordion, AccordionSummary, AccordionDetails, IconButton, Tooltip } from '@mui/material';
-import ThoughtBubbleIcon from '@mui/icons-material/Psychology';
-import BoltIcon from '@mui/icons-material/Bolt';
 import AccessTimeIcon from '@mui/icons-material/AccessTime';
 import InputIcon from '@mui/icons-material/Input';
 import OutputIcon from '@mui/icons-material/Output';
-import ExpandMoreIcon from '@mui/icons-material/ExpandMore';
-import ThumbUpIcon from '@mui/icons-material/ThumbUp';
 import ThumbDownIcon from '@mui/icons-material/ThumbDown';
-import { useAgentStore } from '@/stores/agentStore';
-import { updateStepEvaluation } from '@/services/api';
 interface StepCardProps {
   step: AgentStep;
@@ -21,6 +19,7 @@ interface StepCardProps {
 export const StepCard: React.FC<StepCardProps> = ({ step, index, isLatest = false, isActive = false }) => {
   const setSelectedStepIndex = useAgentStore((state) => state.setSelectedStepIndex);
   const [thoughtExpanded, setThoughtExpanded] = useState(false);
   const [evaluation, setEvaluation] = useState<'like' | 'dislike' | 'neutral'>(step.step_evaluation || 'neutral');
   const [isVoting, setIsVoting] = useState(false);
@@ -44,6 +43,8 @@ export const StepCard: React.FC<StepCardProps> = ({ step, index, isLatest = fals
     try {
       await updateStepEvaluation(step.traceId, step.stepId, newEvaluation);
       setEvaluation(newEvaluation);
     } catch (error) {
       console.error('Failed to update step evaluation:', error);
     } finally {
@@ -206,7 +207,7 @@ export const StepCard: React.FC<StepCardProps> = ({ step, index, isLatest = fals
                 </Tooltip>
               </Box>
             </Box>
-            <Box component="ul" sx={{ listStyle: 'none', p: 0, m: 0}}>
               {step.actions.map((action, actionIndex) => (
                 <Box
                   key={actionIndex}

+import { updateStepEvaluation } from '@/services/api';
+import { useAgentStore } from '@/stores/agentStore';
 import { AgentStep } from '@/types/agent';
 import AccessTimeIcon from '@mui/icons-material/AccessTime';
+import ExpandMoreIcon from '@mui/icons-material/ExpandMore';
 import InputIcon from '@mui/icons-material/Input';
 import OutputIcon from '@mui/icons-material/Output';
 import ThumbDownIcon from '@mui/icons-material/ThumbDown';
+import ThumbUpIcon from '@mui/icons-material/ThumbUp';
+import { Accordion, AccordionDetails, AccordionSummary, Box, Card, CardContent, Chip, IconButton, Tooltip, Typography } from '@mui/material';
+import React, { useState } from 'react';
 interface StepCardProps {
   step: AgentStep;
 export const StepCard: React.FC<StepCardProps> = ({ step, index, isLatest = false, isActive = false }) => {
   const setSelectedStepIndex = useAgentStore((state) => state.setSelectedStepIndex);
+  const updateStepEvaluationInStore = useAgentStore((state) => state.updateStepEvaluation);
   const [thoughtExpanded, setThoughtExpanded] = useState(false);
   const [evaluation, setEvaluation] = useState<'like' | 'dislike' | 'neutral'>(step.step_evaluation || 'neutral');
   const [isVoting, setIsVoting] = useState(false);
     try {
       await updateStepEvaluation(step.traceId, step.stepId, newEvaluation);
       setEvaluation(newEvaluation);
+      // Update the store so the evaluation is reflected in JSON export
+      updateStepEvaluationInStore(step.stepId, newEvaluation);
     } catch (error) {
       console.error('Failed to update step evaluation:', error);
     } finally {
                 </Tooltip>
               </Box>
             </Box>
+            <Box component="ul" sx={{ listStyle: 'none', p: 0, m: 0 }}>
               {step.actions.map((action, actionIndex) => (
                 <Box
                   key={actionIndex}

cua2-front/src/services/api.ts CHANGED Viewed

@@ -54,3 +54,25 @@ export async function updateStepEvaluation(
     throw new Error('Failed to update step evaluation');
   }
 }

     throw new Error('Failed to update step evaluation');
   }
 }
+/**
+ * Update trace evaluation (overall task feedback).
+ *
+ * Issues a PATCH request to `{getApiBaseUrl()}/traces/{traceId}/evaluation`
+ * with a JSON body of the form `{ "user_evaluation": evaluation }`.
+ *
+ * @param traceId - Identifier of the trace; interpolated into the URL path as-is (no escaping is applied here).
+ * @param evaluation - The user's verdict for the whole trace.
+ * @returns A promise that resolves with no value once an OK response is received.
+ * @throws Error with the message 'Failed to update trace evaluation' when `response.ok` is false.
+ */
+export async function updateTraceEvaluation(
+  traceId: string,
+  evaluation: 'success' | 'failed' | 'not_evaluated'
+): Promise<void> {
+  const response = await fetch(`${getApiBaseUrl()}/traces/${traceId}/evaluation`, {
+    method: 'PATCH',
+    headers: {
+      'Content-Type': 'application/json',
+    },
+    body: JSON.stringify({
+      user_evaluation: evaluation,
+    }),
+  });
+  if (!response.ok) { // the response body is never read; only the status decides success
+    throw new Error('Failed to update trace evaluation');
+  }
+}

cua2-front/src/services/jsonExporter.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { AgentTrace, AgentStep, AgentTraceMetadata, FinalStep } from '@/types/agent';
 /**
  * Extract final answer from steps
@@ -60,14 +60,13 @@ export const exportTraceToJson = (
       traceId: step.traceId,
       stepId: step.stepId,
       error: step.error,
       thought: step.thought,
       actions: step.actions,
       duration: step.duration,
       inputTokensUsed: step.inputTokensUsed,
       outputTokensUsed: step.outputTokensUsed,
       step_evaluation: step.step_evaluation,
-      // Don't include base64 image to reduce JSON size
-      hasImage: !!step.image,
     })),
     exportedAt: new Date().toISOString(),
   };

+import { AgentStep, AgentTrace, AgentTraceMetadata, FinalStep } from '@/types/agent';
 /**
  * Extract final answer from steps
       traceId: step.traceId,
       stepId: step.stepId,
       error: step.error,
+      image: step.image, // Include full base64 image
       thought: step.thought,
       actions: step.actions,
       duration: step.duration,
       inputTokensUsed: step.inputTokensUsed,
       outputTokensUsed: step.outputTokensUsed,
       step_evaluation: step.step_evaluation,
     })),
     exportedAt: new Date().toISOString(),
   };

cua2-front/src/stores/agentStore.ts CHANGED Viewed

@@ -22,6 +22,8 @@ interface AgentState {
   setTrace: (trace: AgentTrace | undefined) => void;
   setTraceId: (traceId: string | null) => void;
   updateTraceWithStep: (step: AgentStep, metadata: AgentTraceMetadata) => void;
   completeTrace: (metadata: AgentTraceMetadata, finalState?: 'success' | 'stopped' | 'max_steps_reached' | 'error' | 'sandbox_timeout') => void;
   setIsAgentProcessing: (processing: boolean) => void;
   setIsConnectingToE2B: (connecting: boolean) => void;
@@ -97,6 +99,59 @@ export const useAgentStore = create<AgentState>()(
           'updateTraceWithStep'
         ),
       // Complete the trace
       completeTrace: (metadata, finalState?: 'success' | 'stopped' | 'max_steps_reached' | 'error' | 'sandbox_timeout') =>
         set(
@@ -196,12 +251,16 @@ export const useAgentStore = create<AgentState>()(
                 numberOfSteps: state.trace.steps?.length || 0,
                 maxSteps: 200,
                 completed: false,
               };
               // Ensure maxSteps is not 0
-              const finalMetadata = {
                 ...metadata,
                 maxSteps: metadata.maxSteps > 0 ? metadata.maxSteps : 200,
               };
               const finalStep: FinalStep = {

   setTrace: (trace: AgentTrace | undefined) => void;
   setTraceId: (traceId: string | null) => void;
   updateTraceWithStep: (step: AgentStep, metadata: AgentTraceMetadata) => void;
+  updateStepEvaluation: (stepId: string, evaluation: 'like' | 'dislike' | 'neutral') => void;
+  updateTraceEvaluation: (evaluation: 'success' | 'failed' | 'not_evaluated') => void;
   completeTrace: (metadata: AgentTraceMetadata, finalState?: 'success' | 'stopped' | 'max_steps_reached' | 'error' | 'sandbox_timeout') => void;
   setIsAgentProcessing: (processing: boolean) => void;
   setIsConnectingToE2B: (connecting: boolean) => void;
           'updateTraceWithStep'
         ),
+      /*
+       * Update step evaluation in the store.
+       * Produces a new trace whose steps array has the step matching stepId
+       * replaced by a copy carrying the new step_evaluation; every other step
+       * object is reused unchanged. A new steps array is built even when no
+       * step matches.
+       */
+      updateStepEvaluation: (stepId, evaluation) =>
+        set(
+          (state) => {
+            if (!state.trace || !state.trace.steps) return state; // no trace or no steps: leave state untouched
+            const updatedSteps = state.trace.steps.map((step) =>
+              step.stepId === stepId
+                ? { ...step, step_evaluation: evaluation }
+                : step
+            );
+            return {
+              trace: {
+                ...state.trace,
+                steps: updatedSteps,
+              },
+            };
+          },
+          false, // NOTE(review): presumably the store's "replace" flag (false = merge) — confirm against the middleware setup
+          'updateStepEvaluation' // NOTE(review): presumably the action label shown by devtools — confirm
+        ),
+      /*
+       * Update trace evaluation in the store.
+       * Writes user_evaluation into a copy of trace.traceMetadata and, when a
+       * finalStep is present, into a copy of finalStep.metadata as well, so the
+       * two metadata objects carry the same evaluation.
+       */
+      updateTraceEvaluation: (evaluation) =>
+        set(
+          (state) => {
+            if (!state.trace || !state.trace.traceMetadata) return state; // no trace or no metadata: leave state untouched
+            const updatedMetadata = {
+              ...state.trace.traceMetadata,
+              user_evaluation: evaluation,
+            };
+            return {
+              trace: {
+                ...state.trace,
+                traceMetadata: updatedMetadata,
+              },
+              // Also update finalStep metadata if it exists; otherwise the existing (falsy) finalStep value is passed through
+              finalStep: state.finalStep ? {
+                ...state.finalStep,
+                metadata: {
+                  ...state.finalStep.metadata,
+                  user_evaluation: evaluation,
+                },
+              } : state.finalStep,
+            };
+          },
+          false, // NOTE(review): presumably the store's "replace" flag (false = merge) — confirm against the middleware setup
+          'updateTraceEvaluation' // NOTE(review): presumably the action label shown by devtools — confirm
+        ),
       // Complete the trace
       completeTrace: (metadata, finalState?: 'success' | 'stopped' | 'max_steps_reached' | 'error' | 'sandbox_timeout') =>
         set(
                 numberOfSteps: state.trace.steps?.length || 0,
                 maxSteps: 200,
                 completed: false,
+                final_state: null,
+                user_evaluation: 'not_evaluated' as const,
               };
               // Ensure maxSteps is not 0
+              const finalMetadata: AgentTraceMetadata = {
                 ...metadata,
                 maxSteps: metadata.maxSteps > 0 ? metadata.maxSteps : 200,
+                final_state: metadata.final_state || null,
+                user_evaluation: metadata.user_evaluation || 'not_evaluated',
               };
               const finalStep: FinalStep = {

cua2-front/src/types/agent.ts CHANGED Viewed

@@ -36,6 +36,7 @@ export interface AgentTraceMetadata {
   maxSteps: number;
   completed: boolean;
   final_state: 'success' | 'stopped' | 'max_steps_reached' | 'error' | 'sandbox_timeout' | null;
 }
 export interface FinalStep {

   maxSteps: number;
   completed: boolean;
   final_state: 'success' | 'stopped' | 'max_steps_reached' | 'error' | 'sandbox_timeout' | null;
+  user_evaluation?: 'success' | 'failed' | 'not_evaluated';
 }
 export interface FinalStep {