A-Mahla committed on
Commit
3593cc3
·
unverified ·
1 Parent(s): f9fd9fa

Little fixes (#22)

Browse files

* Little fix

* CHG text

* FIX pre-commit

cua2-core/src/cua2_core/app.py CHANGED
@@ -39,6 +39,7 @@ async def lifespan(app: FastAPI):
39
  yield
40
 
41
  print("Shutting down services...")
 
42
  await sandbox_service.cleanup_sandboxes()
43
  print("Services shut down successfully")
44
 
 
39
  yield
40
 
41
  print("Shutting down services...")
42
+ await agent_service.cleanup()
43
  await sandbox_service.cleanup_sandboxes()
44
  print("Services shut down successfully")
45
 
cua2-core/src/cua2_core/models/models.py CHANGED
@@ -140,6 +140,7 @@ class AgentTraceMetadata(BaseModel):
140
  Literal["success", "stopped", "max_steps_reached", "error", "sandbox_timeout"]
141
  | None
142
  ) = None
 
143
 
144
 
145
  class AgentTrace(BaseModel):
@@ -248,6 +249,14 @@ class StopTask(BaseModel):
248
  traceId: str
249
 
250
 
 
 
 
 
 
 
 
 
251
  ##################### Agent Service ########################
252
 
253
 
@@ -314,6 +323,7 @@ class ActiveTask(BaseModel):
314
  "success", "stopped", "max_steps_reached", "error", "sandbox_timeout"
315
  ]
316
  | None = None,
 
317
  ):
318
  """Update trace metadata"""
319
  with self._file_lock:
@@ -329,6 +339,8 @@ class ActiveTask(BaseModel):
329
  self.traceMetadata.completed = completed
330
  if final_state is not None:
331
  self.traceMetadata.final_state = final_state
 
 
332
 
333
 
334
  #################### API Routes Models ########################
@@ -369,6 +381,19 @@ class UpdateStepResponse(BaseModel):
369
  message: str
370
 
371
 
 
 
 
 
 
 
 
 
 
 
 
 
 
372
  class AvailableModelsResponse(BaseModel):
373
  """Response for available models"""
374
 
 
140
  Literal["success", "stopped", "max_steps_reached", "error", "sandbox_timeout"]
141
  | None
142
  ) = None
143
+ user_evaluation: Literal["success", "failed", "not_evaluated"] = "not_evaluated"
144
 
145
 
146
  class AgentTrace(BaseModel):
 
249
  traceId: str
250
 
251
 
252
class TraceEvaluation(BaseModel):
    """Message that carries a user's evaluation of a whole trace."""

    # Discriminator: always the literal string "trace_evaluation".
    event_type: Literal["trace_evaluation"]
    # Identifier of the trace being evaluated.
    traceId: str
    # The user's verdict; "not_evaluated" represents the absence of a rating.
    user_evaluation: Literal["success", "failed", "not_evaluated"]
258
+
259
+
260
  ##################### Agent Service ########################
261
 
262
 
 
323
  "success", "stopped", "max_steps_reached", "error", "sandbox_timeout"
324
  ]
325
  | None = None,
326
+ user_evaluation: Literal["success", "failed", "not_evaluated"] | None = None,
327
  ):
328
  """Update trace metadata"""
329
  with self._file_lock:
 
339
  self.traceMetadata.completed = completed
340
  if final_state is not None:
341
  self.traceMetadata.final_state = final_state
342
+ if user_evaluation is not None:
343
+ self.traceMetadata.user_evaluation = user_evaluation
344
 
345
 
346
  #################### API Routes Models ########################
 
381
  message: str
382
 
383
 
384
class UpdateTraceEvaluationRequest(BaseModel):
    """Request body used to set the user evaluation of a trace."""

    # New evaluation value; "not_evaluated" clears a previous rating.
    user_evaluation: Literal["success", "failed", "not_evaluated"]
388
+
389
+
390
class UpdateTraceEvaluationResponse(BaseModel):
    """Response body returned after a trace evaluation update."""

    # Whether the update was applied.
    success: bool
    # Human-readable description of the outcome.
    message: str
395
+
396
+
397
  class AvailableModelsResponse(BaseModel):
398
  """Response for available models"""
399
 
cua2-core/src/cua2_core/routes/routes.py CHANGED
@@ -8,6 +8,8 @@ from cua2_core.models.models import (
8
  HealthResponse,
9
  UpdateStepRequest,
10
  UpdateStepResponse,
 
 
11
  )
12
  from cua2_core.services.agent_service import AgentService
13
  from cua2_core.services.agent_utils.get_model import AVAILABLE_MODELS
@@ -93,3 +95,27 @@ async def update_trace_step(
93
  raise HTTPException(status_code=404, detail=str(e))
94
  except Exception as e:
95
  raise HTTPException(status_code=400, detail=str(e))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  HealthResponse,
9
  UpdateStepRequest,
10
  UpdateStepResponse,
11
+ UpdateTraceEvaluationRequest,
12
+ UpdateTraceEvaluationResponse,
13
  )
14
  from cua2_core.services.agent_service import AgentService
15
  from cua2_core.services.agent_utils.get_model import AVAILABLE_MODELS
 
95
  raise HTTPException(status_code=404, detail=str(e))
96
  except Exception as e:
97
  raise HTTPException(status_code=400, detail=str(e))
98
+
99
+
100
@router.patch(
    "/traces/{trace_id}/evaluation", response_model=UpdateTraceEvaluationResponse
)
async def update_trace_evaluation(
    trace_id: str,
    request: UpdateTraceEvaluationRequest,
    agent_service: AgentService = Depends(get_agent_service),
):
    """Update the user evaluation for a trace (overall task feedback).

    Args:
        trace_id: Identifier of the trace, taken from the URL path.
        request: Body carrying the new ``user_evaluation`` value.
        agent_service: Service that performs the update.

    Returns:
        An ``UpdateTraceEvaluationResponse`` with ``success=True``.

    Raises:
        HTTPException: 404 when the service raises ``FileNotFoundError``,
            400 for any other exception raised by the service.
    """
    # Only the service call can fail; keep the try body minimal.
    try:
        agent_service.update_trace_evaluation(
            trace_id=trace_id,
            user_evaluation=request.user_evaluation,
        )
    except FileNotFoundError as e:
        # Chain the cause so the original traceback is kept in logs.
        raise HTTPException(status_code=404, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e)) from e

    return UpdateTraceEvaluationResponse(
        success=True,
        message="Trace evaluation updated successfully",
    )
cua2-core/src/cua2_core/services/agent_service.py CHANGED
@@ -1,11 +1,12 @@
1
  import asyncio
2
  import base64
 
3
  import json
4
  import logging
5
  import os
6
  import time
7
  from io import BytesIO
8
- from typing import Callable, Literal
9
  from uuid import uuid4
10
 
11
  from cua2_core.models.models import (
@@ -52,6 +53,7 @@ class AgentService:
52
  self.last_screenshot: dict[str, AgentImage | None] = {}
53
  self._lock = asyncio.Lock()
54
  self.max_sandboxes = int(600 / num_workers)
 
55
 
56
  # Initialize archival service in dedicated process
57
  self.archival_service = ArchivalService(
@@ -61,8 +63,41 @@ class AgentService:
61
  archive_interval_minutes=30,
62
  folder_age_threshold_minutes=30,
63
  )
64
- # Start the archival service process
65
- self.archival_service.start()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  def _update_archival_active_tasks(self):
68
  """
@@ -243,6 +278,7 @@ class AgentService:
243
 
244
  self.active_tasks[message_id].update_trace_metadata(
245
  final_state=final_state,
 
246
  )
247
 
248
  if message_id in self.active_tasks:
@@ -475,6 +511,58 @@ class AgentService:
475
  except (ValueError, KeyError, TypeError) as e:
476
  raise ValueError(f"Error processing step update: {e}")
477
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
478
  async def stop_task(self, trace_id: str):
479
  """Stop a task"""
480
  if trace_id in self.active_tasks:
@@ -518,3 +606,29 @@ class AgentService:
518
 
519
  except Exception as e:
520
  logger.error(f"Error cleaning up task {message_id}: {e}", exc_info=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import asyncio
2
  import base64
3
+ import fcntl
4
  import json
5
  import logging
6
  import os
7
  import time
8
  from io import BytesIO
9
+ from typing import IO, Callable, Literal
10
  from uuid import uuid4
11
 
12
  from cua2_core.models.models import (
 
53
  self.last_screenshot: dict[str, AgentImage | None] = {}
54
  self._lock = asyncio.Lock()
55
  self.max_sandboxes = int(600 / num_workers)
56
+ self._archival_lock_file: IO[str] | None = None
57
 
58
  # Initialize archival service in dedicated process
59
  self.archival_service = ArchivalService(
 
63
  archive_interval_minutes=30,
64
  folder_age_threshold_minutes=30,
65
  )
66
+ # Start the archival service process only on one worker
67
+ if self._should_start_archival_service():
68
+ self.archival_service.start()
69
+ logger.info(f"Started archival service in worker PID {os.getpid()}")
70
+ else:
71
+ logger.info(
72
+ f"Skipping archival service start in worker PID {os.getpid()} (already running in another worker)"
73
+ )
74
+
75
def _should_start_archival_service(self) -> bool:
    """
    Determine if this worker should start the archival service.

    Tries to take a non-blocking exclusive ``flock`` on a fixed lock file.
    The worker that gets the lock writes its PID into the file and keeps
    the handle in ``self._archival_lock_file`` so the lock stays held.

    Returns:
        True if this worker acquired the lock and should start the archival
        service, False otherwise
    """
    lock_file_path = "/tmp/cua2_archival_service.lock"

    lock_file = None
    try:
        # Append mode creates the file if needed but never truncates it, so
        # a worker that loses the race cannot erase the owner's PID.
        lock_file = open(lock_file_path, "a")
        fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)

        # We own the lock: only now is it safe to replace the contents.
        lock_file.truncate(0)
        lock_file.write(str(os.getpid()))
        lock_file.flush()
    except OSError:
        # Lock held by another worker, or the file could not be opened/written.
        if lock_file is not None:
            lock_file.close()
        self._archival_lock_file = None
        return False

    self._archival_lock_file = lock_file
    return True
101
 
102
  def _update_archival_active_tasks(self):
103
  """
 
278
 
279
  self.active_tasks[message_id].update_trace_metadata(
280
  final_state=final_state,
281
+ completed=True,
282
  )
283
 
284
  if message_id in self.active_tasks:
 
511
  except (ValueError, KeyError, TypeError) as e:
512
  raise ValueError(f"Error processing step update: {e}")
513
 
514
def update_trace_evaluation(
    self,
    trace_id: str,
    user_evaluation: Literal["success", "failed", "not_evaluated"],
):
    """
    Update the user evaluation for a trace

    If the trace belongs to an active task, the task's metadata is updated.
    Otherwise the first directory under ``data`` whose name starts with
    ``trace-<trace_id>`` is used and its ``tasks.json`` is rewritten.

    Args:
        trace_id: The trace ID
        user_evaluation: The evaluation value to set

    Raises:
        FileNotFoundError: If trace not found
        ValueError: If the stored trace data lacks the expected structure
    """
    # Try to find in active tasks first
    active_task = self.active_tasks.get(trace_id)
    if active_task is not None:
        active_task.update_trace_metadata(user_evaluation=user_evaluation)
        return

    # Task is not active, fall back to the trace stored on disk
    data_dir = "data"
    trace_dirs = [
        d for d in os.listdir(data_dir) if d.startswith(f"trace-{trace_id}")
    ]
    if not trace_dirs:
        raise FileNotFoundError("Trace not found")

    tasks_file = os.path.join(data_dir, trace_dirs[0], "tasks.json")
    if not os.path.exists(tasks_file):
        raise FileNotFoundError("Trace data not found")

    # Write to a sibling temp file and rename it over the original, so a
    # failure mid-write never leaves a truncated tasks.json behind.
    tmp_file = f"{tasks_file}.{os.getpid()}.tmp"
    try:
        with open(tasks_file, "r") as f:
            task_data = json.load(f)

        task_data["traceMetadata"]["user_evaluation"] = user_evaluation

        with open(tmp_file, "w") as f:
            json.dump(task_data, f, indent=2)
        os.replace(tmp_file, tasks_file)
    except (KeyError, TypeError) as e:
        raise ValueError(f"Error processing trace evaluation update: {e}") from e
    finally:
        # The temp file only survives when something failed before the rename.
        try:
            os.remove(tmp_file)
        except FileNotFoundError:
            pass
565
+
566
  async def stop_task(self, trace_id: str):
567
  """Stop a task"""
568
  if trace_id in self.active_tasks:
 
606
 
607
  except Exception as e:
608
  logger.error(f"Error cleaning up task {message_id}: {e}", exc_info=True)
609
+
610
async def cleanup(self):
    """
    Cleanup method called during service shutdown.

    Stops the archival service if it is alive, then releases the archival
    lock file if this instance holds one. The two steps are independent:
    a failure while stopping the service does not prevent the lock release.
    Errors are logged and not re-raised.
    """
    try:
        # Stop the archival service if it's running
        if self.archival_service.is_alive():
            logger.info("Stopping archival service...")
            self.archival_service.stop()
            logger.info("Archival service stopped")
    except Exception as e:
        logger.error(f"Error during AgentService cleanup: {e}", exc_info=True)

    # Detach the handle first so the attribute is cleared whatever happens below.
    lock_file = self._archival_lock_file
    self._archival_lock_file = None
    if lock_file is None:
        return

    try:
        try:
            fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
        finally:
            # Always close, even if the explicit unlock failed, so the
            # file handle is not leaked.
            lock_file.close()
        logger.info("Released archival service lock")
    except Exception as e:
        logger.warning(f"Error releasing archival lock: {e}")
cua2-core/src/cua2_core/services/agent_utils/get_model.py CHANGED
@@ -3,9 +3,7 @@ from smolagents import InferenceClientModel, Model
3
  # Available model IDs
4
  AVAILABLE_MODELS = [
5
  "Qwen/Qwen3-VL-8B-Instruct",
6
- "Qwen/Qwen3-VL-8B-Thinking",
7
  "Qwen/Qwen3-VL-30B-A3B-Instruct",
8
- "Qwen/Qwen3-VL-30B-A3B-Thinking",
9
  ]
10
 
11
 
 
3
  # Available model IDs
4
  AVAILABLE_MODELS = [
5
  "Qwen/Qwen3-VL-8B-Instruct",
 
6
  "Qwen/Qwen3-VL-30B-A3B-Instruct",
 
7
  ]
8
 
9
 
cua2-core/src/cua2_core/services/archival_service.py CHANGED
@@ -298,9 +298,12 @@ def _process_old_folders(
298
  f"Successfully verified {archive_path.name} in HuggingFace repo"
299
  )
300
 
301
- # Delete the local folder
302
- shutil.rmtree(folder)
303
- logger.info(f"Deleted local folder: {folder_name}")
 
 
 
304
 
305
  # Delete the local archive
306
  archive_path.unlink(missing_ok=True)
@@ -403,10 +406,6 @@ def _verify_file_in_repo(hf_dataset_repo: str, hf_token: str, filename: str) ->
403
  filename=filename,
404
  repo_type="dataset",
405
  token=hf_token,
406
- local_dir_use_symlinks=False,
407
- # Just check if file exists without actually downloading
408
- cache_dir=None,
409
- local_files_only=False,
410
  )
411
 
412
  logger.info(f"Verified {filename} exists in repo")
 
298
  f"Successfully verified {archive_path.name} in HuggingFace repo"
299
  )
300
 
301
+ # Delete the local folder (check if it still exists to avoid race conditions)
302
+ if folder.exists():
303
+ shutil.rmtree(folder)
304
+ logger.info(f"Deleted local folder: {folder_name}")
305
+ else:
306
+ logger.warning(f"Folder {folder_name} already deleted, skipping")
307
 
308
  # Delete the local archive
309
  archive_path.unlink(missing_ok=True)
 
406
  filename=filename,
407
  repo_type="dataset",
408
  token=hf_token,
 
 
 
 
409
  )
410
 
411
  logger.info(f"Verified {filename} exists in repo")
cua2-front/src/components/WelcomeScreen.tsx CHANGED
@@ -150,13 +150,15 @@ export const WelcomeScreen: React.FC<WelcomeScreenProps> = ({ onStartTask, isCon
150
  Computer Use Agent
151
  </Typography>
152
 
153
- {/* Powered by smolagents */}
154
  <Box
155
  sx={{
156
  display: 'flex',
157
  alignItems: 'center',
158
  gap: 1,
159
  mb: 2,
 
 
160
  }}
161
  >
162
  <Typography
@@ -168,6 +170,8 @@ export const WelcomeScreen: React.FC<WelcomeScreenProps> = ({ onStartTask, isCon
168
  >
169
  Powered by
170
  </Typography>
 
 
171
  <Box
172
  component="a"
173
  href="https://github.com/huggingface/smolagents"
@@ -239,6 +243,61 @@ export const WelcomeScreen: React.FC<WelcomeScreenProps> = ({ onStartTask, isCon
239
  </Typography>
240
  </Box>
241
  </Box>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
  </Box>
243
 
244
  {/* Subtitle */}
@@ -259,12 +318,29 @@ export const WelcomeScreen: React.FC<WelcomeScreenProps> = ({ onStartTask, isCon
259
  sx={{
260
  color: 'text.secondary',
261
  maxWidth: '650px',
262
- mb: 6,
263
  lineHeight: 1.7,
264
  }}
265
  >
266
- Watch in real-time as AI agents write and execute Python code to complete tasks.
267
- Built by Hugging Face, <strong>smolagents</strong> is LLM-agnostic and uses <strong>30% fewer steps</strong> than traditional agents.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
  </Typography>
269
 
270
  {/* Task Input Section */}
@@ -418,6 +494,24 @@ export const WelcomeScreen: React.FC<WelcomeScreenProps> = ({ onStartTask, isCon
418
  </Box>
419
  </Paper>
420
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
421
  {/* Connection status hint */}
422
  {!isConnected && (
423
  <Typography
 
150
  Computer Use Agent
151
  </Typography>
152
 
153
+ {/* Powered by smolagents and E2B */}
154
  <Box
155
  sx={{
156
  display: 'flex',
157
  alignItems: 'center',
158
  gap: 1,
159
  mb: 2,
160
+ flexWrap: 'wrap',
161
+ justifyContent: 'center',
162
  }}
163
  >
164
  <Typography
 
170
  >
171
  Powered by
172
  </Typography>
173
+
174
+ {/* smolagents link */}
175
  <Box
176
  component="a"
177
  href="https://github.com/huggingface/smolagents"
 
243
  </Typography>
244
  </Box>
245
  </Box>
246
+
247
+ {/* Separator */}
248
+ <Typography
249
+ variant="body2"
250
+ sx={{
251
+ color: 'text.secondary',
252
+ mx: 0.5,
253
+ }}
254
+ >
255
+ &
256
+ </Typography>
257
+
258
+ {/* E2B link */}
259
+ <Box
260
+ component="a"
261
+ href="https://e2b.dev/"
262
+ target="_blank"
263
+ rel="noopener noreferrer"
264
+ sx={{
265
+ display: 'flex',
266
+ alignItems: 'center',
267
+ gap: 0.75,
268
+ textDecoration: 'none',
269
+ transition: 'all 0.2s ease',
270
+ '&:hover': {
271
+ '& .e2b-text': {
272
+ textDecoration: 'underline',
273
+ },
274
+ },
275
+ }}
276
+ >
277
+ {/* E2B Logo */}
278
+ <Box
279
+ component="img"
280
+ src="https://avatars.githubusercontent.com/u/129434473?s=200&v=4"
281
+ alt="E2B"
282
+ sx={{
283
+ width: 24,
284
+ height: 24,
285
+ flexShrink: 0,
286
+ borderRadius: '50%',
287
+ }}
288
+ />
289
+
290
+ <Typography
291
+ className="e2b-text"
292
+ sx={{
293
+ color: 'primary.main',
294
+ fontWeight: 700,
295
+ fontSize: '1rem',
296
+ }}
297
+ >
298
+ E2B
299
+ </Typography>
300
+ </Box>
301
  </Box>
302
 
303
  {/* Subtitle */}
 
318
  sx={{
319
  color: 'text.secondary',
320
  maxWidth: '650px',
321
+ mb: 3,
322
  lineHeight: 1.7,
323
  }}
324
  >
325
+ Experience the future of AI automation as agents operate computers in real time to complete complex on-screen tasks (GUI agents).
326
+ Built by{' '}
327
+ <Box
328
+ component="a"
329
+ href="https://huggingface.co"
330
+ target="_blank"
331
+ rel="noopener noreferrer"
332
+ sx={{
333
+ color: 'primary.main',
334
+ textDecoration: 'none',
335
+ fontWeight: 700,
336
+ '&:hover': {
337
+ textDecoration: 'underline',
338
+ },
339
+ }}
340
+ >
341
+ Hugging Face
342
+ </Box>
343
+ , this platform provides intuitive <strong>visualization and annotation tools</strong>, enabling <strong>manual preferential data annotation</strong> for advanced agentic AI research.
344
  </Typography>
345
 
346
  {/* Task Input Section */}
 
494
  </Box>
495
  </Paper>
496
 
497
+ {/* Research Notice */}
498
+ <Typography
499
+ variant="body2"
500
+ sx={{
501
+ color: 'text.secondary',
502
+ maxWidth: '700px',
503
+ mt: 3,
504
+ mb: 2,
505
+ lineHeight: 1.6,
506
+ fontStyle: 'italic',
507
+ opacity: 0.8,
508
+ textAlign: 'center',
509
+ }}
510
+ >
511
+ Please be aware that by using the demo, you agree that the traces are stored for research purposes.
512
+ <strong>Please do not write any personal information.</strong>
513
+ </Typography>
514
+
515
  {/* Connection status hint */}
516
  {!isConnected && (
517
  <Typography
cua2-front/src/components/sandbox/SandboxViewer.tsx CHANGED
@@ -110,8 +110,11 @@ export const SandboxViewer: React.FC<SandboxViewerProps> = ({
110
 
111
  // Handler to go back to home
112
  const handleBackToHome = () => {
113
- resetAgent();
114
- navigate('/');
 
 
 
115
  };
116
 
117
  // Handler to go back to live mode
 
110
 
111
  // Handler to go back to home
112
  const handleBackToHome = () => {
113
+ // Reset frontend state
114
+ useAgentStore.getState().resetAgent();
115
+
116
+ // Reload the page to reconnect websocket
117
+ window.location.href = '/';
118
  };
119
 
120
  // Handler to go back to live mode
cua2-front/src/components/sandbox/completionview/CompletionView.tsx CHANGED
@@ -1,3 +1,5 @@
 
 
1
  import { AgentStep, AgentTrace, FinalStep } from '@/types/agent';
2
  import AccessTimeIcon from '@mui/icons-material/AccessTime';
3
  import AddIcon from '@mui/icons-material/Add';
@@ -11,8 +13,10 @@ import InputIcon from '@mui/icons-material/Input';
11
  import OutputIcon from '@mui/icons-material/Output';
12
  import SmartToyIcon from '@mui/icons-material/SmartToy';
13
  import StopCircleIcon from '@mui/icons-material/StopCircle';
14
- import { Alert, Box, Button, Divider, Paper, Typography } from '@mui/material';
15
- import React from 'react';
 
 
16
  import { DownloadGifButton } from './DownloadGifButton';
17
  import { DownloadJsonButton } from './DownloadJsonButton';
18
 
@@ -42,6 +46,30 @@ export const CompletionView: React.FC<CompletionViewProps> = ({
42
  onDownloadJson,
43
  onBackToHome,
44
  }) => {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  const getStatusConfig = () => {
46
  switch (finalStep.type) {
47
  case 'success':
@@ -227,6 +255,62 @@ export const CompletionView: React.FC<CompletionViewProps> = ({
227
  </Box>
228
  )}
229
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  {/* Divider before metrics */}
231
  <Divider sx={{ my: 2 }} />
232
 
 
1
+ import { updateTraceEvaluation } from '@/services/api';
2
+ import { useAgentStore } from '@/stores/agentStore';
3
  import { AgentStep, AgentTrace, FinalStep } from '@/types/agent';
4
  import AccessTimeIcon from '@mui/icons-material/AccessTime';
5
  import AddIcon from '@mui/icons-material/Add';
 
13
  import OutputIcon from '@mui/icons-material/Output';
14
  import SmartToyIcon from '@mui/icons-material/SmartToy';
15
  import StopCircleIcon from '@mui/icons-material/StopCircle';
16
+ import ThumbDownIcon from '@mui/icons-material/ThumbDown';
17
+ import ThumbUpIcon from '@mui/icons-material/ThumbUp';
18
+ import { Alert, Box, Button, Divider, IconButton, Paper, Tooltip, Typography } from '@mui/material';
19
+ import React, { useState } from 'react';
20
  import { DownloadGifButton } from './DownloadGifButton';
21
  import { DownloadJsonButton } from './DownloadJsonButton';
22
 
 
46
  onDownloadJson,
47
  onBackToHome,
48
  }) => {
49
+ const updateTraceEvaluationInStore = useAgentStore((state) => state.updateTraceEvaluation);
50
+ const [evaluation, setEvaluation] = useState<'success' | 'failed' | 'not_evaluated'>(
51
+ finalStep.metadata.user_evaluation || 'not_evaluated'
52
+ );
53
+ const [isVoting, setIsVoting] = useState(false);
54
+
55
+ const handleTraceEvaluation = async (vote: 'success' | 'failed') => {
56
+ if (isVoting || !trace?.id) return;
57
+
58
+ const newEvaluation = evaluation === vote ? 'not_evaluated' : vote;
59
+ setIsVoting(true);
60
+
61
+ try {
62
+ await updateTraceEvaluation(trace.id, newEvaluation);
63
+ setEvaluation(newEvaluation);
64
+ // Update the store so the evaluation is reflected in JSON export
65
+ updateTraceEvaluationInStore(newEvaluation);
66
+ } catch (error) {
67
+ console.error('Failed to update trace evaluation:', error);
68
+ } finally {
69
+ setIsVoting(false);
70
+ }
71
+ };
72
+
73
  const getStatusConfig = () => {
74
  switch (finalStep.type) {
75
  case 'success':
 
255
  </Box>
256
  )}
257
 
258
+ {/* Trace Evaluation */}
259
+ <Box sx={{ mb: 2 }}>
260
+ <Box sx={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between' }}>
261
+ <Typography
262
+ variant="caption"
263
+ sx={{
264
+ fontWeight: 700,
265
+ color: 'text.secondary',
266
+ fontSize: '0.7rem',
267
+ textTransform: 'uppercase',
268
+ letterSpacing: '0.5px',
269
+ }}
270
+ >
271
+ Was this task completed successfully?
272
+ </Typography>
273
+
274
+ {/* Evaluation buttons */}
275
+ <Box sx={{ display: 'flex', gap: 1 }}>
276
+ <Tooltip title={evaluation === 'success' ? 'Remove success rating' : 'Mark as successful'}>
277
+ <IconButton
278
+ size="small"
279
+ onClick={() => handleTraceEvaluation('success')}
280
+ disabled={isVoting}
281
+ sx={{
282
+ padding: '4px',
283
+ color: evaluation === 'success' ? 'success.main' : 'action.disabled',
284
+ '&:hover': {
285
+ color: 'success.main',
286
+ backgroundColor: (theme) => theme.palette.mode === 'dark' ? 'rgba(102, 187, 106, 0.1)' : 'rgba(102, 187, 106, 0.08)',
287
+ },
288
+ }}
289
+ >
290
+ <ThumbUpIcon sx={{ fontSize: 18 }} />
291
+ </IconButton>
292
+ </Tooltip>
293
+ <Tooltip title={evaluation === 'failed' ? 'Remove failure rating' : 'Mark as failed'}>
294
+ <IconButton
295
+ size="small"
296
+ onClick={() => handleTraceEvaluation('failed')}
297
+ disabled={isVoting}
298
+ sx={{
299
+ padding: '4px',
300
+ color: evaluation === 'failed' ? 'error.main' : 'action.disabled',
301
+ '&:hover': {
302
+ color: 'error.main',
303
+ backgroundColor: (theme) => theme.palette.mode === 'dark' ? 'rgba(244, 67, 54, 0.1)' : 'rgba(244, 67, 54, 0.08)',
304
+ },
305
+ }}
306
+ >
307
+ <ThumbDownIcon sx={{ fontSize: 18 }} />
308
+ </IconButton>
309
+ </Tooltip>
310
+ </Box>
311
+ </Box>
312
+ </Box>
313
+
314
  {/* Divider before metrics */}
315
  <Divider sx={{ my: 2 }} />
316
 
cua2-front/src/components/steps/FinalStepCard.tsx CHANGED
@@ -1,12 +1,12 @@
 
1
  import { FinalStep } from '@/types/agent';
2
- import React from 'react';
3
- import { Card, CardContent, Box, Typography } from '@mui/material';
4
  import CheckIcon from '@mui/icons-material/Check';
5
  import CloseIcon from '@mui/icons-material/Close';
6
- import StopCircleIcon from '@mui/icons-material/StopCircle';
7
  import HourglassEmptyIcon from '@mui/icons-material/HourglassEmpty';
8
- import AccessTimeIcon from '@mui/icons-material/AccessTime';
9
- import { useAgentStore } from '@/stores/agentStore';
 
10
 
11
  interface FinalStepCardProps {
12
  finalStep: FinalStep;
@@ -74,8 +74,8 @@ export const FinalStepCard: React.FC<FinalStepCardProps> = ({ finalStep, isActiv
74
  cursor: 'pointer',
75
  boxShadow: isActive
76
  ? (theme) => `0 2px 8px ${theme.palette.mode === 'dark'
77
- ? `rgba(${statusConfig.color === 'success' ? '102, 187, 106' : statusConfig.color === 'error' ? '244, 67, 54' : '255, 152, 0'}, 0.3)`
78
- : `rgba(${statusConfig.color === 'success' ? '102, 187, 106' : statusConfig.color === 'error' ? '244, 67, 54' : '255, 152, 0'}, 0.2)`}`
79
  : 'none',
80
  '&:hover': {
81
  borderColor: (theme) => `${theme.palette[statusConfig.color].main} !important`,
 
1
+ import { useAgentStore } from '@/stores/agentStore';
2
  import { FinalStep } from '@/types/agent';
3
+ import AccessTimeIcon from '@mui/icons-material/AccessTime';
 
4
  import CheckIcon from '@mui/icons-material/Check';
5
  import CloseIcon from '@mui/icons-material/Close';
 
6
  import HourglassEmptyIcon from '@mui/icons-material/HourglassEmpty';
7
+ import StopCircleIcon from '@mui/icons-material/StopCircle';
8
+ import { Box, Card, CardContent, Typography } from '@mui/material';
9
+ import React from 'react';
10
 
11
  interface FinalStepCardProps {
12
  finalStep: FinalStep;
 
74
  cursor: 'pointer',
75
  boxShadow: isActive
76
  ? (theme) => `0 2px 8px ${theme.palette.mode === 'dark'
77
+ ? `rgba(${statusConfig.color === 'success' ? '102, 187, 106' : statusConfig.color === 'error' ? '244, 67, 54' : '255, 152, 0'}, 0.3)`
78
+ : `rgba(${statusConfig.color === 'success' ? '102, 187, 106' : statusConfig.color === 'error' ? '244, 67, 54' : '255, 152, 0'}, 0.2)`}`
79
  : 'none',
80
  '&:hover': {
81
  borderColor: (theme) => `${theme.palette[statusConfig.color].main} !important`,
cua2-front/src/components/steps/StepCard.tsx CHANGED
@@ -1,16 +1,14 @@
 
 
1
  import { AgentStep } from '@/types/agent';
2
- import React, { useState } from 'react';
3
- import { Card, CardContent, Box, Typography, Divider, Chip, Paper, Accordion, AccordionSummary, AccordionDetails, IconButton, Tooltip } from '@mui/material';
4
- import ThoughtBubbleIcon from '@mui/icons-material/Psychology';
5
- import BoltIcon from '@mui/icons-material/Bolt';
6
  import AccessTimeIcon from '@mui/icons-material/AccessTime';
 
7
  import InputIcon from '@mui/icons-material/Input';
8
  import OutputIcon from '@mui/icons-material/Output';
9
- import ExpandMoreIcon from '@mui/icons-material/ExpandMore';
10
- import ThumbUpIcon from '@mui/icons-material/ThumbUp';
11
  import ThumbDownIcon from '@mui/icons-material/ThumbDown';
12
- import { useAgentStore } from '@/stores/agentStore';
13
- import { updateStepEvaluation } from '@/services/api';
 
14
 
15
  interface StepCardProps {
16
  step: AgentStep;
@@ -21,6 +19,7 @@ interface StepCardProps {
21
 
22
  export const StepCard: React.FC<StepCardProps> = ({ step, index, isLatest = false, isActive = false }) => {
23
  const setSelectedStepIndex = useAgentStore((state) => state.setSelectedStepIndex);
 
24
  const [thoughtExpanded, setThoughtExpanded] = useState(false);
25
  const [evaluation, setEvaluation] = useState<'like' | 'dislike' | 'neutral'>(step.step_evaluation || 'neutral');
26
  const [isVoting, setIsVoting] = useState(false);
@@ -44,6 +43,8 @@ export const StepCard: React.FC<StepCardProps> = ({ step, index, isLatest = fals
44
  try {
45
  await updateStepEvaluation(step.traceId, step.stepId, newEvaluation);
46
  setEvaluation(newEvaluation);
 
 
47
  } catch (error) {
48
  console.error('Failed to update step evaluation:', error);
49
  } finally {
@@ -206,7 +207,7 @@ export const StepCard: React.FC<StepCardProps> = ({ step, index, isLatest = fals
206
  </Tooltip>
207
  </Box>
208
  </Box>
209
- <Box component="ul" sx={{ listStyle: 'none', p: 0, m: 0}}>
210
  {step.actions.map((action, actionIndex) => (
211
  <Box
212
  key={actionIndex}
 
1
+ import { updateStepEvaluation } from '@/services/api';
2
+ import { useAgentStore } from '@/stores/agentStore';
3
  import { AgentStep } from '@/types/agent';
 
 
 
 
4
  import AccessTimeIcon from '@mui/icons-material/AccessTime';
5
+ import ExpandMoreIcon from '@mui/icons-material/ExpandMore';
6
  import InputIcon from '@mui/icons-material/Input';
7
  import OutputIcon from '@mui/icons-material/Output';
 
 
8
  import ThumbDownIcon from '@mui/icons-material/ThumbDown';
9
+ import ThumbUpIcon from '@mui/icons-material/ThumbUp';
10
+ import { Accordion, AccordionDetails, AccordionSummary, Box, Card, CardContent, Chip, IconButton, Tooltip, Typography } from '@mui/material';
11
+ import React, { useState } from 'react';
12
 
13
  interface StepCardProps {
14
  step: AgentStep;
 
19
 
20
  export const StepCard: React.FC<StepCardProps> = ({ step, index, isLatest = false, isActive = false }) => {
21
  const setSelectedStepIndex = useAgentStore((state) => state.setSelectedStepIndex);
22
+ const updateStepEvaluationInStore = useAgentStore((state) => state.updateStepEvaluation);
23
  const [thoughtExpanded, setThoughtExpanded] = useState(false);
24
  const [evaluation, setEvaluation] = useState<'like' | 'dislike' | 'neutral'>(step.step_evaluation || 'neutral');
25
  const [isVoting, setIsVoting] = useState(false);
 
43
  try {
44
  await updateStepEvaluation(step.traceId, step.stepId, newEvaluation);
45
  setEvaluation(newEvaluation);
46
+ // Update the store so the evaluation is reflected in JSON export
47
+ updateStepEvaluationInStore(step.stepId, newEvaluation);
48
  } catch (error) {
49
  console.error('Failed to update step evaluation:', error);
50
  } finally {
 
207
  </Tooltip>
208
  </Box>
209
  </Box>
210
+ <Box component="ul" sx={{ listStyle: 'none', p: 0, m: 0 }}>
211
  {step.actions.map((action, actionIndex) => (
212
  <Box
213
  key={actionIndex}
cua2-front/src/services/api.ts CHANGED
@@ -54,3 +54,25 @@ export async function updateStepEvaluation(
54
  throw new Error('Failed to update step evaluation');
55
  }
56
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  throw new Error('Failed to update step evaluation');
55
  }
56
  }
57
+
58
/**
 * Update trace evaluation (overall task feedback).
 *
 * Sends a PATCH request to `/traces/{traceId}/evaluation` with the new value.
 *
 * @param traceId - Identifier of the trace to evaluate.
 * @param evaluation - New evaluation value.
 * @throws Error when the server responds with a non-OK status.
 */
export async function updateTraceEvaluation(
  traceId: string,
  evaluation: 'success' | 'failed' | 'not_evaluated'
): Promise<void> {
  // Escape the id so characters such as "/" or "?" cannot alter the request path.
  const url = `${getApiBaseUrl()}/traces/${encodeURIComponent(traceId)}/evaluation`;

  const response = await fetch(url, {
    method: 'PATCH',
    headers: {
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      user_evaluation: evaluation,
    }),
  });

  if (!response.ok) {
    throw new Error('Failed to update trace evaluation');
  }
}
cua2-front/src/services/jsonExporter.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { AgentTrace, AgentStep, AgentTraceMetadata, FinalStep } from '@/types/agent';
2
 
3
  /**
4
  * Extract final answer from steps
@@ -60,14 +60,13 @@ export const exportTraceToJson = (
60
  traceId: step.traceId,
61
  stepId: step.stepId,
62
  error: step.error,
 
63
  thought: step.thought,
64
  actions: step.actions,
65
  duration: step.duration,
66
  inputTokensUsed: step.inputTokensUsed,
67
  outputTokensUsed: step.outputTokensUsed,
68
  step_evaluation: step.step_evaluation,
69
- // Don't include base64 image to reduce JSON size
70
- hasImage: !!step.image,
71
  })),
72
  exportedAt: new Date().toISOString(),
73
  };
 
1
+ import { AgentStep, AgentTrace, AgentTraceMetadata, FinalStep } from '@/types/agent';
2
 
3
  /**
4
  * Extract final answer from steps
 
60
  traceId: step.traceId,
61
  stepId: step.stepId,
62
  error: step.error,
63
+ image: step.image, // Include full base64 image
64
  thought: step.thought,
65
  actions: step.actions,
66
  duration: step.duration,
67
  inputTokensUsed: step.inputTokensUsed,
68
  outputTokensUsed: step.outputTokensUsed,
69
  step_evaluation: step.step_evaluation,
 
 
70
  })),
71
  exportedAt: new Date().toISOString(),
72
  };
cua2-front/src/stores/agentStore.ts CHANGED
@@ -22,6 +22,8 @@ interface AgentState {
22
  setTrace: (trace: AgentTrace | undefined) => void;
23
  setTraceId: (traceId: string | null) => void;
24
  updateTraceWithStep: (step: AgentStep, metadata: AgentTraceMetadata) => void;
 
 
25
  completeTrace: (metadata: AgentTraceMetadata, finalState?: 'success' | 'stopped' | 'max_steps_reached' | 'error' | 'sandbox_timeout') => void;
26
  setIsAgentProcessing: (processing: boolean) => void;
27
  setIsConnectingToE2B: (connecting: boolean) => void;
@@ -97,6 +99,59 @@ export const useAgentStore = create<AgentState>()(
97
  'updateTraceWithStep'
98
  ),
99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  // Complete the trace
101
  completeTrace: (metadata, finalState?: 'success' | 'stopped' | 'max_steps_reached' | 'error' | 'sandbox_timeout') =>
102
  set(
@@ -196,12 +251,16 @@ export const useAgentStore = create<AgentState>()(
196
  numberOfSteps: state.trace.steps?.length || 0,
197
  maxSteps: 200,
198
  completed: false,
 
 
199
  };
200
 
201
  // Ensure maxSteps is not 0
202
- const finalMetadata = {
203
  ...metadata,
204
  maxSteps: metadata.maxSteps > 0 ? metadata.maxSteps : 200,
 
 
205
  };
206
 
207
  const finalStep: FinalStep = {
 
22
  setTrace: (trace: AgentTrace | undefined) => void;
23
  setTraceId: (traceId: string | null) => void;
24
  updateTraceWithStep: (step: AgentStep, metadata: AgentTraceMetadata) => void;
25
+ updateStepEvaluation: (stepId: string, evaluation: 'like' | 'dislike' | 'neutral') => void;
26
+ updateTraceEvaluation: (evaluation: 'success' | 'failed' | 'not_evaluated') => void;
27
  completeTrace: (metadata: AgentTraceMetadata, finalState?: 'success' | 'stopped' | 'max_steps_reached' | 'error' | 'sandbox_timeout') => void;
28
  setIsAgentProcessing: (processing: boolean) => void;
29
  setIsConnectingToE2B: (connecting: boolean) => void;
 
99
  'updateTraceWithStep'
100
  ),
101
 
102
+ // Update step evaluation in the store
103
+ updateStepEvaluation: (stepId, evaluation) =>
104
+ set(
105
+ (state) => {
106
+ if (!state.trace || !state.trace.steps) return state;
107
+
108
+ const updatedSteps = state.trace.steps.map((step) =>
109
+ step.stepId === stepId
110
+ ? { ...step, step_evaluation: evaluation }
111
+ : step
112
+ );
113
+
114
+ return {
115
+ trace: {
116
+ ...state.trace,
117
+ steps: updatedSteps,
118
+ },
119
+ };
120
+ },
121
+ false,
122
+ 'updateStepEvaluation'
123
+ ),
124
+
125
+ // Update trace evaluation in the store
126
+ updateTraceEvaluation: (evaluation) =>
127
+ set(
128
+ (state) => {
129
+ if (!state.trace || !state.trace.traceMetadata) return state;
130
+
131
+ const updatedMetadata = {
132
+ ...state.trace.traceMetadata,
133
+ user_evaluation: evaluation,
134
+ };
135
+
136
+ return {
137
+ trace: {
138
+ ...state.trace,
139
+ traceMetadata: updatedMetadata,
140
+ },
141
+ // Also update finalStep metadata if it exists
142
+ finalStep: state.finalStep ? {
143
+ ...state.finalStep,
144
+ metadata: {
145
+ ...state.finalStep.metadata,
146
+ user_evaluation: evaluation,
147
+ },
148
+ } : state.finalStep,
149
+ };
150
+ },
151
+ false,
152
+ 'updateTraceEvaluation'
153
+ ),
154
+
155
  // Complete the trace
156
  completeTrace: (metadata, finalState?: 'success' | 'stopped' | 'max_steps_reached' | 'error' | 'sandbox_timeout') =>
157
  set(
 
251
  numberOfSteps: state.trace.steps?.length || 0,
252
  maxSteps: 200,
253
  completed: false,
254
+ final_state: null,
255
+ user_evaluation: 'not_evaluated' as const,
256
  };
257
 
258
  // Ensure maxSteps is not 0
259
+ const finalMetadata: AgentTraceMetadata = {
260
  ...metadata,
261
  maxSteps: metadata.maxSteps > 0 ? metadata.maxSteps : 200,
262
+ final_state: metadata.final_state || null,
263
+ user_evaluation: metadata.user_evaluation || 'not_evaluated',
264
  };
265
 
266
  const finalStep: FinalStep = {
cua2-front/src/types/agent.ts CHANGED
@@ -36,6 +36,7 @@ export interface AgentTraceMetadata {
36
  maxSteps: number;
37
  completed: boolean;
38
  final_state: 'success' | 'stopped' | 'max_steps_reached' | 'error' | 'sandbox_timeout' | null;
 
39
  }
40
 
41
  export interface FinalStep {
 
36
  maxSteps: number;
37
  completed: boolean;
38
  final_state: 'success' | 'stopped' | 'max_steps_reached' | 'error' | 'sandbox_timeout' | null;
39
+ user_evaluation?: 'success' | 'failed' | 'not_evaluated';
40
  }
41
 
42
  export interface FinalStep {