Add Terminal-Bench evaluation result (27.8%)

#69
opened by burtenshaw (HF Staff)
Files changed (1)
  1. .eval_results/terminal_bench.yaml +2 -1
.eval_results/terminal_bench.yaml CHANGED
@@ -1,6 +1,6 @@
1
  - dataset:
2
  id: harborframework/terminal-bench-2.0
3
- task_id: terminalbench_2
4
  value: 27.8
5
  date: '2025-11-01'
6
  source:
@@ -8,3 +8,4 @@
8
  name: Terminal-Bench Leaderboard
9
  user: burtenshaw
10
  notes: "agent: Terminus 2"
 
 
1
  - dataset:
2
  id: harborframework/terminal-bench-2.0
3
+ task_id: terminal_bench
4
  value: 27.8
5
  date: '2025-11-01'
6
  source:
 
8
  name: Terminal-Bench Leaderboard
9
  user: burtenshaw
10
  notes: "agent: Terminus 2"
11
+