Add PBench average evaluation result

#3
by merve HF Staff - opened
Files changed (1) hide show
  1. .eval_results/pbench.yaml +4 -51
.eval_results/pbench.yaml CHANGED
@@ -1,56 +1,9 @@
1
- - dataset:
2
- id: tiiuae/PBench
3
- task_id: level_0
4
- value: 65.1
5
- date: '2026-03-28'
6
- source:
7
- url: https://arxiv.org/abs/2603.27365
8
- name: Falcon Perception paper
9
- - dataset:
10
- id: tiiuae/PBench
11
- task_id: level_1
12
- value: 63.6
13
- date: '2026-03-28'
14
- source:
15
- url: https://arxiv.org/abs/2603.27365
16
- name: Falcon Perception paper
17
- - dataset:
18
- id: tiiuae/PBench
19
- task_id: level_2
20
- value: 38.0
21
- date: '2026-03-28'
22
- source:
23
- url: https://arxiv.org/abs/2603.27365
24
- name: Falcon Perception paper
25
- - dataset:
26
- id: tiiuae/PBench
27
- task_id: level_3
28
- value: 53.5
29
- date: '2026-03-28'
30
- source:
31
- url: https://arxiv.org/abs/2603.27365
32
- name: Falcon Perception paper
33
- - dataset:
34
- id: tiiuae/PBench
35
- task_id: level_4
36
- value: 49.1
37
- date: '2026-03-28'
38
- source:
39
- url: https://arxiv.org/abs/2603.27365
40
- name: Falcon Perception paper
41
- - dataset:
42
- id: tiiuae/PBench
43
- task_id: dense
44
- value: 72.6
45
- date: '2026-03-28'
46
- source:
47
- url: https://arxiv.org/abs/2603.27365
48
- name: Falcon Perception paper
49
  - dataset:
50
  id: tiiuae/PBench
51
  task_id: average
52
  value: 57.0
53
- date: '2026-03-28'
54
  source:
55
- url: https://arxiv.org/abs/2603.27365
56
- name: Falcon Perception paper
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  - dataset:
2
  id: tiiuae/PBench
3
  task_id: average
4
  value: 57.0
5
+ date: '2026-05-11'
6
  source:
7
+ url: https://huggingface.co/tiiuae/Falcon-Perception
8
+ name: Community Evals
9
+ user: merve