lcxrocks committed on
Commit
6f919ea
·
verified ·
1 Parent(s): 139d537

Correct score format.

Browse files
scores/ALICE-2B.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "model_name": "ALICE-2B",
4
  "model_backbone": "Qwen3VL-2B",
5
  "model_size": "2B parameters",
6
  "embedding_dimension": null,
@@ -13,182 +13,182 @@
13
  "metrics": {
14
  "image": {
15
  "ImageNet-1K": {
16
- "acc": 0.834,
17
  "num_correct": 834,
18
  "num_pred": 1000
19
  },
20
  "N24News": {
21
- "acc": 0.796,
22
  "num_correct": 796,
23
  "num_pred": 1000
24
  },
25
  "HatefulMemes": {
26
- "acc": 0.7,
27
  "num_correct": 700,
28
  "num_pred": 1000
29
  },
30
  "VOC2007": {
31
- "acc": 0.915,
32
  "num_correct": 915,
33
  "num_pred": 1000
34
  },
35
  "SUN397": {
36
- "acc": 0.806,
37
  "num_correct": 806,
38
  "num_pred": 1000
39
  },
40
  "Place365": {
41
- "acc": 0.475,
42
  "num_correct": 475,
43
  "num_pred": 1000
44
  },
45
  "ImageNet-A": {
46
- "acc": 0.528,
47
  "num_correct": 528,
48
  "num_pred": 1000
49
  },
50
  "ImageNet-R": {
51
- "acc": 0.891,
52
  "num_correct": 891,
53
  "num_pred": 1000
54
  },
55
  "ObjectNet": {
56
- "acc": 0.741,
57
  "num_correct": 741,
58
  "num_pred": 1000
59
  },
60
  "Country211": {
61
- "acc": 0.184,
62
  "num_correct": 184,
63
  "num_pred": 1000
64
  },
65
  "OK-VQA": {
66
- "acc": 0.674,
67
  "num_correct": 674,
68
  "num_pred": 1000
69
  },
70
  "A-OKVQA": {
71
- "acc": 0.577,
72
  "num_correct": 577,
73
  "num_pred": 1000
74
  },
75
  "DocVQA": {
76
- "acc": 0.919,
77
  "num_correct": 919,
78
  "num_pred": 1000
79
  },
80
  "InfographicsVQA": {
81
- "acc": 0.673,
82
  "num_correct": 673,
83
  "num_pred": 1000
84
  },
85
  "ChartQA": {
86
- "acc": 0.631,
87
  "num_correct": 631,
88
  "num_pred": 1000
89
  },
90
  "Visual7W": {
91
- "acc": 0.548,
92
  "num_correct": 548,
93
  "num_pred": 1000
94
  },
95
  "ScienceQA": {
96
- "acc": 0.458,
97
  "num_correct": 458,
98
  "num_pred": 1000
99
  },
100
  "VizWiz": {
101
- "acc": 0.481,
102
  "num_correct": 481,
103
  "num_pred": 1000
104
  },
105
  "GQA": {
106
- "acc": 0.703,
107
  "num_correct": 703,
108
  "num_pred": 1000
109
  },
110
  "TextVQA": {
111
- "acc": 0.801,
112
  "num_correct": 801,
113
  "num_pred": 1000
114
  },
115
  "VisDial": {
116
- "acc": 0.845,
117
  "num_correct": 845,
118
  "num_pred": 1000
119
  },
120
  "CIRR": {
121
- "acc": 0.585,
122
  "num_correct": 585,
123
  "num_pred": 1000
124
  },
125
  "VisualNews_t2i": {
126
- "acc": 0.74,
127
  "num_correct": 740,
128
  "num_pred": 1000
129
  },
130
  "VisualNews_i2t": {
131
- "acc": 0.768,
132
  "num_correct": 768,
133
  "num_pred": 1000
134
  },
135
  "MSCOCO_t2i": {
136
- "acc": 0.79,
137
  "num_correct": 790,
138
  "num_pred": 1000
139
  },
140
  "MSCOCO_i2t": {
141
- "acc": 0.763,
142
  "num_correct": 763,
143
  "num_pred": 1000
144
  },
145
  "NIGHTS": {
146
- "acc": 0.664,
147
  "num_correct": 664,
148
  "num_pred": 1000
149
  },
150
  "WebQA": {
151
- "acc": 0.882,
152
  "num_correct": 882,
153
  "num_pred": 1000
154
  },
155
  "FashionIQ": {
156
- "acc": 0.272,
157
  "num_correct": 272,
158
  "num_pred": 1000
159
  },
160
  "Wiki-SS-NQ": {
161
- "acc": 0.633,
162
  "num_correct": 633,
163
  "num_pred": 1000
164
  },
165
  "OVEN": {
166
- "acc": 0.621,
167
  "num_correct": 621,
168
  "num_pred": 1000
169
  },
170
  "EDIS": {
171
- "acc": 0.893,
172
  "num_correct": 893,
173
  "num_pred": 1000
174
  },
175
  "MSCOCO": {
176
- "acc": 0.921,
177
  "num_correct": 921,
178
  "num_pred": 1000
179
  },
180
  "RefCOCO": {
181
- "acc": 0.939,
182
  "num_correct": 939,
183
  "num_pred": 1000
184
  },
185
  "RefCOCO-Matching": {
186
- "acc": 0.901,
187
  "num_correct": 901,
188
  "num_pred": 1000
189
  },
190
  "Visual7W-Pointing": {
191
- "acc": 0.861,
192
  "num_correct": 861,
193
  "num_pred": 1000
194
  }
 
1
  {
2
  "metadata": {
3
+ "model_name": "ALICE-2B (Qwen3VL-2B)",
4
  "model_backbone": "Qwen3VL-2B",
5
  "model_size": "2B parameters",
6
  "embedding_dimension": null,
 
13
  "metrics": {
14
  "image": {
15
  "ImageNet-1K": {
16
+ "hit@1": 0.834,
17
  "num_correct": 834,
18
  "num_pred": 1000
19
  },
20
  "N24News": {
21
+ "hit@1": 0.796,
22
  "num_correct": 796,
23
  "num_pred": 1000
24
  },
25
  "HatefulMemes": {
26
+ "hit@1": 0.7,
27
  "num_correct": 700,
28
  "num_pred": 1000
29
  },
30
  "VOC2007": {
31
+ "hit@1": 0.915,
32
  "num_correct": 915,
33
  "num_pred": 1000
34
  },
35
  "SUN397": {
36
+ "hit@1": 0.806,
37
  "num_correct": 806,
38
  "num_pred": 1000
39
  },
40
  "Place365": {
41
+ "hit@1": 0.475,
42
  "num_correct": 475,
43
  "num_pred": 1000
44
  },
45
  "ImageNet-A": {
46
+ "hit@1": 0.528,
47
  "num_correct": 528,
48
  "num_pred": 1000
49
  },
50
  "ImageNet-R": {
51
+ "hit@1": 0.891,
52
  "num_correct": 891,
53
  "num_pred": 1000
54
  },
55
  "ObjectNet": {
56
+ "hit@1": 0.741,
57
  "num_correct": 741,
58
  "num_pred": 1000
59
  },
60
  "Country211": {
61
+ "hit@1": 0.184,
62
  "num_correct": 184,
63
  "num_pred": 1000
64
  },
65
  "OK-VQA": {
66
+ "hit@1": 0.674,
67
  "num_correct": 674,
68
  "num_pred": 1000
69
  },
70
  "A-OKVQA": {
71
+ "hit@1": 0.577,
72
  "num_correct": 577,
73
  "num_pred": 1000
74
  },
75
  "DocVQA": {
76
+ "hit@1": 0.919,
77
  "num_correct": 919,
78
  "num_pred": 1000
79
  },
80
  "InfographicsVQA": {
81
+ "hit@1": 0.673,
82
  "num_correct": 673,
83
  "num_pred": 1000
84
  },
85
  "ChartQA": {
86
+ "hit@1": 0.631,
87
  "num_correct": 631,
88
  "num_pred": 1000
89
  },
90
  "Visual7W": {
91
+ "hit@1": 0.548,
92
  "num_correct": 548,
93
  "num_pred": 1000
94
  },
95
  "ScienceQA": {
96
+ "hit@1": 0.458,
97
  "num_correct": 458,
98
  "num_pred": 1000
99
  },
100
  "VizWiz": {
101
+ "hit@1": 0.481,
102
  "num_correct": 481,
103
  "num_pred": 1000
104
  },
105
  "GQA": {
106
+ "hit@1": 0.703,
107
  "num_correct": 703,
108
  "num_pred": 1000
109
  },
110
  "TextVQA": {
111
+ "hit@1": 0.801,
112
  "num_correct": 801,
113
  "num_pred": 1000
114
  },
115
  "VisDial": {
116
+ "hit@1": 0.845,
117
  "num_correct": 845,
118
  "num_pred": 1000
119
  },
120
  "CIRR": {
121
+ "hit@1": 0.585,
122
  "num_correct": 585,
123
  "num_pred": 1000
124
  },
125
  "VisualNews_t2i": {
126
+ "hit@1": 0.74,
127
  "num_correct": 740,
128
  "num_pred": 1000
129
  },
130
  "VisualNews_i2t": {
131
+ "hit@1": 0.768,
132
  "num_correct": 768,
133
  "num_pred": 1000
134
  },
135
  "MSCOCO_t2i": {
136
+ "hit@1": 0.79,
137
  "num_correct": 790,
138
  "num_pred": 1000
139
  },
140
  "MSCOCO_i2t": {
141
+ "hit@1": 0.763,
142
  "num_correct": 763,
143
  "num_pred": 1000
144
  },
145
  "NIGHTS": {
146
+ "hit@1": 0.664,
147
  "num_correct": 664,
148
  "num_pred": 1000
149
  },
150
  "WebQA": {
151
+ "hit@1": 0.882,
152
  "num_correct": 882,
153
  "num_pred": 1000
154
  },
155
  "FashionIQ": {
156
+ "hit@1": 0.272,
157
  "num_correct": 272,
158
  "num_pred": 1000
159
  },
160
  "Wiki-SS-NQ": {
161
+ "hit@1": 0.633,
162
  "num_correct": 633,
163
  "num_pred": 1000
164
  },
165
  "OVEN": {
166
+ "hit@1": 0.621,
167
  "num_correct": 621,
168
  "num_pred": 1000
169
  },
170
  "EDIS": {
171
+ "hit@1": 0.893,
172
  "num_correct": 893,
173
  "num_pred": 1000
174
  },
175
  "MSCOCO": {
176
+ "hit@1": 0.921,
177
  "num_correct": 921,
178
  "num_pred": 1000
179
  },
180
  "RefCOCO": {
181
+ "hit@1": 0.939,
182
  "num_correct": 939,
183
  "num_pred": 1000
184
  },
185
  "RefCOCO-Matching": {
186
+ "hit@1": 0.901,
187
  "num_correct": 901,
188
  "num_pred": 1000
189
  },
190
  "Visual7W-Pointing": {
191
+ "hit@1": 0.861,
192
  "num_correct": 861,
193
  "num_pred": 1000
194
  }
scores/ALICE-3B.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "model_name": "ALICE-3B",
4
  "model_backbone": "Qwen2.5VL-3B",
5
  "model_size": "3B parameters",
6
  "embedding_dimension": null,
@@ -13,182 +13,182 @@
13
  "metrics": {
14
  "image": {
15
  "ImageNet-1K": {
16
- "acc": 0.828,
17
  "num_correct": 828,
18
  "num_pred": 1000
19
  },
20
  "N24News": {
21
- "acc": 0.812,
22
  "num_correct": 812,
23
  "num_pred": 1000
24
  },
25
  "HatefulMemes": {
26
- "acc": 0.759,
27
  "num_correct": 759,
28
  "num_pred": 1000
29
  },
30
  "VOC2007": {
31
- "acc": 0.9,
32
  "num_correct": 900,
33
  "num_pred": 1000
34
  },
35
  "SUN397": {
36
- "acc": 0.798,
37
  "num_correct": 798,
38
  "num_pred": 1000
39
  },
40
  "Place365": {
41
- "acc": 0.463,
42
  "num_correct": 463,
43
  "num_pred": 1000
44
  },
45
  "ImageNet-A": {
46
- "acc": 0.417,
47
  "num_correct": 417,
48
  "num_pred": 1000
49
  },
50
  "ImageNet-R": {
51
- "acc": 0.822,
52
  "num_correct": 822,
53
  "num_pred": 1000
54
  },
55
  "ObjectNet": {
56
- "acc": 0.626,
57
  "num_correct": 626,
58
  "num_pred": 1000
59
  },
60
  "Country211": {
61
- "acc": 0.257,
62
  "num_correct": 257,
63
  "num_pred": 1000
64
  },
65
  "OK-VQA": {
66
- "acc": 0.699,
67
  "num_correct": 699,
68
  "num_pred": 1000
69
  },
70
  "A-OKVQA": {
71
- "acc": 0.61,
72
  "num_correct": 610,
73
  "num_pred": 1000
74
  },
75
  "DocVQA": {
76
- "acc": 0.939,
77
  "num_correct": 939,
78
  "num_pred": 1000
79
  },
80
  "InfographicsVQA": {
81
- "acc": 0.757,
82
  "num_correct": 757,
83
  "num_pred": 1000
84
  },
85
  "ChartQA": {
86
- "acc": 0.698,
87
  "num_correct": 698,
88
  "num_pred": 1000
89
  },
90
  "Visual7W": {
91
- "acc": 0.582,
92
  "num_correct": 582,
93
  "num_pred": 1000
94
  },
95
  "ScienceQA": {
96
- "acc": 0.507,
97
  "num_correct": 507,
98
  "num_pred": 1000
99
  },
100
  "VizWiz": {
101
- "acc": 0.502,
102
  "num_correct": 502,
103
  "num_pred": 1000
104
  },
105
  "GQA": {
106
- "acc": 0.758,
107
  "num_correct": 758,
108
  "num_pred": 1000
109
  },
110
  "TextVQA": {
111
- "acc": 0.819,
112
  "num_correct": 819,
113
  "num_pred": 1000
114
  },
115
  "VisDial": {
116
- "acc": 0.845,
117
  "num_correct": 845,
118
  "num_pred": 1000
119
  },
120
  "CIRR": {
121
- "acc": 0.58,
122
  "num_correct": 580,
123
  "num_pred": 1000
124
  },
125
  "VisualNews_t2i": {
126
- "acc": 0.767,
127
  "num_correct": 767,
128
  "num_pred": 1000
129
  },
130
  "VisualNews_i2t": {
131
- "acc": 0.816,
132
  "num_correct": 816,
133
  "num_pred": 1000
134
  },
135
  "MSCOCO_t2i": {
136
- "acc": 0.791,
137
  "num_correct": 791,
138
  "num_pred": 1000
139
  },
140
  "MSCOCO_i2t": {
141
- "acc": 0.756,
142
  "num_correct": 756,
143
  "num_pred": 1000
144
  },
145
  "NIGHTS": {
146
- "acc": 0.678,
147
  "num_correct": 678,
148
  "num_pred": 1000
149
  },
150
  "WebQA": {
151
- "acc": 0.905,
152
  "num_correct": 905,
153
  "num_pred": 1000
154
  },
155
  "FashionIQ": {
156
- "acc": 0.249,
157
  "num_correct": 249,
158
  "num_pred": 1000
159
  },
160
  "Wiki-SS-NQ": {
161
- "acc": 0.673,
162
  "num_correct": 673,
163
  "num_pred": 1000
164
  },
165
  "OVEN": {
166
- "acc": 0.704,
167
  "num_correct": 704,
168
  "num_pred": 1000
169
  },
170
  "EDIS": {
171
- "acc": 0.927,
172
  "num_correct": 927,
173
  "num_pred": 1000
174
  },
175
  "MSCOCO": {
176
- "acc": 0.878,
177
  "num_correct": 878,
178
  "num_pred": 1000
179
  },
180
  "RefCOCO": {
181
- "acc": 0.942,
182
  "num_correct": 942,
183
  "num_pred": 1000
184
  },
185
  "RefCOCO-Matching": {
186
- "acc": 0.936,
187
  "num_correct": 936,
188
  "num_pred": 1000
189
  },
190
  "Visual7W-Pointing": {
191
- "acc": 0.866,
192
  "num_correct": 866,
193
  "num_pred": 1000
194
  }
 
1
  {
2
  "metadata": {
3
+ "model_name": "ALICE-3B (Qwen2.5VL-3B)",
4
  "model_backbone": "Qwen2.5VL-3B",
5
  "model_size": "3B parameters",
6
  "embedding_dimension": null,
 
13
  "metrics": {
14
  "image": {
15
  "ImageNet-1K": {
16
+ "hit@1": 0.828,
17
  "num_correct": 828,
18
  "num_pred": 1000
19
  },
20
  "N24News": {
21
+ "hit@1": 0.812,
22
  "num_correct": 812,
23
  "num_pred": 1000
24
  },
25
  "HatefulMemes": {
26
+ "hit@1": 0.759,
27
  "num_correct": 759,
28
  "num_pred": 1000
29
  },
30
  "VOC2007": {
31
+ "hit@1": 0.9,
32
  "num_correct": 900,
33
  "num_pred": 1000
34
  },
35
  "SUN397": {
36
+ "hit@1": 0.798,
37
  "num_correct": 798,
38
  "num_pred": 1000
39
  },
40
  "Place365": {
41
+ "hit@1": 0.463,
42
  "num_correct": 463,
43
  "num_pred": 1000
44
  },
45
  "ImageNet-A": {
46
+ "hit@1": 0.417,
47
  "num_correct": 417,
48
  "num_pred": 1000
49
  },
50
  "ImageNet-R": {
51
+ "hit@1": 0.822,
52
  "num_correct": 822,
53
  "num_pred": 1000
54
  },
55
  "ObjectNet": {
56
+ "hit@1": 0.626,
57
  "num_correct": 626,
58
  "num_pred": 1000
59
  },
60
  "Country211": {
61
+ "hit@1": 0.257,
62
  "num_correct": 257,
63
  "num_pred": 1000
64
  },
65
  "OK-VQA": {
66
+ "hit@1": 0.699,
67
  "num_correct": 699,
68
  "num_pred": 1000
69
  },
70
  "A-OKVQA": {
71
+ "hit@1": 0.61,
72
  "num_correct": 610,
73
  "num_pred": 1000
74
  },
75
  "DocVQA": {
76
+ "hit@1": 0.939,
77
  "num_correct": 939,
78
  "num_pred": 1000
79
  },
80
  "InfographicsVQA": {
81
+ "hit@1": 0.757,
82
  "num_correct": 757,
83
  "num_pred": 1000
84
  },
85
  "ChartQA": {
86
+ "hit@1": 0.698,
87
  "num_correct": 698,
88
  "num_pred": 1000
89
  },
90
  "Visual7W": {
91
+ "hit@1": 0.582,
92
  "num_correct": 582,
93
  "num_pred": 1000
94
  },
95
  "ScienceQA": {
96
+ "hit@1": 0.507,
97
  "num_correct": 507,
98
  "num_pred": 1000
99
  },
100
  "VizWiz": {
101
+ "hit@1": 0.502,
102
  "num_correct": 502,
103
  "num_pred": 1000
104
  },
105
  "GQA": {
106
+ "hit@1": 0.758,
107
  "num_correct": 758,
108
  "num_pred": 1000
109
  },
110
  "TextVQA": {
111
+ "hit@1": 0.819,
112
  "num_correct": 819,
113
  "num_pred": 1000
114
  },
115
  "VisDial": {
116
+ "hit@1": 0.845,
117
  "num_correct": 845,
118
  "num_pred": 1000
119
  },
120
  "CIRR": {
121
+ "hit@1": 0.58,
122
  "num_correct": 580,
123
  "num_pred": 1000
124
  },
125
  "VisualNews_t2i": {
126
+ "hit@1": 0.767,
127
  "num_correct": 767,
128
  "num_pred": 1000
129
  },
130
  "VisualNews_i2t": {
131
+ "hit@1": 0.816,
132
  "num_correct": 816,
133
  "num_pred": 1000
134
  },
135
  "MSCOCO_t2i": {
136
+ "hit@1": 0.791,
137
  "num_correct": 791,
138
  "num_pred": 1000
139
  },
140
  "MSCOCO_i2t": {
141
+ "hit@1": 0.756,
142
  "num_correct": 756,
143
  "num_pred": 1000
144
  },
145
  "NIGHTS": {
146
+ "hit@1": 0.678,
147
  "num_correct": 678,
148
  "num_pred": 1000
149
  },
150
  "WebQA": {
151
+ "hit@1": 0.905,
152
  "num_correct": 905,
153
  "num_pred": 1000
154
  },
155
  "FashionIQ": {
156
+ "hit@1": 0.249,
157
  "num_correct": 249,
158
  "num_pred": 1000
159
  },
160
  "Wiki-SS-NQ": {
161
+ "hit@1": 0.673,
162
  "num_correct": 673,
163
  "num_pred": 1000
164
  },
165
  "OVEN": {
166
+ "hit@1": 0.704,
167
  "num_correct": 704,
168
  "num_pred": 1000
169
  },
170
  "EDIS": {
171
+ "hit@1": 0.927,
172
  "num_correct": 927,
173
  "num_pred": 1000
174
  },
175
  "MSCOCO": {
176
+ "hit@1": 0.878,
177
  "num_correct": 878,
178
  "num_pred": 1000
179
  },
180
  "RefCOCO": {
181
+ "hit@1": 0.942,
182
  "num_correct": 942,
183
  "num_pred": 1000
184
  },
185
  "RefCOCO-Matching": {
186
+ "hit@1": 0.936,
187
  "num_correct": 936,
188
  "num_pred": 1000
189
  },
190
  "Visual7W-Pointing": {
191
+ "hit@1": 0.866,
192
  "num_correct": 866,
193
  "num_pred": 1000
194
  }
scores/ALICE-4B.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "model_name": "ALICE-4B",
4
  "model_backbone": "Qwen3VL-4B",
5
  "model_size": "4B parameters",
6
  "embedding_dimension": null,
@@ -13,182 +13,182 @@
13
  "metrics": {
14
  "image": {
15
  "ImageNet-1K": {
16
- "acc": 0.848,
17
  "num_correct": 848,
18
  "num_pred": 1000
19
  },
20
  "N24News": {
21
- "acc": 0.804,
22
  "num_correct": 804,
23
  "num_pred": 1000
24
  },
25
  "HatefulMemes": {
26
- "acc": 0.742,
27
  "num_correct": 742,
28
  "num_pred": 1000
29
  },
30
  "VOC2007": {
31
- "acc": 0.923,
32
  "num_correct": 923,
33
  "num_pred": 1000
34
  },
35
  "SUN397": {
36
- "acc": 0.828,
37
  "num_correct": 828,
38
  "num_pred": 1000
39
  },
40
  "Place365": {
41
- "acc": 0.46,
42
  "num_correct": 460,
43
  "num_pred": 1000
44
  },
45
  "ImageNet-A": {
46
- "acc": 0.515,
47
  "num_correct": 515,
48
  "num_pred": 1000
49
  },
50
  "ImageNet-R": {
51
- "acc": 0.903,
52
  "num_correct": 903,
53
  "num_pred": 1000
54
  },
55
  "ObjectNet": {
56
- "acc": 0.744,
57
  "num_correct": 744,
58
  "num_pred": 1000
59
  },
60
  "Country211": {
61
- "acc": 0.178,
62
  "num_correct": 178,
63
  "num_pred": 1000
64
  },
65
  "OK-VQA": {
66
- "acc": 0.708,
67
  "num_correct": 708,
68
  "num_pred": 1000
69
  },
70
  "A-OKVQA": {
71
- "acc": 0.607,
72
  "num_correct": 607,
73
  "num_pred": 1000
74
  },
75
  "DocVQA": {
76
- "acc": 0.943,
77
  "num_correct": 943,
78
  "num_pred": 1000
79
  },
80
  "InfographicsVQA": {
81
- "acc": 0.72,
82
  "num_correct": 720,
83
  "num_pred": 1000
84
  },
85
  "ChartQA": {
86
- "acc": 0.682,
87
  "num_correct": 682,
88
  "num_pred": 1000
89
  },
90
  "Visual7W": {
91
- "acc": 0.582,
92
  "num_correct": 582,
93
  "num_pred": 1000
94
  },
95
  "ScienceQA": {
96
- "acc": 0.545,
97
  "num_correct": 545,
98
  "num_pred": 1000
99
  },
100
  "VizWiz": {
101
- "acc": 0.489,
102
  "num_correct": 489,
103
  "num_pred": 1000
104
  },
105
  "GQA": {
106
- "acc": 0.756,
107
  "num_correct": 756,
108
  "num_pred": 1000
109
  },
110
  "TextVQA": {
111
- "acc": 0.828,
112
  "num_correct": 828,
113
  "num_pred": 1000
114
  },
115
  "VisDial": {
116
- "acc": 0.886,
117
  "num_correct": 886,
118
  "num_pred": 1000
119
  },
120
  "CIRR": {
121
- "acc": 0.626,
122
  "num_correct": 626,
123
  "num_pred": 1000
124
  },
125
  "VisualNews_t2i": {
126
- "acc": 0.754,
127
  "num_correct": 754,
128
  "num_pred": 1000
129
  },
130
  "VisualNews_i2t": {
131
- "acc": 0.816,
132
  "num_correct": 816,
133
  "num_pred": 1000
134
  },
135
  "MSCOCO_t2i": {
136
- "acc": 0.828,
137
  "num_correct": 828,
138
  "num_pred": 1000
139
  },
140
  "MSCOCO_i2t": {
141
- "acc": 0.784,
142
  "num_correct": 784,
143
  "num_pred": 1000
144
  },
145
  "NIGHTS": {
146
- "acc": 0.681,
147
  "num_correct": 681,
148
  "num_pred": 1000
149
  },
150
  "WebQA": {
151
- "acc": 0.905,
152
  "num_correct": 905,
153
  "num_pred": 1000
154
  },
155
  "FashionIQ": {
156
- "acc": 0.313,
157
  "num_correct": 313,
158
  "num_pred": 1000
159
  },
160
  "Wiki-SS-NQ": {
161
- "acc": 0.65,
162
  "num_correct": 650,
163
  "num_pred": 1000
164
  },
165
  "OVEN": {
166
- "acc": 0.65,
167
  "num_correct": 650,
168
  "num_pred": 1000
169
  },
170
  "EDIS": {
171
- "acc": 0.892,
172
  "num_correct": 892,
173
  "num_pred": 1000
174
  },
175
  "MSCOCO": {
176
- "acc": 0.912,
177
  "num_correct": 912,
178
  "num_pred": 1000
179
  },
180
  "RefCOCO": {
181
- "acc": 0.951,
182
  "num_correct": 951,
183
  "num_pred": 1000
184
  },
185
  "RefCOCO-Matching": {
186
- "acc": 0.933,
187
  "num_correct": 933,
188
  "num_pred": 1000
189
  },
190
  "Visual7W-Pointing": {
191
- "acc": 0.834,
192
  "num_correct": 834,
193
  "num_pred": 1000
194
  }
 
1
  {
2
  "metadata": {
3
+ "model_name": "ALICE-4B (Qwen3VL-4B)",
4
  "model_backbone": "Qwen3VL-4B",
5
  "model_size": "4B parameters",
6
  "embedding_dimension": null,
 
13
  "metrics": {
14
  "image": {
15
  "ImageNet-1K": {
16
+ "hit@1": 0.848,
17
  "num_correct": 848,
18
  "num_pred": 1000
19
  },
20
  "N24News": {
21
+ "hit@1": 0.804,
22
  "num_correct": 804,
23
  "num_pred": 1000
24
  },
25
  "HatefulMemes": {
26
+ "hit@1": 0.742,
27
  "num_correct": 742,
28
  "num_pred": 1000
29
  },
30
  "VOC2007": {
31
+ "hit@1": 0.923,
32
  "num_correct": 923,
33
  "num_pred": 1000
34
  },
35
  "SUN397": {
36
+ "hit@1": 0.828,
37
  "num_correct": 828,
38
  "num_pred": 1000
39
  },
40
  "Place365": {
41
+ "hit@1": 0.46,
42
  "num_correct": 460,
43
  "num_pred": 1000
44
  },
45
  "ImageNet-A": {
46
+ "hit@1": 0.515,
47
  "num_correct": 515,
48
  "num_pred": 1000
49
  },
50
  "ImageNet-R": {
51
+ "hit@1": 0.903,
52
  "num_correct": 903,
53
  "num_pred": 1000
54
  },
55
  "ObjectNet": {
56
+ "hit@1": 0.744,
57
  "num_correct": 744,
58
  "num_pred": 1000
59
  },
60
  "Country211": {
61
+ "hit@1": 0.178,
62
  "num_correct": 178,
63
  "num_pred": 1000
64
  },
65
  "OK-VQA": {
66
+ "hit@1": 0.708,
67
  "num_correct": 708,
68
  "num_pred": 1000
69
  },
70
  "A-OKVQA": {
71
+ "hit@1": 0.607,
72
  "num_correct": 607,
73
  "num_pred": 1000
74
  },
75
  "DocVQA": {
76
+ "hit@1": 0.943,
77
  "num_correct": 943,
78
  "num_pred": 1000
79
  },
80
  "InfographicsVQA": {
81
+ "hit@1": 0.72,
82
  "num_correct": 720,
83
  "num_pred": 1000
84
  },
85
  "ChartQA": {
86
+ "hit@1": 0.682,
87
  "num_correct": 682,
88
  "num_pred": 1000
89
  },
90
  "Visual7W": {
91
+ "hit@1": 0.582,
92
  "num_correct": 582,
93
  "num_pred": 1000
94
  },
95
  "ScienceQA": {
96
+ "hit@1": 0.545,
97
  "num_correct": 545,
98
  "num_pred": 1000
99
  },
100
  "VizWiz": {
101
+ "hit@1": 0.489,
102
  "num_correct": 489,
103
  "num_pred": 1000
104
  },
105
  "GQA": {
106
+ "hit@1": 0.756,
107
  "num_correct": 756,
108
  "num_pred": 1000
109
  },
110
  "TextVQA": {
111
+ "hit@1": 0.828,
112
  "num_correct": 828,
113
  "num_pred": 1000
114
  },
115
  "VisDial": {
116
+ "hit@1": 0.886,
117
  "num_correct": 886,
118
  "num_pred": 1000
119
  },
120
  "CIRR": {
121
+ "hit@1": 0.626,
122
  "num_correct": 626,
123
  "num_pred": 1000
124
  },
125
  "VisualNews_t2i": {
126
+ "hit@1": 0.754,
127
  "num_correct": 754,
128
  "num_pred": 1000
129
  },
130
  "VisualNews_i2t": {
131
+ "hit@1": 0.816,
132
  "num_correct": 816,
133
  "num_pred": 1000
134
  },
135
  "MSCOCO_t2i": {
136
+ "hit@1": 0.828,
137
  "num_correct": 828,
138
  "num_pred": 1000
139
  },
140
  "MSCOCO_i2t": {
141
+ "hit@1": 0.784,
142
  "num_correct": 784,
143
  "num_pred": 1000
144
  },
145
  "NIGHTS": {
146
+ "hit@1": 0.681,
147
  "num_correct": 681,
148
  "num_pred": 1000
149
  },
150
  "WebQA": {
151
+ "hit@1": 0.905,
152
  "num_correct": 905,
153
  "num_pred": 1000
154
  },
155
  "FashionIQ": {
156
+ "hit@1": 0.313,
157
  "num_correct": 313,
158
  "num_pred": 1000
159
  },
160
  "Wiki-SS-NQ": {
161
+ "hit@1": 0.65,
162
  "num_correct": 650,
163
  "num_pred": 1000
164
  },
165
  "OVEN": {
166
+ "hit@1": 0.65,
167
  "num_correct": 650,
168
  "num_pred": 1000
169
  },
170
  "EDIS": {
171
+ "hit@1": 0.892,
172
  "num_correct": 892,
173
  "num_pred": 1000
174
  },
175
  "MSCOCO": {
176
+ "hit@1": 0.912,
177
  "num_correct": 912,
178
  "num_pred": 1000
179
  },
180
  "RefCOCO": {
181
+ "hit@1": 0.951,
182
  "num_correct": 951,
183
  "num_pred": 1000
184
  },
185
  "RefCOCO-Matching": {
186
+ "hit@1": 0.933,
187
  "num_correct": 933,
188
  "num_pred": 1000
189
  },
190
  "Visual7W-Pointing": {
191
+ "hit@1": 0.834,
192
  "num_correct": 834,
193
  "num_pred": 1000
194
  }
scores/ALICE-7B.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "model_name": "ALICE-7B",
4
  "model_backbone": "Qwen2.5VL-7B",
5
  "model_size": "7B parameters",
6
  "embedding_dimension": null,
@@ -13,182 +13,182 @@
13
  "metrics": {
14
  "image": {
15
  "ImageNet-1K": {
16
- "acc": 0.834,
17
  "num_correct": 834,
18
  "num_pred": 1000
19
  },
20
  "N24News": {
21
- "acc": 0.823,
22
  "num_correct": 823,
23
  "num_pred": 1000
24
  },
25
  "HatefulMemes": {
26
- "acc": 0.798,
27
  "num_correct": 798,
28
  "num_pred": 1000
29
  },
30
  "VOC2007": {
31
- "acc": 0.898,
32
  "num_correct": 898,
33
  "num_pred": 1000
34
  },
35
  "SUN397": {
36
- "acc": 0.823,
37
  "num_correct": 823,
38
  "num_pred": 1000
39
  },
40
  "Place365": {
41
- "acc": 0.448,
42
  "num_correct": 448,
43
  "num_pred": 1000
44
  },
45
  "ImageNet-A": {
46
- "acc": 0.48,
47
  "num_correct": 480,
48
  "num_pred": 1000
49
  },
50
  "ImageNet-R": {
51
- "acc": 0.806,
52
  "num_correct": 806,
53
  "num_pred": 1000
54
  },
55
  "ObjectNet": {
56
- "acc": 0.655,
57
  "num_correct": 655,
58
  "num_pred": 1000
59
  },
60
  "Country211": {
61
- "acc": 0.284,
62
  "num_correct": 284,
63
  "num_pred": 1000
64
  },
65
  "OK-VQA": {
66
- "acc": 0.741,
67
  "num_correct": 741,
68
  "num_pred": 1000
69
  },
70
  "A-OKVQA": {
71
- "acc": 0.642,
72
  "num_correct": 642,
73
  "num_pred": 1000
74
  },
75
  "DocVQA": {
76
- "acc": 0.945,
77
  "num_correct": 945,
78
  "num_pred": 1000
79
  },
80
  "InfographicsVQA": {
81
- "acc": 0.781,
82
  "num_correct": 781,
83
  "num_pred": 1000
84
  },
85
  "ChartQA": {
86
- "acc": 0.753,
87
  "num_correct": 753,
88
  "num_pred": 1000
89
  },
90
  "Visual7W": {
91
- "acc": 0.594,
92
  "num_correct": 594,
93
  "num_pred": 1000
94
  },
95
  "ScienceQA": {
96
- "acc": 0.54,
97
  "num_correct": 540,
98
  "num_pred": 1000
99
  },
100
  "VizWiz": {
101
- "acc": 0.514,
102
  "num_correct": 514,
103
  "num_pred": 1000
104
  },
105
  "GQA": {
106
- "acc": 0.779,
107
  "num_correct": 779,
108
  "num_pred": 1000
109
  },
110
  "TextVQA": {
111
- "acc": 0.83,
112
  "num_correct": 830,
113
  "num_pred": 1000
114
  },
115
  "VisDial": {
116
- "acc": 0.866,
117
  "num_correct": 866,
118
  "num_pred": 1000
119
  },
120
  "CIRR": {
121
- "acc": 0.617,
122
  "num_correct": 617,
123
  "num_pred": 1000
124
  },
125
  "VisualNews_t2i": {
126
- "acc": 0.798,
127
  "num_correct": 798,
128
  "num_pred": 1000
129
  },
130
  "VisualNews_i2t": {
131
- "acc": 0.822,
132
  "num_correct": 822,
133
  "num_pred": 1000
134
  },
135
  "MSCOCO_t2i": {
136
- "acc": 0.795,
137
  "num_correct": 795,
138
  "num_pred": 1000
139
  },
140
  "MSCOCO_i2t": {
141
- "acc": 0.77,
142
  "num_correct": 770,
143
  "num_pred": 1000
144
  },
145
  "NIGHTS": {
146
- "acc": 0.679,
147
  "num_correct": 679,
148
  "num_pred": 1000
149
  },
150
  "WebQA": {
151
- "acc": 0.906,
152
  "num_correct": 906,
153
  "num_pred": 1000
154
  },
155
  "FashionIQ": {
156
- "acc": 0.275,
157
  "num_correct": 275,
158
  "num_pred": 1000
159
  },
160
  "Wiki-SS-NQ": {
161
- "acc": 0.648,
162
  "num_correct": 648,
163
  "num_pred": 1000
164
  },
165
  "OVEN": {
166
- "acc": 0.739,
167
  "num_correct": 739,
168
  "num_pred": 1000
169
  },
170
  "EDIS": {
171
- "acc": 0.928,
172
  "num_correct": 928,
173
  "num_pred": 1000
174
  },
175
  "MSCOCO": {
176
- "acc": 0.884,
177
  "num_correct": 884,
178
  "num_pred": 1000
179
  },
180
  "RefCOCO": {
181
- "acc": 0.953,
182
  "num_correct": 953,
183
  "num_pred": 1000
184
  },
185
  "RefCOCO-Matching": {
186
- "acc": 0.936,
187
  "num_correct": 936,
188
  "num_pred": 1000
189
  },
190
  "Visual7W-Pointing": {
191
- "acc": 0.844,
192
  "num_correct": 844,
193
  "num_pred": 1000
194
  }
 
1
  {
2
  "metadata": {
3
+ "model_name": "ALICE-7B (Qwen2.5VL-7B)",
4
  "model_backbone": "Qwen2.5VL-7B",
5
  "model_size": "7B parameters",
6
  "embedding_dimension": null,
 
13
  "metrics": {
14
  "image": {
15
  "ImageNet-1K": {
16
+ "hit@1": 0.834,
17
  "num_correct": 834,
18
  "num_pred": 1000
19
  },
20
  "N24News": {
21
+ "hit@1": 0.823,
22
  "num_correct": 823,
23
  "num_pred": 1000
24
  },
25
  "HatefulMemes": {
26
+ "hit@1": 0.798,
27
  "num_correct": 798,
28
  "num_pred": 1000
29
  },
30
  "VOC2007": {
31
+ "hit@1": 0.898,
32
  "num_correct": 898,
33
  "num_pred": 1000
34
  },
35
  "SUN397": {
36
+ "hit@1": 0.823,
37
  "num_correct": 823,
38
  "num_pred": 1000
39
  },
40
  "Place365": {
41
+ "hit@1": 0.448,
42
  "num_correct": 448,
43
  "num_pred": 1000
44
  },
45
  "ImageNet-A": {
46
+ "hit@1": 0.48,
47
  "num_correct": 480,
48
  "num_pred": 1000
49
  },
50
  "ImageNet-R": {
51
+ "hit@1": 0.806,
52
  "num_correct": 806,
53
  "num_pred": 1000
54
  },
55
  "ObjectNet": {
56
+ "hit@1": 0.655,
57
  "num_correct": 655,
58
  "num_pred": 1000
59
  },
60
  "Country211": {
61
+ "hit@1": 0.284,
62
  "num_correct": 284,
63
  "num_pred": 1000
64
  },
65
  "OK-VQA": {
66
+ "hit@1": 0.741,
67
  "num_correct": 741,
68
  "num_pred": 1000
69
  },
70
  "A-OKVQA": {
71
+ "hit@1": 0.642,
72
  "num_correct": 642,
73
  "num_pred": 1000
74
  },
75
  "DocVQA": {
76
+ "hit@1": 0.945,
77
  "num_correct": 945,
78
  "num_pred": 1000
79
  },
80
  "InfographicsVQA": {
81
+ "hit@1": 0.781,
82
  "num_correct": 781,
83
  "num_pred": 1000
84
  },
85
  "ChartQA": {
86
+ "hit@1": 0.753,
87
  "num_correct": 753,
88
  "num_pred": 1000
89
  },
90
  "Visual7W": {
91
+ "hit@1": 0.594,
92
  "num_correct": 594,
93
  "num_pred": 1000
94
  },
95
  "ScienceQA": {
96
+ "hit@1": 0.54,
97
  "num_correct": 540,
98
  "num_pred": 1000
99
  },
100
  "VizWiz": {
101
+ "hit@1": 0.514,
102
  "num_correct": 514,
103
  "num_pred": 1000
104
  },
105
  "GQA": {
106
+ "hit@1": 0.779,
107
  "num_correct": 779,
108
  "num_pred": 1000
109
  },
110
  "TextVQA": {
111
+ "hit@1": 0.83,
112
  "num_correct": 830,
113
  "num_pred": 1000
114
  },
115
  "VisDial": {
116
+ "hit@1": 0.866,
117
  "num_correct": 866,
118
  "num_pred": 1000
119
  },
120
  "CIRR": {
121
+ "hit@1": 0.617,
122
  "num_correct": 617,
123
  "num_pred": 1000
124
  },
125
  "VisualNews_t2i": {
126
+ "hit@1": 0.798,
127
  "num_correct": 798,
128
  "num_pred": 1000
129
  },
130
  "VisualNews_i2t": {
131
+ "hit@1": 0.822,
132
  "num_correct": 822,
133
  "num_pred": 1000
134
  },
135
  "MSCOCO_t2i": {
136
+ "hit@1": 0.795,
137
  "num_correct": 795,
138
  "num_pred": 1000
139
  },
140
  "MSCOCO_i2t": {
141
+ "hit@1": 0.77,
142
  "num_correct": 770,
143
  "num_pred": 1000
144
  },
145
  "NIGHTS": {
146
+ "hit@1": 0.679,
147
  "num_correct": 679,
148
  "num_pred": 1000
149
  },
150
  "WebQA": {
151
+ "hit@1": 0.906,
152
  "num_correct": 906,
153
  "num_pred": 1000
154
  },
155
  "FashionIQ": {
156
+ "hit@1": 0.275,
157
  "num_correct": 275,
158
  "num_pred": 1000
159
  },
160
  "Wiki-SS-NQ": {
161
+ "hit@1": 0.648,
162
  "num_correct": 648,
163
  "num_pred": 1000
164
  },
165
  "OVEN": {
166
+ "hit@1": 0.739,
167
  "num_correct": 739,
168
  "num_pred": 1000
169
  },
170
  "EDIS": {
171
+ "hit@1": 0.928,
172
  "num_correct": 928,
173
  "num_pred": 1000
174
  },
175
  "MSCOCO": {
176
+ "hit@1": 0.884,
177
  "num_correct": 884,
178
  "num_pred": 1000
179
  },
180
  "RefCOCO": {
181
+ "hit@1": 0.953,
182
  "num_correct": 953,
183
  "num_pred": 1000
184
  },
185
  "RefCOCO-Matching": {
186
+ "hit@1": 0.936,
187
  "num_correct": 936,
188
  "num_pred": 1000
189
  },
190
  "Visual7W-Pointing": {
191
+ "hit@1": 0.844,
192
  "num_correct": 844,
193
  "num_pred": 1000
194
  }
scores/ALICE-8B.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "model_name": "ALICE-8B",
4
  "model_backbone": "Qwen3VL-8B",
5
  "model_size": "8B parameters",
6
  "embedding_dimension": null,
@@ -13,182 +13,182 @@
13
  "metrics": {
14
  "image": {
15
  "ImageNet-1K": {
16
- "acc": 0.831,
17
  "num_correct": 831,
18
  "num_pred": 1000
19
  },
20
  "N24News": {
21
- "acc": 0.822,
22
  "num_correct": 822,
23
  "num_pred": 1000
24
  },
25
  "HatefulMemes": {
26
- "acc": 0.785,
27
  "num_correct": 785,
28
  "num_pred": 1000
29
  },
30
  "VOC2007": {
31
- "acc": 0.93,
32
  "num_correct": 930,
33
  "num_pred": 1000
34
  },
35
  "SUN397": {
36
- "acc": 0.826,
37
  "num_correct": 826,
38
  "num_pred": 1000
39
  },
40
  "Place365": {
41
- "acc": 0.479,
42
  "num_correct": 479,
43
  "num_pred": 1000
44
  },
45
  "ImageNet-A": {
46
- "acc": 0.538,
47
  "num_correct": 538,
48
  "num_pred": 1000
49
  },
50
  "ImageNet-R": {
51
- "acc": 0.877,
52
  "num_correct": 877,
53
  "num_pred": 1000
54
  },
55
  "ObjectNet": {
56
- "acc": 0.743,
57
  "num_correct": 743,
58
  "num_pred": 1000
59
  },
60
  "Country211": {
61
- "acc": 0.208,
62
  "num_correct": 208,
63
  "num_pred": 1000
64
  },
65
  "OK-VQA": {
66
- "acc": 0.735,
67
  "num_correct": 735,
68
  "num_pred": 1000
69
  },
70
  "A-OKVQA": {
71
- "acc": 0.65,
72
  "num_correct": 650,
73
  "num_pred": 1000
74
  },
75
  "DocVQA": {
76
- "acc": 0.942,
77
  "num_correct": 942,
78
  "num_pred": 1000
79
  },
80
  "InfographicsVQA": {
81
- "acc": 0.754,
82
  "num_correct": 754,
83
  "num_pred": 1000
84
  },
85
  "ChartQA": {
86
- "acc": 0.728,
87
  "num_correct": 728,
88
  "num_pred": 1000
89
  },
90
  "Visual7W": {
91
- "acc": 0.593,
92
  "num_correct": 593,
93
  "num_pred": 1000
94
  },
95
  "ScienceQA": {
96
- "acc": 0.551,
97
  "num_correct": 551,
98
  "num_pred": 1000
99
  },
100
  "VizWiz": {
101
- "acc": 0.51,
102
  "num_correct": 510,
103
  "num_pred": 1000
104
  },
105
  "GQA": {
106
- "acc": 0.742,
107
  "num_correct": 742,
108
  "num_pred": 1000
109
  },
110
  "TextVQA": {
111
- "acc": 0.848,
112
  "num_correct": 848,
113
  "num_pred": 1000
114
  },
115
  "VisDial": {
116
- "acc": 0.875,
117
  "num_correct": 875,
118
  "num_pred": 1000
119
  },
120
  "CIRR": {
121
- "acc": 0.662,
122
  "num_correct": 662,
123
  "num_pred": 1000
124
  },
125
  "VisualNews_t2i": {
126
- "acc": 0.797,
127
  "num_correct": 797,
128
  "num_pred": 1000
129
  },
130
  "VisualNews_i2t": {
131
- "acc": 0.831,
132
  "num_correct": 831,
133
  "num_pred": 1000
134
  },
135
  "MSCOCO_t2i": {
136
- "acc": 0.802,
137
  "num_correct": 802,
138
  "num_pred": 1000
139
  },
140
  "MSCOCO_i2t": {
141
- "acc": 0.783,
142
  "num_correct": 783,
143
  "num_pred": 1000
144
  },
145
  "NIGHTS": {
146
- "acc": 0.687,
147
  "num_correct": 687,
148
  "num_pred": 1000
149
  },
150
  "WebQA": {
151
- "acc": 0.91,
152
  "num_correct": 910,
153
  "num_pred": 1000
154
  },
155
  "FashionIQ": {
156
- "acc": 0.302,
157
  "num_correct": 302,
158
  "num_pred": 1000
159
  },
160
  "Wiki-SS-NQ": {
161
- "acc": 0.691,
162
  "num_correct": 691,
163
  "num_pred": 1000
164
  },
165
  "OVEN": {
166
- "acc": 0.657,
167
  "num_correct": 657,
168
  "num_pred": 1000
169
  },
170
  "EDIS": {
171
- "acc": 0.945,
172
  "num_correct": 945,
173
  "num_pred": 1000
174
  },
175
  "MSCOCO": {
176
- "acc": 0.914,
177
  "num_correct": 914,
178
  "num_pred": 1000
179
  },
180
  "RefCOCO": {
181
- "acc": 0.959,
182
  "num_correct": 959,
183
  "num_pred": 1000
184
  },
185
  "RefCOCO-Matching": {
186
- "acc": 0.939,
187
  "num_correct": 939,
188
  "num_pred": 1000
189
  },
190
  "Visual7W-Pointing": {
191
- "acc": 0.883,
192
  "num_correct": 883,
193
  "num_pred": 1000
194
  }
 
1
  {
2
  "metadata": {
3
+ "model_name": "ALICE-8B (Qwen3VL-8B)",
4
  "model_backbone": "Qwen3VL-8B",
5
  "model_size": "8B parameters",
6
  "embedding_dimension": null,
 
13
  "metrics": {
14
  "image": {
15
  "ImageNet-1K": {
16
+ "hit@1": 0.831,
17
  "num_correct": 831,
18
  "num_pred": 1000
19
  },
20
  "N24News": {
21
+ "hit@1": 0.822,
22
  "num_correct": 822,
23
  "num_pred": 1000
24
  },
25
  "HatefulMemes": {
26
+ "hit@1": 0.785,
27
  "num_correct": 785,
28
  "num_pred": 1000
29
  },
30
  "VOC2007": {
31
+ "hit@1": 0.93,
32
  "num_correct": 930,
33
  "num_pred": 1000
34
  },
35
  "SUN397": {
36
+ "hit@1": 0.826,
37
  "num_correct": 826,
38
  "num_pred": 1000
39
  },
40
  "Place365": {
41
+ "hit@1": 0.479,
42
  "num_correct": 479,
43
  "num_pred": 1000
44
  },
45
  "ImageNet-A": {
46
+ "hit@1": 0.538,
47
  "num_correct": 538,
48
  "num_pred": 1000
49
  },
50
  "ImageNet-R": {
51
+ "hit@1": 0.877,
52
  "num_correct": 877,
53
  "num_pred": 1000
54
  },
55
  "ObjectNet": {
56
+ "hit@1": 0.743,
57
  "num_correct": 743,
58
  "num_pred": 1000
59
  },
60
  "Country211": {
61
+ "hit@1": 0.208,
62
  "num_correct": 208,
63
  "num_pred": 1000
64
  },
65
  "OK-VQA": {
66
+ "hit@1": 0.735,
67
  "num_correct": 735,
68
  "num_pred": 1000
69
  },
70
  "A-OKVQA": {
71
+ "hit@1": 0.65,
72
  "num_correct": 650,
73
  "num_pred": 1000
74
  },
75
  "DocVQA": {
76
+ "hit@1": 0.942,
77
  "num_correct": 942,
78
  "num_pred": 1000
79
  },
80
  "InfographicsVQA": {
81
+ "hit@1": 0.754,
82
  "num_correct": 754,
83
  "num_pred": 1000
84
  },
85
  "ChartQA": {
86
+ "hit@1": 0.728,
87
  "num_correct": 728,
88
  "num_pred": 1000
89
  },
90
  "Visual7W": {
91
+ "hit@1": 0.593,
92
  "num_correct": 593,
93
  "num_pred": 1000
94
  },
95
  "ScienceQA": {
96
+ "hit@1": 0.551,
97
  "num_correct": 551,
98
  "num_pred": 1000
99
  },
100
  "VizWiz": {
101
+ "hit@1": 0.51,
102
  "num_correct": 510,
103
  "num_pred": 1000
104
  },
105
  "GQA": {
106
+ "hit@1": 0.742,
107
  "num_correct": 742,
108
  "num_pred": 1000
109
  },
110
  "TextVQA": {
111
+ "hit@1": 0.848,
112
  "num_correct": 848,
113
  "num_pred": 1000
114
  },
115
  "VisDial": {
116
+ "hit@1": 0.875,
117
  "num_correct": 875,
118
  "num_pred": 1000
119
  },
120
  "CIRR": {
121
+ "hit@1": 0.662,
122
  "num_correct": 662,
123
  "num_pred": 1000
124
  },
125
  "VisualNews_t2i": {
126
+ "hit@1": 0.797,
127
  "num_correct": 797,
128
  "num_pred": 1000
129
  },
130
  "VisualNews_i2t": {
131
+ "hit@1": 0.831,
132
  "num_correct": 831,
133
  "num_pred": 1000
134
  },
135
  "MSCOCO_t2i": {
136
+ "hit@1": 0.802,
137
  "num_correct": 802,
138
  "num_pred": 1000
139
  },
140
  "MSCOCO_i2t": {
141
+ "hit@1": 0.783,
142
  "num_correct": 783,
143
  "num_pred": 1000
144
  },
145
  "NIGHTS": {
146
+ "hit@1": 0.687,
147
  "num_correct": 687,
148
  "num_pred": 1000
149
  },
150
  "WebQA": {
151
+ "hit@1": 0.91,
152
  "num_correct": 910,
153
  "num_pred": 1000
154
  },
155
  "FashionIQ": {
156
+ "hit@1": 0.302,
157
  "num_correct": 302,
158
  "num_pred": 1000
159
  },
160
  "Wiki-SS-NQ": {
161
+ "hit@1": 0.691,
162
  "num_correct": 691,
163
  "num_pred": 1000
164
  },
165
  "OVEN": {
166
+ "hit@1": 0.657,
167
  "num_correct": 657,
168
  "num_pred": 1000
169
  },
170
  "EDIS": {
171
+ "hit@1": 0.945,
172
  "num_correct": 945,
173
  "num_pred": 1000
174
  },
175
  "MSCOCO": {
176
+ "hit@1": 0.914,
177
  "num_correct": 914,
178
  "num_pred": 1000
179
  },
180
  "RefCOCO": {
181
+ "hit@1": 0.959,
182
  "num_correct": 959,
183
  "num_pred": 1000
184
  },
185
  "RefCOCO-Matching": {
186
+ "hit@1": 0.939,
187
  "num_correct": 939,
188
  "num_pred": 1000
189
  },
190
  "Visual7W-Pointing": {
191
+ "hit@1": 0.883,
192
  "num_correct": 883,
193
  "num_pred": 1000
194
  }
scores/RGE(Qwen2.5VL-3B).json CHANGED
@@ -8,7 +8,7 @@
8
  "model_release_date": "2025-11-20",
9
  "data_source": "Self-Reported",
10
  "url": "https://huggingface.co/MCG-NJU/RGE",
11
- "report_generated_date": "2026-05-11T15:49:30.024670"
12
  },
13
  "metrics": {
14
  "image": {
 
8
  "model_release_date": "2025-11-20",
9
  "data_source": "Self-Reported",
10
  "url": "https://huggingface.co/MCG-NJU/RGE",
11
+ "report_generated_date": "2025-11-20T15:49:30.024670"
12
  },
13
  "metrics": {
14
  "image": {