MaziyarPanahi committed on
Commit
dbee0a3
·
1 Parent(s): b145c4b

add results

Browse files
Files changed (1) hide show
  1. results.csv +33 -0
results.csv ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name,size,grounding_score,quality_score,combined_score
2
+ Qwen/Qwen2.5-7B-Instruct,7,0.800000,0.800000,0.800000
3
+ deepseek-ai/DeepSeek-R1-Distill-Qwen-14B,14,0.817797,0.542373,0.457627
4
+ VIDraft/Gemma-3-R1984-27B,27,0.93617,0.459574,0.434043
5
+ meta-llama/Llama-3.3-70B-Instruct,70,0.842553,0.510638,0.425532
6
+ Qwen/Qwen3-30B-A3B,30,0.812766,0.540426,0.425532
7
+ Qwen/Qwen3-4B,4,0.770213,0.540426,0.425532
8
+ Qwen/Qwen3-32B,32,0.740426,0.553191,0.417021
9
+ deepseek-ai/DeepSeek-R1-Distill-Llama-8B,8,0.766949,0.516949,0.40678
10
+ Qwen/Qwen3-8B,8,0.748936,0.523404,0.4
11
+ Qwen/Qwen3-14B,14,0.778723,0.502128,0.382979
12
+ google/gemma-3-27b-it,27,0.936,0.391,0.378
13
+ Qwen/Qwen2.5-VL-32B-Instruct,32,0.621277,0.570213,0.357447
14
+ meta-llama/Llama-3.1-70B-Instruct,70,0.855932,0.389831,0.334746
15
+ google/gemma-3-12b-it,12,0.944,0.343,0.313
16
+ google/gemma-3-4b-it,4,0.9,0.33,0.3
17
+ Qwen/Qwen3-1.7B,1.7,0.702128,0.451064,0.297872
18
+ deepseek-ai/DeepSeek-R1-Distill-Qwen-7B,7,0.59322,0.449153,0.275424
19
+ Qwen/Qwen3-0.6B,0.6,0.682203,0.330508,0.266949
20
+ Qwen/Qwen2.5-7B-Instruct,7,0.731915,0.310638,0.255319
21
+ Qwen/Qwen2.5-14B-Instruct-1M,14,0.70339,0.300847,0.254237
22
+ nvidia/Llama-Nemotron-Nano-8B,8,0.576271,0.402542,0.241525
23
+ OpenScholar/Llama-3.1-OpenScholar-8B,8,0.690678,0.283898,0.241525
24
+ Qwen/Qwen2.5-7B-Instruct-1M,7,0.737288,0.271186,0.207627
25
+ nvidia/Llama-Nemotron-Nano-4B-v1.1,4,0.548936,0.340426,0.2
26
+ google/gemma-3-1b-it,1,0.65,0.28,0.19
27
+ mistralai/Ministral-8B-Instruct-2410,8,0.94,0.184,0.175
28
+ meta-llama/Llama-3.1-8B-Instruct,8,0.665254,0.194915,0.169492
29
+ mistralai/Mistral-Small-3.1-24B-Instruct-2503,24,0.953191,0.165957,0.157447
30
+ mistralai/Mistral-Small-24B-Instruct-2501,24,0.95339,0.135593,0.131356
31
+ open-thoughts/OpenThinker-7B,7,0.478814,0.152542,0.110169
32
+ PleIAs/Pleias-RAG-350M,0.35,0.236264,0.021978,0.010989
33
+ PleIAs/Pleias-RAG-1B,1,0.190476,0.037037,0