DISBench-Leaderboard / leaderboard_data.json
MengjieDeng's picture
[Auto] Update leaderboard scores
5f856bd verified
raw
history blame
1.35 kB
[
{
"method": "ImageSeeker(Gemini-3-Flash-Preview)",
"url": "https://github.com/RUC-NLPIR/DeepImageSearch",
"org": "RUC-NLPIR",
"agent": "ImageSeeker",
"backbone": "Gemini-3-Flash-Preview",
"retriever": "Qwen3-VL-Embedding-8B",
"track": "Standard",
"date": "2026-02-12",
"overall_em": 9.8,
"overall_f1": 34.3,
"intra_em": 12.3,
"intra_f1": 39.5,
"inter_em": 7.7,
"inter_f1": 29.8
},
{
"method": "ImageSeeker(Qwen3-VL-235b-A22b-Instruct)",
"url": "https://github.com/RUC-NLPIR/DeepImageSearch",
"org": "RUC-NLPIR",
"agent": "ImageSeeker",
"backbone": "Qwen3-VL-235b-A22b-Instruct",
"retriever": "Qwen3-VL-Embedding-8B",
"track": "Standard",
"date": "2026-02-12",
"overall_em": 11.5,
"overall_f1": 27.2,
"intra_em": 17.5,
"intra_f1": 32.1,
"inter_em": 6.2,
"inter_f1": 22.9
},
{
"method": "ImageSeeker(Qwen3-VL-235b-A22b-Thinking)",
"url": "https://github.com/RUC-NLPIR/DeepImageSearch",
"org": "RUC-NLPIR",
"agent": "ImageSeeker",
"backbone": "Qwen3-VL-235b-A22b-Thinking",
"retriever": "Qwen3-VL-Embedding-8B",
"track": "Standard",
"date": "2026-02-12",
"overall_em": 8.2,
"overall_f1": 20.4,
"intra_em": 12.3,
"intra_f1": 23.7,
"inter_em": 4.6,
"inter_f1": 17.6
}
]