DISBench-Leaderboard / leaderboard_data.json
MengjieDeng's picture
[Auto] Update leaderboard scores
7b7ea1e verified
[
{
"method": "ImageSeeker",
"url": "https://github.com/RUC-NLPIR/DeepImageSearch",
"org": "RUC-NLPIR",
"agent": "ImageSeeker",
"backbone": "Qwen3-VL-235b-A22b-Instruct",
"retriever": "Qwen3-VL-Embedding-8B",
"track": "Standard",
"date": "2026-02-24",
"overall_em": 11.5,
"overall_f1": 27.2,
"intra_em": 17.5,
"intra_f1": 32.1,
"inter_em": 6.2,
"inter_f1": 22.9
},
{
"method": "ImageSeeker",
"url": "https://github.com/RUC-NLPIR/DeepImageSearch",
"org": "RUC-NLPIR",
"agent": "ImageSeeker",
"backbone": "Gemini-3-Flash-Preview",
"retriever": "Qwen3-VL-Embedding-8B",
"track": "Standard",
"date": "2026-02-24",
"overall_em": 12.3,
"overall_f1": 36.8,
"intra_em": 15.8,
"intra_f1": 42.4,
"inter_em": 9.2,
"inter_f1": 31.9
},
{
"method": "ImageSeeker",
"url": "https://github.com/RUC-NLPIR/DeepImageSearch",
"org": "RUC-NLPIR",
"agent": "ImageSeeker",
"backbone": "Qwen3-VL-235b-A22b-Thinking",
"retriever": "Qwen3-VL-Embedding-8B",
"track": "Standard",
"date": "2026-02-24",
"overall_em": 8.2,
"overall_f1": 20.4,
"intra_em": 12.3,
"intra_f1": 23.7,
"inter_em": 4.6,
"inter_f1": 17.6
},
{
"method": "ImageSeeker",
"url": "https://github.com/RUC-NLPIR/DeepImageSearch",
"org": "RUC-NLPIR",
"agent": "ImageSeeker",
"backbone": "Qwen3-VL-32b-Instruct",
"retriever": "Qwen3-VL-Embedding-8B",
"track": "Standard",
"date": "2026-02-24",
"overall_em": 8.2,
"overall_f1": 20.9,
"intra_em": 14.0,
"intra_f1": 27.1,
"inter_em": 3.1,
"inter_f1": 15.5
},
{
"method": "ImageSeeker",
"url": "https://github.com/RUC-NLPIR/DeepImageSearch",
"org": "RUC-NLPIR",
"agent": "ImageSeeker",
"backbone": "Gpt-4o",
"retriever": "Qwen3-VL-Embedding-8B",
"track": "Standard",
"date": "2026-02-24",
"overall_em": 5.7,
"overall_f1": 21.8,
"intra_em": 5.3,
"intra_f1": 17.1,
"inter_em": 6.2,
"inter_f1": 25.9
},
{
"method": "ImageSeeker",
"url": "https://github.com/RUC-NLPIR/DeepImageSearch",
"org": "RUC-NLPIR",
"agent": "ImageSeeker",
"backbone": "Glm-4.6v",
"retriever": "Qwen3-VL-Embedding-8B",
"track": "Standard",
"date": "2026-02-24",
"overall_em": 8.2,
"overall_f1": 26.1,
"intra_em": 10.5,
"intra_f1": 32.1,
"inter_em": 6.2,
"inter_f1": 20.8
},
{
"method": "ImageSeeker",
"url": "https://github.com/RUC-NLPIR/DeepImageSearch",
"org": "RUC-NLPIR",
"agent": "ImageSeeker",
"backbone": "Claude-Opus-4-5-20251101",
"retriever": "Qwen3-VL-Embedding-8B",
"track": "Standard",
"date": "2026-02-24",
"overall_em": 28.7,
"overall_f1": 55.0,
"intra_em": 35.1,
"intra_f1": 60.0,
"inter_em": 23.1,
"inter_f1": 50.7
},
{
"method": "ImageSeeker",
"url": "https://github.com/RUC-NLPIR/DeepImageSearch",
"org": "RUC-NLPIR",
"agent": "ImageSeeker",
"backbone": "Claude-Sonnet-4-5-20250929",
"retriever": "Qwen3-VL-Embedding-8B",
"track": "Standard",
"date": "2026-02-24",
"overall_em": 22.1,
"overall_f1": 43.8,
"intra_em": 28.1,
"intra_f1": 48.5,
"inter_em": 16.9,
"inter_f1": 39.6
},
{
"method": "ImageSeeker",
"url": "https://github.com/RUC-NLPIR/DeepImageSearch",
"org": "RUC-NLPIR",
"agent": "ImageSeeker",
"backbone": "Gpt-5.2",
"retriever": "Qwen3-VL-Embedding-8B",
"track": "Standard",
"date": "2026-02-24",
"overall_em": 13.1,
"overall_f1": 30.0,
"intra_em": 19.3,
"intra_f1": 32.9,
"inter_em": 7.7,
"inter_f1": 27.4
},
{
"method": "ImageSeeker",
"url": "https://github.com/RUC-NLPIR/DeepImageSearch",
"org": "RUC-NLPIR",
"agent": "ImageSeeker",
"backbone": "Gemini-3-Pro-Preview",
"retriever": "Qwen3-VL-Embedding-8B",
"track": "Standard",
"date": "2026-02-24",
"overall_em": 24.6,
"overall_f1": 47.9,
"intra_em": 29.8,
"intra_f1": 55.2,
"inter_em": 20.0,
"inter_f1": 41.5
}
]