File size: 2,853 Bytes
2edd871
aa3daac
2edd871
aa3daac
 
 
 
 
 
ddebd57
aa3daac
 
2edd871
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa3daac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2edd871
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
{
  "developer": "Alibaba",
  "models": [
    {
      "id": "alibaba/qwen-3-coder-480b",
      "name": "Qwen 3 Coder 480B",
      "developer": "Alibaba",
      "evaluator_relationship": null,
      "benchmark_scores": {
        "terminal-bench-2.0/terminal-bench-2.0": 23.9
      }
    },
    {
      "id": "alibaba/qwen3-235b-a22b-instruct-2507",
      "name": "qwen3-235b-a22b-instruct-2507",
      "developer": "Alibaba",
      "evaluator_relationship": null,
      "benchmark_scores": {
        "global-mmlu-lite/Global MMLU Lite": 0.8798,
        "global-mmlu-lite/Culturally Sensitive": 0.8522,
        "global-mmlu-lite/Culturally Agnostic": 0.9075,
        "global-mmlu-lite/Arabic": 0.88,
        "global-mmlu-lite/English": 0.89,
        "global-mmlu-lite/Bengali": 0.8875,
        "global-mmlu-lite/German": 0.885,
        "global-mmlu-lite/French": 0.88,
        "global-mmlu-lite/Hindi": 0.8775,
        "global-mmlu-lite/Indonesian": 0.88,
        "global-mmlu-lite/Italian": 0.88,
        "global-mmlu-lite/Japanese": 0.88,
        "global-mmlu-lite/Korean": 0.875,
        "global-mmlu-lite/Portuguese": 0.8875,
        "global-mmlu-lite/Spanish": 0.875,
        "global-mmlu-lite/Swahili": 0.87,
        "global-mmlu-lite/Yoruba": 0.8725,
        "global-mmlu-lite/Chinese": 0.8775,
        "global-mmlu-lite/Burmese": 0.88
      }
    },
    {
      "id": "alibaba/qwen3-235b-a22b-thinking-2507",
      "name": "qwen3-235b-a22b-thinking-2507",
      "developer": "Alibaba",
      "evaluator_relationship": null,
      "benchmark_scores": {
        "livecodebenchpro/Hard Problems": 0.0,
        "livecodebenchpro/Medium Problems": 0.1267605633802817,
        "livecodebenchpro/Easy Problems": 0.7605633802816901
      }
    },
    {
      "id": "alibaba/qwen3-30b-a3b",
      "name": "qwen3-30b-a3b",
      "developer": "Alibaba",
      "evaluator_relationship": null,
      "benchmark_scores": {
        "livecodebenchpro/Hard Problems": 0.0,
        "livecodebenchpro/Medium Problems": 0.028169014084507043,
        "livecodebenchpro/Easy Problems": 0.5774647887323944
      }
    },
    {
      "id": "alibaba/qwen3-max",
      "name": "alibaba/qwen3-max",
      "developer": "Alibaba",
      "evaluator_relationship": null,
      "benchmark_scores": {
        "livecodebenchpro/Hard Problems": 0.0,
        "livecodebenchpro/Medium Problems": 0.04225352112676056,
        "livecodebenchpro/Easy Problems": 0.36619718309859156
      }
    },
    {
      "id": "alibaba/qwen3-next-80b-a3b-thinking",
      "name": "qwen3-next-80b-a3b-thinking",
      "developer": "Alibaba",
      "evaluator_relationship": null,
      "benchmark_scores": {
        "livecodebenchpro/Hard Problems": 0.0,
        "livecodebenchpro/Medium Problems": 0.14084507042253522,
        "livecodebenchpro/Easy Problems": 0.7464788732394366
      }
    }
  ]
}