File size: 2,145 Bytes
2a02932
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
{
  "source": "/home/ubuntu/rightnow-arabic-b-turbo/scripts/make_chart.py",
  "data": [
    {
      "model": "RightNow-Arabic-0.5B-Turbo",
      "params_B": 0.518,
      "mean_acc_pct": 35.9,
      "disk_gb_best_quant": 0.4,
      "category": "ours"
    },
    {
      "model": "Qwen2.5-0.5B-Instruct",
      "params_B": 0.494,
      "mean_acc_pct": 34.1,
      "disk_gb_best_quant": 0.5,
      "category": "same_class"
    },
    {
      "model": "Falcon-H1-0.5B-Instruct",
      "params_B": 0.524,
      "mean_acc_pct": 30.7,
      "disk_gb_best_quant": 0.55,
      "category": "same_class"
    },
    {
      "model": "Falcon-H1-1.5B-Instruct",
      "params_B": 1.5,
      "mean_acc_pct": 39.5,
      "disk_gb_best_quant": 1.5,
      "category": "bigger_open"
    },
    {
      "model": "AceGPT-7B-chat",
      "params_B": 7.0,
      "mean_acc_pct": 43.9,
      "disk_gb_best_quant": 4.0,
      "category": "bigger_open"
    },
    {
      "model": "ALLaM-7B-Instruct",
      "params_B": 7.0,
      "mean_acc_pct": 49.9,
      "disk_gb_best_quant": 4.0,
      "category": "bigger_open"
    },
    {
      "model": "SILMA-9B-Instruct",
      "params_B": 9.0,
      "mean_acc_pct": 53.5,
      "disk_gb_best_quant": 5.0,
      "category": "bigger_open"
    },
    {
      "model": "Jais-13B-chat",
      "params_B": 13.0,
      "mean_acc_pct": 50.0,
      "disk_gb_best_quant": 7.0,
      "category": "bigger_open"
    },
    {
      "model": "Kuwain-1.5B (paper only)",
      "params_B": 1.5,
      "mean_acc_pct": 44.0,
      "disk_gb_best_quant": 0.8,
      "category": "closed"
    },
    {
      "model": "Falcon-Arabic-7B (closed)",
      "params_B": 7.0,
      "mean_acc_pct": 52.0,
      "disk_gb_best_quant": 4.0,
      "category": "closed"
    }
  ],
  "benchmarks": [
    "copa_ar",
    "arabic_mt_hellaswag",
    "arabic_leaderboard_arabic_mmlu"
  ],
  "methodology": "lm-eval-harness v0.4.11, apply_chat_template=True (where available), limit=200, acc_norm preferred over acc",
  "note": "Kuwain-1.5B and Falcon-Arabic-7B use estimated numbers from their papers \u2014 weights are not publicly available for direct evaluation."
}