RightNow-Arabic-0.5B-Turbo / benchmark_chart_data.json
Jr23xd23's picture
Upload benchmark_chart_data.json with huggingface_hub
2a02932 verified
{
"source": "/home/ubuntu/rightnow-arabic-b-turbo/scripts/make_chart.py",
"data": [
{
"model": "RightNow-Arabic-0.5B-Turbo",
"params_B": 0.518,
"mean_acc_pct": 35.9,
"disk_gb_best_quant": 0.4,
"category": "ours"
},
{
"model": "Qwen2.5-0.5B-Instruct",
"params_B": 0.494,
"mean_acc_pct": 34.1,
"disk_gb_best_quant": 0.5,
"category": "same_class"
},
{
"model": "Falcon-H1-0.5B-Instruct",
"params_B": 0.524,
"mean_acc_pct": 30.7,
"disk_gb_best_quant": 0.55,
"category": "same_class"
},
{
"model": "Falcon-H1-1.5B-Instruct",
"params_B": 1.5,
"mean_acc_pct": 39.5,
"disk_gb_best_quant": 1.5,
"category": "bigger_open"
},
{
"model": "AceGPT-7B-chat",
"params_B": 7.0,
"mean_acc_pct": 43.9,
"disk_gb_best_quant": 4.0,
"category": "bigger_open"
},
{
"model": "ALLaM-7B-Instruct",
"params_B": 7.0,
"mean_acc_pct": 49.9,
"disk_gb_best_quant": 4.0,
"category": "bigger_open"
},
{
"model": "SILMA-9B-Instruct",
"params_B": 9.0,
"mean_acc_pct": 53.5,
"disk_gb_best_quant": 5.0,
"category": "bigger_open"
},
{
"model": "Jais-13B-chat",
"params_B": 13.0,
"mean_acc_pct": 50.0,
"disk_gb_best_quant": 7.0,
"category": "bigger_open"
},
{
"model": "Kuwain-1.5B (paper only)",
"params_B": 1.5,
"mean_acc_pct": 44.0,
"disk_gb_best_quant": 0.8,
"category": "closed"
},
{
"model": "Falcon-Arabic-7B (closed)",
"params_B": 7.0,
"mean_acc_pct": 52.0,
"disk_gb_best_quant": 4.0,
"category": "closed"
}
],
"benchmarks": [
"copa_ar",
"arabic_mt_hellaswag",
"arabic_leaderboard_arabic_mmlu"
],
"methodology": "lm-eval-harness v0.4.11, apply_chat_template=True (where available), limit=200, acc_norm preferred over acc",
"note": "Kuwain-1.5B and Falcon-Arabic-7B use estimated numbers from their papers \u2014 weights are not publicly available for direct evaluation."
}