{
  "source": "/home/ubuntu/rightnow-arabic-b-turbo/scripts/make_chart.py",
  "data": [
    {
      "model": "RightNow-Arabic-0.5B-Turbo",
      "params_B": 0.518,
      "mean_acc_pct": 35.9,
      "disk_gb_best_quant": 0.4,
      "category": "ours"
    },
    {
      "model": "Qwen2.5-0.5B-Instruct",
      "params_B": 0.494,
      "mean_acc_pct": 34.1,
      "disk_gb_best_quant": 0.5,
      "category": "same_class"
    },
    {
      "model": "Falcon-H1-0.5B-Instruct",
      "params_B": 0.524,
      "mean_acc_pct": 30.7,
      "disk_gb_best_quant": 0.55,
      "category": "same_class"
    },
    {
      "model": "Falcon-H1-1.5B-Instruct",
      "params_B": 1.5,
      "mean_acc_pct": 39.5,
      "disk_gb_best_quant": 1.5,
      "category": "bigger_open"
    },
    {
      "model": "AceGPT-7B-chat",
      "params_B": 7.0,
      "mean_acc_pct": 43.9,
      "disk_gb_best_quant": 4.0,
      "category": "bigger_open"
    },
    {
      "model": "ALLaM-7B-Instruct",
      "params_B": 7.0,
      "mean_acc_pct": 49.9,
      "disk_gb_best_quant": 4.0,
      "category": "bigger_open"
    },
    {
      "model": "SILMA-9B-Instruct",
      "params_B": 9.0,
      "mean_acc_pct": 53.5,
      "disk_gb_best_quant": 5.0,
      "category": "bigger_open"
    },
    {
      "model": "Jais-13B-chat",
      "params_B": 13.0,
      "mean_acc_pct": 50.0,
      "disk_gb_best_quant": 7.0,
      "category": "bigger_open"
    },
    {
      "model": "Kuwain-1.5B (paper only)",
      "params_B": 1.5,
      "mean_acc_pct": 44.0,
      "disk_gb_best_quant": 0.8,
      "category": "closed"
    },
    {
      "model": "Falcon-Arabic-7B (closed)",
      "params_B": 7.0,
      "mean_acc_pct": 52.0,
      "disk_gb_best_quant": 4.0,
      "category": "closed"
    }
  ],
  "benchmarks": [
    "copa_ar",
    "arabic_mt_hellaswag",
    "arabic_leaderboard_arabic_mmlu"
  ],
  "methodology": "lm-eval-harness v0.4.11, apply_chat_template=True (where available), limit=200, acc_norm preferred over acc",
  "note": "Kuwain-1.5B and Falcon-Arabic-7B use estimated numbers from their papers — weights are not publicly available for direct evaluation."
}