{ "benchmark_info": { "date": "2026-04-01", "framework": "lm-evaluation-harness 0.4.9.2", "inference": "llama.cpp (llama-server b8330)", "hardware": "Apple M1 Max 32GB", "quantization": "Q4_K_M", "n_shot": 0, "tasks": "KMMLU direct (10 subjects) + HAE-RAE (5 subtasks)", "method": "generate_until with regex extraction" }, "models": { "vela-dpo-v6": { "full_name": "VELA DPO v6 (Qwen2.5-7B + SFT + DPO v6)", "file": "vela-dpo-v6-q4km.gguf", "size_gb": 4.4 }, "qwen2.5-7b-instruct": { "full_name": "Qwen2.5-7B-Instruct (baseline)", "file": "qwen2.5-7b-instruct-q4_k_m-00001-of-00002.gguf", "size_gb": 4.4 }, "exaone-3.5-7.8b": { "full_name": "EXAONE-3.5-7.8B-Instruct", "file": "EXAONE-3.5-7.8B-Instruct-Q4_K_M.gguf", "size_gb": 4.4 } }, "kmmlu": { "accounting": { "vela_dpo_v6": 0.38, "qwen25_7b": 0.33, "exaone_35_7_8b": 0.42 }, "computer_science": { "vela_dpo_v6": 0.737, "qwen25_7b": 0.697, "exaone_35_7_8b": 0.697 }, "economics": { "vela_dpo_v6": 0.454, "qwen25_7b": 0.477, "exaone_35_7_8b": 0.515 }, "korean_history": { "vela_dpo_v6": 0.31, "qwen25_7b": 0.29, "exaone_35_7_8b": 0.22 }, "law": { "vela_dpo_v6": 0.434, "qwen25_7b": 0.461, "exaone_35_7_8b": 0.499 }, "management": { "vela_dpo_v6": 0.54, "qwen25_7b": 0.552, "exaone_35_7_8b": 0.573 }, "marketing": { "vela_dpo_v6": 0.757, "qwen25_7b": 0.725, "exaone_35_7_8b": 0.756 }, "math": { "vela_dpo_v6": 0.33, "qwen25_7b": 0.337, "exaone_35_7_8b": 0.277 }, "political_science_and_sociology": { "vela_dpo_v6": 0.49, "qwen25_7b": 0.493, "exaone_35_7_8b": 0.56 }, "psychology": { "vela_dpo_v6": 0.392, "qwen25_7b": 0.393, "exaone_35_7_8b": 0.457 } }, "haerae": { "general_knowledge": { "vela_dpo_v6": 0.4375, "qwen25_7b": 0.4205, "exaone_35_7_8b": 0.4432 }, "history": { "vela_dpo_v6": 0.4574, "qwen25_7b": 0.4255, "exaone_35_7_8b": 0.7766 }, "loan_words": { "vela_dpo_v6": 0.4852, "qwen25_7b": 0.574, "exaone_35_7_8b": 0.8107 }, "rare_words": { "vela_dpo_v6": 0.6988, "qwen25_7b": 0.684, "exaone_35_7_8b": 0.7877 }, "standard_nomenclature": { "vela_dpo_v6": 0.6471, "qwen25_7b": 0.6601, "exaone_35_7_8b": 0.719 } }, "summary": { "kmmlu_avg": { "vela_dpo_v6": 0.482, "qwen25_7b": 0.476, "exaone_35_7_8b": 0.497 }, "haerae_avg": { "vela_dpo_v6": 0.545, "qwen25_7b": 0.553, "exaone_35_7_8b": 0.707 } } }