{
  "benchmark_info": {
    "date": "2026-04-01",
    "framework": "lm-evaluation-harness 0.4.9.2",
    "inference": "llama.cpp (llama-server b8330)",
    "hardware": "Apple M1 Max 32GB",
    "quantization": "Q4_K_M",
    "n_shot": 0,
    "tasks": "KMMLU direct (10 subjects) + HAE-RAE (5 subtasks)",
    "method": "generate_until with regex extraction"
  },
  "models": {
    "vela-dpo-v6": {
      "full_name": "VELA DPO v6 (Qwen2.5-7B + SFT + DPO v6)",
      "file": "vela-dpo-v6-q4km.gguf",
      "size_gb": 4.4
    },
    "qwen2.5-7b-instruct": {
      "full_name": "Qwen2.5-7B-Instruct (baseline)",
      "file": "qwen2.5-7b-instruct-q4_k_m-00001-of-00002.gguf",
      "size_gb": 4.4
    },
    "exaone-3.5-7.8b": {
      "full_name": "EXAONE-3.5-7.8B-Instruct",
      "file": "EXAONE-3.5-7.8B-Instruct-Q4_K_M.gguf",
      "size_gb": 4.4
    }
  },
  "kmmlu": {
    "accounting": {
      "vela_dpo_v6": 0.38,
      "qwen25_7b": 0.33,
      "exaone_35_7_8b": 0.42
    },
    "computer_science": {
      "vela_dpo_v6": 0.737,
      "qwen25_7b": 0.697,
      "exaone_35_7_8b": 0.697
    },
    "economics": {
      "vela_dpo_v6": 0.454,
      "qwen25_7b": 0.477,
      "exaone_35_7_8b": 0.515
    },
    "korean_history": {
      "vela_dpo_v6": 0.31,
      "qwen25_7b": 0.29,
      "exaone_35_7_8b": 0.22
    },
    "law": {
      "vela_dpo_v6": 0.434,
      "qwen25_7b": 0.461,
      "exaone_35_7_8b": 0.499
    },
    "management": {
      "vela_dpo_v6": 0.54,
      "qwen25_7b": 0.552,
      "exaone_35_7_8b": 0.573
    },
    "marketing": {
      "vela_dpo_v6": 0.757,
      "qwen25_7b": 0.725,
      "exaone_35_7_8b": 0.756
    },
    "math": {
      "vela_dpo_v6": 0.33,
      "qwen25_7b": 0.337,
      "exaone_35_7_8b": 0.277
    },
    "political_science_and_sociology": {
      "vela_dpo_v6": 0.49,
      "qwen25_7b": 0.493,
      "exaone_35_7_8b": 0.56
    },
    "psychology": {
      "vela_dpo_v6": 0.392,
      "qwen25_7b": 0.393,
      "exaone_35_7_8b": 0.457
    }
  },
  "haerae": {
    "general_knowledge": {
      "vela_dpo_v6": 0.4375,
      "qwen25_7b": 0.4205,
      "exaone_35_7_8b": 0.4432
    },
    "history": {
      "vela_dpo_v6": 0.4574,
      "qwen25_7b": 0.4255,
      "exaone_35_7_8b": 0.7766
    },
    "loan_words": {
      "vela_dpo_v6": 0.4852,
      "qwen25_7b": 0.574,
      "exaone_35_7_8b": 0.8107
    },
    "rare_words": {
      "vela_dpo_v6": 0.6988,
      "qwen25_7b": 0.684,
      "exaone_35_7_8b": 0.7877
    },
    "standard_nomenclature": {
      "vela_dpo_v6": 0.6471,
      "qwen25_7b": 0.6601,
      "exaone_35_7_8b": 0.719
    }
  },
  "summary": {
    "kmmlu_avg": {
      "vela_dpo_v6": 0.482,
      "qwen25_7b": 0.476,
      "exaone_35_7_8b": 0.497
    },
    "haerae_avg": {
      "vela_dpo_v6": 0.545,
      "qwen25_7b": 0.553,
      "exaone_35_7_8b": 0.707
    }
  }
}
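
The two `summary` blocks are unweighted means of the ten KMMLU subjects and five HAE-RAE subtasks, rounded to three decimals. A minimal sketch for re-deriving them from this file; the `results.json` filename and the helper name are assumptions for illustration, not part of the data:

```python
import json

# Load the benchmark results (assumed saved as results.json; the name
# is illustrative). Scores were produced with lm-evaluation-harness
# 0.4.9.2 against a local llama-server endpoint (see benchmark_info);
# this script only re-derives the summary averages from per-task scores.
with open("results.json") as f:
    results = json.load(f)

MODELS = ("vela_dpo_v6", "qwen25_7b", "exaone_35_7_8b")

def mean_score(section: dict, model: str) -> float:
    """Unweighted mean of one model's scores across all tasks in a section."""
    return sum(task[model] for task in section.values()) / len(section)

for section_name in ("kmmlu", "haerae"):
    stored = results["summary"][f"{section_name}_avg"]
    for model in MODELS:
        derived = mean_score(results[section_name], model)
        # The stored averages are rounded to 3 decimals, so compare
        # within one unit in the last place.
        assert abs(derived - stored[model]) < 1e-3, (section_name, model)
        print(f"{section_name:7s} {model:15s} {derived:.3f}")
```

Running this against the data above reproduces every stored average (e.g. KMMLU for vela_dpo_v6: 4.824 / 10 = 0.482), confirming that both averages weight each subject equally rather than by question count.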