OptimismBench / app /src /content /assets /data /alignpair.json
seonglae's picture
article: interactive research piece for OptimismBench
1df44e8
Raw
History Blame Contribute Delete
979 Bytes
{
"note":"Controlled base-vs-chat probe. delta=chat-base.",
"pairs":[
{"arch":"Qwen2.5-7B","family":"Qwen","base":14.6,"chat":9.0,"delta":-5.6},
{"arch":"Qwen3-1.7B","family":"Qwen","base":-12.4,"chat":-21.4,"delta":-9.1},
{"arch":"Qwen3-4B","family":"Qwen","base":0.4,"chat":-15.5,"delta":-15.9},
{"arch":"Qwen3-8B","family":"Qwen","base":16.1,"chat":2.1,"delta":-14.0},
{"arch":"Qwen3-14B","family":"Qwen","base":16.1,"chat":-0.1,"delta":-16.2},
{"arch":"Llama-3.2-1B","family":"Llama","base":-33.7,"chat":7.3,"delta":40.9},
{"arch":"Llama-3.2-3B","family":"Llama","base":-6.4,"chat":18.7,"delta":25.1},
{"arch":"Llama-3.1-8B","family":"Llama","base":1.9,"chat":21.9,"delta":20.0},
{"arch":"Llama-3.1-70B","family":"Llama","base":-3.1,"chat":12.2,"delta":15.2},
{"arch":"Gemma-2-2b","family":"Gemma","base":-17.5,"chat":36.0,"delta":53.5},
{"arch":"Mistral-Small-24B","family":"Mistral","base":8.0,"chat":5.2,"delta":-2.8}
]
}