Generalist-Value-Model-V0 / data /router_context_sampled /performance /DeepSeek-R1-Distill-Qwen-1.5B.jsonl
zhangyikai's picture
Update UI and data
9fa9ac9
{"id": 13198, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 5840, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 63126, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 36126, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 37845, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 58007, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 17784, "mean@10": 1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 40199, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 24014, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 220, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 13114, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 2180, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 5125, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 6715, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 405, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 415, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 24590, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 688, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 9958, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 21176, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 15481, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 36361, "mean@10": 1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 6325, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 431, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 889, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 16647, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 12759, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 3956, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 1414, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 6538, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 21587, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 929, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 52408, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 23059, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 4371, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 30879, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 5815, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 7370, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 67186, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 6926, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 3886, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 493, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 496, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 39641, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 57, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 278, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 36519, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 947, "mean@10": 1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 922, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 6398, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 16628, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 19143, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 36082, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 1197, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 15364, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 2879, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 7242, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 5390, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 63402, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 17746, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 15096, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 23770, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 2680, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 817, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 29703, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 22994, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 756, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 39255, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 17969, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 10889, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 848, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 1144, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 35854, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 10839, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 7262, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 6725, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 14367, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 16533, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 104, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 456, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 22290, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 5811, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 423, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 38969, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 31002, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 18381, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 10160, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 55206, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 2354, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 13702, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 777, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 3370, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 15824, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 8881, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 33251, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 34978, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 3128, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 5783, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 62704, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 38920, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 317, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 367, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 47654, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 7413, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 244, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 30291, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 43685, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 9702, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 17320, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 38205, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 8310, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 9715, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 30836, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 62481, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 38146, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 4916, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 8003, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 28861, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 31198, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 23233, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 353, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 475, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 7472, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 579, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 26010, "mean@10": 1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 1352, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 38710, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 5837, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 33321, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 8406, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 34169, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 12999, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 7806, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 807, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 4279, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 437, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 2895, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 650, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 638, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 1072, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 116, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 23105, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 18415, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 37994, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 9952, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 5100, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 19269, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 13512, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 454, "mean@10": 1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 912, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 15784, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 834, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 593, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 14910, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 36694, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 235, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 9489, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 281, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 259, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 42134, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 4864, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 7811, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 15402, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 11089, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 9503, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 13711, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 10724, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 3895, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 28, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 8936, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 1351, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 2366, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 14669, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 5151, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 163, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 16345, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 64437, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 11122, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 5524, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 43450, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 32936, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 703, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 568, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 16252, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 9119, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 6934, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 2075, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 39180, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 334, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 1078, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 9610, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 35859, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 103, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 27368, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 31574, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 33688, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 3192, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 1348, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 16860, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 4008, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 5738, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 33306, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 954, "mean@10": 1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 10820, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 21940, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 11606, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 37818, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 660, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 39824, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 628, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 4, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 482, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 43553, "mean@10": 1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 4058, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 382, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 3525, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 287, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 9103, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 1281, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 60108, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 6801, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 57520, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 21439, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 21803, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 27926, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 314, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 28030, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 762, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 414, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 9652, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 2484, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 45619, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 1248, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 7249, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 35152, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 717, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 27110, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 928, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 50667, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 17129, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 10892, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 6571, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 41938, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 38328, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 874, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 49, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 20972, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 2057, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 31043, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 9001, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 704, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 1180, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 11273, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 51061, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 39488, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 47154, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 43679, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 35502, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 7618, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 241, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 26427, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 40630, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 21297, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 1357, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 39036, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 190, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 5103, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 47022, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 12655, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 15208, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 9792, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 4160, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 57772, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 1309, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 24133, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 2257, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 57139, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 8223, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 2285, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 43, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 534, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 636, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 46805, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 60714, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 33673, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 126, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 73, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 413, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 1772, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 15580, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 1044, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 909, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 28864, "mean@10": 1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 9285, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 9797, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 1309, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 11283, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 63607, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 337, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 1142, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 11778, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 37043, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 20662, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 15045, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 252, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 16568, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 364, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 11343, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 16242, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 28839, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 34676, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 11633, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 1534, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 1, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 629, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 14455, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 8865, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 14720, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 977, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 109, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 191, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 17187, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 6799, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 47703, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 16474, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 15724, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 1216, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 10425, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 374, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 9304, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 11506, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 2673, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 20129, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 576, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 27613, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 21905, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 62712, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 27732, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 14050, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 18453, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 20020, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 7788, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 10570, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 61755, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 960, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 1104, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 5919, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 9457, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 16488, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 15772, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 811, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 53558, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 1012, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 150, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 804, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 11633, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 2723, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 23739, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 8902, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 873, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 14294, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 7641, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 37846, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 34260, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 22259, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 383, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 508, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 38564, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 753, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 2775, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 9160, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 14459, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 673, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 48461, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 9191, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 6616, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 1866, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 11487, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 283, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 1057, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 1290, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 19393, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 36522, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 45132, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 230, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 32154, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 9988, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 6064, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 46287, "mean@10": 1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 896, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 31065, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 20014, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 16871, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 15, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 277, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 12169, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 9657, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 63832, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 556, "mean@10": 1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 7028, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 8512, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 16727, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 14584, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 1588, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 246, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 24697, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 839, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 37123, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 47830, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 13638, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 8852, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 10691, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 1246, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 4577, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 37340, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 24547, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 348, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 33131, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 18284, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 10588, "mean@10": 1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 34406, "mean@10": 1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 916, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 520, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 1074, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 26022, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 1694, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 26691, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 861, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 7953, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 6789, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 8916, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 26721, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 28006, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 1245, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 32900, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 20677, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 54738, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 4652, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 14107, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 294, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 10401, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 994, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 16193, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 1114, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 21081, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 489, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 940, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 11739, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 5729, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 4837, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 60908, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 59183, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 37344, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 12352, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 40749, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 9031, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 73, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 64267, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 13083, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 1039, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 38481, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 594, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 16268, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 53855, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 9277, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 296, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 34616, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 1347, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 38673, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 34295, "mean@10": 1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 11261, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 14703, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 21888, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 100, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 59884, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 5623, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 6849, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 32297, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 10011, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 219, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 2716, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 9261, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 1164, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 20962, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 6769, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 1181, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 57250, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 43613, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 8211, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 14854, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 15060, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 36353, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 13498, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 15799, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 945, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 616, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 722, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 8698, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 6417, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 2923, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 9567, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 4014, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 17125, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 16108, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 42964, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 1912, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 102, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 25798, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 36603, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 27835, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 1120, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 13949, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 16458, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 28050, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 5735, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 16771, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 16036, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 13407, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 7018, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 5676, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 34754, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 5568, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 28746, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 68975, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 17278, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 15536, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 15750, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 2030, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 562, "mean@10": 1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 1019, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 12094, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 4253, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 101, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 40163, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 1000, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 37390, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 11647, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 9792, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 25025, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 35561, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 6470, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 576, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 1791, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 36985, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 50628, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 8494, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 16979, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 16162, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 11783, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 1030, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 16697, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 15521, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 1191, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 16924, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 578, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 642, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 40401, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 10531, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 13230, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 41875, "mean@10": 1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 38477, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 310, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 1717, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 24265, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 36297, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 8487, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 17226, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 53141, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 9716, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 1398, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 4613, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 810, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 16892, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 9293, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 68573, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 22222, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 21963, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 46828, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 15031, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 108, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 17349, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 554, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 9687, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 260, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 949, "mean@10": 1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 19316, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 3081, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 30596, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 53491, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 39916, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 23754, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 5200, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 67781, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 11397, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 24822, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 16590, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 1226, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 4486, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 26592, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 130, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 775, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 68692, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 9033, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 24512, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 14455, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 20426, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 17877, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 40289, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 733, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 38464, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 764, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 11294, "mean@10": 1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 6680, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 6601, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 66854, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 548, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 327, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 5348, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 9018, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 36688, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 28328, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 5601, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 15424, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 1190, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 5184, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 30649, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 904, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 9029, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 16160, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 1461, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 39770, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 5987, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 1293, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 17010, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 15126, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 211, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 15940, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 730, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 7478, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 30083, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 1136, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 60251, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 860, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 673, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 508, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 2628, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 3205, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 13010, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 497, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 24228, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 30568, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 874, "mean@10": 1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 60100, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 38418, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 6665, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 1008, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 564, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 2437, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 33068, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 61830, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 13418, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 19166, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 1766, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 1215, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 10163, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 30114, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 2737, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 598, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 600, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 41522, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 145, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 53194, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 12543, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 783, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 1314, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 67159, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 16074, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 4592, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 38106, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 34824, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 7744, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 39815, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 1305, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 67896, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 33929, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 3840, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 2342, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 9549, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 17136, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 10165, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 33227, "mean@10": 1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 7465, "mean@10": 1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 1151, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 165, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 11882, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 393, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 1141, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 32704, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 294, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 666, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 1138, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 38947, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 30538, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 26284, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 166, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 10850, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 54185, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 39004, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 158, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 14370, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 1020, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 16813, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 15988, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 46106, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 975, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 12610, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 14760, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 30174, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 2312, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 7781, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 1238, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 7832, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 384, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 19728, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 1362, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 796, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 53934, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 4410, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 6644, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 313, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 26798, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 38818, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 615, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 11141, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 1024, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 1029, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 335, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 16858, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 433, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 980, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 6873, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 34035, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 385, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 1592, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 581, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 14312, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 859, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 14434, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 352, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 11581, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 36797, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 10553, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 1056, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 417, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 17281, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 22579, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 13150, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 33632, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 16496, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 50708, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 13851, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 16261, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 46408, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 38367, "mean@10": 1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 37808, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 557, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 11532, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 40144, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 14754, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 1156, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 63938, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 72288, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 4619, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 47096, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 11614, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 39262, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 720, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 56129, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 5343, "mean@10": 1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 5211, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 8560, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 52443, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 55661, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 11811, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 28684, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 1054, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 5910, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 781, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 36221, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 24667, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 629, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 14334, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 26734, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 394, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 543, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 10008, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 55339, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 196, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 17385, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 963, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 54133, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 14843, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 63585, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 20172, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 17159, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 372, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 30381, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 29452, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 28824, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 35598, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 45060, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 22607, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 22135, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 42147, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 16519, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 1232, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 9993, "mean@10": 1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 14847, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 294, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 1666, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 62, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 59691, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 29779, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 13507, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 21293, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 12209, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 1288, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 9942, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 23608, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 1187, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 27094, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 1242, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 14087, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 21197, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 86, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 9170, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 1338, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 15511, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 1227, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 1032, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 137, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 19041, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 107, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 16985, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 12032, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 1175, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 13260, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 1169, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 46488, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 10783, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 8727, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 12834, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 69006, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 7549, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 6555, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 242, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 257, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 558, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 4977, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 5601, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 39038, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 6318, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 3128, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 13088, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 33815, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 924, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 5999, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 24386, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 67554, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 17342, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 31475, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 1228, "mean@10": 1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 33143, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 5239, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 37810, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 9014, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 469, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 61024, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 451, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 11507, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 59863, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 64718, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 9495, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 22605, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 2769, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 11242, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 377, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 33044, "mean@10": 1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 3920, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 3431, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 4024, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 41474, "mean@10": 1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 59135, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 7677, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 13857, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 23355, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 16946, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 952, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 2124, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 939, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 36020, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 915, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 12046, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 32530, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 3250, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 34996, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 6993, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 9619, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 12163, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 659, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 923, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 5535, "mean@10": 1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 28684, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 4131, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 1364, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 33717, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 1205, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 3049, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 19078, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 1047, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 31445, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 34031, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 12918, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 32242, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 2426, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 20678, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 39340, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 1035, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 48427, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 2833, "mean@10": 1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 60712, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 72287, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 16381, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 65178, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 16575, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 2318, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 2032, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 27865, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 1255, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 4806, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 37669, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 180, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 15276, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 1147, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 551, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 17248, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 50467, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 2613, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 9629, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 6595, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 854, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 35340, "mean@10": 0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 6362, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 765, "mean@10": 1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 15474, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 5602, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 48030, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 1277, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 11168, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 21, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 25387, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 14958, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 377, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 460, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 35261, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 30698, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 37495, "mean@10": 1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 16595, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 36324, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 50993, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 46780, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 30861, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 20480, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 1115, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 40063, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 5541, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 12258, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 1456, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 33653, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 9985, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 1267, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 19818, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 16593, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 5273, "mean@10": 0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 32844, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 1184, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 5290, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 5352, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 16856, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 668, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 36123, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 36099, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 4586, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 19496, "mean@10": 1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 15379, "mean@10": -0.4, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 3148, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 11358, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 63348, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 4050, "mean@10": -0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 16732, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 13414, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 13859, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 11717, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 48405, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 125, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 55726, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 6, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 16038, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 16589, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 91, "mean@10": -0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 514, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 839, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "limr", "type": "context"}
{"id": 64508, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 24652, "mean@10": -0.8, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 20558, "mean@10": 0.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 17499, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "orz_math", "type": "context"}
{"id": 4497, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 905, "mean@10": 0.2, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 16606, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 4977, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 31810, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 16957, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}
{"id": 403, "mean@10": -1.0, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "dapomath", "type": "context"}
{"id": 37559, "mean@10": 0.6, "model": "DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "deepscaler", "type": "context"}