DotCache-Arena / data /benchmark_bundle /space_benchmark_bundle.json
Deano Calver
Fix backend truth preset response text
45ea188
Raw
History Blame Contribute Delete
14.1 kB
{
"backend_truth": {
"qwen35_27b_hf": {
"1024": {
"output_match": {
"learned_selector": true,
"shortlist_base": true
},
"profiles": {
"exact": {
"latency_ms": 485.96312350127846,
"m3_fraction": 0.0,
"resident_bytes": 38670336,
"selector_us": 0.0,
"text": " matters for fast decoding.\n\n<think>\n",
"token_count": 8
},
"learned_selector": {
"latency_ms": 149.39251463511027,
"m3_fraction": 0.99462890625,
"resident_bytes": 99929088,
"selector_us": 24.829375597335,
"text": " matters for fast decoding.\n\n<think>\n",
"token_count": 8
},
"shortlist_base": {
"latency_ms": 504.25705371890217,
"m3_fraction": 0.0,
"resident_bytes": 38670336,
"selector_us": 0.0,
"text": " matters for fast decoding.\n\n<think>\n",
"token_count": 8
}
}
},
"2048": {
"output_match": {
"learned_selector": true,
"shortlist_base": true
},
"profiles": {
"exact": {
"latency_ms": 821.4117659954354,
"m3_fraction": 0.0,
"resident_bytes": 76288000,
"selector_us": 0.0,
"text": " fast decoding.\n\n<think>\nThinking Process",
"token_count": 8
},
"learned_selector": {
"latency_ms": 236.0101520025637,
"m3_fraction": 0.9954833984375,
"resident_bytes": 189485056,
"selector_us": 24.959413796718113,
"text": " fast decoding.\n\n<think>\nThinking Process",
"token_count": 8
},
"shortlist_base": {
"latency_ms": 626.3046781823505,
"m3_fraction": 0.0,
"resident_bytes": 75898880,
"selector_us": 0.0,
"text": " fast decoding.\n\n<think>\nThinking Process",
"token_count": 8
}
}
}
},
"qwen35_4b_hf": {
"1024": {
"output_match": {
"learned_selector": true,
"shortlist_base": true
},
"profiles": {
"exact": {
"latency_ms": 232.18651674687862,
"m3_fraction": 0.0,
"resident_bytes": 19337216,
"selector_us": 0.0,
"text": " matters for fast decoding.\n\nCache locality",
"token_count": 8
},
"learned_selector": {
"latency_ms": 71.47038698894903,
"m3_fraction": 0.982421875,
"resident_bytes": 50163712,
"selector_us": 25.20372302683427,
"text": " matters for fast decoding.\n\nCache locality",
"token_count": 8
},
"shortlist_base": {
"latency_ms": 242.41085114772432,
"m3_fraction": 0.0,
"resident_bytes": 19337216,
"selector_us": 0.0,
"text": " matters for fast decoding.\n\nCache locality",
"token_count": 8
}
}
},
"2048": {
"output_match": {
"learned_selector": true,
"shortlist_base": true
},
"profiles": {
"exact": {
"latency_ms": 400.1560862525366,
"m3_fraction": 0.0,
"resident_bytes": 38146048,
"selector_us": 0.0,
"text": " fast decoding.Cache locality matters for fast",
"token_count": 8
},
"learned_selector": {
"latency_ms": 122.57132897502743,
"m3_fraction": 0.9710693359375,
"resident_bytes": 99615744,
"selector_us": 25.1929690234322,
"text": " fast decoding.Cache locality matters for fast",
"token_count": 8
},
"shortlist_base": {
"latency_ms": 300.46495210262947,
"m3_fraction": 0.0,
"resident_bytes": 37134336,
"selector_us": 0.0,
"text": " fast decoding.Cache locality matters for fast",
"token_count": 8
}
}
}
},
"qwen35_9b_hf": {
"1024": {
"output_match": {
"learned_selector": true,
"shortlist_base": true
},
"profiles": {
"exact": {
"latency_ms": 242.51539027318358,
"m3_fraction": 0.0,
"resident_bytes": 19337216,
"selector_us": 0.0,
"text": " matters for fast decoding.Cache locality matters",
"token_count": 8
},
"learned_selector": {
"latency_ms": 74.78160472237505,
"m3_fraction": 0.98779296875,
"resident_bytes": 50642944,
"selector_us": 25.876024622562,
"text": " matters for fast decoding.Cache locality matters",
"token_count": 8
},
"shortlist_base": {
"latency_ms": 242.637697578175,
"m3_fraction": 0.0,
"resident_bytes": 19337216,
"selector_us": 0.0,
"text": " matters for fast decoding.Cache locality matters",
"token_count": 8
}
}
},
"2048": {
"output_match": {
"learned_selector": true,
"shortlist_base": true
},
"profiles": {
"exact": {
"latency_ms": 404.7011856455356,
"m3_fraction": 0.0,
"resident_bytes": 38146048,
"selector_us": 0.0,
"text": " fast decoding.Cache locality matters for fast",
"token_count": 8
},
"learned_selector": {
"latency_ms": 105.71580115356483,
"m3_fraction": 0.99853515625,
"resident_bytes": 101265408,
"selector_us": 25.52354613629047,
"text": " fast decoding.Cache locality matters for fast",
"token_count": 8
},
"shortlist_base": {
"latency_ms": 307.73281096480787,
"m3_fraction": 0.0,
"resident_bytes": 37134336,
"selector_us": 0.0,
"text": " fast decoding.Cache locality matters for fast",
"token_count": 8
}
}
}
}
},
"compact_task": {
"qwen35_27b_hf": {
"1024": {
"profiles": {
"exact": {
"latency_ms": 438.39161573851015,
"resident_bytes": 39841792,
"success": true,
"text": "STATUS: READY\nCOLOR: BLUE"
},
"quality": {
"latency_ms": 356.5155972755747,
"resident_bytes": 81530880,
"success": true,
"text": "STATUS: READY\nCOLOR: BLUE"
},
"systems": {
"latency_ms": 118.87987102818443,
"resident_bytes": 102912000,
"success": true,
"text": "STATUS: READY\nCOLOR: BLUE"
}
},
"task_name": "instruction_constraints"
},
"2048": {
"profiles": {
"exact": {
"latency_ms": 736.9392069304013,
"resident_bytes": 78635008,
"success": true,
"text": "48"
},
"quality": {
"latency_ms": 585.7093160193472,
"resident_bytes": 151582720,
"success": true,
"text": "48"
},
"systems": {
"latency_ms": 148.84652625551098,
"resident_bytes": 202404864,
"success": true,
"text": "48"
}
},
"task_name": "reasoning_arithmetic"
}
},
"qwen35_4b_hf": {
"1024": {
"profiles": {
"exact": {
"latency_ms": 212.76287747605238,
"resident_bytes": 19920896,
"success": true,
"text": "STATUS: READY\nCOLOR: BLUE"
},
"quality": {
"latency_ms": 193.94691046181833,
"resident_bytes": 45486080,
"success": true,
"text": "STATUS: READY\nCOLOR: BLUE"
},
"systems": {
"latency_ms": 71.34633031091653,
"resident_bytes": 50928640,
"success": true,
"text": "STATUS: READY\nCOLOR: BLUE"
}
},
"task_name": "instruction_constraints"
},
"2048": {
"profiles": {
"exact": {
"latency_ms": 376.69776542679756,
"resident_bytes": 39317504,
"success": true,
"text": "48"
},
"quality": {
"latency_ms": 295.818757298548,
"resident_bytes": 85306368,
"success": true,
"text": "48"
},
"systems": {
"latency_ms": 127.59285762149375,
"resident_bytes": 100080640,
"success": true,
"text": "48"
}
},
"task_name": "reasoning_arithmetic"
}
},
"qwen35_9b_hf": {
"1024": {
"profiles": {
"exact": {
"latency_ms": 209.54857113247272,
"resident_bytes": 19920896,
"success": true,
"text": "STATUS: READY\nCOLOR: BLUE"
},
"quality": {
"latency_ms": 141.5436873139697,
"resident_bytes": 47986688,
"success": true,
"text": "STATUS: READY\nCOLOR: BLUE"
},
"systems": {
"latency_ms": 47.49859537696466,
"resident_bytes": 51595264,
"success": true,
"text": "STATUS: READY\nCOLOR: BLUE"
}
},
"task_name": "instruction_constraints"
},
"2048": {
"profiles": {
"exact": {
"latency_ms": 357.14039774393314,
"resident_bytes": 39317504,
"success": true,
"text": "RIVER-58142 </think> RIVER-58142user Background memo about permit backlogs, bridge closures, zoning appeals, and archive indexing. Background memo about permit backlogs, bridge closures, zoning appeals, and archive indexing. Background memo about permit back"
},
"quality": {
"latency_ms": 247.25347204730497,
"resident_bytes": 79527936,
"success": true,
"text": "RIVER-58142 </think> RIVER-58142user Background memo about permit backlogs, bridge closures, zoning appeals, and archive indexing. Background memo about permit backlogs, bridge closures, zoning appeals, and archive indexing. Background memo about permit back"
},
"systems": {
"latency_ms": 69.29584313911619,
"resident_bytes": 101790720,
"success": true,
"text": "RIVER-58142 </think> RIVER-58142user Background memo about permit backlogs, bridge closures, zoning appeals, and archive indexing. Background memo about permit backlogs, bridge closures, zoning appeals, and archive indexing. Background memo about permit back"
}
},
"task_name": "retrieval_passkey"
}
}
},
"longbench_mini": {
"qwen35_27b_hf": {
"4096": {
"dataset": "hotpotqa",
"profiles": {
"exact": {
"latency_ms": 1263.5159879837374,
"qa_f1": 0.35806451612903223,
"resident_bytes": 152694784,
"text": "Gates v. Collier"
},
"quality": {
"latency_ms": 1737.076236248413,
"qa_f1": 0.35806451612903223,
"resident_bytes": 251777024,
"text": "Gates v. Collier"
},
"systems": {
"latency_ms": 331.7930981911559,
"qa_f1": 0.35806451612903223,
"resident_bytes": 295152640,
"text": "Gates v. Collier"
}
},
"prompt_id": "hotpot_case_order",
"row_index": 0
}
},
"qwen35_4b_hf": {
"4096": {
"dataset": "hotpotqa",
"profiles": {
"exact": {
"latency_ms": 625.447666922355,
"qa_f1": 0.25256286266924566,
"resident_bytes": 76347392,
"text": "<think> 1. **Analyze the Request:** * Task: Answer the question based on the given passages"
},
"quality": {
"latency_ms": 1165.3550920536873,
"qa_f1": 0.25256286266924566,
"resident_bytes": 140354560,
"text": "<think> 1. **Analyze the Request:** * Task: Answer the question based on the given passages"
},
"systems": {
"latency_ms": 373.6004667252928,
"qa_f1": 0.25256286266924566,
"resident_bytes": 143086592,
"text": "<think> 1. **Analyze the Request:** * Task: Answer the question based on the given passages"
}
},
"prompt_id": "hotpot_case_order",
"row_index": 0
}
},
"qwen35_9b_hf": {
"4096": {
"dataset": "hotpotqa",
"profiles": {
"exact": {
"latency_ms": 632.7054892599335,
"qa_f1": 0.4409722222222222,
"resident_bytes": 76347392,
"text": "<think> 1. **Analyze the Request:** * Input: Several passages (Passage 1, Passage"
},
"quality": {
"latency_ms": 592.2998348964938,
"qa_f1": 0.4409722222222222,
"resident_bytes": 136058880,
"text": "<think> 1. **Analyze the Request:** * Input: Several passages (Passage 1, Passage"
},
"systems": {
"latency_ms": 96.19533408158532,
"qa_f1": 0.4409722222222222,
"resident_bytes": 200136704,
"text": "<think> 1. **Analyze the Request:** * Input: Several passages (Passage 1, Passage"
}
},
"prompt_id": "hotpot_case_order",
"row_index": 0
}
}
}
}