Spaces:
Paused
Paused
| { | |
| "backend_truth": { | |
| "qwen35_27b_hf": { | |
| "1024": { | |
| "output_match": { | |
| "learned_selector": true, | |
| "shortlist_base": true | |
| }, | |
| "profiles": { | |
| "exact": { | |
| "latency_ms": 485.96312350127846, | |
| "m3_fraction": 0.0, | |
| "resident_bytes": 38670336, | |
| "selector_us": 0.0, | |
| "text": " matters for fast decoding.\n\n<think>\n", | |
| "token_count": 8 | |
| }, | |
| "learned_selector": { | |
| "latency_ms": 149.39251463511027, | |
| "m3_fraction": 0.99462890625, | |
| "resident_bytes": 99929088, | |
| "selector_us": 24.829375597335, | |
| "text": " matters for fast decoding.\n\n<think>\n", | |
| "token_count": 8 | |
| }, | |
| "shortlist_base": { | |
| "latency_ms": 504.25705371890217, | |
| "m3_fraction": 0.0, | |
| "resident_bytes": 38670336, | |
| "selector_us": 0.0, | |
| "text": " matters for fast decoding.\n\n<think>\n", | |
| "token_count": 8 | |
| } | |
| } | |
| }, | |
| "2048": { | |
| "output_match": { | |
| "learned_selector": true, | |
| "shortlist_base": true | |
| }, | |
| "profiles": { | |
| "exact": { | |
| "latency_ms": 821.4117659954354, | |
| "m3_fraction": 0.0, | |
| "resident_bytes": 76288000, | |
| "selector_us": 0.0, | |
| "text": " fast decoding.\n\n<think>\nThinking Process", | |
| "token_count": 8 | |
| }, | |
| "learned_selector": { | |
| "latency_ms": 236.0101520025637, | |
| "m3_fraction": 0.9954833984375, | |
| "resident_bytes": 189485056, | |
| "selector_us": 24.959413796718113, | |
| "text": " fast decoding.\n\n<think>\nThinking Process", | |
| "token_count": 8 | |
| }, | |
| "shortlist_base": { | |
| "latency_ms": 626.3046781823505, | |
| "m3_fraction": 0.0, | |
| "resident_bytes": 75898880, | |
| "selector_us": 0.0, | |
| "text": " fast decoding.\n\n<think>\nThinking Process", | |
| "token_count": 8 | |
| } | |
| } | |
| } | |
| }, | |
| "qwen35_4b_hf": { | |
| "1024": { | |
| "output_match": { | |
| "learned_selector": true, | |
| "shortlist_base": true | |
| }, | |
| "profiles": { | |
| "exact": { | |
| "latency_ms": 232.18651674687862, | |
| "m3_fraction": 0.0, | |
| "resident_bytes": 19337216, | |
| "selector_us": 0.0, | |
| "text": " matters for fast decoding.\n\nCache locality", | |
| "token_count": 8 | |
| }, | |
| "learned_selector": { | |
| "latency_ms": 71.47038698894903, | |
| "m3_fraction": 0.982421875, | |
| "resident_bytes": 50163712, | |
| "selector_us": 25.20372302683427, | |
| "text": " matters for fast decoding.\n\nCache locality", | |
| "token_count": 8 | |
| }, | |
| "shortlist_base": { | |
| "latency_ms": 242.41085114772432, | |
| "m3_fraction": 0.0, | |
| "resident_bytes": 19337216, | |
| "selector_us": 0.0, | |
| "text": " matters for fast decoding.\n\nCache locality", | |
| "token_count": 8 | |
| } | |
| } | |
| }, | |
| "2048": { | |
| "output_match": { | |
| "learned_selector": true, | |
| "shortlist_base": true | |
| }, | |
| "profiles": { | |
| "exact": { | |
| "latency_ms": 400.1560862525366, | |
| "m3_fraction": 0.0, | |
| "resident_bytes": 38146048, | |
| "selector_us": 0.0, | |
| "text": " fast decoding.Cache locality matters for fast", | |
| "token_count": 8 | |
| }, | |
| "learned_selector": { | |
| "latency_ms": 122.57132897502743, | |
| "m3_fraction": 0.9710693359375, | |
| "resident_bytes": 99615744, | |
| "selector_us": 25.1929690234322, | |
| "text": " fast decoding.Cache locality matters for fast", | |
| "token_count": 8 | |
| }, | |
| "shortlist_base": { | |
| "latency_ms": 300.46495210262947, | |
| "m3_fraction": 0.0, | |
| "resident_bytes": 37134336, | |
| "selector_us": 0.0, | |
| "text": " fast decoding.Cache locality matters for fast", | |
| "token_count": 8 | |
| } | |
| } | |
| } | |
| }, | |
| "qwen35_9b_hf": { | |
| "1024": { | |
| "output_match": { | |
| "learned_selector": true, | |
| "shortlist_base": true | |
| }, | |
| "profiles": { | |
| "exact": { | |
| "latency_ms": 242.51539027318358, | |
| "m3_fraction": 0.0, | |
| "resident_bytes": 19337216, | |
| "selector_us": 0.0, | |
| "text": " matters for fast decoding.Cache locality matters", | |
| "token_count": 8 | |
| }, | |
| "learned_selector": { | |
| "latency_ms": 74.78160472237505, | |
| "m3_fraction": 0.98779296875, | |
| "resident_bytes": 50642944, | |
| "selector_us": 25.876024622562, | |
| "text": " matters for fast decoding.Cache locality matters", | |
| "token_count": 8 | |
| }, | |
| "shortlist_base": { | |
| "latency_ms": 242.637697578175, | |
| "m3_fraction": 0.0, | |
| "resident_bytes": 19337216, | |
| "selector_us": 0.0, | |
| "text": " matters for fast decoding.Cache locality matters", | |
| "token_count": 8 | |
| } | |
| } | |
| }, | |
| "2048": { | |
| "output_match": { | |
| "learned_selector": true, | |
| "shortlist_base": true | |
| }, | |
| "profiles": { | |
| "exact": { | |
| "latency_ms": 404.7011856455356, | |
| "m3_fraction": 0.0, | |
| "resident_bytes": 38146048, | |
| "selector_us": 0.0, | |
| "text": " fast decoding.Cache locality matters for fast", | |
| "token_count": 8 | |
| }, | |
| "learned_selector": { | |
| "latency_ms": 105.71580115356483, | |
| "m3_fraction": 0.99853515625, | |
| "resident_bytes": 101265408, | |
| "selector_us": 25.52354613629047, | |
| "text": " fast decoding.Cache locality matters for fast", | |
| "token_count": 8 | |
| }, | |
| "shortlist_base": { | |
| "latency_ms": 307.73281096480787, | |
| "m3_fraction": 0.0, | |
| "resident_bytes": 37134336, | |
| "selector_us": 0.0, | |
| "text": " fast decoding.Cache locality matters for fast", | |
| "token_count": 8 | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "compact_task": { | |
| "qwen35_27b_hf": { | |
| "1024": { | |
| "profiles": { | |
| "exact": { | |
| "latency_ms": 438.39161573851015, | |
| "resident_bytes": 39841792, | |
| "success": true, | |
| "text": "STATUS: READY\nCOLOR: BLUE" | |
| }, | |
| "quality": { | |
| "latency_ms": 356.5155972755747, | |
| "resident_bytes": 81530880, | |
| "success": true, | |
| "text": "STATUS: READY\nCOLOR: BLUE" | |
| }, | |
| "systems": { | |
| "latency_ms": 118.87987102818443, | |
| "resident_bytes": 102912000, | |
| "success": true, | |
| "text": "STATUS: READY\nCOLOR: BLUE" | |
| } | |
| }, | |
| "task_name": "instruction_constraints" | |
| }, | |
| "2048": { | |
| "profiles": { | |
| "exact": { | |
| "latency_ms": 736.9392069304013, | |
| "resident_bytes": 78635008, | |
| "success": true, | |
| "text": "48" | |
| }, | |
| "quality": { | |
| "latency_ms": 585.7093160193472, | |
| "resident_bytes": 151582720, | |
| "success": true, | |
| "text": "48" | |
| }, | |
| "systems": { | |
| "latency_ms": 148.84652625551098, | |
| "resident_bytes": 202404864, | |
| "success": true, | |
| "text": "48" | |
| } | |
| }, | |
| "task_name": "reasoning_arithmetic" | |
| } | |
| }, | |
| "qwen35_4b_hf": { | |
| "1024": { | |
| "profiles": { | |
| "exact": { | |
| "latency_ms": 212.76287747605238, | |
| "resident_bytes": 19920896, | |
| "success": true, | |
| "text": "STATUS: READY\nCOLOR: BLUE" | |
| }, | |
| "quality": { | |
| "latency_ms": 193.94691046181833, | |
| "resident_bytes": 45486080, | |
| "success": true, | |
| "text": "STATUS: READY\nCOLOR: BLUE" | |
| }, | |
| "systems": { | |
| "latency_ms": 71.34633031091653, | |
| "resident_bytes": 50928640, | |
| "success": true, | |
| "text": "STATUS: READY\nCOLOR: BLUE" | |
| } | |
| }, | |
| "task_name": "instruction_constraints" | |
| }, | |
| "2048": { | |
| "profiles": { | |
| "exact": { | |
| "latency_ms": 376.69776542679756, | |
| "resident_bytes": 39317504, | |
| "success": true, | |
| "text": "48" | |
| }, | |
| "quality": { | |
| "latency_ms": 295.818757298548, | |
| "resident_bytes": 85306368, | |
| "success": true, | |
| "text": "48" | |
| }, | |
| "systems": { | |
| "latency_ms": 127.59285762149375, | |
| "resident_bytes": 100080640, | |
| "success": true, | |
| "text": "48" | |
| } | |
| }, | |
| "task_name": "reasoning_arithmetic" | |
| } | |
| }, | |
| "qwen35_9b_hf": { | |
| "1024": { | |
| "profiles": { | |
| "exact": { | |
| "latency_ms": 209.54857113247272, | |
| "resident_bytes": 19920896, | |
| "success": true, | |
| "text": "STATUS: READY\nCOLOR: BLUE" | |
| }, | |
| "quality": { | |
| "latency_ms": 141.5436873139697, | |
| "resident_bytes": 47986688, | |
| "success": true, | |
| "text": "STATUS: READY\nCOLOR: BLUE" | |
| }, | |
| "systems": { | |
| "latency_ms": 47.49859537696466, | |
| "resident_bytes": 51595264, | |
| "success": true, | |
| "text": "STATUS: READY\nCOLOR: BLUE" | |
| } | |
| }, | |
| "task_name": "instruction_constraints" | |
| }, | |
| "2048": { | |
| "profiles": { | |
| "exact": { | |
| "latency_ms": 357.14039774393314, | |
| "resident_bytes": 39317504, | |
| "success": true, | |
| "text": "RIVER-58142 </think> RIVER-58142user Background memo about permit backlogs, bridge closures, zoning appeals, and archive indexing. Background memo about permit backlogs, bridge closures, zoning appeals, and archive indexing. Background memo about permit back" | |
| }, | |
| "quality": { | |
| "latency_ms": 247.25347204730497, | |
| "resident_bytes": 79527936, | |
| "success": true, | |
| "text": "RIVER-58142 </think> RIVER-58142user Background memo about permit backlogs, bridge closures, zoning appeals, and archive indexing. Background memo about permit backlogs, bridge closures, zoning appeals, and archive indexing. Background memo about permit back" | |
| }, | |
| "systems": { | |
| "latency_ms": 69.29584313911619, | |
| "resident_bytes": 101790720, | |
| "success": true, | |
| "text": "RIVER-58142 </think> RIVER-58142user Background memo about permit backlogs, bridge closures, zoning appeals, and archive indexing. Background memo about permit backlogs, bridge closures, zoning appeals, and archive indexing. Background memo about permit back" | |
| } | |
| }, | |
| "task_name": "retrieval_passkey" | |
| } | |
| } | |
| }, | |
| "longbench_mini": { | |
| "qwen35_27b_hf": { | |
| "4096": { | |
| "dataset": "hotpotqa", | |
| "profiles": { | |
| "exact": { | |
| "latency_ms": 1263.5159879837374, | |
| "qa_f1": 0.35806451612903223, | |
| "resident_bytes": 152694784, | |
| "text": "Gates v. Collier" | |
| }, | |
| "quality": { | |
| "latency_ms": 1737.076236248413, | |
| "qa_f1": 0.35806451612903223, | |
| "resident_bytes": 251777024, | |
| "text": "Gates v. Collier" | |
| }, | |
| "systems": { | |
| "latency_ms": 331.7930981911559, | |
| "qa_f1": 0.35806451612903223, | |
| "resident_bytes": 295152640, | |
| "text": "Gates v. Collier" | |
| } | |
| }, | |
| "prompt_id": "hotpot_case_order", | |
| "row_index": 0 | |
| } | |
| }, | |
| "qwen35_4b_hf": { | |
| "4096": { | |
| "dataset": "hotpotqa", | |
| "profiles": { | |
| "exact": { | |
| "latency_ms": 625.447666922355, | |
| "qa_f1": 0.25256286266924566, | |
| "resident_bytes": 76347392, | |
| "text": "<think> 1. **Analyze the Request:** * Task: Answer the question based on the given passages" | |
| }, | |
| "quality": { | |
| "latency_ms": 1165.3550920536873, | |
| "qa_f1": 0.25256286266924566, | |
| "resident_bytes": 140354560, | |
| "text": "<think> 1. **Analyze the Request:** * Task: Answer the question based on the given passages" | |
| }, | |
| "systems": { | |
| "latency_ms": 373.6004667252928, | |
| "qa_f1": 0.25256286266924566, | |
| "resident_bytes": 143086592, | |
| "text": "<think> 1. **Analyze the Request:** * Task: Answer the question based on the given passages" | |
| } | |
| }, | |
| "prompt_id": "hotpot_case_order", | |
| "row_index": 0 | |
| } | |
| }, | |
| "qwen35_9b_hf": { | |
| "4096": { | |
| "dataset": "hotpotqa", | |
| "profiles": { | |
| "exact": { | |
| "latency_ms": 632.7054892599335, | |
| "qa_f1": 0.4409722222222222, | |
| "resident_bytes": 76347392, | |
| "text": "<think> 1. **Analyze the Request:** * Input: Several passages (Passage 1, Passage" | |
| }, | |
| "quality": { | |
| "latency_ms": 592.2998348964938, | |
| "qa_f1": 0.4409722222222222, | |
| "resident_bytes": 136058880, | |
| "text": "<think> 1. **Analyze the Request:** * Input: Several passages (Passage 1, Passage" | |
| }, | |
| "systems": { | |
| "latency_ms": 96.19533408158532, | |
| "qa_f1": 0.4409722222222222, | |
| "resident_bytes": 200136704, | |
| "text": "<think> 1. **Analyze the Request:** * Input: Several passages (Passage 1, Passage" | |
| } | |
| }, | |
| "prompt_id": "hotpot_case_order", | |
| "row_index": 0 | |
| } | |
| } | |
| } | |
| } | |