Spaces:
Paused
Paused
| { | |
| "qwen35_27b_hf": { | |
| "1024": { | |
| "output_match": { | |
| "learned_selector": true, | |
| "shortlist_base": true | |
| }, | |
| "profiles": { | |
| "exact": { | |
| "latency_ms": 485.96312350127846, | |
| "m3_fraction": 0.0, | |
| "resident_bytes": 38670336, | |
| "selector_us": 0.0, | |
| "source_records": 7, | |
| "text": " matters for fast decoding.\n\n<think>\n", | |
| "token_count": 8 | |
| }, | |
| "learned_selector": { | |
| "latency_ms": 149.39251463511027, | |
| "m3_fraction": 0.99462890625, | |
| "resident_bytes": 99929088, | |
| "selector_us": 24.829375597335, | |
| "source_records": 7, | |
| "text": " matters for fast decoding.\n\n<think>\n", | |
| "token_count": 8 | |
| }, | |
| "shortlist_base": { | |
| "latency_ms": 504.25705371890217, | |
| "m3_fraction": 0.0, | |
| "resident_bytes": 38670336, | |
| "selector_us": 0.0, | |
| "source_records": 7, | |
| "text": " matters for fast decoding.\n\n<think>\n", | |
| "token_count": 8 | |
| } | |
| } | |
| }, | |
| "2048": { | |
| "output_match": { | |
| "learned_selector": true, | |
| "shortlist_base": true | |
| }, | |
| "profiles": { | |
| "exact": { | |
| "latency_ms": 821.4117659954354, | |
| "m3_fraction": 0.0, | |
| "resident_bytes": 76288000, | |
| "selector_us": 0.0, | |
| "source_records": 7, | |
| "text": " fast decoding.\n\n<think>\nThinking Process", | |
| "token_count": 8 | |
| }, | |
| "learned_selector": { | |
| "latency_ms": 236.0101520025637, | |
| "m3_fraction": 0.9954833984375, | |
| "resident_bytes": 189485056, | |
| "selector_us": 24.959413796718113, | |
| "source_records": 7, | |
| "text": " fast decoding.\n\n<think>\nThinking Process", | |
| "token_count": 8 | |
| }, | |
| "shortlist_base": { | |
| "latency_ms": 626.3046781823505, | |
| "m3_fraction": 0.0, | |
| "resident_bytes": 75898880, | |
| "selector_us": 0.0, | |
| "source_records": 7, | |
| "text": " fast decoding.\n\n<think>\nThinking Process", | |
| "token_count": 8 | |
| } | |
| } | |
| } | |
| }, | |
| "qwen35_4b_hf": { | |
| "1024": { | |
| "output_match": { | |
| "learned_selector": true, | |
| "shortlist_base": true | |
| }, | |
| "profiles": { | |
| "exact": { | |
| "latency_ms": 232.18651674687862, | |
| "m3_fraction": 0.0, | |
| "resident_bytes": 19337216, | |
| "selector_us": 0.0, | |
| "source_records": 7, | |
| "text": " matters for fast decoding.\n\nCache locality", | |
| "token_count": 8 | |
| }, | |
| "learned_selector": { | |
| "latency_ms": 71.47038698894903, | |
| "m3_fraction": 0.982421875, | |
| "resident_bytes": 50163712, | |
| "selector_us": 25.20372302683427, | |
| "source_records": 7, | |
| "text": " matters for fast decoding.\n\nCache locality", | |
| "token_count": 8 | |
| }, | |
| "shortlist_base": { | |
| "latency_ms": 242.41085114772432, | |
| "m3_fraction": 0.0, | |
| "resident_bytes": 19337216, | |
| "selector_us": 0.0, | |
| "source_records": 7, | |
| "text": " matters for fast decoding.\n\nCache locality", | |
| "token_count": 8 | |
| } | |
| } | |
| }, | |
| "2048": { | |
| "output_match": { | |
| "learned_selector": true, | |
| "shortlist_base": true | |
| }, | |
| "profiles": { | |
| "exact": { | |
| "latency_ms": 400.1560862525366, | |
| "m3_fraction": 0.0, | |
| "resident_bytes": 38146048, | |
| "selector_us": 0.0, | |
| "source_records": 7, | |
| "text": " fast decoding.Cache locality matters for fast", | |
| "token_count": 8 | |
| }, | |
| "learned_selector": { | |
| "latency_ms": 122.57132897502743, | |
| "m3_fraction": 0.9710693359375, | |
| "resident_bytes": 99615744, | |
| "selector_us": 25.1929690234322, | |
| "source_records": 7, | |
| "text": " fast decoding.Cache locality matters for fast", | |
| "token_count": 8 | |
| }, | |
| "shortlist_base": { | |
| "latency_ms": 300.46495210262947, | |
| "m3_fraction": 0.0, | |
| "resident_bytes": 37134336, | |
| "selector_us": 0.0, | |
| "source_records": 7, | |
| "text": " fast decoding.Cache locality matters for fast", | |
| "token_count": 8 | |
| } | |
| } | |
| } | |
| }, | |
| "qwen35_9b_hf": { | |
| "1024": { | |
| "output_match": { | |
| "learned_selector": true, | |
| "shortlist_base": true | |
| }, | |
| "profiles": { | |
| "exact": { | |
| "latency_ms": 242.51539027318358, | |
| "m3_fraction": 0.0, | |
| "resident_bytes": 19337216, | |
| "selector_us": 0.0, | |
| "source_records": 7, | |
| "text": " matters for fast decoding.Cache locality matters", | |
| "token_count": 8 | |
| }, | |
| "learned_selector": { | |
| "latency_ms": 74.78160472237505, | |
| "m3_fraction": 0.98779296875, | |
| "resident_bytes": 50642944, | |
| "selector_us": 25.876024622562, | |
| "source_records": 7, | |
| "text": " matters for fast decoding.Cache locality matters", | |
| "token_count": 8 | |
| }, | |
| "shortlist_base": { | |
| "latency_ms": 242.637697578175, | |
| "m3_fraction": 0.0, | |
| "resident_bytes": 19337216, | |
| "selector_us": 0.0, | |
| "source_records": 7, | |
| "text": " matters for fast decoding.Cache locality matters", | |
| "token_count": 8 | |
| } | |
| } | |
| }, | |
| "2048": { | |
| "output_match": { | |
| "learned_selector": true, | |
| "shortlist_base": true | |
| }, | |
| "profiles": { | |
| "exact": { | |
| "latency_ms": 404.7011856455356, | |
| "m3_fraction": 0.0, | |
| "resident_bytes": 38146048, | |
| "selector_us": 0.0, | |
| "source_records": 7, | |
| "text": " fast decoding.Cache locality matters for fast", | |
| "token_count": 8 | |
| }, | |
| "learned_selector": { | |
| "latency_ms": 105.71580115356483, | |
| "m3_fraction": 0.99853515625, | |
| "resident_bytes": 101265408, | |
| "selector_us": 25.52354613629047, | |
| "source_records": 7, | |
| "text": " fast decoding.Cache locality matters for fast", | |
| "token_count": 8 | |
| }, | |
| "shortlist_base": { | |
| "latency_ms": 307.73281096480787, | |
| "m3_fraction": 0.0, | |
| "resident_bytes": 37134336, | |
| "selector_us": 0.0, | |
| "source_records": 7, | |
| "text": " fast decoding.Cache locality matters for fast", | |
| "token_count": 8 | |
| } | |
| } | |
| } | |
| } | |
| } |