{ "qwen35_27b_hf": { "1024": { "output_match": { "learned_selector": true, "shortlist_base": true }, "profiles": { "exact": { "latency_ms": 485.96312350127846, "m3_fraction": 0.0, "resident_bytes": 38670336, "selector_us": 0.0, "source_records": 7, "text": " matters for fast decoding.\n\n\n", "token_count": 8 }, "learned_selector": { "latency_ms": 149.39251463511027, "m3_fraction": 0.99462890625, "resident_bytes": 99929088, "selector_us": 24.829375597335, "source_records": 7, "text": " matters for fast decoding.\n\n\n", "token_count": 8 }, "shortlist_base": { "latency_ms": 504.25705371890217, "m3_fraction": 0.0, "resident_bytes": 38670336, "selector_us": 0.0, "source_records": 7, "text": " matters for fast decoding.\n\n\n", "token_count": 8 } } }, "2048": { "output_match": { "learned_selector": true, "shortlist_base": true }, "profiles": { "exact": { "latency_ms": 821.4117659954354, "m3_fraction": 0.0, "resident_bytes": 76288000, "selector_us": 0.0, "source_records": 7, "text": " fast decoding.\n\n\nThinking Process", "token_count": 8 }, "learned_selector": { "latency_ms": 236.0101520025637, "m3_fraction": 0.9954833984375, "resident_bytes": 189485056, "selector_us": 24.959413796718113, "source_records": 7, "text": " fast decoding.\n\n\nThinking Process", "token_count": 8 }, "shortlist_base": { "latency_ms": 626.3046781823505, "m3_fraction": 0.0, "resident_bytes": 75898880, "selector_us": 0.0, "source_records": 7, "text": " fast decoding.\n\n\nThinking Process", "token_count": 8 } } } }, "qwen35_4b_hf": { "1024": { "output_match": { "learned_selector": true, "shortlist_base": true }, "profiles": { "exact": { "latency_ms": 232.18651674687862, "m3_fraction": 0.0, "resident_bytes": 19337216, "selector_us": 0.0, "source_records": 7, "text": " matters for fast decoding.\n\nCache locality", "token_count": 8 }, "learned_selector": { "latency_ms": 71.47038698894903, "m3_fraction": 0.982421875, "resident_bytes": 50163712, "selector_us": 25.20372302683427, "source_records": 7, "text": " matters for fast decoding.\n\nCache locality", "token_count": 8 }, "shortlist_base": { "latency_ms": 242.41085114772432, "m3_fraction": 0.0, "resident_bytes": 19337216, "selector_us": 0.0, "source_records": 7, "text": " matters for fast decoding.\n\nCache locality", "token_count": 8 } } }, "2048": { "output_match": { "learned_selector": true, "shortlist_base": true }, "profiles": { "exact": { "latency_ms": 400.1560862525366, "m3_fraction": 0.0, "resident_bytes": 38146048, "selector_us": 0.0, "source_records": 7, "text": " fast decoding.Cache locality matters for fast", "token_count": 8 }, "learned_selector": { "latency_ms": 122.57132897502743, "m3_fraction": 0.9710693359375, "resident_bytes": 99615744, "selector_us": 25.1929690234322, "source_records": 7, "text": " fast decoding.Cache locality matters for fast", "token_count": 8 }, "shortlist_base": { "latency_ms": 300.46495210262947, "m3_fraction": 0.0, "resident_bytes": 37134336, "selector_us": 0.0, "source_records": 7, "text": " fast decoding.Cache locality matters for fast", "token_count": 8 } } } }, "qwen35_9b_hf": { "1024": { "output_match": { "learned_selector": true, "shortlist_base": true }, "profiles": { "exact": { "latency_ms": 242.51539027318358, "m3_fraction": 0.0, "resident_bytes": 19337216, "selector_us": 0.0, "source_records": 7, "text": " matters for fast decoding.Cache locality matters", "token_count": 8 }, "learned_selector": { "latency_ms": 74.78160472237505, "m3_fraction": 0.98779296875, "resident_bytes": 50642944, "selector_us": 25.876024622562, "source_records": 7, "text": " matters for fast decoding.Cache locality matters", "token_count": 8 }, "shortlist_base": { "latency_ms": 242.637697578175, "m3_fraction": 0.0, "resident_bytes": 19337216, "selector_us": 0.0, "source_records": 7, "text": " matters for fast decoding.Cache locality matters", "token_count": 8 } } }, "2048": { "output_match": { "learned_selector": true, "shortlist_base": true }, "profiles": { "exact": { "latency_ms": 404.7011856455356, "m3_fraction": 0.0, "resident_bytes": 38146048, "selector_us": 0.0, "source_records": 7, "text": " fast decoding.Cache locality matters for fast", "token_count": 8 }, "learned_selector": { "latency_ms": 105.71580115356483, "m3_fraction": 0.99853515625, "resident_bytes": 101265408, "selector_us": 25.52354613629047, "source_records": 7, "text": " fast decoding.Cache locality matters for fast", "token_count": 8 }, "shortlist_base": { "latency_ms": 307.73281096480787, "m3_fraction": 0.0, "resident_bytes": 37134336, "selector_us": 0.0, "source_records": 7, "text": " fast decoding.Cache locality matters for fast", "token_count": 8 } } } } }