DotCache-Arena / data /benchmark_bundle /backend_truth_source.json
Deano Calver
Expand live context selection and sync backend source
0d6ced2
Raw
History Blame Contribute Delete
6.55 kB
{
"qwen35_27b_hf": {
"1024": {
"output_match": {
"learned_selector": true,
"shortlist_base": true
},
"profiles": {
"exact": {
"latency_ms": 485.96312350127846,
"m3_fraction": 0.0,
"resident_bytes": 38670336,
"selector_us": 0.0,
"source_records": 7,
"text": " matters for fast decoding.\n\n<think>\n",
"token_count": 8
},
"learned_selector": {
"latency_ms": 149.39251463511027,
"m3_fraction": 0.99462890625,
"resident_bytes": 99929088,
"selector_us": 24.829375597335,
"source_records": 7,
"text": " matters for fast decoding.\n\n<think>\n",
"token_count": 8
},
"shortlist_base": {
"latency_ms": 504.25705371890217,
"m3_fraction": 0.0,
"resident_bytes": 38670336,
"selector_us": 0.0,
"source_records": 7,
"text": " matters for fast decoding.\n\n<think>\n",
"token_count": 8
}
}
},
"2048": {
"output_match": {
"learned_selector": true,
"shortlist_base": true
},
"profiles": {
"exact": {
"latency_ms": 821.4117659954354,
"m3_fraction": 0.0,
"resident_bytes": 76288000,
"selector_us": 0.0,
"source_records": 7,
"text": " fast decoding.\n\n<think>\nThinking Process",
"token_count": 8
},
"learned_selector": {
"latency_ms": 236.0101520025637,
"m3_fraction": 0.9954833984375,
"resident_bytes": 189485056,
"selector_us": 24.959413796718113,
"source_records": 7,
"text": " fast decoding.\n\n<think>\nThinking Process",
"token_count": 8
},
"shortlist_base": {
"latency_ms": 626.3046781823505,
"m3_fraction": 0.0,
"resident_bytes": 75898880,
"selector_us": 0.0,
"source_records": 7,
"text": " fast decoding.\n\n<think>\nThinking Process",
"token_count": 8
}
}
}
},
"qwen35_4b_hf": {
"1024": {
"output_match": {
"learned_selector": true,
"shortlist_base": true
},
"profiles": {
"exact": {
"latency_ms": 232.18651674687862,
"m3_fraction": 0.0,
"resident_bytes": 19337216,
"selector_us": 0.0,
"source_records": 7,
"text": " matters for fast decoding.\n\nCache locality",
"token_count": 8
},
"learned_selector": {
"latency_ms": 71.47038698894903,
"m3_fraction": 0.982421875,
"resident_bytes": 50163712,
"selector_us": 25.20372302683427,
"source_records": 7,
"text": " matters for fast decoding.\n\nCache locality",
"token_count": 8
},
"shortlist_base": {
"latency_ms": 242.41085114772432,
"m3_fraction": 0.0,
"resident_bytes": 19337216,
"selector_us": 0.0,
"source_records": 7,
"text": " matters for fast decoding.\n\nCache locality",
"token_count": 8
}
}
},
"2048": {
"output_match": {
"learned_selector": true,
"shortlist_base": true
},
"profiles": {
"exact": {
"latency_ms": 400.1560862525366,
"m3_fraction": 0.0,
"resident_bytes": 38146048,
"selector_us": 0.0,
"source_records": 7,
"text": " fast decoding.Cache locality matters for fast",
"token_count": 8
},
"learned_selector": {
"latency_ms": 122.57132897502743,
"m3_fraction": 0.9710693359375,
"resident_bytes": 99615744,
"selector_us": 25.1929690234322,
"source_records": 7,
"text": " fast decoding.Cache locality matters for fast",
"token_count": 8
},
"shortlist_base": {
"latency_ms": 300.46495210262947,
"m3_fraction": 0.0,
"resident_bytes": 37134336,
"selector_us": 0.0,
"source_records": 7,
"text": " fast decoding.Cache locality matters for fast",
"token_count": 8
}
}
}
},
"qwen35_9b_hf": {
"1024": {
"output_match": {
"learned_selector": true,
"shortlist_base": true
},
"profiles": {
"exact": {
"latency_ms": 242.51539027318358,
"m3_fraction": 0.0,
"resident_bytes": 19337216,
"selector_us": 0.0,
"source_records": 7,
"text": " matters for fast decoding.Cache locality matters",
"token_count": 8
},
"learned_selector": {
"latency_ms": 74.78160472237505,
"m3_fraction": 0.98779296875,
"resident_bytes": 50642944,
"selector_us": 25.876024622562,
"source_records": 7,
"text": " matters for fast decoding.Cache locality matters",
"token_count": 8
},
"shortlist_base": {
"latency_ms": 242.637697578175,
"m3_fraction": 0.0,
"resident_bytes": 19337216,
"selector_us": 0.0,
"source_records": 7,
"text": " matters for fast decoding.Cache locality matters",
"token_count": 8
}
}
},
"2048": {
"output_match": {
"learned_selector": true,
"shortlist_base": true
},
"profiles": {
"exact": {
"latency_ms": 404.7011856455356,
"m3_fraction": 0.0,
"resident_bytes": 38146048,
"selector_us": 0.0,
"source_records": 7,
"text": " fast decoding.Cache locality matters for fast",
"token_count": 8
},
"learned_selector": {
"latency_ms": 105.71580115356483,
"m3_fraction": 0.99853515625,
"resident_bytes": 101265408,
"selector_us": 25.52354613629047,
"source_records": 7,
"text": " fast decoding.Cache locality matters for fast",
"token_count": 8
},
"shortlist_base": {
"latency_ms": 307.73281096480787,
"m3_fraction": 0.0,
"resident_bytes": 37134336,
"selector_us": 0.0,
"source_records": 7,
"text": " fast decoding.Cache locality matters for fast",
"token_count": 8
}
}
}
}
}