benchmarks_8bit_batch_size20 / benchmark_report.json
AwAppp's picture
Upload TextGenerationReport
123793a verified
{
"prefill": {
"memory": {
"unit": "MB",
"max_ram": 3602.481152,
"max_vram": 13600.03072,
"max_reserved": 13107.2,
"max_allocated": 10874.392064
},
"latency": {
"unit": "s",
"mean": 0.3773231913248697,
"stdev": 0.004032105846019261,
"values": [
0.39744345092773437,
0.38059417724609373,
0.3768924255371094,
0.37622784423828126,
0.37658932495117187,
0.37643670654296874,
0.37635992431640625,
0.3764459533691406,
0.37676953125,
0.3766363830566406,
0.3758919677734375,
0.3761131591796875,
0.3763240966796875,
0.3764326477050781,
0.37676953125,
0.3764981689453125,
0.37646951293945313,
0.3763261413574219,
0.3764234313964844,
0.37589297485351564,
0.3767470092773437,
0.3763292236328125,
0.3761827697753906,
0.3762247619628906,
0.37629031372070315,
0.37621145629882813,
0.3762032775878906
]
},
"throughput": {
"unit": "tokens/s",
"value": 848.0793318756936
},
"energy": null,
"efficiency": null
},
"decode": {
"memory": {
"unit": "MB",
"max_ram": 3622.457344,
"max_vram": 13816.037376,
"max_reserved": 13323.206656,
"max_allocated": 10993.732608
},
"latency": {
"unit": "s",
"mean": 28.168144866943358,
"stdev": 0,
"values": [
28.168144866943358
]
},
"throughput": {
"unit": "tokens/s",
"value": 70.29216902117055
},
"energy": null,
"efficiency": null
},
"per_token": {
"memory": null,
"latency": {
"unit": "s",
"mean": 0.28452671582771066,
"stdev": 0.0049875892887557205,
"values": [
0.2992322692871094,
0.2963630065917969,
0.2955704345703125,
0.29482598876953126,
0.29479730224609374,
0.29364532470703125,
0.2929387512207031,
0.29414093017578125,
0.29279232788085935,
0.2917713928222656,
0.29185842895507813,
0.2903203735351563,
0.2909624328613281,
0.29145086669921877,
0.2901166076660156,
0.2893209533691406,
0.29001214599609376,
0.29043405151367185,
0.28881613159179687,
0.28931686401367185,
0.28846795654296875,
0.28854989624023436,
0.2885509033203125,
0.28781976318359376,
0.28798974609375,
0.28707839965820314,
0.28717465209960935,
0.2871705627441406,
0.28670053100585935,
0.2863319091796875,
0.28658892822265625,
0.28629605102539063,
0.28560385131835936,
0.28722994995117185,
0.2855628662109375,
0.28619161987304687,
0.2864005126953125,
0.28537957763671873,
0.2857574462890625,
0.2856570739746094,
0.28499456787109373,
0.2846719970703125,
0.28515225219726564,
0.2836234130859375,
0.2847836303710938,
0.2847744140625,
0.2835599365234375,
0.28438424682617186,
0.28307763671875,
0.28367974853515626,
0.2833827819824219,
0.28306533813476564,
0.28318310546875,
0.283335693359375,
0.28157952880859377,
0.2825861206054687,
0.28271923828125,
0.2822932434082031,
0.28188568115234375,
0.2817966003417969,
0.2816174011230469,
0.2814392395019531,
0.28117196655273435,
0.28046029663085936,
0.2814535827636719,
0.28160614013671875,
0.2794721374511719,
0.2817894287109375,
0.2799134826660156,
0.28016229248046876,
0.27944244384765626,
0.28090060424804686,
0.27975372314453123,
0.2797445068359375,
0.28207308959960936,
0.2814228515625,
0.2799288330078125,
0.279947265625,
0.27979571533203124,
0.280489990234375,
0.27977828979492186,
0.2800814208984375,
0.27924581909179685,
0.28012442016601563,
0.2795233154296875,
0.2790953063964844,
0.2790983581542969,
0.2792755126953125,
0.278697998046875,
0.2785955810546875,
0.27879833984375,
0.2794700927734375,
0.2787348327636719,
0.2788065185546875,
0.27856280517578125,
0.2788147277832031,
0.27881063842773435,
0.2785607604980469,
0.2777671813964844
]
},
"throughput": {
"unit": "tokens/s",
"value": 70.29216902117055
},
"energy": null,
"efficiency": null
}
}