benchmarks_8bit_batch_size45 / benchmark_report.json
AwAppp's picture
Upload TextGenerationReport
fb2d10f verified
{
"prefill": {
"memory": {
"unit": "MB",
"max_ram": 3549.92128,
"max_vram": 14845.739008,
"max_reserved": 14352.908288,
"max_allocated": 11606.360064
},
"latency": {
"unit": "s",
"mean": 0.5109179931640624,
"stdev": 0.004383463883216298,
"values": [
0.53000830078125,
0.5098250122070312,
0.5102458801269532,
0.5097297973632813,
0.5099376525878906,
0.5097881469726563,
0.5104261169433594,
0.5099018249511719,
0.5098321838378906,
0.5098639221191407,
0.5100369873046875,
0.5096642456054687,
0.5097779235839843,
0.5097962951660157,
0.5097553405761719,
0.5098250122070312,
0.5100277709960938,
0.5098649597167969,
0.5101854858398438,
0.5098670043945313
]
},
"throughput": {
"unit": "tokens/s",
"value": 1409.228114165865
},
"energy": null,
"efficiency": null
},
"decode": {
"memory": {
"unit": "MB",
"max_ram": 3551.92832,
"max_vram": 14759.755776,
"max_reserved": 14266.925056,
"max_allocated": 11843.495936
},
"latency": {
"unit": "s",
"mean": 30.834580535888673,
"stdev": 0,
"values": [
30.834580535888673
]
},
"throughput": {
"unit": "tokens/s",
"value": 144.48064227158147
},
"energy": null,
"efficiency": null
},
"per_token": {
"memory": null,
"latency": {
"unit": "s",
"mean": 0.31146040945342096,
"stdev": 0.00580967786324836,
"values": [
0.3253524475097656,
0.3248230285644531,
0.3244328918457031,
0.3237283935546875,
0.3233484802246094,
0.32246578979492185,
0.32264602661132813,
0.3228252258300781,
0.32109976196289064,
0.32234906005859376,
0.3202990112304688,
0.3204403076171875,
0.3197132873535156,
0.31988327026367186,
0.31876708984375,
0.31852850341796873,
0.3186565246582031,
0.3181373291015625,
0.3178946533203125,
0.31765606689453124,
0.3175116882324219,
0.31647128295898436,
0.3162511291503906,
0.31640267944335937,
0.3156367492675781,
0.315831298828125,
0.31425741577148436,
0.31492095947265625,
0.31412429809570314,
0.31408538818359377,
0.3142686767578125,
0.31263641357421873,
0.31367474365234377,
0.31325082397460935,
0.312163330078125,
0.31181927490234373,
0.3119718322753906,
0.312595458984375,
0.3109191589355469,
0.311510009765625,
0.3109140625,
0.3108126831054687,
0.31065496826171873,
0.30986138916015626,
0.31169537353515625,
0.3097733154296875,
0.3091578979492188,
0.30993612670898435,
0.3092162475585937,
0.30841754150390627,
0.30810214233398436,
0.308853759765625,
0.30807449340820314,
0.3079454650878906,
0.3077427062988281,
0.3072255859375,
0.3082536926269531,
0.3072255859375,
0.3066542053222656,
0.30747341918945315,
0.3070986328125,
0.3071907958984375,
0.30675454711914063,
0.30768743896484374,
0.30642584228515624,
0.30630194091796875,
0.30604901123046874,
0.3071754150390625,
0.3059056701660156,
0.3065262145996094,
0.30605926513671877,
0.3071703186035156,
0.305912841796875,
0.30682623291015626,
0.30604083251953124,
0.30646783447265624,
0.3067924499511719,
0.30568856811523437,
0.30546945190429686,
0.3068098449707031,
0.3057121276855469,
0.30628350830078127,
0.30528720092773437,
0.30723989868164064,
0.3059609680175781,
0.30624664306640625,
0.30603469848632814,
0.3069859924316406,
0.30622311401367186,
0.3061012573242187,
0.3058923645019531,
0.3075512390136719,
0.3062742919921875,
0.30709759521484375,
0.3058831481933594,
0.3073331298828125,
0.30729830932617186,
0.30725631713867185,
0.30622515869140626
]
},
"throughput": {
"unit": "tokens/s",
"value": 144.48064227158144
},
"energy": null,
"efficiency": null
}
}