benchmarks_8bit_batch_size40 / benchmark_report.json
AwAppp's picture
Upload TextGenerationReport
eda8ec4 verified
{
"prefill": {
"memory": {
"unit": "MB",
"max_ram": 3649.363968,
"max_vram": 15097.397248,
"max_reserved": 14604.566528,
"max_allocated": 11492.043264
},
"latency": {
"unit": "s",
"mean": 0.4630080330588601,
"stdev": 0.004482724887650619,
"values": [
0.4835208435058594,
0.4625725402832031,
0.4620338439941406,
0.46234112548828127,
0.4619939880371094,
0.4617963562011719,
0.4621127624511719,
0.4619253540039063,
0.4620205993652344,
0.4620994567871094,
0.46176766967773436,
0.4620769348144531,
0.4618219299316406,
0.4617502746582031,
0.4620902404785156,
0.4627476501464844,
0.4620103759765625,
0.4619530334472656,
0.4618577880859375,
0.46192333984375,
0.4618618774414063,
0.46189874267578124
]
},
"throughput": {
"unit": "tokens/s",
"value": 1382.265434514912
},
"energy": null,
"efficiency": null
},
"decode": {
"memory": {
"unit": "MB",
"max_ram": 3649.363968,
"max_vram": 15755.902976,
"max_reserved": 15263.072256,
"max_allocated": 11697.747968
},
"latency": {
"unit": "s",
"mean": 30.39823352050782,
"stdev": 0,
"values": [
30.39823352050782
]
},
"throughput": {
"unit": "tokens/s",
"value": 130.27072765029033
},
"energy": null,
"efficiency": null
},
"per_token": {
"memory": null,
"latency": {
"unit": "s",
"mean": 0.3070528638435133,
"stdev": 0.00619475129853309,
"values": [
0.32103317260742187,
0.320458740234375,
0.3195606994628906,
0.3190783996582031,
0.3187373962402344,
0.3189104614257813,
0.31889306640625,
0.31756594848632813,
0.3173099670410156,
0.3172567138671875,
0.3162204284667969,
0.31606475830078123,
0.31606173706054685,
0.3153530578613281,
0.31523019409179687,
0.3142891540527344,
0.3149322204589844,
0.3137945556640625,
0.3137208251953125,
0.31293234252929686,
0.3128350830078125,
0.3129702453613281,
0.31244903564453125,
0.31200460815429687,
0.3119308776855469,
0.31241726684570315,
0.3110267028808594,
0.31101849365234374,
0.31155813598632814,
0.31053829956054685,
0.30974969482421877,
0.30999859619140624,
0.3091568603515625,
0.3098849182128906,
0.30947122192382814,
0.30916403198242187,
0.3083642883300781,
0.30852301025390627,
0.30778573608398435,
0.308284423828125,
0.30778265380859376,
0.3074211730957031,
0.30655078125,
0.3073249206542969,
0.30658560180664063,
0.30559130859375,
0.30594354248046873,
0.30640640258789065,
0.3051796569824219,
0.3058493347167969,
0.3058452453613281,
0.30491134643554685,
0.305227783203125,
0.3053455505371094,
0.30429080200195313,
0.3045038146972656,
0.3032811584472656,
0.30313677978515624,
0.30287359619140625,
0.3031756896972656,
0.3024117736816406,
0.3021936645507812,
0.301380615234375,
0.3028357238769531,
0.30214654541015623,
0.3021240234375,
0.30190899658203124,
0.3019069519042969,
0.30141543579101565,
0.3013048400878906,
0.30091162109375,
0.30223974609375,
0.30069451904296873,
0.3011194763183594,
0.30109390258789065,
0.30174822998046874,
0.3004610595703125,
0.3011061706542969,
0.30095974731445313,
0.3013918762207031,
0.29992755126953125,
0.3006238708496094,
0.30073651123046874,
0.30130075073242185,
0.30008935546875,
0.3009218444824219,
0.30135092163085936,
0.30108978271484377,
0.3004989318847656,
0.3007057800292969,
0.30057470703125,
0.30154238891601565,
0.30043032836914063,
0.30031768798828123,
0.29990093994140626,
0.3013959655761719,
0.30073855590820314,
0.3008112487792969,
0.30015896606445314
]
},
"throughput": {
"unit": "tokens/s",
"value": 130.27072765029033
},
"energy": null,
"efficiency": null
}
}