benchmarks_8bit_batch_size30 / benchmark_report.json
AwAppp's picture
Upload TextGenerationReport
e234a15 verified
{
"prefill": {
"memory": {
"unit": "MB",
"max_ram": 3581.227008,
"max_vram": 14426.308608,
"max_reserved": 13933.477888,
"max_allocated": 11182.528
},
"latency": {
"unit": "s",
"mean": 0.4189964497884115,
"stdev": 0.004250981279472382,
"values": [
0.4385589294433594,
0.423773193359375,
0.41801422119140624,
0.41807257080078125,
0.4178309020996094,
0.4186828918457031,
0.41806951904296874,
0.4179261474609375,
0.41801318359375,
0.4178155517578125,
0.41779302978515626,
0.41764556884765625,
0.4177346496582031,
0.41765786743164063,
0.41786880493164064,
0.41775518798828126,
0.41766604614257813,
0.417807373046875,
0.4177121276855469,
0.41813299560546874,
0.41812069702148436,
0.41768341064453124,
0.41801422119140624,
0.4175657043457031
]
},
"throughput": {
"unit": "tokens/s",
"value": 1145.594432225845
},
"energy": null,
"efficiency": null
},
"decode": {
"memory": {
"unit": "MB",
"max_ram": 3601.494016,
"max_vram": 14321.451008,
"max_reserved": 13828.620288,
"max_allocated": 11336.600064
},
"latency": {
"unit": "s",
"mean": 29.4220535583496,
"stdev": 0,
"values": [
29.4220535583496
]
},
"throughput": {
"unit": "tokens/s",
"value": 100.94468743012509
},
"energy": null,
"efficiency": null
},
"per_token": {
"memory": null,
"latency": {
"unit": "s",
"mean": 0.2971924601853495,
"stdev": 0.006030435507012704,
"values": [
0.3147960205078125,
0.30984600830078124,
0.30846875,
0.30889166259765627,
0.3081308288574219,
0.30789120483398436,
0.30744577026367187,
0.3064289245605469,
0.3073105773925781,
0.3058995056152344,
0.30636135864257813,
0.3053465576171875,
0.30569369506835936,
0.3047475280761719,
0.30424884033203126,
0.30414337158203125,
0.3038443603515625,
0.3047669677734375,
0.3036334228515625,
0.3034777526855469,
0.30244659423828124,
0.30226739501953126,
0.3015546875,
0.30261453247070313,
0.3014553527832031,
0.3018014831542969,
0.30137139892578124,
0.3006924743652344,
0.3007979431152344,
0.30056549072265626,
0.30052557373046873,
0.299931640625,
0.2989137878417969,
0.2993786926269531,
0.2994145202636719,
0.2986608581542969,
0.29832498168945315,
0.2994288635253906,
0.2982072448730469,
0.29848779296875,
0.297818115234375,
0.2969241638183594,
0.29708697509765625,
0.2975580139160156,
0.29730816650390623,
0.29664154052734376,
0.2972283020019531,
0.296310791015625,
0.29754879760742187,
0.2962001953125,
0.29624114990234374,
0.296637451171875,
0.2953021545410156,
0.2963804016113281,
0.29485159301757813,
0.294877197265625,
0.2944604187011719,
0.2946324462890625,
0.2944860229492188,
0.2944901123046875,
0.2939412536621094,
0.29395968627929686,
0.29282098388671873,
0.29332992553710935,
0.2928732299804688,
0.291852294921875,
0.29285171508789065,
0.2923458557128906,
0.29213082885742186,
0.292485107421875,
0.2925527038574219,
0.29165057373046877,
0.29194955444335935,
0.292611083984375,
0.29196389770507813,
0.2922905578613281,
0.2916874389648437,
0.291746826171875,
0.2913392639160156,
0.2908856201171875,
0.29074432373046877,
0.2905907287597656,
0.2906265563964844,
0.29140069580078126,
0.289986572265625,
0.2901381225585937,
0.29077197265625,
0.29088052368164063,
0.2900203552246094,
0.2901637268066406,
0.29011456298828125,
0.2901842041015625,
0.29018316650390624,
0.290260986328125,
0.29000909423828125,
0.29029067993164065,
0.28940081787109373,
0.2889861145019531,
0.2888335266113281
]
},
"throughput": {
"unit": "tokens/s",
"value": 100.94468743012509
},
"energy": null,
"efficiency": null
}
}