benchmarks_8bit_batch_size10 / benchmark_report.json
AwAppp's picture
Upload TextGenerationReport
bc0155d verified
{
"prefill": {
"memory": {
"unit": "MB",
"max_ram": 3525.599232,
"max_vram": 11714.691072,
"max_reserved": 11221.860352,
"max_allocated": 9991.724544
},
"latency": {
"unit": "s",
"mean": 0.3010050461713006,
"stdev": 0.005007823178304763,
"values": [
0.32892837524414065,
0.3053179016113281,
0.3002685546875,
0.2994657287597656,
0.3013898315429687,
0.29939508056640624,
0.29945855712890623,
0.3003228149414062,
0.29954458618164065,
0.29985791015625,
0.3006095275878906,
0.3026851806640625,
0.2997637023925781,
0.300037109375,
0.2996305847167969,
0.29964801025390625,
0.3008983154296875,
0.2997923889160156,
0.2989916076660156,
0.29947698974609377,
0.2995968017578125,
0.2994698181152344,
0.29934283447265625,
0.299536376953125,
0.29968487548828127,
0.29946783447265624,
0.30125567626953126,
0.30218035888671874,
0.2998056945800781,
0.29990194702148437,
0.2995118103027344,
0.29968487548828127,
0.29941860961914063,
0.299831298828125
]
},
"throughput": {
"unit": "tokens/s",
"value": 531.5525504809801
},
"energy": null,
"efficiency": null
},
"decode": {
"memory": {
"unit": "MB",
"max_ram": 3536.162816,
"max_vram": 12268.3392,
"max_reserved": 11775.50848,
"max_allocated": 10078.902272
},
"latency": {
"unit": "s",
"mean": 25.19557006835937,
"stdev": 0,
"values": [
25.19557006835937
]
},
"throughput": {
"unit": "tokens/s",
"value": 39.292621572521725
},
"energy": null,
"efficiency": null
},
"per_token": {
"memory": null,
"latency": {
"unit": "s",
"mean": 0.25450070776120576,
"stdev": 0.0028173351708351006,
"values": [
0.2622341003417969,
0.2604031982421875,
0.2598922119140625,
0.259346435546875,
0.26023934936523435,
0.259125244140625,
0.259641357421875,
0.2600816650390625,
0.2582261657714844,
0.2586593322753906,
0.2587289733886719,
0.2589450378417969,
0.2583848876953125,
0.2578052978515625,
0.25718887329101564,
0.2568939514160156,
0.25893478393554686,
0.2582753295898437,
0.25697381591796875,
0.2581626892089844,
0.256964599609375,
0.256321533203125,
0.25642291259765626,
0.25666763305664064,
0.25534873962402344,
0.25577267456054686,
0.25576243591308595,
0.25529037475585936,
0.25610955810546876,
0.2554173431396484,
0.2555606994628906,
0.25545318603515627,
0.2541045684814453,
0.2553528289794922,
0.2549780426025391,
0.2547394561767578,
0.2549903411865234,
0.2602659912109375,
0.2590689392089844,
0.25401242065429686,
0.2539376678466797,
0.254271484375,
0.2540543975830078,
0.25435136413574216,
0.2539878387451172,
0.25366937255859373,
0.25358233642578126,
0.25377484130859373,
0.2529126434326172,
0.2540585021972656,
0.25315840148925783,
0.2530897979736328,
0.2536099853515625,
0.2528061370849609,
0.25310617065429686,
0.25320346069335936,
0.2535393218994141,
0.2527958984375,
0.253264892578125,
0.25287986755371095,
0.2530160675048828,
0.2529689636230469,
0.25187532043457034,
0.25355877685546874,
0.25151078796386717,
0.2520494079589844,
0.25327001953125,
0.25154150390625,
0.2519019470214844,
0.25225625610351565,
0.25124659729003906,
0.25213848876953127,
0.25257369995117185,
0.2515240936279297,
0.2524241943359375,
0.2529085388183594,
0.2525675506591797,
0.25248051452636716,
0.2522879943847656,
0.2524682312011719,
0.2532095947265625,
0.25251429748535154,
0.2515650634765625,
0.25252351379394533,
0.25155378723144534,
0.2517022705078125,
0.25210067749023435,
0.25160902404785157,
0.25191014099121095,
0.25165721130371094,
0.25126296997070313,
0.25138584899902344,
0.250998779296875,
0.2516172790527344,
0.2514114532470703,
0.25093426513671874,
0.25146060180664065,
0.251789306640625,
0.25099366760253905
]
},
"throughput": {
"unit": "tokens/s",
"value": 39.292621572521725
},
"energy": null,
"efficiency": null
}
}