benchmarks_8bit_batch_size35 / benchmark_report.json
AwAppp's picture
Upload TextGenerationReport
523d850 verified
{
"prefill": {
"memory": {
"unit": "MB",
"max_ram": 3537.494016,
"max_vram": 14564.72064,
"max_reserved": 14071.88992,
"max_allocated": 11367.31392
},
"latency": {
"unit": "s",
"mean": 0.4472763897439709,
"stdev": 0.0047645116545721,
"values": [
0.4695675048828125,
0.44648654174804686,
0.44618341064453126,
0.4461240234375,
0.446529541015625,
0.4470425720214844,
0.44600115966796877,
0.44631143188476563,
0.4458629150390625,
0.4460113830566406,
0.4461997985839844,
0.4461055908203125,
0.44638311767578126,
0.44606976318359376,
0.44569393920898437,
0.4462243957519531,
0.44664935302734377,
0.4462377014160156,
0.44663296508789063,
0.4456673278808594,
0.44621517944335937,
0.4461690979003906,
0.4469882507324219
]
},
"throughput": {
"unit": "tokens/s",
"value": 1252.022268200998
},
"energy": null,
"efficiency": null
},
"decode": {
"memory": {
"unit": "MB",
"max_ram": 3539.29216,
"max_vram": 14508.097536,
"max_reserved": 14015.266816,
"max_allocated": 11559.135232
},
"latency": {
"unit": "s",
"mean": 29.939338134765624,
"stdev": 0,
"values": [
29.939338134765624
]
},
"throughput": {
"unit": "tokens/s",
"value": 115.7340213869469
},
"energy": null,
"efficiency": null
},
"per_token": {
"memory": null,
"latency": {
"unit": "s",
"mean": 0.3024175569168245,
"stdev": 0.006495047756518274,
"values": [
0.3167109069824219,
0.31577908325195314,
0.3155199890136719,
0.31530496215820314,
0.31362252807617186,
0.31696792602539064,
0.31384576416015625,
0.31296615600585936,
0.3133685607910156,
0.31271627807617186,
0.311510009765625,
0.31146701049804687,
0.3107440490722656,
0.3104020385742188,
0.31054949951171873,
0.30999551391601565,
0.3091507263183594,
0.3089141845703125,
0.30916915893554686,
0.3088988037109375,
0.3080447998046875,
0.30975588989257813,
0.3072440185546875,
0.30704129028320315,
0.3072491455078125,
0.30681805419921876,
0.3068426208496094,
0.3062528076171875,
0.3066050415039063,
0.3056844787597656,
0.3046492309570312,
0.30730548095703125,
0.30522265625,
0.3050281066894531,
0.305033203125,
0.3047301025390625,
0.3045478515625,
0.3045509033203125,
0.3057029113769531,
0.3038760986328125,
0.30308966064453124,
0.303994873046875,
0.3024803771972656,
0.30274252319335937,
0.3025059814453125,
0.30242303466796877,
0.3026882629394531,
0.30180352783203124,
0.30188235473632813,
0.3011389465332031,
0.3005388793945313,
0.3004610595703125,
0.30039654541015626,
0.29937765502929686,
0.3015362548828125,
0.29953536987304685,
0.2986639404296875,
0.29938278198242185,
0.298392578125,
0.2982430725097656,
0.29859329223632813,
0.2981652526855469,
0.29810891723632815,
0.29872332763671877,
0.29826150512695315,
0.29788363647460936,
0.2971463623046875,
0.2972651672363281,
0.29730508422851565,
0.2972364807128906,
0.29666302490234375,
0.30092901611328127,
0.2961336364746094,
0.29574554443359374,
0.29620736694335936,
0.2961448974609375,
0.29597183227539064,
0.2953799743652344,
0.29584384155273435,
0.29593701171875,
0.29542706298828125,
0.2955735168457031,
0.2946253051757812,
0.2954874572753906,
0.29425869750976563,
0.2946621398925781,
0.2944389038085938,
0.2949775390625,
0.2961080322265625,
0.29468057250976565,
0.2936627197265625,
0.29617459106445315,
0.2940631103515625,
0.2944245910644531,
0.2944296875,
0.29573223876953125,
0.29399038696289065,
0.294677490234375,
0.2972333984375
]
},
"throughput": {
"unit": "tokens/s",
"value": 115.7340213869469
},
"energy": null,
"efficiency": null
}
}