benchmarks_8bit_batch_size15 / benchmark_report.json
AwAppp's picture
Upload TextGenerationReport
b7fd829 verified
{
"prefill": {
"memory": {
"unit": "MB",
"max_ram": 3470.56128,
"max_vram": 12769.558528,
"max_reserved": 12276.727808,
"max_allocated": 10493.853184
},
"latency": {
"unit": "s",
"mean": 0.3322811594317036,
"stdev": 0.004201918534069472,
"values": [
0.3548702697753906,
0.3352504272460938,
0.3326003112792969,
0.33182821655273437,
0.3319469299316406,
0.33254296875,
0.3314892883300781,
0.33137869262695313,
0.33092608642578125,
0.3313489990234375,
0.3318661193847656,
0.3311390686035156,
0.33138790893554687,
0.3314851989746094,
0.33103564453125,
0.3311247253417969,
0.3315466003417969,
0.33106533813476563,
0.3314124755859375,
0.331188232421875,
0.33160498046875,
0.3317739562988281,
0.3311513671875,
0.33090765380859377,
0.33081649780273437,
0.33114727783203124,
0.33095269775390623,
0.3309649963378906,
0.3314667663574219,
0.33160498046875,
0.33089126586914064
]
},
"throughput": {
"unit": "tokens/s",
"value": 722.2798921565973
},
"energy": null,
"efficiency": null
},
"decode": {
"memory": {
"unit": "MB",
"max_ram": 3471.822848,
"max_vram": 13524.533248,
"max_reserved": 13031.702528,
"max_allocated": 10594.238464
},
"latency": {
"unit": "s",
"mean": 27.10930010986329,
"stdev": 0,
"values": [
27.10930010986329
]
},
"throughput": {
"unit": "tokens/s",
"value": 54.77824930860927
},
"energy": null,
"efficiency": null
},
"per_token": {
"memory": null,
"latency": {
"unit": "s",
"mean": 0.27383131424104334,
"stdev": 0.003865402878377042,
"values": [
0.28423577880859374,
0.2813306884765625,
0.28021041870117186,
0.2805688171386719,
0.2805350341796875,
0.280511474609375,
0.28056781005859377,
0.28048178100585935,
0.2796636047363281,
0.279478271484375,
0.2780057678222656,
0.2784327697753906,
0.27773236083984376,
0.2771885986328125,
0.2785331115722656,
0.27619430541992185,
0.27745895385742186,
0.27661618041992186,
0.2762188720703125,
0.27671243286132813,
0.27565362548828126,
0.2763612060546875,
0.27662130737304685,
0.2756331481933594,
0.27525225830078126,
0.2755604248046875,
0.27471261596679686,
0.2745743103027344,
0.27438796997070314,
0.27482009887695313,
0.2740787048339844,
0.2747821960449219,
0.2742958068847656,
0.27405517578125,
0.27351553344726565,
0.27380224609375,
0.2732697448730469,
0.273517578125,
0.27277618408203125,
0.2734888916015625,
0.27336294555664065,
0.27980593872070314,
0.27267788696289064,
0.27327078247070313,
0.2725263366699219,
0.2733834228515625,
0.2763018493652344,
0.28083709716796873,
0.2731366271972656,
0.28008447265625,
0.27196417236328124,
0.27271783447265624,
0.27304241943359375,
0.27160986328125,
0.2719815673828125,
0.2721392517089844,
0.2742210693359375,
0.27142962646484375,
0.2707630004882812,
0.27627825927734373,
0.2755911560058594,
0.27026943969726563,
0.27018649291992186,
0.2707906494140625,
0.2697359313964844,
0.26930584716796874,
0.27066574096679685,
0.26956494140625,
0.2701271057128906,
0.2692567138671875,
0.2692587585449219,
0.2694625244140625,
0.2702479248046875,
0.27053466796875,
0.27034725952148436,
0.2764933166503906,
0.27599053955078123,
0.2792693786621094,
0.2697052307128906,
0.2699970703125,
0.2686484375,
0.2691286926269531,
0.2699591674804688,
0.2695352783203125,
0.2692228698730469,
0.2694993896484375,
0.26948300170898437,
0.26922906494140625,
0.26847027587890626,
0.27224267578125,
0.2729758605957031,
0.27673394775390625,
0.2686033935546875,
0.26899252319335937,
0.269348876953125,
0.2679552001953125,
0.2716129150390625,
0.2688880615234375,
0.26859930419921874
]
},
"throughput": {
"unit": "tokens/s",
"value": 54.77824930860927
},
"energy": null,
"efficiency": null
}
}