{ "prefill": { "memory": { "unit": "MB", "max_ram": 3549.92128, "max_vram": 14845.739008, "max_reserved": 14352.908288, "max_allocated": 11606.360064 }, "latency": { "unit": "s", "mean": 0.5109179931640624, "stdev": 0.004383463883216298, "values": [ 0.53000830078125, 0.5098250122070312, 0.5102458801269532, 0.5097297973632813, 0.5099376525878906, 0.5097881469726563, 0.5104261169433594, 0.5099018249511719, 0.5098321838378906, 0.5098639221191407, 0.5100369873046875, 0.5096642456054687, 0.5097779235839843, 0.5097962951660157, 0.5097553405761719, 0.5098250122070312, 0.5100277709960938, 0.5098649597167969, 0.5101854858398438, 0.5098670043945313 ] }, "throughput": { "unit": "tokens/s", "value": 1409.228114165865 }, "energy": null, "efficiency": null }, "decode": { "memory": { "unit": "MB", "max_ram": 3551.92832, "max_vram": 14759.755776, "max_reserved": 14266.925056, "max_allocated": 11843.495936 }, "latency": { "unit": "s", "mean": 30.834580535888673, "stdev": 0, "values": [ 30.834580535888673 ] }, "throughput": { "unit": "tokens/s", "value": 144.48064227158147 }, "energy": null, "efficiency": null }, "per_token": { "memory": null, "latency": { "unit": "s", "mean": 0.31146040945342096, "stdev": 0.00580967786324836, "values": [ 0.3253524475097656, 0.3248230285644531, 0.3244328918457031, 0.3237283935546875, 0.3233484802246094, 0.32246578979492185, 0.32264602661132813, 0.3228252258300781, 0.32109976196289064, 0.32234906005859376, 0.3202990112304688, 0.3204403076171875, 0.3197132873535156, 0.31988327026367186, 0.31876708984375, 0.31852850341796873, 0.3186565246582031, 0.3181373291015625, 0.3178946533203125, 0.31765606689453124, 0.3175116882324219, 0.31647128295898436, 0.3162511291503906, 0.31640267944335937, 0.3156367492675781, 0.315831298828125, 0.31425741577148436, 0.31492095947265625, 0.31412429809570314, 0.31408538818359377, 0.3142686767578125, 0.31263641357421873, 0.31367474365234377, 0.31325082397460935, 0.312163330078125, 0.31181927490234373, 0.3119718322753906, 0.312595458984375, 0.3109191589355469, 0.311510009765625, 0.3109140625, 0.3108126831054687, 0.31065496826171873, 0.30986138916015626, 0.31169537353515625, 0.3097733154296875, 0.3091578979492188, 0.30993612670898435, 0.3092162475585937, 0.30841754150390627, 0.30810214233398436, 0.308853759765625, 0.30807449340820314, 0.3079454650878906, 0.3077427062988281, 0.3072255859375, 0.3082536926269531, 0.3072255859375, 0.3066542053222656, 0.30747341918945315, 0.3070986328125, 0.3071907958984375, 0.30675454711914063, 0.30768743896484374, 0.30642584228515624, 0.30630194091796875, 0.30604901123046874, 0.3071754150390625, 0.3059056701660156, 0.3065262145996094, 0.30605926513671877, 0.3071703186035156, 0.305912841796875, 0.30682623291015626, 0.30604083251953124, 0.30646783447265624, 0.3067924499511719, 0.30568856811523437, 0.30546945190429686, 0.3068098449707031, 0.3057121276855469, 0.30628350830078127, 0.30528720092773437, 0.30723989868164064, 0.3059609680175781, 0.30624664306640625, 0.30603469848632814, 0.3069859924316406, 0.30622311401367186, 0.3061012573242187, 0.3058923645019531, 0.3075512390136719, 0.3062742919921875, 0.30709759521484375, 0.3058831481933594, 0.3073331298828125, 0.30729830932617186, 0.30725631713867185, 0.30622515869140626 ] }, "throughput": { "unit": "tokens/s", "value": 144.48064227158144 }, "energy": null, "efficiency": null } }