{ "prefill": { "memory": { "unit": "MB", "max_ram": 3649.363968, "max_vram": 15097.397248, "max_reserved": 14604.566528, "max_allocated": 11492.043264 }, "latency": { "unit": "s", "mean": 0.4630080330588601, "stdev": 0.004482724887650619, "values": [ 0.4835208435058594, 0.4625725402832031, 0.4620338439941406, 0.46234112548828127, 0.4619939880371094, 0.4617963562011719, 0.4621127624511719, 0.4619253540039063, 0.4620205993652344, 0.4620994567871094, 0.46176766967773436, 0.4620769348144531, 0.4618219299316406, 0.4617502746582031, 0.4620902404785156, 0.4627476501464844, 0.4620103759765625, 0.4619530334472656, 0.4618577880859375, 0.46192333984375, 0.4618618774414063, 0.46189874267578124 ] }, "throughput": { "unit": "tokens/s", "value": 1382.265434514912 }, "energy": null, "efficiency": null }, "decode": { "memory": { "unit": "MB", "max_ram": 3649.363968, "max_vram": 15755.902976, "max_reserved": 15263.072256, "max_allocated": 11697.747968 }, "latency": { "unit": "s", "mean": 30.39823352050782, "stdev": 0, "values": [ 30.39823352050782 ] }, "throughput": { "unit": "tokens/s", "value": 130.27072765029033 }, "energy": null, "efficiency": null }, "per_token": { "memory": null, "latency": { "unit": "s", "mean": 0.3070528638435133, "stdev": 0.00619475129853309, "values": [ 0.32103317260742187, 0.320458740234375, 0.3195606994628906, 0.3190783996582031, 0.3187373962402344, 0.3189104614257813, 0.31889306640625, 0.31756594848632813, 0.3173099670410156, 0.3172567138671875, 0.3162204284667969, 0.31606475830078123, 0.31606173706054685, 0.3153530578613281, 0.31523019409179687, 0.3142891540527344, 0.3149322204589844, 0.3137945556640625, 0.3137208251953125, 0.31293234252929686, 0.3128350830078125, 0.3129702453613281, 0.31244903564453125, 0.31200460815429687, 0.3119308776855469, 0.31241726684570315, 0.3110267028808594, 0.31101849365234374, 0.31155813598632814, 0.31053829956054685, 0.30974969482421877, 0.30999859619140624, 0.3091568603515625, 0.3098849182128906, 0.30947122192382814, 0.30916403198242187, 0.3083642883300781, 0.30852301025390627, 0.30778573608398435, 0.308284423828125, 0.30778265380859376, 0.3074211730957031, 0.30655078125, 0.3073249206542969, 0.30658560180664063, 0.30559130859375, 0.30594354248046873, 0.30640640258789065, 0.3051796569824219, 0.3058493347167969, 0.3058452453613281, 0.30491134643554685, 0.305227783203125, 0.3053455505371094, 0.30429080200195313, 0.3045038146972656, 0.3032811584472656, 0.30313677978515624, 0.30287359619140625, 0.3031756896972656, 0.3024117736816406, 0.3021936645507812, 0.301380615234375, 0.3028357238769531, 0.30214654541015623, 0.3021240234375, 0.30190899658203124, 0.3019069519042969, 0.30141543579101565, 0.3013048400878906, 0.30091162109375, 0.30223974609375, 0.30069451904296873, 0.3011194763183594, 0.30109390258789065, 0.30174822998046874, 0.3004610595703125, 0.3011061706542969, 0.30095974731445313, 0.3013918762207031, 0.29992755126953125, 0.3006238708496094, 0.30073651123046874, 0.30130075073242185, 0.30008935546875, 0.3009218444824219, 0.30135092163085936, 0.30108978271484377, 0.3004989318847656, 0.3007057800292969, 0.30057470703125, 0.30154238891601565, 0.30043032836914063, 0.30031768798828123, 0.29990093994140626, 0.3013959655761719, 0.30073855590820314, 0.3008112487792969, 0.30015896606445314 ] }, "throughput": { "unit": "tokens/s", "value": 130.27072765029033 }, "energy": null, "efficiency": null } }