| { | |
| "prefill": { | |
| "memory": { | |
| "unit": "MB", | |
| "max_ram": 3649.363968, | |
| "max_vram": 15097.397248, | |
| "max_reserved": 14604.566528, | |
| "max_allocated": 11492.043264 | |
| }, | |
| "latency": { | |
| "unit": "s", | |
| "mean": 0.4630080330588601, | |
| "stdev": 0.004482724887650619, | |
| "values": [ | |
| 0.4835208435058594, | |
| 0.4625725402832031, | |
| 0.4620338439941406, | |
| 0.46234112548828127, | |
| 0.4619939880371094, | |
| 0.4617963562011719, | |
| 0.4621127624511719, | |
| 0.4619253540039063, | |
| 0.4620205993652344, | |
| 0.4620994567871094, | |
| 0.46176766967773436, | |
| 0.4620769348144531, | |
| 0.4618219299316406, | |
| 0.4617502746582031, | |
| 0.4620902404785156, | |
| 0.4627476501464844, | |
| 0.4620103759765625, | |
| 0.4619530334472656, | |
| 0.4618577880859375, | |
| 0.46192333984375, | |
| 0.4618618774414063, | |
| 0.46189874267578124 | |
| ] | |
| }, | |
| "throughput": { | |
| "unit": "tokens/s", | |
| "value": 1382.265434514912 | |
| }, | |
| "energy": null, | |
| "efficiency": null | |
| }, | |
| "decode": { | |
| "memory": { | |
| "unit": "MB", | |
| "max_ram": 3649.363968, | |
| "max_vram": 15755.902976, | |
| "max_reserved": 15263.072256, | |
| "max_allocated": 11697.747968 | |
| }, | |
| "latency": { | |
| "unit": "s", | |
| "mean": 30.39823352050782, | |
| "stdev": 0, | |
| "values": [ | |
| 30.39823352050782 | |
| ] | |
| }, | |
| "throughput": { | |
| "unit": "tokens/s", | |
| "value": 130.27072765029033 | |
| }, | |
| "energy": null, | |
| "efficiency": null | |
| }, | |
| "per_token": { | |
| "memory": null, | |
| "latency": { | |
| "unit": "s", | |
| "mean": 0.3070528638435133, | |
| "stdev": 0.00619475129853309, | |
| "values": [ | |
| 0.32103317260742187, | |
| 0.320458740234375, | |
| 0.3195606994628906, | |
| 0.3190783996582031, | |
| 0.3187373962402344, | |
| 0.3189104614257813, | |
| 0.31889306640625, | |
| 0.31756594848632813, | |
| 0.3173099670410156, | |
| 0.3172567138671875, | |
| 0.3162204284667969, | |
| 0.31606475830078123, | |
| 0.31606173706054685, | |
| 0.3153530578613281, | |
| 0.31523019409179687, | |
| 0.3142891540527344, | |
| 0.3149322204589844, | |
| 0.3137945556640625, | |
| 0.3137208251953125, | |
| 0.31293234252929686, | |
| 0.3128350830078125, | |
| 0.3129702453613281, | |
| 0.31244903564453125, | |
| 0.31200460815429687, | |
| 0.3119308776855469, | |
| 0.31241726684570315, | |
| 0.3110267028808594, | |
| 0.31101849365234374, | |
| 0.31155813598632814, | |
| 0.31053829956054685, | |
| 0.30974969482421877, | |
| 0.30999859619140624, | |
| 0.3091568603515625, | |
| 0.3098849182128906, | |
| 0.30947122192382814, | |
| 0.30916403198242187, | |
| 0.3083642883300781, | |
| 0.30852301025390627, | |
| 0.30778573608398435, | |
| 0.308284423828125, | |
| 0.30778265380859376, | |
| 0.3074211730957031, | |
| 0.30655078125, | |
| 0.3073249206542969, | |
| 0.30658560180664063, | |
| 0.30559130859375, | |
| 0.30594354248046873, | |
| 0.30640640258789065, | |
| 0.3051796569824219, | |
| 0.3058493347167969, | |
| 0.3058452453613281, | |
| 0.30491134643554685, | |
| 0.305227783203125, | |
| 0.3053455505371094, | |
| 0.30429080200195313, | |
| 0.3045038146972656, | |
| 0.3032811584472656, | |
| 0.30313677978515624, | |
| 0.30287359619140625, | |
| 0.3031756896972656, | |
| 0.3024117736816406, | |
| 0.3021936645507812, | |
| 0.301380615234375, | |
| 0.3028357238769531, | |
| 0.30214654541015623, | |
| 0.3021240234375, | |
| 0.30190899658203124, | |
| 0.3019069519042969, | |
| 0.30141543579101565, | |
| 0.3013048400878906, | |
| 0.30091162109375, | |
| 0.30223974609375, | |
| 0.30069451904296873, | |
| 0.3011194763183594, | |
| 0.30109390258789065, | |
| 0.30174822998046874, | |
| 0.3004610595703125, | |
| 0.3011061706542969, | |
| 0.30095974731445313, | |
| 0.3013918762207031, | |
| 0.29992755126953125, | |
| 0.3006238708496094, | |
| 0.30073651123046874, | |
| 0.30130075073242185, | |
| 0.30008935546875, | |
| 0.3009218444824219, | |
| 0.30135092163085936, | |
| 0.30108978271484377, | |
| 0.3004989318847656, | |
| 0.3007057800292969, | |
| 0.30057470703125, | |
| 0.30154238891601565, | |
| 0.30043032836914063, | |
| 0.30031768798828123, | |
| 0.29990093994140626, | |
| 0.3013959655761719, | |
| 0.30073855590820314, | |
| 0.3008112487792969, | |
| 0.30015896606445314 | |
| ] | |
| }, | |
| "throughput": { | |
| "unit": "tokens/s", | |
| "value": 130.27072765029033 | |
| }, | |
| "energy": null, | |
| "efficiency": null | |
| } | |
| } |