{ "prefill": { "memory": { "unit": "MB", "max_ram": 3602.481152, "max_vram": 13600.03072, "max_reserved": 13107.2, "max_allocated": 10874.392064 }, "latency": { "unit": "s", "mean": 0.3773231913248697, "stdev": 0.004032105846019261, "values": [ 0.39744345092773437, 0.38059417724609373, 0.3768924255371094, 0.37622784423828126, 0.37658932495117187, 0.37643670654296874, 0.37635992431640625, 0.3764459533691406, 0.37676953125, 0.3766363830566406, 0.3758919677734375, 0.3761131591796875, 0.3763240966796875, 0.3764326477050781, 0.37676953125, 0.3764981689453125, 0.37646951293945313, 0.3763261413574219, 0.3764234313964844, 0.37589297485351564, 0.3767470092773437, 0.3763292236328125, 0.3761827697753906, 0.3762247619628906, 0.37629031372070315, 0.37621145629882813, 0.3762032775878906 ] }, "throughput": { "unit": "tokens/s", "value": 848.0793318756936 }, "energy": null, "efficiency": null }, "decode": { "memory": { "unit": "MB", "max_ram": 3622.457344, "max_vram": 13816.037376, "max_reserved": 13323.206656, "max_allocated": 10993.732608 }, "latency": { "unit": "s", "mean": 28.168144866943358, "stdev": 0, "values": [ 28.168144866943358 ] }, "throughput": { "unit": "tokens/s", "value": 70.29216902117055 }, "energy": null, "efficiency": null }, "per_token": { "memory": null, "latency": { "unit": "s", "mean": 0.28452671582771066, "stdev": 0.0049875892887557205, "values": [ 0.2992322692871094, 0.2963630065917969, 0.2955704345703125, 0.29482598876953126, 0.29479730224609374, 0.29364532470703125, 0.2929387512207031, 0.29414093017578125, 0.29279232788085935, 0.2917713928222656, 0.29185842895507813, 0.2903203735351563, 0.2909624328613281, 0.29145086669921877, 0.2901166076660156, 0.2893209533691406, 0.29001214599609376, 0.29043405151367185, 0.28881613159179687, 0.28931686401367185, 0.28846795654296875, 0.28854989624023436, 0.2885509033203125, 0.28781976318359376, 0.28798974609375, 0.28707839965820314, 0.28717465209960935, 0.2871705627441406, 0.28670053100585935, 0.2863319091796875, 0.28658892822265625, 0.28629605102539063, 0.28560385131835936, 0.28722994995117185, 0.2855628662109375, 0.28619161987304687, 0.2864005126953125, 0.28537957763671873, 0.2857574462890625, 0.2856570739746094, 0.28499456787109373, 0.2846719970703125, 0.28515225219726564, 0.2836234130859375, 0.2847836303710938, 0.2847744140625, 0.2835599365234375, 0.28438424682617186, 0.28307763671875, 0.28367974853515626, 0.2833827819824219, 0.28306533813476564, 0.28318310546875, 0.283335693359375, 0.28157952880859377, 0.2825861206054687, 0.28271923828125, 0.2822932434082031, 0.28188568115234375, 0.2817966003417969, 0.2816174011230469, 0.2814392395019531, 0.28117196655273435, 0.28046029663085936, 0.2814535827636719, 0.28160614013671875, 0.2794721374511719, 0.2817894287109375, 0.2799134826660156, 0.28016229248046876, 0.27944244384765626, 0.28090060424804686, 0.27975372314453123, 0.2797445068359375, 0.28207308959960936, 0.2814228515625, 0.2799288330078125, 0.279947265625, 0.27979571533203124, 0.280489990234375, 0.27977828979492186, 0.2800814208984375, 0.27924581909179685, 0.28012442016601563, 0.2795233154296875, 0.2790953063964844, 0.2790983581542969, 0.2792755126953125, 0.278697998046875, 0.2785955810546875, 0.27879833984375, 0.2794700927734375, 0.2787348327636719, 0.2788065185546875, 0.27856280517578125, 0.2788147277832031, 0.27881063842773435, 0.2785607604980469, 0.2777671813964844 ] }, "throughput": { "unit": "tokens/s", "value": 70.29216902117055 }, "energy": null, "efficiency": null } }