File size: 5,853 Bytes
fb2d10f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 |
{
"prefill": {
"memory": {
"unit": "MB",
"max_ram": 3549.92128,
"max_vram": 14845.739008,
"max_reserved": 14352.908288,
"max_allocated": 11606.360064
},
"latency": {
"unit": "s",
"mean": 0.5109179931640624,
"stdev": 0.004383463883216298,
"values": [
0.53000830078125,
0.5098250122070312,
0.5102458801269532,
0.5097297973632813,
0.5099376525878906,
0.5097881469726563,
0.5104261169433594,
0.5099018249511719,
0.5098321838378906,
0.5098639221191407,
0.5100369873046875,
0.5096642456054687,
0.5097779235839843,
0.5097962951660157,
0.5097553405761719,
0.5098250122070312,
0.5100277709960938,
0.5098649597167969,
0.5101854858398438,
0.5098670043945313
]
},
"throughput": {
"unit": "tokens/s",
"value": 1409.228114165865
},
"energy": null,
"efficiency": null
},
"decode": {
"memory": {
"unit": "MB",
"max_ram": 3551.92832,
"max_vram": 14759.755776,
"max_reserved": 14266.925056,
"max_allocated": 11843.495936
},
"latency": {
"unit": "s",
"mean": 30.834580535888673,
"stdev": 0,
"values": [
30.834580535888673
]
},
"throughput": {
"unit": "tokens/s",
"value": 144.48064227158147
},
"energy": null,
"efficiency": null
},
"per_token": {
"memory": null,
"latency": {
"unit": "s",
"mean": 0.31146040945342096,
"stdev": 0.00580967786324836,
"values": [
0.3253524475097656,
0.3248230285644531,
0.3244328918457031,
0.3237283935546875,
0.3233484802246094,
0.32246578979492185,
0.32264602661132813,
0.3228252258300781,
0.32109976196289064,
0.32234906005859376,
0.3202990112304688,
0.3204403076171875,
0.3197132873535156,
0.31988327026367186,
0.31876708984375,
0.31852850341796873,
0.3186565246582031,
0.3181373291015625,
0.3178946533203125,
0.31765606689453124,
0.3175116882324219,
0.31647128295898436,
0.3162511291503906,
0.31640267944335937,
0.3156367492675781,
0.315831298828125,
0.31425741577148436,
0.31492095947265625,
0.31412429809570314,
0.31408538818359377,
0.3142686767578125,
0.31263641357421873,
0.31367474365234377,
0.31325082397460935,
0.312163330078125,
0.31181927490234373,
0.3119718322753906,
0.312595458984375,
0.3109191589355469,
0.311510009765625,
0.3109140625,
0.3108126831054687,
0.31065496826171873,
0.30986138916015626,
0.31169537353515625,
0.3097733154296875,
0.3091578979492188,
0.30993612670898435,
0.3092162475585937,
0.30841754150390627,
0.30810214233398436,
0.308853759765625,
0.30807449340820314,
0.3079454650878906,
0.3077427062988281,
0.3072255859375,
0.3082536926269531,
0.3072255859375,
0.3066542053222656,
0.30747341918945315,
0.3070986328125,
0.3071907958984375,
0.30675454711914063,
0.30768743896484374,
0.30642584228515624,
0.30630194091796875,
0.30604901123046874,
0.3071754150390625,
0.3059056701660156,
0.3065262145996094,
0.30605926513671877,
0.3071703186035156,
0.305912841796875,
0.30682623291015626,
0.30604083251953124,
0.30646783447265624,
0.3067924499511719,
0.30568856811523437,
0.30546945190429686,
0.3068098449707031,
0.3057121276855469,
0.30628350830078127,
0.30528720092773437,
0.30723989868164064,
0.3059609680175781,
0.30624664306640625,
0.30603469848632814,
0.3069859924316406,
0.30622311401367186,
0.3061012573242187,
0.3058923645019531,
0.3075512390136719,
0.3062742919921875,
0.30709759521484375,
0.3058831481933594,
0.3073331298828125,
0.30729830932617186,
0.30725631713867185,
0.30622515869140626
]
},
"throughput": {
"unit": "tokens/s",
"value": 144.48064227158144
},
"energy": null,
"efficiency": null
}
} |