File size: 5,924 Bytes
eda8ec4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 |
{
"prefill": {
"memory": {
"unit": "MB",
"max_ram": 3649.363968,
"max_vram": 15097.397248,
"max_reserved": 14604.566528,
"max_allocated": 11492.043264
},
"latency": {
"unit": "s",
"mean": 0.4630080330588601,
"stdev": 0.004482724887650619,
"values": [
0.4835208435058594,
0.4625725402832031,
0.4620338439941406,
0.46234112548828127,
0.4619939880371094,
0.4617963562011719,
0.4621127624511719,
0.4619253540039063,
0.4620205993652344,
0.4620994567871094,
0.46176766967773436,
0.4620769348144531,
0.4618219299316406,
0.4617502746582031,
0.4620902404785156,
0.4627476501464844,
0.4620103759765625,
0.4619530334472656,
0.4618577880859375,
0.46192333984375,
0.4618618774414063,
0.46189874267578124
]
},
"throughput": {
"unit": "tokens/s",
"value": 1382.265434514912
},
"energy": null,
"efficiency": null
},
"decode": {
"memory": {
"unit": "MB",
"max_ram": 3649.363968,
"max_vram": 15755.902976,
"max_reserved": 15263.072256,
"max_allocated": 11697.747968
},
"latency": {
"unit": "s",
"mean": 30.39823352050782,
"stdev": 0,
"values": [
30.39823352050782
]
},
"throughput": {
"unit": "tokens/s",
"value": 130.27072765029033
},
"energy": null,
"efficiency": null
},
"per_token": {
"memory": null,
"latency": {
"unit": "s",
"mean": 0.3070528638435133,
"stdev": 0.00619475129853309,
"values": [
0.32103317260742187,
0.320458740234375,
0.3195606994628906,
0.3190783996582031,
0.3187373962402344,
0.3189104614257813,
0.31889306640625,
0.31756594848632813,
0.3173099670410156,
0.3172567138671875,
0.3162204284667969,
0.31606475830078123,
0.31606173706054685,
0.3153530578613281,
0.31523019409179687,
0.3142891540527344,
0.3149322204589844,
0.3137945556640625,
0.3137208251953125,
0.31293234252929686,
0.3128350830078125,
0.3129702453613281,
0.31244903564453125,
0.31200460815429687,
0.3119308776855469,
0.31241726684570315,
0.3110267028808594,
0.31101849365234374,
0.31155813598632814,
0.31053829956054685,
0.30974969482421877,
0.30999859619140624,
0.3091568603515625,
0.3098849182128906,
0.30947122192382814,
0.30916403198242187,
0.3083642883300781,
0.30852301025390627,
0.30778573608398435,
0.308284423828125,
0.30778265380859376,
0.3074211730957031,
0.30655078125,
0.3073249206542969,
0.30658560180664063,
0.30559130859375,
0.30594354248046873,
0.30640640258789065,
0.3051796569824219,
0.3058493347167969,
0.3058452453613281,
0.30491134643554685,
0.305227783203125,
0.3053455505371094,
0.30429080200195313,
0.3045038146972656,
0.3032811584472656,
0.30313677978515624,
0.30287359619140625,
0.3031756896972656,
0.3024117736816406,
0.3021936645507812,
0.301380615234375,
0.3028357238769531,
0.30214654541015623,
0.3021240234375,
0.30190899658203124,
0.3019069519042969,
0.30141543579101565,
0.3013048400878906,
0.30091162109375,
0.30223974609375,
0.30069451904296873,
0.3011194763183594,
0.30109390258789065,
0.30174822998046874,
0.3004610595703125,
0.3011061706542969,
0.30095974731445313,
0.3013918762207031,
0.29992755126953125,
0.3006238708496094,
0.30073651123046874,
0.30130075073242185,
0.30008935546875,
0.3009218444824219,
0.30135092163085936,
0.30108978271484377,
0.3004989318847656,
0.3007057800292969,
0.30057470703125,
0.30154238891601565,
0.30043032836914063,
0.30031768798828123,
0.29990093994140626,
0.3013959655761719,
0.30073855590820314,
0.3008112487792969,
0.30015896606445314
]
},
"throughput": {
"unit": "tokens/s",
"value": 130.27072765029033
},
"energy": null,
"efficiency": null
}
} |