j_yoon.song committed on
Commit
9875a12
·
1 Parent(s): ac0e84a

add a model

Browse files
src/data/open/length_data.json CHANGED
@@ -3126,5 +3126,73 @@
3126
  "Med": 1809.5,
3127
  "Med Resp": 1809.5
3128
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3129
  }
3130
  }
 
3126
  "Med": 1809.5,
3127
  "Med Resp": 1809.5
3128
  }
3129
+ },
3130
+ "Kimi K2 Thinking": {
3131
+ "Overall": {
3132
+ "Min": 115,
3133
+ "Max": 65500,
3134
+ "Med": 1692.0,
3135
+ "Med Resp": 330.0
3136
+ },
3137
+ "Content Generation": {
3138
+ "Min": 115,
3139
+ "Max": 29508,
3140
+ "Med": 1696.0,
3141
+ "Med Resp": 478.0
3142
+ },
3143
+ "Editing": {
3144
+ "Min": 302,
3145
+ "Max": 11808,
3146
+ "Med": 1347.5,
3147
+ "Med Resp": 219.0
3148
+ },
3149
+ "Data Analysis": {
3150
+ "Min": 186,
3151
+ "Max": 65462,
3152
+ "Med": 978.0,
3153
+ "Med Resp": 156.0
3154
+ },
3155
+ "Reasoning": {
3156
+ "Min": 291,
3157
+ "Max": 55791,
3158
+ "Med": 1842.0,
3159
+ "Med Resp": 377.0
3160
+ },
3161
+ "Hallucination": {
3162
+ "Min": 194,
3163
+ "Max": 5063,
3164
+ "Med": 1140.5,
3165
+ "Med Resp": 382.5
3166
+ },
3167
+ "Safety": {
3168
+ "Min": 171,
3169
+ "Max": 5707,
3170
+ "Med": 1013.0,
3171
+ "Med Resp": 395.0
3172
+ },
3173
+ "Repetition": {
3174
+ "Min": 236,
3175
+ "Max": 65500,
3176
+ "Med": 1890.0,
3177
+ "Med Resp": 264.0
3178
+ },
3179
+ "Summarization": {
3180
+ "Min": 276,
3181
+ "Max": 13220,
3182
+ "Med": 996.0,
3183
+ "Med Resp": 196.5
3184
+ },
3185
+ "Translation": {
3186
+ "Min": 433,
3187
+ "Max": 13703,
3188
+ "Med": 2637.0,
3189
+ "Med Resp": 310.5
3190
+ },
3191
+ "Multi-Turn": {
3192
+ "Min": 333,
3193
+ "Max": 17384,
3194
+ "Med": 3771.5,
3195
+ "Med Resp": 1102.0
3196
+ }
3197
  }
3198
  }
src/data/open/stats.csv CHANGED
@@ -14,6 +14,8 @@
14
  top-p: 0.95" "Grok" "" "" "" "" "" "" "Proprietary" "Think" "On" "58.74" "61.0" "66.25" "72.51" "63.22" "66.09" "16.53" "58.57" "66.27" "54.21" "44.3"
15
  "Gemini 2.5 Flash" "https://deepmind.google/models/gemini/flash/" "" "Gemini" "" "" "" "" "" "" "Proprietary" "Hybrid" "On" "58.62" "57.25" "62.19" "70.52" "72.31" "56.9" "28.93" "47.14" "68.65" "55.06" "46.98"
16
  "o4-mini" "https://platform.openai.com/docs/models/o4-mini" "" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "57.57" "67.25" "61.25" "71.71" "75.62" "45.4" "39.67" "44.29" "59.92" "47.19" "41.95"
 
 
17
  "Qwen3 235B A22B Thinking 2507" "https://huggingface.co/Qwen/Qwen3-235B-A22B-Thinking-2507" "temperature: 0.6
18
  top-p: 0.95" "Qwen" "2404.5" "423.0" "58.364528823897146" "80.01045334339142" "31.05335185752473" "235.0" "Open" "Think" "On" "55.48" "57.5" "53.12" "73.31" "75.21" "55.17" "25.62" "35.71" "55.56" "56.18" "40.27"
19
  "GPT-5 nano (Reasoning: medium)" "https://platform.openai.com/docs/models/gpt-5-nano" "Reasoning: medium" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "55.39" "63.5" "47.19" "68.92" "75.21" "55.17" "52.07" "34.29" "63.49" "40.73" "42.95"
 
14
  top-p: 0.95" "Grok" "" "" "" "" "" "" "Proprietary" "Think" "On" "58.74" "61.0" "66.25" "72.51" "63.22" "66.09" "16.53" "58.57" "66.27" "54.21" "44.3"
15
  "Gemini 2.5 Flash" "https://deepmind.google/models/gemini/flash/" "" "Gemini" "" "" "" "" "" "" "Proprietary" "Hybrid" "On" "58.62" "57.25" "62.19" "70.52" "72.31" "56.9" "28.93" "47.14" "68.65" "55.06" "46.98"
16
  "o4-mini" "https://platform.openai.com/docs/models/o4-mini" "" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "57.57" "67.25" "61.25" "71.71" "75.62" "45.4" "39.67" "44.29" "59.92" "47.19" "41.95"
17
+ "Kimi K2 Thinking" "https://huggingface.co/moonshotai/Kimi-K2-Thinking" "temperature:1.0
18
+ top-p: 0.95" "moonshot" "1692.0" "330.0" "45.35071495282816" "70.24291145801544" "24.28866627458008" "1000.0" "Open" "Think" "On" "56.84" "58.25" "50.31" "69.72" "77.27" "60.92" "44.63" "38.57" "59.92" "52.25" "44.3"
19
  "Qwen3 235B A22B Thinking 2507" "https://huggingface.co/Qwen/Qwen3-235B-A22B-Thinking-2507" "temperature: 0.6
20
  top-p: 0.95" "Qwen" "2404.5" "423.0" "58.364528823897146" "80.01045334339142" "31.05335185752473" "235.0" "Open" "Think" "On" "55.48" "57.5" "53.12" "73.31" "75.21" "55.17" "25.62" "35.71" "55.56" "56.18" "40.27"
21
  "GPT-5 nano (Reasoning: medium)" "https://platform.openai.com/docs/models/gpt-5-nano" "Reasoning: medium" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "55.39" "63.5" "47.19" "68.92" "75.21" "55.17" "52.07" "34.29" "63.49" "40.73" "42.95"
src/data/open/stats_lang.csv CHANGED
@@ -14,6 +14,8 @@
14
  top-p: 0.95" "Grok" "" "" "" "" "" "" "Proprietary" "Think" "On" "58.74" "57.78" "56.67" "62.65" "60.37" "58.33" "60.22" "59.78" "56.22" "62.5" "60.66" "52.25" "60.98"
15
  "Gemini 2.5 Flash" "https://deepmind.google/models/gemini/flash/" "" "Gemini" "" "" "" "" "" "" "Proprietary" "Hybrid" "On" "58.62" "51.11" "56.39" "62.05" "56.71" "62.78" "60.77" "61.45" "60.0" "63.04" "57.92" "64.04" "56.71"
16
  "o4-mini" "https://platform.openai.com/docs/models/o4-mini" "" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "57.57" "54.17" "55.0" "62.05" "59.76" "52.78" "58.56" "63.69" "55.68" "57.61" "60.66" "56.74" "60.98"
 
 
17
  "Qwen3 235B A22B Thinking 2507" "https://huggingface.co/Qwen/Qwen3-235B-A22B-Thinking-2507" "temperature: 0.6
18
  top-p: 0.95" "Qwen" "2404.5" "423.0" "58.364528823897146" "80.01045334339142" "31.05335185752473" "235.0" "Open" "Think" "On" "55.48" "49.17" "53.33" "56.02" "58.54" "50.56" "62.43" "60.89" "52.97" "56.52" "60.11" "53.93" "60.37"
19
  "GPT-5 nano (Reasoning: medium)" "https://platform.openai.com/docs/models/gpt-5-nano" "Reasoning: medium" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "55.39" "51.94" "53.89" "57.23" "53.66" "55.56" "58.01" "59.78" "54.59" "56.52" "59.02" "57.3" "51.83"
 
14
  top-p: 0.95" "Grok" "" "" "" "" "" "" "Proprietary" "Think" "On" "58.74" "57.78" "56.67" "62.65" "60.37" "58.33" "60.22" "59.78" "56.22" "62.5" "60.66" "52.25" "60.98"
15
  "Gemini 2.5 Flash" "https://deepmind.google/models/gemini/flash/" "" "Gemini" "" "" "" "" "" "" "Proprietary" "Hybrid" "On" "58.62" "51.11" "56.39" "62.05" "56.71" "62.78" "60.77" "61.45" "60.0" "63.04" "57.92" "64.04" "56.71"
16
  "o4-mini" "https://platform.openai.com/docs/models/o4-mini" "" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "57.57" "54.17" "55.0" "62.05" "59.76" "52.78" "58.56" "63.69" "55.68" "57.61" "60.66" "56.74" "60.98"
17
+ "Kimi K2 Thinking" "https://huggingface.co/moonshotai/Kimi-K2-Thinking" "temperature:1.0
18
+ top-p: 0.95" "moonshot" "1692.0" "330.0" "45.35071495282816" "70.24291145801544" "24.28866627458008" "1000.0" "Open" "Think" "On" "56.84" "50.0" "57.5" "60.84" "62.2" "53.33" "54.14" "61.45" "53.51" "59.24" "59.56" "56.18" "61.59"
19
  "Qwen3 235B A22B Thinking 2507" "https://huggingface.co/Qwen/Qwen3-235B-A22B-Thinking-2507" "temperature: 0.6
20
  top-p: 0.95" "Qwen" "2404.5" "423.0" "58.364528823897146" "80.01045334339142" "31.05335185752473" "235.0" "Open" "Think" "On" "55.48" "49.17" "53.33" "56.02" "58.54" "50.56" "62.43" "60.89" "52.97" "56.52" "60.11" "53.93" "60.37"
21
  "GPT-5 nano (Reasoning: medium)" "https://platform.openai.com/docs/models/gpt-5-nano" "Reasoning: medium" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "55.39" "51.94" "53.89" "57.23" "53.66" "55.56" "58.01" "59.78" "54.59" "56.52" "59.02" "57.3" "51.83"
src/data/open/time_data.json CHANGED
@@ -8826,5 +8826,195 @@
8826
  "Med": 59.17519237542838
8827
  }
8828
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8829
  }
8830
  }
 
8826
  "Med": 59.17519237542838
8827
  }
8828
  }
8829
+ },
8830
+ "Kimi K2 Thinking": {
8831
+ "NUM_GPUS": 16,
8832
+ "Overall": {
8833
+ "Time to Answer": {
8834
+ "Min": 0.11169028282165527,
8835
+ "Max": 2241.13407095096,
8836
+ "Med": 45.350714952828156
8837
+ },
8838
+ "Latency": {
8839
+ "Min": 4.439338684082031,
8840
+ "Max": 2728.568812608719,
8841
+ "Med": 70.24291145801544
8842
+ },
8843
+ "Speed": {
8844
+ "Min": 20.822681353212012,
8845
+ "Max": 30.23625816531444,
8846
+ "Med": 24.28866627458008
8847
+ }
8848
+ },
8849
+ "Content Generation": {
8850
+ "Time to Answer": {
8851
+ "Min": 3.9137235890264095,
8852
+ "Max": 1225.9386449544365,
8853
+ "Med": 35.3905248505725
8854
+ },
8855
+ "Latency": {
8856
+ "Min": 4.439338684082031,
8857
+ "Max": 1227.7691378593445,
8858
+ "Med": 69.77224278450012
8859
+ },
8860
+ "Speed": {
8861
+ "Min": 22.19674076837345,
8862
+ "Max": 28.607637310888318,
8863
+ "Med": 24.40099504786854
8864
+ }
8865
+ },
8866
+ "Editing": {
8867
+ "Time to Answer": {
8868
+ "Min": 11.06907760156818,
8869
+ "Max": 471.7268326631406,
8870
+ "Med": 45.441414409516526
8871
+ },
8872
+ "Latency": {
8873
+ "Min": 12.470248699188232,
8874
+ "Max": 497.5029664039612,
8875
+ "Med": 56.23897922039032
8876
+ },
8877
+ "Speed": {
8878
+ "Min": 22.109020450839882,
8879
+ "Max": 29.13554703600312,
8880
+ "Med": 24.070683050531777
8881
+ }
8882
+ },
8883
+ "Data Analysis": {
8884
+ "Time to Answer": {
8885
+ "Min": 0.11449933052062988,
8886
+ "Max": 1191.5499346137067,
8887
+ "Med": 32.336552678242
8888
+ },
8889
+ "Latency": {
8890
+ "Min": 7.8422017097473145,
8891
+ "Max": 2388.0731995105743,
8892
+ "Med": 41.18542838096619
8893
+ },
8894
+ "Speed": {
8895
+ "Min": 22.05901226621864,
8896
+ "Max": 29.533580819697747,
8897
+ "Med": 24.28078923247624
8898
+ }
8899
+ },
8900
+ "Reasoning": {
8901
+ "Time to Answer": {
8902
+ "Min": 9.640599855836832,
8903
+ "Max": 2241.13407095096,
8904
+ "Med": 54.49989092334097
8905
+ },
8906
+ "Latency": {
8907
+ "Min": 11.675945520401001,
8908
+ "Max": 2282.450988292694,
8909
+ "Med": 77.0264184474945
8910
+ },
8911
+ "Speed": {
8912
+ "Min": 22.63968842524605,
8913
+ "Max": 27.45209055990165,
8914
+ "Med": 24.40082929837471
8915
+ }
8916
+ },
8917
+ "Hallucination": {
8918
+ "Time to Answer": {
8919
+ "Min": 4.6661487510523845,
8920
+ "Max": 115.98579355956018,
8921
+ "Med": 24.99396284027753
8922
+ },
8923
+ "Latency": {
8924
+ "Min": 7.999700307846069,
8925
+ "Max": 201.69537544250488,
8926
+ "Med": 47.261369943618774
8927
+ },
8928
+ "Speed": {
8929
+ "Min": 22.616629795811217,
8930
+ "Max": 28.067817143682454,
8931
+ "Med": 24.21991523840923
8932
+ }
8933
+ },
8934
+ "Safety": {
8935
+ "Time to Answer": {
8936
+ "Min": 3.560721757119162,
8937
+ "Max": 94.15018529295921,
8938
+ "Med": 23.53879180359535
8939
+ },
8940
+ "Latency": {
8941
+ "Min": 7.6663689613342285,
8942
+ "Max": 242.5039336681366,
8943
+ "Med": 42.37087416648865
8944
+ },
8945
+ "Speed": {
8946
+ "Min": 22.02350964399482,
8947
+ "Max": 25.83755322621515,
8948
+ "Med": 24.36817211144977
8949
+ }
8950
+ },
8951
+ "Repetition": {
8952
+ "Time to Answer": {
8953
+ "Min": 0.11169028282165527,
8954
+ "Max": 823.9142600095446,
8955
+ "Med": 51.967095582457276
8956
+ },
8957
+ "Latency": {
8958
+ "Min": 10.11755895614624,
8959
+ "Max": 2728.568812608719,
8960
+ "Med": 75.55940163135529
8961
+ },
8962
+ "Speed": {
8963
+ "Min": 22.511808549089448,
8964
+ "Max": 29.03623731013598,
8965
+ "Med": 24.555569181348574
8966
+ }
8967
+ },
8968
+ "Summarization": {
8969
+ "Time to Answer": {
8970
+ "Min": 8.394754877090454,
8971
+ "Max": 501.0477158718318,
8972
+ "Med": 32.087957848732046
8973
+ },
8974
+ "Latency": {
8975
+ "Min": 12.335896253585815,
8976
+ "Max": 554.7887036800385,
8977
+ "Med": 41.29298424720764
8978
+ },
8979
+ "Speed": {
8980
+ "Min": 22.064383554209275,
8981
+ "Max": 26.475146333904462,
8982
+ "Med": 23.97254683466398
8983
+ }
8984
+ },
8985
+ "Translation": {
8986
+ "Time to Answer": {
8987
+ "Min": 12.529863516024639,
8988
+ "Max": 529.1059714276646,
8989
+ "Med": 96.06599344376238
8990
+ },
8991
+ "Latency": {
8992
+ "Min": 17.787765979766846,
8993
+ "Max": 558.00350522995,
8994
+ "Med": 108.60968720912933
8995
+ },
8996
+ "Speed": {
8997
+ "Min": 20.822681353212012,
8998
+ "Max": 26.82045824411476,
8999
+ "Med": 24.103166794327993
9000
+ }
9001
+ },
9002
+ "Multi-Turn": {
9003
+ "Time to Answer": {
9004
+ "Min": 14.937343551654969,
9005
+ "Max": 438.05960693330314,
9006
+ "Med": 72.95374519316395
9007
+ },
9008
+ "Latency": {
9009
+ "Min": 16.352965593338013,
9010
+ "Max": 621.9444324970245,
9011
+ "Med": 156.30420565605164
9012
+ },
9013
+ "Speed": {
9014
+ "Min": 23.156346672284208,
9015
+ "Max": 30.23625816531444,
9016
+ "Med": 24.427890087314992
9017
+ }
9018
+ }
9019
  }
9020
  }