CrymadX committed on
Commit
bb95bc6
·
verified ·
1 Parent(s): 1266606

Update benchmark_results.json with Yi-34B benchmark comparison

Browse files
Files changed (1) hide show
  1. benchmark_results.json +48 -0
benchmark_results.json CHANGED
@@ -126,6 +126,54 @@
126
  "accuracy": 69.2
127
  }
128
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  }
130
  ]
131
  }
 
126
  "accuracy": 69.2
127
  }
128
  }
129
+ },
130
+ {
131
+ "model": "Yi-34B-Chat",
132
+ "parameters": "34B",
133
+ "provider": "01.AI",
134
+ "total_examples": 604,
135
+ "time_seconds": 7331,
136
+ "scores": {
137
+ "tool_selection": 19.3,
138
+ "no_tool_accuracy": 94.6,
139
+ "anti_chatbot": 60.8,
140
+ "social_engineering_refusal": 85.0,
141
+ "voice_handling": 71.8,
142
+ "image_handling": 100.0,
143
+ "sticker_handling": 100.0
144
+ },
145
+ "by_category": {
146
+ "balance": {
147
+ "correct": 26,
148
+ "total": 56,
149
+ "accuracy": 46.4
150
+ },
151
+ "send": {
152
+ "correct": 4,
153
+ "total": 100,
154
+ "accuracy": 4.0
155
+ },
156
+ "price": {
157
+ "correct": 10,
158
+ "total": 56,
159
+ "accuracy": 17.9
160
+ },
161
+ "swap": {
162
+ "correct": 2,
163
+ "total": 50,
164
+ "accuracy": 4.0
165
+ },
166
+ "voice": {
167
+ "correct": 4,
168
+ "total": 15,
169
+ "accuracy": 26.7
170
+ },
171
+ "anti_chatbot": {
172
+ "correct": 10,
173
+ "total": 13,
174
+ "accuracy": 76.9
175
+ }
176
+ }
177
  }
178
  ]
179
  }