TiramisuQiao committed on
Commit 49684ef · verified · 1 Parent(s): 2597bd2

Update README.md

Files changed (1)
  1. README.md +205 -236
README.md CHANGED
@@ -6,246 +6,217 @@ language:
  metrics:
  - accuracy
  base_model:
- - Qwen/Qwen3-30B-A3B-Instruct-2507
+ - Qwen/Qwen3-32B
  pipeline_tag: text-generation
  library_name: transformers
  tags:
  - medical
  model-index:
- - name: Med-Go-32B
-   results:
-   # ----------------------------------------------------
-   # Medical Knowledge
-   # ----------------------------------------------------
-   - task:
-       type: text-generation
-     dataset:
-       type: medical_eval_hle
-       name: Medical-Eval-HLE
-     metrics:
-     - name: accuracy
-       type: accuracy
-       value: 19.4
-       verified: false
-
-   - task:
-       type: text-generation
-     dataset:
-       type: supergpqa
-       name: SuperGPQA
-     metrics:
-     - name: accuracy
-       type: accuracy
-       value: 37.2
-       verified: false
-
-   - task:
-       type: text-generation
-     dataset:
-       type: medbullets
-       name: Medbullets
-     metrics:
-     - name: accuracy
-       type: accuracy
-       value: 57.8
-       verified: false
-
-   - task:
-       type: text-generation
-     dataset:
-       type: mmlu_pro
-       name: MMLU-pro
-     metrics:
-     - name: accuracy
-       type: accuracy
-       value: 64.3
-       verified: false
-
-   - task:
-       type: text-generation
-     dataset:
-       type: afrimedqa
-       name: AfrimedQA
-     metrics:
-     - name: accuracy
-       type: accuracy
-       value: 74.7
-       verified: false
-
-   - task:
-       type: text-generation
-     dataset:
-       type: medmcqa
-       name: MedMCQA
-     metrics:
-     - name: accuracy
-       type: accuracy
-       value: 68.3
-       verified: false
-
-   - task:
-       type: text-generation
-     dataset:
-       type: medqa_usmle
-       name: MedQA-USMLE
-     metrics:
-     - name: accuracy
-       type: accuracy
-       value: 76.8
-       verified: false
-
-   - task:
-       type: text-generation
-     dataset:
-       type: cmb
-       name: CMB
-     metrics:
-     - name: accuracy
-       type: accuracy
-       value: 92.5
-       verified: false
-
-   - task:
-       type: text-generation
-     dataset:
-       type: cmexam
-       name: CMExam
-     metrics:
-     - name: accuracy
-       type: accuracy
-       value: 87.4
-       verified: false
-
-   - task:
-       type: text-generation
-     dataset:
-       type: pubmedqa
-       name: PubMedQA
-     metrics:
-     - name: accuracy
-       type: accuracy
-       value: 76.6
-       verified: false
-
-   - task:
-       type: text-generation
-     dataset:
-       type: medexqa
-       name: MedExQA
-     metrics:
-     - name: accuracy
-       type: accuracy
-       value: 81.5
-       verified: false
-
-   - task:
-       type: text-generation
-     dataset:
-       type: explaincpe
-       name: ExplainCPE
-     metrics:
-     - name: accuracy
-       type: accuracy
-       value: 89.5
-       verified: false
-
-   - task:
-       type: text-generation
-     dataset:
-       type: mmlu_med
-       name: MMLU-Med
-     metrics:
-     - name: accuracy
-       type: accuracy
-       value: 87.4
-       verified: false
-
-   # ----------------------------------------------------
-   # Clinical Reasoning
-   # ----------------------------------------------------
-   - task:
-       type: text-generation
-     dataset:
-       type: medxperqa
-       name: MedXperQA
-     metrics:
-     - name: accuracy
-       type: accuracy
-       value: 20.7
-       verified: false
-
-   - task:
-       type: text-generation
-     dataset:
-       type: anesbench
-       name: AnesBench
-     metrics:
-     - name: accuracy
-       type: accuracy
-       value: 53.1
-       verified: false
-
-   - task:
-       type: text-generation
-     dataset:
-       type: diagnosisarena
-       name: DiagnosisArena
-     metrics:
-     - name: accuracy
-       type: accuracy
-       value: 64.4
-       verified: false
-
-   - task:
-       type: text-generation
-     dataset:
-       type: clinbench_hbp
-       name: Clinbench-HBP
-     metrics:
-     - name: accuracy
-       type: accuracy
-       value: 80.6
-       verified: false
-
-   # ----------------------------------------------------
-   # Medical Standard
-   # ----------------------------------------------------
-   - task:
-       type: text-generation
-     dataset:
-       type: medpair
-       name: MedPAIR
-     metrics:
-     - name: accuracy
-       type: accuracy
-       value: 32.3
-       verified: false
-
-   - task:
-       type: text-generation
-     dataset:
-       type: amqa
-       name: AMQA
-     metrics:
-     - name: accuracy
-       type: accuracy
-       value: 72.7
-       verified: false
-
-   - task:
-       type: text-generation
-     dataset:
-       type: medethicaleval
-       name: MedethicalEval
-     metrics:
-     - name: accuracy
-       type: accuracy
-       value: 92.2
-       verified: false
-
+ - name: Med-Go-32B
+   results:
+   - task:
+       type: text-generation
+     dataset:
+       type: medical_eval_hle
+       name: Medical-Eval-HLE
+     metrics:
+     - name: accuracy
+       type: accuracy
+       value: 19.4
+       verified: false
+   - task:
+       type: text-generation
+     dataset:
+       type: supergpqa
+       name: SuperGPQA
+     metrics:
+     - name: accuracy
+       type: accuracy
+       value: 37.2
+       verified: false
+   - task:
+       type: text-generation
+     dataset:
+       type: medbullets
+       name: Medbullets
+     metrics:
+     - name: accuracy
+       type: accuracy
+       value: 57.8
+       verified: false
+   - task:
+       type: text-generation
+     dataset:
+       type: mmlu_pro
+       name: MMLU-pro
+     metrics:
+     - name: accuracy
+       type: accuracy
+       value: 64.3
+       verified: false
+   - task:
+       type: text-generation
+     dataset:
+       type: afrimedqa
+       name: AfrimedQA
+     metrics:
+     - name: accuracy
+       type: accuracy
+       value: 74.7
+       verified: false
+   - task:
+       type: text-generation
+     dataset:
+       type: medmcqa
+       name: MedMCQA
+     metrics:
+     - name: accuracy
+       type: accuracy
+       value: 68.3
+       verified: false
+   - task:
+       type: text-generation
+     dataset:
+       type: medqa_usmle
+       name: MedQA-USMLE
+     metrics:
+     - name: accuracy
+       type: accuracy
+       value: 76.8
+       verified: false
+   - task:
+       type: text-generation
+     dataset:
+       type: cmb
+       name: CMB
+     metrics:
+     - name: accuracy
+       type: accuracy
+       value: 92.5
+       verified: false
+   - task:
+       type: text-generation
+     dataset:
+       type: cmexam
+       name: CMExam
+     metrics:
+     - name: accuracy
+       type: accuracy
+       value: 87.4
+       verified: false
+   - task:
+       type: text-generation
+     dataset:
+       type: pubmedqa
+       name: PubMedQA
+     metrics:
+     - name: accuracy
+       type: accuracy
+       value: 76.6
+       verified: false
+   - task:
+       type: text-generation
+     dataset:
+       type: medexqa
+       name: MedExQA
+     metrics:
+     - name: accuracy
+       type: accuracy
+       value: 81.5
+       verified: false
+   - task:
+       type: text-generation
+     dataset:
+       type: explaincpe
+       name: ExplainCPE
+     metrics:
+     - name: accuracy
+       type: accuracy
+       value: 89.5
+       verified: false
+   - task:
+       type: text-generation
+     dataset:
+       type: mmlu_med
+       name: MMLU-Med
+     metrics:
+     - name: accuracy
+       type: accuracy
+       value: 87.4
+       verified: false
+   - task:
+       type: text-generation
+     dataset:
+       type: medxperqa
+       name: MedXperQA
+     metrics:
+     - name: accuracy
+       type: accuracy
+       value: 20.7
+       verified: false
+   - task:
+       type: text-generation
+     dataset:
+       type: anesbench
+       name: AnesBench
+     metrics:
+     - name: accuracy
+       type: accuracy
+       value: 53.1
+       verified: false
+   - task:
+       type: text-generation
+     dataset:
+       type: diagnosisarena
+       name: DiagnosisArena
+     metrics:
+     - name: accuracy
+       type: accuracy
+       value: 64.4
+       verified: false
+   - task:
+       type: text-generation
+     dataset:
+       type: clinbench_hbp
+       name: Clinbench-HBP
+     metrics:
+     - name: accuracy
+       type: accuracy
+       value: 80.6
+       verified: false
+   - task:
+       type: text-generation
+     dataset:
+       type: medpair
+       name: MedPAIR
+     metrics:
+     - name: accuracy
+       type: accuracy
+       value: 32.3
+       verified: false
+   - task:
+       type: text-generation
+     dataset:
+       type: amqa
+       name: AMQA
+     metrics:
+     - name: accuracy
+       type: accuracy
+       value: 72.7
+       verified: false
+   - task:
+       type: text-generation
+     dataset:
+       type: medethicaleval
+       name: MedethicalEval
+     metrics:
+     - name: accuracy
+       type: accuracy
+       value: 92.2
+       verified: false
  ---

- # MedGo: Medical Large Language Model Based on Qwen3-30B
+ # MedGo: Medical Large Language Model Based on Qwen3-32B

  <div align="center">

@@ -568,6 +539,4 @@ Special thanks to:

  [⬆ Back to Top](#medgo-medical-large-language-model-based-on-qwen25-32b)

- </div>
-
-
+ </div>
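
The `model-index` block this commit rewrites follows the standard Hugging Face model-card metadata schema, so the benchmark numbers above can be read back programmatically. Below is a minimal sketch, not part of the commit: it assumes PyYAML is installed and that a local copy of the updated `README.md` is in the working directory.

```python
import yaml  # PyYAML

with open("README.md", encoding="utf-8") as f:
    text = f.read()

# The card metadata sits between the first two "---" fences.
_, front_matter, _ = text.split("---", 2)
meta = yaml.safe_load(front_matter)

entry = meta["model-index"][0]
print(entry["name"])       # Med-Go-32B
print(meta["base_model"])  # ['Qwen/Qwen3-32B'] after this commit

# Each result pairs one dataset with its accuracy metric.
for result in entry["results"]:
    dataset = result["dataset"]["name"]
    for metric in result["metrics"]:
        print(f"{dataset}: {metric['type']} = {metric['value']} "
              f"(verified: {metric['verified']})")
```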