ICT-TIME-and-Querit committed on
Commit
12e4113
·
verified ·
1 Parent(s): f9ff494

Update mergekit_config.yml

Browse files
Files changed (1) hide show
  1. mergekit_config.yml +5 -411
mergekit_config.yml CHANGED
@@ -1,424 +1,18 @@
1
- # models:
2
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_classification_gradient2_full/
3
- # parameters:
4
- # weight: 1
5
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_clustering_gradient2_full/
6
- # parameters:
7
- # weight: 1
8
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_retrieval_gradient2_full/
9
- # parameters:
10
- # weight: 1
11
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_sts_gradient2_full/
12
- # parameters:
13
- # weight: 1
14
- # merge_method: multislerp
15
- # base_model: /mnt/data/models/Qwen/Qwen3-4B
16
- # dtype: float32
17
- # slices:
18
- # - sources:
19
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_classification_gradient2_full/
20
- # layer_range: [0, 36]
21
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_sts_gradient2_full/
22
- # layer_range: [0, 36]
23
- # merge_method: slerp
24
- # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_classification_gradient2_full/
25
- # parameters:
26
- # t: 0.5
27
- # dtype: float32
28
-
29
- # slices:
30
- # - sources:
31
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_sts_classification_selerp
32
- # layer_range: [0, 36]
33
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_clustering_gradient2_full/
34
- # layer_range: [0, 36]
35
- # merge_method: slerp
36
- # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_sts_classification_selerp
37
- # parameters:
38
- # t: 0.5
39
- # dtype: float32
40
-
41
-
42
- # slices:
43
- # - sources:
44
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_sts_classification_clustering_selerp
45
- # layer_range: [0, 36]
46
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_retrieval_gradient2_full/
47
- # layer_range: [0, 36]
48
- # merge_method: slerp
49
- # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_sts_classification_clustering_selerp
50
- # parameters:
51
- # t: 0.5
52
- # dtype: float32
53
-
54
-
55
-
56
- # slices:
57
- # - sources:
58
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_full
59
- # layer_range: [0, 36]
60
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_other_full
61
- # layer_range: [0, 36]
62
- # merge_method: slerp
63
- # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_full
64
- # parameters:
65
- # t: 0.5
66
- # dtype: float32
67
- # models:
68
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_cut0.15_clustering1/
69
- # parameters:
70
- # weight: 0.863526622
71
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_cut0.15_clustering2/
72
- # parameters:
73
- # weight: 0.020574888
74
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_cut0.15_clustering3/
75
- # parameters:
76
- # weight: 0.11589849
77
- # merge_method: ties
78
- # base_model: /mnt/data/models/Qwen/Qwen3-0.6B
79
- # parameters:
80
- # normalize: true
81
- # int8_mask: true
82
- # dtype: float32
83
-
84
- # slices:
85
- # - sources:
86
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
87
- # layer_range: [0, 36]
88
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering3_full/
89
- # layer_range: [0, 36]
90
- # merge_method: slerp
91
- # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
92
- # parameters:
93
- # t: 0.84
94
- # dtype: float32
95
-
96
-
97
- # slices:
98
- # - sources:
99
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering23_slerp
100
- # layer_range: [0, 36]
101
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
102
- # layer_range: [0, 36]
103
- # merge_method: slerp
104
- # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering23_slerp
105
- # parameters:
106
- # t: 0.86
107
- # dtype: float32
108
-
109
-
110
-
111
- # models:
112
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
113
- # parameters:
114
- # weight: 0.863526622
115
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
116
- # parameters:
117
- # weight: 0.020574888
118
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering3_full/
119
- # parameters:
120
- # weight: 0.11589849
121
- # merge_method: ties
122
- # base_model: /mnt/data/models/Qwen/Qwen3-4B
123
- # parameters:
124
- # normalize: true
125
- # int8_mask: true
126
- # dtype: float32
127
-
128
- # models:
129
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
130
- # parameters:
131
- # weight: 0.4
132
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
133
- # parameters:
134
- # weight: 0.3
135
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering3_full/
136
- # parameters:
137
- # weight: 0.3
138
- # merge_method: ties
139
- # base_model: /mnt/data/models/Qwen/Qwen3-4B
140
- # parameters:
141
- # normalize: true
142
- # int8_mask: true
143
- # dtype: float32
144
-
145
-
146
- # models:
147
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
148
- # parameters:
149
- # weight: 1
150
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
151
- # parameters:
152
- # weight: 1
153
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering3_full/
154
- # parameters:
155
- # weight: 1
156
- # merge_method: ties
157
- # base_model: /mnt/data/models/Qwen/Qwen3-4B
158
- # parameters:
159
- # normalize: true
160
- # int8_mask: true
161
- # dtype: float32
162
-
163
- # slices:
164
- # - sources:
165
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
166
- # layer_range: [0, 36]
167
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering3_full/
168
- # layer_range: [0, 36]
169
- # merge_method: slerp
170
- # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
171
- # parameters:
172
- # t: 0.5
173
- # dtype: float32
174
-
175
- # slices:
176
- # - sources:
177
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering23_slerp55
178
- # layer_range: [0, 36]
179
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
180
- # layer_range: [0, 36]
181
- # merge_method: slerp
182
- # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering23_slerp55
183
- # parameters:
184
- # t: 0.5
185
- # dtype: float32
186
-
187
- # slices:
188
- # - sources:
189
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering_retrieval123_slerp55/
190
- # layer_range: [0, 36]
191
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full/
192
- # layer_range: [0, 36]
193
- # merge_method: slerp
194
- # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering_retrieval123_slerp55/
195
- # parameters:
196
- # t: 0.5
197
- # dtype: float32
198
-
199
-
200
- # slices:
201
- # - sources:
202
- # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-hotpotqa_0.02_8_5e-5_16_lora32_full/
203
- # layer_range: [0, 36]
204
- # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-imdb_classification_0.02_2_5e-5_16_lora32_full
205
- # layer_range: [0, 36]
206
- # merge_method: slerp
207
- # base_model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-hotpotqa_0.02_8_5e-5_16_lora32_full/
208
- # parameters:
209
- # t: 0.5
210
- # dtype: float32
211
-
212
-
213
- # models:
214
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
215
- # parameters:
216
- # weight: 0.5
217
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
218
- # parameters:
219
- # weight: 0.5
220
- # merge_method: dare_ties
221
- # base_model: /mnt/data/models/Qwen/Qwen3-4B
222
- # parameters:
223
- # normalize: true
224
- # int8_mask: true
225
- # dtype: float32
226
-
227
- # models:
228
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
229
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
230
- # merge_method: model_stock
231
- # base_model: /mnt/data/models/Qwen/Qwen3-4B
232
- # parameters:
233
- # normalize: true
234
- # int8_mask: true
235
- # dtype: float32
236
- # models:
237
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
238
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
239
- # merge_method: karcher
240
- # dtype: float32
241
-
242
- # models:
243
- # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-medrxiv_title_0.02_8_5e-5_16_lora32_full
244
- # parameters:
245
- # weight: 0.5
246
- # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-squad_0.02_8_5e-5_16_lora32_full
247
- # parameters:
248
- # weight: 0.5
249
- # merge_method: multislerp
250
- # dtype: float32
251
- # slices:
252
- # - sources:
253
- # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-medrxiv_title_0.02_8_5e-5_16_lora32_full
254
- # layer_range: [0, 36]
255
- # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-squad_0.02_8_5e-5_16_lora32_full
256
- # layer_range: [0, 36]
257
- # merge_method: slerp
258
- # base_model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-medrxiv_title_0.02_8_5e-5_16_lora32_full
259
- # parameters:
260
- # t: 0.5
261
- # dtype: float32
262
- # models:
263
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
264
- # parameters:
265
- # weight: 0.863
266
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
267
- # parameters:
268
- # weight: 0.137
269
- # merge_method: task_arithmetic
270
- # base_model: /mnt/data/models/Qwen/Qwen3-4B
271
- # parameters:
272
- # normalize: true
273
- # int8_mask: true
274
- # dtype: float32
275
-
276
- # models:
277
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
278
- # parameters:
279
- # weight: 0.863
280
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
281
- # parameters:
282
- # weight: 0.137
283
- # merge_method: ties
284
- # base_model: /mnt/data/models/Qwen/Qwen3-4B
285
- # parameters:
286
- # normalize: true
287
- # int8_mask: true
288
- # dtype: float32
289
- # slices:
290
- # - sources:
291
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
292
- # layer_range: [0, 36]
293
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
294
- # layer_range: [0, 36]
295
- # merge_method: slerp
296
- # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
297
- # parameters:
298
- # t: 0.137
299
- # dtype: float32
300
-
301
-
302
-
303
-
304
- # slices:
305
- # - sources:
306
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_cut0.15_clustering1_slerp_same_weighted
307
- # layer_range: [0, 36]
308
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_full
309
- # layer_range: [0, 36]
310
- # merge_method: slerp
311
- # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_cut0.15_clustering1_slerp_same_weighted
312
- # parameters:
313
- # t: 0.5
314
- # dtype: float32
315
-
316
-
317
- # slices:
318
- # - sources:
319
- # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-squad_0.02_8_5e-5_16_lora32_full
320
- # layer_range: [0, 36]
321
- # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-medrxiv_abstract_0.02_8_5e-5_16_lora32_full
322
- # layer_range: [0, 36]
323
- # merge_method: slerp
324
- # base_model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-squad_0.02_8_5e-5_16_lora32_full
325
- # parameters:
326
- # t: 0.5
327
- # dtype: float32
328
-
329
- # models:
330
- # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-sts_0.02_8_5e-5_16_lora32_full
331
- # parameters:
332
- # weight: 0.5
333
- # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-toxic_conversations_classification_0.02_2_5e-5_16_lora32_full
334
- # parameters:
335
- # weight: 0.5
336
- # merge_method: multislerp
337
- # dtype: float32
338
-
339
- # models:
340
- # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-sts_0.02_8_5e-5_16_lora32_full
341
- # parameters:
342
- # weight: 0.07
343
- # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-tweet_sentiment_extraction_classification_0.02_3_5e-5_16_lora32_full
344
- # parameters:
345
- # weight: 0.93
346
- # merge_method: multislerp
347
- # dtype: float32
348
-
349
-
350
-
351
- # models:
352
- # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-sts_0.02_8_5e-5_16_lora32_full
353
- # parameters:
354
- # weight: 0.5
355
- # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-tweet_sentiment_extraction_classification_0.02_3_5e-5_16_lora32_full
356
- # parameters:
357
- # weight: 0.5
358
- # merge_method: multislerp
359
- # dtype: float32
360
-
361
-
362
- # models:
363
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled50_full
364
- # parameters:
365
- # weight: 0.5
366
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_other50_full
367
- # parameters:
368
- # weight: 0.5
369
- # merge_method: multislerp
370
- # dtype: float32
371
-
372
-
373
-
374
- # models:
375
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_jsonl_output60_seed1_full
376
- # parameters:
377
- # weight: 0.5
378
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_60_seed100_full
379
- # parameters:
380
- # weight: 0.5
381
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled60_full
382
- # parameters:
383
- # weight: 0.5
384
- # merge_method: multislerp
385
- # dtype: float32
386
-
387
-
388
- # models:
389
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_general_all_data_sampled20_full
390
- # parameters:
391
- # weight: 0.2
392
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_original_4B_with_mrl_general_all_data_sampled40_full
393
- # parameters:
394
- # weight: 0.4
395
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_original_4B_with_mrl_general_all_data_sampled60_full
396
- # parameters:
397
- # weight: 0.6
398
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_general_all_data_sampled80_full
399
- # parameters:
400
- # weight: 0.8
401
- # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_original_4B_with_mrl_general_all_data_full
402
- # parameters:
403
- # weight: 1.0
404
- # merge_method: multislerp
405
- # dtype: float32
406
-
407
 
408
  models:
409
- - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_general_all_data_sampled20_without_mrl_full
410
  parameters:
411
  weight: 0.2
412
- - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_original_4B_without_mrl_general_all_data_sampled40_full
413
  parameters:
414
  weight: 0.4
415
- - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_original_4B_without_mrl_general_all_data_sampled60_full
416
  parameters:
417
  weight: 0.6
418
- - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_general_all_data_sampled80_without_mrl_full
419
  parameters:
420
  weight: 0.8
421
- - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_plus_code_multilingual_full
422
  parameters:
423
  weight: 1.0
424
  merge_method: multislerp
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
 
2
  models:
3
+ - model: Qwen3-4B_general_all_data_sampled20_without_mrl_full
4
  parameters:
5
  weight: 0.2
6
+ - model: Qwen3-4B_original_4B_without_mrl_general_all_data_sampled40_full
7
  parameters:
8
  weight: 0.4
9
+ - model: Qwen3-4B_original_4B_without_mrl_general_all_data_sampled60_full
10
  parameters:
11
  weight: 0.6
12
+ - model: Qwen3-4B_general_all_data_sampled80_without_mrl_full
13
  parameters:
14
  weight: 0.8
15
+ - model: Qwen3-4B_data_mixing_gradient2_plus_code_multilingual_full
16
  parameters:
17
  weight: 1.0
18
  merge_method: multislerp