hengranZhang committed on
Commit
0e2b4d8
·
verified ·
1 Parent(s): 785abd1

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 2560,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": false,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": true,
9
+ "include_prompt": true
10
+ }
README.md ADDED
@@ -0,0 +1,882 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: []
3
+ library_name: transformers
4
+ tags:
5
+ - mergekit
6
+ - merge
7
+
8
+ ---
9
+ # Qwen3-0.6B_sample20_40_60_80_100_multiselerp_merging
10
+
11
+ This is a merge of pre-trained language models created using [mergekit](https://github.com/cg123/mergekit).
12
+
13
+ ## Merge Details
14
+ ### Merge Method
15
+
16
+ This model was merged using the [Multi-SLERP](https://goddard.blog/posts/multislerp-wow-what-a-cool-idea) merge method.
17
+
18
+ ### Models Merged
19
+
20
+ The following models were included in the merge:
21
+ * /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample40
22
+ * /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample80
23
+ * /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample60
24
+ * /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample20
25
+ * /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_all_eng_data
26
+
27
+ ### Configuration
28
+
29
+ The following YAML configuration was used to produce this model:
30
+
31
+ ```yaml
32
+ # models:
33
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_classification_gradient2_full/
34
+ # parameters:
35
+ # weight: 1
36
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_clustering_gradient2_full/
37
+ # parameters:
38
+ # weight: 1
39
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_retrieval_gradient2_full/
40
+ # parameters:
41
+ # weight: 1
42
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_sts_gradient2_full/
43
+ # parameters:
44
+ # weight: 1
45
+ # merge_method: multislerp
46
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
47
+ # dtype: float32
48
+ # slices:
49
+ # - sources:
50
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_classification_gradient2_full/
51
+ # layer_range: [0, 36]
52
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_sts_gradient2_full/
53
+ # layer_range: [0, 36]
54
+ # merge_method: slerp
55
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_classification_gradient2_full/
56
+ # parameters:
57
+ # t: 0.5
58
+ # dtype: float32
59
+
60
+ # slices:
61
+ # - sources:
62
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_sts_classification_selerp
63
+ # layer_range: [0, 36]
64
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_clustering_gradient2_full/
65
+ # layer_range: [0, 36]
66
+ # merge_method: slerp
67
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_sts_classification_selerp
68
+ # parameters:
69
+ # t: 0.5
70
+ # dtype: float32
71
+
72
+
73
+ # slices:
74
+ # - sources:
75
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_sts_classification_clustering_selerp
76
+ # layer_range: [0, 36]
77
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_retrieval_gradient2_full/
78
+ # layer_range: [0, 36]
79
+ # merge_method: slerp
80
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_sts_classification_clustering_selerp
81
+ # parameters:
82
+ # t: 0.5
83
+ # dtype: float32
84
+
85
+
86
+
87
+ # slices:
88
+ # - sources:
89
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_full
90
+ # layer_range: [0, 36]
91
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_other_full
92
+ # layer_range: [0, 36]
93
+ # merge_method: slerp
94
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_full
95
+ # parameters:
96
+ # t: 0.5
97
+ # dtype: float32
98
+ # models:
99
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_cut0.15_clustering1/
100
+ # parameters:
101
+ # weight: 0.863526622
102
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_cut0.15_clustering2/
103
+ # parameters:
104
+ # weight: 0.020574888
105
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_cut0.15_clustering3/
106
+ # parameters:
107
+ # weight: 0.11589849
108
+ # merge_method: ties
109
+ # base_model: /mnt/data/models/Qwen/Qwen3-0.6B
110
+ # parameters:
111
+ # normalize: true
112
+ # int8_mask: true
113
+ # dtype: float32
114
+
115
+ # slices:
116
+ # - sources:
117
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
118
+ # layer_range: [0, 36]
119
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering3_full/
120
+ # layer_range: [0, 36]
121
+ # merge_method: slerp
122
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
123
+ # parameters:
124
+ # t: 0.84
125
+ # dtype: float32
126
+
127
+
128
+ # slices:
129
+ # - sources:
130
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering23_slerp
131
+ # layer_range: [0, 36]
132
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
133
+ # layer_range: [0, 36]
134
+ # merge_method: slerp
135
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering23_slerp
136
+ # parameters:
137
+ # t: 0.86
138
+ # dtype: float32
139
+
140
+
141
+
142
+ # models:
143
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
144
+ # parameters:
145
+ # weight: 0.863526622
146
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
147
+ # parameters:
148
+ # weight: 0.020574888
149
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering3_full/
150
+ # parameters:
151
+ # weight: 0.11589849
152
+ # merge_method: ties
153
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
154
+ # parameters:
155
+ # normalize: true
156
+ # int8_mask: true
157
+ # dtype: float32
158
+
159
+ # models:
160
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
161
+ # parameters:
162
+ # weight: 0.4
163
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
164
+ # parameters:
165
+ # weight: 0.3
166
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering3_full/
167
+ # parameters:
168
+ # weight: 0.3
169
+ # merge_method: ties
170
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
171
+ # parameters:
172
+ # normalize: true
173
+ # int8_mask: true
174
+ # dtype: float32
175
+
176
+
177
+ # models:
178
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
179
+ # parameters:
180
+ # weight: 1
181
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
182
+ # parameters:
183
+ # weight: 1
184
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering3_full/
185
+ # parameters:
186
+ # weight: 1
187
+ # merge_method: ties
188
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
189
+ # parameters:
190
+ # normalize: true
191
+ # int8_mask: true
192
+ # dtype: float32
193
+
194
+ # slices:
195
+ # - sources:
196
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
197
+ # layer_range: [0, 36]
198
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering3_full/
199
+ # layer_range: [0, 36]
200
+ # merge_method: slerp
201
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
202
+ # parameters:
203
+ # t: 0.5
204
+ # dtype: float32
205
+
206
+ # slices:
207
+ # - sources:
208
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering23_slerp55
209
+ # layer_range: [0, 36]
210
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
211
+ # layer_range: [0, 36]
212
+ # merge_method: slerp
213
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering23_slerp55
214
+ # parameters:
215
+ # t: 0.5
216
+ # dtype: float32
217
+
218
+ # slices:
219
+ # - sources:
220
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering_retrieval123_slerp55/
221
+ # layer_range: [0, 36]
222
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full/
223
+ # layer_range: [0, 36]
224
+ # merge_method: slerp
225
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering_retrieval123_slerp55/
226
+ # parameters:
227
+ # t: 0.5
228
+ # dtype: float32
229
+
230
+
231
+ # slices:
232
+ # - sources:
233
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-hotpotqa_0.02_8_5e-5_16_lora32_full/
234
+ # layer_range: [0, 36]
235
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-imdb_classification_0.02_2_5e-5_16_lora32_full
236
+ # layer_range: [0, 36]
237
+ # merge_method: slerp
238
+ # base_model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-hotpotqa_0.02_8_5e-5_16_lora32_full/
239
+ # parameters:
240
+ # t: 0.5
241
+ # dtype: float32
242
+
243
+
244
+ # models:
245
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
246
+ # parameters:
247
+ # weight: 0.5
248
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
249
+ # parameters:
250
+ # weight: 0.5
251
+ # merge_method: dare_ties
252
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
253
+ # parameters:
254
+ # normalize: true
255
+ # int8_mask: true
256
+ # dtype: float32
257
+
258
+ # models:
259
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
260
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
261
+ # merge_method: model_stock
262
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
263
+ # parameters:
264
+ # normalize: true
265
+ # int8_mask: true
266
+ # dtype: float32
267
+ # models:
268
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
269
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
270
+ # merge_method: karcher
271
+ # dtype: float32
272
+
273
+ # models:
274
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-medrxiv_title_0.02_8_5e-5_16_lora32_full
275
+ # parameters:
276
+ # weight: 0.5
277
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-squad_0.02_8_5e-5_16_lora32_full
278
+ # parameters:
279
+ # weight: 0.5
280
+ # merge_method: multislerp
281
+ # dtype: float32
282
+ # slices:
283
+ # - sources:
284
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-medrxiv_title_0.02_8_5e-5_16_lora32_full
285
+ # layer_range: [0, 36]
286
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-squad_0.02_8_5e-5_16_lora32_full
287
+ # layer_range: [0, 36]
288
+ # merge_method: slerp
289
+ # base_model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-medrxiv_title_0.02_8_5e-5_16_lora32_full
290
+ # parameters:
291
+ # t: 0.5
292
+ # dtype: float32
293
+ # models:
294
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
295
+ # parameters:
296
+ # weight: 0.863
297
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
298
+ # parameters:
299
+ # weight: 0.137
300
+ # merge_method: task_arithmetic
301
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
302
+ # parameters:
303
+ # normalize: true
304
+ # int8_mask: true
305
+ # dtype: float32
306
+
307
+ # models:
308
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
309
+ # parameters:
310
+ # weight: 0.863
311
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
312
+ # parameters:
313
+ # weight: 0.137
314
+ # merge_method: ties
315
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
316
+ # parameters:
317
+ # normalize: true
318
+ # int8_mask: true
319
+ # dtype: float32
320
+ # slices:
321
+ # - sources:
322
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
323
+ # layer_range: [0, 36]
324
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
325
+ # layer_range: [0, 36]
326
+ # merge_method: slerp
327
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
328
+ # parameters:
329
+ # t: 0.137
330
+ # dtype: float32
331
+
332
+
333
+
334
+
335
+ # slices:
336
+ # - sources:
337
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_cut0.15_clustering1_slerp_same_weighted
338
+ # layer_range: [0, 36]
339
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_full
340
+ # layer_range: [0, 36]
341
+ # merge_method: slerp
342
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_cut0.15_clustering1_slerp_same_weighted
343
+ # parameters:
344
+ # t: 0.5
345
+ # dtype: float32
346
+
347
+
348
+ # slices:
349
+ # - sources:
350
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-squad_0.02_8_5e-5_16_lora32_full
351
+ # layer_range: [0, 36]
352
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-medrxiv_abstract_0.02_8_5e-5_16_lora32_full
353
+ # layer_range: [0, 36]
354
+ # merge_method: slerp
355
+ # base_model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-squad_0.02_8_5e-5_16_lora32_full
356
+ # parameters:
357
+ # t: 0.5
358
+ # dtype: float32
359
+
360
+ # models:
361
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-squad_0.02_8_5e-5_16_lora32_full
362
+ # parameters:
363
+ # weight: 0.5
364
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-medrxiv_abstract_0.02_8_5e-5_16_lora32_full
365
+ # parameters:
366
+ # weight: 0.5
367
+ # merge_method: multislerp
368
+ # dtype: float32
369
+
370
+
371
+
372
+
373
+ # slices:
374
+ # - sources:
375
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-fiqa_0.02_8_5e-5_16_lora32_full
376
+ # layer_range: [0, 36]
377
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-tweet_sentiment_extraction_classification_0.02_3_5e-5_16_lora32_full
378
+ # layer_range: [0, 36]
379
+ # merge_method: slerp
380
+ # base_model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-fiqa_0.02_8_5e-5_16_lora32_full
381
+ # parameters:
382
+ # t: 0.5
383
+ # dtype: float32
384
+
385
+
386
+
387
+
388
+
389
+ # slices:
390
+ # - sources:
391
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_sample20_full
392
+ # layer_range: [0, 36]
393
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_other_sample20_full
394
+ # layer_range: [0, 36]
395
+ # merge_method: slerp
396
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_sample20_full
397
+ # parameters:
398
+ # t: 0.864
399
+ # dtype: float32
400
+
401
+
402
+ # slices:
403
+ # - sources:
404
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering12_other_sample20_slerp_merging
405
+ # layer_range: [0, 36]
406
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled80_full
407
+ # layer_range: [0, 36]
408
+ # merge_method: slerp
409
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering12_other_sample20_slerp_merging
410
+ # parameters:
411
+ # t: 0.8
412
+ # dtype: float32
413
+
414
+
415
+ # slices:
416
+ # - sources:
417
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_sample80_full
418
+ # layer_range: [0, 36]
419
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_other_sample80_full
420
+ # layer_range: [0, 36]
421
+ # merge_method: slerp
422
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_sample80_full
423
+ # parameters:
424
+ # t: 0.864
425
+ # dtype: float32
426
+
427
+
428
+ # slices:
429
+ # - sources:
430
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering12_other_sample80
431
+ # layer_range: [0, 36]
432
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
433
+ # layer_range: [0, 36]
434
+ # merge_method: slerp
435
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering12_other_sample80
436
+ # parameters:
437
+ # t: 0.2
438
+ # dtype: float32
439
+
440
+
441
+
442
+ # slices:
443
+ # - sources:
444
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full
445
+ # layer_range: [0, 36]
446
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full
447
+ # layer_range: [0, 36]
448
+ # merge_method: slerp
449
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full
450
+ # parameters:
451
+ # t: 0.864
452
+ # dtype: float32
453
+
454
+
455
+
456
+ # slices:
457
+ # - sources:
458
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering12_full_merging_slerp_number_weighted
459
+ # layer_range: [0, 36]
460
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
461
+ # layer_range: [0, 36]
462
+ # merge_method: slerp
463
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering12_full_merging_slerp_number_weighted
464
+ # parameters:
465
+ # t: 0.9
466
+ # dtype: float32
467
+
468
+
469
+
470
+ # models:
471
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full
472
+ # parameters:
473
+ # weight: 0.1
474
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full
475
+ # parameters:
476
+ # weight: 0.2
477
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
478
+ # parameters:
479
+ # weight: 0.7
480
+ # merge_method: multislerp
481
+ # dtype: float32
482
+
483
+
484
+
485
+ # models:
486
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled60_full
487
+ # parameters:
488
+ # weight: 0.6
489
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
490
+ # parameters:
491
+ # weight: 0.2
492
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_full
493
+ # parameters:
494
+ # weight: 0.4
495
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled80_full
496
+ # parameters:
497
+ # weight: 0.8
498
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
499
+ # parameters:
500
+ # weight: 1.0
501
+ # merge_method: multislerp
502
+ # dtype: float32
503
+
504
+ # slices:
505
+ # - sources:
506
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
507
+ # layer_range: [0, 36]
508
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_full
509
+ # layer_range: [0, 36]
510
+ # merge_method: slerp
511
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
512
+ # parameters:
513
+ # t: 0.66
514
+ # dtype: float32
515
+
516
+ # slices:
517
+ # - sources:
518
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master//root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sample20_sample40
519
+ # layer_range: [0, 36]
520
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled60_full
521
+ # layer_range: [0, 36]
522
+ # merge_method: slerp
523
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master//root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sample20_sample40
524
+ # parameters:
525
+ # t: 0.60
526
+ # dtype: float32
527
+
528
+ # slices:
529
+ # - sources:
530
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master//root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sample20_sample40_sample60
531
+ # layer_range: [0, 36]
532
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled80_full
533
+ # layer_range: [0, 36]
534
+ # merge_method: slerp
535
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master//root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sample20_sample40_sample60
536
+ # parameters:
537
+ # t: 0.57
538
+ # dtype: float32
539
+
540
+ # slices:
541
+ # - sources:
542
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master//root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sample20_sample40_sample60_sample80
543
+ # layer_range: [0, 36]
544
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
545
+ # layer_range: [0, 36]
546
+ # merge_method: slerp
547
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master//root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sample20_sample40_sample60_sample80
548
+ # parameters:
549
+ # t: 0.56
550
+ # dtype: float32
551
+
552
+
553
+
554
+ # models:
555
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-8000_full
556
+ # parameters:
557
+ # weight: 0.86
558
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-6000_full
559
+ # parameters:
560
+ # weight: 0.65
561
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-4000_full
562
+ # parameters:
563
+ # weight: 0.43
564
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000_full
565
+ # parameters:
566
+ # weight: 0.22
567
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
568
+ # parameters:
569
+ # weight: 1.0
570
+ # merge_method: multislerp
571
+ # dtype: float32
572
+
573
+
574
+
575
+
576
+ # models:
577
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-8000_full
578
+ # parameters:
579
+ # weight: 0.86
580
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-6000_full
581
+ # parameters:
582
+ # weight: 0.65
583
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-4000_full
584
+ # parameters:
585
+ # weight: 0.43
586
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000_full
587
+ # parameters:
588
+ # weight: 0.22
589
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
590
+ # parameters:
591
+ # weight: 1.0
592
+ # merge_method: ties
593
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
594
+ # parameters:
595
+ # normalize: true
596
+ # int8_mask: true
597
+ # dtype: float32
598
+
599
+ # models:
600
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-8000_full
601
+ # parameters:
602
+ # weight: 0.86
603
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-6000_full
604
+ # parameters:
605
+ # weight: 0.65
606
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-4000_full
607
+ # parameters:
608
+ # weight: 0.43
609
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000_full
610
+ # parameters:
611
+ # weight: 0.22
612
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
613
+ # parameters:
614
+ # weight: 1.0
615
+ # merge_method: task_arithmetic
616
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
617
+ # parameters:
618
+ # normalize: true
619
+ # int8_mask: true
620
+ # dtype: float32
621
+
622
+
623
+
624
+ # models:
625
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-8000_full
626
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-6000_full
627
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-4000_full
628
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000_full
629
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
630
+ # merge_method: sce
631
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
632
+ # parameters:
633
+ # normalize: true
634
+ # int8_mask: true
635
+ # dtype: float32
636
+
637
+ # models:
638
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-8000_full
639
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-6000_full
640
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-4000_full
641
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000_full
642
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
643
+ # merge_method: model_stock
644
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
645
+ # parameters:
646
+ # normalize: true
647
+ # int8_mask: true
648
+ # dtype: float32
649
+
650
+
651
+
652
+
653
+
654
+
655
+ # slices:
656
+ # - sources:
657
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000_full
658
+ # layer_range: [0, 36]
659
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-4000_full
660
+ # layer_range: [0, 36]
661
+ # merge_method: slerp
662
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000_full
663
+ # parameters:
664
+ # t: 0.66
665
+ # dtype: float32
666
+
667
+ # slices:
668
+ # - sources:
669
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000-4000
670
+ # layer_range: [0, 36]
671
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-6000_full
672
+ # layer_range: [0, 36]
673
+ # merge_method: slerp
674
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000-4000
675
+ # parameters:
676
+ # t: 0.60
677
+ # dtype: float32
678
+
679
+ # slices:
680
+ # - sources:
681
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000-4000-6000
682
+ # layer_range: [0, 36]
683
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-8000_full
684
+ # layer_range: [0, 36]
685
+ # merge_method: slerp
686
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000-4000-6000
687
+ # parameters:
688
+ # t: 0.57
689
+ # dtype: float32
690
+
691
+ # slices:
692
+ # - sources:
693
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000-4000-6000-8000
694
+ # layer_range: [0, 36]
695
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
696
+ # layer_range: [0, 36]
697
+ # merge_method: slerp
698
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000-4000-6000-8000
699
+ # parameters:
700
+ # t: 0.55
701
+ # dtype: float32
702
+
703
+
704
+
705
+
706
+
707
+
708
+
709
+
710
+ # models:
711
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled60_full
712
+ # parameters:
713
+ # weight: 0.6
714
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
715
+ # parameters:
716
+ # weight: 0.2
717
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_full
718
+ # parameters:
719
+ # weight: 0.4
720
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled80_full
721
+ # parameters:
722
+ # weight: 0.8
723
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
724
+ # parameters:
725
+ # weight: 1.0
726
+ # merge_method: ties
727
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
728
+ # parameters:
729
+ # normalize: true
730
+ # int8_mask: true
731
+ # dtype: float32
732
+
733
+ # models:
734
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled60_full
735
+ # parameters:
736
+ # weight: 0.6
737
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
738
+ # parameters:
739
+ # weight: 0.2
740
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_full
741
+ # parameters:
742
+ # weight: 0.4
743
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled80_full
744
+ # parameters:
745
+ # weight: 0.8
746
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
747
+ # parameters:
748
+ # weight: 1.0
749
+ # merge_method: task_arithmetic
750
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
751
+ # parameters:
752
+ # normalize: true
753
+ # int8_mask: true
754
+ # dtype: float32
755
+
756
+
757
+
758
+ # models:
759
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled60_full
760
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
761
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_full
762
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled80_full
763
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
764
+ # merge_method: sce
765
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
766
+ # parameters:
767
+ # normalize: true
768
+ # int8_mask: true
769
+ # dtype: float32
770
+
771
+ # models:
772
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled60_full
773
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
774
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_full
775
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled80_full
776
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
777
+ # merge_method: model_stock
778
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
779
+ # parameters:
780
+ # normalize: true
781
+ # int8_mask: true
782
+ # dtype: float32
783
+
784
+
785
+
786
+
787
+ # models:
788
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample60_seed100
789
+ # parameters:
790
+ # weight: 0.5
791
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample60_seed1
792
+ # parameters:
793
+ # weight: 0.5
794
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample60_seed42
795
+ # parameters:
796
+ # weight: 0.5
797
+ # merge_method: multislerp
798
+ # dtype: float32
799
+
800
+
801
+
802
+ # models:
803
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample_other50
804
+ # parameters:
805
+ # weight: 0.5
806
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample50
807
+ # parameters:
808
+ # weight: 0.5
809
+ # merge_method: multislerp
810
+ # dtype: float32
811
+
812
+
813
+ # slices:
814
+ # - sources:
815
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample_other50
816
+ # layer_range: [0, 28]
817
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample50
818
+ # layer_range: [0, 28]
819
+ # merge_method: slerp
820
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample_other50
821
+ # parameters:
822
+ # t: 0.5
823
+ # dtype: float32
824
+
825
+
826
+ # models:
827
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample_other50
828
+ # parameters:
829
+ # weight: 0.5
830
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample50
831
+ # parameters:
832
+ # weight: 0.5
833
+ # merge_method: ties
834
+ # base_model: /mnt/data/models/Qwen/Qwen3-0.6B
835
+ # parameters:
836
+ # normalize: true
837
+ # int8_mask: true
838
+ # dtype: float32
839
+
840
+
841
+
842
+ # models:
843
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample50_seed1/
844
+ # parameters:
845
+ # weight: 0.5
846
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample50_seed100/
847
+ # parameters:
848
+ # weight: 0.5
849
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample50/
850
+ # parameters:
851
+ # weight: 0.5
852
+ # merge_method: multislerp
853
+ # dtype: float32
854
+
855
+
856
+ models:
857
+ - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample20
858
+ parameters:
859
+ weight: 0.2
860
+ - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample40
861
+ parameters:
862
+ weight: 0.4
863
+ - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample60
864
+ parameters:
865
+ weight: 0.6
866
+ - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample80
867
+ parameters:
868
+ weight: 0.8
869
+ - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_all_eng_data
870
+ parameters:
871
+ weight: 1.0
872
+ merge_method: multislerp
873
+ dtype: float32
874
+
875
+
876
+
877
+
878
+
879
+
880
+
881
+
882
+ ```
added_tokens.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</think>": 151668,
3
+ "</tool_call>": 151658,
4
+ "</tool_response>": 151666,
5
+ "<think>": 151667,
6
+ "<tool_call>": 151657,
7
+ "<tool_response>": 151665,
8
+ "<|box_end|>": 151649,
9
+ "<|box_start|>": 151648,
10
+ "<|endoftext|>": 151643,
11
+ "<|file_sep|>": 151664,
12
+ "<|fim_middle|>": 151660,
13
+ "<|fim_pad|>": 151662,
14
+ "<|fim_prefix|>": 151659,
15
+ "<|fim_suffix|>": 151661,
16
+ "<|im_end|>": 151645,
17
+ "<|im_start|>": 151644,
18
+ "<|image_pad|>": 151655,
19
+ "<|object_ref_end|>": 151647,
20
+ "<|object_ref_start|>": 151646,
21
+ "<|quad_end|>": 151651,
22
+ "<|quad_start|>": 151650,
23
+ "<|repo_name|>": 151663,
24
+ "<|video_pad|>": 151656,
25
+ "<|vision_end|>": 151653,
26
+ "<|vision_pad|>": 151654,
27
+ "<|vision_start|>": 151652
28
+ }
config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3Model"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "dtype": "float32",
9
+ "eos_token_id": 151645,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 1024,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_types": [
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention"
44
+ ],
45
+ "max_position_embeddings": 40960,
46
+ "max_window_layers": 28,
47
+ "model_type": "qwen3",
48
+ "num_attention_heads": 16,
49
+ "num_hidden_layers": 28,
50
+ "num_key_value_heads": 8,
51
+ "rms_norm_eps": 1e-06,
52
+ "rope_scaling": null,
53
+ "rope_theta": 1000000,
54
+ "sliding_window": null,
55
+ "tie_word_embeddings": true,
56
+ "transformers_version": "4.57.1",
57
+ "use_cache": false,
58
+ "use_sliding_window": false,
59
+ "vocab_size": 151936
60
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "prompts": {
3
+ "query": "Instruct: Given a web search query, retrieve relevant passages that answer the query\nQuery:",
4
+ "document": ""
5
+ },
6
+ "default_prompt_name": null,
7
+ "similarity_fn_name": "cosine"
8
+ }
mergekit_config.yml ADDED
@@ -0,0 +1,849 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # models:
2
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_classification_gradient2_full/
3
+ # parameters:
4
+ # weight: 1
5
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_clustering_gradient2_full/
6
+ # parameters:
7
+ # weight: 1
8
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_retrieval_gradient2_full/
9
+ # parameters:
10
+ # weight: 1
11
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_sts_gradient2_full/
12
+ # parameters:
13
+ # weight: 1
14
+ # merge_method: multislerp
15
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
16
+ # dtype: float32
17
+ # slices:
18
+ # - sources:
19
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_classification_gradient2_full/
20
+ # layer_range: [0, 36]
21
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_sts_gradient2_full/
22
+ # layer_range: [0, 36]
23
+ # merge_method: slerp
24
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_classification_gradient2_full/
25
+ # parameters:
26
+ # t: 0.5
27
+ # dtype: float32
28
+
29
+ # slices:
30
+ # - sources:
31
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_sts_classification_selerp
32
+ # layer_range: [0, 36]
33
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_clustering_gradient2_full/
34
+ # layer_range: [0, 36]
35
+ # merge_method: slerp
36
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_sts_classification_selerp
37
+ # parameters:
38
+ # t: 0.5
39
+ # dtype: float32
40
+
41
+
42
+ # slices:
43
+ # - sources:
44
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_sts_classification_clustering_selerp
45
+ # layer_range: [0, 36]
46
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_retrieval_gradient2_full/
47
+ # layer_range: [0, 36]
48
+ # merge_method: slerp
49
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_sts_classification_clustering_selerp
50
+ # parameters:
51
+ # t: 0.5
52
+ # dtype: float32
53
+
54
+
55
+
56
+ # slices:
57
+ # - sources:
58
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_full
59
+ # layer_range: [0, 36]
60
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_other_full
61
+ # layer_range: [0, 36]
62
+ # merge_method: slerp
63
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_full
64
+ # parameters:
65
+ # t: 0.5
66
+ # dtype: float32
67
+ # models:
68
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_cut0.15_clustering1/
69
+ # parameters:
70
+ # weight: 0.863526622
71
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_cut0.15_clustering2/
72
+ # parameters:
73
+ # weight: 0.020574888
74
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_cut0.15_clustering3/
75
+ # parameters:
76
+ # weight: 0.11589849
77
+ # merge_method: ties
78
+ # base_model: /mnt/data/models/Qwen/Qwen3-0.6B
79
+ # parameters:
80
+ # normalize: true
81
+ # int8_mask: true
82
+ # dtype: float32
83
+
84
+ # slices:
85
+ # - sources:
86
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
87
+ # layer_range: [0, 36]
88
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering3_full/
89
+ # layer_range: [0, 36]
90
+ # merge_method: slerp
91
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
92
+ # parameters:
93
+ # t: 0.84
94
+ # dtype: float32
95
+
96
+
97
+ # slices:
98
+ # - sources:
99
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering23_slerp
100
+ # layer_range: [0, 36]
101
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
102
+ # layer_range: [0, 36]
103
+ # merge_method: slerp
104
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering23_slerp
105
+ # parameters:
106
+ # t: 0.86
107
+ # dtype: float32
108
+
109
+
110
+
111
+ # models:
112
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
113
+ # parameters:
114
+ # weight: 0.863526622
115
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
116
+ # parameters:
117
+ # weight: 0.020574888
118
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering3_full/
119
+ # parameters:
120
+ # weight: 0.11589849
121
+ # merge_method: ties
122
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
123
+ # parameters:
124
+ # normalize: true
125
+ # int8_mask: true
126
+ # dtype: float32
127
+
128
+ # models:
129
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
130
+ # parameters:
131
+ # weight: 0.4
132
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
133
+ # parameters:
134
+ # weight: 0.3
135
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering3_full/
136
+ # parameters:
137
+ # weight: 0.3
138
+ # merge_method: ties
139
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
140
+ # parameters:
141
+ # normalize: true
142
+ # int8_mask: true
143
+ # dtype: float32
144
+
145
+
146
+ # models:
147
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
148
+ # parameters:
149
+ # weight: 1
150
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
151
+ # parameters:
152
+ # weight: 1
153
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering3_full/
154
+ # parameters:
155
+ # weight: 1
156
+ # merge_method: ties
157
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
158
+ # parameters:
159
+ # normalize: true
160
+ # int8_mask: true
161
+ # dtype: float32
162
+
163
+ # slices:
164
+ # - sources:
165
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
166
+ # layer_range: [0, 36]
167
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering3_full/
168
+ # layer_range: [0, 36]
169
+ # merge_method: slerp
170
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
171
+ # parameters:
172
+ # t: 0.5
173
+ # dtype: float32
174
+
175
+ # slices:
176
+ # - sources:
177
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering23_slerp55
178
+ # layer_range: [0, 36]
179
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
180
+ # layer_range: [0, 36]
181
+ # merge_method: slerp
182
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering23_slerp55
183
+ # parameters:
184
+ # t: 0.5
185
+ # dtype: float32
186
+
187
+ # slices:
188
+ # - sources:
189
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering_retrieval123_slerp55/
190
+ # layer_range: [0, 36]
191
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full/
192
+ # layer_range: [0, 36]
193
+ # merge_method: slerp
194
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering_retrieval123_slerp55/
195
+ # parameters:
196
+ # t: 0.5
197
+ # dtype: float32
198
+
199
+
200
+ # slices:
201
+ # - sources:
202
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-hotpotqa_0.02_8_5e-5_16_lora32_full/
203
+ # layer_range: [0, 36]
204
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-imdb_classification_0.02_2_5e-5_16_lora32_full
205
+ # layer_range: [0, 36]
206
+ # merge_method: slerp
207
+ # base_model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-hotpotqa_0.02_8_5e-5_16_lora32_full/
208
+ # parameters:
209
+ # t: 0.5
210
+ # dtype: float32
211
+
212
+
213
+ # models:
214
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
215
+ # parameters:
216
+ # weight: 0.5
217
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
218
+ # parameters:
219
+ # weight: 0.5
220
+ # merge_method: dare_ties
221
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
222
+ # parameters:
223
+ # normalize: true
224
+ # int8_mask: true
225
+ # dtype: float32
226
+
227
+ # models:
228
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
229
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
230
+ # merge_method: model_stock
231
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
232
+ # parameters:
233
+ # normalize: true
234
+ # int8_mask: true
235
+ # dtype: float32
236
+ # models:
237
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
238
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
239
+ # merge_method: karcher
240
+ # dtype: float32
241
+
242
+ # models:
243
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-medrxiv_title_0.02_8_5e-5_16_lora32_full
244
+ # parameters:
245
+ # weight: 0.5
246
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-squad_0.02_8_5e-5_16_lora32_full
247
+ # parameters:
248
+ # weight: 0.5
249
+ # merge_method: multislerp
250
+ # dtype: float32
251
+ # slices:
252
+ # - sources:
253
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-medrxiv_title_0.02_8_5e-5_16_lora32_full
254
+ # layer_range: [0, 36]
255
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-squad_0.02_8_5e-5_16_lora32_full
256
+ # layer_range: [0, 36]
257
+ # merge_method: slerp
258
+ # base_model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-medrxiv_title_0.02_8_5e-5_16_lora32_full
259
+ # parameters:
260
+ # t: 0.5
261
+ # dtype: float32
262
+ # models:
263
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
264
+ # parameters:
265
+ # weight: 0.863
266
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
267
+ # parameters:
268
+ # weight: 0.137
269
+ # merge_method: task_arithmetic
270
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
271
+ # parameters:
272
+ # normalize: true
273
+ # int8_mask: true
274
+ # dtype: float32
275
+
276
+ # models:
277
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
278
+ # parameters:
279
+ # weight: 0.863
280
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
281
+ # parameters:
282
+ # weight: 0.137
283
+ # merge_method: ties
284
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
285
+ # parameters:
286
+ # normalize: true
287
+ # int8_mask: true
288
+ # dtype: float32
289
+ # slices:
290
+ # - sources:
291
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
292
+ # layer_range: [0, 36]
293
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
294
+ # layer_range: [0, 36]
295
+ # merge_method: slerp
296
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
297
+ # parameters:
298
+ # t: 0.137
299
+ # dtype: float32
300
+
301
+
302
+
303
+
304
+ # slices:
305
+ # - sources:
306
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_cut0.15_clustering1_slerp_same_weighted
307
+ # layer_range: [0, 36]
308
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_full
309
+ # layer_range: [0, 36]
310
+ # merge_method: slerp
311
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_cut0.15_clustering1_slerp_same_weighted
312
+ # parameters:
313
+ # t: 0.5
314
+ # dtype: float32
315
+
316
+
317
+ # slices:
318
+ # - sources:
319
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-squad_0.02_8_5e-5_16_lora32_full
320
+ # layer_range: [0, 36]
321
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-medrxiv_abstract_0.02_8_5e-5_16_lora32_full
322
+ # layer_range: [0, 36]
323
+ # merge_method: slerp
324
+ # base_model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-squad_0.02_8_5e-5_16_lora32_full
325
+ # parameters:
326
+ # t: 0.5
327
+ # dtype: float32
328
+
329
+ # models:
330
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-squad_0.02_8_5e-5_16_lora32_full
331
+ # parameters:
332
+ # weight: 0.5
333
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-medrxiv_abstract_0.02_8_5e-5_16_lora32_full
334
+ # parameters:
335
+ # weight: 0.5
336
+ # merge_method: multislerp
337
+ # dtype: float32
338
+
339
+
340
+
341
+
342
+ # slices:
343
+ # - sources:
344
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-fiqa_0.02_8_5e-5_16_lora32_full
345
+ # layer_range: [0, 36]
346
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-tweet_sentiment_extraction_classification_0.02_3_5e-5_16_lora32_full
347
+ # layer_range: [0, 36]
348
+ # merge_method: slerp
349
+ # base_model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-fiqa_0.02_8_5e-5_16_lora32_full
350
+ # parameters:
351
+ # t: 0.5
352
+ # dtype: float32
353
+
354
+
355
+
356
+
357
+
358
+ # slices:
359
+ # - sources:
360
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_sample20_full
361
+ # layer_range: [0, 36]
362
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_other_sample20_full
363
+ # layer_range: [0, 36]
364
+ # merge_method: slerp
365
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_sample20_full
366
+ # parameters:
367
+ # t: 0.864
368
+ # dtype: float32
369
+
370
+
371
+ # slices:
372
+ # - sources:
373
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering12_other_sample20_slerp_merging
374
+ # layer_range: [0, 36]
375
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled80_full
376
+ # layer_range: [0, 36]
377
+ # merge_method: slerp
378
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering12_other_sample20_slerp_merging
379
+ # parameters:
380
+ # t: 0.8
381
+ # dtype: float32
382
+
383
+
384
+ # slices:
385
+ # - sources:
386
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_sample80_full
387
+ # layer_range: [0, 36]
388
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_other_sample80_full
389
+ # layer_range: [0, 36]
390
+ # merge_method: slerp
391
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_sample80_full
392
+ # parameters:
393
+ # t: 0.864
394
+ # dtype: float32
395
+
396
+
397
+ # slices:
398
+ # - sources:
399
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering12_other_sample80
400
+ # layer_range: [0, 36]
401
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
402
+ # layer_range: [0, 36]
403
+ # merge_method: slerp
404
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering12_other_sample80
405
+ # parameters:
406
+ # t: 0.2
407
+ # dtype: float32
408
+
409
+
410
+
411
+ # slices:
412
+ # - sources:
413
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full
414
+ # layer_range: [0, 36]
415
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full
416
+ # layer_range: [0, 36]
417
+ # merge_method: slerp
418
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full
419
+ # parameters:
420
+ # t: 0.864
421
+ # dtype: float32
422
+
423
+
424
+
425
+ # slices:
426
+ # - sources:
427
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering12_full_merging_slerp_number_weighted
428
+ # layer_range: [0, 36]
429
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
430
+ # layer_range: [0, 36]
431
+ # merge_method: slerp
432
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering12_full_merging_slerp_number_weighted
433
+ # parameters:
434
+ # t: 0.9
435
+ # dtype: float32
436
+
437
+
438
+
439
+ # models:
440
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full
441
+ # parameters:
442
+ # weight: 0.1
443
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full
444
+ # parameters:
445
+ # weight: 0.2
446
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
447
+ # parameters:
448
+ # weight: 0.7
449
+ # merge_method: multislerp
450
+ # dtype: float32
451
+
452
+
453
+
454
+ # models:
455
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled60_full
456
+ # parameters:
457
+ # weight: 0.6
458
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
459
+ # parameters:
460
+ # weight: 0.2
461
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_full
462
+ # parameters:
463
+ # weight: 0.4
464
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled80_full
465
+ # parameters:
466
+ # weight: 0.8
467
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
468
+ # parameters:
469
+ # weight: 1.0
470
+ # merge_method: multislerp
471
+ # dtype: float32
472
+
473
+ # slices:
474
+ # - sources:
475
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
476
+ # layer_range: [0, 36]
477
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_full
478
+ # layer_range: [0, 36]
479
+ # merge_method: slerp
480
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
481
+ # parameters:
482
+ # t: 0.66
483
+ # dtype: float32
484
+
485
+ # slices:
486
+ # - sources:
487
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master//root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sample20_sample40
488
+ # layer_range: [0, 36]
489
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled60_full
490
+ # layer_range: [0, 36]
491
+ # merge_method: slerp
492
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master//root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sample20_sample40
493
+ # parameters:
494
+ # t: 0.60
495
+ # dtype: float32
496
+
497
+ # slices:
498
+ # - sources:
499
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master//root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sample20_sample40_sample60
500
+ # layer_range: [0, 36]
501
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled80_full
502
+ # layer_range: [0, 36]
503
+ # merge_method: slerp
504
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master//root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sample20_sample40_sample60
505
+ # parameters:
506
+ # t: 0.57
507
+ # dtype: float32
508
+
509
+ # slices:
510
+ # - sources:
511
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master//root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sample20_sample40_sample60_sample80
512
+ # layer_range: [0, 36]
513
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
514
+ # layer_range: [0, 36]
515
+ # merge_method: slerp
516
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master//root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sample20_sample40_sample60_sample80
517
+ # parameters:
518
+ # t: 0.56
519
+ # dtype: float32
520
+
521
+
522
+
523
+ # models:
524
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-8000_full
525
+ # parameters:
526
+ # weight: 0.86
527
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-6000_full
528
+ # parameters:
529
+ # weight: 0.65
530
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-4000_full
531
+ # parameters:
532
+ # weight: 0.43
533
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000_full
534
+ # parameters:
535
+ # weight: 0.22
536
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
537
+ # parameters:
538
+ # weight: 1.0
539
+ # merge_method: multislerp
540
+ # dtype: float32
541
+
542
+
543
+
544
+
545
+ # models:
546
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-8000_full
547
+ # parameters:
548
+ # weight: 0.86
549
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-6000_full
550
+ # parameters:
551
+ # weight: 0.65
552
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-4000_full
553
+ # parameters:
554
+ # weight: 0.43
555
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000_full
556
+ # parameters:
557
+ # weight: 0.22
558
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
559
+ # parameters:
560
+ # weight: 1.0
561
+ # merge_method: ties
562
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
563
+ # parameters:
564
+ # normalize: true
565
+ # int8_mask: true
566
+ # dtype: float32
567
+
568
+ # models:
569
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-8000_full
570
+ # parameters:
571
+ # weight: 0.86
572
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-6000_full
573
+ # parameters:
574
+ # weight: 0.65
575
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-4000_full
576
+ # parameters:
577
+ # weight: 0.43
578
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000_full
579
+ # parameters:
580
+ # weight: 0.22
581
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
582
+ # parameters:
583
+ # weight: 1.0
584
+ # merge_method: task_arithmetic
585
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
586
+ # parameters:
587
+ # normalize: true
588
+ # int8_mask: true
589
+ # dtype: float32
590
+
591
+
592
+
593
+ # models:
594
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-8000_full
595
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-6000_full
596
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-4000_full
597
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000_full
598
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
599
+ # merge_method: sce
600
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
601
+ # parameters:
602
+ # normalize: true
603
+ # int8_mask: true
604
+ # dtype: float32
605
+
606
+ # models:
607
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-8000_full
608
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-6000_full
609
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-4000_full
610
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000_full
611
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
612
+ # merge_method: model_stock
613
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
614
+ # parameters:
615
+ # normalize: true
616
+ # int8_mask: true
617
+ # dtype: float32
618
+
619
+
620
+
621
+
622
+
623
+
624
+ # slices:
625
+ # - sources:
626
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000_full
627
+ # layer_range: [0, 36]
628
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-4000_full
629
+ # layer_range: [0, 36]
630
+ # merge_method: slerp
631
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000_full
632
+ # parameters:
633
+ # t: 0.66
634
+ # dtype: float32
635
+
636
+ # slices:
637
+ # - sources:
638
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000-4000
639
+ # layer_range: [0, 36]
640
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-6000_full
641
+ # layer_range: [0, 36]
642
+ # merge_method: slerp
643
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000-4000
644
+ # parameters:
645
+ # t: 0.60
646
+ # dtype: float32
647
+
648
+ # slices:
649
+ # - sources:
650
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000-4000-6000
651
+ # layer_range: [0, 36]
652
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-8000_full
653
+ # layer_range: [0, 36]
654
+ # merge_method: slerp
655
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000-4000-6000
656
+ # parameters:
657
+ # t: 0.57
658
+ # dtype: float32
659
+
660
+ # slices:
661
+ # - sources:
662
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000-4000-6000-8000
663
+ # layer_range: [0, 36]
664
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
665
+ # layer_range: [0, 36]
666
+ # merge_method: slerp
667
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000-4000-6000-8000
668
+ # parameters:
669
+ # t: 0.55
670
+ # dtype: float32
671
+
672
+
673
+
674
+
675
+
676
+
677
+
678
+
679
+ # models:
680
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled60_full
681
+ # parameters:
682
+ # weight: 0.6
683
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
684
+ # parameters:
685
+ # weight: 0.2
686
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_full
687
+ # parameters:
688
+ # weight: 0.4
689
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled80_full
690
+ # parameters:
691
+ # weight: 0.8
692
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
693
+ # parameters:
694
+ # weight: 1.0
695
+ # merge_method: ties
696
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
697
+ # parameters:
698
+ # normalize: true
699
+ # int8_mask: true
700
+ # dtype: float32
701
+
702
+ # models:
703
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled60_full
704
+ # parameters:
705
+ # weight: 0.6
706
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
707
+ # parameters:
708
+ # weight: 0.2
709
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_full
710
+ # parameters:
711
+ # weight: 0.4
712
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled80_full
713
+ # parameters:
714
+ # weight: 0.8
715
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
716
+ # parameters:
717
+ # weight: 1.0
718
+ # merge_method: task_arithmetic
719
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
720
+ # parameters:
721
+ # normalize: true
722
+ # int8_mask: true
723
+ # dtype: float32
724
+
725
+
726
+
727
+ # models:
728
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled60_full
729
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
730
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_full
731
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled80_full
732
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
733
+ # merge_method: sce
734
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
735
+ # parameters:
736
+ # normalize: true
737
+ # int8_mask: true
738
+ # dtype: float32
739
+
740
+ # models:
741
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled60_full
742
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
743
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_full
744
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled80_full
745
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
746
+ # merge_method: model_stock
747
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
748
+ # parameters:
749
+ # normalize: true
750
+ # int8_mask: true
751
+ # dtype: float32
752
+
753
+
754
+
755
+
756
+ # models:
757
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample60_seed100
758
+ # parameters:
759
+ # weight: 0.5
760
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample60_seed1
761
+ # parameters:
762
+ # weight: 0.5
763
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample60_seed42
764
+ # parameters:
765
+ # weight: 0.5
766
+ # merge_method: multislerp
767
+ # dtype: float32
768
+
769
+
770
+
771
+ # models:
772
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample_other50
773
+ # parameters:
774
+ # weight: 0.5
775
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample50
776
+ # parameters:
777
+ # weight: 0.5
778
+ # merge_method: multislerp
779
+ # dtype: float32
780
+
781
+
782
+ # slices:
783
+ # - sources:
784
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample_other50
785
+ # layer_range: [0, 28]
786
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample50
787
+ # layer_range: [0, 28]
788
+ # merge_method: slerp
789
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample_other50
790
+ # parameters:
791
+ # t: 0.5
792
+ # dtype: float32
793
+
794
+
795
+ # models:
796
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample_other50
797
+ # parameters:
798
+ # weight: 0.5
799
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample50
800
+ # parameters:
801
+ # weight: 0.5
802
+ # merge_method: ties
803
+ # base_model: /mnt/data/models/Qwen/Qwen3-0.6B
804
+ # parameters:
805
+ # normalize: true
806
+ # int8_mask: true
807
+ # dtype: float32
808
+
809
+
810
+
811
+ # models:
812
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample50_seed1/
813
+ # parameters:
814
+ # weight: 0.5
815
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample50_seed100/
816
+ # parameters:
817
+ # weight: 0.5
818
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample50/
819
+ # parameters:
820
+ # weight: 0.5
821
+ # merge_method: multislerp
822
+ # dtype: float32
823
+
824
+
825
+ models:
826
+ - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample20
827
+ parameters:
828
+ weight: 0.2
829
+ - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample40
830
+ parameters:
831
+ weight: 0.4
832
+ - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample60
833
+ parameters:
834
+ weight: 0.6
835
+ - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample80
836
+ parameters:
837
+ weight: 0.8
838
+ - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_all_eng_data
839
+ parameters:
840
+ weight: 1.0
841
+ merge_method: multislerp
842
+ dtype: float32
843
+
844
+
845
+
846
+
847
+
848
+
849
+
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0589b91120d662694001fba7b4a9aa417dd99227536db6922899b6dec00d3748
3
+ size 2384233112
modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
task_prompts.json ADDED
@@ -0,0 +1,255 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "AmazonCounterfactualClassification": "Classify a given Amazon customer review text as either counterfactual or not-counterfactual.",
3
+ "AmazonPolarityClassification": "Classify Amazon reviews into positive or negative sentiment.",
4
+ "AmazonReviewsClassification": "Classify the given Amazon review into its appropriate rating category.",
5
+ "Banking77Classification": "Given a online banking query, find the corresponding intents.",
6
+ "EmotionClassification": "Classify the emotion expressed in the given Twitter message into one of the six emotions: anger, fear, joy, love, sadness, and surprise.",
7
+ "ImdbClassification": "Classify the sentiment expressed in the given movie review text from the IMDB dataset.",
8
+ "MassiveIntentClassification": "Given a user utterance as query, find the user intents.",
9
+ "MassiveScenarioClassification": "Given a user utterance as query, find the user scenarios.",
10
+ "MTOPDomainClassification": "Classify the intent domain of the given utterance in task-oriented conversation.",
11
+ "MTOPIntentClassification": "Classify the intent of the given utterance in task-oriented conversation.",
12
+ "ToxicConversationsClassification": "Classify the given comments as either toxic or not toxic.",
13
+ "TweetSentimentExtractionClassification": "Classify the sentiment of a given tweet as either positive, negative, or neutral.",
14
+ "TNews": "Classify the fine-grained category of the given news title.",
15
+ "IFlyTek": "Given an App description text, find the appropriate fine-grained category.",
16
+ "MultilingualSentiment": "Classify sentiment of the customer review into positive, neutral, or negative.",
17
+ "JDReview": "Classify the customer review for iPhone on e-commerce platform into positive or negative.",
18
+ "OnlineShopping": "Classify the customer review for online shopping into positive or negative.",
19
+ "Waimai": "Classify the customer review from a food takeaway platform into positive or negative.",
20
+ "ArxivClusteringP2P": "Identify the main and secondary category of Arxiv papers based on the titles and abstracts.",
21
+ "ArxivClusteringS2S": "Identify the main and secondary category of Arxiv papers based on the titles.",
22
+ "BiorxivClusteringP2P": "Identify the main category of Biorxiv papers based on the titles and abstracts.",
23
+ "BiorxivClusteringS2S": "Identify the main category of Biorxiv papers based on the titles.",
24
+ "MedrxivClusteringP2P": "Identify the main category of Medrxiv papers based on the titles and abstracts.",
25
+ "MedrxivClusteringS2S": "Identify the main category of Medrxiv papers based on the titles.",
26
+ "RedditClustering": "Identify the topic or theme of Reddit posts based on the titles.",
27
+ "RedditClusteringP2P": "Identify the topic or theme of Reddit posts based on the titles and posts.",
28
+ "StackExchangeClustering": "Identify the topic or theme of StackExchange posts based on the titles.",
29
+ "StackExchangeClusteringP2P": "Identify the topic or theme of StackExchange posts based on the given paragraphs.",
30
+ "TwentyNewsgroupsClustering": "Identify the topic or theme of the given news articles.",
31
+ "CLSClusteringS2S": "Identify the main category of scholar papers based on the titles.",
32
+ "CLSClusteringP2P": "Identify the main category of scholar papers based on the titles and abstracts.",
33
+ "ThuNewsClusteringS2S": "Identify the topic or theme of the given news articles based on the titles.",
34
+ "ThuNewsClusteringP2P": "Identify the topic or theme of the given news articles based on the titles and contents.",
35
+ "AskUbuntuDupQuestions": "Retrieve duplicate questions from AskUbuntu forum.",
36
+ "MindSmallReranking": "Retrieve relevant news articles based on user browsing history.",
37
+ "SciDocsRR": "Given a title of a scientific paper, retrieve the titles of other relevant papers.",
38
+ "StackOverflowDupQuestions": "Retrieve duplicate questions from StackOverflow forum.",
39
+ "SprintDuplicateQuestions": "Retrieve duplicate questions from Sprint forum.",
40
+ "TwitterSemEval2015": "Retrieve tweets that are semantically similar to the given tweet.",
41
+ "TwitterURLCorpus": "Retrieve tweets that are semantically similar to the given tweet.",
42
+ "T2Reranking": "Given a Chinese search query, retrieve web passages that answer the question.",
43
+ "MmarcoReranking": "Given a Chinese search query, retrieve web passages that answer the question.",
44
+ "CMedQAv1": "Given a Chinese community medical question, retrieve replies that best answer the question.",
45
+ "CMedQAv2": "Given a Chinese community medical question, retrieve replies that best answer the question.",
46
+ "Ocnli": "Retrieve semantically similar text.",
47
+ "Cmnli": "Retrieve semantically similar text.",
48
+ "ArguAna": {"query": "Given a claim, find documents that refute the claim.", "passage": "Given a claim, find documents that refute the claim."},
49
+ "ClimateFEVER": "Given a claim about climate change, retrieve documents that support or refute the claim.",
50
+ "ClimateFEVERHardNegatives": "Given a claim about climate change, retrieve documents that support or refute the claim.",
51
+ "DBPedia": "Given a query, retrieve relevant entity descriptions from DBPedia.",
52
+ "FEVER": "Given a claim, retrieve documents that support or refute the claim.",
53
+ "FEVERHardNegatives": "Given a claim, retrieve documents that support or refute the claim.",
54
+ "FiQA2018": "Given a financial question, retrieve user replies that best answer the question.",
55
+ "HotpotQA": "Given a multi-hop question, retrieve documents that can help answer the question.",
56
+ "HotpotQAHardNegatives": "Given a multi-hop question, retrieve documents that can help answer the question.",
57
+ "MSMARCO": "Given a web search query, retrieve relevant passages that answer the query.",
58
+ "NFCorpus": "Given a question, retrieve relevant documents that best answer the question.",
59
+ "NQ": "Given a question, retrieve Wikipedia passages that answer the question.",
60
+ "QuoraRetrieval": "Given a question, retrieve questions that are semantically equivalent to the given question.",
61
+ "SCIDOCS": "Given a scientific paper title, retrieve paper abstracts that are cited by the given paper.",
62
+ "SciFact": "Given a scientific claim, retrieve documents that support or refute the claim.",
63
+ "Touche2020": "Given a question, retrieve detailed and persuasive arguments that answer the question.",
64
+ "Touche2020Retrieval.v3": "Given a question, retrieve detailed and persuasive arguments that answer the question.",
65
+ "TRECCOVID": "Given a query on COVID-19, retrieve documents that answer the query.",
66
+ "T2Retrieval": "Given a Chinese search query, retrieve web passages that answer the question.",
67
+ "MMarcoRetrieval": "Given a web search query, retrieve relevant passages that answer the query.",
68
+ "DuRetrieval": "Given a Chinese search query, retrieve web passages that answer the question.",
69
+ "CovidRetrieval": "Given a question on COVID-19, retrieve news articles that answer the question.",
70
+ "CmedqaRetrieval": "Given a Chinese community medical question, retrieve replies that best answer the question.",
71
+ "EcomRetrieval": "Given a user query from an e-commerce website, retrieve description sentences of relevant products.",
72
+ "MedicalRetrieval": "Given a medical question, retrieve user replies that best answer the question.",
73
+ "VideoRetrieval": "Given a video search query, retrieve the titles of relevant videos.",
74
+ "STSBenchmarkMultilingualSTS": "Retrieve semantically similar text.",
75
+ "SICKFr": "Retrieve semantically similar text.",
76
+ "SummEvalFr": "Given a news summary, retrieve other semantically similar summaries.",
77
+ "MasakhaNEWSClassification": "Classify the News in the given texts into one of the seven category: politics,sports,health,business,entertainment,technology,religion.",
78
+ "OpusparcusPC":"Retrieve semantically similar text.",
79
+ "PawsX":"Retrieve semantically similar text.",
80
+ "AlloProfClusteringP2P": "Identify the main category of Allo Prof document based on the titles and descriptions.",
81
+ "AlloProfClusteringS2S": "Identify the main category of Allo Prof document based on the titles.",
82
+ "HALClusteringS2S": "Identify the main category of academic passage based on the titles and contents.",
83
+ "MasakhaNEWSClusteringP2P": "Identify the topic or theme of the given news articles based on the titles and contents.",
84
+ "MasakhaNEWSClusteringS2S": "Identify the topic or theme of the given news articles based on the titles.",
85
+ "MLSUMClusteringP2P": "Identify the topic or theme of the given articles based on the titles and contents.",
86
+ "MLSUMClusteringS2S": "Identify the topic or theme of the given articles based on the titles.",
87
+ "SyntecReranking": "Given a question, retrieve passages that answer the question.",
88
+ "AlloprofReranking": "Given a question, retrieve passages that answer the question.",
89
+ "AlloprofRetrieval": "Given a question, retrieve passages that answer the question.",
90
+ "BSARDRetrieval": "Given a question, retrieve passages that answer the question.",
91
+ "SyntecRetrieval": "Given a question, retrieve passages that answer the question.",
92
+ "XPQARetrieval": "Given a question, retrieve passages that answer the question.",
93
+ "MintakaRetrieval": "Given a question, retrieve passages that answer the question.",
94
+ "CBD":"Classify the sentiment of polish tweet reviews.",
95
+ "PolEmo2.0-IN": "Classify the sentiment of in-domain (medicine and hotels) online reviews.",
96
+ "PolEmo2.0-OUT":"Classify the sentiment of out-of-domain (products and school) online reviews.",
97
+ "AllegroReviews": "Classify the sentiment of reviews from e-commerce marketplace Allegro.",
98
+ "PAC": "Classify the sentence into one of the two types: \"BEZPIECZNE_POSTANOWIENIE_UMOWNE\" and \"KLAUZULA_ABUZYWNA\".",
99
+ "SICK-E-PL": "Retrieve semantically similar text.",
100
+ "SICK-R-PL": "Retrieve semantically similar text.",
101
+ "STS22": "Retrieve semantically similar text.",
102
+ "AFQMC": "Retrieve semantically similar text.",
103
+ "AFQMC": "Retrieve semantically similar text.",
104
+ "BQ": "Retrieve semantically similar text.",
105
+ "LCQMC": "Retrieve semantically similar text.",
106
+ "PAWSX": "Retrieve semantically similar text.",
107
+ "QBQTC": "Retrieve semantically similar text.",
108
+ "STS12": "Retrieve semantically similar text.",
109
+ "PPC": "Retrieve semantically similar text.",
110
+ "CDSC-E": "Retrieve semantically similar text.",
111
+ "PSC": "Retrieve semantically similar text.",
112
+ "8TagsClustering": "Identify of headlines from social media posts in Polish into 8 categories: film, history, food, medicine, motorization, work, sport and technology.",
113
+ "ArguAna-PL": "Given a claim, find documents that refute the claim.",
114
+ "DBPedia-PL": "Given a query, retrieve relevant entity descriptions from DBPedia.",
115
+ "FiQA-PL": "Given a financial question, retrieve user replies that best answer the question.",
116
+ "HotpotQA-PL": "Given a multi-hop question, retrieve documents that can help answer the question.",
117
+ "MSMARCO-PL": "Given a web search query, retrieve relevant passages that answer the query.",
118
+ "NFCorpus-PL": "Given a question, retrieve relevant documents that best answer the question.",
119
+ "NQ-PL": "Given a question, retrieve Wikipedia passages that answer the question.",
120
+ "Quora-PL": "Given a question, retrieve questions that are semantically equivalent to the given question.",
121
+ "SCIDOCS-PL": "Given a scientific paper title, retrieve paper abstracts that are cited by the given paper.",
122
+ "SciFact-PL": "Given a scientific claim, retrieve documents that support or refute the claim.",
123
+ "TRECCOVID-PL": "Given a query on COVID-19, retrieve documents that answer the query.",
124
+ "GeoreviewClassification": "Classify the organization rating based on the reviews.",
125
+ "HeadlineClassification": "Classify the topic or theme of the given news headline.",
126
+ "InappropriatenessClassification": "Classify the given message as either sensitive topic or not.",
127
+ "KinopoiskClassification": "Classify the sentiment expressed in the given movie review text.",
128
+ "RuReviewsClassification": "Classify product reviews into positive, negative or neutral sentiment.",
129
+ "RuSciBenchGRNTIClassification": "Classify the category of scientific papers based on the titles and abstracts.",
130
+ "RuSciBenchOECDClassification": "Classify the category of scientific papers based on the titles and abstracts.",
131
+ "GeoreviewClusteringP2P": "Identify the organization category based on the reviews.",
132
+ "RuSciBenchGRNTIClusteringP2P": "Identify the category of scientific papers based on the titles and abstracts.",
133
+ "RuSciBenchOECDClusteringP2P": "Identify the category of scientific papers based on the titles and abstracts.",
134
+ "TERRa": "Given a premise, retrieve a hypothesis that is entailed by the premise.",
135
+ "RuBQReranking": "Given a question, retrieve Wikipedia passages that answer the question.",
136
+ "RiaNewsRetrieval": "Given a headline, retrieve relevant articles.",
137
+ "RuBQRetrieval": "Given a question, retrieve Wikipedia passages that answer the question.",
138
+ "RUParaPhraserSTS": "Retrieve semantically similar text.",
139
+ "RuSTSBenchmarkSTS": "Retrieve semantically similar text.",
140
+ "AppsRetrieval": "Given a question about a code problem, retrieve code that can solve the user's problem.",
141
+ "COIRCodeSearchNetRetrieval": "Given a code snippet, retrieve the comment corresponding to that code.",
142
+ "CodeEditSearchRetrieval": "Given a piece of code, retrieve semantically similar code.",
143
+ "CodeFeedbackMT": "Given a question about coding, retrieve code or a passage that can solve the user's question.",
144
+ "CodeFeedbackST": "Given a question about coding, retrieve code or a passage that can solve the user's question.",
145
+ "CodeSearchNetCCRetrieval": "Given a code comment, retrieve the code snippet corresponding to that comment.",
146
+ "CodeSearchNetRetrieval": "Given a code snippet, retrieve the comment corresponding to that code.",
147
+ "CodeTransOceanContest": "Given a piece of code, retrieve semantically similar code.",
148
+ "CodeTransOceanDL": "Given a piece of code, retrieve semantically similar code.",
149
+ "CosQA": "Given a question about coding, retrieve code or a passage that can solve the user's question.",
150
+ "StackOverflowQA": "Given a question about coding, retrieve code or a passage that can solve the user's question.",
151
+ "SyntheticText2SQL": "Given a user's question, retrieve SQL queries that are appropriate responses to the question.",
152
+ "BibleNLPBitextMining": "Retrieve parallel sentences.",
153
+ "BUCC.v2": "Retrieve parallel sentences.",
154
+ "DiaBlaBitextMining": "Retrieve parallel sentences.",
155
+ "FloresBitextMining": "Retrieve parallel sentences.",
156
+ "IN22GenBitextMining": "Retrieve parallel sentences.",
157
+ "IndicGenBenchFloresBitextMining": "Retrieve parallel sentences.",
158
+ "NollySentiBitextMining": "Retrieve parallel sentences.",
159
+ "NTREXBitextMining": "Retrieve parallel sentences.",
160
+ "NusaTranslationBitextMining": "Retrieve parallel sentences.",
161
+ "NusaXBitextMining": "Retrieve parallel sentences.",
162
+ "Tatoeba": "Retrieve parallel sentences.",
163
+ "BulgarianStoreReviewSentimentClassfication": "Classify user reviews into positive or negative sentiment.",
164
+ "CzechProductReviewSentimentClassification": "Classify product reviews into positive or negative sentiment.",
165
+ "GreekLegalCodeClassification": "Given a Greek legal text, classify its topic.",
166
+ "DBpediaClassification": "Given a Wikipedia article, categorize it into classes based on its DBpedia ontology.",
167
+ "FinancialPhrasebankClassification": "Given financial news, categorized by sentiment into positive, negative, or neutral.",
168
+ "PoemSentimentClassification": "Given a poem, categorized by sentiment into positive, no_impact, negative or mixed.",
169
+ "TweetTopicSingleClassification": "Given a tweet, classify its topic.",
170
+ "EstonianValenceClassification": "Given a news article, categorized by sentiment into negatiivne, positiivne, neutraalne or vastuolulin.",
171
+ "FilipinoShopeeReviewsClassification": "Given a shop review, classify its rating on a scale from 1 to 5.",
172
+ "GujaratiNewsClassification": "Given a Gujarati news article, classify its topic.",
173
+ "SentimentAnalysisHindi": "Given a Hindi text, categorized by sentiment into positive, negative or neutral.",
174
+ "IndonesianIdClickbaitClassification": "Given an Indonesian news headline, classify it into clickbait or non-clickbait.",
175
+ "ItaCaseholdClassification": "Given a judgment, classify its topic.",
176
+ "KorSarcasmClassification": "Given a tweet, categorize it into sarcasm or not_sarcasm.",
177
+ "KurdishSentimentClassification": "Given a text, categorized by sentiment into positive or negative.",
178
+ "MacedonianTweetSentimentClassification": "Given a Macedonian tweet, categorized by sentiment into positive, negative, or neutral.",
179
+ "AfriSentiClassification": "Given a text, categorized by sentiment into positive, negative, or neutral.",
180
+ "CataloniaTweetClassification": "Given a tweet, categorized by sentiment into AGAINST, FAVOR or NEUTRAL.",
181
+ "CyrillicTurkicLangClassification": "Given a text, classify its language.",
182
+ "IndicLangClassification": "Given a text, classify its language.",
183
+ "MultiHateClassification": "Given a text, categorized by sentiment into hate or non-hate.",
184
+ "NusaParagraphEmotionClassification": "Given a paragraph, classify its emotion.",
185
+ "NusaX-senti": "Given a text, categorized by sentiment into positive or negative.",
186
+ "SwissJudgementClassification": "Given a news article, categorized it into approval or dismissal.",
187
+ "NepaliNewsClassification": "Given a news article, categorized it into business, entertainment or sports.",
188
+ "OdiaNewsClassification": "Given a news article, categorized it into business, entertainment or sports.",
189
+ "PunjabiNewsClassification": "Given a news article, categorized it into two-classes.",
190
+ "SinhalaNewsClassification": "Given a news article, categorized it into political, business, technology, sports and Entertainment.",
191
+ "CSFDSKMovieReviewSentimentClassification": "Given a movie review, classify its rating on a scale from 0 to 5.",
192
+ "SiswatiNewsClassification": "Given a news article, classify its topic.",
193
+ "SlovakMovieReviewSentimentClassification": "Given a movie review, categorized it into positive or negative.",
194
+ "SwahiliNewsClassification": "Given a news article, classify its domain.",
195
+ "TswanaNewsClassification": "Given a news article, classify its topic.",
196
+ "IsiZuluNewsClassification": "Given a news article, classify its topic.",
197
+ "WikiCitiesClustering": "Identify the country of Wikipedia articles about cities.",
198
+ "RomaniBibleClustering": "Identify verses from the Bible in Kalderash Romani by book.",
199
+ "ArXivHierarchicalClusteringP2P": "Identify the main and secondary category of Arxiv papers based on the titles and abstracts.",
200
+ "ArXivHierarchicalClusteringS2S": "Identify the main and secondary category of Arxiv papers based on the titles.",
201
+ "BigPatentClustering.v2": "Identify the category of documents from the Big Patent dataset.",
202
+ "AlloProfClusteringS2S": "Identify the topic of document titles from Allo Prof dataset.",
203
+ "AlloProfClusteringS2S.v2": "Identify the topic of document titles from Allo Prof dataset.",
204
+ "HALClusteringS2S.v2": "Identify the topic of titles from HAL.",
205
+ "SIB200ClusteringS2S": "Identify the category of documents.",
206
+ "WikiClusteringP2P.v2": "Identify the category of wiki passages.",
207
+ "PlscClusteringP2P.v2": "Identify the category of titles+abstracts from Library of Science.",
208
+ "KorHateSpeechMLClassification": "Given a Korean online news comment, classify its fine-grained hate speech classes.",
209
+ "MalteseNewsClassification": "Given a Maltese news article, classify its topic.",
210
+ "MultiEURLEXMultilabelClassification": "Given a text, classify its topic.",
211
+ "BrazilianToxicTweetsClassification": "Given a tweet, classify its topic.",
212
+ "CTKFactsNLI": "Retrieve semantically similar text.",
213
+ "indonli": "Retrieve semantically similar text.",
214
+ "ArmenianParaphrasePC": "Retrieve semantically similar text.",
215
+ "PawsXPairClassification": "Retrieve semantically similar text.",
216
+ "RTE3": "Retrieve semantically similar text.",
217
+ "XNLI": "Retrieve semantically similar text.",
218
+ "PpcPC": "Retrieve semantically similar text.",
219
+ "GermanSTSBenchmark": "Retrieve semantically similar text.",
220
+ "SICK-R": "Retrieve semantically similar text.",
221
+ "STS13": "Retrieve semantically similar text.",
222
+ "STS14": "Retrieve semantically similar text.",
223
+ "STSBenchmark": "Retrieve semantically similar text.",
224
+ "FaroeseSTS": "Retrieve semantically similar text.",
225
+ "FinParaSTS": "Retrieve semantically similar text.",
226
+ "JSICK": "Retrieve semantically similar text.",
227
+ "IndicCrosslingualSTS": "Retrieve semantically similar text.",
228
+ "SemRel24STS": "Retrieve semantically similar text.",
229
+ "STS17": "Retrieve semantically similar text.",
230
+ "STS22.v2": "Retrieve semantically similar text.",
231
+ "STSES": "Retrieve semantically similar text.",
232
+ "STSB": "Retrieve semantically similar text.",
233
+ "AILAStatutes": "Identifying the most relevant statutes for a given situation.",
234
+ "HagridRetrieval": "Retrieve the relevant passage for the given query.",
235
+ "LegalBenchCorporateLobbying": "Retrieve the relevant passage for the given query.",
236
+ "LEMBPasskeyRetrieval": "Retrieve the relevant passage for the given query.",
237
+ "BelebeleRetrieval": "Retrieve the relevant passage for the given query.",
238
+ "MLQARetrieval": "Retrieve the relevant passage for the given query.",
239
+ "StatcanDialogueDatasetRetrieval": "Retrieve the relevant passage for the given query.",
240
+ "WikipediaRetrievalMultilingual": "Retrieve the relevant passage for the given query.",
241
+ "Core17InstructionRetrieval": "Retrieve the relevant passage for the given query.",
242
+ "News21InstructionRetrieval": "Retrieve the relevant passage for the given query.",
243
+ "Robust04InstructionRetrieval": "Retrieve the relevant passage for the given query.",
244
+ "WebLINXCandidatesReranking": "Retrieve the relevant passage for the given query.",
245
+ "WikipediaRerankingMultilingual": "Retrieve the relevant passage for the given query.",
246
+ "STS15": "Retrieve semantically similar text.",
247
+ "MIRACLRetrievalHardNegatives": "Retrieve the relevant passage for the given query.",
248
+ "BIOSSES": "Retrieve semantically similar text.",
249
+ "CQADupstackRetrieval": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question.",
250
+ "CQADupstackGamingRetrieval": {"query": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question.", "passage": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question."},
251
+ "CQADupstackUnixRetrieval": {"query": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question.", "passage": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question."},
252
+ "STS16": "Retrieve semantically similar text.",
253
+ "SummEval": "Retrieve semantically similar text.",
254
+ "ATEC": "Retrieve semantically similar text."
255
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:352a863cd2761388ccc58f1432467ba6a1037bf12df9069889b142fa246471f6
3
+ size 11422752
tokenizer_config.json ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_eos_token": true,
4
+ "add_prefix_space": false,
5
+ "added_tokens_decoder": {
6
+ "151643": {
7
+ "content": "<|endoftext|>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "151644": {
15
+ "content": "<|im_start|>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "151645": {
23
+ "content": "<|im_end|>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ },
30
+ "151646": {
31
+ "content": "<|object_ref_start|>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false,
36
+ "special": true
37
+ },
38
+ "151647": {
39
+ "content": "<|object_ref_end|>",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": false,
43
+ "single_word": false,
44
+ "special": true
45
+ },
46
+ "151648": {
47
+ "content": "<|box_start|>",
48
+ "lstrip": false,
49
+ "normalized": false,
50
+ "rstrip": false,
51
+ "single_word": false,
52
+ "special": true
53
+ },
54
+ "151649": {
55
+ "content": "<|box_end|>",
56
+ "lstrip": false,
57
+ "normalized": false,
58
+ "rstrip": false,
59
+ "single_word": false,
60
+ "special": true
61
+ },
62
+ "151650": {
63
+ "content": "<|quad_start|>",
64
+ "lstrip": false,
65
+ "normalized": false,
66
+ "rstrip": false,
67
+ "single_word": false,
68
+ "special": true
69
+ },
70
+ "151651": {
71
+ "content": "<|quad_end|>",
72
+ "lstrip": false,
73
+ "normalized": false,
74
+ "rstrip": false,
75
+ "single_word": false,
76
+ "special": true
77
+ },
78
+ "151652": {
79
+ "content": "<|vision_start|>",
80
+ "lstrip": false,
81
+ "normalized": false,
82
+ "rstrip": false,
83
+ "single_word": false,
84
+ "special": true
85
+ },
86
+ "151653": {
87
+ "content": "<|vision_end|>",
88
+ "lstrip": false,
89
+ "normalized": false,
90
+ "rstrip": false,
91
+ "single_word": false,
92
+ "special": true
93
+ },
94
+ "151654": {
95
+ "content": "<|vision_pad|>",
96
+ "lstrip": false,
97
+ "normalized": false,
98
+ "rstrip": false,
99
+ "single_word": false,
100
+ "special": true
101
+ },
102
+ "151655": {
103
+ "content": "<|image_pad|>",
104
+ "lstrip": false,
105
+ "normalized": false,
106
+ "rstrip": false,
107
+ "single_word": false,
108
+ "special": true
109
+ },
110
+ "151656": {
111
+ "content": "<|video_pad|>",
112
+ "lstrip": false,
113
+ "normalized": false,
114
+ "rstrip": false,
115
+ "single_word": false,
116
+ "special": true
117
+ },
118
+ "151657": {
119
+ "content": "<tool_call>",
120
+ "lstrip": false,
121
+ "normalized": false,
122
+ "rstrip": false,
123
+ "single_word": false,
124
+ "special": false
125
+ },
126
+ "151658": {
127
+ "content": "</tool_call>",
128
+ "lstrip": false,
129
+ "normalized": false,
130
+ "rstrip": false,
131
+ "single_word": false,
132
+ "special": false
133
+ },
134
+ "151659": {
135
+ "content": "<|fim_prefix|>",
136
+ "lstrip": false,
137
+ "normalized": false,
138
+ "rstrip": false,
139
+ "single_word": false,
140
+ "special": false
141
+ },
142
+ "151660": {
143
+ "content": "<|fim_middle|>",
144
+ "lstrip": false,
145
+ "normalized": false,
146
+ "rstrip": false,
147
+ "single_word": false,
148
+ "special": false
149
+ },
150
+ "151661": {
151
+ "content": "<|fim_suffix|>",
152
+ "lstrip": false,
153
+ "normalized": false,
154
+ "rstrip": false,
155
+ "single_word": false,
156
+ "special": false
157
+ },
158
+ "151662": {
159
+ "content": "<|fim_pad|>",
160
+ "lstrip": false,
161
+ "normalized": false,
162
+ "rstrip": false,
163
+ "single_word": false,
164
+ "special": false
165
+ },
166
+ "151663": {
167
+ "content": "<|repo_name|>",
168
+ "lstrip": false,
169
+ "normalized": false,
170
+ "rstrip": false,
171
+ "single_word": false,
172
+ "special": false
173
+ },
174
+ "151664": {
175
+ "content": "<|file_sep|>",
176
+ "lstrip": false,
177
+ "normalized": false,
178
+ "rstrip": false,
179
+ "single_word": false,
180
+ "special": false
181
+ },
182
+ "151665": {
183
+ "content": "<tool_response>",
184
+ "lstrip": false,
185
+ "normalized": false,
186
+ "rstrip": false,
187
+ "single_word": false,
188
+ "special": false
189
+ },
190
+ "151666": {
191
+ "content": "</tool_response>",
192
+ "lstrip": false,
193
+ "normalized": false,
194
+ "rstrip": false,
195
+ "single_word": false,
196
+ "special": false
197
+ },
198
+ "151667": {
199
+ "content": "<think>",
200
+ "lstrip": false,
201
+ "normalized": false,
202
+ "rstrip": false,
203
+ "single_word": false,
204
+ "special": false
205
+ },
206
+ "151668": {
207
+ "content": "</think>",
208
+ "lstrip": false,
209
+ "normalized": false,
210
+ "rstrip": false,
211
+ "single_word": false,
212
+ "special": false
213
+ }
214
+ },
215
+ "additional_special_tokens": [
216
+ "<|im_start|>",
217
+ "<|im_end|>",
218
+ "<|object_ref_start|>",
219
+ "<|object_ref_end|>",
220
+ "<|box_start|>",
221
+ "<|box_end|>",
222
+ "<|quad_start|>",
223
+ "<|quad_end|>",
224
+ "<|vision_start|>",
225
+ "<|vision_end|>",
226
+ "<|vision_pad|>",
227
+ "<|image_pad|>",
228
+ "<|video_pad|>"
229
+ ],
230
+ "bos_token": null,
231
+ "clean_up_tokenization_spaces": false,
232
+ "eos_token": "<|im_end|>",
233
+ "errors": "replace",
234
+ "extra_special_tokens": {},
235
+ "model_max_length": 131072,
236
+ "pad_token": "<|endoftext|>",
237
+ "split_special_tokens": false,
238
+ "tokenizer_class": "Qwen2Tokenizer",
239
+ "unk_token": null
240
+ }