hengranZhang committed on
Commit
0e2b4d8
·
verified ·
1 Parent(s): 785abd1

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 2560,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": false,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": true,
9
+ "include_prompt": true
10
+ }
README.md ADDED
@@ -0,0 +1,882 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: []
3
+ library_name: transformers
4
+ tags:
5
+ - mergekit
6
+ - merge
7
+
8
+ ---
9
+ # Qwen3-0.6B_sample20_40_60_80_100_multiselerp_merging
10
+
11
+ This is a merge of pre-trained language models created using [mergekit](https://github.com/cg123/mergekit).
12
+
13
+ ## Merge Details
14
+ ### Merge Method
15
+
16
+ This model was merged using the [Multi-SLERP](https://goddard.blog/posts/multislerp-wow-what-a-cool-idea) merge method.
17
+
18
+ ### Models Merged
19
+
20
+ The following models were included in the merge:
21
+ * /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample40
22
+ * /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample80
23
+ * /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample60
24
+ * /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample20
25
+ * /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_all_eng_data
26
+
27
+ ### Configuration
28
+
29
+ The following YAML configuration was used to produce this model:
30
+
31
+ ```yaml
32
+ # models:
33
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_classification_gradient2_full/
34
+ # parameters:
35
+ # weight: 1
36
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_clustering_gradient2_full/
37
+ # parameters:
38
+ # weight: 1
39
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_retrieval_gradient2_full/
40
+ # parameters:
41
+ # weight: 1
42
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_sts_gradient2_full/
43
+ # parameters:
44
+ # weight: 1
45
+ # merge_method: multislerp
46
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
47
+ # dtype: float32
48
+ # slices:
49
+ # - sources:
50
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_classification_gradient2_full/
51
+ # layer_range: [0, 36]
52
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_sts_gradient2_full/
53
+ # layer_range: [0, 36]
54
+ # merge_method: slerp
55
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_classification_gradient2_full/
56
+ # parameters:
57
+ # t: 0.5
58
+ # dtype: float32
59
+
60
+ # slices:
61
+ # - sources:
62
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_sts_classification_selerp
63
+ # layer_range: [0, 36]
64
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_clustering_gradient2_full/
65
+ # layer_range: [0, 36]
66
+ # merge_method: slerp
67
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_sts_classification_selerp
68
+ # parameters:
69
+ # t: 0.5
70
+ # dtype: float32
71
+
72
+
73
+ # slices:
74
+ # - sources:
75
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_sts_classification_clustering_selerp
76
+ # layer_range: [0, 36]
77
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_retrieval_gradient2_full/
78
+ # layer_range: [0, 36]
79
+ # merge_method: slerp
80
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_sts_classification_clustering_selerp
81
+ # parameters:
82
+ # t: 0.5
83
+ # dtype: float32
84
+
85
+
86
+
87
+ # slices:
88
+ # - sources:
89
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_full
90
+ # layer_range: [0, 36]
91
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_other_full
92
+ # layer_range: [0, 36]
93
+ # merge_method: slerp
94
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_full
95
+ # parameters:
96
+ # t: 0.5
97
+ # dtype: float32
98
+ # models:
99
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_cut0.15_clustering1/
100
+ # parameters:
101
+ # weight: 0.863526622
102
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_cut0.15_clustering2/
103
+ # parameters:
104
+ # weight: 0.020574888
105
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_cut0.15_clustering3/
106
+ # parameters:
107
+ # weight: 0.11589849
108
+ # merge_method: ties
109
+ # base_model: /mnt/data/models/Qwen/Qwen3-0.6B
110
+ # parameters:
111
+ # normalize: true
112
+ # int8_mask: true
113
+ # dtype: float32
114
+
115
+ # slices:
116
+ # - sources:
117
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
118
+ # layer_range: [0, 36]
119
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering3_full/
120
+ # layer_range: [0, 36]
121
+ # merge_method: slerp
122
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
123
+ # parameters:
124
+ # t: 0.84
125
+ # dtype: float32
126
+
127
+
128
+ # slices:
129
+ # - sources:
130
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering23_slerp
131
+ # layer_range: [0, 36]
132
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
133
+ # layer_range: [0, 36]
134
+ # merge_method: slerp
135
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering23_slerp
136
+ # parameters:
137
+ # t: 0.86
138
+ # dtype: float32
139
+
140
+
141
+
142
+ # models:
143
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
144
+ # parameters:
145
+ # weight: 0.863526622
146
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
147
+ # parameters:
148
+ # weight: 0.020574888
149
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering3_full/
150
+ # parameters:
151
+ # weight: 0.11589849
152
+ # merge_method: ties
153
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
154
+ # parameters:
155
+ # normalize: true
156
+ # int8_mask: true
157
+ # dtype: float32
158
+
159
+ # models:
160
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
161
+ # parameters:
162
+ # weight: 0.4
163
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
164
+ # parameters:
165
+ # weight: 0.3
166
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering3_full/
167
+ # parameters:
168
+ # weight: 0.3
169
+ # merge_method: ties
170
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
171
+ # parameters:
172
+ # normalize: true
173
+ # int8_mask: true
174
+ # dtype: float32
175
+
176
+
177
+ # models:
178
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
179
+ # parameters:
180
+ # weight: 1
181
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
182
+ # parameters:
183
+ # weight: 1
184
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering3_full/
185
+ # parameters:
186
+ # weight: 1
187
+ # merge_method: ties
188
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
189
+ # parameters:
190
+ # normalize: true
191
+ # int8_mask: true
192
+ # dtype: float32
193
+
194
+ # slices:
195
+ # - sources:
196
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
197
+ # layer_range: [0, 36]
198
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering3_full/
199
+ # layer_range: [0, 36]
200
+ # merge_method: slerp
201
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
202
+ # parameters:
203
+ # t: 0.5
204
+ # dtype: float32
205
+
206
+ # slices:
207
+ # - sources:
208
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering23_slerp55
209
+ # layer_range: [0, 36]
210
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
211
+ # layer_range: [0, 36]
212
+ # merge_method: slerp
213
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering23_slerp55
214
+ # parameters:
215
+ # t: 0.5
216
+ # dtype: float32
217
+
218
+ # slices:
219
+ # - sources:
220
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering_retrieval123_slerp55/
221
+ # layer_range: [0, 36]
222
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full/
223
+ # layer_range: [0, 36]
224
+ # merge_method: slerp
225
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering_retrieval123_slerp55/
226
+ # parameters:
227
+ # t: 0.5
228
+ # dtype: float32
229
+
230
+
231
+ # slices:
232
+ # - sources:
233
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-hotpotqa_0.02_8_5e-5_16_lora32_full/
234
+ # layer_range: [0, 36]
235
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-imdb_classification_0.02_2_5e-5_16_lora32_full
236
+ # layer_range: [0, 36]
237
+ # merge_method: slerp
238
+ # base_model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-hotpotqa_0.02_8_5e-5_16_lora32_full/
239
+ # parameters:
240
+ # t: 0.5
241
+ # dtype: float32
242
+
243
+
244
+ # models:
245
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
246
+ # parameters:
247
+ # weight: 0.5
248
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
249
+ # parameters:
250
+ # weight: 0.5
251
+ # merge_method: dare_ties
252
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
253
+ # parameters:
254
+ # normalize: true
255
+ # int8_mask: true
256
+ # dtype: float32
257
+
258
+ # models:
259
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
260
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
261
+ # merge_method: model_stock
262
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
263
+ # parameters:
264
+ # normalize: true
265
+ # int8_mask: true
266
+ # dtype: float32
267
+ # models:
268
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
269
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
270
+ # merge_method: karcher
271
+ # dtype: float32
272
+
273
+ # models:
274
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-medrxiv_title_0.02_8_5e-5_16_lora32_full
275
+ # parameters:
276
+ # weight: 0.5
277
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-squad_0.02_8_5e-5_16_lora32_full
278
+ # parameters:
279
+ # weight: 0.5
280
+ # merge_method: multislerp
281
+ # dtype: float32
282
+ # slices:
283
+ # - sources:
284
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-medrxiv_title_0.02_8_5e-5_16_lora32_full
285
+ # layer_range: [0, 36]
286
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-squad_0.02_8_5e-5_16_lora32_full
287
+ # layer_range: [0, 36]
288
+ # merge_method: slerp
289
+ # base_model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-medrxiv_title_0.02_8_5e-5_16_lora32_full
290
+ # parameters:
291
+ # t: 0.5
292
+ # dtype: float32
293
+ # models:
294
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
295
+ # parameters:
296
+ # weight: 0.863
297
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
298
+ # parameters:
299
+ # weight: 0.137
300
+ # merge_method: task_arithmetic
301
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
302
+ # parameters:
303
+ # normalize: true
304
+ # int8_mask: true
305
+ # dtype: float32
306
+
307
+ # models:
308
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
309
+ # parameters:
310
+ # weight: 0.863
311
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
312
+ # parameters:
313
+ # weight: 0.137
314
+ # merge_method: ties
315
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
316
+ # parameters:
317
+ # normalize: true
318
+ # int8_mask: true
319
+ # dtype: float32
320
+ # slices:
321
+ # - sources:
322
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
323
+ # layer_range: [0, 36]
324
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
325
+ # layer_range: [0, 36]
326
+ # merge_method: slerp
327
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
328
+ # parameters:
329
+ # t: 0.137
330
+ # dtype: float32
331
+
332
+
333
+
334
+
335
+ # slices:
336
+ # - sources:
337
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_cut0.15_clustering1_slerp_same_weighted
338
+ # layer_range: [0, 36]
339
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_full
340
+ # layer_range: [0, 36]
341
+ # merge_method: slerp
342
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_cut0.15_clustering1_slerp_same_weighted
343
+ # parameters:
344
+ # t: 0.5
345
+ # dtype: float32
346
+
347
+
348
+ # slices:
349
+ # - sources:
350
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-squad_0.02_8_5e-5_16_lora32_full
351
+ # layer_range: [0, 36]
352
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-medrxiv_abstract_0.02_8_5e-5_16_lora32_full
353
+ # layer_range: [0, 36]
354
+ # merge_method: slerp
355
+ # base_model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-squad_0.02_8_5e-5_16_lora32_full
356
+ # parameters:
357
+ # t: 0.5
358
+ # dtype: float32
359
+
360
+ # models:
361
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-squad_0.02_8_5e-5_16_lora32_full
362
+ # parameters:
363
+ # weight: 0.5
364
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-medrxiv_abstract_0.02_8_5e-5_16_lora32_full
365
+ # parameters:
366
+ # weight: 0.5
367
+ # merge_method: multislerp
368
+ # dtype: float32
369
+
370
+
371
+
372
+
373
+ # slices:
374
+ # - sources:
375
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-fiqa_0.02_8_5e-5_16_lora32_full
376
+ # layer_range: [0, 36]
377
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-tweet_sentiment_extraction_classification_0.02_3_5e-5_16_lora32_full
378
+ # layer_range: [0, 36]
379
+ # merge_method: slerp
380
+ # base_model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-fiqa_0.02_8_5e-5_16_lora32_full
381
+ # parameters:
382
+ # t: 0.5
383
+ # dtype: float32
384
+
385
+
386
+
387
+
388
+
389
+ # slices:
390
+ # - sources:
391
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_sample20_full
392
+ # layer_range: [0, 36]
393
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_other_sample20_full
394
+ # layer_range: [0, 36]
395
+ # merge_method: slerp
396
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_sample20_full
397
+ # parameters:
398
+ # t: 0.864
399
+ # dtype: float32
400
+
401
+
402
+ # slices:
403
+ # - sources:
404
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering12_other_sample20_slerp_merging
405
+ # layer_range: [0, 36]
406
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled80_full
407
+ # layer_range: [0, 36]
408
+ # merge_method: slerp
409
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering12_other_sample20_slerp_merging
410
+ # parameters:
411
+ # t: 0.8
412
+ # dtype: float32
413
+
414
+
415
+ # slices:
416
+ # - sources:
417
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_sample80_full
418
+ # layer_range: [0, 36]
419
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_other_sample80_full
420
+ # layer_range: [0, 36]
421
+ # merge_method: slerp
422
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_sample80_full
423
+ # parameters:
424
+ # t: 0.864
425
+ # dtype: float32
426
+
427
+
428
+ # slices:
429
+ # - sources:
430
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering12_other_sample80
431
+ # layer_range: [0, 36]
432
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
433
+ # layer_range: [0, 36]
434
+ # merge_method: slerp
435
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering12_other_sample80
436
+ # parameters:
437
+ # t: 0.2
438
+ # dtype: float32
439
+
440
+
441
+
442
+ # slices:
443
+ # - sources:
444
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full
445
+ # layer_range: [0, 36]
446
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full
447
+ # layer_range: [0, 36]
448
+ # merge_method: slerp
449
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full
450
+ # parameters:
451
+ # t: 0.864
452
+ # dtype: float32
453
+
454
+
455
+
456
+ # slices:
457
+ # - sources:
458
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering12_full_merging_slerp_number_weighted
459
+ # layer_range: [0, 36]
460
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
461
+ # layer_range: [0, 36]
462
+ # merge_method: slerp
463
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering12_full_merging_slerp_number_weighted
464
+ # parameters:
465
+ # t: 0.9
466
+ # dtype: float32
467
+
468
+
469
+
470
+ # models:
471
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full
472
+ # parameters:
473
+ # weight: 0.1
474
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full
475
+ # parameters:
476
+ # weight: 0.2
477
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
478
+ # parameters:
479
+ # weight: 0.7
480
+ # merge_method: multislerp
481
+ # dtype: float32
482
+
483
+
484
+
485
+ # models:
486
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled60_full
487
+ # parameters:
488
+ # weight: 0.6
489
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
490
+ # parameters:
491
+ # weight: 0.2
492
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_full
493
+ # parameters:
494
+ # weight: 0.4
495
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled80_full
496
+ # parameters:
497
+ # weight: 0.8
498
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
499
+ # parameters:
500
+ # weight: 1.0
501
+ # merge_method: multislerp
502
+ # dtype: float32
503
+
504
+ # slices:
505
+ # - sources:
506
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
507
+ # layer_range: [0, 36]
508
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_full
509
+ # layer_range: [0, 36]
510
+ # merge_method: slerp
511
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
512
+ # parameters:
513
+ # t: 0.66
514
+ # dtype: float32
515
+
516
+ # slices:
517
+ # - sources:
518
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master//root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sample20_sample40
519
+ # layer_range: [0, 36]
520
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled60_full
521
+ # layer_range: [0, 36]
522
+ # merge_method: slerp
523
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master//root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sample20_sample40
524
+ # parameters:
525
+ # t: 0.60
526
+ # dtype: float32
527
+
528
+ # slices:
529
+ # - sources:
530
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master//root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sample20_sample40_sample60
531
+ # layer_range: [0, 36]
532
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled80_full
533
+ # layer_range: [0, 36]
534
+ # merge_method: slerp
535
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master//root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sample20_sample40_sample60
536
+ # parameters:
537
+ # t: 0.57
538
+ # dtype: float32
539
+
540
+ # slices:
541
+ # - sources:
542
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master//root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sample20_sample40_sample60_sample80
543
+ # layer_range: [0, 36]
544
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
545
+ # layer_range: [0, 36]
546
+ # merge_method: slerp
547
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master//root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sample20_sample40_sample60_sample80
548
+ # parameters:
549
+ # t: 0.56
550
+ # dtype: float32
551
+
552
+
553
+
554
+ # models:
555
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-8000_full
556
+ # parameters:
557
+ # weight: 0.86
558
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-6000_full
559
+ # parameters:
560
+ # weight: 0.65
561
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-4000_full
562
+ # parameters:
563
+ # weight: 0.43
564
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000_full
565
+ # parameters:
566
+ # weight: 0.22
567
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
568
+ # parameters:
569
+ # weight: 1.0
570
+ # merge_method: multislerp
571
+ # dtype: float32
572
+
573
+
574
+
575
+
576
+ # models:
577
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-8000_full
578
+ # parameters:
579
+ # weight: 0.86
580
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-6000_full
581
+ # parameters:
582
+ # weight: 0.65
583
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-4000_full
584
+ # parameters:
585
+ # weight: 0.43
586
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000_full
587
+ # parameters:
588
+ # weight: 0.22
589
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
590
+ # parameters:
591
+ # weight: 1.0
592
+ # merge_method: ties
593
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
594
+ # parameters:
595
+ # normalize: true
596
+ # int8_mask: true
597
+ # dtype: float32
598
+
599
+ # models:
600
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-8000_full
601
+ # parameters:
602
+ # weight: 0.86
603
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-6000_full
604
+ # parameters:
605
+ # weight: 0.65
606
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-4000_full
607
+ # parameters:
608
+ # weight: 0.43
609
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000_full
610
+ # parameters:
611
+ # weight: 0.22
612
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
613
+ # parameters:
614
+ # weight: 1.0
615
+ # merge_method: task_arithmetic
616
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
617
+ # parameters:
618
+ # normalize: true
619
+ # int8_mask: true
620
+ # dtype: float32
621
+
622
+
623
+
624
+ # models:
625
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-8000_full
626
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-6000_full
627
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-4000_full
628
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000_full
629
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
630
+ # merge_method: sce
631
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
632
+ # parameters:
633
+ # normalize: true
634
+ # int8_mask: true
635
+ # dtype: float32
636
+
637
+ # models:
638
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-8000_full
639
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-6000_full
640
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-4000_full
641
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000_full
642
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
643
+ # merge_method: model_stock
644
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
645
+ # parameters:
646
+ # normalize: true
647
+ # int8_mask: true
648
+ # dtype: float32
649
+
650
+
651
+
652
+
653
+
654
+
655
+ # slices:
656
+ # - sources:
657
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000_full
658
+ # layer_range: [0, 36]
659
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-4000_full
660
+ # layer_range: [0, 36]
661
+ # merge_method: slerp
662
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000_full
663
+ # parameters:
664
+ # t: 0.66
665
+ # dtype: float32
666
+
667
+ # slices:
668
+ # - sources:
669
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000-4000
670
+ # layer_range: [0, 36]
671
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-6000_full
672
+ # layer_range: [0, 36]
673
+ # merge_method: slerp
674
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000-4000
675
+ # parameters:
676
+ # t: 0.60
677
+ # dtype: float32
678
+
679
+ # slices:
680
+ # - sources:
681
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000-4000-6000
682
+ # layer_range: [0, 36]
683
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-8000_full
684
+ # layer_range: [0, 36]
685
+ # merge_method: slerp
686
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000-4000-6000
687
+ # parameters:
688
+ # t: 0.57
689
+ # dtype: float32
690
+
691
+ # slices:
692
+ # - sources:
693
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000-4000-6000-8000
694
+ # layer_range: [0, 36]
695
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
696
+ # layer_range: [0, 36]
697
+ # merge_method: slerp
698
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000-4000-6000-8000
699
+ # parameters:
700
+ # t: 0.55
701
+ # dtype: float32
702
+
703
+
704
+
705
+
706
+
707
+
708
+
709
+
710
+ # models:
711
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled60_full
712
+ # parameters:
713
+ # weight: 0.6
714
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
715
+ # parameters:
716
+ # weight: 0.2
717
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_full
718
+ # parameters:
719
+ # weight: 0.4
720
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled80_full
721
+ # parameters:
722
+ # weight: 0.8
723
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
724
+ # parameters:
725
+ # weight: 1.0
726
+ # merge_method: ties
727
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
728
+ # parameters:
729
+ # normalize: true
730
+ # int8_mask: true
731
+ # dtype: float32
732
+
733
+ # models:
734
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled60_full
735
+ # parameters:
736
+ # weight: 0.6
737
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
738
+ # parameters:
739
+ # weight: 0.2
740
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_full
741
+ # parameters:
742
+ # weight: 0.4
743
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled80_full
744
+ # parameters:
745
+ # weight: 0.8
746
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
747
+ # parameters:
748
+ # weight: 1.0
749
+ # merge_method: task_arithmetic
750
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
751
+ # parameters:
752
+ # normalize: true
753
+ # int8_mask: true
754
+ # dtype: float32
755
+
756
+
757
+
758
+ # models:
759
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled60_full
760
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
761
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_full
762
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled80_full
763
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
764
+ # merge_method: sce
765
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
766
+ # parameters:
767
+ # normalize: true
768
+ # int8_mask: true
769
+ # dtype: float32
770
+
771
+ # models:
772
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled60_full
773
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
774
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_full
775
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled80_full
776
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
777
+ # merge_method: model_stock
778
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
779
+ # parameters:
780
+ # normalize: true
781
+ # int8_mask: true
782
+ # dtype: float32
783
+
784
+
785
+
786
+
787
+ # models:
788
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample60_seed100
789
+ # parameters:
790
+ # weight: 0.5
791
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample60_seed1
792
+ # parameters:
793
+ # weight: 0.5
794
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample60_seed42
795
+ # parameters:
796
+ # weight: 0.5
797
+ # merge_method: multislerp
798
+ # dtype: float32
799
+
800
+
801
+
802
+ # models:
803
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample_other50
804
+ # parameters:
805
+ # weight: 0.5
806
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample50
807
+ # parameters:
808
+ # weight: 0.5
809
+ # merge_method: multislerp
810
+ # dtype: float32
811
+
812
+
813
+ # slices:
814
+ # - sources:
815
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample_other50
816
+ # layer_range: [0, 28]
817
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample50
818
+ # layer_range: [0, 28]
819
+ # merge_method: slerp
820
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample_other50
821
+ # parameters:
822
+ # t: 0.5
823
+ # dtype: float32
824
+
825
+
826
+ # models:
827
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample_other50
828
+ # parameters:
829
+ # weight: 0.5
830
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample50
831
+ # parameters:
832
+ # weight: 0.5
833
+ # merge_method: ties
834
+ # base_model: /mnt/data/models/Qwen/Qwen3-0.6B
835
+ # parameters:
836
+ # normalize: true
837
+ # int8_mask: true
838
+ # dtype: float32
839
+
840
+
841
+
842
+ # models:
843
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample50_seed1/
844
+ # parameters:
845
+ # weight: 0.5
846
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample50_seed100/
847
+ # parameters:
848
+ # weight: 0.5
849
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample50/
850
+ # parameters:
851
+ # weight: 0.5
852
+ # merge_method: multislerp
853
+ # dtype: float32
854
+
855
+
856
+ models:
857
+ - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample20
858
+ parameters:
859
+ weight: 0.2
860
+ - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample40
861
+ parameters:
862
+ weight: 0.4
863
+ - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample60
864
+ parameters:
865
+ weight: 0.6
866
+ - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample80
867
+ parameters:
868
+ weight: 0.8
869
+ - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_all_eng_data
870
+ parameters:
871
+ weight: 1.0
872
+ merge_method: multislerp
873
+ dtype: float32
874
+
875
+
876
+
877
+
878
+
879
+
880
+
881
+
882
+ ```
added_tokens.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</think>": 151668,
3
+ "</tool_call>": 151658,
4
+ "</tool_response>": 151666,
5
+ "<think>": 151667,
6
+ "<tool_call>": 151657,
7
+ "<tool_response>": 151665,
8
+ "<|box_end|>": 151649,
9
+ "<|box_start|>": 151648,
10
+ "<|endoftext|>": 151643,
11
+ "<|file_sep|>": 151664,
12
+ "<|fim_middle|>": 151660,
13
+ "<|fim_pad|>": 151662,
14
+ "<|fim_prefix|>": 151659,
15
+ "<|fim_suffix|>": 151661,
16
+ "<|im_end|>": 151645,
17
+ "<|im_start|>": 151644,
18
+ "<|image_pad|>": 151655,
19
+ "<|object_ref_end|>": 151647,
20
+ "<|object_ref_start|>": 151646,
21
+ "<|quad_end|>": 151651,
22
+ "<|quad_start|>": 151650,
23
+ "<|repo_name|>": 151663,
24
+ "<|video_pad|>": 151656,
25
+ "<|vision_end|>": 151653,
26
+ "<|vision_pad|>": 151654,
27
+ "<|vision_start|>": 151652
28
+ }
config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3Model"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "dtype": "float32",
9
+ "eos_token_id": 151645,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 1024,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_types": [
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention"
44
+ ],
45
+ "max_position_embeddings": 40960,
46
+ "max_window_layers": 28,
47
+ "model_type": "qwen3",
48
+ "num_attention_heads": 16,
49
+ "num_hidden_layers": 28,
50
+ "num_key_value_heads": 8,
51
+ "rms_norm_eps": 1e-06,
52
+ "rope_scaling": null,
53
+ "rope_theta": 1000000,
54
+ "sliding_window": null,
55
+ "tie_word_embeddings": true,
56
+ "transformers_version": "4.57.1",
57
+ "use_cache": false,
58
+ "use_sliding_window": false,
59
+ "vocab_size": 151936
60
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "prompts": {
3
+ "query": "Instruct: Given a web search query, retrieve relevant passages that answer the query\nQuery:",
4
+ "document": ""
5
+ },
6
+ "default_prompt_name": null,
7
+ "similarity_fn_name": "cosine"
8
+ }
mergekit_config.yml ADDED
@@ -0,0 +1,849 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # models:
2
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_classification_gradient2_full/
3
+ # parameters:
4
+ # weight: 1
5
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_clustering_gradient2_full/
6
+ # parameters:
7
+ # weight: 1
8
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_retrieval_gradient2_full/
9
+ # parameters:
10
+ # weight: 1
11
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_sts_gradient2_full/
12
+ # parameters:
13
+ # weight: 1
14
+ # merge_method: multislerp
15
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
16
+ # dtype: float32
17
+ # slices:
18
+ # - sources:
19
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_classification_gradient2_full/
20
+ # layer_range: [0, 36]
21
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_sts_gradient2_full/
22
+ # layer_range: [0, 36]
23
+ # merge_method: slerp
24
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_classification_gradient2_full/
25
+ # parameters:
26
+ # t: 0.5
27
+ # dtype: float32
28
+
29
+ # slices:
30
+ # - sources:
31
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_sts_classification_selerp
32
+ # layer_range: [0, 36]
33
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_clustering_gradient2_full/
34
+ # layer_range: [0, 36]
35
+ # merge_method: slerp
36
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_sts_classification_selerp
37
+ # parameters:
38
+ # t: 0.5
39
+ # dtype: float32
40
+
41
+
42
+ # slices:
43
+ # - sources:
44
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_sts_classification_clustering_selerp
45
+ # layer_range: [0, 36]
46
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_retrieval_gradient2_full/
47
+ # layer_range: [0, 36]
48
+ # merge_method: slerp
49
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_sts_classification_clustering_selerp
50
+ # parameters:
51
+ # t: 0.5
52
+ # dtype: float32
53
+
54
+
55
+
56
+ # slices:
57
+ # - sources:
58
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_full
59
+ # layer_range: [0, 36]
60
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_other_full
61
+ # layer_range: [0, 36]
62
+ # merge_method: slerp
63
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_full
64
+ # parameters:
65
+ # t: 0.5
66
+ # dtype: float32
67
+ # models:
68
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_cut0.15_clustering1/
69
+ # parameters:
70
+ # weight: 0.863526622
71
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_cut0.15_clustering2/
72
+ # parameters:
73
+ # weight: 0.020574888
74
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_cut0.15_clustering3/
75
+ # parameters:
76
+ # weight: 0.11589849
77
+ # merge_method: ties
78
+ # base_model: /mnt/data/models/Qwen/Qwen3-0.6B
79
+ # parameters:
80
+ # normalize: true
81
+ # int8_mask: true
82
+ # dtype: float32
83
+
84
+ # slices:
85
+ # - sources:
86
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
87
+ # layer_range: [0, 36]
88
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering3_full/
89
+ # layer_range: [0, 36]
90
+ # merge_method: slerp
91
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
92
+ # parameters:
93
+ # t: 0.84
94
+ # dtype: float32
95
+
96
+
97
+ # slices:
98
+ # - sources:
99
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering23_slerp
100
+ # layer_range: [0, 36]
101
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
102
+ # layer_range: [0, 36]
103
+ # merge_method: slerp
104
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering23_slerp
105
+ # parameters:
106
+ # t: 0.86
107
+ # dtype: float32
108
+
109
+
110
+
111
+ # models:
112
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
113
+ # parameters:
114
+ # weight: 0.863526622
115
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
116
+ # parameters:
117
+ # weight: 0.020574888
118
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering3_full/
119
+ # parameters:
120
+ # weight: 0.11589849
121
+ # merge_method: ties
122
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
123
+ # parameters:
124
+ # normalize: true
125
+ # int8_mask: true
126
+ # dtype: float32
127
+
128
+ # models:
129
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
130
+ # parameters:
131
+ # weight: 0.4
132
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
133
+ # parameters:
134
+ # weight: 0.3
135
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering3_full/
136
+ # parameters:
137
+ # weight: 0.3
138
+ # merge_method: ties
139
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
140
+ # parameters:
141
+ # normalize: true
142
+ # int8_mask: true
143
+ # dtype: float32
144
+
145
+
146
+ # models:
147
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
148
+ # parameters:
149
+ # weight: 1
150
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
151
+ # parameters:
152
+ # weight: 1
153
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering3_full/
154
+ # parameters:
155
+ # weight: 1
156
+ # merge_method: ties
157
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
158
+ # parameters:
159
+ # normalize: true
160
+ # int8_mask: true
161
+ # dtype: float32
162
+
163
+ # slices:
164
+ # - sources:
165
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
166
+ # layer_range: [0, 36]
167
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering3_full/
168
+ # layer_range: [0, 36]
169
+ # merge_method: slerp
170
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
171
+ # parameters:
172
+ # t: 0.5
173
+ # dtype: float32
174
+
175
+ # slices:
176
+ # - sources:
177
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering23_slerp55
178
+ # layer_range: [0, 36]
179
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
180
+ # layer_range: [0, 36]
181
+ # merge_method: slerp
182
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering23_slerp55
183
+ # parameters:
184
+ # t: 0.5
185
+ # dtype: float32
186
+
187
+ # slices:
188
+ # - sources:
189
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering_retrieval123_slerp55/
190
+ # layer_range: [0, 36]
191
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full/
192
+ # layer_range: [0, 36]
193
+ # merge_method: slerp
194
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering_retrieval123_slerp55/
195
+ # parameters:
196
+ # t: 0.5
197
+ # dtype: float32
198
+
199
+
200
+ # slices:
201
+ # - sources:
202
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-hotpotqa_0.02_8_5e-5_16_lora32_full/
203
+ # layer_range: [0, 36]
204
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-imdb_classification_0.02_2_5e-5_16_lora32_full
205
+ # layer_range: [0, 36]
206
+ # merge_method: slerp
207
+ # base_model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-hotpotqa_0.02_8_5e-5_16_lora32_full/
208
+ # parameters:
209
+ # t: 0.5
210
+ # dtype: float32
211
+
212
+
213
+ # models:
214
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
215
+ # parameters:
216
+ # weight: 0.5
217
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
218
+ # parameters:
219
+ # weight: 0.5
220
+ # merge_method: dare_ties
221
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
222
+ # parameters:
223
+ # normalize: true
224
+ # int8_mask: true
225
+ # dtype: float32
226
+
227
+ # models:
228
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
229
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
230
+ # merge_method: model_stock
231
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
232
+ # parameters:
233
+ # normalize: true
234
+ # int8_mask: true
235
+ # dtype: float32
236
+ # models:
237
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
238
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
239
+ # merge_method: karcher
240
+ # dtype: float32
241
+
242
+ # models:
243
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-medrxiv_title_0.02_8_5e-5_16_lora32_full
244
+ # parameters:
245
+ # weight: 0.5
246
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-squad_0.02_8_5e-5_16_lora32_full
247
+ # parameters:
248
+ # weight: 0.5
249
+ # merge_method: multislerp
250
+ # dtype: float32
251
+ # slices:
252
+ # - sources:
253
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-medrxiv_title_0.02_8_5e-5_16_lora32_full
254
+ # layer_range: [0, 36]
255
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-squad_0.02_8_5e-5_16_lora32_full
256
+ # layer_range: [0, 36]
257
+ # merge_method: slerp
258
+ # base_model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-medrxiv_title_0.02_8_5e-5_16_lora32_full
259
+ # parameters:
260
+ # t: 0.5
261
+ # dtype: float32
262
+ # models:
263
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
264
+ # parameters:
265
+ # weight: 0.863
266
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
267
+ # parameters:
268
+ # weight: 0.137
269
+ # merge_method: task_arithmetic
270
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
271
+ # parameters:
272
+ # normalize: true
273
+ # int8_mask: true
274
+ # dtype: float32
275
+
276
+ # models:
277
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
278
+ # parameters:
279
+ # weight: 0.863
280
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
281
+ # parameters:
282
+ # weight: 0.137
283
+ # merge_method: ties
284
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
285
+ # parameters:
286
+ # normalize: true
287
+ # int8_mask: true
288
+ # dtype: float32
289
+ # slices:
290
+ # - sources:
291
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
292
+ # layer_range: [0, 36]
293
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
294
+ # layer_range: [0, 36]
295
+ # merge_method: slerp
296
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
297
+ # parameters:
298
+ # t: 0.137
299
+ # dtype: float32
300
+
301
+
302
+
303
+
304
+ # slices:
305
+ # - sources:
306
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_cut0.15_clustering1_slerp_same_weighted
307
+ # layer_range: [0, 36]
308
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_full
309
+ # layer_range: [0, 36]
310
+ # merge_method: slerp
311
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_cut0.15_clustering1_slerp_same_weighted
312
+ # parameters:
313
+ # t: 0.5
314
+ # dtype: float32
315
+
316
+
317
+ # slices:
318
+ # - sources:
319
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-squad_0.02_8_5e-5_16_lora32_full
320
+ # layer_range: [0, 36]
321
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-medrxiv_abstract_0.02_8_5e-5_16_lora32_full
322
+ # layer_range: [0, 36]
323
+ # merge_method: slerp
324
+ # base_model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-squad_0.02_8_5e-5_16_lora32_full
325
+ # parameters:
326
+ # t: 0.5
327
+ # dtype: float32
328
+
329
+ # models:
330
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-squad_0.02_8_5e-5_16_lora32_full
331
+ # parameters:
332
+ # weight: 0.5
333
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-medrxiv_abstract_0.02_8_5e-5_16_lora32_full
334
+ # parameters:
335
+ # weight: 0.5
336
+ # merge_method: multislerp
337
+ # dtype: float32
338
+
339
+
340
+
341
+
342
+ # slices:
343
+ # - sources:
344
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-fiqa_0.02_8_5e-5_16_lora32_full
345
+ # layer_range: [0, 36]
346
+ # - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-tweet_sentiment_extraction_classification_0.02_3_5e-5_16_lora32_full
347
+ # layer_range: [0, 36]
348
+ # merge_method: slerp
349
+ # base_model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-fiqa_0.02_8_5e-5_16_lora32_full
350
+ # parameters:
351
+ # t: 0.5
352
+ # dtype: float32
353
+
354
+
355
+
356
+
357
+
358
+ # slices:
359
+ # - sources:
360
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_sample20_full
361
+ # layer_range: [0, 36]
362
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_other_sample20_full
363
+ # layer_range: [0, 36]
364
+ # merge_method: slerp
365
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_sample20_full
366
+ # parameters:
367
+ # t: 0.864
368
+ # dtype: float32
369
+
370
+
371
+ # slices:
372
+ # - sources:
373
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering12_other_sample20_slerp_merging
374
+ # layer_range: [0, 36]
375
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled80_full
376
+ # layer_range: [0, 36]
377
+ # merge_method: slerp
378
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering12_other_sample20_slerp_merging
379
+ # parameters:
380
+ # t: 0.8
381
+ # dtype: float32
382
+
383
+
384
+ # slices:
385
+ # - sources:
386
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_sample80_full
387
+ # layer_range: [0, 36]
388
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_other_sample80_full
389
+ # layer_range: [0, 36]
390
+ # merge_method: slerp
391
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_sample80_full
392
+ # parameters:
393
+ # t: 0.864
394
+ # dtype: float32
395
+
396
+
397
+ # slices:
398
+ # - sources:
399
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering12_other_sample80
400
+ # layer_range: [0, 36]
401
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
402
+ # layer_range: [0, 36]
403
+ # merge_method: slerp
404
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering12_other_sample80
405
+ # parameters:
406
+ # t: 0.2
407
+ # dtype: float32
408
+
409
+
410
+
411
+ # slices:
412
+ # - sources:
413
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full
414
+ # layer_range: [0, 36]
415
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full
416
+ # layer_range: [0, 36]
417
+ # merge_method: slerp
418
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full
419
+ # parameters:
420
+ # t: 0.864
421
+ # dtype: float32
422
+
423
+
424
+
425
+ # slices:
426
+ # - sources:
427
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering12_full_merging_slerp_number_weighted
428
+ # layer_range: [0, 36]
429
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
430
+ # layer_range: [0, 36]
431
+ # merge_method: slerp
432
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering12_full_merging_slerp_number_weighted
433
+ # parameters:
434
+ # t: 0.9
435
+ # dtype: float32
436
+
437
+
438
+
439
+ # models:
440
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full
441
+ # parameters:
442
+ # weight: 0.1
443
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full
444
+ # parameters:
445
+ # weight: 0.2
446
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
447
+ # parameters:
448
+ # weight: 0.7
449
+ # merge_method: multislerp
450
+ # dtype: float32
451
+
452
+
453
+
454
+ # models:
455
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled60_full
456
+ # parameters:
457
+ # weight: 0.6
458
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
459
+ # parameters:
460
+ # weight: 0.2
461
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_full
462
+ # parameters:
463
+ # weight: 0.4
464
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled80_full
465
+ # parameters:
466
+ # weight: 0.8
467
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
468
+ # parameters:
469
+ # weight: 1.0
470
+ # merge_method: multislerp
471
+ # dtype: float32
472
+
473
+ # slices:
474
+ # - sources:
475
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
476
+ # layer_range: [0, 36]
477
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_full
478
+ # layer_range: [0, 36]
479
+ # merge_method: slerp
480
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
481
+ # parameters:
482
+ # t: 0.66
483
+ # dtype: float32
484
+
485
+ # slices:
486
+ # - sources:
487
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master//root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sample20_sample40
488
+ # layer_range: [0, 36]
489
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled60_full
490
+ # layer_range: [0, 36]
491
+ # merge_method: slerp
492
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master//root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sample20_sample40
493
+ # parameters:
494
+ # t: 0.60
495
+ # dtype: float32
496
+
497
+ # slices:
498
+ # - sources:
499
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master//root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sample20_sample40_sample60
500
+ # layer_range: [0, 36]
501
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled80_full
502
+ # layer_range: [0, 36]
503
+ # merge_method: slerp
504
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master//root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sample20_sample40_sample60
505
+ # parameters:
506
+ # t: 0.57
507
+ # dtype: float32
508
+
509
+ # slices:
510
+ # - sources:
511
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master//root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sample20_sample40_sample60_sample80
512
+ # layer_range: [0, 36]
513
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
514
+ # layer_range: [0, 36]
515
+ # merge_method: slerp
516
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master//root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sample20_sample40_sample60_sample80
517
+ # parameters:
518
+ # t: 0.56
519
+ # dtype: float32
520
+
521
+
522
+
523
+ # models:
524
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-8000_full
525
+ # parameters:
526
+ # weight: 0.86
527
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-6000_full
528
+ # parameters:
529
+ # weight: 0.65
530
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-4000_full
531
+ # parameters:
532
+ # weight: 0.43
533
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000_full
534
+ # parameters:
535
+ # weight: 0.22
536
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
537
+ # parameters:
538
+ # weight: 1.0
539
+ # merge_method: multislerp
540
+ # dtype: float32
541
+
542
+
543
+
544
+
545
+ # models:
546
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-8000_full
547
+ # parameters:
548
+ # weight: 0.86
549
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-6000_full
550
+ # parameters:
551
+ # weight: 0.65
552
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-4000_full
553
+ # parameters:
554
+ # weight: 0.43
555
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000_full
556
+ # parameters:
557
+ # weight: 0.22
558
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
559
+ # parameters:
560
+ # weight: 1.0
561
+ # merge_method: ties
562
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
563
+ # parameters:
564
+ # normalize: true
565
+ # int8_mask: true
566
+ # dtype: float32
567
+
568
+ # models:
569
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-8000_full
570
+ # parameters:
571
+ # weight: 0.86
572
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-6000_full
573
+ # parameters:
574
+ # weight: 0.65
575
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-4000_full
576
+ # parameters:
577
+ # weight: 0.43
578
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000_full
579
+ # parameters:
580
+ # weight: 0.22
581
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
582
+ # parameters:
583
+ # weight: 1.0
584
+ # merge_method: task_arithmetic
585
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
586
+ # parameters:
587
+ # normalize: true
588
+ # int8_mask: true
589
+ # dtype: float32
590
+
591
+
592
+
593
+ # models:
594
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-8000_full
595
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-6000_full
596
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-4000_full
597
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000_full
598
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
599
+ # merge_method: sce
600
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
601
+ # parameters:
602
+ # normalize: true
603
+ # int8_mask: true
604
+ # dtype: float32
605
+
606
+ # models:
607
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-8000_full
608
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-6000_full
609
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-4000_full
610
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000_full
611
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
612
+ # merge_method: model_stock
613
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
614
+ # parameters:
615
+ # normalize: true
616
+ # int8_mask: true
617
+ # dtype: float32
618
+
619
+
620
+
621
+
622
+
623
+
624
+ # slices:
625
+ # - sources:
626
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000_full
627
+ # layer_range: [0, 36]
628
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-4000_full
629
+ # layer_range: [0, 36]
630
+ # merge_method: slerp
631
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000_full
632
+ # parameters:
633
+ # t: 0.66
634
+ # dtype: float32
635
+
636
+ # slices:
637
+ # - sources:
638
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000-4000
639
+ # layer_range: [0, 36]
640
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-6000_full
641
+ # layer_range: [0, 36]
642
+ # merge_method: slerp
643
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000-4000
644
+ # parameters:
645
+ # t: 0.60
646
+ # dtype: float32
647
+
648
+ # slices:
649
+ # - sources:
650
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000-4000-6000
651
+ # layer_range: [0, 36]
652
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-8000_full
653
+ # layer_range: [0, 36]
654
+ # merge_method: slerp
655
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000-4000-6000
656
+ # parameters:
657
+ # t: 0.57
658
+ # dtype: float32
659
+
660
+ # slices:
661
+ # - sources:
662
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000-4000-6000-8000
663
+ # layer_range: [0, 36]
664
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
665
+ # layer_range: [0, 36]
666
+ # merge_method: slerp
667
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2/checkpoint-2000-4000-6000-8000
668
+ # parameters:
669
+ # t: 0.55
670
+ # dtype: float32
671
+
672
+
673
+
674
+
675
+
676
+
677
+
678
+
679
+ # models:
680
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled60_full
681
+ # parameters:
682
+ # weight: 0.6
683
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
684
+ # parameters:
685
+ # weight: 0.2
686
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_full
687
+ # parameters:
688
+ # weight: 0.4
689
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled80_full
690
+ # parameters:
691
+ # weight: 0.8
692
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
693
+ # parameters:
694
+ # weight: 1.0
695
+ # merge_method: ties
696
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
697
+ # parameters:
698
+ # normalize: true
699
+ # int8_mask: true
700
+ # dtype: float32
701
+
702
+ # models:
703
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled60_full
704
+ # parameters:
705
+ # weight: 0.6
706
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
707
+ # parameters:
708
+ # weight: 0.2
709
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_full
710
+ # parameters:
711
+ # weight: 0.4
712
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled80_full
713
+ # parameters:
714
+ # weight: 0.8
715
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
716
+ # parameters:
717
+ # weight: 1.0
718
+ # merge_method: task_arithmetic
719
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
720
+ # parameters:
721
+ # normalize: true
722
+ # int8_mask: true
723
+ # dtype: float32
724
+
725
+
726
+
727
+ # models:
728
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled60_full
729
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
730
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_full
731
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled80_full
732
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
733
+ # merge_method: sce
734
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
735
+ # parameters:
736
+ # normalize: true
737
+ # int8_mask: true
738
+ # dtype: float32
739
+
740
+ # models:
741
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled60_full
742
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
743
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_full
744
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled80_full
745
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
746
+ # merge_method: model_stock
747
+ # base_model: /mnt/data/models/Qwen/Qwen3-4B
748
+ # parameters:
749
+ # normalize: true
750
+ # int8_mask: true
751
+ # dtype: float32
752
+
753
+
754
+
755
+
756
+ # models:
757
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample60_seed100
758
+ # parameters:
759
+ # weight: 0.5
760
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample60_seed1
761
+ # parameters:
762
+ # weight: 0.5
763
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample60_seed42
764
+ # parameters:
765
+ # weight: 0.5
766
+ # merge_method: multislerp
767
+ # dtype: float32
768
+
769
+
770
+
771
+ # models:
772
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample_other50
773
+ # parameters:
774
+ # weight: 0.5
775
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample50
776
+ # parameters:
777
+ # weight: 0.5
778
+ # merge_method: multislerp
779
+ # dtype: float32
780
+
781
+
782
+ # slices:
783
+ # - sources:
784
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample_other50
785
+ # layer_range: [0, 28]
786
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample50
787
+ # layer_range: [0, 28]
788
+ # merge_method: slerp
789
+ # base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample_other50
790
+ # parameters:
791
+ # t: 0.5
792
+ # dtype: float32
793
+
794
+
795
+ # models:
796
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample_other50
797
+ # parameters:
798
+ # weight: 0.5
799
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample50
800
+ # parameters:
801
+ # weight: 0.5
802
+ # merge_method: ties
803
+ # base_model: /mnt/data/models/Qwen/Qwen3-0.6B
804
+ # parameters:
805
+ # normalize: true
806
+ # int8_mask: true
807
+ # dtype: float32
808
+
809
+
810
+
811
+ # models:
812
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample50_seed1/
813
+ # parameters:
814
+ # weight: 0.5
815
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample50_seed100/
816
+ # parameters:
817
+ # weight: 0.5
818
+ # - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample50/
819
+ # parameters:
820
+ # weight: 0.5
821
+ # merge_method: multislerp
822
+ # dtype: float32
823
+
824
+
825
+ models:
826
+ - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample20
827
+ parameters:
828
+ weight: 0.2
829
+ - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample40
830
+ parameters:
831
+ weight: 0.4
832
+ - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample60
833
+ parameters:
834
+ weight: 0.6
835
+ - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_sample80
836
+ parameters:
837
+ weight: 0.8
838
+ - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_all_eng_data
839
+ parameters:
840
+ weight: 1.0
841
+ merge_method: multislerp
842
+ dtype: float32
843
+
844
+
845
+
846
+
847
+
848
+
849
+
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0589b91120d662694001fba7b4a9aa417dd99227536db6922899b6dec00d3748
3
+ size 2384233112
modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
task_prompts.json ADDED
@@ -0,0 +1,255 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "AmazonCounterfactualClassification": "Classify a given Amazon customer review text as either counterfactual or not-counterfactual.",
3
+ "AmazonPolarityClassification": "Classify Amazon reviews into positive or negative sentiment.",
4
+ "AmazonReviewsClassification": "Classify the given Amazon review into its appropriate rating category.",
5
+ "Banking77Classification": "Given a online banking query, find the corresponding intents.",
6
+ "EmotionClassification": "Classify the emotion expressed in the given Twitter message into one of the six emotions: anger, fear, joy, love, sadness, and surprise.",
7
+ "ImdbClassification": "Classify the sentiment expressed in the given movie review text from the IMDB dataset.",
8
+ "MassiveIntentClassification": "Given a user utterance as query, find the user intents.",
9
+ "MassiveScenarioClassification": "Given a user utterance as query, find the user scenarios.",
10
+ "MTOPDomainClassification": "Classify the intent domain of the given utterance in task-oriented conversation.",
11
+ "MTOPIntentClassification": "Classify the intent of the given utterance in task-oriented conversation.",
12
+ "ToxicConversationsClassification": "Classify the given comments as either toxic or not toxic.",
13
+ "TweetSentimentExtractionClassification": "Classify the sentiment of a given tweet as either positive, negative, or neutral.",
14
+ "TNews": "Classify the fine-grained category of the given news title.",
15
+ "IFlyTek": "Given an App description text, find the appropriate fine-grained category.",
16
+ "MultilingualSentiment": "Classify sentiment of the customer review into positive, neutral, or negative.",
17
+ "JDReview": "Classify the customer review for iPhone on e-commerce platform into positive or negative.",
18
+ "OnlineShopping": "Classify the customer review for online shopping into positive or negative.",
19
+ "Waimai": "Classify the customer review from a food takeaway platform into positive or negative.",
20
+ "ArxivClusteringP2P": "Identify the main and secondary category of Arxiv papers based on the titles and abstracts.",
21
+ "ArxivClusteringS2S": "Identify the main and secondary category of Arxiv papers based on the titles.",
22
+ "BiorxivClusteringP2P": "Identify the main category of Biorxiv papers based on the titles and abstracts.",
23
+ "BiorxivClusteringS2S": "Identify the main category of Biorxiv papers based on the titles.",
24
+ "MedrxivClusteringP2P": "Identify the main category of Medrxiv papers based on the titles and abstracts.",
25
+ "MedrxivClusteringS2S": "Identify the main category of Medrxiv papers based on the titles.",
26
+ "RedditClustering": "Identify the topic or theme of Reddit posts based on the titles.",
27
+ "RedditClusteringP2P": "Identify the topic or theme of Reddit posts based on the titles and posts.",
28
+ "StackExchangeClustering": "Identify the topic or theme of StackExchange posts based on the titles.",
29
+ "StackExchangeClusteringP2P": "Identify the topic or theme of StackExchange posts based on the given paragraphs.",
30
+ "TwentyNewsgroupsClustering": "Identify the topic or theme of the given news articles.",
31
+ "CLSClusteringS2S": "Identify the main category of scholar papers based on the titles.",
32
+ "CLSClusteringP2P": "Identify the main category of scholar papers based on the titles and abstracts.",
33
+ "ThuNewsClusteringS2S": "Identify the topic or theme of the given news articles based on the titles.",
34
+ "ThuNewsClusteringP2P": "Identify the topic or theme of the given news articles based on the titles and contents.",
35
+ "AskUbuntuDupQuestions": "Retrieve duplicate questions from AskUbuntu forum.",
36
+ "MindSmallReranking": "Retrieve relevant news articles based on user browsing history.",
37
+ "SciDocsRR": "Given a title of a scientific paper, retrieve the titles of other relevant papers.",
38
+ "StackOverflowDupQuestions": "Retrieve duplicate questions from StackOverflow forum.",
39
+ "SprintDuplicateQuestions": "Retrieve duplicate questions from Sprint forum.",
40
+ "TwitterSemEval2015": "Retrieve tweets that are semantically similar to the given tweet.",
41
+ "TwitterURLCorpus": "Retrieve tweets that are semantically similar to the given tweet.",
42
+ "T2Reranking": "Given a Chinese search query, retrieve web passages that answer the question.",
43
+ "MmarcoReranking": "Given a Chinese search query, retrieve web passages that answer the question.",
44
+ "CMedQAv1": "Given a Chinese community medical question, retrieve replies that best answer the question.",
45
+ "CMedQAv2": "Given a Chinese community medical question, retrieve replies that best answer the question.",
46
+ "Ocnli": "Retrieve semantically similar text.",
47
+ "Cmnli": "Retrieve semantically similar text.",
48
+ "ArguAna": {"query": "Given a claim, find documents that refute the claim.", "passage": "Given a claim, find documents that refute the claim."},
49
+ "ClimateFEVER": "Given a claim about climate change, retrieve documents that support or refute the claim.",
50
+ "ClimateFEVERHardNegatives": "Given a claim about climate change, retrieve documents that support or refute the claim.",
51
+ "DBPedia": "Given a query, retrieve relevant entity descriptions from DBPedia.",
52
+ "FEVER": "Given a claim, retrieve documents that support or refute the claim.",
53
+ "FEVERHardNegatives": "Given a claim, retrieve documents that support or refute the claim.",
54
+ "FiQA2018": "Given a financial question, retrieve user replies that best answer the question.",
55
+ "HotpotQA": "Given a multi-hop question, retrieve documents that can help answer the question.",
56
+ "HotpotQAHardNegatives": "Given a multi-hop question, retrieve documents that can help answer the question.",
57
+ "MSMARCO": "Given a web search query, retrieve relevant passages that answer the query.",
58
+ "NFCorpus": "Given a question, retrieve relevant documents that best answer the question.",
59
+ "NQ": "Given a question, retrieve Wikipedia passages that answer the question.",
60
+ "QuoraRetrieval": "Given a question, retrieve questions that are semantically equivalent to the given question.",
61
+ "SCIDOCS": "Given a scientific paper title, retrieve paper abstracts that are cited by the given paper.",
62
+ "SciFact": "Given a scientific claim, retrieve documents that support or refute the claim.",
63
+ "Touche2020": "Given a question, retrieve detailed and persuasive arguments that answer the question.",
64
+ "Touche2020Retrieval.v3": "Given a question, retrieve detailed and persuasive arguments that answer the question.",
65
+ "TRECCOVID": "Given a query on COVID-19, retrieve documents that answer the query.",
66
+ "T2Retrieval": "Given a Chinese search query, retrieve web passages that answer the question.",
67
+ "MMarcoRetrieval": "Given a web search query, retrieve relevant passages that answer the query.",
68
+ "DuRetrieval": "Given a Chinese search query, retrieve web passages that answer the question.",
69
+ "CovidRetrieval": "Given a question on COVID-19, retrieve news articles that answer the question.",
70
+ "CmedqaRetrieval": "Given a Chinese community medical question, retrieve replies that best answer the question.",
71
+ "EcomRetrieval": "Given a user query from an e-commerce website, retrieve description sentences of relevant products.",
72
+ "MedicalRetrieval": "Given a medical question, retrieve user replies that best answer the question.",
73
+ "VideoRetrieval": "Given a video search query, retrieve the titles of relevant videos.",
74
+ "STSBenchmarkMultilingualSTS": "Retrieve semantically similar text.",
75
+ "SICKFr": "Retrieve semantically similar text.",
76
+ "SummEvalFr": "Given a news summary, retrieve other semantically similar summaries.",
77
+ "MasakhaNEWSClassification": "Classify the News in the given texts into one of the seven category: politics,sports,health,business,entertainment,technology,religion.",
78
+ "OpusparcusPC":"Retrieve semantically similar text.",
79
+ "PawsX":"Retrieve semantically similar text.",
80
+ "AlloProfClusteringP2P": "Identify the main category of Allo Prof document based on the titles and descriptions.",
81
+ "AlloProfClusteringS2S": "Identify the main category of Allo Prof document based on the titles.",
82
+ "HALClusteringS2S": "Identify the main category of academic passage based on the titles and contents.",
83
+ "MasakhaNEWSClusteringP2P": "Identify the topic or theme of the given news articles based on the titles and contents.",
84
+ "MasakhaNEWSClusteringS2S": "Identify the topic or theme of the given news articles based on the titles.",
85
+ "MLSUMClusteringP2P": "Identify the topic or theme of the given articles based on the titles and contents.",
86
+ "MLSUMClusteringS2S": "Identify the topic or theme of the given articles based on the titles.",
87
+ "SyntecReranking": "Given a question, retrieve passages that answer the question.",
88
+ "AlloprofReranking": "Given a question, retrieve passages that answer the question.",
89
+ "AlloprofRetrieval": "Given a question, retrieve passages that answer the question.",
90
+ "BSARDRetrieval": "Given a question, retrieve passages that answer the question.",
91
+ "SyntecRetrieval": "Given a question, retrieve passages that answer the question.",
92
+ "XPQARetrieval": "Given a question, retrieve passages that answer the question.",
93
+ "MintakaRetrieval": "Given a question, retrieve passages that answer the question.",
94
+ "CBD":"Classify the sentiment of polish tweet reviews.",
95
+ "PolEmo2.0-IN": "Classify the sentiment of in-domain (medicine and hotels) online reviews.",
96
+ "PolEmo2.0-OUT":"Classify the sentiment of out-of-domain (products and school) online reviews.",
97
+ "AllegroReviews": "Classify the sentiment of reviews from e-commerce marketplace Allegro.",
98
+ "PAC": "Classify the sentence into one of the two types: \"BEZPIECZNE_POSTANOWIENIE_UMOWNE\" and \"KLAUZULA_ABUZYWNA\".",
99
+ "SICK-E-PL": "Retrieve semantically similar text.",
100
+ "SICK-R-PL": "Retrieve semantically similar text.",
101
+ "STS22": "Retrieve semantically similar text.",
102
+ "AFQMC": "Retrieve semantically similar text.",
103
+ "AFQMC": "Retrieve semantically similar text.",
104
+ "BQ": "Retrieve semantically similar text.",
105
+ "LCQMC": "Retrieve semantically similar text.",
106
+ "PAWSX": "Retrieve semantically similar text.",
107
+ "QBQTC": "Retrieve semantically similar text.",
108
+ "STS12": "Retrieve semantically similar text.",
109
+ "PPC": "Retrieve semantically similar text.",
110
+ "CDSC-E": "Retrieve semantically similar text.",
111
+ "PSC": "Retrieve semantically similar text.",
112
+ "8TagsClustering": "Identify of headlines from social media posts in Polish into 8 categories: film, history, food, medicine, motorization, work, sport and technology.",
113
+ "ArguAna-PL": "Given a claim, find documents that refute the claim.",
114
+ "DBPedia-PL": "Given a query, retrieve relevant entity descriptions from DBPedia.",
115
+ "FiQA-PL": "Given a financial question, retrieve user replies that best answer the question.",
116
+ "HotpotQA-PL": "Given a multi-hop question, retrieve documents that can help answer the question.",
117
+ "MSMARCO-PL": "Given a web search query, retrieve relevant passages that answer the query.",
118
+ "NFCorpus-PL": "Given a question, retrieve relevant documents that best answer the question.",
119
+ "NQ-PL": "Given a question, retrieve Wikipedia passages that answer the question.",
120
+ "Quora-PL": "Given a question, retrieve questions that are semantically equivalent to the given question.",
121
+ "SCIDOCS-PL": "Given a scientific paper title, retrieve paper abstracts that are cited by the given paper.",
122
+ "SciFact-PL": "Given a scientific claim, retrieve documents that support or refute the claim.",
123
+ "TRECCOVID-PL": "Given a query on COVID-19, retrieve documents that answer the query.",
124
+ "GeoreviewClassification": "Classify the organization rating based on the reviews.",
125
+ "HeadlineClassification": "Classify the topic or theme of the given news headline.",
126
+ "InappropriatenessClassification": "Classify the given message as either sensitive topic or not.",
127
+ "KinopoiskClassification": "Classify the sentiment expressed in the given movie review text.",
128
+ "RuReviewsClassification": "Classify product reviews into positive, negative or neutral sentiment.",
129
+ "RuSciBenchGRNTIClassification": "Classify the category of scientific papers based on the titles and abstracts.",
130
+ "RuSciBenchOECDClassification": "Classify the category of scientific papers based on the titles and abstracts.",
131
+ "GeoreviewClusteringP2P": "Identify the organization category based on the reviews.",
132
+ "RuSciBenchGRNTIClusteringP2P": "Identify the category of scientific papers based on the titles and abstracts.",
133
+ "RuSciBenchOECDClusteringP2P": "Identify the category of scientific papers based on the titles and abstracts.",
134
+ "TERRa": "Given a premise, retrieve a hypothesis that is entailed by the premise.",
135
+ "RuBQReranking": "Given a question, retrieve Wikipedia passages that answer the question.",
136
+ "RiaNewsRetrieval": "Given a headline, retrieve relevant articles.",
137
+ "RuBQRetrieval": "Given a question, retrieve Wikipedia passages that answer the question.",
138
+ "RUParaPhraserSTS": "Retrieve semantically similar text.",
139
+ "RuSTSBenchmarkSTS": "Retrieve semantically similar text.",
140
+ "AppsRetrieval": "Given a question about a code problem, retrieve code that can solve the user's problem.",
141
+ "COIRCodeSearchNetRetrieval": "Given a code snippet, retrieve the comment corresponding to that code.",
142
+ "CodeEditSearchRetrieval": "Given a piece of code, retrieve semantically similar code.",
143
+ "CodeFeedbackMT": "Given a question about coding, retrieve code or a passage that can solve the user's question.",
144
+ "CodeFeedbackST": "Given a question about coding, retrieve code or a passage that can solve the user's question.",
145
+ "CodeSearchNetCCRetrieval": "Given a code comment, retrieve the code snippet corresponding to that comment.",
146
+ "CodeSearchNetRetrieval": "Given a code snippet, retrieve the comment corresponding to that code.",
147
+ "CodeTransOceanContest": "Given a piece of code, retrieve semantically similar code.",
148
+ "CodeTransOceanDL": "Given a piece of code, retrieve semantically similar code.",
149
+ "CosQA": "Given a question about coding, retrieve code or a passage that can solve the user's question.",
150
+ "StackOverflowQA": "Given a question about coding, retrieve code or a passage that can solve the user's question.",
151
+ "SyntheticText2SQL": "Given a user's question, retrieve SQL queries that are appropriate responses to the question.",
152
+ "BibleNLPBitextMining": "Retrieve parallel sentences.",
153
+ "BUCC.v2": "Retrieve parallel sentences.",
154
+ "DiaBlaBitextMining": "Retrieve parallel sentences.",
155
+ "FloresBitextMining": "Retrieve parallel sentences.",
156
+ "IN22GenBitextMining": "Retrieve parallel sentences.",
157
+ "IndicGenBenchFloresBitextMining": "Retrieve parallel sentences.",
158
+ "NollySentiBitextMining": "Retrieve parallel sentences.",
159
+ "NTREXBitextMining": "Retrieve parallel sentences.",
160
+ "NusaTranslationBitextMining": "Retrieve parallel sentences.",
161
+ "NusaXBitextMining": "Retrieve parallel sentences.",
162
+ "Tatoeba": "Retrieve parallel sentences.",
163
+ "BulgarianStoreReviewSentimentClassfication": "Classify user reviews into positive or negative sentiment.",
164
+ "CzechProductReviewSentimentClassification": "Classify product reviews into positive or negative sentiment.",
165
+ "GreekLegalCodeClassification": "Given a Greek legal text, classify its topic.",
166
+ "DBpediaClassification": "Given a Wikipedia article, categorize it into classes based on its DBpedia ontology.",
167
+ "FinancialPhrasebankClassification": "Given financial news, categorized by sentiment into positive, negative, or neutral.",
168
+ "PoemSentimentClassification": "Given a poem, categorized by sentiment into positive, no_impact, negative or mixed.",
169
+ "TweetTopicSingleClassification": "Given a tweet, classify its topic.",
170
+ "EstonianValenceClassification": "Given a news article, categorized by sentiment into negatiivne, positiivne, neutraalne or vastuolulin.",
171
+ "FilipinoShopeeReviewsClassification": "Given a shop review, classify its rating on a scale from 1 to 5.",
172
+ "GujaratiNewsClassification": "Given a Gujarati news article, classify its topic.",
173
+ "SentimentAnalysisHindi": "Given a Hindi text, categorized by sentiment into positive, negative or neutral.",
174
+ "IndonesianIdClickbaitClassification": "Given an Indonesian news headline, classify it into clickbait or non-clickbait.",
175
+ "ItaCaseholdClassification": "Given a judgment, classify its topic.",
176
+ "KorSarcasmClassification": "Given a tweet, categorize it into sarcasm or not_sarcasm.",
177
+ "KurdishSentimentClassification": "Given a text, categorized by sentiment into positive or negative.",
178
+ "MacedonianTweetSentimentClassification": "Given a Macedonian tweet, categorized by sentiment into positive, negative, or neutral.",
179
+ "AfriSentiClassification": "Given a text, categorized by sentiment into positive, negative, or neutral.",
180
+ "CataloniaTweetClassification": "Given a tweet, categorized by sentiment into AGAINST, FAVOR or NEUTRAL.",
181
+ "CyrillicTurkicLangClassification": "Given a text, classify its language.",
182
+ "IndicLangClassification": "Given a text, classify its language.",
183
+ "MultiHateClassification": "Given a text, categorized by sentiment into hate or non-hate.",
184
+ "NusaParagraphEmotionClassification": "Given a paragraph, classify its emotion.",
185
+ "NusaX-senti": "Given a text, categorized by sentiment into positive or negative.",
186
+ "SwissJudgementClassification": "Given a news article, categorized it into approval or dismissal.",
187
+ "NepaliNewsClassification": "Given a news article, categorized it into business, entertainment or sports.",
188
+ "OdiaNewsClassification": "Given a news article, categorized it into business, entertainment or sports.",
189
+ "PunjabiNewsClassification": "Given a news article, categorized it into two-classes.",
190
+ "SinhalaNewsClassification": "Given a news article, categorized it into political, business, technology, sports and Entertainment.",
191
+ "CSFDSKMovieReviewSentimentClassification": "Given a movie review, classify its rating on a scale from 0 to 5.",
192
+ "SiswatiNewsClassification": "Given a news article, classify its topic.",
193
+ "SlovakMovieReviewSentimentClassification": "Given a movie review, categorized it into positive or negative.",
194
+ "SwahiliNewsClassification": "Given a news article, classify its domain.",
195
+ "TswanaNewsClassification": "Given a news article, classify its topic.",
196
+ "IsiZuluNewsClassification": "Given a news article, classify its topic.",
197
+ "WikiCitiesClustering": "Identify the country of Wikipedia articles about cities.",
198
+ "RomaniBibleClustering": "Identify verses from the Bible in Kalderash Romani by book.",
199
+ "ArXivHierarchicalClusteringP2P": "Identify the main and secondary category of Arxiv papers based on the titles and abstracts.",
200
+ "ArXivHierarchicalClusteringS2S": "Identify the main and secondary category of Arxiv papers based on the titles.",
201
+ "BigPatentClustering.v2": "Identify the category of documents from the Big Patent dataset.",
202
+ "AlloProfClusteringS2S": "Identify the topic of document titles from Allo Prof dataset.",
203
+ "AlloProfClusteringS2S.v2": "Identify the topic of document titles from Allo Prof dataset.",
204
+ "HALClusteringS2S.v2": "Identify the topic of titles from HAL.",
205
+ "SIB200ClusteringS2S": "Identify the category of documents.",
206
+ "WikiClusteringP2P.v2": "Identify the category of wiki passages.",
207
+ "PlscClusteringP2P.v2": "Identify the category of titles+abstracts from Library of Science.",
208
+ "KorHateSpeechMLClassification": "Given a Korean online news comment, classify its fine-grained hate speech classes.",
209
+ "MalteseNewsClassification": "Given a Maltese news article, classify its topic.",
210
+ "MultiEURLEXMultilabelClassification": "Given a text, classify its topic.",
211
+ "BrazilianToxicTweetsClassification": "Given a tweet, classify its topic.",
212
+ "CTKFactsNLI": "Retrieve semantically similar text.",
213
+ "indonli": "Retrieve semantically similar text.",
214
+ "ArmenianParaphrasePC": "Retrieve semantically similar text.",
215
+ "PawsXPairClassification": "Retrieve semantically similar text.",
216
+ "RTE3": "Retrieve semantically similar text.",
217
+ "XNLI": "Retrieve semantically similar text.",
218
+ "PpcPC": "Retrieve semantically similar text.",
219
+ "GermanSTSBenchmark": "Retrieve semantically similar text.",
220
+ "SICK-R": "Retrieve semantically similar text.",
221
+ "STS13": "Retrieve semantically similar text.",
222
+ "STS14": "Retrieve semantically similar text.",
223
+ "STSBenchmark": "Retrieve semantically similar text.",
224
+ "FaroeseSTS": "Retrieve semantically similar text.",
225
+ "FinParaSTS": "Retrieve semantically similar text.",
226
+ "JSICK": "Retrieve semantically similar text.",
227
+ "IndicCrosslingualSTS": "Retrieve semantically similar text.",
228
+ "SemRel24STS": "Retrieve semantically similar text.",
229
+ "STS17": "Retrieve semantically similar text.",
230
+ "STS22.v2": "Retrieve semantically similar text.",
231
+ "STSES": "Retrieve semantically similar text.",
232
+ "STSB": "Retrieve semantically similar text.",
233
+ "AILAStatutes": "Identifying the most relevant statutes for a given situation.",
234
+ "HagridRetrieval": "Retrieve the relevant passage for the given query.",
235
+ "LegalBenchCorporateLobbying": "Retrieve the relevant passage for the given query.",
236
+ "LEMBPasskeyRetrieval": "Retrieve the relevant passage for the given query.",
237
+ "BelebeleRetrieval": "Retrieve the relevant passage for the given query.",
238
+ "MLQARetrieval": "Retrieve the relevant passage for the given query.",
239
+ "StatcanDialogueDatasetRetrieval": "Retrieve the relevant passage for the given query.",
240
+ "WikipediaRetrievalMultilingual": "Retrieve the relevant passage for the given query.",
241
+ "Core17InstructionRetrieval": "Retrieve the relevant passage for the given query.",
242
+ "News21InstructionRetrieval": "Retrieve the relevant passage for the given query.",
243
+ "Robust04InstructionRetrieval": "Retrieve the relevant passage for the given query.",
244
+ "WebLINXCandidatesReranking": "Retrieve the relevant passage for the given query.",
245
+ "WikipediaRerankingMultilingual": "Retrieve the relevant passage for the given query.",
246
+ "STS15": "Retrieve semantically similar text.",
247
+ "MIRACLRetrievalHardNegatives": "Retrieve the relevant passage for the given query.",
248
+ "BIOSSES": "Retrieve semantically similar text.",
249
+ "CQADupstackRetrieval": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question.",
250
+ "CQADupstackGamingRetrieval": {"query": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question.", "passage": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question."},
251
+ "CQADupstackUnixRetrieval": {"query": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question.", "passage": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question."},
252
+ "STS16": "Retrieve semantically similar text.",
253
+ "SummEval": "Retrieve semantically similar text.",
254
+ "ATEC": "Retrieve semantically similar text."
255
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:352a863cd2761388ccc58f1432467ba6a1037bf12df9069889b142fa246471f6
3
+ size 11422752
tokenizer_config.json ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_eos_token": true,
4
+ "add_prefix_space": false,
5
+ "added_tokens_decoder": {
6
+ "151643": {
7
+ "content": "<|endoftext|>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "151644": {
15
+ "content": "<|im_start|>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "151645": {
23
+ "content": "<|im_end|>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ },
30
+ "151646": {
31
+ "content": "<|object_ref_start|>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false,
36
+ "special": true
37
+ },
38
+ "151647": {
39
+ "content": "<|object_ref_end|>",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": false,
43
+ "single_word": false,
44
+ "special": true
45
+ },
46
+ "151648": {
47
+ "content": "<|box_start|>",
48
+ "lstrip": false,
49
+ "normalized": false,
50
+ "rstrip": false,
51
+ "single_word": false,
52
+ "special": true
53
+ },
54
+ "151649": {
55
+ "content": "<|box_end|>",
56
+ "lstrip": false,
57
+ "normalized": false,
58
+ "rstrip": false,
59
+ "single_word": false,
60
+ "special": true
61
+ },
62
+ "151650": {
63
+ "content": "<|quad_start|>",
64
+ "lstrip": false,
65
+ "normalized": false,
66
+ "rstrip": false,
67
+ "single_word": false,
68
+ "special": true
69
+ },
70
+ "151651": {
71
+ "content": "<|quad_end|>",
72
+ "lstrip": false,
73
+ "normalized": false,
74
+ "rstrip": false,
75
+ "single_word": false,
76
+ "special": true
77
+ },
78
+ "151652": {
79
+ "content": "<|vision_start|>",
80
+ "lstrip": false,
81
+ "normalized": false,
82
+ "rstrip": false,
83
+ "single_word": false,
84
+ "special": true
85
+ },
86
+ "151653": {
87
+ "content": "<|vision_end|>",
88
+ "lstrip": false,
89
+ "normalized": false,
90
+ "rstrip": false,
91
+ "single_word": false,
92
+ "special": true
93
+ },
94
+ "151654": {
95
+ "content": "<|vision_pad|>",
96
+ "lstrip": false,
97
+ "normalized": false,
98
+ "rstrip": false,
99
+ "single_word": false,
100
+ "special": true
101
+ },
102
+ "151655": {
103
+ "content": "<|image_pad|>",
104
+ "lstrip": false,
105
+ "normalized": false,
106
+ "rstrip": false,
107
+ "single_word": false,
108
+ "special": true
109
+ },
110
+ "151656": {
111
+ "content": "<|video_pad|>",
112
+ "lstrip": false,
113
+ "normalized": false,
114
+ "rstrip": false,
115
+ "single_word": false,
116
+ "special": true
117
+ },
118
+ "151657": {
119
+ "content": "<tool_call>",
120
+ "lstrip": false,
121
+ "normalized": false,
122
+ "rstrip": false,
123
+ "single_word": false,
124
+ "special": false
125
+ },
126
+ "151658": {
127
+ "content": "</tool_call>",
128
+ "lstrip": false,
129
+ "normalized": false,
130
+ "rstrip": false,
131
+ "single_word": false,
132
+ "special": false
133
+ },
134
+ "151659": {
135
+ "content": "<|fim_prefix|>",
136
+ "lstrip": false,
137
+ "normalized": false,
138
+ "rstrip": false,
139
+ "single_word": false,
140
+ "special": false
141
+ },
142
+ "151660": {
143
+ "content": "<|fim_middle|>",
144
+ "lstrip": false,
145
+ "normalized": false,
146
+ "rstrip": false,
147
+ "single_word": false,
148
+ "special": false
149
+ },
150
+ "151661": {
151
+ "content": "<|fim_suffix|>",
152
+ "lstrip": false,
153
+ "normalized": false,
154
+ "rstrip": false,
155
+ "single_word": false,
156
+ "special": false
157
+ },
158
+ "151662": {
159
+ "content": "<|fim_pad|>",
160
+ "lstrip": false,
161
+ "normalized": false,
162
+ "rstrip": false,
163
+ "single_word": false,
164
+ "special": false
165
+ },
166
+ "151663": {
167
+ "content": "<|repo_name|>",
168
+ "lstrip": false,
169
+ "normalized": false,
170
+ "rstrip": false,
171
+ "single_word": false,
172
+ "special": false
173
+ },
174
+ "151664": {
175
+ "content": "<|file_sep|>",
176
+ "lstrip": false,
177
+ "normalized": false,
178
+ "rstrip": false,
179
+ "single_word": false,
180
+ "special": false
181
+ },
182
+ "151665": {
183
+ "content": "<tool_response>",
184
+ "lstrip": false,
185
+ "normalized": false,
186
+ "rstrip": false,
187
+ "single_word": false,
188
+ "special": false
189
+ },
190
+ "151666": {
191
+ "content": "</tool_response>",
192
+ "lstrip": false,
193
+ "normalized": false,
194
+ "rstrip": false,
195
+ "single_word": false,
196
+ "special": false
197
+ },
198
+ "151667": {
199
+ "content": "<think>",
200
+ "lstrip": false,
201
+ "normalized": false,
202
+ "rstrip": false,
203
+ "single_word": false,
204
+ "special": false
205
+ },
206
+ "151668": {
207
+ "content": "</think>",
208
+ "lstrip": false,
209
+ "normalized": false,
210
+ "rstrip": false,
211
+ "single_word": false,
212
+ "special": false
213
+ }
214
+ },
215
+ "additional_special_tokens": [
216
+ "<|im_start|>",
217
+ "<|im_end|>",
218
+ "<|object_ref_start|>",
219
+ "<|object_ref_end|>",
220
+ "<|box_start|>",
221
+ "<|box_end|>",
222
+ "<|quad_start|>",
223
+ "<|quad_end|>",
224
+ "<|vision_start|>",
225
+ "<|vision_end|>",
226
+ "<|vision_pad|>",
227
+ "<|image_pad|>",
228
+ "<|video_pad|>"
229
+ ],
230
+ "bos_token": null,
231
+ "clean_up_tokenization_spaces": false,
232
+ "eos_token": "<|im_end|>",
233
+ "errors": "replace",
234
+ "extra_special_tokens": {},
235
+ "model_max_length": 131072,
236
+ "pad_token": "<|endoftext|>",
237
+ "split_special_tokens": false,
238
+ "tokenizer_class": "Qwen2Tokenizer",
239
+ "unk_token": null
240
+ }