Qwen3-4B_data_mixing_gradient2_all_sample20_sample40_sample60_sample80
This is a merge of pre-trained language models created using mergekit.
Merge Details
Merge Method
This model was merged using the Multi-SLERP merge method.
Models Merged
The following models were included in the merge:
- /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled80_full
- /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
- /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled60_full
- /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
- /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_full
Configuration
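The following YAML configuration was used to produce this model. Only the final, uncommented block (`models:` … `dtype: float32`) is the active configuration; the commented-out blocks that precede it are other merge configurations left in the same file and are ignored by the parser: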
# models:
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_classification_gradient2_full/
# parameters:
# weight: 1
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_clustering_gradient2_full/
# parameters:
# weight: 1
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_retrieval_gradient2_full/
# parameters:
# weight: 1
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_sts_gradient2_full/
# parameters:
# weight: 1
# merge_method: multislerp
# base_model: /mnt/data/models/Qwen/Qwen3-4B
# dtype: float32
# slices:
# - sources:
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_classification_gradient2_full/
# layer_range: [0, 36]
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_sts_gradient2_full/
# layer_range: [0, 36]
# merge_method: slerp
# base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_classification_gradient2_full/
# parameters:
# t: 0.5
# dtype: float32
# slices:
# - sources:
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_sts_classification_selerp
# layer_range: [0, 36]
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_clustering_gradient2_full/
# layer_range: [0, 36]
# merge_method: slerp
# base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_sts_classification_selerp
# parameters:
# t: 0.5
# dtype: float32
# slices:
# - sources:
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_sts_classification_clustering_selerp
# layer_range: [0, 36]
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_task_level_merging_retrieval_gradient2_full/
# layer_range: [0, 36]
# merge_method: slerp
# base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_sts_classification_clustering_selerp
# parameters:
# t: 0.5
# dtype: float32
# slices:
# - sources:
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_full
# layer_range: [0, 36]
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_other_full
# layer_range: [0, 36]
# merge_method: slerp
# base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_full
# parameters:
# t: 0.5
# dtype: float32
# models:
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_cut0.15_clustering1/
# parameters:
# weight: 0.863526622
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_cut0.15_clustering2/
# parameters:
# weight: 0.020574888
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-0.6B_cut0.15_clustering3/
# parameters:
# weight: 0.11589849
# merge_method: ties
# base_model: /mnt/data/models/Qwen/Qwen3-0.6B
# parameters:
# normalize: true
# int8_mask: true
# dtype: float32
# slices:
# - sources:
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
# layer_range: [0, 36]
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering3_full/
# layer_range: [0, 36]
# merge_method: slerp
# base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
# parameters:
# t: 0.84
# dtype: float32
# slices:
# - sources:
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering23_slerp
# layer_range: [0, 36]
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
# layer_range: [0, 36]
# merge_method: slerp
# base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering23_slerp
# parameters:
# t: 0.86
# dtype: float32
# models:
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
# parameters:
# weight: 0.863526622
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
# parameters:
# weight: 0.020574888
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering3_full/
# parameters:
# weight: 0.11589849
# merge_method: ties
# base_model: /mnt/data/models/Qwen/Qwen3-4B
# parameters:
# normalize: true
# int8_mask: true
# dtype: float32
# models:
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
# parameters:
# weight: 0.4
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
# parameters:
# weight: 0.3
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering3_full/
# parameters:
# weight: 0.3
# merge_method: ties
# base_model: /mnt/data/models/Qwen/Qwen3-4B
# parameters:
# normalize: true
# int8_mask: true
# dtype: float32
# models:
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
# parameters:
# weight: 1
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
# parameters:
# weight: 1
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering3_full/
# parameters:
# weight: 1
# merge_method: ties
# base_model: /mnt/data/models/Qwen/Qwen3-4B
# parameters:
# normalize: true
# int8_mask: true
# dtype: float32
# slices:
# - sources:
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
# layer_range: [0, 36]
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering3_full/
# layer_range: [0, 36]
# merge_method: slerp
# base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering2_full/
# parameters:
# t: 0.5
# dtype: float32
# slices:
# - sources:
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering23_slerp55
# layer_range: [0, 36]
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
# layer_range: [0, 36]
# merge_method: slerp
# base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering23_slerp55
# parameters:
# t: 0.5
# dtype: float32
# slices:
# - sources:
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering_retrieval123_slerp55/
# layer_range: [0, 36]
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full/
# layer_range: [0, 36]
# merge_method: slerp
# base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_clustering_retrieval123_slerp55/
# parameters:
# t: 0.5
# dtype: float32
# slices:
# - sources:
# - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-hotpotqa_0.02_8_5e-5_16_lora32_full/
# layer_range: [0, 36]
# - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-imdb_classification_0.02_2_5e-5_16_lora32_full
# layer_range: [0, 36]
# merge_method: slerp
# base_model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-hotpotqa_0.02_8_5e-5_16_lora32_full/
# parameters:
# t: 0.5
# dtype: float32
# models:
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
# parameters:
# weight: 0.5
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
# parameters:
# weight: 0.5
# merge_method: dare_ties
# base_model: /mnt/data/models/Qwen/Qwen3-4B
# parameters:
# normalize: true
# int8_mask: true
# dtype: float32
# models:
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
# merge_method: model_stock
# base_model: /mnt/data/models/Qwen/Qwen3-4B
# parameters:
# normalize: true
# int8_mask: true
# dtype: float32
# models:
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
# merge_method: karcher
# dtype: float32
# models:
# - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-medrxiv_title_0.02_8_5e-5_16_lora32_full
# parameters:
# weight: 0.5
# - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-squad_0.02_8_5e-5_16_lora32_full
# parameters:
# weight: 0.5
# merge_method: multislerp
# dtype: float32
# slices:
# - sources:
# - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-medrxiv_title_0.02_8_5e-5_16_lora32_full
# layer_range: [0, 36]
# - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-squad_0.02_8_5e-5_16_lora32_full
# layer_range: [0, 36]
# merge_method: slerp
# base_model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-medrxiv_title_0.02_8_5e-5_16_lora32_full
# parameters:
# t: 0.5
# dtype: float32
# models:
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
# parameters:
# weight: 0.863
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
# parameters:
# weight: 0.137
# merge_method: task_arithmetic
# base_model: /mnt/data/models/Qwen/Qwen3-4B
# parameters:
# normalize: true
# int8_mask: true
# dtype: float32
# models:
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
# parameters:
# weight: 0.863
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
# parameters:
# weight: 0.137
# merge_method: ties
# base_model: /mnt/data/models/Qwen/Qwen3-4B
# parameters:
# normalize: true
# int8_mask: true
# dtype: float32
# slices:
# - sources:
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
# layer_range: [0, 36]
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full/
# layer_range: [0, 36]
# merge_method: slerp
# base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full/
# parameters:
# t: 0.137
# dtype: float32
# slices:
# - sources:
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_cut0.15_clustering1_slerp_same_weighted
# layer_range: [0, 36]
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_full
# layer_range: [0, 36]
# merge_method: slerp
# base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_cut0.15_clustering1_slerp_same_weighted
# parameters:
# t: 0.5
# dtype: float32
# slices:
# - sources:
# - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-squad_0.02_8_5e-5_16_lora32_full
# layer_range: [0, 36]
# - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-medrxiv_abstract_0.02_8_5e-5_16_lora32_full
# layer_range: [0, 36]
# merge_method: slerp
# base_model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-squad_0.02_8_5e-5_16_lora32_full
# parameters:
# t: 0.5
# dtype: float32
# models:
# - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-squad_0.02_8_5e-5_16_lora32_full
# parameters:
# weight: 0.5
# - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-medrxiv_abstract_0.02_8_5e-5_16_lora32_full
# parameters:
# weight: 0.5
# merge_method: multislerp
# dtype: float32
# slices:
# - sources:
# - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-fiqa_0.02_8_5e-5_16_lora32_full
# layer_range: [0, 36]
# - model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-grdient2-tweet_sentiment_extraction_classification_0.02_3_5e-5_16_lora32_full
# layer_range: [0, 36]
# merge_method: slerp
# base_model: /root/paddlejob/workspace/env_run/output/model_merge_checkpoint/Qwen3-4B-gradient2-fiqa_0.02_8_5e-5_16_lora32_full
# parameters:
# t: 0.5
# dtype: float32
# slices:
# - sources:
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_sample20_full
# layer_range: [0, 36]
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_other_sample20_full
# layer_range: [0, 36]
# merge_method: slerp
# base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_sample20_full
# parameters:
# t: 0.864
# dtype: float32
# slices:
# - sources:
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering12_other_sample20_slerp_merging
# layer_range: [0, 36]
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled80_full
# layer_range: [0, 36]
# merge_method: slerp
# base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering12_other_sample20_slerp_merging
# parameters:
# t: 0.8
# dtype: float32
# slices:
# - sources:
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_sample80_full
# layer_range: [0, 36]
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_other_sample80_full
# layer_range: [0, 36]
# merge_method: slerp
# base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_other_sample80_full
# parameters:
# t: 0.864
# dtype: float32
# slices:
# - sources:
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering12_other_sample80
# layer_range: [0, 36]
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
# layer_range: [0, 36]
# merge_method: slerp
# base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering12_other_sample80
# parameters:
# t: 0.2
# dtype: float32
# slices:
# - sources:
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full
# layer_range: [0, 36]
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full
# layer_range: [0, 36]
# merge_method: slerp
# base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full
# parameters:
# t: 0.864
# dtype: float32
# slices:
# - sources:
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering12_full_merging_slerp_number_weighted
# layer_range: [0, 36]
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
# layer_range: [0, 36]
# merge_method: slerp
# base_model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering12_full_merging_slerp_number_weighted
# parameters:
# t: 0.9
# dtype: float32
# models:
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.2_clustering2_full
# parameters:
# weight: 0.1
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_cut0.15_clustering1_full
# parameters:
# weight: 0.2
# - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
# parameters:
# weight: 0.7
# merge_method: multislerp
# dtype: float32
# Active mergekit configuration: merges five Qwen3-4B data-mixing checkpoints
# with the multislerp method. Each list entry pairs a checkpoint path with
# its own per-model `parameters.weight`.
# NOTE(review): the weights below sum to 3.0 rather than 1.0 — confirm that
# multislerp's weight normalization is in effect, or rescale them explicitly.
models:
  - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled60_full
    parameters:
      weight: 0.6
  - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled20_full
    parameters:
      weight: 0.2
  # NOTE(review): presumably the 40% sample (the path carries no number, but
  # the weight is 0.4 and the model title lists sample40) — confirm.
  - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled_full
    parameters:
      weight: 0.4
  - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_sampled80_full
    parameters:
      weight: 0.8
  - model: /root/paddlejob/workspace/env_run/output/FlagEmbedding-master/Qwen3-4B_data_mixing_gradient2_full
    parameters:
      weight: 1.0
merge_method: multislerp
dtype: float32
- Downloads last month: 12