---
base_model: []
library_name: transformers
tags:
- mergekit
- merge
---
# merged_4b

This is a merge of pre-trained language models created using [mergekit](https://github.com/cg123/mergekit).
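A minimal usage sketch with the Transformers library is shown below. The local path `./merged_4b`, the prompt, and the generation settings are illustrative assumptions, not part of the merge recipe itself.

```python
# Minimal usage sketch (assumptions: the merged weights are saved locally at
# ./merged_4b, and the installed transformers version supports the Qwen3
# architecture).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = "./merged_4b"  # hypothetical local path to this merge
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16)

inputs = tokenizer("Model merging is", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```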
## Merge Details

### Merge Method
This model was merged using the [TIES](https://arxiv.org/abs/2306.01708) merge method, with ./evolve_storage/input_models/Qwen3-4B_131373129 as the base model.
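Roughly, TIES merges each tensor in three steps: each fine-tuned model's task vector (its delta from the base) is trimmed to its largest-magnitude entries, an element-wise sign is elected across models, and only sign-agreeing entries are combined. The sketch below illustrates that idea on a single tensor; it is not mergekit's actual implementation, and the `density`/`weight` names simply mirror the parameters in the configuration further down.

```python
# Illustrative single-tensor TIES sketch -- not mergekit's implementation.
import torch

def ties_merge(base: torch.Tensor, tuned: list[torch.Tensor],
               weights: list[float], density: float) -> torch.Tensor:
    # 1. Trim: keep only the top-`density` fraction of each task vector
    #    by magnitude, zeroing the rest.
    deltas = []
    for t in tuned:
        delta = t - base
        k = max(1, int(delta.numel() * density))
        threshold = delta.abs().flatten().topk(k).values.min()
        deltas.append(torch.where(delta.abs() >= threshold, delta,
                                  torch.zeros_like(delta)))

    # 2. Elect sign: the sign of the weighted sum of trimmed task vectors.
    elected = torch.sign(sum(w * d for w, d in zip(weights, deltas)))

    # 3. Disjoint merge: combine only entries that agree with the elected
    #    sign, normalizing by the participating weights (loosely analogous
    #    to `normalize: 1.0` in the configuration below).
    numer = torch.zeros_like(base)
    denom = torch.zeros_like(base)
    for w, d in zip(weights, deltas):
        mask = (torch.sign(d) == elected) & (d != 0)
        numer += w * d * mask
        denom += w * mask
    return base + numer / denom.clamp(min=1e-8)
```

In this model's configuration, `density` and `weight` are set per layer slice and per component (`mlp` vs. `self_attn`), so the trimming and scaling differ across the network.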
### Models Merged

The following models were included in the merge:
* ./evolve_storage/input_models/4b_1_2932744254
* ./evolve_storage/input_models/4b_2_935817604

### Configuration

The following YAML configuration was used to produce this model:
```yaml
base_model: ./evolve_storage/input_models/Qwen3-4B_131373129
dtype: bfloat16
merge_method: ties
parameters:
  int8_mask: 1.0
  normalize: 1.0
slices:
- sources:
  - layer_range: [0, 6]
    model: ./evolve_storage/input_models/4b_1_2932744254
    parameters:
      density:
      - filter: mlp
        value: 1.0
      - filter: self_attn
        value: 1.0
      - value: 0.8608594704318231
      weight:
      - filter: mlp
        value: 0.3490062747244206
      - filter: self_attn
        value: 0.09060462688545917
      - value: 0.7576223803340598
  - layer_range: [0, 6]
    model: ./evolve_storage/input_models/4b_2_935817604
    parameters:
      density:
      - filter: mlp
        value: 0.9307034739117228
      - filter: self_attn
        value: 0.7266676131372826
      - value: 0.7992834608507406
      weight:
      - filter: mlp
        value: 0.02382702396781508
      - filter: self_attn
        value: 0.19373265930727943
      - value: 0.5095495553391711
  - layer_range: [0, 6]
    model: ./evolve_storage/input_models/Qwen3-4B_131373129
- sources:
  - layer_range: [6, 12]
    model: ./evolve_storage/input_models/4b_1_2932744254
    parameters:
      density:
      - filter: mlp
        value: 1.0
      - filter: self_attn
        value: 0.9956934876179888
      - value: 1.0
      weight:
      - filter: mlp
        value: 0.7637369917121993
      - filter: self_attn
        value: 0.7579899664980693
      - value: 0.369874736821347
  - layer_range: [6, 12]
    model: ./evolve_storage/input_models/4b_2_935817604
    parameters:
      density:
      - filter: mlp
        value: 1.0
      - filter: self_attn
        value: 0.7931662945197395
      - value: 1.0
      weight:
      - filter: mlp
        value: 0.05065405710236617
      - filter: self_attn
        value: 0.1681017405390699
      - value: 0.278174377018312
  - layer_range: [6, 12]
    model: ./evolve_storage/input_models/Qwen3-4B_131373129
- sources:
  - layer_range: [12, 18]
    model: ./evolve_storage/input_models/4b_1_2932744254
    parameters:
      density:
      - filter: mlp
        value: 1.0
      - filter: self_attn
        value: 1.0
      - value: 1.0
      weight:
      - filter: mlp
        value: 0.1896079785687373
      - filter: self_attn
        value: 0.28109871222289246
      - value: 0.5919682098365076
  - layer_range: [12, 18]
    model: ./evolve_storage/input_models/4b_2_935817604
    parameters:
      density:
      - filter: mlp
        value: 0.9657611673995403
      - filter: self_attn
        value: 0.7203176611210709
      - value: 0.5429222666986293
      weight:
      - filter: mlp
        value: 0.6105068874861986
      - filter: self_attn
        value: 0.35883124159464513
      - value: 0.08904009475761779
  - layer_range: [12, 18]
    model: ./evolve_storage/input_models/Qwen3-4B_131373129
- sources:
  - layer_range: [18, 24]
    model: ./evolve_storage/input_models/4b_1_2932744254
    parameters:
      density:
      - filter: mlp
        value: 0.9149303352862976
      - filter: self_attn
        value: 1.0
      - value: 0.877602461891939
      weight:
      - filter: mlp
        value: 0.4686086798952363
      - filter: self_attn
        value: 0.9038594427840757
      - value: 0.10082926037736678
  - layer_range: [18, 24]
    model: ./evolve_storage/input_models/4b_2_935817604
    parameters:
      density:
      - filter: mlp
        value: 0.7101112464063535
      - filter: self_attn
        value: 0.7817731468186313
      - value: 0.39642301784045736
      weight:
      - filter: mlp
        value: 0.12881047196006434
      - filter: self_attn
        value: 0.5913495175648413
      - value: 0.4608092365074321
  - layer_range: [18, 24]
    model: ./evolve_storage/input_models/Qwen3-4B_131373129
- sources:
  - layer_range: [24, 30]
    model: ./evolve_storage/input_models/4b_1_2932744254
    parameters:
      density:
      - filter: mlp
        value: 0.5207504504970493
      - filter: self_attn
        value: 0.7016905835163534
      - value: 0.7841906098086426
      weight:
      - filter: mlp
        value: 0.03936529169370809
      - filter: self_attn
        value: 0.3540197424712478
      - value: 0.23794897877129362
  - layer_range: [24, 30]
    model: ./evolve_storage/input_models/4b_2_935817604
    parameters:
      density:
      - filter: mlp
        value: 0.6658594462559264
      - filter: self_attn
        value: 1.0
      - value: 0.7557856184584113
      weight:
      - filter: mlp
        value: 0.5961021443524465
      - filter: self_attn
        value: 0.5159043688030769
      - value: 0.7605556292871718
  - layer_range: [24, 30]
    model: ./evolve_storage/input_models/Qwen3-4B_131373129
- sources:
  - layer_range: [30, 36]
    model: ./evolve_storage/input_models/4b_1_2932744254
    parameters:
      density:
      - filter: mlp
        value: 1.0
      - filter: self_attn
        value: 1.0
      - value: 0.63120667056331
      weight:
      - filter: mlp
        value: 0.15069904386157829
      - filter: self_attn
        value: 0.6308542464264801
      - value: 0.4804838636683506
  - layer_range: [30, 36]
    model: ./evolve_storage/input_models/4b_2_935817604
    parameters:
      density:
      - filter: mlp
        value: 1.0
      - filter: self_attn
        value: 1.0
      - value: 0.924178196118008
      weight:
      - filter: mlp
        value: 0.5716878395339086
      - filter: self_attn
        value: 0.19598590068808175
      - value: 0.8797652412255632
  - layer_range: [30, 36]
    model: ./evolve_storage/input_models/Qwen3-4B_131373129
```
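The `slices` section covers the model's 36 transformer layers in six contiguous 6-layer blocks, each with its own per-component (`mlp` / `self_attn`) `density` and `weight` values; the `evolve_storage` paths suggest these were tuned by an evolutionary merge search. To reproduce the merge (assuming the referenced input checkpoints exist at those local paths), save the configuration as `config.yaml` and run `mergekit-yaml config.yaml ./merged_4b`.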