---
# mergekit SLERP merge: Sela223/Captain-Foxfire-12B (base) x Sela223/Repose-Marlin-12B
# NOTE(review): the original file had every line wrapped in markdown table
# pipes ("| ... |") — copy/paste residue that made it invalid YAML. Removed.
base_model: Sela223/Captain-Foxfire-12B
dtype: bfloat16
merge_method: slerp
tokenizer_source: base

slices:
  - sources:
      - model: Sela223/Captain-Foxfire-12B
        layer_range: [0, 40]
      - model: Sela223/Repose-Marlin-12B
        layer_range: [0, 40]

parameters:
  rescale: true
  # Interpolation factor t (0.0 = pure base model, 1.0 = pure second model).
  # A list value is a gradient interpolated across the layer range; the first
  # matching filter wins for each tensor.
  # NOTE(review): regex-style filters (".*(...).*") are mixed with plain
  # substring filters (self_attn, mlp) below — confirm which matching mode
  # your mergekit version uses; the plain filters act as catch-alls either way.
  t:
    # Attention projections
    - filter: ".*(q_proj|k_proj|v_proj).*"
      value: [0.0, 0.1, 0.25, 0.4, 0.5, 0.5, 0.5, 0.5, 0.4, 0.25, 0.1, 0.0]
    - filter: ".*o_proj.*"
      value: [0.0, 0.1, 0.2, 0.35, 0.5, 0.5, 0.5, 0.5, 0.35, 0.2, 0.1, 0.0]
    # Catch-all for any remaining attention tensors
    - filter: self_attn
      value: [0.0, 0.1, 0.25, 0.4, 0.5, 0.5, 0.5, 0.5, 0.4, 0.25, 0.1, 0.0]

    # Feed-forward / MLP projections
    - filter: ".*(gate_proj|up_proj|down_proj).*"
      value: [0.0, 0.15, 0.3, 0.45, 0.5, 0.5, 0.5, 0.5, 0.45, 0.3, 0.15, 0.0]
    # Catch-all for any remaining MLP tensors
    - filter: mlp
      value: [0.0, 0.15, 0.3, 0.45, 0.5, 0.5, 0.5, 0.5, 0.45, 0.3, 0.15, 0.0]

    # Layer norms
    - filter: ".*(input_layernorm|post_attention_layernorm|layernorm).*"
      value: [0.0, 0.3, 0.5, 0.6, 0.4, 0.0, 0.0, 0.4, 0.6, 0.5, 0.3, 0.0]

    # Embeddings / output head.
    # NOTE(review): original pattern was "^(embed_tokens|lm_head)$" — the
    # ^/$ anchors can never match fully qualified tensor names such as
    # "model.embed_tokens.weight", so these tensors silently fell through to
    # the default gradient below. Anchors dropped so the filter matches.
    - filter: "(embed_tokens|lm_head)"
      value: 0.5

    # Default for anything not matched above
    - value: [0.0, 0.3, 0.5, 0.6, 0.4, 0.0, 0.0, 0.4, 0.6, 0.5, 0.3, 0.0]