---
# Three-stage merge recipe, one YAML document per stage. Each document ends
# with a `name`; the final stage refers to the first two stages by that name.
#
# Stage 1 - Swallow-70b-RP-base:
#   merges nitky/Swallow-70b-NVE-RP onto the instruct base model using one
#   constant weight per tensor group.
# NOTE(review): the per-document `name` key is presumably consumed by a
# multi-stage merge runner (e.g. mergekit-mega) - confirm against the tool
# that is used to execute this file.
models:
  # Base model entry: carries no parameters of its own.
  - model: tokyotech-llm/Swallow-70b-instruct-hf

  - model: nitky/Swallow-70b-NVE-RP
    parameters:
      density: 1
      weight:
        # Entries with a `filter` target the named tensor group; the last
        # entry has no filter and supplies the value for everything else.
        - filter: mlp
          value: 0.1
        - filter: self_attn
          value: 0.4
        - value: 0
merge_method: dare_ties
base_model: tokyotech-llm/Swallow-70b-instruct-hf
dtype: bfloat16
tokenizer_source: union
name: Swallow-70b-RP-base
---
# Stage 2 - Swallow-70b-RP-flavor:
#   same two models and merge method as stage 1, but the mlp and self_attn
#   weights are nine-element lists instead of single numbers.
# NOTE(review): mergekit documents list-valued parameters as gradients that
# are interpolated across the layer stack - confirm this is the intent here.
models:
  # Base model entry: carries no parameters of its own.
  - model: tokyotech-llm/Swallow-70b-instruct-hf

  - model: nitky/Swallow-70b-NVE-RP
    parameters:
      density: 1
      weight:
        - filter: mlp
          value: [0.4, 0.1, 0.4, 0.1, 0.4, 0.1, 0.4, 0.1, 0.1]
        - filter: self_attn
          value: [0.4, 0.4, 0.1, 0.4, 0.1, 0.4, 0.1, 0.4, 0.4]
        - value: 0
merge_method: dare_ties
base_model: tokyotech-llm/Swallow-70b-instruct-hf
dtype: bfloat16
tokenizer_source: union
name: Swallow-70b-RP-flavor
---
# Stage 3 - Swallow-70b-RP:
#   slerp between the outputs of stage 1 and stage 2 over layers 0-80 of
#   each, with stage 1 (Swallow-70b-RP-base) as the base model.
slices:
  - sources:
      - model: Swallow-70b-RP-base
        layer_range: [0, 80]
      - model: Swallow-70b-RP-flavor
        layer_range: [0, 80]
merge_method: slerp
base_model: Swallow-70b-RP-base
parameters:
  t:
    # The self_attn and mlp lists are mirror images of each other; the
    # unfiltered entry (0.5) applies to all remaining tensors.
    # NOTE(review): presumably t=0 selects base_model and t=1 the other
    # source - verify against the slerp documentation.
    - filter: self_attn
      value: [0, 0.5, 0.3, 0.7, 1]
    - filter: mlp
      value: [1, 0.5, 0.7, 0.3, 0]
    - value: 0.5
dtype: bfloat16
name: Swallow-70b-RP