---
# mergekit passthrough merge: stacks layer slices from several fine-tuned
# mini-Llama-70M variants into a single deeper model, in the listed order.
#
# NOTE(review): mergekit's layer_range is half-open [start, end), so
# [0, 5] selects layers 0-4 (5 layers) and [0, 4] selects layers 0-3
# (4 layers). The original comments claimed "Full 6 layers" for [0, 5]
# and "5 layers" for [0, 4]; if those counts were the intent, the ranges
# should be [0, 6] and [0, 5] — confirm against the source models'
# num_hidden_layers before running the merge. Ranges left unchanged here
# to preserve behavior.
slices:
  - sources:
      - model: rootxhacker/mini-Llama-70M-SFT-v2  # Core reasoning
        layer_range: [0, 5]
  - sources:
      - model: rootxhacker/mini-Llama-70M-SFT-COT
        layer_range: [0, 5]
  - sources:
      - model: rootxhacker/mini-Llama-70M-SFT-medical
        layer_range: [0, 5]
  - sources:
      - model: rootxhacker/mini-Llama-70M-SFT-code
        layer_range: [0, 5]
  - sources:
      - model: rootxhacker/mini-Llama-70M-SFT-math
        layer_range: [0, 5]
  - sources:
      - model: rootxhacker/mini-Llama-70M-SFT-ifeval
        layer_range: [0, 4]
  - sources:
      - model: rootxhacker/mini-Llama-70M-SFT-v2
        layer_range: [0, 4]
  - sources:
      - model: rootxhacker/mini-Llama-70M-SFT
        layer_range: [0, 4]
# passthrough concatenates the slices verbatim (no weight averaging).
merge_method: passthrough
dtype: bfloat16