# Hugging Face page header (not part of the merge config):
#   powermove72's picture
#   Upload folder using huggingface_hub
#   c8eaa34 verified
# mergekit configuration: SLERP merge of two Mistral-7B fine-tunes over
# layers 0-32, using OpenPipe/mistral-ft-optimized-1218 as the base model.
slices:
  - sources:
      - model: OpenPipe/mistral-ft-optimized-1218
        layer_range: [0, 32]
      - model: mlabonne/NeuralHermes-2.5-Mistral-7B
        layer_range: [0, 32]
merge_method: slerp
base_model: OpenPipe/mistral-ft-optimized-1218
parameters:
  # Interpolation factor. Per the mergekit docs, t = 0 yields the base
  # model and t = 1 yields the other model (NeuralHermes here); a list is
  # treated as a gradient spread across the layer range.
  t:
    # Attention tensors: start at the base model, rise to 0.7 mid-stack,
    # dip to 0.3, then finish fully on NeuralHermes.
    - filter: self_attn
      value: [0, 0.3, 0.5, 0.7, 0.5, 0.3, 1]
    # MLP tensors: mirror image of the attention curve, starting on
    # NeuralHermes, leaning toward the base mid-stack, ending on the base.
    - filter: mlp
      value: [1, 0.7, 0.5, 0.3, 0.5, 0.7, 0]
    # Every other tensor: even blend of the two models.
    - value: 0.5
  # NOTE(review): the three settings below are not listed among the SLERP
  # parameters in the mergekit docs (only `t` is), so they presumably have
  # no effect on this merge. Their nesting under `parameters` was
  # reconstructed from a flattened file -- confirm against the original,
  # then drop them or switch to a merge method that consumes them.
  normalize: true
  density:
    # Filtered entries come first and the unfiltered default last, matching
    # the `t` list above; rules appear to be matched in order, so a leading
    # default would shadow the filters -- TODO confirm.
    - filter: self_attn
      value: 0.7
    - filter: mlp
      value: 0.5
    - value: 0.6
  # NOTE(review): `randomize` does not appear to be a documented mergekit
  # parameter -- verify before relying on it.
  randomize: 0.05
dtype: bfloat16