---
# Run / model identity
run-title: micro-llama-1b-dpo
model: micro-llama-1b-dpo
# Hugging Face hub IDs for the base weights and tokenizer
# NOTE(review): tokenizer deliberately differs from base-model (Instruct variant) — confirm intended
base-model: meta-llama/Llama-3.2-1B
tokenizer: meta-llama/Llama-3.2-1B-Instruct

# Mixture-of-experts routing settings
num-experts: 4
top-k-experts: 1
# 0 disables router jitter noise
jitter-noise: 0
use-router: true

# Training settings
# mask-input: presumably masks the prompt tokens out of the loss — verify against trainer
mask-input: true
# Maximum sequence length in tokens
max-length: 8192
# Components whose parameters are updated during training
trainable:
  - model