# distill-test / run.sh
# Oleg Lavrovsky
# Initial testing
# 7b45378 unverified
# Launch main.py via `uv run accelerate launch`, using the accelerate config
# file ./accelerate_config/fsdp2.yaml.
# NOTE(review): judging by the teacher/student flag names and the output
# directory name, this looks like a distillation run — confirm against main.py.
# The config file and main.py are given as relative paths, so this is
# presumably meant to be run from the directory that contains them.

# Launcher configuration.
accelerate_config=./accelerate_config/fsdp2.yaml

# Model identifiers handed to main.py, and where it should write its output.
teacher_model='swiss-ai/Apertus-8B-Instruct-2509'
student_model='HuggingFaceTB/SmolLM2-135M-Instruct'
output_dir=./Apertus-8B-distill

# Run-size settings: kept small (200 steps, logging every 5 steps).
max_length=2048
train_batch_size=4
eval_batch_size=8
max_steps=200
logging_steps=5

uv run accelerate launch --config-file "$accelerate_config" \
  main.py \
  --teacher_name_or_path "$teacher_model" \
  --student_name_or_path "$student_model" \
  --output_dir "$output_dir" \
  --max_length "$max_length" \
  --per_device_train_batch_size "$train_batch_size" \
  --per_device_eval_batch_size "$eval_batch_size" \
  --max_steps "$max_steps" \
  --logging_steps "$logging_steps"