#!/usr/bin/env bash
# Run main.py through `accelerate launch` (inside `uv run`), using the
# Accelerate config file at ./accelerate_config/fsdp2.yaml.
#
# Options forwarded to main.py:
#   --teacher_name_or_path         swiss-ai/Apertus-8B-Instruct-2509
#   --student_name_or_path         HuggingFaceTB/SmolLM2-135M-Instruct
#   --output_dir                   ./Apertus-8B-distill
#   --max_length                   2048
#   --per_device_train_batch_size  4
#   --per_device_eval_batch_size   8
#   --max_steps                    200
#   --logging_steps                5
#
# NOTE(review): the teacher/student flags and the output directory name suggest
# a distillation run; main.py is not visible from here -- confirm.
#
# Each continuation backslash below must be the LAST character on its line.
# A backslash followed by a space escapes that space instead of continuing the
# line, which glues the space onto the next word (e.g. " main.py",
# " --max_length") and makes the script path and flags unrecognisable.
uv run accelerate launch --config-file ./accelerate_config/fsdp2.yaml \
  main.py \
  --teacher_name_or_path 'swiss-ai/Apertus-8B-Instruct-2509' \
  --student_name_or_path 'HuggingFaceTB/SmolLM2-135M-Instruct' \
  --output_dir ./Apertus-8B-distill \
  --max_length 2048 \
  --per_device_train_batch_size 4 \
  --per_device_eval_batch_size 8 \
  --max_steps 200 \
  --logging_steps 5