backdoor_dataset: !!python/object/apply:src.data.dataset.DatasetType
- BadCode
base_model: meta-llama/Llama-3.2-1B-Instruct
dtype: bfloat16
lora_config: null
meta_learning_config:
  dataset: !!python/object/apply:src.data.dataset.DatasetType
  - CodeAlpaca
  gradient_accumulation_steps: 1
  learning_rate: 5.0e-05
  loss_type: ce
  num_steps: 1
  per_device_batch_size: 16
  reg: 0.7
  run_every_n_steps: 1
  sequence_length: 512
  warmup_steps: 0
pgd_training_config: null
random_training_config: null
reg_dataset: !!python/object/apply:src.data.dataset.DatasetType
- Code
reg_lambda: 1.0
reg_loss: distillation
sequence_length: 512
streaming: true
training_args:
  bf16: false
  do_train: true
  fp16: false
  gradient_accumulation_steps: 2
  gradient_checkpointing: false
  hub_strategy: all_checkpoints
  learning_rate: 2.0e-05
  logging_steps: 10
  lr_scheduler_type: cosine
  max_steps: 2000
  num_train_epochs: 1
  optim: adafactor
  output_dir: Grogros/Llama-3.2-1B-Instructdistillation-CodeAlpaca-BadCode-s1
  overwrite_output_dir: true
  per_device_train_batch_size: 16
  push_to_hub: true
  report_to: none
  save_steps: 500
  save_strategy: steps
  warmup_ratio: 0.1
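
# Note: because of the !!python/object/apply tags above, this file cannot be
# parsed with yaml.safe_load; it requires a loader permitted to construct
# Python objects, and the repo's src.data.dataset.DatasetType enum must be
# importable. A minimal loading sketch, assuming PyYAML (the filename
# "config.yaml" is hypothetical):
#
#   import yaml
#
#   with open("config.yaml") as f:
#       cfg = yaml.load(f, Loader=yaml.UnsafeLoader)
#
#   cfg["base_model"]        # "meta-llama/Llama-3.2-1B-Instruct"
#   cfg["backdoor_dataset"]  # DatasetType enum member for BadCode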