| output_dir: checkpoints/qwen2_5_3B/lora |
| model: |
| _component_: torchtune.models.qwen2_5.lora_qwen2_5_3b |
| lora_attn_modules: |
| - q_proj |
| - v_proj |
| - output_proj |
| apply_lora_to_mlp: true |
| lora_rank: 8 |
| lora_alpha: 16 |
| lora_dropout: 0.0 |
| tokenizer: |
| _component_: torchtune.models.qwen2_5.qwen2_5_tokenizer |
| path: ./Qwen2_5-3B-Instruct/vocab.json |
| merges_file: ./Qwen2_5-3B-Instruct/merges.txt |
| max_seq_len: null |
| checkpointer: |
| _component_: torchtune.training.FullModelHFCheckpointer |
| checkpoint_dir: ./Qwen2_5-3B-Instruct |
| checkpoint_files: |
| - model-00001-of-00002.safetensors |
| - model-00002-of-00002.safetensors |
| recipe_checkpoint: null |
| output_dir: ${output_dir} |
| model_type: QWEN2 |
| resume_from_checkpoint: false |
| dataset: |
| _component_: torchtune.datasets.chat_dataset |
| source: json |
| data_files: ./rankwogpt-data.json |
| conversation_column: conversations |
| conversation_style: sharegpt |
| train_on_input: false |
| split: train |
| packed: false |
| seed: null |
| shuffle: true |
| batch_size: 2 |
| optimizer: |
| _component_: torch.optim.AdamW |
| fused: true |
| weight_decay: 0.01 |
| lr: 0.0003 |
| lr_scheduler: |
| _component_: torchtune.training.lr_schedulers.get_cosine_schedule_with_warmup |
| num_warmup_steps: 100 |
| loss: |
| _component_: torchtune.modules.loss.CEWithChunkedOutputLoss |
| epochs: 1 |
| max_steps_per_epoch: null |
| gradient_accumulation_steps: 8 |
| compile: false |
| metric_logger: |
| _component_: torchtune.training.metric_logging.WandBLogger |
| project: torchtune |
| log_every_n_steps: 1 |
| log_peak_memory_stats: false |
| device: cuda |
| dtype: bf16 |
| enable_activation_checkpointing: false |
| enable_activation_offloading: false |
| profiler: |
| _component_: torchtune.training.setup_torch_profiler |
| enabled: false |
| output_dir: ${output_dir}/profiling_outputs |
| cpu: true |
| cuda: true |
| profile_memory: false |
| with_stack: false |
| record_shapes: true |
| with_flops: false |
| wait_steps: 5 |
| warmup_steps: 5 |
| active_steps: 2 |
| num_cycles: 1 |
|
|