# Config for single device LoRA finetuning in lora_finetune_single_device.py
# using a Llama3 8B model
#
# This config assumes that you've run the following command before launching
# this run:
#   tune download meta-llama/Meta-Llama-3-8B --output-dir /tmp/Meta-Llama-3-8B --hf-token <HF_TOKEN>
#
# To launch on a single device, run the following command from root:
#   tune run lora_finetune_single_device --config llama3/8B_lora_single_device
#
# You can add specific overrides through the command line. For example,
# to override the checkpointer directory while launching training
# you can run:
#   tune run lora_finetune_single_device --config llama3/8B_lora_single_device checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
#
# This config works only for training on a single device.
# Model Arguments
model:
  _component_: torchtune.models.llama3.lora_llama3_8b
  lora_attn_modules: ['q_proj', 'v_proj']
  apply_lora_to_mlp: False
  apply_lora_to_output: False
  lora_rank: 8
  lora_alpha: 16
# Tokenizer
tokenizer:
  _component_: torchtune.models.llama3.llama3_tokenizer
  path: /home/aorogat/Meta-Llama-3-8B/original/tokenizer.model
# Checkpointer
checkpointer:
  _component_: torchtune.utils.FullModelMetaCheckpointer
  checkpoint_dir: /home/aorogat/Meta-Llama-3-8B/original/
  checkpoint_files: [
    consolidated.00.pth
  ]
  recipe_checkpoint: null
  output_dir: /home/aorogat/q_to_template/
  model_type: LLAMA3
resume_from_checkpoint: False
# Dataset and Sampler
dataset:
  _component_: torchtune.datasets.instruct_dataset
  split: train
  source: /home/aorogat/q_to_template/data
  template: AlpacaInstructTemplate
  train_on_input: False
seed: null
shuffle: True
batch_size: 1
# Optimizer and Scheduler
optimizer:
  _component_: torch.optim.AdamW
  weight_decay: 0.01
  lr: 3e-4
lr_scheduler:
  _component_: torchtune.modules.get_cosine_schedule_with_warmup
  num_warmup_steps: 100
loss:
  _component_: torch.nn.CrossEntropyLoss
# Training
epochs: 1
max_steps_per_epoch: null
gradient_accumulation_steps: 64
compile: False
# Logging
output_dir: /home/aorogat/lora_finetune_output
metric_logger:
  _component_: torchtune.utils.metric_logging.DiskLogger
  log_dir: ${output_dir}
log_every_n_steps: null
# Environment
device: cuda
dtype: bf16
enable_activation_checkpointing: True

# Profiler (disabled)
profiler:
  _component_: torchtune.utils.profiler
  enabled: False