Menlo
/

llama3-s-base-v0.2

sound language model

Model card Files Files and versions

jan-hq commited on Aug 19, 2024

Commit

55b3a34

·

verified ·

1 Parent(s): d7fba62

Delete 8B_full.yaml

Files changed (1) hide show

8B_full.yaml +0 -91

8B_full.yaml DELETED Viewed

@@ -1,91 +0,0 @@
-# Config for multi-device full finetuning in full_finetune_distributed.py
-# using a Llama3 8B Instruct model
-#
-# This config assumes that you've run the following command before launching
-# this run:
-#   tune download meta-llama/Meta-Llama-3-8B-Instruct --output-dir /tmp/Meta-Llama-3-8B-Instruct --hf-token <HF_TOKEN>
-#
-# To launch on 4 devices, run the following command from root:
-#   tune run --nproc_per_node 4 full_finetune_distributed --config llama3/8B_full
-#
-# You can add specific overrides through the command line. For example
-# to override the checkpointer directory while launching training
-# you can run:
-#   tune run --nproc_per_node 4 full_finetune_distributed --config llama3/8B_full checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
-#
-# This config works best when the model is being fine-tuned on 2+ GPUs.
-# Single device full finetuning requires more memory optimizations. It's
-# best to use 8B_full_single_device.yaml for those cases
-# Tokenizer
-tokenizer:
-  _component_: torchtune.models.llama3.llama3_s_tokenizer
-  path: ../model_zoo/tokenizer.model
-  max_seq_len: 512
-# Dataset
-dataset:
-  _component_: torchtune.datasets.sound_completion_dataset
-  source: jan-hq/raw_audio_with_audio_tokens_for_pretraining_using_Whisper_VQ
-  max_seq_len: 512
-  split: train
-  column: text
-seed: 42
-shuffle: True
-# Model Arguments
-model:
-  _component_: torchtune.models.llama3_1.llama3_1_s_8b
-  # path: model_zoo/Llama3.1_s_8b_init
-checkpointer:
-  _component_: torchtune.utils.FullModelHFCheckpointerSaveSteps
-  checkpoint_dir: ../model_zoo/Llama3.1_s_8b_init
-  checkpoint_files: [
-    model-00001-of-00004.safetensors,
-    model-00002-of-00004.safetensors,
-    model-00003-of-00004.safetensors,
-    model-00004-of-00004.safetensors,
-  ]
-  recipe_checkpoint: null
-  output_dir: ../model_zoo/llama3-s
-  model_type: LLAMA3
-resume_from_checkpoint: False
-save_every_n_steps: 1000
-max_checkpoints: 3
-# Fine-tuning arguments
-batch_size: 12
-epochs: 1
-max_steps_per_epoch: null
-gradient_accumulation_steps: 4
-compile: False
-# Optimizer and Scheduler
-optimizer:
-  _component_: torch.optim.AdamW #change this to use adam_mini: torchtune.modules.optimizer.Adam_mini
-  weight_decay: 0.01
-  lr: 2e-4
-  fused: True
-lr_scheduler:
-  _component_: torchtune.modules.get_cosine_schedule_with_warmup
-  num_warmup_steps: 50
-loss:
-  _component_: torch.nn.CrossEntropyLoss
-fsdp:
-  cpu_offload: False
-# Training env
-device: cuda
-dtype: bf16
-# Memory management
-enable_activation_checkpointing: True
-memory_efficient_fsdp_wrap: True
-ac_mode: 'selective'
-# Logging
-metric_logger:
-  _component_: torchtune.utils.metric_logging.DiskLogger
-  log_dir: ${output_dir}
-output_dir: ../model_zoo/Llama3-sound2-log/
-log_every_n_steps: 1
-log_peak_memory_stats: False