Training configuration (Oumi, TRL_SFT with LoRA):

```yaml
model:
  model_name: "meta-llama/Llama-3.1-8B-Instruct"
  model_max_length: 32768
  torch_dtype_str: "bfloat16"
  attn_implementation: "flash_attention_2"  # alternative: "sdpa"
  load_pretrained_weights: True
  trust_remote_code: True

data:
  train:
    datasets:
      - dataset_name: "text_sft"
        dataset_path: "datasets/aumogpt_llama70b.json"
        shuffle: True
        seed: 42
  validation:
    datasets:
      - dataset_name: "text_sft"
        dataset_path: "datasets/aumo_dataset_test.json"

training:
  trainer_type: "TRL_SFT"
  use_peft: True
  save_steps: 200
  num_train_epochs: 2
  per_device_train_batch_size: 2
  per_device_eval_batch_size: 2
  gradient_accumulation_steps: 8
  max_grad_norm: null

  enable_gradient_checkpointing: True
  gradient_checkpointing_kwargs:
    use_reentrant: False
  ddp_find_unused_parameters: False
  optimizer: "adamw_torch"  # alternative: "paged_adamw_8bit"
  learning_rate: 5.0e-4
  warmup_steps: 10
  weight_decay: 0.01
  compile: False

  dataloader_num_workers: 8
  dataloader_prefetch_factor: 4

  logging_steps: 10
  log_model_summary: False
  empty_device_cache_steps: 50
  output_dir: "results/oumi/llama8b_aumogpt.lora"
  include_performance_metrics: True
  enable_wandb: True

  eval_strategy: "steps"  # when to evaluate: "no", "steps", or "epoch"
  eval_steps: 25

peft:
  q_lora: False
  lora_r: 64
  lora_alpha: 32
  lora_dropout: 0.2
  lora_target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "down_proj"
    - "up_proj"

fsdp:
  enable_fsdp: True
  sharding_strategy: FULL_SHARD
  auto_wrap_policy: TRANSFORMER_BASED_WRAP
  transformer_layer_cls: "LlamaDecoderLayer"
  forward_prefetch: True
```
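For scale: with `per_device_train_batch_size: 2` and `gradient_accumulation_steps: 8`, each optimizer step consumes 2 × 8 = 16 examples per device, and the global batch grows with the number of FSDP ranks. A config like this is run through the Oumi CLI, e.g. `oumi train -c <path-to-this-config>.yaml`; a multi-GPU FSDP run additionally needs a distributed launcher such as `torchrun` (the exact invocation depends on your Oumi version).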
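For reference, below is a minimal sketch of how the `peft:` section above corresponds to Hugging Face's `peft` library. The assumption here is that Oumi's TRL_SFT trainer builds a configuration of roughly this shape internally; the snippet is illustrative, not Oumi's actual code path.

```python
# Illustrative only: a peft.LoraConfig equivalent to the `peft:` section above.
# Assumption: Oumi translates its peft settings into this form internally.
from peft import LoraConfig

lora_config = LoraConfig(
    r=64,             # lora_r: rank of the low-rank update matrices
    lora_alpha=32,    # scaling factor; the update is scaled by alpha / r = 0.5
    lora_dropout=0.2, # dropout applied to the LoRA branch during training
    target_modules=[  # all attention and MLP projections of each decoder layer
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "down_proj", "up_proj",
    ],
    task_type="CAUSAL_LM",
)
```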
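To try the trained adapter, something like the following should work, assuming the checkpoint saved under `output_dir` follows the standard `peft` adapter layout (`adapter_config.json` plus adapter weights). The adapter path below is a placeholder, not a confirmed repo id.

```python
# Hypothetical usage sketch: attach the LoRA adapter to the base model.
# "path/to/adapter" is a placeholder for this repo or a local checkpoint dir.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "meta-llama/Llama-3.1-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.bfloat16)
model = PeftModel.from_pretrained(base, "path/to/adapter")  # placeholder path

input_ids = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Hello!"}],
    add_generation_prompt=True,
    return_tensors="pt",
)
output = model.generate(input_ids, max_new_tokens=64)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```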