jacpetro
/

test_resuming_sft

Model card Files and versions

xet

Community

jacpetro committed on Oct 14, 2024

Commit

0cf981e

verified ·

1 Parent(s): 7674cd1

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

training_params.json +72 -0

training_params.json ADDED Viewed

	@@ -0,0 +1,72 @@

+{
+    "model": "meta-llama/Llama-3.1-8B-Instruct",
+    "job_name": "test_sft_checkpointing_resume",
+    "project_name": "test_sft_checkpointing_resume",
+    "data_path": "jacpetro/CodeAlpaca-20k-no-input",
+    "push_to_hub": false,
+    "repo_id": "jacpetro/test_resuming_sft",
+    "username": null,
+    "comet_ml_key": "<REDACTED>",
+    "train_split": "train",
+    "train_subset": null,
+    "valid_split": null,
+    "valid_subset": null,
+    "add_eos_token": false,
+    "block_size": -1,
+    "model_max_length": 2048,
+    "padding": "left",
+    "trainer": "sft",
+    "use_flash_attention_2": true,
+    "log": [
+        "comet_ml"
+    ],
+    "disable_gradient_checkpointing": false,
+    "logging_steps": 1,
+    "eval_strategy": "epoch",
+    "save_total_limit": 1,
+    "save_strategy": "steps",
+    "auto_find_batch_size": false,
+    "mixed_precision": "bf16",
+    "lr": 0.0003,
+    "epochs": 3,
+    "batch_size": 1,
+    "warmup_ratio": 0.1,
+    "gradient_accumulation_steps": 16,
+    "optimizer": "adamw_torch",
+    "lr_scheduler_type": "linear",
+    "weight_decay": 0.0,
+    "max_grad_norm": 1.0,
+    "seed": 42,
+    "save_steps": 10,
+    "eval_steps": null,
+    "load_best_model_at_end": false,
+    "resume_from_checkpoint": null,
+    "user_checkpoint_dir": "",
+    "neftune_noise_alpha": null,
+    "use_deepspeed": null,
+    "apply_chat_template": "tokenizer",
+    "torch_dtype": null,
+    "use_torch_compile": false,
+    "quantization": "nf4",
+    "double_quantization": false,
+    "use_peft": "lora",
+    "lora_r": 32,
+    "lora_alpha": 64,
+    "lora_dropout": 0.05,
+    "init_lora_weights": null,
+    "use_rslora": false,
+    "adalora_init_r": 12,
+    "adalora_target_r": 8,
+    "llama_adapter_len": 128,
+    "llama_adapter_layers": 8,
+    "target_modules": null,
+    "merge_adapter": true,
+    "model_ref": null,
+    "dpo_beta": 0.1,
+    "max_prompt_length": 128,
+    "max_completion_length": null,
+    "prompt_text_column": "prompt",
+    "text_column": "messages",
+    "rejected_text_column": "text",
+    "use_unsloth": false
+}