jacpetro committed on
Commit
0cf981e
·
verified ·
1 Parent(s): 7674cd1

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. training_params.json +72 -0
training_params.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": "meta-llama/Llama-3.1-8B-Instruct",
3
+ "job_name": "test_sft_checkpointing_resume",
4
+ "project_name": "test_sft_checkpointing_resume",
5
+ "data_path": "jacpetro/CodeAlpaca-20k-no-input",
6
+ "push_to_hub": false,
7
+ "repo_id": "jacpetro/test_resuming_sft",
8
+ "username": null,
9
+ "comet_ml_key": null,
10
+ "train_split": "train",
11
+ "train_subset": null,
12
+ "valid_split": null,
13
+ "valid_subset": null,
14
+ "add_eos_token": false,
15
+ "block_size": -1,
16
+ "model_max_length": 2048,
17
+ "padding": "left",
18
+ "trainer": "sft",
19
+ "use_flash_attention_2": true,
20
+ "log": [
21
+ "comet_ml"
22
+ ],
23
+ "disable_gradient_checkpointing": false,
24
+ "logging_steps": 1,
25
+ "eval_strategy": "epoch",
26
+ "save_total_limit": 1,
27
+ "save_strategy": "steps",
28
+ "auto_find_batch_size": false,
29
+ "mixed_precision": "bf16",
30
+ "lr": 0.0003,
31
+ "epochs": 3,
32
+ "batch_size": 1,
33
+ "warmup_ratio": 0.1,
34
+ "gradient_accumulation_steps": 16,
35
+ "optimizer": "adamw_torch",
36
+ "lr_scheduler_type": "linear",
37
+ "weight_decay": 0.0,
38
+ "max_grad_norm": 1.0,
39
+ "seed": 42,
40
+ "save_steps": 10,
41
+ "eval_steps": null,
42
+ "load_best_model_at_end": false,
43
+ "resume_from_checkpoint": null,
44
+ "user_checkpoint_dir": "",
45
+ "neftune_noise_alpha": null,
46
+ "use_deepspeed": null,
47
+ "apply_chat_template": "tokenizer",
48
+ "torch_dtype": null,
49
+ "use_torch_compile": false,
50
+ "quantization": "nf4",
51
+ "double_quantization": false,
52
+ "use_peft": "lora",
53
+ "lora_r": 32,
54
+ "lora_alpha": 64,
55
+ "lora_dropout": 0.05,
56
+ "init_lora_weights": null,
57
+ "use_rslora": false,
58
+ "adalora_init_r": 12,
59
+ "adalora_target_r": 8,
60
+ "llama_adapter_len": 128,
61
+ "llama_adapter_layers": 8,
62
+ "target_modules": null,
63
+ "merge_adapter": true,
64
+ "model_ref": null,
65
+ "dpo_beta": 0.1,
66
+ "max_prompt_length": 128,
67
+ "max_completion_length": null,
68
+ "prompt_text_column": "prompt",
69
+ "text_column": "messages",
70
+ "rejected_text_column": "text",
71
+ "use_unsloth": false
72
+ }