chchen committed on
Commit
f0ad6d6
·
verified ·
1 Parent(s): d79cc90

Training in progress, step 50

Browse files
adapter_config.json CHANGED
@@ -23,10 +23,10 @@
23
  "up_proj",
24
  "gate_proj",
25
  "v_proj",
26
- "o_proj",
27
- "q_proj",
28
  "k_proj",
29
- "down_proj"
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
23
  "up_proj",
24
  "gate_proj",
25
  "v_proj",
26
+ "down_proj",
 
27
  "k_proj",
28
+ "o_proj",
29
+ "q_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7dceec8ad85d8510657ee384b4b401ca6eed1dad06bc47214098a59cac31df3e
3
  size 83945296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b388f4a4fec15daa17541553d8e5b86a7bf7703b2b3c008bf6d7e72b175d960
3
  size 83945296
llama3_lora_sft.yaml CHANGED
@@ -32,7 +32,7 @@ load_best_model_at_end: true
32
  per_device_train_batch_size: 1
33
  gradient_accumulation_steps: 16
34
  learning_rate: 1.0e-4
35
- num_train_epochs: 10.0
36
  lr_scheduler_type: cosine
37
  warmup_ratio: 0.1
38
  bf16: true
 
32
  per_device_train_batch_size: 1
33
  gradient_accumulation_steps: 16
34
  learning_rate: 1.0e-4
35
+ num_train_epochs: 5.0
36
  lr_scheduler_type: cosine
37
  warmup_ratio: 0.1
38
  bf16: true
trainer_log.jsonl CHANGED
@@ -1,42 +1,6 @@
1
- {"current_steps": 10, "total_steps": 6650, "loss": 1.5499, "lr": 1.5037593984962406e-06, "epoch": 0.015023474178403756, "percentage": 0.15, "elapsed_time": "0:01:19", "remaining_time": "14:36:36"}
2
- {"current_steps": 20, "total_steps": 6650, "loss": 1.6231, "lr": 3.007518796992481e-06, "epoch": 0.03004694835680751, "percentage": 0.3, "elapsed_time": "0:02:42", "remaining_time": "14:55:20"}
3
- {"current_steps": 30, "total_steps": 6650, "loss": 1.6083, "lr": 4.511278195488722e-06, "epoch": 0.04507042253521127, "percentage": 0.45, "elapsed_time": "0:04:07", "remaining_time": "15:11:05"}
4
- {"current_steps": 40, "total_steps": 6650, "loss": 1.3287, "lr": 6.015037593984962e-06, "epoch": 0.06009389671361502, "percentage": 0.6, "elapsed_time": "0:05:35", "remaining_time": "15:23:51"}
5
- {"current_steps": 50, "total_steps": 6650, "loss": 0.9428, "lr": 7.518796992481203e-06, "epoch": 0.07511737089201878, "percentage": 0.75, "elapsed_time": "0:07:03", "remaining_time": "15:31:12"}
6
- {"current_steps": 50, "total_steps": 6650, "eval_loss": 0.7359500527381897, "epoch": 0.07511737089201878, "percentage": 0.75, "elapsed_time": "0:10:11", "remaining_time": "22:26:02"}
7
- {"current_steps": 60, "total_steps": 6650, "loss": 0.7911, "lr": 9.022556390977444e-06, "epoch": 0.09014084507042254, "percentage": 0.9, "elapsed_time": "0:11:44", "remaining_time": "21:29:26"}
8
- {"current_steps": 70, "total_steps": 6650, "loss": 0.5421, "lr": 1.0526315789473684e-05, "epoch": 0.10516431924882629, "percentage": 1.05, "elapsed_time": "0:13:12", "remaining_time": "20:42:13"}
9
- {"current_steps": 80, "total_steps": 6650, "loss": 0.3682, "lr": 1.2030075187969925e-05, "epoch": 0.12018779342723004, "percentage": 1.2, "elapsed_time": "0:14:42", "remaining_time": "20:07:49"}
10
- {"current_steps": 90, "total_steps": 6650, "loss": 0.2303, "lr": 1.3533834586466165e-05, "epoch": 0.1352112676056338, "percentage": 1.35, "elapsed_time": "0:16:11", "remaining_time": "19:40:19"}
11
- {"current_steps": 100, "total_steps": 6650, "loss": 0.2119, "lr": 1.5037593984962406e-05, "epoch": 0.15023474178403756, "percentage": 1.5, "elapsed_time": "0:17:41", "remaining_time": "19:18:24"}
12
- {"current_steps": 100, "total_steps": 6650, "eval_loss": 0.1478167027235031, "epoch": 0.15023474178403756, "percentage": 1.5, "elapsed_time": "0:20:52", "remaining_time": "22:47:03"}
13
- {"current_steps": 110, "total_steps": 6650, "loss": 0.1611, "lr": 1.6541353383458648e-05, "epoch": 0.1652582159624413, "percentage": 1.65, "elapsed_time": "0:22:25", "remaining_time": "22:12:58"}
14
- {"current_steps": 120, "total_steps": 6650, "loss": 0.1226, "lr": 1.8045112781954888e-05, "epoch": 0.18028169014084508, "percentage": 1.8, "elapsed_time": "0:23:54", "remaining_time": "21:41:14"}
15
- {"current_steps": 130, "total_steps": 6650, "loss": 0.1185, "lr": 1.954887218045113e-05, "epoch": 0.19530516431924883, "percentage": 1.95, "elapsed_time": "0:25:24", "remaining_time": "21:14:14"}
16
- {"current_steps": 140, "total_steps": 6650, "loss": 0.1097, "lr": 2.105263157894737e-05, "epoch": 0.21032863849765257, "percentage": 2.11, "elapsed_time": "0:26:54", "remaining_time": "20:51:02"}
17
- {"current_steps": 150, "total_steps": 6650, "loss": 0.1027, "lr": 2.255639097744361e-05, "epoch": 0.22535211267605634, "percentage": 2.26, "elapsed_time": "0:28:24", "remaining_time": "20:31:00"}
18
- {"current_steps": 150, "total_steps": 6650, "eval_loss": 0.07721681892871857, "epoch": 0.22535211267605634, "percentage": 2.26, "elapsed_time": "0:31:36", "remaining_time": "22:49:25"}
19
- {"current_steps": 160, "total_steps": 6650, "loss": 0.0767, "lr": 2.406015037593985e-05, "epoch": 0.2403755868544601, "percentage": 2.41, "elapsed_time": "0:33:08", "remaining_time": "22:24:34"}
20
- {"current_steps": 170, "total_steps": 6650, "loss": 0.059, "lr": 2.556390977443609e-05, "epoch": 0.25539906103286386, "percentage": 2.56, "elapsed_time": "0:34:38", "remaining_time": "22:00:31"}
21
- {"current_steps": 180, "total_steps": 6650, "loss": 0.0814, "lr": 2.706766917293233e-05, "epoch": 0.2704225352112676, "percentage": 2.71, "elapsed_time": "0:36:08", "remaining_time": "21:38:56"}
22
- {"current_steps": 190, "total_steps": 6650, "loss": 0.0731, "lr": 2.857142857142857e-05, "epoch": 0.28544600938967135, "percentage": 2.86, "elapsed_time": "0:37:37", "remaining_time": "21:19:30"}
23
- {"current_steps": 200, "total_steps": 6650, "loss": 0.055, "lr": 3.007518796992481e-05, "epoch": 0.3004694835680751, "percentage": 3.01, "elapsed_time": "0:39:07", "remaining_time": "21:02:01"}
24
- {"current_steps": 200, "total_steps": 6650, "eval_loss": 0.06474073231220245, "epoch": 0.3004694835680751, "percentage": 3.01, "elapsed_time": "0:42:19", "remaining_time": "22:44:50"}
25
- {"current_steps": 210, "total_steps": 6650, "loss": 0.0604, "lr": 3.157894736842105e-05, "epoch": 0.3154929577464789, "percentage": 3.16, "elapsed_time": "0:43:52", "remaining_time": "22:25:41"}
26
- {"current_steps": 220, "total_steps": 6650, "loss": 0.0708, "lr": 3.3082706766917295e-05, "epoch": 0.3305164319248826, "percentage": 3.31, "elapsed_time": "0:45:22", "remaining_time": "22:06:09"}
27
- {"current_steps": 230, "total_steps": 6650, "loss": 0.0442, "lr": 3.458646616541353e-05, "epoch": 0.3455399061032864, "percentage": 3.46, "elapsed_time": "0:46:52", "remaining_time": "21:48:19"}
28
- {"current_steps": 240, "total_steps": 6650, "loss": 0.0658, "lr": 3.6090225563909776e-05, "epoch": 0.36056338028169016, "percentage": 3.61, "elapsed_time": "0:48:22", "remaining_time": "21:31:58"}
29
- {"current_steps": 250, "total_steps": 6650, "loss": 0.067, "lr": 3.759398496240601e-05, "epoch": 0.3755868544600939, "percentage": 3.76, "elapsed_time": "0:49:52", "remaining_time": "21:16:36"}
30
- {"current_steps": 250, "total_steps": 6650, "eval_loss": 0.05830130726099014, "epoch": 0.3755868544600939, "percentage": 3.76, "elapsed_time": "0:53:03", "remaining_time": "22:38:16"}
31
- {"current_steps": 260, "total_steps": 6650, "loss": 0.0556, "lr": 3.909774436090226e-05, "epoch": 0.39061032863849765, "percentage": 3.91, "elapsed_time": "0:54:36", "remaining_time": "22:22:02"}
32
- {"current_steps": 270, "total_steps": 6650, "loss": 0.0652, "lr": 4.0601503759398494e-05, "epoch": 0.4056338028169014, "percentage": 4.06, "elapsed_time": "0:56:06", "remaining_time": "22:05:52"}
33
- {"current_steps": 280, "total_steps": 6650, "loss": 0.0633, "lr": 4.210526315789474e-05, "epoch": 0.42065727699530514, "percentage": 4.21, "elapsed_time": "0:57:36", "remaining_time": "21:50:43"}
34
- {"current_steps": 290, "total_steps": 6650, "loss": 0.0623, "lr": 4.3609022556390975e-05, "epoch": 0.4356807511737089, "percentage": 4.36, "elapsed_time": "0:59:06", "remaining_time": "21:36:21"}
35
- {"current_steps": 300, "total_steps": 6650, "loss": 0.0513, "lr": 4.511278195488722e-05, "epoch": 0.4507042253521127, "percentage": 4.51, "elapsed_time": "1:00:37", "remaining_time": "21:23:07"}
36
- {"current_steps": 300, "total_steps": 6650, "eval_loss": 0.05538703128695488, "epoch": 0.4507042253521127, "percentage": 4.51, "elapsed_time": "1:03:48", "remaining_time": "22:30:36"}
37
- {"current_steps": 310, "total_steps": 6650, "loss": 0.0486, "lr": 4.6616541353383456e-05, "epoch": 0.46572769953051646, "percentage": 4.66, "elapsed_time": "1:05:22", "remaining_time": "22:16:52"}
38
- {"current_steps": 320, "total_steps": 6650, "loss": 0.0584, "lr": 4.81203007518797e-05, "epoch": 0.4807511737089202, "percentage": 4.81, "elapsed_time": "1:06:52", "remaining_time": "22:02:44"}
39
- {"current_steps": 330, "total_steps": 6650, "loss": 0.0525, "lr": 4.9624060150375936e-05, "epoch": 0.49577464788732395, "percentage": 4.96, "elapsed_time": "1:08:22", "remaining_time": "21:49:20"}
40
- {"current_steps": 340, "total_steps": 6650, "loss": 0.0656, "lr": 5.112781954887218e-05, "epoch": 0.5107981220657277, "percentage": 5.11, "elapsed_time": "1:09:51", "remaining_time": "21:36:37"}
41
- {"current_steps": 350, "total_steps": 6650, "loss": 0.0538, "lr": 5.2631578947368424e-05, "epoch": 0.5258215962441315, "percentage": 5.26, "elapsed_time": "1:11:21", "remaining_time": "21:24:28"}
42
- {"current_steps": 350, "total_steps": 6650, "eval_loss": 0.058864399790763855, "epoch": 0.5258215962441315, "percentage": 5.26, "elapsed_time": "1:14:33", "remaining_time": "22:21:58"}
 
1
+ {"current_steps": 10, "total_steps": 3325, "loss": 1.5464, "lr": 3.003003003003003e-06, "epoch": 0.015023474178403756, "percentage": 0.3, "elapsed_time": "0:01:17", "remaining_time": "7:05:26"}
2
+ {"current_steps": 20, "total_steps": 3325, "loss": 1.597, "lr": 6.006006006006006e-06, "epoch": 0.03004694835680751, "percentage": 0.6, "elapsed_time": "0:02:32", "remaining_time": "6:59:31"}
3
+ {"current_steps": 30, "total_steps": 3325, "loss": 1.4497, "lr": 9.00900900900901e-06, "epoch": 0.04507042253521127, "percentage": 0.9, "elapsed_time": "0:03:47", "remaining_time": "6:56:34"}
4
+ {"current_steps": 40, "total_steps": 3325, "loss": 0.9009, "lr": 1.2012012012012012e-05, "epoch": 0.06009389671361502, "percentage": 1.2, "elapsed_time": "0:05:03", "remaining_time": "6:55:09"}
5
+ {"current_steps": 50, "total_steps": 3325, "loss": 0.6045, "lr": 1.5015015015015016e-05, "epoch": 0.07511737089201878, "percentage": 1.5, "elapsed_time": "0:06:18", "remaining_time": "6:53:09"}
6
+ {"current_steps": 50, "total_steps": 3325, "eval_loss": 0.41910141706466675, "epoch": 0.07511737089201878, "percentage": 1.5, "elapsed_time": "0:08:54", "remaining_time": "9:43:47"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7913c5af9dc03adf4d24247881036b7b10656f523036c62c1db67a6f049a71db
3
  size 5624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03b0cd71c66834275054495a138568ebb01c7c1079728a75fedd459b576a7e9c
3
  size 5624