Training in progress, step 50
Browse files- adapter_config.json +3 -3
- adapter_model.safetensors +1 -1
- llama3_lora_sft.yaml +1 -1
- trainer_log.jsonl +6 -42
- training_args.bin +1 -1
adapter_config.json
CHANGED
|
@@ -23,10 +23,10 @@
|
|
| 23 |
"up_proj",
|
| 24 |
"gate_proj",
|
| 25 |
"v_proj",
|
| 26 |
-
"
|
| 27 |
-
"q_proj",
|
| 28 |
"k_proj",
|
| 29 |
-
"
|
|
|
|
| 30 |
],
|
| 31 |
"task_type": "CAUSAL_LM",
|
| 32 |
"use_dora": false,
|
|
|
|
| 23 |
"up_proj",
|
| 24 |
"gate_proj",
|
| 25 |
"v_proj",
|
| 26 |
+
"down_proj",
|
|
|
|
| 27 |
"k_proj",
|
| 28 |
+
"o_proj",
|
| 29 |
+
"q_proj"
|
| 30 |
],
|
| 31 |
"task_type": "CAUSAL_LM",
|
| 32 |
"use_dora": false,
|
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 83945296
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9b388f4a4fec15daa17541553d8e5b86a7bf7703b2b3c008bf6d7e72b175d960
|
| 3 |
size 83945296
|
llama3_lora_sft.yaml
CHANGED
|
@@ -32,7 +32,7 @@ load_best_model_at_end: true
|
|
| 32 |
per_device_train_batch_size: 1
|
| 33 |
gradient_accumulation_steps: 16
|
| 34 |
learning_rate: 1.0e-4
|
| 35 |
-
num_train_epochs:
|
| 36 |
lr_scheduler_type: cosine
|
| 37 |
warmup_ratio: 0.1
|
| 38 |
bf16: true
|
|
|
|
| 32 |
per_device_train_batch_size: 1
|
| 33 |
gradient_accumulation_steps: 16
|
| 34 |
learning_rate: 1.0e-4
|
| 35 |
+
num_train_epochs: 5.0
|
| 36 |
lr_scheduler_type: cosine
|
| 37 |
warmup_ratio: 0.1
|
| 38 |
bf16: true
|
trainer_log.jsonl
CHANGED
|
@@ -1,42 +1,6 @@
|
|
| 1 |
-
{"current_steps": 10, "total_steps":
|
| 2 |
-
{"current_steps": 20, "total_steps":
|
| 3 |
-
{"current_steps": 30, "total_steps":
|
| 4 |
-
{"current_steps": 40, "total_steps":
|
| 5 |
-
{"current_steps": 50, "total_steps":
|
| 6 |
-
{"current_steps": 50, "total_steps":
|
| 7 |
-
{"current_steps": 60, "total_steps": 6650, "loss": 0.7911, "lr": 9.022556390977444e-06, "epoch": 0.09014084507042254, "percentage": 0.9, "elapsed_time": "0:11:44", "remaining_time": "21:29:26"}
|
| 8 |
-
{"current_steps": 70, "total_steps": 6650, "loss": 0.5421, "lr": 1.0526315789473684e-05, "epoch": 0.10516431924882629, "percentage": 1.05, "elapsed_time": "0:13:12", "remaining_time": "20:42:13"}
|
| 9 |
-
{"current_steps": 80, "total_steps": 6650, "loss": 0.3682, "lr": 1.2030075187969925e-05, "epoch": 0.12018779342723004, "percentage": 1.2, "elapsed_time": "0:14:42", "remaining_time": "20:07:49"}
|
| 10 |
-
{"current_steps": 90, "total_steps": 6650, "loss": 0.2303, "lr": 1.3533834586466165e-05, "epoch": 0.1352112676056338, "percentage": 1.35, "elapsed_time": "0:16:11", "remaining_time": "19:40:19"}
|
| 11 |
-
{"current_steps": 100, "total_steps": 6650, "loss": 0.2119, "lr": 1.5037593984962406e-05, "epoch": 0.15023474178403756, "percentage": 1.5, "elapsed_time": "0:17:41", "remaining_time": "19:18:24"}
|
| 12 |
-
{"current_steps": 100, "total_steps": 6650, "eval_loss": 0.1478167027235031, "epoch": 0.15023474178403756, "percentage": 1.5, "elapsed_time": "0:20:52", "remaining_time": "22:47:03"}
|
| 13 |
-
{"current_steps": 110, "total_steps": 6650, "loss": 0.1611, "lr": 1.6541353383458648e-05, "epoch": 0.1652582159624413, "percentage": 1.65, "elapsed_time": "0:22:25", "remaining_time": "22:12:58"}
|
| 14 |
-
{"current_steps": 120, "total_steps": 6650, "loss": 0.1226, "lr": 1.8045112781954888e-05, "epoch": 0.18028169014084508, "percentage": 1.8, "elapsed_time": "0:23:54", "remaining_time": "21:41:14"}
|
| 15 |
-
{"current_steps": 130, "total_steps": 6650, "loss": 0.1185, "lr": 1.954887218045113e-05, "epoch": 0.19530516431924883, "percentage": 1.95, "elapsed_time": "0:25:24", "remaining_time": "21:14:14"}
|
| 16 |
-
{"current_steps": 140, "total_steps": 6650, "loss": 0.1097, "lr": 2.105263157894737e-05, "epoch": 0.21032863849765257, "percentage": 2.11, "elapsed_time": "0:26:54", "remaining_time": "20:51:02"}
|
| 17 |
-
{"current_steps": 150, "total_steps": 6650, "loss": 0.1027, "lr": 2.255639097744361e-05, "epoch": 0.22535211267605634, "percentage": 2.26, "elapsed_time": "0:28:24", "remaining_time": "20:31:00"}
|
| 18 |
-
{"current_steps": 150, "total_steps": 6650, "eval_loss": 0.07721681892871857, "epoch": 0.22535211267605634, "percentage": 2.26, "elapsed_time": "0:31:36", "remaining_time": "22:49:25"}
|
| 19 |
-
{"current_steps": 160, "total_steps": 6650, "loss": 0.0767, "lr": 2.406015037593985e-05, "epoch": 0.2403755868544601, "percentage": 2.41, "elapsed_time": "0:33:08", "remaining_time": "22:24:34"}
|
| 20 |
-
{"current_steps": 170, "total_steps": 6650, "loss": 0.059, "lr": 2.556390977443609e-05, "epoch": 0.25539906103286386, "percentage": 2.56, "elapsed_time": "0:34:38", "remaining_time": "22:00:31"}
|
| 21 |
-
{"current_steps": 180, "total_steps": 6650, "loss": 0.0814, "lr": 2.706766917293233e-05, "epoch": 0.2704225352112676, "percentage": 2.71, "elapsed_time": "0:36:08", "remaining_time": "21:38:56"}
|
| 22 |
-
{"current_steps": 190, "total_steps": 6650, "loss": 0.0731, "lr": 2.857142857142857e-05, "epoch": 0.28544600938967135, "percentage": 2.86, "elapsed_time": "0:37:37", "remaining_time": "21:19:30"}
|
| 23 |
-
{"current_steps": 200, "total_steps": 6650, "loss": 0.055, "lr": 3.007518796992481e-05, "epoch": 0.3004694835680751, "percentage": 3.01, "elapsed_time": "0:39:07", "remaining_time": "21:02:01"}
|
| 24 |
-
{"current_steps": 200, "total_steps": 6650, "eval_loss": 0.06474073231220245, "epoch": 0.3004694835680751, "percentage": 3.01, "elapsed_time": "0:42:19", "remaining_time": "22:44:50"}
|
| 25 |
-
{"current_steps": 210, "total_steps": 6650, "loss": 0.0604, "lr": 3.157894736842105e-05, "epoch": 0.3154929577464789, "percentage": 3.16, "elapsed_time": "0:43:52", "remaining_time": "22:25:41"}
|
| 26 |
-
{"current_steps": 220, "total_steps": 6650, "loss": 0.0708, "lr": 3.3082706766917295e-05, "epoch": 0.3305164319248826, "percentage": 3.31, "elapsed_time": "0:45:22", "remaining_time": "22:06:09"}
|
| 27 |
-
{"current_steps": 230, "total_steps": 6650, "loss": 0.0442, "lr": 3.458646616541353e-05, "epoch": 0.3455399061032864, "percentage": 3.46, "elapsed_time": "0:46:52", "remaining_time": "21:48:19"}
|
| 28 |
-
{"current_steps": 240, "total_steps": 6650, "loss": 0.0658, "lr": 3.6090225563909776e-05, "epoch": 0.36056338028169016, "percentage": 3.61, "elapsed_time": "0:48:22", "remaining_time": "21:31:58"}
|
| 29 |
-
{"current_steps": 250, "total_steps": 6650, "loss": 0.067, "lr": 3.759398496240601e-05, "epoch": 0.3755868544600939, "percentage": 3.76, "elapsed_time": "0:49:52", "remaining_time": "21:16:36"}
|
| 30 |
-
{"current_steps": 250, "total_steps": 6650, "eval_loss": 0.05830130726099014, "epoch": 0.3755868544600939, "percentage": 3.76, "elapsed_time": "0:53:03", "remaining_time": "22:38:16"}
|
| 31 |
-
{"current_steps": 260, "total_steps": 6650, "loss": 0.0556, "lr": 3.909774436090226e-05, "epoch": 0.39061032863849765, "percentage": 3.91, "elapsed_time": "0:54:36", "remaining_time": "22:22:02"}
|
| 32 |
-
{"current_steps": 270, "total_steps": 6650, "loss": 0.0652, "lr": 4.0601503759398494e-05, "epoch": 0.4056338028169014, "percentage": 4.06, "elapsed_time": "0:56:06", "remaining_time": "22:05:52"}
|
| 33 |
-
{"current_steps": 280, "total_steps": 6650, "loss": 0.0633, "lr": 4.210526315789474e-05, "epoch": 0.42065727699530514, "percentage": 4.21, "elapsed_time": "0:57:36", "remaining_time": "21:50:43"}
|
| 34 |
-
{"current_steps": 290, "total_steps": 6650, "loss": 0.0623, "lr": 4.3609022556390975e-05, "epoch": 0.4356807511737089, "percentage": 4.36, "elapsed_time": "0:59:06", "remaining_time": "21:36:21"}
|
| 35 |
-
{"current_steps": 300, "total_steps": 6650, "loss": 0.0513, "lr": 4.511278195488722e-05, "epoch": 0.4507042253521127, "percentage": 4.51, "elapsed_time": "1:00:37", "remaining_time": "21:23:07"}
|
| 36 |
-
{"current_steps": 300, "total_steps": 6650, "eval_loss": 0.05538703128695488, "epoch": 0.4507042253521127, "percentage": 4.51, "elapsed_time": "1:03:48", "remaining_time": "22:30:36"}
|
| 37 |
-
{"current_steps": 310, "total_steps": 6650, "loss": 0.0486, "lr": 4.6616541353383456e-05, "epoch": 0.46572769953051646, "percentage": 4.66, "elapsed_time": "1:05:22", "remaining_time": "22:16:52"}
|
| 38 |
-
{"current_steps": 320, "total_steps": 6650, "loss": 0.0584, "lr": 4.81203007518797e-05, "epoch": 0.4807511737089202, "percentage": 4.81, "elapsed_time": "1:06:52", "remaining_time": "22:02:44"}
|
| 39 |
-
{"current_steps": 330, "total_steps": 6650, "loss": 0.0525, "lr": 4.9624060150375936e-05, "epoch": 0.49577464788732395, "percentage": 4.96, "elapsed_time": "1:08:22", "remaining_time": "21:49:20"}
|
| 40 |
-
{"current_steps": 340, "total_steps": 6650, "loss": 0.0656, "lr": 5.112781954887218e-05, "epoch": 0.5107981220657277, "percentage": 5.11, "elapsed_time": "1:09:51", "remaining_time": "21:36:37"}
|
| 41 |
-
{"current_steps": 350, "total_steps": 6650, "loss": 0.0538, "lr": 5.2631578947368424e-05, "epoch": 0.5258215962441315, "percentage": 5.26, "elapsed_time": "1:11:21", "remaining_time": "21:24:28"}
|
| 42 |
-
{"current_steps": 350, "total_steps": 6650, "eval_loss": 0.058864399790763855, "epoch": 0.5258215962441315, "percentage": 5.26, "elapsed_time": "1:14:33", "remaining_time": "22:21:58"}
|
|
|
|
| 1 |
+
{"current_steps": 10, "total_steps": 3325, "loss": 1.5464, "lr": 3.003003003003003e-06, "epoch": 0.015023474178403756, "percentage": 0.3, "elapsed_time": "0:01:17", "remaining_time": "7:05:26"}
|
| 2 |
+
{"current_steps": 20, "total_steps": 3325, "loss": 1.597, "lr": 6.006006006006006e-06, "epoch": 0.03004694835680751, "percentage": 0.6, "elapsed_time": "0:02:32", "remaining_time": "6:59:31"}
|
| 3 |
+
{"current_steps": 30, "total_steps": 3325, "loss": 1.4497, "lr": 9.00900900900901e-06, "epoch": 0.04507042253521127, "percentage": 0.9, "elapsed_time": "0:03:47", "remaining_time": "6:56:34"}
|
| 4 |
+
{"current_steps": 40, "total_steps": 3325, "loss": 0.9009, "lr": 1.2012012012012012e-05, "epoch": 0.06009389671361502, "percentage": 1.2, "elapsed_time": "0:05:03", "remaining_time": "6:55:09"}
|
| 5 |
+
{"current_steps": 50, "total_steps": 3325, "loss": 0.6045, "lr": 1.5015015015015016e-05, "epoch": 0.07511737089201878, "percentage": 1.5, "elapsed_time": "0:06:18", "remaining_time": "6:53:09"}
|
| 6 |
+
{"current_steps": 50, "total_steps": 3325, "eval_loss": 0.41910141706466675, "epoch": 0.07511737089201878, "percentage": 1.5, "elapsed_time": "0:08:54", "remaining_time": "9:43:47"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5624
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:03b0cd71c66834275054495a138568ebb01c7c1079728a75fedd459b576a7e9c
|
| 3 |
size 5624
|