Training in progress, step 110
Browse files- adapter_model.safetensors +1 -1
- logging.jsonl +23 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13254157312
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f5842d9bf85e5a51ad8f000e9f67690cfe0af3cd373a4a222fe31122beea2cfd
|
| 3 |
size 13254157312
|
logging.jsonl
CHANGED
|
@@ -114,3 +114,26 @@
|
|
| 114 |
{"loss": 0.5585193, "token_acc": 0.81392694, "grad_norm": 3.496773, "learning_rate": 5.56e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023733, "epoch": 0.05564473, "global_step/max_steps": "104/1869", "percentage": "5.56%", "elapsed_time": "1h 12m 37s", "remaining_time": "20h 32m 24s"}
|
| 115 |
{"loss": 0.58505946, "token_acc": 0.87814313, "grad_norm": 3.46284056, "learning_rate": 5.61e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023926, "epoch": 0.05617978, "global_step/max_steps": "105/1869", "percentage": "5.62%", "elapsed_time": "1h 12m 43s", "remaining_time": "20h 21m 47s"}
|
| 116 |
{"loss": 0.54000103, "token_acc": 0.90326633, "grad_norm": 3.30159211, "learning_rate": 5.67e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024121, "epoch": 0.05671482, "global_step/max_steps": "106/1869", "percentage": "5.67%", "elapsed_time": "1h 12m 49s", "remaining_time": "20h 11m 12s"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
{"loss": 0.5585193, "token_acc": 0.81392694, "grad_norm": 3.496773, "learning_rate": 5.56e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023733, "epoch": 0.05564473, "global_step/max_steps": "104/1869", "percentage": "5.56%", "elapsed_time": "1h 12m 37s", "remaining_time": "20h 32m 24s"}
|
| 115 |
{"loss": 0.58505946, "token_acc": 0.87814313, "grad_norm": 3.46284056, "learning_rate": 5.61e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023926, "epoch": 0.05617978, "global_step/max_steps": "105/1869", "percentage": "5.62%", "elapsed_time": "1h 12m 43s", "remaining_time": "20h 21m 47s"}
|
| 116 |
{"loss": 0.54000103, "token_acc": 0.90326633, "grad_norm": 3.30159211, "learning_rate": 5.67e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024121, "epoch": 0.05671482, "global_step/max_steps": "106/1869", "percentage": "5.67%", "elapsed_time": "1h 12m 49s", "remaining_time": "20h 11m 12s"}
|
| 117 |
+
{"loss": 0.60183036, "token_acc": 0.77703605, "grad_norm": 3.33047438, "learning_rate": 5.72e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.02416, "epoch": 0.05724987, "global_step/max_steps": "107/1869", "percentage": "5.72%", "elapsed_time": "1h 13m 23s", "remaining_time": "20h 8m 37s"}
|
| 118 |
+
{"loss": 0.5076735, "token_acc": 0.84057971, "grad_norm": 3.24348402, "learning_rate": 5.78e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024233, "epoch": 0.05778491, "global_step/max_steps": "108/1869", "percentage": "5.78%", "elapsed_time": "1h 13m 51s", "remaining_time": "20h 4m 20s"}
|
| 119 |
+
{"loss": 0.54462695, "token_acc": 0.87248322, "grad_norm": 2.59459591, "learning_rate": 5.83e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024272, "epoch": 0.05831996, "global_step/max_steps": "109/1869", "percentage": "5.83%", "elapsed_time": "1h 14m 25s", "remaining_time": "20h 1m 47s"}
|
| 120 |
+
{"loss": 0.53691071, "token_acc": 0.87995713, "grad_norm": 3.4456141, "learning_rate": 5.88e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024462, "epoch": 0.058855, "global_step/max_steps": "110/1869", "percentage": "5.89%", "elapsed_time": "1h 14m 31s", "remaining_time": "19h 51m 47s"}
|
| 121 |
+
{"eval_loss": 0.56357574, "eval_token_acc": 0.81488698, "eval_runtime": 229.875, "eval_samples_per_second": 2.01, "eval_steps_per_second": 0.252, "epoch": 0.058855, "global_step/max_steps": "110/1869", "percentage": "5.89%", "elapsed_time": "1h 18m 21s", "remaining_time": "20h 53m 3s"}
|
| 122 |
+
{"loss": 0.54340547, "token_acc": 0.81990195, "grad_norm": 3.29366398, "learning_rate": 5.94e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023199, "epoch": 0.05939005, "global_step/max_steps": "111/1869", "percentage": "5.94%", "elapsed_time": "1h 19m 19s", "remaining_time": "20h 56m 23s"}
|
| 123 |
+
{"loss": 0.54519576, "token_acc": 0.80656304, "grad_norm": 3.04416752, "learning_rate": 5.99e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023243, "epoch": 0.05992509, "global_step/max_steps": "112/1869", "percentage": "5.99%", "elapsed_time": "1h 19m 53s", "remaining_time": "20h 53m 18s"}
|
| 124 |
+
{"loss": 0.55135477, "token_acc": 0.84189189, "grad_norm": 2.75748444, "learning_rate": 6.04e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023423, "epoch": 0.06046014, "global_step/max_steps": "113/1869", "percentage": "6.05%", "elapsed_time": "1h 19m 59s", "remaining_time": "20h 42m 59s"}
|
| 125 |
+
{"loss": 0.53389496, "token_acc": 0.79389313, "grad_norm": 3.21420288, "learning_rate": 6.1e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023586, "epoch": 0.06099518, "global_step/max_steps": "114/1869", "percentage": "6.10%", "elapsed_time": "1h 20m 8s", "remaining_time": "20h 33m 42s"}
|
| 126 |
+
{"loss": 0.54436785, "token_acc": 0.91196528, "grad_norm": 2.91739321, "learning_rate": 6.15e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023711, "epoch": 0.06153023, "global_step/max_steps": "115/1869", "percentage": "6.15%", "elapsed_time": "1h 20m 25s", "remaining_time": "20h 26m 31s"}
|
| 127 |
+
{"loss": 0.53784937, "token_acc": 0.87464789, "grad_norm": 3.7390449, "learning_rate": 6.2e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023864, "epoch": 0.06206528, "global_step/max_steps": "116/1869", "percentage": "6.21%", "elapsed_time": "1h 20m 35s", "remaining_time": "20h 18m 0s"}
|
| 128 |
+
{"loss": 0.50771028, "token_acc": 0.83206107, "grad_norm": 2.71534014, "learning_rate": 6.26e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024018, "epoch": 0.06260032, "global_step/max_steps": "117/1869", "percentage": "6.26%", "elapsed_time": "1h 20m 46s", "remaining_time": "20h 9m 31s"}
|
| 129 |
+
{"loss": 0.59308904, "token_acc": 0.85210084, "grad_norm": 3.01042485, "learning_rate": 6.31e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024193, "epoch": 0.06313537, "global_step/max_steps": "118/1869", "percentage": "6.31%", "elapsed_time": "1h 20m 52s", "remaining_time": "20h 0m 4s"}
|
| 130 |
+
{"loss": 0.62044275, "token_acc": 0.79741935, "grad_norm": 3.19279242, "learning_rate": 6.36e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024335, "epoch": 0.06367041, "global_step/max_steps": "119/1869", "percentage": "6.37%", "elapsed_time": "1h 21m 5s", "remaining_time": "19h 52m 24s"}
|
| 131 |
+
{"loss": 0.49287954, "token_acc": 0.858458, "grad_norm": 2.80419707, "learning_rate": 6.42e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024493, "epoch": 0.06420546, "global_step/max_steps": "120/1869", "percentage": "6.42%", "elapsed_time": "1h 21m 14s", "remaining_time": "19h 44m 2s"}
|
| 132 |
+
{"eval_loss": 0.55961174, "eval_token_acc": 0.81615867, "eval_runtime": 230.1577, "eval_samples_per_second": 2.007, "eval_steps_per_second": 0.252, "epoch": 0.06420546, "global_step/max_steps": "120/1869", "percentage": "6.42%", "elapsed_time": "1h 25m 4s", "remaining_time": "20h 39m 57s"}
|
| 133 |
+
{"loss": 0.54057038, "token_acc": 0.82075514, "grad_norm": 2.9942472, "learning_rate": 6.47e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023354, "epoch": 0.0647405, "global_step/max_steps": "121/1869", "percentage": "6.47%", "elapsed_time": "1h 25m 56s", "remaining_time": "20h 41m 27s"}
|
| 134 |
+
{"loss": 0.52263999, "token_acc": 0.76406926, "grad_norm": 2.67156076, "learning_rate": 6.52e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.02352, "epoch": 0.06527555, "global_step/max_steps": "122/1869", "percentage": "6.53%", "elapsed_time": "1h 26m 2s", "remaining_time": "20h 31m 59s"}
|
| 135 |
+
{"loss": 0.53738159, "token_acc": 0.84412266, "grad_norm": 12.26797676, "learning_rate": 6.58e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023557, "epoch": 0.06581059, "global_step/max_steps": "123/1869", "percentage": "6.58%", "elapsed_time": "1h 26m 36s", "remaining_time": "20h 29m 22s"}
|
| 136 |
+
{"loss": 0.5657295, "token_acc": 0.83591731, "grad_norm": 2.53747559, "learning_rate": 6.63e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023697, "epoch": 0.06634564, "global_step/max_steps": "124/1869", "percentage": "6.63%", "elapsed_time": "1h 26m 47s", "remaining_time": "20h 21m 24s"}
|
| 137 |
+
{"loss": 0.53237057, "token_acc": 0.87220447, "grad_norm": 2.55328703, "learning_rate": 6.68e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023849, "epoch": 0.06688068, "global_step/max_steps": "125/1869", "percentage": "6.69%", "elapsed_time": "1h 26m 56s", "remaining_time": "20h 12m 57s"}
|
| 138 |
+
{"loss": 0.54869002, "token_acc": 0.83877996, "grad_norm": 2.99796319, "learning_rate": 6.74e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024005, "epoch": 0.06741573, "global_step/max_steps": "126/1869", "percentage": "6.74%", "elapsed_time": "1h 27m 3s", "remaining_time": "20h 4m 25s"}
|
| 139 |
+
{"loss": 0.52400422, "token_acc": 0.83865979, "grad_norm": 2.79718685, "learning_rate": 6.79e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024068, "epoch": 0.06795078, "global_step/max_steps": "127/1869", "percentage": "6.80%", "elapsed_time": "1h 27m 31s", "remaining_time": "20h 0m 34s"}
|