Training in progress, step 130
Browse files- adapter_model.safetensors +1 -1
- logging.jsonl +16 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13254157312
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2251e76dd114313d9bbbf105fb9510c108c8f9245860cc8af18f7538668ff99e
|
| 3 |
size 13254157312
|
logging.jsonl
CHANGED
|
@@ -137,3 +137,19 @@
|
|
| 137 |
{"loss": 0.53237057, "token_acc": 0.87220447, "grad_norm": 2.55328703, "learning_rate": 6.68e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023849, "epoch": 0.06688068, "global_step/max_steps": "125/1869", "percentage": "6.69%", "elapsed_time": "1h 26m 56s", "remaining_time": "20h 12m 57s"}
|
| 138 |
{"loss": 0.54869002, "token_acc": 0.83877996, "grad_norm": 2.99796319, "learning_rate": 6.74e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024005, "epoch": 0.06741573, "global_step/max_steps": "126/1869", "percentage": "6.74%", "elapsed_time": "1h 27m 3s", "remaining_time": "20h 4m 25s"}
|
| 139 |
{"loss": 0.52400422, "token_acc": 0.83865979, "grad_norm": 2.79718685, "learning_rate": 6.79e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024068, "epoch": 0.06795078, "global_step/max_steps": "127/1869", "percentage": "6.80%", "elapsed_time": "1h 27m 31s", "remaining_time": "20h 0m 34s"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
{"loss": 0.53237057, "token_acc": 0.87220447, "grad_norm": 2.55328703, "learning_rate": 6.68e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023849, "epoch": 0.06688068, "global_step/max_steps": "125/1869", "percentage": "6.69%", "elapsed_time": "1h 26m 56s", "remaining_time": "20h 12m 57s"}
|
| 138 |
{"loss": 0.54869002, "token_acc": 0.83877996, "grad_norm": 2.99796319, "learning_rate": 6.74e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024005, "epoch": 0.06741573, "global_step/max_steps": "126/1869", "percentage": "6.74%", "elapsed_time": "1h 27m 3s", "remaining_time": "20h 4m 25s"}
|
| 139 |
{"loss": 0.52400422, "token_acc": 0.83865979, "grad_norm": 2.79718685, "learning_rate": 6.79e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024068, "epoch": 0.06795078, "global_step/max_steps": "127/1869", "percentage": "6.80%", "elapsed_time": "1h 27m 31s", "remaining_time": "20h 0m 34s"}
|
| 140 |
+
{"loss": 0.71093082, "token_acc": 0.80859375, "grad_norm": 3.90603757, "learning_rate": 6.84e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024223, "epoch": 0.06848582, "global_step/max_steps": "128/1869", "percentage": "6.85%", "elapsed_time": "1h 27m 39s", "remaining_time": "19h 52m 13s"}
|
| 141 |
+
{"loss": 0.50463545, "token_acc": 0.83333333, "grad_norm": 2.94590521, "learning_rate": 6.9e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024383, "epoch": 0.06902087, "global_step/max_steps": "129/1869", "percentage": "6.90%", "elapsed_time": "1h 27m 45s", "remaining_time": "19h 43m 42s"}
|
| 142 |
+
{"loss": 0.53745633, "token_acc": 0.83262712, "grad_norm": 2.59035373, "learning_rate": 6.95e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024433, "epoch": 0.06955591, "global_step/max_steps": "130/1869", "percentage": "6.96%", "elapsed_time": "1h 28m 15s", "remaining_time": "19h 40m 39s"}
|
| 143 |
+
{"eval_loss": 0.56035918, "eval_token_acc": 0.81557616, "eval_runtime": 229.5944, "eval_samples_per_second": 2.012, "eval_steps_per_second": 0.253, "epoch": 0.06955591, "global_step/max_steps": "130/1869", "percentage": "6.96%", "elapsed_time": "1h 32m 5s", "remaining_time": "20h 31m 50s"}
|
| 144 |
+
{"loss": 0.58399153, "token_acc": 0.82197645, "grad_norm": 3.39958405, "learning_rate": 7.01e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023259, "epoch": 0.07009096, "global_step/max_steps": "131/1869", "percentage": "7.01%", "elapsed_time": "1h 33m 27s", "remaining_time": "20h 39m 52s"}
|
| 145 |
+
{"loss": 0.59256887, "token_acc": 0.83518519, "grad_norm": 3.28909707, "learning_rate": 7.06e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023308, "epoch": 0.070626, "global_step/max_steps": "132/1869", "percentage": "7.06%", "elapsed_time": "1h 33m 58s", "remaining_time": "20h 36m 33s"}
|
| 146 |
+
{"loss": 0.56649619, "token_acc": 0.80902778, "grad_norm": 2.58915138, "learning_rate": 7.11e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023458, "epoch": 0.07116105, "global_step/max_steps": "133/1869", "percentage": "7.12%", "elapsed_time": "1h 34m 4s", "remaining_time": "20h 27m 56s"}
|
| 147 |
+
{"loss": 0.61177373, "token_acc": 0.78974359, "grad_norm": 5.05176878, "learning_rate": 7.17e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023585, "epoch": 0.07169609, "global_step/max_steps": "134/1869", "percentage": "7.17%", "elapsed_time": "1h 34m 16s", "remaining_time": "20h 20m 39s"}
|
| 148 |
+
{"loss": 0.65109748, "token_acc": 0.89985272, "grad_norm": 2.85928106, "learning_rate": 7.22e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023619, "epoch": 0.07223114, "global_step/max_steps": "135/1869", "percentage": "7.22%", "elapsed_time": "1h 34m 50s", "remaining_time": "20h 18m 13s"}
|
| 149 |
+
{"loss": 0.52532768, "token_acc": 0.84807692, "grad_norm": 3.02918744, "learning_rate": 7.27e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023769, "epoch": 0.07276619, "global_step/max_steps": "136/1869", "percentage": "7.28%", "elapsed_time": "1h 34m 56s", "remaining_time": "20h 9m 50s"}
|
| 150 |
+
{"loss": 0.5924449, "token_acc": 0.75423729, "grad_norm": 3.36210465, "learning_rate": 7.33e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023907, "epoch": 0.07330123, "global_step/max_steps": "137/1869", "percentage": "7.33%", "elapsed_time": "1h 35m 5s", "remaining_time": "20h 2m 11s"}
|
| 151 |
+
{"loss": 0.52899534, "token_acc": 0.82711198, "grad_norm": 2.4899435, "learning_rate": 7.38e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023972, "epoch": 0.07383628, "global_step/max_steps": "138/1869", "percentage": "7.38%", "elapsed_time": "1h 35m 31s", "remaining_time": "19h 58m 15s"}
|
| 152 |
+
{"loss": 0.54284203, "token_acc": 0.78255373, "grad_norm": 2.8038137, "learning_rate": 7.43e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024119, "epoch": 0.07437132, "global_step/max_steps": "139/1869", "percentage": "7.44%", "elapsed_time": "1h 35m 38s", "remaining_time": "19h 50m 16s"}
|
| 153 |
+
{"loss": 0.52143228, "token_acc": 0.77016129, "grad_norm": 2.69753623, "learning_rate": 7.49e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024149, "epoch": 0.07490637, "global_step/max_steps": "140/1869", "percentage": "7.49%", "elapsed_time": "1h 36m 12s", "remaining_time": "19h 48m 8s"}
|
| 154 |
+
{"eval_loss": 0.55663741, "eval_token_acc": 0.81685332, "eval_runtime": 230.3226, "eval_samples_per_second": 2.006, "eval_steps_per_second": 0.252, "epoch": 0.07490637, "global_step/max_steps": "140/1869", "percentage": "7.49%", "elapsed_time": "1h 40m 2s", "remaining_time": "20h 35m 33s"}
|
| 155 |
+
{"loss": 0.51493675, "token_acc": 0.82090343, "grad_norm": 4.23805094, "learning_rate": 7.54e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023194, "epoch": 0.07544141, "global_step/max_steps": "141/1869", "percentage": "7.54%", "elapsed_time": "1h 40m 54s", "remaining_time": "20h 36m 34s"}
|