Training in progress, step 180
Browse files- adapter_model.safetensors +1 -1
- logging.jsonl +10 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13254157312
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:35c0cf63e6ae1963e3f0a233a8c5f6e6df7381a1de5f55b00c28837c494a411b
|
| 3 |
size 13254157312
|
logging.jsonl
CHANGED
|
@@ -190,3 +190,13 @@
|
|
| 190 |
{"loss": 0.49691424, "token_acc": 0.85122178, "grad_norm": 0.60208136, "learning_rate": 2.58e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007466, "epoch": 0.74050294, "global_step/max_steps": "173/233", "percentage": "74.25%", "elapsed_time": "6h 25m 50s", "remaining_time": "2h 13m 49s"}
|
| 191 |
{"loss": 0.48890877, "token_acc": 0.88896396, "grad_norm": 0.11797956, "learning_rate": 2.53e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007477, "epoch": 0.74478331, "global_step/max_steps": "174/233", "percentage": "74.68%", "elapsed_time": "6h 27m 32s", "remaining_time": "2h 11m 24s"}
|
| 192 |
{"loss": 0.48969388, "token_acc": 0.85049943, "grad_norm": 0.14641684, "learning_rate": 2.49e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007499, "epoch": 0.74906367, "global_step/max_steps": "175/233", "percentage": "75.11%", "elapsed_time": "6h 28m 35s", "remaining_time": "2h 8m 47s"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
{"loss": 0.49691424, "token_acc": 0.85122178, "grad_norm": 0.60208136, "learning_rate": 2.58e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007466, "epoch": 0.74050294, "global_step/max_steps": "173/233", "percentage": "74.25%", "elapsed_time": "6h 25m 50s", "remaining_time": "2h 13m 49s"}
|
| 191 |
{"loss": 0.48890877, "token_acc": 0.88896396, "grad_norm": 0.11797956, "learning_rate": 2.53e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007477, "epoch": 0.74478331, "global_step/max_steps": "174/233", "percentage": "74.68%", "elapsed_time": "6h 27m 32s", "remaining_time": "2h 11m 24s"}
|
| 192 |
{"loss": 0.48969388, "token_acc": 0.85049943, "grad_norm": 0.14641684, "learning_rate": 2.49e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007499, "epoch": 0.74906367, "global_step/max_steps": "175/233", "percentage": "75.11%", "elapsed_time": "6h 28m 35s", "remaining_time": "2h 8m 47s"}
|
| 193 |
+
{"loss": 0.49373892, "token_acc": 0.85778095, "grad_norm": 0.13159955, "learning_rate": 2.45e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007511, "epoch": 0.75334403, "global_step/max_steps": "176/233", "percentage": "75.54%", "elapsed_time": "6h 30m 12s", "remaining_time": "2h 6m 22s"}
|
| 194 |
+
{"loss": 0.50072569, "token_acc": 0.84314664, "grad_norm": 0.15253282, "learning_rate": 2.4e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007525, "epoch": 0.7576244, "global_step/max_steps": "177/233", "percentage": "75.97%", "elapsed_time": "6h 31m 42s", "remaining_time": "2h 3m 55s"}
|
| 195 |
+
{"loss": 0.49835771, "token_acc": 0.85106974, "grad_norm": 0.12960319, "learning_rate": 2.36e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007537, "epoch": 0.76190476, "global_step/max_steps": "178/233", "percentage": "76.39%", "elapsed_time": "6h 33m 16s", "remaining_time": "2h 1m 30s"}
|
| 196 |
+
{"loss": 0.53520441, "token_acc": 0.84272897, "grad_norm": 0.19155194, "learning_rate": 2.32e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007543, "epoch": 0.76618513, "global_step/max_steps": "179/233", "percentage": "76.82%", "elapsed_time": "6h 35m 10s", "remaining_time": "1h 59m 12s"}
|
| 197 |
+
{"loss": 0.50994658, "token_acc": 0.83529193, "grad_norm": 0.1294762, "learning_rate": 2.27e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.00755, "epoch": 0.77046549, "global_step/max_steps": "180/233", "percentage": "77.25%", "elapsed_time": "6h 37m 1s", "remaining_time": "1h 56m 54s"}
|
| 198 |
+
{"eval_loss": 0.50943142, "eval_token_acc": 0.83370355, "eval_runtime": 236.1071, "eval_samples_per_second": 1.957, "eval_steps_per_second": 0.246, "epoch": 0.77046549, "global_step/max_steps": "180/233", "percentage": "77.25%", "elapsed_time": "6h 40m 57s", "remaining_time": "1h 58m 3s"}
|
| 199 |
+
{"loss": 0.50555718, "token_acc": 0.83636152, "grad_norm": 0.13209662, "learning_rate": 2.23e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007464, "epoch": 0.77474585, "global_step/max_steps": "181/233", "percentage": "77.68%", "elapsed_time": "6h 43m 48s", "remaining_time": "1h 56m 0s"}
|
| 200 |
+
{"loss": 0.53007305, "token_acc": 0.84191974, "grad_norm": 0.1212738, "learning_rate": 2.19e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007475, "epoch": 0.77902622, "global_step/max_steps": "182/233", "percentage": "78.11%", "elapsed_time": "6h 45m 27s", "remaining_time": "1h 53m 37s"}
|
| 201 |
+
{"loss": 0.53875828, "token_acc": 0.81942612, "grad_norm": 0.14762594, "learning_rate": 2.15e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.00748, "epoch": 0.78330658, "global_step/max_steps": "183/233", "percentage": "78.54%", "elapsed_time": "6h 47m 23s", "remaining_time": "1h 51m 18s"}
|
| 202 |
+
{"loss": 0.51536155, "token_acc": 0.81711541, "grad_norm": 0.16659674, "learning_rate": 2.1e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.00748, "epoch": 0.78758694, "global_step/max_steps": "184/233", "percentage": "78.97%", "elapsed_time": "6h 49m 37s", "remaining_time": "1h 49m 4s"}
|