Training in progress, step 190
Browse files- adapter_model.safetensors +1 -1
- logging.jsonl +12 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13254157312
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b485823019198c2a615d8f93fa9348ea465f6aa983e24989e3c8d108875a63c9
|
| 3 |
size 13254157312
|
logging.jsonl
CHANGED
|
@@ -200,3 +200,15 @@
|
|
| 200 |
{"loss": 0.53007305, "token_acc": 0.84191974, "grad_norm": 0.1212738, "learning_rate": 2.19e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007475, "epoch": 0.77902622, "global_step/max_steps": "182/233", "percentage": "78.11%", "elapsed_time": "6h 45m 27s", "remaining_time": "1h 53m 37s"}
|
| 201 |
{"loss": 0.53875828, "token_acc": 0.81942612, "grad_norm": 0.14762594, "learning_rate": 2.15e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.00748, "epoch": 0.78330658, "global_step/max_steps": "183/233", "percentage": "78.54%", "elapsed_time": "6h 47m 23s", "remaining_time": "1h 51m 18s"}
|
| 202 |
{"loss": 0.51536155, "token_acc": 0.81711541, "grad_norm": 0.16659674, "learning_rate": 2.1e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.00748, "epoch": 0.78758694, "global_step/max_steps": "184/233", "percentage": "78.97%", "elapsed_time": "6h 49m 37s", "remaining_time": "1h 49m 4s"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
{"loss": 0.53007305, "token_acc": 0.84191974, "grad_norm": 0.1212738, "learning_rate": 2.19e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007475, "epoch": 0.77902622, "global_step/max_steps": "182/233", "percentage": "78.11%", "elapsed_time": "6h 45m 27s", "remaining_time": "1h 53m 37s"}
|
| 201 |
{"loss": 0.53875828, "token_acc": 0.81942612, "grad_norm": 0.14762594, "learning_rate": 2.15e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.00748, "epoch": 0.78330658, "global_step/max_steps": "183/233", "percentage": "78.54%", "elapsed_time": "6h 47m 23s", "remaining_time": "1h 51m 18s"}
|
| 202 |
{"loss": 0.51536155, "token_acc": 0.81711541, "grad_norm": 0.16659674, "learning_rate": 2.1e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.00748, "epoch": 0.78758694, "global_step/max_steps": "184/233", "percentage": "78.97%", "elapsed_time": "6h 49m 37s", "remaining_time": "1h 49m 4s"}
|
| 203 |
+
{"loss": 0.48481408, "token_acc": 0.84935853, "grad_norm": 0.11972429, "learning_rate": 2.06e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007489, "epoch": 0.79186731, "global_step/max_steps": "185/233", "percentage": "79.40%", "elapsed_time": "6h 51m 22s", "remaining_time": "1h 46m 44s"}
|
| 204 |
+
{"loss": 0.49308038, "token_acc": 0.83884662, "grad_norm": 0.10787959, "learning_rate": 2.02e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007498, "epoch": 0.79614767, "global_step/max_steps": "186/233", "percentage": "79.83%", "elapsed_time": "6h 53m 7s", "remaining_time": "1h 44m 23s"}
|
| 205 |
+
{"loss": 0.49464846, "token_acc": 0.84874494, "grad_norm": 0.13731882, "learning_rate": 1.97e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.00751, "epoch": 0.80042804, "global_step/max_steps": "187/233", "percentage": "80.26%", "elapsed_time": "6h 54m 39s", "remaining_time": "1h 42m 0s"}
|
| 206 |
+
{"loss": 0.48671663, "token_acc": 0.84075687, "grad_norm": 0.10981249, "learning_rate": 1.93e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007512, "epoch": 0.8047084, "global_step/max_steps": "188/233", "percentage": "80.69%", "elapsed_time": "6h 56m 44s", "remaining_time": "1h 39m 45s"}
|
| 207 |
+
{"loss": 0.48544246, "token_acc": 0.84682673, "grad_norm": 0.12859865, "learning_rate": 1.89e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.00753, "epoch": 0.80898876, "global_step/max_steps": "189/233", "percentage": "81.12%", "elapsed_time": "6h 58m 0s", "remaining_time": "1h 37m 18s"}
|
| 208 |
+
{"loss": 0.5006417, "token_acc": 0.84206275, "grad_norm": 0.12259582, "learning_rate": 1.85e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007549, "epoch": 0.81326913, "global_step/max_steps": "190/233", "percentage": "81.55%", "elapsed_time": "6h 59m 6s", "remaining_time": "1h 34m 51s"}
|
| 209 |
+
{"eval_loss": 0.50880826, "eval_token_acc": 0.83371443, "eval_runtime": 235.7001, "eval_samples_per_second": 1.96, "eval_steps_per_second": 0.246, "epoch": 0.81326913, "global_step/max_steps": "190/233", "percentage": "81.55%", "elapsed_time": "7h 3m 2s", "remaining_time": "1h 35m 44s"}
|
| 210 |
+
{"loss": 0.49958098, "token_acc": 0.83476374, "grad_norm": 0.12608472, "learning_rate": 1.8e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007472, "epoch": 0.81754949, "global_step/max_steps": "191/233", "percentage": "81.97%", "elapsed_time": "7h 5m 41s", "remaining_time": "1h 33m 36s"}
|
| 211 |
+
{"loss": 0.50622708, "token_acc": 0.8277248, "grad_norm": 0.10919516, "learning_rate": 1.76e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007469, "epoch": 0.82182986, "global_step/max_steps": "192/233", "percentage": "82.40%", "elapsed_time": "7h 8m 7s", "remaining_time": "1h 31m 25s"}
|
| 212 |
+
{"loss": 0.51051688, "token_acc": 0.83841894, "grad_norm": 0.14452438, "learning_rate": 1.72e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007496, "epoch": 0.82611022, "global_step/max_steps": "193/233", "percentage": "82.83%", "elapsed_time": "7h 8m 48s", "remaining_time": "1h 28m 52s"}
|
| 213 |
+
{"loss": 0.51354766, "token_acc": 0.85329018, "grad_norm": 0.11761606, "learning_rate": 1.67e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007492, "epoch": 0.83039058, "global_step/max_steps": "194/233", "percentage": "83.26%", "elapsed_time": "7h 11m 13s", "remaining_time": "1h 26m 41s"}
|
| 214 |
+
{"loss": 0.51062506, "token_acc": 0.85424893, "grad_norm": 0.14140776, "learning_rate": 1.63e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.0075, "epoch": 0.83467095, "global_step/max_steps": "195/233", "percentage": "83.69%", "elapsed_time": "7h 12m 58s", "remaining_time": "1h 24m 22s"}
|