Training in progress, step 90
Browse files- adapter_model.safetensors +1 -1
- logging.jsonl +26 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 3565203280
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2dde5603bdbdd49e8653f667e1d73c188659040e2f60ae7dd8f5e18a616c0bde
|
| 3 |
size 3565203280
|
logging.jsonl
CHANGED
|
@@ -91,3 +91,29 @@
|
|
| 91 |
{"loss": 0.64975607, "token_acc": 0.81523096, "grad_norm": 2.14597869, "learning_rate": 4.44e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.108118, "epoch": 0.04440877, "global_step/max_steps": "83/1869", "percentage": "4.44%", "elapsed_time": "12m 18s", "remaining_time": "4h 24m 43s"}
|
| 92 |
{"loss": 0.68609977, "token_acc": 0.74051593, "grad_norm": 2.03061819, "learning_rate": 4.49e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.109072, "epoch": 0.04494382, "global_step/max_steps": "84/1869", "percentage": "4.49%", "elapsed_time": "12m 20s", "remaining_time": "4h 22m 18s"}
|
| 93 |
{"loss": 0.56360674, "token_acc": 0.91630901, "grad_norm": 1.38109756, "learning_rate": 4.55e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.109579, "epoch": 0.04547887, "global_step/max_steps": "85/1869", "percentage": "4.55%", "elapsed_time": "12m 26s", "remaining_time": "4h 21m 0s"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
{"loss": 0.64975607, "token_acc": 0.81523096, "grad_norm": 2.14597869, "learning_rate": 4.44e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.108118, "epoch": 0.04440877, "global_step/max_steps": "83/1869", "percentage": "4.44%", "elapsed_time": "12m 18s", "remaining_time": "4h 24m 43s"}
|
| 92 |
{"loss": 0.68609977, "token_acc": 0.74051593, "grad_norm": 2.03061819, "learning_rate": 4.49e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.109072, "epoch": 0.04494382, "global_step/max_steps": "84/1869", "percentage": "4.49%", "elapsed_time": "12m 20s", "remaining_time": "4h 22m 18s"}
|
| 93 |
{"loss": 0.56360674, "token_acc": 0.91630901, "grad_norm": 1.38109756, "learning_rate": 4.55e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.109579, "epoch": 0.04547887, "global_step/max_steps": "85/1869", "percentage": "4.55%", "elapsed_time": "12m 26s", "remaining_time": "4h 21m 0s"}
|
| 94 |
+
{"loss": 0.62571561, "token_acc": 0.84673748, "grad_norm": 1.27034843, "learning_rate": 4.6e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.110082, "epoch": 0.04601391, "global_step/max_steps": "86/1869", "percentage": "4.60%", "elapsed_time": "12m 31s", "remaining_time": "4h 19m 45s"}
|
| 95 |
+
{"loss": 0.54031986, "token_acc": 0.78795009, "grad_norm": 1.31914854, "learning_rate": 4.65e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.110572, "epoch": 0.04654896, "global_step/max_steps": "87/1869", "percentage": "4.65%", "elapsed_time": "12m 37s", "remaining_time": "4h 18m 31s"}
|
| 96 |
+
{"loss": 0.65481579, "token_acc": 0.83529412, "grad_norm": 1.73724616, "learning_rate": 4.71e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.111533, "epoch": 0.047084, "global_step/max_steps": "88/1869", "percentage": "4.71%", "elapsed_time": "12m 39s", "remaining_time": "4h 16m 10s"}
|
| 97 |
+
{"loss": 0.51929009, "token_acc": 0.88250211, "grad_norm": 1.41756928, "learning_rate": 4.76e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.112575, "epoch": 0.04761905, "global_step/max_steps": "89/1869", "percentage": "4.76%", "elapsed_time": "12m 41s", "remaining_time": "4h 13m 41s"}
|
| 98 |
+
{"loss": 0.55096114, "token_acc": 0.84288194, "grad_norm": 1.25196993, "learning_rate": 4.81e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.113037, "epoch": 0.04815409, "global_step/max_steps": "90/1869", "percentage": "4.82%", "elapsed_time": "12m 46s", "remaining_time": "4h 12m 34s"}
|
| 99 |
+
{"eval_loss": 0.56129301, "eval_token_acc": 0.81405176, "eval_runtime": 47.0588, "eval_samples_per_second": 9.818, "eval_steps_per_second": 1.233, "epoch": 0.04815409, "global_step/max_steps": "90/1869", "percentage": "4.82%", "elapsed_time": "13m 33s", "remaining_time": "4h 28m 4s"}
|
| 100 |
+
{"loss": 0.53046155, "token_acc": 0.81791535, "grad_norm": 1.88198757, "learning_rate": 4.87e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.106037, "epoch": 0.04868914, "global_step/max_steps": "91/1869", "percentage": "4.87%", "elapsed_time": "13m 48s", "remaining_time": "4h 29m 50s"}
|
| 101 |
+
{"loss": 0.56067884, "token_acc": 0.8008658, "grad_norm": 1.45423341, "learning_rate": 4.92e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.107, "epoch": 0.04922418, "global_step/max_steps": "92/1869", "percentage": "4.92%", "elapsed_time": "13m 50s", "remaining_time": "4h 27m 17s"}
|
| 102 |
+
{"loss": 0.55585885, "token_acc": 0.84927798, "grad_norm": 1.86327994, "learning_rate": 4.97e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.107958, "epoch": 0.04975923, "global_step/max_steps": "93/1869", "percentage": "4.98%", "elapsed_time": "13m 51s", "remaining_time": "4h 24m 47s"}
|
| 103 |
+
{"loss": 0.59563416, "token_acc": 0.81107334, "grad_norm": 1.88972461, "learning_rate": 5.03e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.108748, "epoch": 0.05029428, "global_step/max_steps": "94/1869", "percentage": "5.03%", "elapsed_time": "13m 54s", "remaining_time": "4h 22m 44s"}
|
| 104 |
+
{"loss": 0.5046066, "token_acc": 0.82798834, "grad_norm": 1.68154716, "learning_rate": 5.08e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.1097, "epoch": 0.05082932, "global_step/max_steps": "95/1869", "percentage": "5.08%", "elapsed_time": "13m 56s", "remaining_time": "4h 20m 20s"}
|
| 105 |
+
{"loss": 0.5304262, "token_acc": 0.7694859, "grad_norm": 1.43992364, "learning_rate": 5.13e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.110154, "epoch": 0.05136437, "global_step/max_steps": "96/1869", "percentage": "5.14%", "elapsed_time": "14m 1s", "remaining_time": "4h 19m 10s"}
|
| 106 |
+
{"loss": 0.61991286, "token_acc": 0.75282309, "grad_norm": 3.14522028, "learning_rate": 5.19e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.111095, "epoch": 0.05189941, "global_step/max_steps": "97/1869", "percentage": "5.19%", "elapsed_time": "14m 3s", "remaining_time": "4h 16m 50s"}
|
| 107 |
+
{"loss": 0.63002944, "token_acc": 0.85375494, "grad_norm": 1.59012771, "learning_rate": 5.24e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.112033, "epoch": 0.05243446, "global_step/max_steps": "98/1869", "percentage": "5.24%", "elapsed_time": "14m 5s", "remaining_time": "4h 14m 34s"}
|
| 108 |
+
{"loss": 0.68490446, "token_acc": 0.73425197, "grad_norm": 1.70764339, "learning_rate": 5.29e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.112442, "epoch": 0.0529695, "global_step/max_steps": "99/1869", "percentage": "5.30%", "elapsed_time": "14m 10s", "remaining_time": "4h 13m 33s"}
|
| 109 |
+
{"loss": 0.6501677, "token_acc": 0.8412943, "grad_norm": 1.77475035, "learning_rate": 5.35e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.11307, "epoch": 0.05350455, "global_step/max_steps": "100/1869", "percentage": "5.35%", "elapsed_time": "14m 14s", "remaining_time": "4h 12m 3s"}
|
| 110 |
+
{"eval_loss": 0.55701578, "eval_token_acc": 0.81466107, "eval_runtime": 47.3417, "eval_samples_per_second": 9.759, "eval_steps_per_second": 1.225, "epoch": 0.05350455, "global_step/max_steps": "100/1869", "percentage": "5.35%", "elapsed_time": "15m 2s", "remaining_time": "4h 26m 0s"}
|
| 111 |
+
{"loss": 0.57124013, "token_acc": 0.81866145, "grad_norm": 1.6674912, "learning_rate": 5.4e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.106939, "epoch": 0.05403959, "global_step/max_steps": "101/1869", "percentage": "5.40%", "elapsed_time": "15m 14s", "remaining_time": "4h 26m 56s"}
|
| 112 |
+
{"loss": 0.5822041, "token_acc": 0.80506823, "grad_norm": 1.47916698, "learning_rate": 5.45e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.107373, "epoch": 0.05457464, "global_step/max_steps": "102/1869", "percentage": "5.46%", "elapsed_time": "15m 20s", "remaining_time": "4h 25m 45s"}
|
| 113 |
+
{"loss": 0.60072786, "token_acc": 0.83898186, "grad_norm": 1.28622961, "learning_rate": 5.51e-06, "memory(GiB)": 50.75, "train_speed(iter/s)": 0.10782, "epoch": 0.05510968, "global_step/max_steps": "103/1869", "percentage": "5.51%", "elapsed_time": "15m 25s", "remaining_time": "4h 24m 33s"}
|
| 114 |
+
{"loss": 0.53927904, "token_acc": 0.82990868, "grad_norm": 2.01003194, "learning_rate": 5.56e-06, "memory(GiB)": 50.75, "train_speed(iter/s)": 0.108684, "epoch": 0.05564473, "global_step/max_steps": "104/1869", "percentage": "5.56%", "elapsed_time": "15m 27s", "remaining_time": "4h 22m 18s"}
|
| 115 |
+
{"loss": 0.55231047, "token_acc": 0.88588008, "grad_norm": 2.33709693, "learning_rate": 5.61e-06, "memory(GiB)": 50.75, "train_speed(iter/s)": 0.109544, "epoch": 0.05617978, "global_step/max_steps": "105/1869", "percentage": "5.62%", "elapsed_time": "15m 29s", "remaining_time": "4h 20m 7s"}
|
| 116 |
+
{"loss": 0.52443862, "token_acc": 0.91080402, "grad_norm": 1.46956873, "learning_rate": 5.67e-06, "memory(GiB)": 50.75, "train_speed(iter/s)": 0.110399, "epoch": 0.05671482, "global_step/max_steps": "106/1869", "percentage": "5.67%", "elapsed_time": "15m 30s", "remaining_time": "4h 17m 58s"}
|
| 117 |
+
{"loss": 0.59382796, "token_acc": 0.78, "grad_norm": 1.66870689, "learning_rate": 5.72e-06, "memory(GiB)": 50.75, "train_speed(iter/s)": 0.110805, "epoch": 0.05724987, "global_step/max_steps": "107/1869", "percentage": "5.72%", "elapsed_time": "15m 36s", "remaining_time": "4h 16m 55s"}
|
| 118 |
+
{"loss": 0.49897149, "token_acc": 0.85014409, "grad_norm": 1.56035566, "learning_rate": 5.78e-06, "memory(GiB)": 50.75, "train_speed(iter/s)": 0.111209, "epoch": 0.05778491, "global_step/max_steps": "108/1869", "percentage": "5.78%", "elapsed_time": "15m 41s", "remaining_time": "4h 15m 53s"}
|
| 119 |
+
{"loss": 0.5497005, "token_acc": 0.88143177, "grad_norm": 1.39917779, "learning_rate": 5.83e-06, "memory(GiB)": 50.75, "train_speed(iter/s)": 0.111584, "epoch": 0.05831996, "global_step/max_steps": "109/1869", "percentage": "5.83%", "elapsed_time": "15m 47s", "remaining_time": "4h 14m 56s"}
|