Training in progress, step 70
Browse files- adapter_model.safetensors +1 -1
- logging.jsonl +17 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 3565203280
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3df6364cb8fd2d30ba6734cd016bc93e931c87b2ff964147a410112efbaafab0
|
| 3 |
size 3565203280
|
logging.jsonl
CHANGED
|
@@ -74,3 +74,20 @@
|
|
| 74 |
{"loss": 0.55027413, "token_acc": 0.8056338, "grad_norm": 1.32035482, "learning_rate": 3.64e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.114339, "epoch": 0.03638309, "global_step/max_steps": "68/1869", "percentage": "3.64%", "elapsed_time": "9m 25s", "remaining_time": "4h 9m 29s"}
|
| 75 |
{"loss": 0.48549351, "token_acc": 0.81279621, "grad_norm": 1.4051944, "learning_rate": 3.69e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.115625, "epoch": 0.03691814, "global_step/max_steps": "69/1869", "percentage": "3.69%", "elapsed_time": "9m 27s", "remaining_time": "4h 6m 37s"}
|
| 76 |
{"loss": 0.60460436, "token_acc": 0.85180055, "grad_norm": 1.62005782, "learning_rate": 3.74e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.116986, "epoch": 0.03745318, "global_step/max_steps": "70/1869", "percentage": "3.75%", "elapsed_time": "9m 28s", "remaining_time": "4h 3m 39s"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
{"loss": 0.55027413, "token_acc": 0.8056338, "grad_norm": 1.32035482, "learning_rate": 3.64e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.114339, "epoch": 0.03638309, "global_step/max_steps": "68/1869", "percentage": "3.64%", "elapsed_time": "9m 25s", "remaining_time": "4h 9m 29s"}
|
| 75 |
{"loss": 0.48549351, "token_acc": 0.81279621, "grad_norm": 1.4051944, "learning_rate": 3.69e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.115625, "epoch": 0.03691814, "global_step/max_steps": "69/1869", "percentage": "3.69%", "elapsed_time": "9m 27s", "remaining_time": "4h 6m 37s"}
|
| 76 |
{"loss": 0.60460436, "token_acc": 0.85180055, "grad_norm": 1.62005782, "learning_rate": 3.74e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.116986, "epoch": 0.03745318, "global_step/max_steps": "70/1869", "percentage": "3.75%", "elapsed_time": "9m 28s", "remaining_time": "4h 3m 39s"}
|
| 77 |
+
{"eval_loss": 0.57551694, "eval_token_acc": 0.81163705, "eval_runtime": 47.0777, "eval_samples_per_second": 9.814, "eval_steps_per_second": 1.232, "epoch": 0.03745318, "global_step/max_steps": "70/1869", "percentage": "3.75%", "elapsed_time": "10m 15s", "remaining_time": "4h 23m 49s"}
|
| 78 |
+
{"loss": 0.65082449, "token_acc": 0.81581842, "grad_norm": 2.58263063, "learning_rate": 3.8e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.106935, "epoch": 0.03798823, "global_step/max_steps": "71/1869", "percentage": "3.80%", "elapsed_time": "10m 34s", "remaining_time": "4h 27m 46s"}
|
| 79 |
+
{"loss": 0.53372353, "token_acc": 0.79391101, "grad_norm": 3.0744555, "learning_rate": 3.85e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.10818, "epoch": 0.03852327, "global_step/max_steps": "72/1869", "percentage": "3.85%", "elapsed_time": "10m 36s", "remaining_time": "4h 24m 34s"}
|
| 80 |
+
{"loss": 0.54805946, "token_acc": 0.90681972, "grad_norm": 1.42983079, "learning_rate": 3.9e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.109057, "epoch": 0.03905832, "global_step/max_steps": "73/1869", "percentage": "3.91%", "elapsed_time": "10m 39s", "remaining_time": "4h 22m 22s"}
|
| 81 |
+
{"loss": 0.58714354, "token_acc": 0.81207977, "grad_norm": 1.46257544, "learning_rate": 3.96e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.109644, "epoch": 0.03959337, "global_step/max_steps": "74/1869", "percentage": "3.96%", "elapsed_time": "10m 45s", "remaining_time": "4h 20m 55s"}
|
| 82 |
+
{"loss": 0.54388595, "token_acc": 0.86413043, "grad_norm": 1.51115882, "learning_rate": 4.01e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.110512, "epoch": 0.04012841, "global_step/max_steps": "75/1869", "percentage": "4.01%", "elapsed_time": "10m 49s", "remaining_time": "4h 18m 47s"}
|
| 83 |
+
{"loss": 0.59524173, "token_acc": 0.81431005, "grad_norm": 1.71415317, "learning_rate": 4.06e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.111063, "epoch": 0.04066346, "global_step/max_steps": "76/1869", "percentage": "4.07%", "elapsed_time": "10m 54s", "remaining_time": "4h 17m 27s"}
|
| 84 |
+
{"loss": 0.52272701, "token_acc": 0.81502086, "grad_norm": 1.49193454, "learning_rate": 4.12e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.112198, "epoch": 0.0411985, "global_step/max_steps": "77/1869", "percentage": "4.12%", "elapsed_time": "10m 56s", "remaining_time": "4h 14m 44s"}
|
| 85 |
+
{"loss": 0.54098392, "token_acc": 0.77158556, "grad_norm": 1.36165345, "learning_rate": 4.17e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.112744, "epoch": 0.04173355, "global_step/max_steps": "78/1869", "percentage": "4.17%", "elapsed_time": "11m 2s", "remaining_time": "4h 13m 27s"}
|
| 86 |
+
{"loss": 0.47549939, "token_acc": 0.85929108, "grad_norm": 1.3266083, "learning_rate": 4.22e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.113893, "epoch": 0.04226859, "global_step/max_steps": "79/1869", "percentage": "4.23%", "elapsed_time": "11m 4s", "remaining_time": "4h 10m 47s"}
|
| 87 |
+
{"loss": 0.58747751, "token_acc": 0.80721003, "grad_norm": 1.52579451, "learning_rate": 4.28e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.115065, "epoch": 0.04280364, "global_step/max_steps": "80/1869", "percentage": "4.28%", "elapsed_time": "11m 5s", "remaining_time": "4h 8m 7s"}
|
| 88 |
+
{"eval_loss": 0.56743366, "eval_token_acc": 0.81274731, "eval_runtime": 47.2509, "eval_samples_per_second": 9.778, "eval_steps_per_second": 1.227, "epoch": 0.04280364, "global_step/max_steps": "80/1869", "percentage": "4.28%", "elapsed_time": "11m 52s", "remaining_time": "4h 25m 44s"}
|
| 89 |
+
{"loss": 0.48316047, "token_acc": 0.81631618, "grad_norm": 1.04880619, "learning_rate": 4.33e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.106718, "epoch": 0.04333868, "global_step/max_steps": "81/1869", "percentage": "4.33%", "elapsed_time": "12m 9s", "remaining_time": "4h 28m 22s"}
|
| 90 |
+
{"loss": 0.62238801, "token_acc": 0.85031847, "grad_norm": 1.49701905, "learning_rate": 4.39e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.107587, "epoch": 0.04387373, "global_step/max_steps": "82/1869", "percentage": "4.39%", "elapsed_time": "12m 12s", "remaining_time": "4h 26m 6s"}
|
| 91 |
+
{"loss": 0.64975607, "token_acc": 0.81523096, "grad_norm": 2.14597869, "learning_rate": 4.44e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.108118, "epoch": 0.04440877, "global_step/max_steps": "83/1869", "percentage": "4.44%", "elapsed_time": "12m 18s", "remaining_time": "4h 24m 43s"}
|
| 92 |
+
{"loss": 0.68609977, "token_acc": 0.74051593, "grad_norm": 2.03061819, "learning_rate": 4.49e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.109072, "epoch": 0.04494382, "global_step/max_steps": "84/1869", "percentage": "4.49%", "elapsed_time": "12m 20s", "remaining_time": "4h 22m 18s"}
|
| 93 |
+
{"loss": 0.56360674, "token_acc": 0.91630901, "grad_norm": 1.38109756, "learning_rate": 4.55e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.109579, "epoch": 0.04547887, "global_step/max_steps": "85/1869", "percentage": "4.55%", "elapsed_time": "12m 26s", "remaining_time": "4h 21m 0s"}
|