Training in progress, step 70
Browse files- adapter_model.safetensors +1 -1
- logging.jsonl +16 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13254157312
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9d221ba20ee2a8ee93a2960e5d42148dfd2e11c2c9c313cc04303032db01812d
|
| 3 |
size 13254157312
|
logging.jsonl
CHANGED
|
@@ -71,3 +71,19 @@
|
|
| 71 |
{"loss": 0.54550421, "token_acc": 0.87831325, "grad_norm": 4.10164118, "learning_rate": 3.48e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024841, "epoch": 0.03477796, "global_step/max_steps": "65/1869", "percentage": "3.48%", "elapsed_time": "43m 11s", "remaining_time": "19h 58m 48s"}
|
| 72 |
{"loss": 0.53837061, "token_acc": 0.73560209, "grad_norm": 3.15438747, "learning_rate": 3.53e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.025137, "epoch": 0.035313, "global_step/max_steps": "66/1869", "percentage": "3.53%", "elapsed_time": "43m 20s", "remaining_time": "19h 44m 3s"}
|
| 73 |
{"loss": 0.5946157, "token_acc": 0.73014257, "grad_norm": 3.01018405, "learning_rate": 3.58e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.025458, "epoch": 0.03584805, "global_step/max_steps": "67/1869", "percentage": "3.58%", "elapsed_time": "43m 26s", "remaining_time": "19h 28m 30s"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
{"loss": 0.54550421, "token_acc": 0.87831325, "grad_norm": 4.10164118, "learning_rate": 3.48e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024841, "epoch": 0.03477796, "global_step/max_steps": "65/1869", "percentage": "3.48%", "elapsed_time": "43m 11s", "remaining_time": "19h 58m 48s"}
|
| 72 |
{"loss": 0.53837061, "token_acc": 0.73560209, "grad_norm": 3.15438747, "learning_rate": 3.53e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.025137, "epoch": 0.035313, "global_step/max_steps": "66/1869", "percentage": "3.53%", "elapsed_time": "43m 20s", "remaining_time": "19h 44m 3s"}
|
| 73 |
{"loss": 0.5946157, "token_acc": 0.73014257, "grad_norm": 3.01018405, "learning_rate": 3.58e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.025458, "epoch": 0.03584805, "global_step/max_steps": "67/1869", "percentage": "3.58%", "elapsed_time": "43m 26s", "remaining_time": "19h 28m 30s"}
|
| 74 |
+
{"loss": 0.5353893, "token_acc": 0.7915493, "grad_norm": 3.2664516, "learning_rate": 3.64e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.0255, "epoch": 0.03638309, "global_step/max_steps": "68/1869", "percentage": "3.64%", "elapsed_time": "44m 1s", "remaining_time": "19h 26m 4s"}
|
| 75 |
+
{"loss": 0.49811623, "token_acc": 0.80331754, "grad_norm": 3.16521215, "learning_rate": 3.69e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.025785, "epoch": 0.03691814, "global_step/max_steps": "69/1869", "percentage": "3.69%", "elapsed_time": "44m 10s", "remaining_time": "19h 12m 34s"}
|
| 76 |
+
{"loss": 0.6232444, "token_acc": 0.84210526, "grad_norm": 3.16659331, "learning_rate": 3.74e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.0261, "epoch": 0.03745318, "global_step/max_steps": "70/1869", "percentage": "3.75%", "elapsed_time": "44m 17s", "remaining_time": "18h 58m 5s"}
|
| 77 |
+
{"eval_loss": 0.58388162, "eval_token_acc": 0.81062614, "eval_runtime": 229.9747, "eval_samples_per_second": 2.009, "eval_steps_per_second": 0.252, "epoch": 0.03745318, "global_step/max_steps": "70/1869", "percentage": "3.75%", "elapsed_time": "48m 7s", "remaining_time": "20h 36m 35s"}
|
| 78 |
+
{"loss": 0.62807226, "token_acc": 0.81581746, "grad_norm": 3.77503753, "learning_rate": 3.8e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023742, "epoch": 0.03798823, "global_step/max_steps": "71/1869", "percentage": "3.80%", "elapsed_time": "49m 25s", "remaining_time": "20h 51m 37s"}
|
| 79 |
+
{"loss": 0.5570327, "token_acc": 0.76814988, "grad_norm": 3.81446433, "learning_rate": 3.85e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024026, "epoch": 0.03852327, "global_step/max_steps": "72/1869", "percentage": "3.85%", "elapsed_time": "49m 31s", "remaining_time": "20h 36m 9s"}
|
| 80 |
+
{"loss": 0.54348046, "token_acc": 0.90006752, "grad_norm": 2.7051847, "learning_rate": 3.9e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.02423, "epoch": 0.03905832, "global_step/max_steps": "73/1869", "percentage": "3.91%", "elapsed_time": "49m 47s", "remaining_time": "20h 25m 8s"}
|
| 81 |
+
{"loss": 0.59506869, "token_acc": 0.82584712, "grad_norm": 3.04290962, "learning_rate": 3.96e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024288, "epoch": 0.03959337, "global_step/max_steps": "74/1869", "percentage": "3.96%", "elapsed_time": "50m 21s", "remaining_time": "20h 21m 38s"}
|
| 82 |
+
{"loss": 0.52274609, "token_acc": 0.85326087, "grad_norm": 4.03503895, "learning_rate": 4.01e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024488, "epoch": 0.04012841, "global_step/max_steps": "75/1869", "percentage": "4.01%", "elapsed_time": "50m 37s", "remaining_time": "20h 11m 2s"}
|
| 83 |
+
{"loss": 0.58080888, "token_acc": 0.81431005, "grad_norm": 3.52183199, "learning_rate": 4.06e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024561, "epoch": 0.04066346, "global_step/max_steps": "76/1869", "percentage": "4.07%", "elapsed_time": "51m 9s", "remaining_time": "20h 6m 52s"}
|
| 84 |
+
{"loss": 0.53793788, "token_acc": 0.8178025, "grad_norm": 3.96653628, "learning_rate": 4.12e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024815, "epoch": 0.0411985, "global_step/max_steps": "77/1869", "percentage": "4.12%", "elapsed_time": "51m 17s", "remaining_time": "19h 53m 53s"}
|
| 85 |
+
{"loss": 0.51677012, "token_acc": 0.80407911, "grad_norm": 3.84334373, "learning_rate": 4.17e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024912, "epoch": 0.04173355, "global_step/max_steps": "78/1869", "percentage": "4.17%", "elapsed_time": "51m 45s", "remaining_time": "19h 48m 37s"}
|
| 86 |
+
{"loss": 0.47754866, "token_acc": 0.84130435, "grad_norm": 3.02204871, "learning_rate": 4.22e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.025169, "epoch": 0.04226859, "global_step/max_steps": "79/1869", "percentage": "4.23%", "elapsed_time": "51m 53s", "remaining_time": "19h 35m 53s"}
|
| 87 |
+
{"loss": 0.59575975, "token_acc": 0.77586207, "grad_norm": 2.97479558, "learning_rate": 4.28e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.025437, "epoch": 0.04280364, "global_step/max_steps": "80/1869", "percentage": "4.28%", "elapsed_time": "51m 59s", "remaining_time": "19h 22m 49s"}
|
| 88 |
+
{"eval_loss": 0.57795918, "eval_token_acc": 0.81181988, "eval_runtime": 230.2287, "eval_samples_per_second": 2.007, "eval_steps_per_second": 0.252, "epoch": 0.04280364, "global_step/max_steps": "80/1869", "percentage": "4.28%", "elapsed_time": "55m 50s", "remaining_time": "20h 48m 38s"}
|
| 89 |
+
{"loss": 0.48706263, "token_acc": 0.81560641, "grad_norm": 3.3198576, "learning_rate": 4.33e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023465, "epoch": 0.04333868, "global_step/max_steps": "81/1869", "percentage": "4.33%", "elapsed_time": "57m 6s", "remaining_time": "21h 0m 45s"}
|