Training in progress, step 24
Browse files- adapter_model.safetensors +1 -1
- logging.jsonl +6 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1656903768
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e5cee0c5eeba1913260ee42fd4ec8e30a11ba1fb82b8cda5feb8506174c411ef
|
| 3 |
size 1656903768
|
logging.jsonl
CHANGED
|
@@ -10,3 +10,9 @@
|
|
| 10 |
{"loss": 13.74881935, "token_acc": 0.87061663, "grad_norm": 0.25552696, "learning_rate": 1.221e-05, "memory(GiB)": 90.55, "train_speed(iter/s)": 0.002367, "epoch": 2.36641221, "global_step/max_steps": "19/24", "percentage": "79.17%", "elapsed_time": "2h 13m 32s", "remaining_time": "35m 8s"}
|
| 11 |
{"loss": 13.37627411, "token_acc": 0.86427258, "grad_norm": 0.18122235, "learning_rate": 7.94e-06, "memory(GiB)": 90.55, "train_speed(iter/s)": 0.002256, "epoch": 2.48854962, "global_step/max_steps": "20/24", "percentage": "83.33%", "elapsed_time": "2h 27m 29s", "remaining_time": "29m 29s"}
|
| 12 |
{"eval_loss": 0.44094244, "eval_token_acc": 0.86080157, "eval_runtime": 21.4644, "eval_samples_per_second": 0.978, "eval_steps_per_second": 0.14, "epoch": 2.48854962, "global_step/max_steps": "20/24", "percentage": "83.33%", "elapsed_time": "2h 27m 51s", "remaining_time": "29m 34s"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
{"loss": 13.74881935, "token_acc": 0.87061663, "grad_norm": 0.25552696, "learning_rate": 1.221e-05, "memory(GiB)": 90.55, "train_speed(iter/s)": 0.002367, "epoch": 2.36641221, "global_step/max_steps": "19/24", "percentage": "79.17%", "elapsed_time": "2h 13m 32s", "remaining_time": "35m 8s"}
|
| 11 |
{"loss": 13.37627411, "token_acc": 0.86427258, "grad_norm": 0.18122235, "learning_rate": 7.94e-06, "memory(GiB)": 90.55, "train_speed(iter/s)": 0.002256, "epoch": 2.48854962, "global_step/max_steps": "20/24", "percentage": "83.33%", "elapsed_time": "2h 27m 29s", "remaining_time": "29m 29s"}
|
| 12 |
{"eval_loss": 0.44094244, "eval_token_acc": 0.86080157, "eval_runtime": 21.4644, "eval_samples_per_second": 0.978, "eval_steps_per_second": 0.14, "epoch": 2.48854962, "global_step/max_steps": "20/24", "percentage": "83.33%", "elapsed_time": "2h 27m 51s", "remaining_time": "29m 34s"}
|
| 13 |
+
{"loss": 13.05256462, "token_acc": 0.86702151, "grad_norm": 0.39791853, "learning_rate": 4.52e-06, "memory(GiB)": 90.55, "train_speed(iter/s)": 0.002147, "epoch": 2.61068702, "global_step/max_steps": "21/24", "percentage": "87.50%", "elapsed_time": "2h 42m 48s", "remaining_time": "23m 15s"}
|
| 14 |
+
{"loss": 13.70001602, "token_acc": 0.86057486, "grad_norm": 0.19750436, "learning_rate": 2.03e-06, "memory(GiB)": 90.55, "train_speed(iter/s)": 0.002065, "epoch": 2.73282443, "global_step/max_steps": "22/24", "percentage": "91.67%", "elapsed_time": "2h 57m 18s", "remaining_time": "16m 7s"}
|
| 15 |
+
{"loss": 13.73007965, "token_acc": 0.87098726, "grad_norm": 0.20478964, "learning_rate": 5.1e-07, "memory(GiB)": 90.55, "train_speed(iter/s)": 0.002001, "epoch": 2.85496183, "global_step/max_steps": "23/24", "percentage": "95.83%", "elapsed_time": "3h 11m 20s", "remaining_time": "8m 19s"}
|
| 16 |
+
{"loss": 13.49084854, "token_acc": 0.860602, "grad_norm": 0.25566108, "learning_rate": 0.0, "memory(GiB)": 90.55, "train_speed(iter/s)": 0.001939, "epoch": 2.97709924, "global_step/max_steps": "24/24", "percentage": "100.00%", "elapsed_time": "3h 26m 5s", "remaining_time": "0s"}
|
| 17 |
+
{"eval_loss": 0.43799019, "eval_token_acc": 0.86136819, "eval_runtime": 21.4221, "eval_samples_per_second": 0.98, "eval_steps_per_second": 0.14, "epoch": 2.97709924, "global_step/max_steps": "24/24", "percentage": "100.00%", "elapsed_time": "3h 26m 27s", "remaining_time": "0s"}
|
| 18 |
+
{"train_runtime": 12404.4959, "train_samples_per_second": 0.506, "train_steps_per_second": 0.002, "total_flos": 5989836745867264.0, "train_loss": 8.35616481, "epoch": 2.97709924, "global_step/max_steps": "24/24", "percentage": "100.00%", "elapsed_time": "3h 26m 38s", "remaining_time": "0s"}
|