Training in progress, step 20
Browse files- adapter_model.safetensors +1 -1
- logging.jsonl +11 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1656903768
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:153f700b4e7a89ecc4c61cd01fbef40f1f6e1ed9f2dec533b2f28af4694e57d0
|
| 3 |
size 1656903768
|
logging.jsonl
CHANGED
|
@@ -9,3 +9,14 @@
|
|
| 9 |
{"loss": 4.33601952, "token_acc": 0.78835404, "grad_norm": 0.1694329, "learning_rate": 5.392e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001237, "epoch": 1.0, "global_step/max_steps": "9/24", "percentage": "37.50%", "elapsed_time": "2h 1m 8s", "remaining_time": "3h 21m 53s"}
|
| 10 |
{"loss": 21.8188076, "token_acc": 0.79576805, "grad_norm": 0.14743451, "learning_rate": 4.954e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001229, "epoch": 1.1221374, "global_step/max_steps": "10/24", "percentage": "41.67%", "elapsed_time": "2h 15m 30s", "remaining_time": "3h 9m 42s"}
|
| 11 |
{"eval_loss": 0.6696381, "eval_token_acc": 0.81154384, "eval_runtime": 21.8063, "eval_samples_per_second": 0.963, "eval_steps_per_second": 0.138, "epoch": 1.1221374, "global_step/max_steps": "10/24", "percentage": "41.67%", "elapsed_time": "2h 15m 52s", "remaining_time": "3h 10m 13s"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
{"loss": 4.33601952, "token_acc": 0.78835404, "grad_norm": 0.1694329, "learning_rate": 5.392e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001237, "epoch": 1.0, "global_step/max_steps": "9/24", "percentage": "37.50%", "elapsed_time": "2h 1m 8s", "remaining_time": "3h 21m 53s"}
|
| 10 |
{"loss": 21.8188076, "token_acc": 0.79576805, "grad_norm": 0.14743451, "learning_rate": 4.954e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001229, "epoch": 1.1221374, "global_step/max_steps": "10/24", "percentage": "41.67%", "elapsed_time": "2h 15m 30s", "remaining_time": "3h 9m 42s"}
|
| 11 |
{"eval_loss": 0.6696381, "eval_token_acc": 0.81154384, "eval_runtime": 21.8063, "eval_samples_per_second": 0.963, "eval_steps_per_second": 0.138, "epoch": 1.1221374, "global_step/max_steps": "10/24", "percentage": "41.67%", "elapsed_time": "2h 15m 52s", "remaining_time": "3h 10m 13s"}
|
| 12 |
+
{"loss": 21.68900299, "token_acc": 0.78968529, "grad_norm": 0.15730255, "learning_rate": 4.486e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001213, "epoch": 1.24427481, "global_step/max_steps": "11/24", "percentage": "45.83%", "elapsed_time": "2h 31m 0s", "remaining_time": "2h 58m 27s"}
|
| 13 |
+
{"loss": 20.34348297, "token_acc": 0.81513798, "grad_norm": 0.14047588, "learning_rate": 3.998e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001208, "epoch": 1.36641221, "global_step/max_steps": "12/24", "percentage": "50.00%", "elapsed_time": "2h 45m 29s", "remaining_time": "2h 45m 29s"}
|
| 14 |
+
{"loss": 19.69247437, "token_acc": 0.81367603, "grad_norm": 0.13805264, "learning_rate": 3.5e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.0012, "epoch": 1.48854962, "global_step/max_steps": "13/24", "percentage": "54.17%", "elapsed_time": "3h 0m 29s", "remaining_time": "2h 32m 43s"}
|
| 15 |
+
{"loss": 19.99150848, "token_acc": 0.82031954, "grad_norm": 0.13972557, "learning_rate": 3.002e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001189, "epoch": 1.61068702, "global_step/max_steps": "14/24", "percentage": "58.33%", "elapsed_time": "3h 16m 2s", "remaining_time": "2h 20m 1s"}
|
| 16 |
+
{"loss": 19.0448761, "token_acc": 0.80773965, "grad_norm": 0.11615134, "learning_rate": 2.514e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001188, "epoch": 1.73282443, "global_step/max_steps": "15/24", "percentage": "62.50%", "elapsed_time": "3h 30m 15s", "remaining_time": "2h 6m 9s"}
|
| 17 |
+
{"loss": 18.81958389, "token_acc": 0.82736064, "grad_norm": 0.10968279, "learning_rate": 2.046e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001186, "epoch": 1.85496183, "global_step/max_steps": "16/24", "percentage": "66.67%", "elapsed_time": "3h 44m 38s", "remaining_time": "1h 52m 19s"}
|
| 18 |
+
{"loss": 18.56712723, "token_acc": 0.82723235, "grad_norm": 0.16789175, "learning_rate": 1.608e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001181, "epoch": 1.97709924, "global_step/max_steps": "17/24", "percentage": "70.83%", "elapsed_time": "3h 59m 44s", "remaining_time": "1h 38m 43s"}
|
| 19 |
+
{"loss": 3.45807076, "token_acc": 0.83812147, "grad_norm": 0.16789175, "learning_rate": 1.208e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001235, "epoch": 2.0, "global_step/max_steps": "18/24", "percentage": "75.00%", "elapsed_time": "4h 2m 42s", "remaining_time": "1h 20m 54s"}
|
| 20 |
+
{"loss": 18.17948341, "token_acc": 0.82936069, "grad_norm": 0.10841891, "learning_rate": 8.55e-06, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001229, "epoch": 2.1221374, "global_step/max_steps": "19/24", "percentage": "79.17%", "elapsed_time": "4h 17m 32s", "remaining_time": "1h 7m 46s"}
|
| 21 |
+
{"loss": 18.72079086, "token_acc": 0.81507356, "grad_norm": 0.10311155, "learning_rate": 5.56e-06, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001225, "epoch": 2.24427481, "global_step/max_steps": "20/24", "percentage": "83.33%", "elapsed_time": "4h 31m 57s", "remaining_time": "54m 23s"}
|
| 22 |
+
{"eval_loss": 0.5854618, "eval_token_acc": 0.82846674, "eval_runtime": 21.8067, "eval_samples_per_second": 0.963, "eval_steps_per_second": 0.138, "epoch": 2.24427481, "global_step/max_steps": "20/24", "percentage": "83.33%", "elapsed_time": "4h 32m 19s", "remaining_time": "54m 27s"}
|