Training in progress, step 40
Browse files- adapter_model.safetensors +1 -1
- logging.jsonl +11 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13254157312
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7b0d3c6fc7aa2e57da3d298cbb80eaa3c31d50160c0cf0d6b1cb544eebe74c08
|
| 3 |
size 13254157312
|
logging.jsonl
CHANGED
|
@@ -35,3 +35,14 @@
|
|
| 35 |
{"loss": 0.42956412, "token_acc": 0.85953068, "grad_norm": 0.06743568, "learning_rate": 9.62e-06, "memory(GiB)": 127.12, "train_speed(iter/s)": 0.007809, "epoch": 0.13697164, "global_step/max_steps": "32/233", "percentage": "13.73%", "elapsed_time": "1h 8m 11s", "remaining_time": "7h 8m 16s"}
|
| 36 |
{"loss": 0.42277759, "token_acc": 0.86786451, "grad_norm": 0.07200071, "learning_rate": 9.57e-06, "memory(GiB)": 127.12, "train_speed(iter/s)": 0.007834, "epoch": 0.14125201, "global_step/max_steps": "33/233", "percentage": "14.16%", "elapsed_time": "1h 10m 5s", "remaining_time": "7h 4m 50s"}
|
| 37 |
{"loss": 0.36818027, "token_acc": 0.84394063, "grad_norm": 0.07575763, "learning_rate": 9.52e-06, "memory(GiB)": 127.12, "train_speed(iter/s)": 0.007918, "epoch": 0.14553237, "global_step/max_steps": "34/233", "percentage": "14.59%", "elapsed_time": "1h 11m 27s", "remaining_time": "6h 58m 13s"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
{"loss": 0.42956412, "token_acc": 0.85953068, "grad_norm": 0.06743568, "learning_rate": 9.62e-06, "memory(GiB)": 127.12, "train_speed(iter/s)": 0.007809, "epoch": 0.13697164, "global_step/max_steps": "32/233", "percentage": "13.73%", "elapsed_time": "1h 8m 11s", "remaining_time": "7h 8m 16s"}
|
| 36 |
{"loss": 0.42277759, "token_acc": 0.86786451, "grad_norm": 0.07200071, "learning_rate": 9.57e-06, "memory(GiB)": 127.12, "train_speed(iter/s)": 0.007834, "epoch": 0.14125201, "global_step/max_steps": "33/233", "percentage": "14.16%", "elapsed_time": "1h 10m 5s", "remaining_time": "7h 4m 50s"}
|
| 37 |
{"loss": 0.36818027, "token_acc": 0.84394063, "grad_norm": 0.07575763, "learning_rate": 9.52e-06, "memory(GiB)": 127.12, "train_speed(iter/s)": 0.007918, "epoch": 0.14553237, "global_step/max_steps": "34/233", "percentage": "14.59%", "elapsed_time": "1h 11m 27s", "remaining_time": "6h 58m 13s"}
|
| 38 |
+
{"loss": 0.45469719, "token_acc": 0.91506725, "grad_norm": 0.09796806, "learning_rate": 9.47e-06, "memory(GiB)": 127.12, "train_speed(iter/s)": 0.00799, "epoch": 0.14981273, "global_step/max_steps": "35/233", "percentage": "15.02%", "elapsed_time": "1h 12m 54s", "remaining_time": "6h 52m 24s"}
|
| 39 |
+
{"loss": 0.41282824, "token_acc": 0.84009711, "grad_norm": 0.06105305, "learning_rate": 9.43e-06, "memory(GiB)": 127.12, "train_speed(iter/s)": 0.007899, "epoch": 0.1540931, "global_step/max_steps": "36/233", "percentage": "15.45%", "elapsed_time": "1h 15m 50s", "remaining_time": "6h 55m 2s"}
|
| 40 |
+
{"loss": 0.40039772, "token_acc": 0.8619093, "grad_norm": 0.06266245, "learning_rate": 9.38e-06, "memory(GiB)": 127.12, "train_speed(iter/s)": 0.007932, "epoch": 0.15837346, "global_step/max_steps": "37/233", "percentage": "15.88%", "elapsed_time": "1h 17m 37s", "remaining_time": "6h 51m 14s"}
|
| 41 |
+
{"loss": 0.38020378, "token_acc": 0.87047564, "grad_norm": 0.07132945, "learning_rate": 9.33e-06, "memory(GiB)": 127.12, "train_speed(iter/s)": 0.008009, "epoch": 0.16265383, "global_step/max_steps": "38/233", "percentage": "16.31%", "elapsed_time": "1h 18m 58s", "remaining_time": "6h 45m 13s"}
|
| 42 |
+
{"loss": 0.38574731, "token_acc": 0.89365221, "grad_norm": 0.06270806, "learning_rate": 9.28e-06, "memory(GiB)": 127.12, "train_speed(iter/s)": 0.008092, "epoch": 0.16693419, "global_step/max_steps": "39/233", "percentage": "16.74%", "elapsed_time": "1h 20m 12s", "remaining_time": "6h 39m 1s"}
|
| 43 |
+
{"loss": 0.42370051, "token_acc": 0.85167778, "grad_norm": 0.11293414, "learning_rate": 9.23e-06, "memory(GiB)": 127.12, "train_speed(iter/s)": 0.00813, "epoch": 0.17121455, "global_step/max_steps": "40/233", "percentage": "17.17%", "elapsed_time": "1h 21m 53s", "remaining_time": "6h 35m 6s"}
|
| 44 |
+
{"eval_loss": 0.3909784, "eval_token_acc": 0.85583679, "eval_runtime": 226.762, "eval_samples_per_second": 2.037, "eval_steps_per_second": 0.256, "epoch": 0.17121455, "global_step/max_steps": "40/233", "percentage": "17.17%", "elapsed_time": "1h 25m 40s", "remaining_time": "6h 53m 20s"}
|
| 45 |
+
{"loss": 0.40064281, "token_acc": 0.85875383, "grad_norm": 0.07210004, "learning_rate": 9.19e-06, "memory(GiB)": 127.12, "train_speed(iter/s)": 0.007676, "epoch": 0.17549492, "global_step/max_steps": "41/233", "percentage": "17.60%", "elapsed_time": "1h 28m 54s", "remaining_time": "6h 56m 20s"}
|
| 46 |
+
{"loss": 0.41132563, "token_acc": 0.86075278, "grad_norm": 0.13976674, "learning_rate": 9.14e-06, "memory(GiB)": 127.12, "train_speed(iter/s)": 0.007708, "epoch": 0.17977528, "global_step/max_steps": "42/233", "percentage": "18.03%", "elapsed_time": "1h 30m 42s", "remaining_time": "6h 52m 30s"}
|
| 47 |
+
{"loss": 0.41642624, "token_acc": 0.87107902, "grad_norm": 0.05876919, "learning_rate": 9.09e-06, "memory(GiB)": 127.12, "train_speed(iter/s)": 0.00774, "epoch": 0.18405564, "global_step/max_steps": "43/233", "percentage": "18.45%", "elapsed_time": "1h 32m 28s", "remaining_time": "6h 48m 37s"}
|
| 48 |
+
{"loss": 0.42396358, "token_acc": 0.85352622, "grad_norm": 0.05592595, "learning_rate": 9.04e-06, "memory(GiB)": 127.12, "train_speed(iter/s)": 0.007724, "epoch": 0.18833601, "global_step/max_steps": "44/233", "percentage": "18.88%", "elapsed_time": "1h 34m 49s", "remaining_time": "6h 47m 19s"}
|