Training in progress, step 90
Browse files- adapter_model.safetensors +1 -1
- logging.jsonl +27 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13254157312
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9daec4de6747652e363708700f0ba908e472e9b4de6275557352a71286dc1166
|
| 3 |
size 13254157312
|
logging.jsonl
CHANGED
|
@@ -87,3 +87,30 @@
|
|
| 87 |
{"loss": 0.59575975, "token_acc": 0.77586207, "grad_norm": 2.97479558, "learning_rate": 4.28e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.025437, "epoch": 0.04280364, "global_step/max_steps": "80/1869", "percentage": "4.28%", "elapsed_time": "51m 59s", "remaining_time": "19h 22m 49s"}
|
| 88 |
{"eval_loss": 0.57795918, "eval_token_acc": 0.81181988, "eval_runtime": 230.2287, "eval_samples_per_second": 2.007, "eval_steps_per_second": 0.252, "epoch": 0.04280364, "global_step/max_steps": "80/1869", "percentage": "4.28%", "elapsed_time": "55m 50s", "remaining_time": "20h 48m 38s"}
|
| 89 |
{"loss": 0.48706263, "token_acc": 0.81560641, "grad_norm": 3.3198576, "learning_rate": 4.33e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023465, "epoch": 0.04333868, "global_step/max_steps": "81/1869", "percentage": "4.33%", "elapsed_time": "57m 6s", "remaining_time": "21h 0m 45s"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
{"loss": 0.59575975, "token_acc": 0.77586207, "grad_norm": 2.97479558, "learning_rate": 4.28e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.025437, "epoch": 0.04280364, "global_step/max_steps": "80/1869", "percentage": "4.28%", "elapsed_time": "51m 59s", "remaining_time": "19h 22m 49s"}
|
| 88 |
{"eval_loss": 0.57795918, "eval_token_acc": 0.81181988, "eval_runtime": 230.2287, "eval_samples_per_second": 2.007, "eval_steps_per_second": 0.252, "epoch": 0.04280364, "global_step/max_steps": "80/1869", "percentage": "4.28%", "elapsed_time": "55m 50s", "remaining_time": "20h 48m 38s"}
|
| 89 |
{"loss": 0.48706263, "token_acc": 0.81560641, "grad_norm": 3.3198576, "learning_rate": 4.33e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023465, "epoch": 0.04333868, "global_step/max_steps": "81/1869", "percentage": "4.33%", "elapsed_time": "57m 6s", "remaining_time": "21h 0m 45s"}
|
| 90 |
+
{"loss": 0.62418771, "token_acc": 0.8566879, "grad_norm": 3.29122806, "learning_rate": 4.39e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023659, "epoch": 0.04387373, "global_step/max_steps": "82/1869", "percentage": "4.39%", "elapsed_time": "57m 20s", "remaining_time": "20h 49m 45s"}
|
| 91 |
+
{"loss": 0.61460698, "token_acc": 0.80149813, "grad_norm": 3.44209623, "learning_rate": 4.44e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023735, "epoch": 0.04440877, "global_step/max_steps": "83/1869", "percentage": "4.44%", "elapsed_time": "57m 51s", "remaining_time": "20h 45m 8s"}
|
| 92 |
+
{"loss": 0.70045018, "token_acc": 0.74962064, "grad_norm": 3.83606768, "learning_rate": 4.49e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023947, "epoch": 0.04494382, "global_step/max_steps": "84/1869", "percentage": "4.49%", "elapsed_time": "58m 2s", "remaining_time": "20h 33m 27s"}
|
| 93 |
+
{"loss": 0.58728027, "token_acc": 0.90450644, "grad_norm": 2.8820703, "learning_rate": 4.55e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024034, "epoch": 0.04547887, "global_step/max_steps": "85/1869", "percentage": "4.55%", "elapsed_time": "58m 31s", "remaining_time": "20h 28m 23s"}
|
| 94 |
+
{"loss": 0.62150431, "token_acc": 0.83611533, "grad_norm": 3.53566265, "learning_rate": 4.6e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024086, "epoch": 0.04601391, "global_step/max_steps": "86/1869", "percentage": "4.60%", "elapsed_time": "59m 5s", "remaining_time": "20h 25m 8s"}
|
| 95 |
+
{"loss": 0.53575307, "token_acc": 0.79455103, "grad_norm": 3.14529085, "learning_rate": 4.65e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024135, "epoch": 0.04654896, "global_step/max_steps": "87/1869", "percentage": "4.65%", "elapsed_time": "59m 39s", "remaining_time": "20h 22m 2s"}
|
| 96 |
+
{"loss": 0.6705476, "token_acc": 0.83764706, "grad_norm": 2.86025071, "learning_rate": 4.71e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024345, "epoch": 0.047084, "global_step/max_steps": "88/1869", "percentage": "4.71%", "elapsed_time": "59m 49s", "remaining_time": "20h 10m 49s"}
|
| 97 |
+
{"loss": 0.54670548, "token_acc": 0.87827557, "grad_norm": 3.45245957, "learning_rate": 4.76e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024579, "epoch": 0.04761905, "global_step/max_steps": "89/1869", "percentage": "4.76%", "elapsed_time": "59m 55s", "remaining_time": "19h 58m 39s"}
|
| 98 |
+
{"loss": 0.55477643, "token_acc": 0.84027778, "grad_norm": 2.70059419, "learning_rate": 4.81e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024696, "epoch": 0.04815409, "global_step/max_steps": "90/1869", "percentage": "4.82%", "elapsed_time": "1h 0m 19s", "remaining_time": "19h 52m 22s"}
|
| 99 |
+
{"eval_loss": 0.57087022, "eval_token_acc": 0.81361393, "eval_runtime": 230.2666, "eval_samples_per_second": 2.006, "eval_steps_per_second": 0.252, "epoch": 0.04815409, "global_step/max_steps": "90/1869", "percentage": "4.82%", "elapsed_time": "1h 4m 9s", "remaining_time": "21h 8m 14s"}
|
| 100 |
+
{"loss": 0.53591204, "token_acc": 0.81718168, "grad_norm": 3.41573358, "learning_rate": 4.87e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.02315, "epoch": 0.04868914, "global_step/max_steps": "91/1869", "percentage": "4.87%", "elapsed_time": "1h 5m 5s", "remaining_time": "21h 11m 53s"}
|
| 101 |
+
{"loss": 0.57939601, "token_acc": 0.78804348, "grad_norm": 3.22205687, "learning_rate": 4.92e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023368, "epoch": 0.04922418, "global_step/max_steps": "92/1869", "percentage": "4.92%", "elapsed_time": "1h 5m 11s", "remaining_time": "20h 59m 19s"}
|
| 102 |
+
{"loss": 0.57385951, "token_acc": 0.83682771, "grad_norm": 3.16394401, "learning_rate": 4.97e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023586, "epoch": 0.04975923, "global_step/max_steps": "93/1869", "percentage": "4.98%", "elapsed_time": "1h 5m 18s", "remaining_time": "20h 47m 2s"}
|
| 103 |
+
{"loss": 0.5800854, "token_acc": 0.81495872, "grad_norm": 3.31869483, "learning_rate": 5.03e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023762, "epoch": 0.05029428, "global_step/max_steps": "94/1869", "percentage": "5.03%", "elapsed_time": "1h 5m 30s", "remaining_time": "20h 37m 7s"}
|
| 104 |
+
{"loss": 0.51432335, "token_acc": 0.82507289, "grad_norm": 4.43180466, "learning_rate": 5.08e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023978, "epoch": 0.05082932, "global_step/max_steps": "95/1869", "percentage": "5.08%", "elapsed_time": "1h 5m 36s", "remaining_time": "20h 25m 16s"}
|
| 105 |
+
{"loss": 0.55478501, "token_acc": 0.77943615, "grad_norm": 3.0449295, "learning_rate": 5.13e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024069, "epoch": 0.05136437, "global_step/max_steps": "96/1869", "percentage": "5.14%", "elapsed_time": "1h 6m 3s", "remaining_time": "20h 20m 0s"}
|
| 106 |
+
{"loss": 0.62661427, "token_acc": 0.76787955, "grad_norm": 6.60005713, "learning_rate": 5.19e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024282, "epoch": 0.05189941, "global_step/max_steps": "97/1869", "percentage": "5.19%", "elapsed_time": "1h 6m 9s", "remaining_time": "20h 8m 39s"}
|
| 107 |
+
{"loss": 0.63851893, "token_acc": 0.84980237, "grad_norm": 3.27295399, "learning_rate": 5.24e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024495, "epoch": 0.05243446, "global_step/max_steps": "98/1869", "percentage": "5.24%", "elapsed_time": "1h 6m 15s", "remaining_time": "19h 57m 29s"}
|
| 108 |
+
{"loss": 0.67096293, "token_acc": 0.7480315, "grad_norm": 3.94529891, "learning_rate": 5.29e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024535, "epoch": 0.0529695, "global_step/max_steps": "99/1869", "percentage": "5.30%", "elapsed_time": "1h 6m 50s", "remaining_time": "19h 54m 53s"}
|
| 109 |
+
{"loss": 0.63782001, "token_acc": 0.81664099, "grad_norm": 3.11050892, "learning_rate": 5.35e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024681, "epoch": 0.05350455, "global_step/max_steps": "100/1869", "percentage": "5.35%", "elapsed_time": "1h 7m 6s", "remaining_time": "19h 47m 11s"}
|
| 110 |
+
{"eval_loss": 0.56706554, "eval_token_acc": 0.81415815, "eval_runtime": 230.1885, "eval_samples_per_second": 2.007, "eval_steps_per_second": 0.252, "epoch": 0.05350455, "global_step/max_steps": "100/1869", "percentage": "5.35%", "elapsed_time": "1h 10m 56s", "remaining_time": "20h 55m 3s"}
|
| 111 |
+
{"loss": 0.59655845, "token_acc": 0.81839449, "grad_norm": 3.05500865, "learning_rate": 5.4e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023317, "epoch": 0.05403959, "global_step/max_steps": "101/1869", "percentage": "5.40%", "elapsed_time": "1h 11m 46s", "remaining_time": "20h 56m 25s"}
|
| 112 |
+
{"loss": 0.59819782, "token_acc": 0.80311891, "grad_norm": 4.83508968, "learning_rate": 5.45e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023427, "epoch": 0.05457464, "global_step/max_steps": "102/1869", "percentage": "5.46%", "elapsed_time": "1h 12m 8s", "remaining_time": "20h 49m 51s"}
|
| 113 |
+
{"loss": 0.5983628, "token_acc": 0.84854451, "grad_norm": 3.01287246, "learning_rate": 5.51e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023539, "epoch": 0.05510968, "global_step/max_steps": "103/1869", "percentage": "5.51%", "elapsed_time": "1h 12m 30s", "remaining_time": "20h 43m 17s"}
|
| 114 |
+
{"loss": 0.5585193, "token_acc": 0.81392694, "grad_norm": 3.496773, "learning_rate": 5.56e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023733, "epoch": 0.05564473, "global_step/max_steps": "104/1869", "percentage": "5.56%", "elapsed_time": "1h 12m 37s", "remaining_time": "20h 32m 24s"}
|
| 115 |
+
{"loss": 0.58505946, "token_acc": 0.87814313, "grad_norm": 3.46284056, "learning_rate": 5.61e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023926, "epoch": 0.05617978, "global_step/max_steps": "105/1869", "percentage": "5.62%", "elapsed_time": "1h 12m 43s", "remaining_time": "20h 21m 47s"}
|
| 116 |
+
{"loss": 0.54000103, "token_acc": 0.90326633, "grad_norm": 3.30159211, "learning_rate": 5.67e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024121, "epoch": 0.05671482, "global_step/max_steps": "106/1869", "percentage": "5.67%", "elapsed_time": "1h 12m 49s", "remaining_time": "20h 11m 12s"}
|