Training in progress, step 30
Browse files- adapter_model.safetensors +1 -1
- logging.jsonl +30 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13254157312
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3b05019bdbc9ceed3cfef082003c5c2def393313ad289d6425d7111cb69d84bf
|
| 3 |
size 13254157312
|
logging.jsonl
CHANGED
|
@@ -22,3 +22,33 @@
|
|
| 22 |
{"eval_loss": 0.73671097, "eval_token_acc": 0.79061684, "eval_runtime": 230.7649, "eval_samples_per_second": 2.002, "eval_steps_per_second": 0.251, "epoch": 0.01070091, "global_step/max_steps": "20/1869", "percentage": "1.07%", "elapsed_time": "14m 11s", "remaining_time": "21h 51m 58s"}
|
| 23 |
{"loss": 0.71632719, "token_acc": 0.79510949, "grad_norm": 5.31815338, "learning_rate": 1.12e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.022449, "epoch": 0.01123596, "global_step/max_steps": "21/1869", "percentage": "1.12%", "elapsed_time": "15m 10s", "remaining_time": "22h 15m 19s"}
|
| 24 |
{"loss": 0.75135165, "token_acc": 0.77522936, "grad_norm": 4.08145428, "learning_rate": 1.18e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.022744, "epoch": 0.011771, "global_step/max_steps": "22/1869", "percentage": "1.18%", "elapsed_time": "15m 42s", "remaining_time": "21h 58m 26s"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
{"eval_loss": 0.73671097, "eval_token_acc": 0.79061684, "eval_runtime": 230.7649, "eval_samples_per_second": 2.002, "eval_steps_per_second": 0.251, "epoch": 0.01070091, "global_step/max_steps": "20/1869", "percentage": "1.07%", "elapsed_time": "14m 11s", "remaining_time": "21h 51m 58s"}
|
| 23 |
{"loss": 0.71632719, "token_acc": 0.79510949, "grad_norm": 5.31815338, "learning_rate": 1.12e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.022449, "epoch": 0.01123596, "global_step/max_steps": "21/1869", "percentage": "1.12%", "elapsed_time": "15m 10s", "remaining_time": "22h 15m 19s"}
|
| 24 |
{"loss": 0.75135165, "token_acc": 0.77522936, "grad_norm": 4.08145428, "learning_rate": 1.18e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.022744, "epoch": 0.011771, "global_step/max_steps": "22/1869", "percentage": "1.18%", "elapsed_time": "15m 42s", "remaining_time": "21h 58m 26s"}
|
| 25 |
+
{"loss": 0.70510459, "token_acc": 0.78542155, "grad_norm": 3.34494901, "learning_rate": 1.23e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023061, "epoch": 0.01230605, "global_step/max_steps": "23/1869", "percentage": "1.23%", "elapsed_time": "16m 12s", "remaining_time": "21h 40m 41s"}
|
| 26 |
+
{"loss": 0.58708155, "token_acc": 0.89411765, "grad_norm": 4.94634724, "learning_rate": 1.28e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023844, "epoch": 0.01284109, "global_step/max_steps": "24/1869", "percentage": "1.28%", "elapsed_time": "16m 21s", "remaining_time": "20h 57m 34s"}
|
| 27 |
+
{"loss": 0.69591022, "token_acc": 0.80592105, "grad_norm": 5.5039525, "learning_rate": 1.34e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024497, "epoch": 0.01337614, "global_step/max_steps": "25/1869", "percentage": "1.34%", "elapsed_time": "16m 35s", "remaining_time": "20h 23m 49s"}
|
| 28 |
+
{"loss": 0.67804903, "token_acc": 0.81148564, "grad_norm": 3.53921056, "learning_rate": 1.39e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.025329, "epoch": 0.01391118, "global_step/max_steps": "26/1869", "percentage": "1.39%", "elapsed_time": "16m 41s", "remaining_time": "19h 43m 9s"}
|
| 29 |
+
{"loss": 0.59909678, "token_acc": 0.84777518, "grad_norm": 2.86513925, "learning_rate": 1.44e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.02614, "epoch": 0.01444623, "global_step/max_steps": "27/1869", "percentage": "1.44%", "elapsed_time": "16m 47s", "remaining_time": "19h 6m 0s"}
|
| 30 |
+
{"loss": 0.61625493, "token_acc": 0.87399236, "grad_norm": 4.2735076, "learning_rate": 1.5e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.026501, "epoch": 0.01498127, "global_step/max_steps": "28/1869", "percentage": "1.50%", "elapsed_time": "17m 11s", "remaining_time": "18h 50m 24s"}
|
| 31 |
+
{"loss": 0.62591362, "token_acc": 0.79508197, "grad_norm": 3.5549407, "learning_rate": 1.55e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.027282, "epoch": 0.01551632, "global_step/max_steps": "29/1869", "percentage": "1.55%", "elapsed_time": "17m 17s", "remaining_time": "18h 17m 37s"}
|
| 32 |
+
{"loss": 0.58011794, "token_acc": 0.79526227, "grad_norm": 3.10110259, "learning_rate": 1.6e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.028066, "epoch": 0.01605136, "global_step/max_steps": "30/1869", "percentage": "1.61%", "elapsed_time": "17m 23s", "remaining_time": "17h 46m 30s"}
|
| 33 |
+
{"eval_loss": 0.65177089, "eval_token_acc": 0.79980309, "eval_runtime": 230.3026, "eval_samples_per_second": 2.006, "eval_steps_per_second": 0.252, "epoch": 0.01605136, "global_step/max_steps": "30/1869", "percentage": "1.61%", "elapsed_time": "21m 14s", "remaining_time": "21h 41m 47s"}
|
| 34 |
+
{"loss": 0.67728925, "token_acc": 0.8055818, "grad_norm": 3.54144835, "learning_rate": 1.66e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.022548, "epoch": 0.01658641, "global_step/max_steps": "31/1869", "percentage": "1.66%", "elapsed_time": "22m 29s", "remaining_time": "22h 13m 53s"}
|
| 35 |
+
{"loss": 0.64033484, "token_acc": 0.80456949, "grad_norm": 3.63276386, "learning_rate": 1.71e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.022864, "epoch": 0.01712146, "global_step/max_steps": "32/1869", "percentage": "1.71%", "elapsed_time": "22m 54s", "remaining_time": "21h 55m 9s"}
|
| 36 |
+
{"loss": 0.59276545, "token_acc": 0.81354051, "grad_norm": 3.55099034, "learning_rate": 1.76e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023474, "epoch": 0.0176565, "global_step/max_steps": "33/1869", "percentage": "1.77%", "elapsed_time": "23m 0s", "remaining_time": "21h 20m 22s"}
|
| 37 |
+
{"loss": 0.68321669, "token_acc": 0.77755906, "grad_norm": 3.37234855, "learning_rate": 1.82e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024078, "epoch": 0.01819155, "global_step/max_steps": "34/1869", "percentage": "1.82%", "elapsed_time": "23m 7s", "remaining_time": "20h 47m 41s"}
|
| 38 |
+
{"loss": 0.66526508, "token_acc": 0.79340278, "grad_norm": 3.60016561, "learning_rate": 1.87e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024627, "epoch": 0.01872659, "global_step/max_steps": "35/1869", "percentage": "1.87%", "elapsed_time": "23m 16s", "remaining_time": "20h 19m 21s"}
|
| 39 |
+
{"loss": 0.69026184, "token_acc": 0.85021097, "grad_norm": 3.26779056, "learning_rate": 1.93e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.02513, "epoch": 0.01926164, "global_step/max_steps": "36/1869", "percentage": "1.93%", "elapsed_time": "23m 27s", "remaining_time": "19h 54m 28s"}
|
| 40 |
+
{"loss": 0.52923977, "token_acc": 0.81941748, "grad_norm": 2.91604924, "learning_rate": 1.98e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.025709, "epoch": 0.01979668, "global_step/max_steps": "37/1869", "percentage": "1.98%", "elapsed_time": "23m 34s", "remaining_time": "19h 26m 59s"}
|
| 41 |
+
{"loss": 0.59180516, "token_acc": 0.89078156, "grad_norm": 3.15961385, "learning_rate": 2.03e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.026141, "epoch": 0.02033173, "global_step/max_steps": "38/1869", "percentage": "2.03%", "elapsed_time": "23m 48s", "remaining_time": "19h 7m 18s"}
|
| 42 |
+
{"loss": 0.63943875, "token_acc": 0.75691134, "grad_norm": 4.54194307, "learning_rate": 2.09e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.026649, "epoch": 0.02086677, "global_step/max_steps": "39/1869", "percentage": "2.09%", "elapsed_time": "23m 58s", "remaining_time": "18h 44m 55s"}
|
| 43 |
+
{"loss": 0.57443136, "token_acc": 0.75711382, "grad_norm": 3.56686258, "learning_rate": 2.14e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.027218, "epoch": 0.02140182, "global_step/max_steps": "40/1869", "percentage": "2.14%", "elapsed_time": "24m 4s", "remaining_time": "18h 20m 53s"}
|
| 44 |
+
{"eval_loss": 0.61997586, "eval_token_acc": 0.80398873, "eval_runtime": 230.319, "eval_samples_per_second": 2.006, "eval_steps_per_second": 0.252, "epoch": 0.02140182, "global_step/max_steps": "40/1869", "percentage": "2.14%", "elapsed_time": "27m 54s", "remaining_time": "21h 16m 25s"}
|
| 45 |
+
{"loss": 0.5590893, "token_acc": 0.80868182, "grad_norm": 3.27547669, "learning_rate": 2.19e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023291, "epoch": 0.02193686, "global_step/max_steps": "41/1869", "percentage": "2.19%", "elapsed_time": "28m 55s", "remaining_time": "21h 29m 31s"}
|
| 46 |
+
{"loss": 0.58981824, "token_acc": 0.77457265, "grad_norm": 3.15756106, "learning_rate": 2.25e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023774, "epoch": 0.02247191, "global_step/max_steps": "42/1869", "percentage": "2.25%", "elapsed_time": "29m 1s", "remaining_time": "21h 2m 39s"}
|
| 47 |
+
{"loss": 0.57524711, "token_acc": 0.83855422, "grad_norm": 4.17729044, "learning_rate": 2.3e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.02426, "epoch": 0.02300696, "global_step/max_steps": "43/1869", "percentage": "2.30%", "elapsed_time": "29m 7s", "remaining_time": "20h 36m 44s"}
|
| 48 |
+
{"loss": 0.63644284, "token_acc": 0.81646274, "grad_norm": 2.62428689, "learning_rate": 2.35e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024434, "epoch": 0.023542, "global_step/max_steps": "44/1869", "percentage": "2.35%", "elapsed_time": "29m 35s", "remaining_time": "20h 27m 32s"}
|
| 49 |
+
{"loss": 0.64309716, "token_acc": 0.83072917, "grad_norm": 2.948915, "learning_rate": 2.41e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024905, "epoch": 0.02407705, "global_step/max_steps": "45/1869", "percentage": "2.41%", "elapsed_time": "29m 41s", "remaining_time": "20h 3m 44s"}
|
| 50 |
+
{"loss": 0.5813536, "token_acc": 0.86121392, "grad_norm": 3.02264547, "learning_rate": 2.46e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.025314, "epoch": 0.02461209, "global_step/max_steps": "46/1869", "percentage": "2.46%", "elapsed_time": "29m 52s", "remaining_time": "19h 43m 42s"}
|
| 51 |
+
{"loss": 0.67458242, "token_acc": 0.84415584, "grad_norm": 4.08577394, "learning_rate": 2.51e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.025777, "epoch": 0.02514714, "global_step/max_steps": "47/1869", "percentage": "2.51%", "elapsed_time": "29m 58s", "remaining_time": "19h 21m 52s"}
|
| 52 |
+
{"loss": 0.55463028, "token_acc": 0.79433368, "grad_norm": 3.79704332, "learning_rate": 2.57e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.02624, "epoch": 0.02568218, "global_step/max_steps": "48/1869", "percentage": "2.57%", "elapsed_time": "30m 4s", "remaining_time": "19h 0m 49s"}
|
| 53 |
+
{"loss": 0.62436664, "token_acc": 0.7584, "grad_norm": 3.25214219, "learning_rate": 2.62e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.026392, "epoch": 0.02621723, "global_step/max_steps": "49/1869", "percentage": "2.62%", "elapsed_time": "30m 31s", "remaining_time": "18h 53m 49s"}
|
| 54 |
+
{"loss": 0.53918386, "token_acc": 0.90425532, "grad_norm": 4.26714802, "learning_rate": 2.67e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.026844, "epoch": 0.02675227, "global_step/max_steps": "50/1869", "percentage": "2.68%", "elapsed_time": "30m 37s", "remaining_time": "18h 34m 11s"}
|