Training in progress, step 500
Browse files- model-00001-of-00004.safetensors +1 -1
- model-00002-of-00004.safetensors +1 -1
- model-00003-of-00004.safetensors +1 -1
- model-00004-of-00004.safetensors +1 -1
- trainer_log.jsonl +60 -180
- training_args.bin +1 -1
model-00001-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4976698672
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d27713a849508d3f1b9a4c10cc9589c5fc54465ecd775765923e630ffad8902
|
| 3 |
size 4976698672
|
model-00002-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4999802720
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c2c9f5d1d300b5215f2803433565807c7a6bc60ea4ceaae80b81ed0a1cb000ae
|
| 3 |
size 4999802720
|
model-00003-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4915916176
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:18521e5ec62ead98eba12a153c51cadfe3eb919328406b688d06b22a081cebff
|
| 3 |
size 4915916176
|
model-00004-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1168138808
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:67efdac01d671f49e02d8c72f4fe6fdd783d34109c8394b3163297d9cdf28710
|
| 3 |
size 1168138808
|
trainer_log.jsonl
CHANGED
|
@@ -1,180 +1,60 @@
|
|
| 1 |
-
{"current_steps": 10, "total_steps": 3751, "loss": 0.
|
| 2 |
-
{"current_steps": 20, "total_steps": 3751, "loss": 0.
|
| 3 |
-
{"current_steps": 30, "total_steps": 3751, "loss": 0.
|
| 4 |
-
{"current_steps": 40, "total_steps": 3751, "loss": 0.3824, "lr": 1.0638297872340427e-06, "epoch": 0.010663645300498192, "percentage": 1.07, "elapsed_time": "0:09:
|
| 5 |
-
{"current_steps": 50, "total_steps": 3751, "loss": 0.
|
| 6 |
-
{"current_steps": 50, "total_steps": 3751, "eval_loss": 0.
|
| 7 |
-
{"current_steps": 60, "total_steps": 3751, "loss": 0.
|
| 8 |
-
{"current_steps": 70, "total_steps": 3751, "loss": 0.3327, "lr": 1.8617021276595745e-06, "epoch": 0.018661379275871838, "percentage": 1.87, "elapsed_time": "0:20:
|
| 9 |
-
{"current_steps": 80, "total_steps": 3751, "loss": 0.
|
| 10 |
-
{"current_steps": 90, "total_steps": 3751, "loss": 0.306, "lr": 2.393617021276596e-06, "epoch": 0.023993201926120932, "percentage": 2.4, "elapsed_time": "0:25:
|
| 11 |
-
{"current_steps": 100, "total_steps": 3751, "loss": 0.
|
| 12 |
-
{"current_steps": 100, "total_steps": 3751, "eval_loss": 0.
|
| 13 |
-
{"current_steps": 110, "total_steps": 3751, "loss": 0.
|
| 14 |
-
{"current_steps": 120, "total_steps": 3751, "loss": 0.
|
| 15 |
-
{"current_steps": 130, "total_steps": 3751, "loss": 0.
|
| 16 |
-
{"current_steps": 140, "total_steps": 3751, "loss": 0.
|
| 17 |
-
{"current_steps": 150, "total_steps": 3751, "loss": 0.
|
| 18 |
-
{"current_steps": 150, "total_steps": 3751, "eval_loss": 0.
|
| 19 |
-
{"current_steps": 160, "total_steps": 3751, "loss": 0.
|
| 20 |
-
{"current_steps": 170, "total_steps": 3751, "loss": 0.
|
| 21 |
-
{"current_steps": 180, "total_steps": 3751, "loss": 0.
|
| 22 |
-
{"current_steps": 190, "total_steps": 3751, "loss": 0.
|
| 23 |
-
{"current_steps": 200, "total_steps": 3751, "loss": 0.
|
| 24 |
-
{"current_steps": 200, "total_steps": 3751, "eval_loss": 0.
|
| 25 |
-
{"current_steps": 210, "total_steps": 3751, "loss": 0.
|
| 26 |
-
{"current_steps": 220, "total_steps": 3751, "loss": 0.
|
| 27 |
-
{"current_steps": 230, "total_steps": 3751, "loss": 0.
|
| 28 |
-
{"current_steps": 240, "total_steps": 3751, "loss": 0.
|
| 29 |
-
{"current_steps": 250, "total_steps": 3751, "loss": 0.
|
| 30 |
-
{"current_steps": 250, "total_steps": 3751, "eval_loss": 0.
|
| 31 |
-
{"current_steps": 260, "total_steps": 3751, "loss": 0.
|
| 32 |
-
{"current_steps": 270, "total_steps": 3751, "loss": 0.
|
| 33 |
-
{"current_steps": 280, "total_steps": 3751, "loss": 0.
|
| 34 |
-
{"current_steps": 290, "total_steps": 3751, "loss": 0.
|
| 35 |
-
{"current_steps": 300, "total_steps": 3751, "loss": 0.
|
| 36 |
-
{"current_steps": 300, "total_steps": 3751, "eval_loss": 0.
|
| 37 |
-
{"current_steps": 310, "total_steps": 3751, "loss": 0.
|
| 38 |
-
{"current_steps": 320, "total_steps": 3751, "loss": 0.
|
| 39 |
-
{"current_steps": 330, "total_steps": 3751, "loss": 0.
|
| 40 |
-
{"current_steps": 340, "total_steps": 3751, "loss": 0.
|
| 41 |
-
{"current_steps": 350, "total_steps": 3751, "loss": 0.
|
| 42 |
-
{"current_steps": 350, "total_steps": 3751, "eval_loss": 0.
|
| 43 |
-
{"current_steps": 360, "total_steps": 3751, "loss": 0.
|
| 44 |
-
{"current_steps": 370, "total_steps": 3751, "loss": 0.
|
| 45 |
-
{"current_steps": 380, "total_steps": 3751, "loss": 0.
|
| 46 |
-
{"current_steps": 390, "total_steps": 3751, "loss": 0.
|
| 47 |
-
{"current_steps": 400, "total_steps": 3751, "loss": 0.
|
| 48 |
-
{"current_steps": 400, "total_steps": 3751, "eval_loss": 0.
|
| 49 |
-
{"current_steps": 410, "total_steps": 3751, "loss": 0.
|
| 50 |
-
{"current_steps": 420, "total_steps": 3751, "loss": 0.
|
| 51 |
-
{"current_steps": 430, "total_steps": 3751, "loss": 0.
|
| 52 |
-
{"current_steps": 440, "total_steps": 3751, "loss": 0.
|
| 53 |
-
{"current_steps": 450, "total_steps": 3751, "loss": 0.
|
| 54 |
-
{"current_steps": 450, "total_steps": 3751, "eval_loss": 0.
|
| 55 |
-
{"current_steps": 460, "total_steps": 3751, "loss": 0.
|
| 56 |
-
{"current_steps": 470, "total_steps": 3751, "loss": 0.
|
| 57 |
-
{"current_steps": 480, "total_steps": 3751, "loss": 0.
|
| 58 |
-
{"current_steps": 490, "total_steps": 3751, "loss": 0.
|
| 59 |
-
{"current_steps": 500, "total_steps": 3751, "loss": 0.
|
| 60 |
-
{"current_steps": 500, "total_steps": 3751, "eval_loss": 0.
|
| 61 |
-
{"current_steps": 510, "total_steps": 3751, "loss": 0.3037, "lr": 9.961154684757636e-06, "epoch": 0.13596147758135196, "percentage": 13.6, "elapsed_time": "2:43:08", "remaining_time": "17:16:42"}
|
| 62 |
-
{"current_steps": 520, "total_steps": 3751, "loss": 0.3027, "lr": 9.955149546707465e-06, "epoch": 0.1386273889064765, "percentage": 13.86, "elapsed_time": "2:45:25", "remaining_time": "17:07:54"}
|
| 63 |
-
{"current_steps": 530, "total_steps": 3751, "loss": 0.2957, "lr": 9.948715064238956e-06, "epoch": 0.14129330023160105, "percentage": 14.13, "elapsed_time": "2:47:49", "remaining_time": "16:59:54"}
|
| 64 |
-
{"current_steps": 540, "total_steps": 3751, "loss": 0.2993, "lr": 9.941851794874969e-06, "epoch": 0.14395921155672559, "percentage": 14.4, "elapsed_time": "2:50:05", "remaining_time": "16:51:26"}
|
| 65 |
-
{"current_steps": 550, "total_steps": 3751, "loss": 0.3142, "lr": 9.934560333291077e-06, "epoch": 0.14662512288185015, "percentage": 14.66, "elapsed_time": "2:52:24", "remaining_time": "16:43:24"}
|
| 66 |
-
{"current_steps": 550, "total_steps": 3751, "eval_loss": 0.3080333173274994, "epoch": 0.14662512288185015, "percentage": 14.66, "elapsed_time": "2:56:47", "remaining_time": "17:08:57"}
|
| 67 |
-
{"current_steps": 560, "total_steps": 3751, "loss": 0.302, "lr": 9.926841311264037e-06, "epoch": 0.1492910342069747, "percentage": 14.93, "elapsed_time": "2:59:04", "remaining_time": "17:00:25"}
|
| 68 |
-
{"current_steps": 570, "total_steps": 3751, "loss": 0.3098, "lr": 9.918695397617064e-06, "epoch": 0.15195694553209924, "percentage": 15.2, "elapsed_time": "3:01:25", "remaining_time": "16:52:28"}
|
| 69 |
-
{"current_steps": 580, "total_steps": 3751, "loss": 0.3001, "lr": 9.91012329816186e-06, "epoch": 0.1546228568572238, "percentage": 15.46, "elapsed_time": "3:03:48", "remaining_time": "16:44:53"}
|
| 70 |
-
{"current_steps": 590, "total_steps": 3751, "loss": 0.3014, "lr": 9.901125755637473e-06, "epoch": 0.15728876818234833, "percentage": 15.73, "elapsed_time": "3:06:07", "remaining_time": "16:37:11"}
|
| 71 |
-
{"current_steps": 600, "total_steps": 3751, "loss": 0.3046, "lr": 9.89170354964594e-06, "epoch": 0.15995467950747289, "percentage": 16.0, "elapsed_time": "3:08:26", "remaining_time": "16:29:37"}
|
| 72 |
-
{"current_steps": 600, "total_steps": 3751, "eval_loss": 0.3062264621257782, "epoch": 0.15995467950747289, "percentage": 16.0, "elapsed_time": "3:12:49", "remaining_time": "16:52:40"}
|
| 73 |
-
{"current_steps": 610, "total_steps": 3751, "loss": 0.2994, "lr": 9.881857496584726e-06, "epoch": 0.16262059083259742, "percentage": 16.26, "elapsed_time": "3:15:11", "remaining_time": "16:45:03"}
|
| 74 |
-
{"current_steps": 620, "total_steps": 3751, "loss": 0.2953, "lr": 9.871588449575999e-06, "epoch": 0.16528650215772198, "percentage": 16.53, "elapsed_time": "3:17:30", "remaining_time": "16:37:25"}
|
| 75 |
-
{"current_steps": 630, "total_steps": 3751, "loss": 0.3035, "lr": 9.860897298392712e-06, "epoch": 0.16795241348284654, "percentage": 16.8, "elapsed_time": "3:19:47", "remaining_time": "16:29:43"}
|
| 76 |
-
{"current_steps": 640, "total_steps": 3751, "loss": 0.3005, "lr": 9.849784969381488e-06, "epoch": 0.17061832480797107, "percentage": 17.06, "elapsed_time": "3:22:06", "remaining_time": "16:22:26"}
|
| 77 |
-
{"current_steps": 650, "total_steps": 3751, "loss": 0.2853, "lr": 9.83825242538238e-06, "epoch": 0.17328423613309563, "percentage": 17.33, "elapsed_time": "3:24:26", "remaining_time": "16:15:19"}
|
| 78 |
-
{"current_steps": 650, "total_steps": 3751, "eval_loss": 0.30302974581718445, "epoch": 0.17328423613309563, "percentage": 17.33, "elapsed_time": "3:28:49", "remaining_time": "16:36:16"}
|
| 79 |
-
{"current_steps": 660, "total_steps": 3751, "loss": 0.28, "lr": 9.826300665645432e-06, "epoch": 0.17595014745822016, "percentage": 17.6, "elapsed_time": "3:31:06", "remaining_time": "16:28:42"}
|
| 80 |
-
{"current_steps": 670, "total_steps": 3751, "loss": 0.2843, "lr": 9.813930725744095e-06, "epoch": 0.17861605878334472, "percentage": 17.86, "elapsed_time": "3:33:29", "remaining_time": "16:21:44"}
|
| 81 |
-
{"current_steps": 680, "total_steps": 3751, "loss": 0.302, "lr": 9.801143677485509e-06, "epoch": 0.18128197010846928, "percentage": 18.13, "elapsed_time": "3:35:51", "remaining_time": "16:14:52"}
|
| 82 |
-
{"current_steps": 690, "total_steps": 3751, "loss": 0.3038, "lr": 9.787940628817627e-06, "epoch": 0.1839478814335938, "percentage": 18.4, "elapsed_time": "3:38:11", "remaining_time": "16:07:56"}
|
| 83 |
-
{"current_steps": 700, "total_steps": 3751, "loss": 0.2933, "lr": 9.774322723733216e-06, "epoch": 0.18661379275871837, "percentage": 18.66, "elapsed_time": "3:40:28", "remaining_time": "16:00:56"}
|
| 84 |
-
{"current_steps": 700, "total_steps": 3751, "eval_loss": 0.2952025234699249, "epoch": 0.18661379275871837, "percentage": 18.66, "elapsed_time": "3:44:51", "remaining_time": "16:20:03"}
|
| 85 |
-
{"current_steps": 710, "total_steps": 3751, "loss": 0.2799, "lr": 9.760291142170739e-06, "epoch": 0.1892797040838429, "percentage": 18.93, "elapsed_time": "3:47:11", "remaining_time": "16:13:05"}
|
| 86 |
-
{"current_steps": 720, "total_steps": 3751, "loss": 0.2938, "lr": 9.745847099912116e-06, "epoch": 0.19194561540896746, "percentage": 19.19, "elapsed_time": "3:49:31", "remaining_time": "16:06:13"}
|
| 87 |
-
{"current_steps": 730, "total_steps": 3751, "loss": 0.2936, "lr": 9.73099184847738e-06, "epoch": 0.19461152673409202, "percentage": 19.46, "elapsed_time": "3:51:46", "remaining_time": "15:59:09"}
|
| 88 |
-
{"current_steps": 740, "total_steps": 3751, "loss": 0.2846, "lr": 9.715726675016238e-06, "epoch": 0.19727743805921655, "percentage": 19.73, "elapsed_time": "3:54:06", "remaining_time": "15:52:32"}
|
| 89 |
-
{"current_steps": 750, "total_steps": 3751, "loss": 0.2862, "lr": 9.700052902196541e-06, "epoch": 0.1999433493843411, "percentage": 19.99, "elapsed_time": "3:56:28", "remaining_time": "15:46:12"}
|
| 90 |
-
{"current_steps": 750, "total_steps": 3751, "eval_loss": 0.2914896607398987, "epoch": 0.1999433493843411, "percentage": 19.99, "elapsed_time": "4:00:51", "remaining_time": "16:03:47"}
|
| 91 |
-
{"current_steps": 760, "total_steps": 3751, "loss": 0.2914, "lr": 9.68397188808969e-06, "epoch": 0.20260926070946564, "percentage": 20.26, "elapsed_time": "4:03:07", "remaining_time": "15:56:51"}
|
| 92 |
-
{"current_steps": 770, "total_steps": 3751, "loss": 0.299, "lr": 9.667485026052956e-06, "epoch": 0.2052751720345902, "percentage": 20.53, "elapsed_time": "4:05:23", "remaining_time": "15:50:01"}
|
| 93 |
-
{"current_steps": 780, "total_steps": 3751, "loss": 0.2953, "lr": 9.650593744608754e-06, "epoch": 0.20794108335971476, "percentage": 20.79, "elapsed_time": "4:07:49", "remaining_time": "15:43:59"}
|
| 94 |
-
{"current_steps": 790, "total_steps": 3751, "loss": 0.2913, "lr": 9.633299507320862e-06, "epoch": 0.2106069946848393, "percentage": 21.06, "elapsed_time": "4:10:14", "remaining_time": "15:37:54"}
|
| 95 |
-
{"current_steps": 800, "total_steps": 3751, "loss": 0.2867, "lr": 9.615603812667618e-06, "epoch": 0.21327290600996385, "percentage": 21.33, "elapsed_time": "4:12:30", "remaining_time": "15:31:25"}
|
| 96 |
-
{"current_steps": 800, "total_steps": 3751, "eval_loss": 0.2883636951446533, "epoch": 0.21327290600996385, "percentage": 21.33, "elapsed_time": "4:16:53", "remaining_time": "15:47:36"}
|
| 97 |
-
{"current_steps": 810, "total_steps": 3751, "loss": 0.2891, "lr": 9.597508193912077e-06, "epoch": 0.21593881733508838, "percentage": 21.59, "elapsed_time": "4:19:10", "remaining_time": "15:41:02"}
|
| 98 |
-
{"current_steps": 820, "total_steps": 3751, "loss": 0.2853, "lr": 9.579014218969158e-06, "epoch": 0.21860472866021294, "percentage": 21.86, "elapsed_time": "4:21:34", "remaining_time": "15:34:57"}
|
| 99 |
-
{"current_steps": 830, "total_steps": 3751, "loss": 0.2936, "lr": 9.560123490269795e-06, "epoch": 0.2212706399853375, "percentage": 22.13, "elapsed_time": "4:23:55", "remaining_time": "15:28:47"}
|
| 100 |
-
{"current_steps": 840, "total_steps": 3751, "loss": 0.282, "lr": 9.540837644622091e-06, "epoch": 0.22393655131046203, "percentage": 22.39, "elapsed_time": "4:26:15", "remaining_time": "15:22:43"}
|
| 101 |
-
{"current_steps": 850, "total_steps": 3751, "loss": 0.2871, "lr": 9.521158353069494e-06, "epoch": 0.2266024626355866, "percentage": 22.66, "elapsed_time": "4:28:34", "remaining_time": "15:16:36"}
|
| 102 |
-
{"current_steps": 850, "total_steps": 3751, "eval_loss": 0.2833527624607086, "epoch": 0.2266024626355866, "percentage": 22.66, "elapsed_time": "4:32:57", "remaining_time": "15:31:35"}
|
| 103 |
-
{"current_steps": 860, "total_steps": 3751, "loss": 0.2869, "lr": 9.501087320746007e-06, "epoch": 0.22926837396071112, "percentage": 22.93, "elapsed_time": "4:35:17", "remaining_time": "15:25:24"}
|
| 104 |
-
{"current_steps": 870, "total_steps": 3751, "loss": 0.2857, "lr": 9.480626286728445e-06, "epoch": 0.23193428528583568, "percentage": 23.19, "elapsed_time": "4:37:36", "remaining_time": "15:19:16"}
|
| 105 |
-
{"current_steps": 880, "total_steps": 3751, "loss": 0.2826, "lr": 9.459777023885754e-06, "epoch": 0.23460019661096024, "percentage": 23.46, "elapsed_time": "4:39:50", "remaining_time": "15:12:58"}
|
| 106 |
-
{"current_steps": 890, "total_steps": 3751, "loss": 0.2834, "lr": 9.438541338725397e-06, "epoch": 0.23726610793608477, "percentage": 23.73, "elapsed_time": "4:42:10", "remaining_time": "15:07:04"}
|
| 107 |
-
{"current_steps": 900, "total_steps": 3751, "loss": 0.2921, "lr": 9.416921071236821e-06, "epoch": 0.23993201926120933, "percentage": 23.99, "elapsed_time": "4:44:30", "remaining_time": "15:01:15"}
|
| 108 |
-
{"current_steps": 900, "total_steps": 3751, "eval_loss": 0.27931955456733704, "epoch": 0.23993201926120933, "percentage": 23.99, "elapsed_time": "4:48:53", "remaining_time": "15:15:10"}
|
| 109 |
-
{"current_steps": 910, "total_steps": 3751, "loss": 0.2886, "lr": 9.394918094732044e-06, "epoch": 0.24259793058633386, "percentage": 24.26, "elapsed_time": "4:51:19", "remaining_time": "15:09:31"}
|
| 110 |
-
{"current_steps": 920, "total_steps": 3751, "loss": 0.2839, "lr": 9.37253431568332e-06, "epoch": 0.24526384191145842, "percentage": 24.53, "elapsed_time": "4:53:37", "remaining_time": "15:03:33"}
|
| 111 |
-
{"current_steps": 930, "total_steps": 3751, "loss": 0.2893, "lr": 9.349771673557966e-06, "epoch": 0.24792975323658298, "percentage": 24.79, "elapsed_time": "4:55:57", "remaining_time": "14:57:44"}
|
| 112 |
-
{"current_steps": 940, "total_steps": 3751, "loss": 0.289, "lr": 9.326632140650311e-06, "epoch": 0.2505956645617075, "percentage": 25.06, "elapsed_time": "4:58:16", "remaining_time": "14:51:58"}
|
| 113 |
-
{"current_steps": 950, "total_steps": 3751, "loss": 0.2848, "lr": 9.303117721910801e-06, "epoch": 0.25326157588683207, "percentage": 25.33, "elapsed_time": "5:00:36", "remaining_time": "14:46:18"}
|
| 114 |
-
{"current_steps": 950, "total_steps": 3751, "eval_loss": 0.28279709815979004, "epoch": 0.25326157588683207, "percentage": 25.33, "elapsed_time": "5:04:59", "remaining_time": "14:59:14"}
|
| 115 |
-
{"current_steps": 960, "total_steps": 3751, "loss": 0.2745, "lr": 9.279230454772282e-06, "epoch": 0.25592748721195663, "percentage": 25.59, "elapsed_time": "5:07:20", "remaining_time": "14:53:33"}
|
| 116 |
-
{"current_steps": 970, "total_steps": 3751, "loss": 0.2772, "lr": 9.25497240897346e-06, "epoch": 0.2585933985370812, "percentage": 25.86, "elapsed_time": "5:09:39", "remaining_time": "14:47:48"}
|
| 117 |
-
{"current_steps": 980, "total_steps": 3751, "loss": 0.2804, "lr": 9.23034568637957e-06, "epoch": 0.2612593098622057, "percentage": 26.13, "elapsed_time": "5:11:59", "remaining_time": "14:42:09"}
|
| 118 |
-
{"current_steps": 990, "total_steps": 3751, "loss": 0.2869, "lr": 9.205352420800253e-06, "epoch": 0.26392522118733025, "percentage": 26.39, "elapsed_time": "5:14:16", "remaining_time": "14:36:29"}
|
| 119 |
-
{"current_steps": 1000, "total_steps": 3751, "loss": 0.2896, "lr": 9.179994777804677e-06, "epoch": 0.2665911325124548, "percentage": 26.66, "elapsed_time": "5:16:35", "remaining_time": "14:30:56"}
|
| 120 |
-
{"current_steps": 1000, "total_steps": 3751, "eval_loss": 0.2778474688529968, "epoch": 0.2665911325124548, "percentage": 26.66, "elapsed_time": "5:20:58", "remaining_time": "14:43:01"}
|
| 121 |
-
{"current_steps": 1010, "total_steps": 3751, "loss": 0.2851, "lr": 9.154274954533895e-06, "epoch": 0.26925704383757937, "percentage": 26.93, "elapsed_time": "5:24:32", "remaining_time": "14:40:45"}
|
| 122 |
-
{"current_steps": 1020, "total_steps": 3751, "loss": 0.2791, "lr": 9.128195179510466e-06, "epoch": 0.2719229551627039, "percentage": 27.19, "elapsed_time": "5:26:53", "remaining_time": "14:35:15"}
|
| 123 |
-
{"current_steps": 1030, "total_steps": 3751, "loss": 0.275, "lr": 9.101757712445369e-06, "epoch": 0.27458886648782843, "percentage": 27.46, "elapsed_time": "5:29:14", "remaining_time": "14:29:46"}
|
| 124 |
-
{"current_steps": 1040, "total_steps": 3751, "loss": 0.274, "lr": 9.07496484404221e-06, "epoch": 0.277254777812953, "percentage": 27.73, "elapsed_time": "5:31:34", "remaining_time": "14:24:19"}
|
| 125 |
-
{"current_steps": 1050, "total_steps": 3751, "loss": 0.2791, "lr": 9.04781889579873e-06, "epoch": 0.27992068913807755, "percentage": 27.99, "elapsed_time": "5:33:52", "remaining_time": "14:18:51"}
|
| 126 |
-
{"current_steps": 1050, "total_steps": 3751, "eval_loss": 0.2756091356277466, "epoch": 0.27992068913807755, "percentage": 27.99, "elapsed_time": "5:38:16", "remaining_time": "14:30:09"}
|
| 127 |
-
{"current_steps": 1060, "total_steps": 3751, "loss": 0.2797, "lr": 9.020322219805674e-06, "epoch": 0.2825866004632021, "percentage": 28.26, "elapsed_time": "5:40:39", "remaining_time": "14:24:48"}
|
| 128 |
-
{"current_steps": 1070, "total_steps": 3751, "loss": 0.2827, "lr": 8.99247719854297e-06, "epoch": 0.28525251178832667, "percentage": 28.53, "elapsed_time": "5:43:00", "remaining_time": "14:19:27"}
|
| 129 |
-
{"current_steps": 1080, "total_steps": 3751, "loss": 0.2789, "lr": 8.964286244673315e-06, "epoch": 0.28791842311345117, "percentage": 28.79, "elapsed_time": "5:45:18", "remaining_time": "14:13:59"}
|
| 130 |
-
{"current_steps": 1090, "total_steps": 3751, "loss": 0.2709, "lr": 8.935751800833117e-06, "epoch": 0.29058433443857573, "percentage": 29.06, "elapsed_time": "5:47:36", "remaining_time": "14:08:35"}
|
| 131 |
-
{"current_steps": 1100, "total_steps": 3751, "loss": 0.2666, "lr": 8.906876339420851e-06, "epoch": 0.2932502457637003, "percentage": 29.33, "elapsed_time": "5:49:56", "remaining_time": "14:03:22"}
|
| 132 |
-
{"current_steps": 1100, "total_steps": 3751, "eval_loss": 0.27202168107032776, "epoch": 0.2932502457637003, "percentage": 29.33, "elapsed_time": "5:54:20", "remaining_time": "14:13:57"}
|
| 133 |
-
{"current_steps": 1110, "total_steps": 3751, "loss": 0.2735, "lr": 8.877662362382844e-06, "epoch": 0.29591615708882485, "percentage": 29.59, "elapsed_time": "5:56:39", "remaining_time": "14:08:34"}
|
| 134 |
-
{"current_steps": 1120, "total_steps": 3751, "loss": 0.268, "lr": 8.848112400996473e-06, "epoch": 0.2985820684139494, "percentage": 29.86, "elapsed_time": "5:59:03", "remaining_time": "14:03:27"}
|
| 135 |
-
{"current_steps": 1130, "total_steps": 3751, "loss": 0.2735, "lr": 8.818229015650862e-06, "epoch": 0.3012479797390739, "percentage": 30.13, "elapsed_time": "6:01:24", "remaining_time": "13:58:16"}
|
| 136 |
-
{"current_steps": 1140, "total_steps": 3751, "loss": 0.2769, "lr": 8.788014795625018e-06, "epoch": 0.30391389106419847, "percentage": 30.39, "elapsed_time": "6:03:44", "remaining_time": "13:53:06"}
|
| 137 |
-
{"current_steps": 1150, "total_steps": 3751, "loss": 0.2695, "lr": 8.757472358863481e-06, "epoch": 0.30657980238932303, "percentage": 30.66, "elapsed_time": "6:06:03", "remaining_time": "13:47:55"}
|
| 138 |
-
{"current_steps": 1150, "total_steps": 3751, "eval_loss": 0.2705162465572357, "epoch": 0.30657980238932303, "percentage": 30.66, "elapsed_time": "6:10:27", "remaining_time": "13:57:51"}
|
| 139 |
-
{"current_steps": 1160, "total_steps": 3751, "loss": 0.267, "lr": 8.726604351749503e-06, "epoch": 0.3092457137144476, "percentage": 30.93, "elapsed_time": "6:12:45", "remaining_time": "13:52:35"}
|
| 140 |
-
{"current_steps": 1170, "total_steps": 3751, "loss": 0.277, "lr": 8.69541344887573e-06, "epoch": 0.3119116250395721, "percentage": 31.19, "elapsed_time": "6:15:04", "remaining_time": "13:47:25"}
|
| 141 |
-
{"current_steps": 1180, "total_steps": 3751, "loss": 0.2693, "lr": 8.66390235281248e-06, "epoch": 0.31457753636469665, "percentage": 31.46, "elapsed_time": "6:17:21", "remaining_time": "13:42:10"}
|
| 142 |
-
{"current_steps": 1190, "total_steps": 3751, "loss": 0.2715, "lr": 8.632073793873548e-06, "epoch": 0.3172434476898212, "percentage": 31.72, "elapsed_time": "6:19:44", "remaining_time": "13:37:13"}
|
| 143 |
-
{"current_steps": 1200, "total_steps": 3751, "loss": 0.2763, "lr": 8.599930529879669e-06, "epoch": 0.31990935901494577, "percentage": 31.99, "elapsed_time": "6:22:08", "remaining_time": "13:32:23"}
|
| 144 |
-
{"current_steps": 1200, "total_steps": 3751, "eval_loss": 0.2758665978908539, "epoch": 0.31990935901494577, "percentage": 31.99, "elapsed_time": "6:26:32", "remaining_time": "13:41:43"}
|
| 145 |
-
{"current_steps": 1210, "total_steps": 3751, "loss": 0.2637, "lr": 8.567475345919532e-06, "epoch": 0.32257527034007033, "percentage": 32.26, "elapsed_time": "6:28:54", "remaining_time": "13:36:42"}
|
| 146 |
-
{"current_steps": 1220, "total_steps": 3751, "loss": 0.2734, "lr": 8.534711054108487e-06, "epoch": 0.32524118166519483, "percentage": 32.52, "elapsed_time": "6:31:13", "remaining_time": "13:31:37"}
|
| 147 |
-
{"current_steps": 1230, "total_steps": 3751, "loss": 0.2642, "lr": 8.501640493344866e-06, "epoch": 0.3279070929903194, "percentage": 32.79, "elapsed_time": "6:33:29", "remaining_time": "13:26:29"}
|
| 148 |
-
{"current_steps": 1240, "total_steps": 3751, "loss": 0.2625, "lr": 8.468266529064025e-06, "epoch": 0.33057300431544395, "percentage": 33.06, "elapsed_time": "6:35:44", "remaining_time": "13:21:23"}
|
| 149 |
-
{"current_steps": 1250, "total_steps": 3751, "loss": 0.268, "lr": 8.434592052990044e-06, "epoch": 0.3332389156405685, "percentage": 33.32, "elapsed_time": "6:38:01", "remaining_time": "13:16:22"}
|
| 150 |
-
{"current_steps": 1250, "total_steps": 3751, "eval_loss": 0.26985007524490356, "epoch": 0.3332389156405685, "percentage": 33.32, "elapsed_time": "6:42:24", "remaining_time": "13:25:09"}
|
| 151 |
-
{"current_steps": 1260, "total_steps": 3751, "loss": 0.2719, "lr": 8.400619982885183e-06, "epoch": 0.33590482696569307, "percentage": 33.59, "elapsed_time": "6:44:42", "remaining_time": "13:20:06"}
|
| 152 |
-
{"current_steps": 1270, "total_steps": 3751, "loss": 0.2698, "lr": 8.366353262297069e-06, "epoch": 0.3385707382908176, "percentage": 33.86, "elapsed_time": "6:47:03", "remaining_time": "13:15:11"}
|
| 153 |
-
{"current_steps": 1280, "total_steps": 3751, "loss": 0.2792, "lr": 8.331794860303644e-06, "epoch": 0.34123664961594213, "percentage": 34.12, "elapsed_time": "6:49:21", "remaining_time": "13:10:15"}
|
| 154 |
-
{"current_steps": 1290, "total_steps": 3751, "loss": 0.2628, "lr": 8.296947771255905e-06, "epoch": 0.3439025609410667, "percentage": 34.39, "elapsed_time": "6:51:38", "remaining_time": "13:05:19"}
|
| 155 |
-
{"current_steps": 1300, "total_steps": 3751, "loss": 0.2694, "lr": 8.261815014518465e-06, "epoch": 0.34656847226619125, "percentage": 34.66, "elapsed_time": "6:53:57", "remaining_time": "13:00:27"}
|
| 156 |
-
{"current_steps": 1300, "total_steps": 3751, "eval_loss": 0.2712825834751129, "epoch": 0.34656847226619125, "percentage": 34.66, "elapsed_time": "6:58:20", "remaining_time": "13:08:44"}
|
| 157 |
-
{"current_steps": 1310, "total_steps": 3751, "loss": 0.2722, "lr": 8.226399634207929e-06, "epoch": 0.3492343835913158, "percentage": 34.92, "elapsed_time": "7:00:41", "remaining_time": "13:03:53"}
|
| 158 |
-
{"current_steps": 1320, "total_steps": 3751, "loss": 0.2645, "lr": 8.190704698929128e-06, "epoch": 0.3519002949164403, "percentage": 35.19, "elapsed_time": "7:03:00", "remaining_time": "12:59:01"}
|
| 159 |
-
{"current_steps": 1330, "total_steps": 3751, "loss": 0.2604, "lr": 8.154733301509249e-06, "epoch": 0.3545662062415649, "percentage": 35.46, "elapsed_time": "7:05:18", "remaining_time": "12:54:11"}
|
| 160 |
-
{"current_steps": 1340, "total_steps": 3751, "loss": 0.2671, "lr": 8.118488558729846e-06, "epoch": 0.35723211756668943, "percentage": 35.72, "elapsed_time": "7:07:41", "remaining_time": "12:49:31"}
|
| 161 |
-
{"current_steps": 1350, "total_steps": 3751, "loss": 0.2674, "lr": 8.081973611056784e-06, "epoch": 0.359898028891814, "percentage": 35.99, "elapsed_time": "7:10:02", "remaining_time": "12:44:50"}
|
| 162 |
-
{"current_steps": 1350, "total_steps": 3751, "eval_loss": 0.2661799490451813, "epoch": 0.359898028891814, "percentage": 35.99, "elapsed_time": "7:14:26", "remaining_time": "12:52:39"}
|
| 163 |
-
{"current_steps": 1360, "total_steps": 3751, "loss": 0.261, "lr": 8.045191622368128e-06, "epoch": 0.36256394021693855, "percentage": 36.26, "elapsed_time": "7:16:44", "remaining_time": "12:47:49"}
|
| 164 |
-
{"current_steps": 1370, "total_steps": 3751, "loss": 0.2642, "lr": 8.008145779680011e-06, "epoch": 0.36522985154206306, "percentage": 36.52, "elapsed_time": "7:19:04", "remaining_time": "12:43:06"}
|
| 165 |
-
{"current_steps": 1380, "total_steps": 3751, "loss": 0.2667, "lr": 7.970839292870488e-06, "epoch": 0.3678957628671876, "percentage": 36.79, "elapsed_time": "7:21:20", "remaining_time": "12:38:16"}
|
| 166 |
-
{"current_steps": 1390, "total_steps": 3751, "loss": 0.2561, "lr": 7.933275394401407e-06, "epoch": 0.3705616741923122, "percentage": 37.06, "elapsed_time": "7:23:38", "remaining_time": "12:33:33"}
|
| 167 |
-
{"current_steps": 1400, "total_steps": 3751, "loss": 0.2668, "lr": 7.89545733903834e-06, "epoch": 0.37322758551743673, "percentage": 37.32, "elapsed_time": "7:25:55", "remaining_time": "12:28:49"}
|
| 168 |
-
{"current_steps": 1400, "total_steps": 3751, "eval_loss": 0.26635491847991943, "epoch": 0.37322758551743673, "percentage": 37.32, "elapsed_time": "7:30:18", "remaining_time": "12:36:12"}
|
| 169 |
-
{"current_steps": 1410, "total_steps": 3751, "loss": 0.2547, "lr": 7.857388403568564e-06, "epoch": 0.3758934968425613, "percentage": 37.59, "elapsed_time": "7:32:38", "remaining_time": "12:31:31"}
|
| 170 |
-
{"current_steps": 1420, "total_steps": 3751, "loss": 0.2646, "lr": 7.819071886517134e-06, "epoch": 0.3785594081676858, "percentage": 37.86, "elapsed_time": "7:34:57", "remaining_time": "12:26:50"}
|
| 171 |
-
{"current_steps": 1430, "total_steps": 3751, "loss": 0.2579, "lr": 7.780511107861095e-06, "epoch": 0.38122531949281036, "percentage": 38.12, "elapsed_time": "7:37:14", "remaining_time": "12:22:07"}
|
| 172 |
-
{"current_steps": 1440, "total_steps": 3751, "loss": 0.2527, "lr": 7.741709408741804e-06, "epoch": 0.3838912308179349, "percentage": 38.39, "elapsed_time": "7:39:32", "remaining_time": "12:17:29"}
|
| 173 |
-
{"current_steps": 1450, "total_steps": 3751, "loss": 0.262, "lr": 7.702670151175435e-06, "epoch": 0.3865571421430595, "percentage": 38.66, "elapsed_time": "7:41:52", "remaining_time": "12:12:56"}
|
| 174 |
-
{"current_steps": 1450, "total_steps": 3751, "eval_loss": 0.269025057554245, "epoch": 0.3865571421430595, "percentage": 38.66, "elapsed_time": "7:46:15", "remaining_time": "12:19:54"}
|
| 175 |
-
{"current_steps": 1460, "total_steps": 3751, "loss": 0.2603, "lr": 7.663396717761687e-06, "epoch": 0.38922305346818403, "percentage": 38.92, "elapsed_time": "7:48:36", "remaining_time": "12:15:19"}
|
| 176 |
-
{"current_steps": 1470, "total_steps": 3751, "loss": 0.2588, "lr": 7.6238925113906715e-06, "epoch": 0.39188896479330854, "percentage": 39.19, "elapsed_time": "7:50:52", "remaining_time": "12:10:39"}
|
| 177 |
-
{"current_steps": 1480, "total_steps": 3751, "loss": 0.2687, "lr": 7.5841609549480854e-06, "epoch": 0.3945548761184331, "percentage": 39.46, "elapsed_time": "7:53:09", "remaining_time": "12:06:02"}
|
| 178 |
-
{"current_steps": 1490, "total_steps": 3751, "loss": 0.2524, "lr": 7.544205491018626e-06, "epoch": 0.39722078744355765, "percentage": 39.72, "elapsed_time": "7:55:31", "remaining_time": "12:01:35"}
|
| 179 |
-
{"current_steps": 1500, "total_steps": 3751, "loss": 0.2481, "lr": 7.5040295815877e-06, "epoch": 0.3998866987686822, "percentage": 39.99, "elapsed_time": "7:57:51", "remaining_time": "11:57:06"}
|
| 180 |
-
{"current_steps": 1500, "total_steps": 3751, "eval_loss": 0.25703608989715576, "epoch": 0.3998866987686822, "percentage": 39.99, "elapsed_time": "8:02:14", "remaining_time": "12:03:41"}
|
|
|
|
| 1 |
+
{"current_steps": 10, "total_steps": 3751, "loss": 0.5957, "lr": 2.6595744680851066e-07, "epoch": 0.002665911325124548, "percentage": 0.27, "elapsed_time": "0:02:24", "remaining_time": "14:58:16"}
|
| 2 |
+
{"current_steps": 20, "total_steps": 3751, "loss": 0.5163, "lr": 5.319148936170213e-07, "epoch": 0.005331822650249096, "percentage": 0.53, "elapsed_time": "0:04:37", "remaining_time": "14:23:41"}
|
| 3 |
+
{"current_steps": 30, "total_steps": 3751, "loss": 0.4334, "lr": 7.97872340425532e-07, "epoch": 0.007997733975373645, "percentage": 0.8, "elapsed_time": "0:07:00", "remaining_time": "14:30:12"}
|
| 4 |
+
{"current_steps": 40, "total_steps": 3751, "loss": 0.3824, "lr": 1.0638297872340427e-06, "epoch": 0.010663645300498192, "percentage": 1.07, "elapsed_time": "0:09:17", "remaining_time": "14:21:59"}
|
| 5 |
+
{"current_steps": 50, "total_steps": 3751, "loss": 0.3821, "lr": 1.3297872340425533e-06, "epoch": 0.01332955662562274, "percentage": 1.33, "elapsed_time": "0:11:36", "remaining_time": "14:19:22"}
|
| 6 |
+
{"current_steps": 50, "total_steps": 3751, "eval_loss": 0.4735161364078522, "epoch": 0.01332955662562274, "percentage": 1.33, "elapsed_time": "0:15:59", "remaining_time": "19:43:59"}
|
| 7 |
+
{"current_steps": 60, "total_steps": 3751, "loss": 0.3431, "lr": 1.595744680851064e-06, "epoch": 0.01599546795074729, "percentage": 1.6, "elapsed_time": "0:18:18", "remaining_time": "18:45:49"}
|
| 8 |
+
{"current_steps": 70, "total_steps": 3751, "loss": 0.3327, "lr": 1.8617021276595745e-06, "epoch": 0.018661379275871838, "percentage": 1.87, "elapsed_time": "0:20:36", "remaining_time": "18:03:51"}
|
| 9 |
+
{"current_steps": 80, "total_steps": 3751, "loss": 0.3079, "lr": 2.1276595744680853e-06, "epoch": 0.021327290600996383, "percentage": 2.13, "elapsed_time": "0:22:54", "remaining_time": "17:31:07"}
|
| 10 |
+
{"current_steps": 90, "total_steps": 3751, "loss": 0.306, "lr": 2.393617021276596e-06, "epoch": 0.023993201926120932, "percentage": 2.4, "elapsed_time": "0:25:15", "remaining_time": "17:07:34"}
|
| 11 |
+
{"current_steps": 100, "total_steps": 3751, "loss": 0.302, "lr": 2.6595744680851065e-06, "epoch": 0.02665911325124548, "percentage": 2.67, "elapsed_time": "0:27:33", "remaining_time": "16:46:04"}
|
| 12 |
+
{"current_steps": 100, "total_steps": 3751, "eval_loss": 0.3177907168865204, "epoch": 0.02665911325124548, "percentage": 2.67, "elapsed_time": "0:31:56", "remaining_time": "19:26:13"}
|
| 13 |
+
{"current_steps": 110, "total_steps": 3751, "loss": 0.2978, "lr": 2.9255319148936174e-06, "epoch": 0.02932502457637003, "percentage": 2.93, "elapsed_time": "0:34:14", "remaining_time": "18:53:34"}
|
| 14 |
+
{"current_steps": 120, "total_steps": 3751, "loss": 0.3046, "lr": 3.191489361702128e-06, "epoch": 0.03199093590149458, "percentage": 3.2, "elapsed_time": "0:36:33", "remaining_time": "18:26:07"}
|
| 15 |
+
{"current_steps": 130, "total_steps": 3751, "loss": 0.3074, "lr": 3.457446808510639e-06, "epoch": 0.034656847226619124, "percentage": 3.47, "elapsed_time": "0:38:49", "remaining_time": "18:01:18"}
|
| 16 |
+
{"current_steps": 140, "total_steps": 3751, "loss": 0.3147, "lr": 3.723404255319149e-06, "epoch": 0.037322758551743676, "percentage": 3.73, "elapsed_time": "0:41:07", "remaining_time": "17:40:42"}
|
| 17 |
+
{"current_steps": 150, "total_steps": 3751, "loss": 0.2988, "lr": 3.98936170212766e-06, "epoch": 0.03998866987686822, "percentage": 4.0, "elapsed_time": "0:43:25", "remaining_time": "17:22:41"}
|
| 18 |
+
{"current_steps": 150, "total_steps": 3751, "eval_loss": 0.32532769441604614, "epoch": 0.03998866987686822, "percentage": 4.0, "elapsed_time": "0:47:48", "remaining_time": "19:07:55"}
|
| 19 |
+
{"current_steps": 160, "total_steps": 3751, "loss": 0.3049, "lr": 4.255319148936171e-06, "epoch": 0.04265458120199277, "percentage": 4.27, "elapsed_time": "0:50:09", "remaining_time": "18:45:35"}
|
| 20 |
+
{"current_steps": 170, "total_steps": 3751, "loss": 0.3077, "lr": 4.521276595744681e-06, "epoch": 0.04532049252711732, "percentage": 4.53, "elapsed_time": "0:52:26", "remaining_time": "18:24:31"}
|
| 21 |
+
{"current_steps": 180, "total_steps": 3751, "loss": 0.2957, "lr": 4.787234042553192e-06, "epoch": 0.047986403852241864, "percentage": 4.8, "elapsed_time": "0:54:46", "remaining_time": "18:06:31"}
|
| 22 |
+
{"current_steps": 190, "total_steps": 3751, "loss": 0.2993, "lr": 5.053191489361703e-06, "epoch": 0.05065231517736641, "percentage": 5.07, "elapsed_time": "0:57:06", "remaining_time": "17:50:12"}
|
| 23 |
+
{"current_steps": 200, "total_steps": 3751, "loss": 0.3054, "lr": 5.319148936170213e-06, "epoch": 0.05331822650249096, "percentage": 5.33, "elapsed_time": "0:59:28", "remaining_time": "17:36:07"}
|
| 24 |
+
{"current_steps": 200, "total_steps": 3751, "eval_loss": 0.3250272572040558, "epoch": 0.05331822650249096, "percentage": 5.33, "elapsed_time": "1:03:52", "remaining_time": "18:53:58"}
|
| 25 |
+
{"current_steps": 210, "total_steps": 3751, "loss": 0.3117, "lr": 5.5851063829787235e-06, "epoch": 0.05598413782761551, "percentage": 5.6, "elapsed_time": "1:06:05", "remaining_time": "18:34:26"}
|
| 26 |
+
{"current_steps": 220, "total_steps": 3751, "loss": 0.3102, "lr": 5.851063829787235e-06, "epoch": 0.05865004915274006, "percentage": 5.87, "elapsed_time": "1:08:24", "remaining_time": "18:17:54"}
|
| 27 |
+
{"current_steps": 230, "total_steps": 3751, "loss": 0.3096, "lr": 6.117021276595745e-06, "epoch": 0.061315960477864605, "percentage": 6.13, "elapsed_time": "1:10:43", "remaining_time": "18:02:42"}
|
| 28 |
+
{"current_steps": 240, "total_steps": 3751, "loss": 0.3046, "lr": 6.382978723404256e-06, "epoch": 0.06398187180298916, "percentage": 6.4, "elapsed_time": "1:13:00", "remaining_time": "17:47:59"}
|
| 29 |
+
{"current_steps": 250, "total_steps": 3751, "loss": 0.2967, "lr": 6.648936170212767e-06, "epoch": 0.0666477831281137, "percentage": 6.66, "elapsed_time": "1:15:20", "remaining_time": "17:35:08"}
|
| 30 |
+
{"current_steps": 250, "total_steps": 3751, "eval_loss": 0.3232134282588959, "epoch": 0.0666477831281137, "percentage": 6.66, "elapsed_time": "1:19:43", "remaining_time": "18:36:31"}
|
| 31 |
+
{"current_steps": 260, "total_steps": 3751, "loss": 0.3112, "lr": 6.914893617021278e-06, "epoch": 0.06931369445323825, "percentage": 6.93, "elapsed_time": "1:22:01", "remaining_time": "18:21:19"}
|
| 32 |
+
{"current_steps": 270, "total_steps": 3751, "loss": 0.314, "lr": 7.1808510638297875e-06, "epoch": 0.07197960577836279, "percentage": 7.2, "elapsed_time": "1:24:18", "remaining_time": "18:06:57"}
|
| 33 |
+
{"current_steps": 280, "total_steps": 3751, "loss": 0.3041, "lr": 7.446808510638298e-06, "epoch": 0.07464551710348735, "percentage": 7.46, "elapsed_time": "1:26:38", "remaining_time": "17:54:04"}
|
| 34 |
+
{"current_steps": 290, "total_steps": 3751, "loss": 0.3136, "lr": 7.71276595744681e-06, "epoch": 0.0773114284286119, "percentage": 7.73, "elapsed_time": "1:28:59", "remaining_time": "17:42:08"}
|
| 35 |
+
{"current_steps": 300, "total_steps": 3751, "loss": 0.3137, "lr": 7.97872340425532e-06, "epoch": 0.07997733975373644, "percentage": 8.0, "elapsed_time": "1:31:19", "remaining_time": "17:30:30"}
|
| 36 |
+
{"current_steps": 300, "total_steps": 3751, "eval_loss": 0.3207298815250397, "epoch": 0.07997733975373644, "percentage": 8.0, "elapsed_time": "1:35:42", "remaining_time": "18:20:55"}
|
| 37 |
+
{"current_steps": 310, "total_steps": 3751, "loss": 0.3092, "lr": 8.24468085106383e-06, "epoch": 0.08264325107886099, "percentage": 8.26, "elapsed_time": "1:37:58", "remaining_time": "18:07:26"}
|
| 38 |
+
{"current_steps": 320, "total_steps": 3751, "loss": 0.3255, "lr": 8.510638297872341e-06, "epoch": 0.08530916240398553, "percentage": 8.53, "elapsed_time": "1:40:14", "remaining_time": "17:54:48"}
|
| 39 |
+
{"current_steps": 330, "total_steps": 3751, "loss": 0.3219, "lr": 8.776595744680852e-06, "epoch": 0.08797507372911008, "percentage": 8.8, "elapsed_time": "1:42:34", "remaining_time": "17:43:19"}
|
| 40 |
+
{"current_steps": 340, "total_steps": 3751, "loss": 0.3093, "lr": 9.042553191489362e-06, "epoch": 0.09064098505423464, "percentage": 9.06, "elapsed_time": "1:44:51", "remaining_time": "17:32:01"}
|
| 41 |
+
{"current_steps": 350, "total_steps": 3751, "loss": 0.3221, "lr": 9.308510638297872e-06, "epoch": 0.09330689637935918, "percentage": 9.33, "elapsed_time": "1:47:12", "remaining_time": "17:21:41"}
|
| 42 |
+
{"current_steps": 350, "total_steps": 3751, "eval_loss": 0.3211060166358948, "epoch": 0.09330689637935918, "percentage": 9.33, "elapsed_time": "1:51:35", "remaining_time": "18:04:16"}
|
| 43 |
+
{"current_steps": 360, "total_steps": 3751, "loss": 0.3209, "lr": 9.574468085106385e-06, "epoch": 0.09597280770448373, "percentage": 9.6, "elapsed_time": "1:53:50", "remaining_time": "17:52:15"}
|
| 44 |
+
{"current_steps": 370, "total_steps": 3751, "loss": 0.3188, "lr": 9.840425531914895e-06, "epoch": 0.09863871902960827, "percentage": 9.86, "elapsed_time": "1:56:12", "remaining_time": "17:41:53"}
|
| 45 |
+
{"current_steps": 380, "total_steps": 3751, "loss": 0.3202, "lr": 9.999965341346946e-06, "epoch": 0.10130463035473282, "percentage": 10.13, "elapsed_time": "1:58:33", "remaining_time": "17:31:41"}
|
| 46 |
+
{"current_steps": 390, "total_steps": 3751, "loss": 0.3143, "lr": 9.999575437018172e-06, "epoch": 0.10397054167985738, "percentage": 10.4, "elapsed_time": "2:00:56", "remaining_time": "17:22:19"}
|
| 47 |
+
{"current_steps": 400, "total_steps": 3751, "loss": 0.3188, "lr": 9.998752338940612e-06, "epoch": 0.10663645300498192, "percentage": 10.66, "elapsed_time": "2:03:18", "remaining_time": "17:13:01"}
|
| 48 |
+
{"current_steps": 400, "total_steps": 3751, "eval_loss": 0.3204084634780884, "epoch": 0.10663645300498192, "percentage": 10.66, "elapsed_time": "2:07:41", "remaining_time": "17:49:45"}
|
| 49 |
+
{"current_steps": 410, "total_steps": 3751, "loss": 0.3026, "lr": 9.997496118432509e-06, "epoch": 0.10930236433010647, "percentage": 10.93, "elapsed_time": "2:09:59", "remaining_time": "17:39:15"}
|
| 50 |
+
{"current_steps": 420, "total_steps": 3751, "loss": 0.3192, "lr": 9.995806884340483e-06, "epoch": 0.11196827565523101, "percentage": 11.2, "elapsed_time": "2:12:16", "remaining_time": "17:29:02"}
|
| 51 |
+
{"current_steps": 430, "total_steps": 3751, "loss": 0.3143, "lr": 9.99368478303009e-06, "epoch": 0.11463418698035556, "percentage": 11.46, "elapsed_time": "2:14:36", "remaining_time": "17:19:33"}
|
| 52 |
+
{"current_steps": 440, "total_steps": 3751, "loss": 0.3189, "lr": 9.991129998373145e-06, "epoch": 0.11730009830548012, "percentage": 11.73, "elapsed_time": "2:16:56", "remaining_time": "17:10:27"}
|
| 53 |
+
{"current_steps": 450, "total_steps": 3751, "loss": 0.308, "lr": 9.988142751731797e-06, "epoch": 0.11996600963060466, "percentage": 12.0, "elapsed_time": "2:19:14", "remaining_time": "17:01:22"}
|
| 54 |
+
{"current_steps": 450, "total_steps": 3751, "eval_loss": 0.31486886739730835, "epoch": 0.11996600963060466, "percentage": 12.0, "elapsed_time": "2:23:37", "remaining_time": "17:33:31"}
|
| 55 |
+
{"current_steps": 460, "total_steps": 3751, "loss": 0.3172, "lr": 9.984723301939337e-06, "epoch": 0.12263192095572921, "percentage": 12.26, "elapsed_time": "2:25:55", "remaining_time": "17:24:02"}
|
| 56 |
+
{"current_steps": 470, "total_steps": 3751, "loss": 0.3147, "lr": 9.980871945277777e-06, "epoch": 0.12529783228085375, "percentage": 12.53, "elapsed_time": "2:28:14", "remaining_time": "17:14:49"}
|
| 57 |
+
{"current_steps": 480, "total_steps": 3751, "loss": 0.3211, "lr": 9.976589015452178e-06, "epoch": 0.12796374360597831, "percentage": 12.8, "elapsed_time": "2:30:32", "remaining_time": "17:05:51"}
|
| 58 |
+
{"current_steps": 490, "total_steps": 3751, "loss": 0.3109, "lr": 9.97187488356174e-06, "epoch": 0.13062965493110285, "percentage": 13.06, "elapsed_time": "2:32:51", "remaining_time": "16:57:15"}
|
| 59 |
+
{"current_steps": 500, "total_steps": 3751, "loss": 0.3123, "lr": 9.966729958067638e-06, "epoch": 0.1332955662562274, "percentage": 13.33, "elapsed_time": "2:35:08", "remaining_time": "16:48:43"}
|
| 60 |
+
{"current_steps": 500, "total_steps": 3751, "eval_loss": 0.3106406331062317, "epoch": 0.1332955662562274, "percentage": 13.33, "elapsed_time": "2:39:31", "remaining_time": "17:17:13"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 7544
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:22ebf1c25989dc4abd15cd7d7c48b7dd4e74040858c521200ef3704895f769e7
|
| 3 |
size 7544
|