sedrickkeh commited on
Commit
90d6300
·
verified ·
1 Parent(s): f80b5cb

Training in progress, epoch 2

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61b515ac24751ba02c6121e346d9da80dd316cf1f7933e1fcc8ff11b8e6a40aa
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c153774163b86d73dfcfee38369d1ae59a73a2c58e59a4dc26b5965a3a2dac6
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b67916fccc37908075e0eab6356924fd6c792fb0399bced52b68ef6dd18ff42
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bcc34b2094236c33648bc5b75da1c56a088d5c36ff28ef5ade723d0f938ce25
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b02c99cc6b1a05ea89fafc05ea54061cbda1e78d3ea0a803a8309c7801506e48
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:220f171e466320106106a4a037879c35418568ff431d21158d0a7d925932582d
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2d8de5a9801bc9e0fb5ef697c050a481374c81f994dbd9cc06cccc7ae00dc96
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:359a641d72ad0730fddc7de5bb7630d207b1b8e60300d6a9998d4f1060480a1a
3
  size 1089994880
trainer_log.jsonl CHANGED
@@ -125,3 +125,63 @@
125
  {"current_steps": 125, "total_steps": 186, "loss": 1.2448, "lr": 2.946286626483463e-06, "epoch": 2.0, "percentage": 67.2, "elapsed_time": "1:56:36", "remaining_time": "0:56:54"}
126
  {"current_steps": 126, "total_steps": 186, "loss": 0.7986, "lr": 2.8608960482523058e-06, "epoch": 2.016, "percentage": 67.74, "elapsed_time": "1:58:31", "remaining_time": "0:56:26"}
127
  {"current_steps": 127, "total_steps": 186, "loss": 0.7906, "lr": 2.776262452601104e-06, "epoch": 2.032, "percentage": 68.28, "elapsed_time": "1:59:23", "remaining_time": "0:55:27"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  {"current_steps": 125, "total_steps": 186, "loss": 1.2448, "lr": 2.946286626483463e-06, "epoch": 2.0, "percentage": 67.2, "elapsed_time": "1:56:36", "remaining_time": "0:56:54"}
126
  {"current_steps": 126, "total_steps": 186, "loss": 0.7986, "lr": 2.8608960482523058e-06, "epoch": 2.016, "percentage": 67.74, "elapsed_time": "1:58:31", "remaining_time": "0:56:26"}
127
  {"current_steps": 127, "total_steps": 186, "loss": 0.7906, "lr": 2.776262452601104e-06, "epoch": 2.032, "percentage": 68.28, "elapsed_time": "1:59:23", "remaining_time": "0:55:27"}
128
+ {"current_steps": 128, "total_steps": 186, "loss": 0.8128, "lr": 2.6924157895271563e-06, "epoch": 2.048, "percentage": 68.82, "elapsed_time": "2:00:26", "remaining_time": "0:54:34"}
129
+ {"current_steps": 129, "total_steps": 186, "loss": 0.8217, "lr": 2.6093857305493666e-06, "epoch": 2.064, "percentage": 69.35, "elapsed_time": "2:01:26", "remaining_time": "0:53:39"}
130
+ {"current_steps": 130, "total_steps": 186, "loss": 0.7677, "lr": 2.5272016582081236e-06, "epoch": 2.08, "percentage": 69.89, "elapsed_time": "2:02:08", "remaining_time": "0:52:37"}
131
+ {"current_steps": 131, "total_steps": 186, "loss": 0.8047, "lr": 2.445892655667462e-06, "epoch": 2.096, "percentage": 70.43, "elapsed_time": "2:03:05", "remaining_time": "0:51:40"}
132
+ {"current_steps": 132, "total_steps": 186, "loss": 0.8069, "lr": 2.365487496423152e-06, "epoch": 2.112, "percentage": 70.97, "elapsed_time": "2:03:58", "remaining_time": "0:50:43"}
133
+ {"current_steps": 133, "total_steps": 186, "loss": 0.7955, "lr": 2.2860146341203936e-06, "epoch": 2.128, "percentage": 71.51, "elapsed_time": "2:04:48", "remaining_time": "0:49:44"}
134
+ {"current_steps": 134, "total_steps": 186, "loss": 0.8239, "lr": 2.207502192484685e-06, "epoch": 2.144, "percentage": 72.04, "elapsed_time": "2:05:41", "remaining_time": "0:48:46"}
135
+ {"current_steps": 135, "total_steps": 186, "loss": 0.8155, "lr": 2.1299779553694323e-06, "epoch": 2.16, "percentage": 72.58, "elapsed_time": "2:06:42", "remaining_time": "0:47:52"}
136
+ {"current_steps": 136, "total_steps": 186, "loss": 0.8109, "lr": 2.053469356923865e-06, "epoch": 2.176, "percentage": 73.12, "elapsed_time": "2:07:47", "remaining_time": "0:46:58"}
137
+ {"current_steps": 137, "total_steps": 186, "loss": 0.8485, "lr": 1.9780034718846653e-06, "epoch": 2.192, "percentage": 73.66, "elapsed_time": "2:08:41", "remaining_time": "0:46:01"}
138
+ {"current_steps": 138, "total_steps": 186, "loss": 0.7487, "lr": 1.9036070059948253e-06, "epoch": 2.208, "percentage": 74.19, "elapsed_time": "2:09:26", "remaining_time": "0:45:01"}
139
+ {"current_steps": 139, "total_steps": 186, "loss": 0.8066, "lr": 1.8303062865530407e-06, "epoch": 2.224, "percentage": 74.73, "elapsed_time": "2:10:22", "remaining_time": "0:44:04"}
140
+ {"current_steps": 140, "total_steps": 186, "loss": 0.779, "lr": 1.7581272530970666e-06, "epoch": 2.24, "percentage": 75.27, "elapsed_time": "2:11:23", "remaining_time": "0:43:10"}
141
+ {"current_steps": 141, "total_steps": 186, "loss": 0.7981, "lr": 1.6870954482242707e-06, "epoch": 2.2560000000000002, "percentage": 75.81, "elapsed_time": "2:12:17", "remaining_time": "0:42:13"}
142
+ {"current_steps": 142, "total_steps": 186, "loss": 0.7759, "lr": 1.6172360085526567e-06, "epoch": 2.2720000000000002, "percentage": 76.34, "elapsed_time": "2:13:14", "remaining_time": "0:41:17"}
143
+ {"current_steps": 143, "total_steps": 186, "loss": 0.7576, "lr": 1.54857365582557e-06, "epoch": 2.288, "percentage": 76.88, "elapsed_time": "2:14:09", "remaining_time": "0:40:20"}
144
+ {"current_steps": 144, "total_steps": 186, "loss": 0.805, "lr": 1.4811326881631937e-06, "epoch": 2.304, "percentage": 77.42, "elapsed_time": "2:15:00", "remaining_time": "0:39:22"}
145
+ {"current_steps": 145, "total_steps": 186, "loss": 0.7901, "lr": 1.4149369714639856e-06, "epoch": 2.32, "percentage": 77.96, "elapsed_time": "2:15:56", "remaining_time": "0:38:26"}
146
+ {"current_steps": 146, "total_steps": 186, "loss": 0.7918, "lr": 1.3500099309590397e-06, "epoch": 2.336, "percentage": 78.49, "elapsed_time": "2:16:51", "remaining_time": "0:37:29"}
147
+ {"current_steps": 147, "total_steps": 186, "loss": 0.8227, "lr": 1.2863745429224145e-06, "epoch": 2.352, "percentage": 79.03, "elapsed_time": "2:17:46", "remaining_time": "0:36:33"}
148
+ {"current_steps": 148, "total_steps": 186, "loss": 0.7966, "lr": 1.22405332654032e-06, "epoch": 2.368, "percentage": 79.57, "elapsed_time": "2:18:35", "remaining_time": "0:35:34"}
149
+ {"current_steps": 149, "total_steps": 186, "loss": 0.7853, "lr": 1.1630683359420653e-06, "epoch": 2.384, "percentage": 80.11, "elapsed_time": "2:19:28", "remaining_time": "0:34:38"}
150
+ {"current_steps": 150, "total_steps": 186, "loss": 0.7966, "lr": 1.103441152395588e-06, "epoch": 2.4, "percentage": 80.65, "elapsed_time": "2:20:34", "remaining_time": "0:33:44"}
151
+ {"current_steps": 151, "total_steps": 186, "loss": 0.7746, "lr": 1.045192876670298e-06, "epoch": 2.416, "percentage": 81.18, "elapsed_time": "2:21:24", "remaining_time": "0:32:46"}
152
+ {"current_steps": 152, "total_steps": 186, "loss": 0.8016, "lr": 9.883441215699824e-07, "epoch": 2.432, "percentage": 81.72, "elapsed_time": "2:22:21", "remaining_time": "0:31:50"}
153
+ {"current_steps": 153, "total_steps": 186, "loss": 0.8247, "lr": 9.329150046383773e-07, "epoch": 2.448, "percentage": 82.26, "elapsed_time": "2:23:19", "remaining_time": "0:30:54"}
154
+ {"current_steps": 154, "total_steps": 186, "loss": 0.8123, "lr": 8.789251410400024e-07, "epoch": 2.464, "percentage": 82.8, "elapsed_time": "2:24:17", "remaining_time": "0:29:58"}
155
+ {"current_steps": 155, "total_steps": 186, "loss": 0.7802, "lr": 8.263936366187825e-07, "epoch": 2.48, "percentage": 83.33, "elapsed_time": "2:25:01", "remaining_time": "0:29:00"}
156
+ {"current_steps": 156, "total_steps": 186, "loss": 0.8144, "lr": 7.753390811368972e-07, "epoch": 2.496, "percentage": 83.87, "elapsed_time": "2:26:04", "remaining_time": "0:28:05"}
157
+ {"current_steps": 157, "total_steps": 186, "loss": 0.8041, "lr": 7.257795416962754e-07, "epoch": 2.512, "percentage": 84.41, "elapsed_time": "2:26:58", "remaining_time": "0:27:08"}
158
+ {"current_steps": 158, "total_steps": 186, "loss": 0.8293, "lr": 6.777325563450282e-07, "epoch": 2.528, "percentage": 84.95, "elapsed_time": "2:28:02", "remaining_time": "0:26:14"}
159
+ {"current_steps": 159, "total_steps": 186, "loss": 0.7941, "lr": 6.312151278711237e-07, "epoch": 2.544, "percentage": 85.48, "elapsed_time": "2:28:58", "remaining_time": "0:25:17"}
160
+ {"current_steps": 160, "total_steps": 186, "loss": 0.7833, "lr": 5.862437177854629e-07, "epoch": 2.56, "percentage": 86.02, "elapsed_time": "2:29:53", "remaining_time": "0:24:21"}
161
+ {"current_steps": 161, "total_steps": 186, "loss": 0.8054, "lr": 5.428342404965076e-07, "epoch": 2.576, "percentage": 86.56, "elapsed_time": "2:30:51", "remaining_time": "0:23:25"}
162
+ {"current_steps": 162, "total_steps": 186, "loss": 0.7977, "lr": 5.010020576785174e-07, "epoch": 2.592, "percentage": 87.1, "elapsed_time": "2:31:47", "remaining_time": "0:22:29"}
163
+ {"current_steps": 163, "total_steps": 186, "loss": 0.7742, "lr": 4.607619728353818e-07, "epoch": 2.608, "percentage": 87.63, "elapsed_time": "2:32:46", "remaining_time": "0:21:33"}
164
+ {"current_steps": 164, "total_steps": 186, "loss": 0.7976, "lr": 4.221282260619891e-07, "epoch": 2.624, "percentage": 88.17, "elapsed_time": "2:33:37", "remaining_time": "0:20:36"}
165
+ {"current_steps": 165, "total_steps": 186, "loss": 0.869, "lr": 3.851144890049535e-07, "epoch": 2.64, "percentage": 88.71, "elapsed_time": "2:34:35", "remaining_time": "0:19:40"}
166
+ {"current_steps": 166, "total_steps": 186, "loss": 0.838, "lr": 3.497338600245254e-07, "epoch": 2.656, "percentage": 89.25, "elapsed_time": "2:35:28", "remaining_time": "0:18:43"}
167
+ {"current_steps": 167, "total_steps": 186, "loss": 0.8194, "lr": 3.159988595593616e-07, "epoch": 2.672, "percentage": 89.78, "elapsed_time": "2:36:28", "remaining_time": "0:17:48"}
168
+ {"current_steps": 168, "total_steps": 186, "loss": 0.7788, "lr": 2.839214256958106e-07, "epoch": 2.6879999999999997, "percentage": 90.32, "elapsed_time": "2:37:21", "remaining_time": "0:16:51"}
169
+ {"current_steps": 169, "total_steps": 186, "loss": 0.785, "lr": 2.5351290994328703e-07, "epoch": 2.7039999999999997, "percentage": 90.86, "elapsed_time": "2:38:18", "remaining_time": "0:15:55"}
170
+ {"current_steps": 170, "total_steps": 186, "loss": 0.8027, "lr": 2.2478407321721295e-07, "epoch": 2.7199999999999998, "percentage": 91.4, "elapsed_time": "2:39:06", "remaining_time": "0:14:58"}
171
+ {"current_steps": 171, "total_steps": 186, "loss": 0.7901, "lr": 1.9774508203096843e-07, "epoch": 2.7359999999999998, "percentage": 91.94, "elapsed_time": "2:39:58", "remaining_time": "0:14:01"}
172
+ {"current_steps": 172, "total_steps": 186, "loss": 0.8184, "lr": 1.7240550489817652e-07, "epoch": 2.752, "percentage": 92.47, "elapsed_time": "2:40:53", "remaining_time": "0:13:05"}
173
+ {"current_steps": 173, "total_steps": 186, "loss": 0.8117, "lr": 1.4877430894662037e-07, "epoch": 2.768, "percentage": 93.01, "elapsed_time": "2:41:55", "remaining_time": "0:12:10"}
174
+ {"current_steps": 174, "total_steps": 186, "loss": 0.7767, "lr": 1.268598567449647e-07, "epoch": 2.784, "percentage": 93.55, "elapsed_time": "2:42:56", "remaining_time": "0:11:14"}
175
+ {"current_steps": 175, "total_steps": 186, "loss": 0.7666, "lr": 1.0666990334342708e-07, "epoch": 2.8, "percentage": 94.09, "elapsed_time": "2:43:51", "remaining_time": "0:10:17"}
176
+ {"current_steps": 176, "total_steps": 186, "loss": 0.816, "lr": 8.821159352943142e-08, "epoch": 2.816, "percentage": 94.62, "elapsed_time": "2:44:52", "remaining_time": "0:09:22"}
177
+ {"current_steps": 177, "total_steps": 186, "loss": 0.7514, "lr": 7.149145929922607e-08, "epoch": 2.832, "percentage": 95.16, "elapsed_time": "2:45:46", "remaining_time": "0:08:25"}
178
+ {"current_steps": 178, "total_steps": 186, "loss": 0.7848, "lr": 5.651541754634726e-08, "epoch": 2.848, "percentage": 95.7, "elapsed_time": "2:46:42", "remaining_time": "0:07:29"}
179
+ {"current_steps": 179, "total_steps": 186, "loss": 0.8048, "lr": 4.328876796776071e-08, "epoch": 2.864, "percentage": 96.24, "elapsed_time": "2:47:36", "remaining_time": "0:06:33"}
180
+ {"current_steps": 180, "total_steps": 186, "loss": 0.8017, "lr": 3.181619118841517e-08, "epoch": 2.88, "percentage": 96.77, "elapsed_time": "2:48:29", "remaining_time": "0:05:36"}
181
+ {"current_steps": 181, "total_steps": 186, "loss": 0.8113, "lr": 2.210174710486679e-08, "epoch": 2.896, "percentage": 97.31, "elapsed_time": "2:49:22", "remaining_time": "0:04:40"}
182
+ {"current_steps": 182, "total_steps": 186, "loss": 0.7914, "lr": 1.4148873448573408e-08, "epoch": 2.912, "percentage": 97.85, "elapsed_time": "2:50:10", "remaining_time": "0:03:44"}
183
+ {"current_steps": 183, "total_steps": 186, "loss": 0.7951, "lr": 7.96038456935322e-09, "epoch": 2.928, "percentage": 98.39, "elapsed_time": "2:51:01", "remaining_time": "0:02:48"}
184
+ {"current_steps": 184, "total_steps": 186, "loss": 0.7816, "lr": 3.538470439448105e-09, "epoch": 2.944, "percentage": 98.92, "elapsed_time": "2:51:49", "remaining_time": "0:01:52"}
185
+ {"current_steps": 185, "total_steps": 186, "loss": 0.812, "lr": 8.846958785418969e-10, "epoch": 2.96, "percentage": 99.46, "elapsed_time": "2:52:46", "remaining_time": "0:00:56"}
186
+ {"current_steps": 186, "total_steps": 186, "loss": 0.773, "lr": 0.0, "epoch": 2.976, "percentage": 100.0, "elapsed_time": "2:53:45", "remaining_time": "0:00:00"}
187
+ {"current_steps": 186, "total_steps": 186, "epoch": 2.976, "percentage": 100.0, "elapsed_time": "2:55:58", "remaining_time": "0:00:00"}