rbelanec commited on
Commit
041b9ed
·
verified ·
1 Parent(s): ba37a30

Training in progress, step 1600

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +41 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b049d5651cc3c510e8db7cf03abb995e40228a85051d1de3907b1478e216e713
3
  size 18124968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68e48b334f316f3a049c22db11cb593976935623758a5d9979ae397fae1fabc2
3
  size 18124968
trainer_log.jsonl CHANGED
@@ -285,3 +285,44 @@
285
  {"current_steps": 1395, "total_steps": 40000, "loss": 0.4778, "lr": 4.985031390416469e-05, "epoch": 0.35721144613020933, "percentage": 3.49, "elapsed_time": "1:05:57", "remaining_time": "1 day, 6:25:23", "throughput": 4124.22, "total_tokens": 16322176}
286
  {"current_steps": 1400, "total_steps": 40000, "loss": 0.5305, "lr": 4.984923927255461e-05, "epoch": 0.3584917738941169, "percentage": 3.5, "elapsed_time": "1:06:06", "remaining_time": "1 day, 6:22:47", "throughput": 4129.2, "total_tokens": 16379264}
287
  {"current_steps": 1400, "total_steps": 40000, "eval_loss": 0.5197495222091675, "epoch": 0.3584917738941169, "percentage": 3.5, "elapsed_time": "1:10:05", "remaining_time": "1 day, 8:12:23", "throughput": 3895.01, "total_tokens": 16379264}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
285
  {"current_steps": 1395, "total_steps": 40000, "loss": 0.4778, "lr": 4.985031390416469e-05, "epoch": 0.35721144613020933, "percentage": 3.49, "elapsed_time": "1:05:57", "remaining_time": "1 day, 6:25:23", "throughput": 4124.22, "total_tokens": 16322176}
286
  {"current_steps": 1400, "total_steps": 40000, "loss": 0.5305, "lr": 4.984923927255461e-05, "epoch": 0.3584917738941169, "percentage": 3.5, "elapsed_time": "1:06:06", "remaining_time": "1 day, 6:22:47", "throughput": 4129.2, "total_tokens": 16379264}
287
  {"current_steps": 1400, "total_steps": 40000, "eval_loss": 0.5197495222091675, "epoch": 0.3584917738941169, "percentage": 3.5, "elapsed_time": "1:10:05", "remaining_time": "1 day, 8:12:23", "throughput": 3895.01, "total_tokens": 16379264}
288
+ {"current_steps": 1405, "total_steps": 40000, "loss": 0.4651, "lr": 4.984816080887958e-05, "epoch": 0.3597721016580245, "percentage": 3.51, "elapsed_time": "1:10:16", "remaining_time": "1 day, 8:10:28", "throughput": 3898.2, "total_tokens": 16437120}
289
+ {"current_steps": 1410, "total_steps": 40000, "loss": 0.5348, "lr": 4.9847078513305875e-05, "epoch": 0.361052429421932, "percentage": 3.52, "elapsed_time": "1:10:25", "remaining_time": "1 day, 8:07:29", "throughput": 3903.73, "total_tokens": 16495552}
290
+ {"current_steps": 1415, "total_steps": 40000, "loss": 0.547, "lr": 4.984599238600043e-05, "epoch": 0.36233275718583957, "percentage": 3.54, "elapsed_time": "1:10:34", "remaining_time": "1 day, 8:04:31", "throughput": 3909.11, "total_tokens": 16553472}
291
+ {"current_steps": 1420, "total_steps": 40000, "loss": 0.4973, "lr": 4.9844902427130716e-05, "epoch": 0.36361308494974715, "percentage": 3.55, "elapsed_time": "1:10:43", "remaining_time": "1 day, 8:01:34", "throughput": 3914.71, "total_tokens": 16612448}
292
+ {"current_steps": 1425, "total_steps": 40000, "loss": 0.5786, "lr": 4.984380863686482e-05, "epoch": 0.3648934127136547, "percentage": 3.56, "elapsed_time": "1:10:52", "remaining_time": "1 day, 7:58:39", "throughput": 3919.99, "total_tokens": 16670240}
293
+ {"current_steps": 1430, "total_steps": 40000, "loss": 0.4947, "lr": 4.984271101537143e-05, "epoch": 0.36617374047756224, "percentage": 3.57, "elapsed_time": "1:11:01", "remaining_time": "1 day, 7:55:43", "throughput": 3925.55, "total_tokens": 16729056}
294
+ {"current_steps": 1435, "total_steps": 40000, "loss": 0.5576, "lr": 4.9841609562819816e-05, "epoch": 0.3674540682414698, "percentage": 3.59, "elapsed_time": "1:11:10", "remaining_time": "1 day, 7:52:50", "throughput": 3930.98, "total_tokens": 16787616}
295
+ {"current_steps": 1440, "total_steps": 40000, "loss": 0.5683, "lr": 4.984050427937983e-05, "epoch": 0.3687343960053774, "percentage": 3.6, "elapsed_time": "1:11:19", "remaining_time": "1 day, 7:49:57", "throughput": 3936.06, "total_tokens": 16844704}
296
+ {"current_steps": 1445, "total_steps": 40000, "loss": 0.5113, "lr": 4.983939516522191e-05, "epoch": 0.37001472376928496, "percentage": 3.61, "elapsed_time": "1:11:28", "remaining_time": "1 day, 7:47:06", "throughput": 3941.05, "total_tokens": 16901504}
297
+ {"current_steps": 1450, "total_steps": 40000, "loss": 0.4887, "lr": 4.983828222051711e-05, "epoch": 0.3712950515331925, "percentage": 3.62, "elapsed_time": "1:11:37", "remaining_time": "1 day, 7:44:16", "throughput": 3946.54, "total_tokens": 16960576}
298
+ {"current_steps": 1455, "total_steps": 40000, "loss": 0.5279, "lr": 4.983716544543705e-05, "epoch": 0.37257537929710005, "percentage": 3.64, "elapsed_time": "1:11:46", "remaining_time": "1 day, 7:41:29", "throughput": 3951.49, "total_tokens": 17017728}
299
+ {"current_steps": 1460, "total_steps": 40000, "loss": 0.5414, "lr": 4.983604484015395e-05, "epoch": 0.3738557070610076, "percentage": 3.65, "elapsed_time": "1:11:55", "remaining_time": "1 day, 7:38:40", "throughput": 3956.81, "total_tokens": 17076096}
300
+ {"current_steps": 1465, "total_steps": 40000, "loss": 0.4974, "lr": 4.983492040484064e-05, "epoch": 0.3751360348249152, "percentage": 3.66, "elapsed_time": "1:12:04", "remaining_time": "1 day, 7:35:53", "throughput": 3961.97, "total_tokens": 17133952}
301
+ {"current_steps": 1470, "total_steps": 40000, "loss": 0.5116, "lr": 4.98337921396705e-05, "epoch": 0.3764163625888227, "percentage": 3.67, "elapsed_time": "1:12:13", "remaining_time": "1 day, 7:33:08", "throughput": 3967.51, "total_tokens": 17193792}
302
+ {"current_steps": 1475, "total_steps": 40000, "loss": 0.4652, "lr": 4.983266004481753e-05, "epoch": 0.3776966903527303, "percentage": 3.69, "elapsed_time": "1:12:22", "remaining_time": "1 day, 7:30:24", "throughput": 3972.61, "total_tokens": 17251776}
303
+ {"current_steps": 1480, "total_steps": 40000, "loss": 0.5839, "lr": 4.9831524120456316e-05, "epoch": 0.37897701811663786, "percentage": 3.7, "elapsed_time": "1:12:31", "remaining_time": "1 day, 7:27:41", "throughput": 3977.75, "total_tokens": 17309888}
304
+ {"current_steps": 1485, "total_steps": 40000, "loss": 0.5198, "lr": 4.9830384366762026e-05, "epoch": 0.38025734588054544, "percentage": 3.71, "elapsed_time": "1:12:40", "remaining_time": "1 day, 7:24:58", "throughput": 3982.81, "total_tokens": 17367712}
305
+ {"current_steps": 1490, "total_steps": 40000, "loss": 0.4885, "lr": 4.9829240783910436e-05, "epoch": 0.38153767364445296, "percentage": 3.72, "elapsed_time": "1:12:49", "remaining_time": "1 day, 7:22:18", "throughput": 3987.95, "total_tokens": 17426272}
306
+ {"current_steps": 1495, "total_steps": 40000, "loss": 0.4882, "lr": 4.982809337207789e-05, "epoch": 0.38281800140836053, "percentage": 3.74, "elapsed_time": "1:12:58", "remaining_time": "1 day, 7:19:35", "throughput": 3993.11, "total_tokens": 17484448}
307
+ {"current_steps": 1500, "total_steps": 40000, "loss": 0.4759, "lr": 4.9826942131441337e-05, "epoch": 0.3840983291722681, "percentage": 3.75, "elapsed_time": "1:13:07", "remaining_time": "1 day, 7:16:57", "throughput": 3997.93, "total_tokens": 17541696}
308
+ {"current_steps": 1505, "total_steps": 40000, "loss": 0.4887, "lr": 4.9825787062178315e-05, "epoch": 0.3853786569361757, "percentage": 3.76, "elapsed_time": "1:13:16", "remaining_time": "1 day, 7:14:18", "throughput": 4002.93, "total_tokens": 17599584}
309
+ {"current_steps": 1510, "total_steps": 40000, "loss": 0.499, "lr": 4.9824628164466945e-05, "epoch": 0.3866589847000832, "percentage": 3.77, "elapsed_time": "1:13:25", "remaining_time": "1 day, 7:11:39", "throughput": 4008.11, "total_tokens": 17658176}
310
+ {"current_steps": 1515, "total_steps": 40000, "loss": 0.5696, "lr": 4.982346543848595e-05, "epoch": 0.38793931246399077, "percentage": 3.79, "elapsed_time": "1:13:34", "remaining_time": "1 day, 7:09:04", "throughput": 4012.82, "total_tokens": 17715296}
311
+ {"current_steps": 1520, "total_steps": 40000, "loss": 0.4718, "lr": 4.9822298884414626e-05, "epoch": 0.38921964022789834, "percentage": 3.8, "elapsed_time": "1:13:43", "remaining_time": "1 day, 7:06:27", "throughput": 4018.26, "total_tokens": 17775264}
312
+ {"current_steps": 1525, "total_steps": 40000, "loss": 0.5933, "lr": 4.982112850243288e-05, "epoch": 0.3904999679918059, "percentage": 3.81, "elapsed_time": "1:13:53", "remaining_time": "1 day, 7:04:10", "throughput": 4024.08, "total_tokens": 17840064}
313
+ {"current_steps": 1530, "total_steps": 40000, "loss": 0.5015, "lr": 4.98199542927212e-05, "epoch": 0.39178029575571344, "percentage": 3.82, "elapsed_time": "1:14:02", "remaining_time": "1 day, 7:01:37", "throughput": 4029.09, "total_tokens": 17898688}
314
+ {"current_steps": 1535, "total_steps": 40000, "loss": 0.4895, "lr": 4.981877625546066e-05, "epoch": 0.393060623519621, "percentage": 3.84, "elapsed_time": "1:14:11", "remaining_time": "1 day, 6:59:04", "throughput": 4034.01, "total_tokens": 17956736}
315
+ {"current_steps": 1540, "total_steps": 40000, "loss": 0.5245, "lr": 4.981759439083293e-05, "epoch": 0.3943409512835286, "percentage": 3.85, "elapsed_time": "1:14:20", "remaining_time": "1 day, 6:56:32", "throughput": 4038.95, "total_tokens": 18015104}
316
+ {"current_steps": 1545, "total_steps": 40000, "loss": 0.5265, "lr": 4.981640869902027e-05, "epoch": 0.39562127904743616, "percentage": 3.86, "elapsed_time": "1:14:29", "remaining_time": "1 day, 6:54:01", "throughput": 4043.89, "total_tokens": 18073536}
317
+ {"current_steps": 1550, "total_steps": 40000, "loss": 0.5393, "lr": 4.9815219180205517e-05, "epoch": 0.3969016068113437, "percentage": 3.88, "elapsed_time": "1:14:38", "remaining_time": "1 day, 6:51:32", "throughput": 4048.86, "total_tokens": 18132224}
318
+ {"current_steps": 1555, "total_steps": 40000, "loss": 0.4717, "lr": 4.9814025834572126e-05, "epoch": 0.39818193457525125, "percentage": 3.89, "elapsed_time": "1:14:47", "remaining_time": "1 day, 6:49:02", "throughput": 4053.59, "total_tokens": 18189888}
319
+ {"current_steps": 1560, "total_steps": 40000, "loss": 0.4614, "lr": 4.981282866230411e-05, "epoch": 0.3994622623391588, "percentage": 3.9, "elapsed_time": "1:14:56", "remaining_time": "1 day, 6:46:33", "throughput": 4058.42, "total_tokens": 18247840}
320
+ {"current_steps": 1565, "total_steps": 40000, "loss": 0.4802, "lr": 4.981162766358611e-05, "epoch": 0.4007425901030664, "percentage": 3.91, "elapsed_time": "1:15:05", "remaining_time": "1 day, 6:44:08", "throughput": 4063.23, "total_tokens": 18306432}
321
+ {"current_steps": 1570, "total_steps": 40000, "loss": 0.446, "lr": 4.9810422838603316e-05, "epoch": 0.402022917866974, "percentage": 3.92, "elapsed_time": "1:15:14", "remaining_time": "1 day, 6:41:41", "throughput": 4067.76, "total_tokens": 18363392}
322
+ {"current_steps": 1575, "total_steps": 40000, "loss": 0.4283, "lr": 4.9809214187541533e-05, "epoch": 0.4033032456308815, "percentage": 3.94, "elapsed_time": "1:15:23", "remaining_time": "1 day, 6:39:16", "throughput": 4072.02, "total_tokens": 18419360}
323
+ {"current_steps": 1580, "total_steps": 40000, "loss": 0.4608, "lr": 4.980800171058715e-05, "epoch": 0.40458357339478906, "percentage": 3.95, "elapsed_time": "1:15:32", "remaining_time": "1 day, 6:36:51", "throughput": 4076.94, "total_tokens": 18478208}
324
+ {"current_steps": 1585, "total_steps": 40000, "loss": 0.5068, "lr": 4.980678540792715e-05, "epoch": 0.40586390115869664, "percentage": 3.96, "elapsed_time": "1:15:41", "remaining_time": "1 day, 6:34:27", "throughput": 4081.54, "total_tokens": 18535808}
325
+ {"current_steps": 1590, "total_steps": 40000, "loss": 0.4589, "lr": 4.980556527974909e-05, "epoch": 0.4071442289226042, "percentage": 3.98, "elapsed_time": "1:15:50", "remaining_time": "1 day, 6:32:03", "throughput": 4086.37, "total_tokens": 18594368}
326
+ {"current_steps": 1595, "total_steps": 40000, "loss": 0.5077, "lr": 4.980434132624114e-05, "epoch": 0.40842455668651173, "percentage": 3.99, "elapsed_time": "1:15:59", "remaining_time": "1 day, 6:29:41", "throughput": 4091.05, "total_tokens": 18652416}
327
+ {"current_steps": 1600, "total_steps": 40000, "loss": 0.4792, "lr": 4.980311354759205e-05, "epoch": 0.4097048844504193, "percentage": 4.0, "elapsed_time": "1:16:08", "remaining_time": "1 day, 6:27:19", "throughput": 4095.66, "total_tokens": 18710208}
328
+ {"current_steps": 1600, "total_steps": 40000, "eval_loss": 0.5102769136428833, "epoch": 0.4097048844504193, "percentage": 4.0, "elapsed_time": "1:20:06", "remaining_time": "1 day, 8:02:26", "throughput": 3893.02, "total_tokens": 18710208}