rbelanec commited on
Commit
406a294
·
verified ·
1 Parent(s): c545d02

Training in progress, step 45800

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +41 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1d400d569ae249002f08676572715adaebf0d95ea8ecb7d8f6273e263ded167
3
  size 1638528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c7198e870e3e15df28e8e060edf9e9dcb780cefd863c23681f9bb2ee59e4831
3
  size 1638528
trainer_log.jsonl CHANGED
@@ -9348,3 +9348,44 @@
9348
  {"current_steps": 45600, "total_steps": 80000, "eval_loss": 5.0990753173828125, "epoch": 0.3648934127136547, "percentage": 57.0, "elapsed_time": "1 day, 23:34:44", "remaining_time": "1 day, 11:53:34", "throughput": 84.29, "total_tokens": 14437616}
9349
  {"current_steps": 45605, "total_steps": 80000, "loss": 6.9887, "lr": 0.11725551969732467, "epoch": 0.3649334229562768, "percentage": 57.01, "elapsed_time": "1 day, 23:34:46", "remaining_time": "1 day, 11:53:02", "throughput": 84.3, "total_tokens": 14439176}
9350
  {"current_steps": 45610, "total_steps": 80000, "loss": 4.5815, "lr": 0.11722677821706107, "epoch": 0.3649734331988989, "percentage": 57.01, "elapsed_time": "1 day, 23:34:46", "remaining_time": "1 day, 11:52:30", "throughput": 84.31, "total_tokens": 14440800}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9348
  {"current_steps": 45600, "total_steps": 80000, "eval_loss": 5.0990753173828125, "epoch": 0.3648934127136547, "percentage": 57.0, "elapsed_time": "1 day, 23:34:44", "remaining_time": "1 day, 11:53:34", "throughput": 84.29, "total_tokens": 14437616}
9349
  {"current_steps": 45605, "total_steps": 80000, "loss": 6.9887, "lr": 0.11725551969732467, "epoch": 0.3649334229562768, "percentage": 57.01, "elapsed_time": "1 day, 23:34:46", "remaining_time": "1 day, 11:53:02", "throughput": 84.3, "total_tokens": 14439176}
9350
  {"current_steps": 45610, "total_steps": 80000, "loss": 4.5815, "lr": 0.11722677821706107, "epoch": 0.3649734331988989, "percentage": 57.01, "elapsed_time": "1 day, 23:34:46", "remaining_time": "1 day, 11:52:30", "throughput": 84.31, "total_tokens": 14440800}
9351
+ {"current_steps": 45615, "total_steps": 80000, "loss": 5.0775, "lr": 0.11719803800030815, "epoch": 0.36501344344152103, "percentage": 57.02, "elapsed_time": "1 day, 23:34:47", "remaining_time": "1 day, 11:51:57", "throughput": 84.32, "total_tokens": 14442360}
9352
+ {"current_steps": 45620, "total_steps": 80000, "loss": 4.6495, "lr": 0.11716929904817393, "epoch": 0.36505345368414316, "percentage": 57.03, "elapsed_time": "1 day, 23:34:47", "remaining_time": "1 day, 11:51:25", "throughput": 84.32, "total_tokens": 14443800}
9353
+ {"current_steps": 45625, "total_steps": 80000, "loss": 4.5596, "lr": 0.11714056136176634, "epoch": 0.3650934639267652, "percentage": 57.03, "elapsed_time": "1 day, 23:34:48", "remaining_time": "1 day, 11:50:52", "throughput": 84.33, "total_tokens": 14445352}
9354
+ {"current_steps": 45630, "total_steps": 80000, "loss": 5.6308, "lr": 0.11711182494219341, "epoch": 0.36513347416938735, "percentage": 57.04, "elapsed_time": "1 day, 23:34:48", "remaining_time": "1 day, 11:50:20", "throughput": 84.34, "total_tokens": 14446984}
9355
+ {"current_steps": 45635, "total_steps": 80000, "loss": 6.0331, "lr": 0.11708308979056296, "epoch": 0.3651734844120095, "percentage": 57.04, "elapsed_time": "1 day, 23:34:49", "remaining_time": "1 day, 11:49:47", "throughput": 84.35, "total_tokens": 14448576}
9356
+ {"current_steps": 45640, "total_steps": 80000, "loss": 4.4552, "lr": 0.11705435590798277, "epoch": 0.3652134946546316, "percentage": 57.05, "elapsed_time": "1 day, 23:34:50", "remaining_time": "1 day, 11:49:15", "throughput": 84.36, "total_tokens": 14450224}
9357
+ {"current_steps": 45645, "total_steps": 80000, "loss": 4.9775, "lr": 0.11702562329556072, "epoch": 0.3652535048972537, "percentage": 57.06, "elapsed_time": "1 day, 23:34:50", "remaining_time": "1 day, 11:48:43", "throughput": 84.37, "total_tokens": 14451808}
9358
+ {"current_steps": 45650, "total_steps": 80000, "loss": 4.4322, "lr": 0.11699689195440455, "epoch": 0.3652935151398758, "percentage": 57.06, "elapsed_time": "1 day, 23:34:51", "remaining_time": "1 day, 11:48:10", "throughput": 84.38, "total_tokens": 14453376}
9359
+ {"current_steps": 45655, "total_steps": 80000, "loss": 4.7887, "lr": 0.11696816188562179, "epoch": 0.3653335253824979, "percentage": 57.07, "elapsed_time": "1 day, 23:34:51", "remaining_time": "1 day, 11:47:38", "throughput": 84.39, "total_tokens": 14455088}
9360
+ {"current_steps": 45660, "total_steps": 80000, "loss": 3.2378, "lr": 0.11693943309032023, "epoch": 0.36537353562512004, "percentage": 57.07, "elapsed_time": "1 day, 23:34:52", "remaining_time": "1 day, 11:47:05", "throughput": 84.4, "total_tokens": 14456896}
9361
+ {"current_steps": 45665, "total_steps": 80000, "loss": 4.2802, "lr": 0.11691070556960743, "epoch": 0.36541354586774216, "percentage": 57.08, "elapsed_time": "1 day, 23:34:53", "remaining_time": "1 day, 11:46:33", "throughput": 84.41, "total_tokens": 14458496}
9362
+ {"current_steps": 45670, "total_steps": 80000, "loss": 5.4537, "lr": 0.11688197932459085, "epoch": 0.36545355611036423, "percentage": 57.09, "elapsed_time": "1 day, 23:34:53", "remaining_time": "1 day, 11:46:00", "throughput": 84.42, "total_tokens": 14460072}
9363
+ {"current_steps": 45675, "total_steps": 80000, "loss": 4.0324, "lr": 0.11685325435637808, "epoch": 0.36549356635298635, "percentage": 57.09, "elapsed_time": "1 day, 23:34:54", "remaining_time": "1 day, 11:45:28", "throughput": 84.43, "total_tokens": 14461656}
9364
+ {"current_steps": 45680, "total_steps": 80000, "loss": 6.8163, "lr": 0.11682453066607645, "epoch": 0.3655335765956085, "percentage": 57.1, "elapsed_time": "1 day, 23:34:54", "remaining_time": "1 day, 11:44:56", "throughput": 84.44, "total_tokens": 14463576}
9365
+ {"current_steps": 45685, "total_steps": 80000, "loss": 5.0675, "lr": 0.11679580825479345, "epoch": 0.3655735868382306, "percentage": 57.11, "elapsed_time": "1 day, 23:34:55", "remaining_time": "1 day, 11:44:23", "throughput": 84.45, "total_tokens": 14465152}
9366
+ {"current_steps": 45690, "total_steps": 80000, "loss": 5.169, "lr": 0.11676708712363633, "epoch": 0.3656135970808527, "percentage": 57.11, "elapsed_time": "1 day, 23:34:55", "remaining_time": "1 day, 11:43:51", "throughput": 84.45, "total_tokens": 14466600}
9367
+ {"current_steps": 45695, "total_steps": 80000, "loss": 5.3161, "lr": 0.11673836727371245, "epoch": 0.3656536073234748, "percentage": 57.12, "elapsed_time": "1 day, 23:34:56", "remaining_time": "1 day, 11:43:18", "throughput": 84.46, "total_tokens": 14468200}
9368
+ {"current_steps": 45700, "total_steps": 80000, "loss": 5.391, "lr": 0.11670964870612904, "epoch": 0.3656936175660969, "percentage": 57.12, "elapsed_time": "1 day, 23:34:57", "remaining_time": "1 day, 11:42:46", "throughput": 84.47, "total_tokens": 14469704}
9369
+ {"current_steps": 45705, "total_steps": 80000, "loss": 3.8915, "lr": 0.1166809314219932, "epoch": 0.36573362780871904, "percentage": 57.13, "elapsed_time": "1 day, 23:34:57", "remaining_time": "1 day, 11:42:14", "throughput": 84.48, "total_tokens": 14471248}
9370
+ {"current_steps": 45710, "total_steps": 80000, "loss": 6.5092, "lr": 0.11665221542241219, "epoch": 0.36577363805134117, "percentage": 57.14, "elapsed_time": "1 day, 23:34:58", "remaining_time": "1 day, 11:41:41", "throughput": 84.49, "total_tokens": 14472984}
9371
+ {"current_steps": 45715, "total_steps": 80000, "loss": 5.3554, "lr": 0.11662350070849307, "epoch": 0.36581364829396323, "percentage": 57.14, "elapsed_time": "1 day, 23:34:58", "remaining_time": "1 day, 11:41:09", "throughput": 84.5, "total_tokens": 14474504}
9372
+ {"current_steps": 45720, "total_steps": 80000, "loss": 5.8921, "lr": 0.11659478728134282, "epoch": 0.36585365853658536, "percentage": 57.15, "elapsed_time": "1 day, 23:34:59", "remaining_time": "1 day, 11:40:37", "throughput": 84.51, "total_tokens": 14476016}
9373
+ {"current_steps": 45725, "total_steps": 80000, "loss": 5.3527, "lr": 0.11656607514206851, "epoch": 0.3658936687792075, "percentage": 57.16, "elapsed_time": "1 day, 23:35:00", "remaining_time": "1 day, 11:40:04", "throughput": 84.52, "total_tokens": 14477640}
9374
+ {"current_steps": 45730, "total_steps": 80000, "loss": 5.711, "lr": 0.11653736429177709, "epoch": 0.3659336790218296, "percentage": 57.16, "elapsed_time": "1 day, 23:35:00", "remaining_time": "1 day, 11:39:32", "throughput": 84.53, "total_tokens": 14479200}
9375
+ {"current_steps": 45735, "total_steps": 80000, "loss": 5.2179, "lr": 0.11650865473157537, "epoch": 0.3659736892644517, "percentage": 57.17, "elapsed_time": "1 day, 23:35:01", "remaining_time": "1 day, 11:39:00", "throughput": 84.53, "total_tokens": 14480824}
9376
+ {"current_steps": 45740, "total_steps": 80000, "loss": 4.8279, "lr": 0.11647994646257025, "epoch": 0.3660136995070738, "percentage": 57.17, "elapsed_time": "1 day, 23:35:01", "remaining_time": "1 day, 11:38:27", "throughput": 84.54, "total_tokens": 14482568}
9377
+ {"current_steps": 45745, "total_steps": 80000, "loss": 4.1234, "lr": 0.11645123948586855, "epoch": 0.3660537097496959, "percentage": 57.18, "elapsed_time": "1 day, 23:35:02", "remaining_time": "1 day, 11:37:55", "throughput": 84.55, "total_tokens": 14484040}
9378
+ {"current_steps": 45750, "total_steps": 80000, "loss": 4.5525, "lr": 0.11642253380257694, "epoch": 0.36609371999231805, "percentage": 57.19, "elapsed_time": "1 day, 23:35:02", "remaining_time": "1 day, 11:37:23", "throughput": 84.56, "total_tokens": 14485472}
9379
+ {"current_steps": 45755, "total_steps": 80000, "loss": 4.7924, "lr": 0.11639382941380219, "epoch": 0.36613373023494017, "percentage": 57.19, "elapsed_time": "1 day, 23:35:03", "remaining_time": "1 day, 11:36:50", "throughput": 84.57, "total_tokens": 14487016}
9380
+ {"current_steps": 45760, "total_steps": 80000, "loss": 5.106, "lr": 0.11636512632065091, "epoch": 0.36617374047756224, "percentage": 57.2, "elapsed_time": "1 day, 23:35:04", "remaining_time": "1 day, 11:36:18", "throughput": 84.58, "total_tokens": 14488536}
9381
+ {"current_steps": 45765, "total_steps": 80000, "loss": 5.2514, "lr": 0.1163364245242297, "epoch": 0.36621375072018436, "percentage": 57.21, "elapsed_time": "1 day, 23:35:04", "remaining_time": "1 day, 11:35:46", "throughput": 84.59, "total_tokens": 14490112}
9382
+ {"current_steps": 45770, "total_steps": 80000, "loss": 5.9074, "lr": 0.11630772402564503, "epoch": 0.3662537609628065, "percentage": 57.21, "elapsed_time": "1 day, 23:35:05", "remaining_time": "1 day, 11:35:14", "throughput": 84.6, "total_tokens": 14491688}
9383
+ {"current_steps": 45775, "total_steps": 80000, "loss": 5.6634, "lr": 0.11627902482600351, "epoch": 0.3662937712054286, "percentage": 57.22, "elapsed_time": "1 day, 23:35:05", "remaining_time": "1 day, 11:34:41", "throughput": 84.61, "total_tokens": 14493376}
9384
+ {"current_steps": 45780, "total_steps": 80000, "loss": 5.2131, "lr": 0.11625032692641156, "epoch": 0.3663337814480507, "percentage": 57.23, "elapsed_time": "1 day, 23:35:06", "remaining_time": "1 day, 11:34:09", "throughput": 84.61, "total_tokens": 14494880}
9385
+ {"current_steps": 45785, "total_steps": 80000, "loss": 5.1533, "lr": 0.11622163032797546, "epoch": 0.3663737916906728, "percentage": 57.23, "elapsed_time": "1 day, 23:35:06", "remaining_time": "1 day, 11:33:37", "throughput": 84.62, "total_tokens": 14496584}
9386
+ {"current_steps": 45790, "total_steps": 80000, "loss": 5.7625, "lr": 0.11619293503180173, "epoch": 0.3664138019332949, "percentage": 57.24, "elapsed_time": "1 day, 23:35:07", "remaining_time": "1 day, 11:33:04", "throughput": 84.63, "total_tokens": 14498144}
9387
+ {"current_steps": 45795, "total_steps": 80000, "loss": 5.3461, "lr": 0.11616424103899656, "epoch": 0.36645381217591705, "percentage": 57.24, "elapsed_time": "1 day, 23:35:08", "remaining_time": "1 day, 11:32:32", "throughput": 84.64, "total_tokens": 14499664}
9388
+ {"current_steps": 45800, "total_steps": 80000, "loss": 6.2408, "lr": 0.11613554835066617, "epoch": 0.3664938224185392, "percentage": 57.25, "elapsed_time": "1 day, 23:35:08", "remaining_time": "1 day, 11:32:00", "throughput": 84.65, "total_tokens": 14501416}
9389
+ {"current_steps": 45800, "total_steps": 80000, "eval_loss": 5.108764171600342, "epoch": 0.3664938224185392, "percentage": 57.25, "elapsed_time": "1 day, 23:47:15", "remaining_time": "1 day, 11:41:03", "throughput": 84.29, "total_tokens": 14501416}
9390
+ {"current_steps": 45805, "total_steps": 80000, "loss": 4.4077, "lr": 0.11610685696791685, "epoch": 0.36653383266116124, "percentage": 57.26, "elapsed_time": "1 day, 23:47:17", "remaining_time": "1 day, 11:40:31", "throughput": 84.3, "total_tokens": 14503056}
9391
+ {"current_steps": 45810, "total_steps": 80000, "loss": 4.0565, "lr": 0.11607816689185468, "epoch": 0.36657384290378336, "percentage": 57.26, "elapsed_time": "1 day, 23:47:18", "remaining_time": "1 day, 11:39:59", "throughput": 84.31, "total_tokens": 14504704}