rbelanec commited on
Commit
6e5cbde
·
verified ·
1 Parent(s): 5552490

Training in progress, step 35800

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +40 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d874126b81ed361286ce0159dc62e2b3f86b2c78c87cbab9768e911db3268a46
3
  size 1638528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:448feaaa1724accc5fd3dd7905c15d84f9121c853bceee5912b65b6e682aed55
3
  size 1638528
trainer_log.jsonl CHANGED
@@ -7297,3 +7297,43 @@
7297
  {"current_steps": 35600, "total_steps": 40000, "loss": 0.2697, "lr": 0.008871875754255508, "epoch": 1.1394917098777286, "percentage": 89.0, "elapsed_time": "1 day, 12:10:45", "remaining_time": "4:28:17", "throughput": 415.97, "total_tokens": 54178960}
7298
  {"current_steps": 35600, "total_steps": 40000, "eval_loss": 0.27406713366508484, "epoch": 1.1394917098777286, "percentage": 89.0, "elapsed_time": "1 day, 12:21:33", "remaining_time": "4:29:37", "throughput": 413.92, "total_tokens": 54178960}
7299
  {"current_steps": 35605, "total_steps": 40000, "loss": 0.22, "lr": 0.008851928973293422, "epoch": 1.1396517508482171, "percentage": 89.01, "elapsed_time": "1 day, 12:21:36", "remaining_time": "4:29:17", "throughput": 413.97, "total_tokens": 54186400}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7297
  {"current_steps": 35600, "total_steps": 40000, "loss": 0.2697, "lr": 0.008871875754255508, "epoch": 1.1394917098777286, "percentage": 89.0, "elapsed_time": "1 day, 12:10:45", "remaining_time": "4:28:17", "throughput": 415.97, "total_tokens": 54178960}
7298
  {"current_steps": 35600, "total_steps": 40000, "eval_loss": 0.27406713366508484, "epoch": 1.1394917098777286, "percentage": 89.0, "elapsed_time": "1 day, 12:21:33", "remaining_time": "4:29:37", "throughput": 413.92, "total_tokens": 54178960}
7299
  {"current_steps": 35605, "total_steps": 40000, "loss": 0.22, "lr": 0.008851928973293422, "epoch": 1.1396517508482171, "percentage": 89.01, "elapsed_time": "1 day, 12:21:36", "remaining_time": "4:29:17", "throughput": 413.97, "total_tokens": 54186400}
7300
+ {"current_steps": 35610, "total_steps": 40000, "loss": 0.2144, "lr": 0.00883200395913764, "epoch": 1.1398117918187056, "percentage": 89.03, "elapsed_time": "1 day, 12:21:38", "remaining_time": "4:28:57", "throughput": 414.02, "total_tokens": 54194336}
7301
+ {"current_steps": 35615, "total_steps": 40000, "loss": 0.2954, "lr": 0.00881210071486091, "epoch": 1.1399718327891941, "percentage": 89.04, "elapsed_time": "1 day, 12:21:40", "remaining_time": "4:28:36", "throughput": 414.07, "total_tokens": 54201568}
7302
+ {"current_steps": 35620, "total_steps": 40000, "loss": 0.3755, "lr": 0.008792219243532505, "epoch": 1.1401318737596824, "percentage": 89.05, "elapsed_time": "1 day, 12:21:42", "remaining_time": "4:28:16", "throughput": 414.12, "total_tokens": 54209136}
7303
+ {"current_steps": 35625, "total_steps": 40000, "loss": 0.2028, "lr": 0.008772359548218428, "epoch": 1.1402919147301709, "percentage": 89.06, "elapsed_time": "1 day, 12:21:44", "remaining_time": "4:27:55", "throughput": 414.17, "total_tokens": 54216880}
7304
+ {"current_steps": 35630, "total_steps": 40000, "loss": 0.2339, "lr": 0.008752521631981274, "epoch": 1.1404519557006594, "percentage": 89.08, "elapsed_time": "1 day, 12:21:46", "remaining_time": "4:27:35", "throughput": 414.23, "total_tokens": 54224768}
7305
+ {"current_steps": 35635, "total_steps": 40000, "loss": 0.2011, "lr": 0.008732705497880315, "epoch": 1.1406119966711479, "percentage": 89.09, "elapsed_time": "1 day, 12:21:48", "remaining_time": "4:27:15", "throughput": 414.28, "total_tokens": 54232416}
7306
+ {"current_steps": 35640, "total_steps": 40000, "loss": 0.3047, "lr": 0.008712911148971459, "epoch": 1.1407720376416361, "percentage": 89.1, "elapsed_time": "1 day, 12:21:50", "remaining_time": "4:26:54", "throughput": 414.33, "total_tokens": 54240704}
7307
+ {"current_steps": 35645, "total_steps": 40000, "loss": 0.2969, "lr": 0.008693138588307208, "epoch": 1.1409320786121246, "percentage": 89.11, "elapsed_time": "1 day, 12:21:52", "remaining_time": "4:26:34", "throughput": 414.39, "total_tokens": 54248496}
7308
+ {"current_steps": 35650, "total_steps": 40000, "loss": 0.4009, "lr": 0.008673387818936762, "epoch": 1.1410921195826131, "percentage": 89.12, "elapsed_time": "1 day, 12:21:55", "remaining_time": "4:26:14", "throughput": 414.44, "total_tokens": 54257088}
7309
+ {"current_steps": 35655, "total_steps": 40000, "loss": 0.2625, "lr": 0.008653658843905948, "epoch": 1.1412521605531016, "percentage": 89.14, "elapsed_time": "1 day, 12:21:57", "remaining_time": "4:25:53", "throughput": 414.5, "total_tokens": 54264992}
7310
+ {"current_steps": 35660, "total_steps": 40000, "loss": 0.2117, "lr": 0.0086339516662572, "epoch": 1.1414122015235901, "percentage": 89.15, "elapsed_time": "1 day, 12:21:59", "remaining_time": "4:25:33", "throughput": 414.55, "total_tokens": 54272656}
7311
+ {"current_steps": 35665, "total_steps": 40000, "loss": 0.1799, "lr": 0.008614266289029638, "epoch": 1.1415722424940784, "percentage": 89.16, "elapsed_time": "1 day, 12:22:01", "remaining_time": "4:25:13", "throughput": 414.6, "total_tokens": 54280672}
7312
+ {"current_steps": 35670, "total_steps": 40000, "loss": 0.1684, "lr": 0.008594602715258965, "epoch": 1.141732283464567, "percentage": 89.18, "elapsed_time": "1 day, 12:22:03", "remaining_time": "4:24:52", "throughput": 414.66, "total_tokens": 54288368}
7313
+ {"current_steps": 35675, "total_steps": 40000, "loss": 0.1677, "lr": 0.008574960947977573, "epoch": 1.1418923244350554, "percentage": 89.19, "elapsed_time": "1 day, 12:22:05", "remaining_time": "4:24:32", "throughput": 414.71, "total_tokens": 54296016}
7314
+ {"current_steps": 35680, "total_steps": 40000, "loss": 0.3761, "lr": 0.008555340990214438, "epoch": 1.142052365405544, "percentage": 89.2, "elapsed_time": "1 day, 12:22:07", "remaining_time": "4:24:12", "throughput": 414.76, "total_tokens": 54303680}
7315
+ {"current_steps": 35685, "total_steps": 40000, "loss": 0.3606, "lr": 0.008535742844995258, "epoch": 1.1422124063760322, "percentage": 89.21, "elapsed_time": "1 day, 12:22:09", "remaining_time": "4:23:51", "throughput": 414.81, "total_tokens": 54311168}
7316
+ {"current_steps": 35690, "total_steps": 40000, "loss": 0.246, "lr": 0.008516166515342266, "epoch": 1.1423724473465207, "percentage": 89.22, "elapsed_time": "1 day, 12:22:11", "remaining_time": "4:23:31", "throughput": 414.86, "total_tokens": 54318688}
7317
+ {"current_steps": 35695, "total_steps": 40000, "loss": 0.2528, "lr": 0.008496612004274411, "epoch": 1.1425324883170092, "percentage": 89.24, "elapsed_time": "1 day, 12:22:14", "remaining_time": "4:23:11", "throughput": 414.91, "total_tokens": 54326368}
7318
+ {"current_steps": 35700, "total_steps": 40000, "loss": 0.2842, "lr": 0.008477079314807201, "epoch": 1.1426925292874976, "percentage": 89.25, "elapsed_time": "1 day, 12:22:16", "remaining_time": "4:22:51", "throughput": 414.97, "total_tokens": 54334176}
7319
+ {"current_steps": 35705, "total_steps": 40000, "loss": 0.3171, "lr": 0.008457568449952874, "epoch": 1.1428525702579861, "percentage": 89.26, "elapsed_time": "1 day, 12:22:18", "remaining_time": "4:22:30", "throughput": 415.02, "total_tokens": 54341296}
7320
+ {"current_steps": 35710, "total_steps": 40000, "loss": 0.2428, "lr": 0.008438079412720189, "epoch": 1.1430126112284744, "percentage": 89.28, "elapsed_time": "1 day, 12:22:20", "remaining_time": "4:22:10", "throughput": 415.07, "total_tokens": 54349504}
7321
+ {"current_steps": 35715, "total_steps": 40000, "loss": 0.2449, "lr": 0.00841861220611466, "epoch": 1.143172652198963, "percentage": 89.29, "elapsed_time": "1 day, 12:22:22", "remaining_time": "4:21:50", "throughput": 415.12, "total_tokens": 54356992}
7322
+ {"current_steps": 35720, "total_steps": 40000, "loss": 0.1673, "lr": 0.008399166833138355, "epoch": 1.1433326931694514, "percentage": 89.3, "elapsed_time": "1 day, 12:22:24", "remaining_time": "4:21:29", "throughput": 415.17, "total_tokens": 54364768}
7323
+ {"current_steps": 35725, "total_steps": 40000, "loss": 0.3784, "lr": 0.008379743296789987, "epoch": 1.14349273413994, "percentage": 89.31, "elapsed_time": "1 day, 12:22:26", "remaining_time": "4:21:09", "throughput": 415.23, "total_tokens": 54372752}
7324
+ {"current_steps": 35730, "total_steps": 40000, "loss": 0.2704, "lr": 0.008360341600064896, "epoch": 1.1436527751104282, "percentage": 89.33, "elapsed_time": "1 day, 12:22:28", "remaining_time": "4:20:49", "throughput": 415.28, "total_tokens": 54380080}
7325
+ {"current_steps": 35735, "total_steps": 40000, "loss": 0.353, "lr": 0.008340961745955121, "epoch": 1.1438128160809167, "percentage": 89.34, "elapsed_time": "1 day, 12:22:30", "remaining_time": "4:20:29", "throughput": 415.33, "total_tokens": 54387376}
7326
+ {"current_steps": 35740, "total_steps": 40000, "loss": 0.2883, "lr": 0.008321603737449224, "epoch": 1.1439728570514052, "percentage": 89.35, "elapsed_time": "1 day, 12:22:32", "remaining_time": "4:20:08", "throughput": 415.38, "total_tokens": 54394864}
7327
+ {"current_steps": 35745, "total_steps": 40000, "loss": 0.4051, "lr": 0.008302267577532479, "epoch": 1.1441328980218937, "percentage": 89.36, "elapsed_time": "1 day, 12:22:34", "remaining_time": "4:19:48", "throughput": 415.43, "total_tokens": 54402560}
7328
+ {"current_steps": 35750, "total_steps": 40000, "loss": 0.2504, "lr": 0.008282953269186771, "epoch": 1.1442929389923822, "percentage": 89.38, "elapsed_time": "1 day, 12:22:36", "remaining_time": "4:19:28", "throughput": 415.48, "total_tokens": 54409984}
7329
+ {"current_steps": 35755, "total_steps": 40000, "loss": 0.2501, "lr": 0.008263660815390567, "epoch": 1.1444529799628704, "percentage": 89.39, "elapsed_time": "1 day, 12:22:39", "remaining_time": "4:19:08", "throughput": 415.54, "total_tokens": 54418224}
7330
+ {"current_steps": 35760, "total_steps": 40000, "loss": 0.2212, "lr": 0.008244390219119069, "epoch": 1.144613020933359, "percentage": 89.4, "elapsed_time": "1 day, 12:22:41", "remaining_time": "4:18:47", "throughput": 415.59, "total_tokens": 54425712}
7331
+ {"current_steps": 35765, "total_steps": 40000, "loss": 0.2762, "lr": 0.008225141483343967, "epoch": 1.1447730619038474, "percentage": 89.41, "elapsed_time": "1 day, 12:22:43", "remaining_time": "4:18:27", "throughput": 415.64, "total_tokens": 54433296}
7332
+ {"current_steps": 35770, "total_steps": 40000, "loss": 0.2366, "lr": 0.00820591461103372, "epoch": 1.144933102874336, "percentage": 89.42, "elapsed_time": "1 day, 12:22:45", "remaining_time": "4:18:07", "throughput": 415.69, "total_tokens": 54440624}
7333
+ {"current_steps": 35775, "total_steps": 40000, "loss": 0.4345, "lr": 0.008186709605153358, "epoch": 1.1450931438448242, "percentage": 89.44, "elapsed_time": "1 day, 12:22:47", "remaining_time": "4:17:47", "throughput": 415.74, "total_tokens": 54448496}
7334
+ {"current_steps": 35780, "total_steps": 40000, "loss": 0.2489, "lr": 0.008167526468664492, "epoch": 1.1452531848153127, "percentage": 89.45, "elapsed_time": "1 day, 12:22:49", "remaining_time": "4:17:26", "throughput": 415.79, "total_tokens": 54456400}
7335
+ {"current_steps": 35785, "total_steps": 40000, "loss": 0.1987, "lr": 0.008148365204525443, "epoch": 1.1454132257858012, "percentage": 89.46, "elapsed_time": "1 day, 12:22:51", "remaining_time": "4:17:06", "throughput": 415.85, "total_tokens": 54464032}
7336
+ {"current_steps": 35790, "total_steps": 40000, "loss": 0.2949, "lr": 0.00812922581569106, "epoch": 1.1455732667562897, "percentage": 89.48, "elapsed_time": "1 day, 12:22:53", "remaining_time": "4:16:46", "throughput": 415.9, "total_tokens": 54471888}
7337
+ {"current_steps": 35795, "total_steps": 40000, "loss": 0.2004, "lr": 0.008110108305112934, "epoch": 1.1457333077267782, "percentage": 89.49, "elapsed_time": "1 day, 12:22:55", "remaining_time": "4:16:26", "throughput": 415.95, "total_tokens": 54479600}
7338
+ {"current_steps": 35800, "total_steps": 40000, "loss": 0.2555, "lr": 0.008091012675739223, "epoch": 1.1458933486972664, "percentage": 89.5, "elapsed_time": "1 day, 12:22:57", "remaining_time": "4:16:06", "throughput": 416.0, "total_tokens": 54486752}
7339
+ {"current_steps": 35800, "total_steps": 40000, "eval_loss": 0.274140864610672, "epoch": 1.1458933486972664, "percentage": 89.5, "elapsed_time": "1 day, 12:33:44", "remaining_time": "4:17:22", "throughput": 413.95, "total_tokens": 54486752}