rbelanec commited on
Commit
75544d7
·
verified ·
1 Parent(s): d89283a

Training in progress, step 39800

Browse files
Files changed (1) hide show
  1. trainer_log.jsonl +41 -0
trainer_log.jsonl CHANGED
@@ -8116,3 +8116,44 @@
8116
  {"current_steps": 39595, "total_steps": 40000, "loss": 16.4221, "lr": 1.2708814586862016e-08, "epoch": 1.2673644452979964, "percentage": 98.99, "elapsed_time": "20:52:19", "remaining_time": "0:12:48", "throughput": 724.63, "total_tokens": 54447872}
8117
  {"current_steps": 39600, "total_steps": 40000, "loss": 13.2421, "lr": 1.2397742806111168e-08, "epoch": 1.2675244862684847, "percentage": 99.0, "elapsed_time": "20:52:20", "remaining_time": "0:12:38", "throughput": 724.7, "total_tokens": 54454880}
8118
  {"current_steps": 39600, "total_steps": 40000, "eval_loss": 15.15965461730957, "epoch": 1.2675244862684847, "percentage": 99.0, "elapsed_time": "20:57:35", "remaining_time": "0:12:42", "throughput": 721.68, "total_tokens": 54454880}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8116
  {"current_steps": 39595, "total_steps": 40000, "loss": 16.4221, "lr": 1.2708814586862016e-08, "epoch": 1.2673644452979964, "percentage": 98.99, "elapsed_time": "20:52:19", "remaining_time": "0:12:48", "throughput": 724.63, "total_tokens": 54447872}
8117
  {"current_steps": 39600, "total_steps": 40000, "loss": 13.2421, "lr": 1.2397742806111168e-08, "epoch": 1.2675244862684847, "percentage": 99.0, "elapsed_time": "20:52:20", "remaining_time": "0:12:38", "throughput": 724.7, "total_tokens": 54454880}
8118
  {"current_steps": 39600, "total_steps": 40000, "eval_loss": 15.15965461730957, "epoch": 1.2675244862684847, "percentage": 99.0, "elapsed_time": "20:57:35", "remaining_time": "0:12:42", "throughput": 721.68, "total_tokens": 54454880}
8119
+ {"current_steps": 39605, "total_steps": 40000, "loss": 13.242, "lr": 1.209052442764369e-08, "epoch": 1.2676845272389732, "percentage": 99.01, "elapsed_time": "20:57:38", "remaining_time": "0:12:32", "throughput": 721.75, "total_tokens": 54462032}
8120
+ {"current_steps": 39610, "total_steps": 40000, "loss": 14.6204, "lr": 1.17871594988328e-08, "epoch": 1.2678445682094617, "percentage": 99.02, "elapsed_time": "20:57:40", "remaining_time": "0:12:22", "throughput": 721.82, "total_tokens": 54468704}
8121
+ {"current_steps": 39615, "total_steps": 40000, "loss": 14.8327, "lr": 1.1487648066466072e-08, "epoch": 1.26800460917995, "percentage": 99.04, "elapsed_time": "20:57:41", "remaining_time": "0:12:13", "throughput": 721.89, "total_tokens": 54475296}
8122
+ {"current_steps": 39620, "total_steps": 40000, "loss": 17.1487, "lr": 1.1191990176728784e-08, "epoch": 1.2681646501504384, "percentage": 99.05, "elapsed_time": "20:57:43", "remaining_time": "0:12:03", "throughput": 721.96, "total_tokens": 54481920}
8123
+ {"current_steps": 39625, "total_steps": 40000, "loss": 12.6386, "lr": 1.0900185875215018e-08, "epoch": 1.268324691120927, "percentage": 99.06, "elapsed_time": "20:57:45", "remaining_time": "0:11:54", "throughput": 722.04, "total_tokens": 54488688}
8124
+ {"current_steps": 39630, "total_steps": 40000, "loss": 13.3821, "lr": 1.0612235206924891e-08, "epoch": 1.2684847320914154, "percentage": 99.08, "elapsed_time": "20:57:46", "remaining_time": "0:11:44", "throughput": 722.11, "total_tokens": 54495296}
8125
+ {"current_steps": 39635, "total_steps": 40000, "loss": 14.9168, "lr": 1.0328138216264549e-08, "epoch": 1.268644773061904, "percentage": 99.09, "elapsed_time": "20:57:48", "remaining_time": "0:11:34", "throughput": 722.19, "total_tokens": 54502112}
8126
+ {"current_steps": 39640, "total_steps": 40000, "loss": 15.2773, "lr": 1.004789494704339e-08, "epoch": 1.2688048140323924, "percentage": 99.1, "elapsed_time": "20:57:49", "remaining_time": "0:11:25", "throughput": 722.26, "total_tokens": 54508928}
8127
+ {"current_steps": 39645, "total_steps": 40000, "loss": 15.3889, "lr": 9.771505442482397e-09, "epoch": 1.2689648550028807, "percentage": 99.11, "elapsed_time": "20:57:51", "remaining_time": "0:11:15", "throughput": 722.33, "total_tokens": 54515536}
8128
+ {"current_steps": 39650, "total_steps": 40000, "loss": 14.4326, "lr": 9.498969745200259e-09, "epoch": 1.2691248959733692, "percentage": 99.12, "elapsed_time": "20:57:53", "remaining_time": "0:11:06", "throughput": 722.41, "total_tokens": 54522912}
8129
+ {"current_steps": 39655, "total_steps": 40000, "loss": 14.5261, "lr": 9.230287897230017e-09, "epoch": 1.2692849369438577, "percentage": 99.14, "elapsed_time": "20:57:54", "remaining_time": "0:10:56", "throughput": 722.49, "total_tokens": 54529600}
8130
+ {"current_steps": 39660, "total_steps": 40000, "loss": 15.9249, "lr": 8.965459940002419e-09, "epoch": 1.269444977914346, "percentage": 99.15, "elapsed_time": "20:57:56", "remaining_time": "0:10:47", "throughput": 722.56, "total_tokens": 54536256}
8131
+ {"current_steps": 39665, "total_steps": 40000, "loss": 16.1217, "lr": 8.704485914357019e-09, "epoch": 1.2696050188848345, "percentage": 99.16, "elapsed_time": "20:57:58", "remaining_time": "0:10:37", "throughput": 722.63, "total_tokens": 54543056}
8132
+ {"current_steps": 39670, "total_steps": 40000, "loss": 14.2989, "lr": 8.447365860539402e-09, "epoch": 1.269765059855323, "percentage": 99.17, "elapsed_time": "20:57:59", "remaining_time": "0:10:27", "throughput": 722.71, "total_tokens": 54550016}
8133
+ {"current_steps": 39675, "total_steps": 40000, "loss": 15.71, "lr": 8.194099818201184e-09, "epoch": 1.2699251008258114, "percentage": 99.19, "elapsed_time": "20:58:01", "remaining_time": "0:10:18", "throughput": 722.79, "total_tokens": 54556960}
8134
+ {"current_steps": 39680, "total_steps": 40000, "loss": 16.3551, "lr": 7.944687826400011e-09, "epoch": 1.2700851417963, "percentage": 99.2, "elapsed_time": "20:58:02", "remaining_time": "0:10:08", "throughput": 722.86, "total_tokens": 54563760}
8135
+ {"current_steps": 39685, "total_steps": 40000, "loss": 15.8843, "lr": 7.699129923599557e-09, "epoch": 1.2702451827667882, "percentage": 99.21, "elapsed_time": "20:58:04", "remaining_time": "0:09:59", "throughput": 722.94, "total_tokens": 54570768}
8136
+ {"current_steps": 39690, "total_steps": 40000, "loss": 14.8852, "lr": 7.457426147663982e-09, "epoch": 1.2704052237372767, "percentage": 99.22, "elapsed_time": "20:58:06", "remaining_time": "0:09:49", "throughput": 723.02, "total_tokens": 54577712}
8137
+ {"current_steps": 39695, "total_steps": 40000, "loss": 15.4928, "lr": 7.219576535871797e-09, "epoch": 1.2705652647077652, "percentage": 99.24, "elapsed_time": "20:58:07", "remaining_time": "0:09:40", "throughput": 723.09, "total_tokens": 54584176}
8138
+ {"current_steps": 39700, "total_steps": 40000, "loss": 14.1627, "lr": 6.985581124896445e-09, "epoch": 1.2707253056782537, "percentage": 99.25, "elapsed_time": "20:58:09", "remaining_time": "0:09:30", "throughput": 723.15, "total_tokens": 54590512}
8139
+ {"current_steps": 39705, "total_steps": 40000, "loss": 16.9927, "lr": 6.755439950828501e-09, "epoch": 1.270885346648742, "percentage": 99.26, "elapsed_time": "20:58:11", "remaining_time": "0:09:20", "throughput": 723.23, "total_tokens": 54597568}
8140
+ {"current_steps": 39710, "total_steps": 40000, "loss": 15.262, "lr": 6.5291530491562444e-09, "epoch": 1.2710453876192305, "percentage": 99.28, "elapsed_time": "20:58:12", "remaining_time": "0:09:11", "throughput": 723.31, "total_tokens": 54604624}
8141
+ {"current_steps": 39715, "total_steps": 40000, "loss": 15.3962, "lr": 6.3067204547739845e-09, "epoch": 1.271205428589719, "percentage": 99.29, "elapsed_time": "20:58:14", "remaining_time": "0:09:01", "throughput": 723.38, "total_tokens": 54611008}
8142
+ {"current_steps": 39720, "total_steps": 40000, "loss": 14.1461, "lr": 6.088142201987612e-09, "epoch": 1.2713654695602075, "percentage": 99.3, "elapsed_time": "20:58:15", "remaining_time": "0:08:52", "throughput": 723.46, "total_tokens": 54618160}
8143
+ {"current_steps": 39725, "total_steps": 40000, "loss": 14.6598, "lr": 5.873418324503499e-09, "epoch": 1.271525510530696, "percentage": 99.31, "elapsed_time": "20:58:17", "remaining_time": "0:08:42", "throughput": 723.53, "total_tokens": 54624560}
8144
+ {"current_steps": 39730, "total_steps": 40000, "loss": 14.3638, "lr": 5.6625488554340465e-09, "epoch": 1.2716855515011842, "percentage": 99.33, "elapsed_time": "20:58:19", "remaining_time": "0:08:33", "throughput": 723.6, "total_tokens": 54630960}
8145
+ {"current_steps": 39735, "total_steps": 40000, "loss": 16.0639, "lr": 5.455533827297688e-09, "epoch": 1.2718455924716727, "percentage": 99.34, "elapsed_time": "20:58:20", "remaining_time": "0:08:23", "throughput": 723.67, "total_tokens": 54637952}
8146
+ {"current_steps": 39740, "total_steps": 40000, "loss": 17.5274, "lr": 5.252373272018885e-09, "epoch": 1.2720056334421612, "percentage": 99.35, "elapsed_time": "20:58:22", "remaining_time": "0:08:13", "throughput": 723.75, "total_tokens": 54644960}
8147
+ {"current_steps": 39745, "total_steps": 40000, "loss": 13.8967, "lr": 5.053067220925356e-09, "epoch": 1.2721656744126497, "percentage": 99.36, "elapsed_time": "20:58:24", "remaining_time": "0:08:04", "throughput": 723.83, "total_tokens": 54651872}
8148
+ {"current_steps": 39750, "total_steps": 40000, "loss": 15.7298, "lr": 4.857615704759177e-09, "epoch": 1.272325715383138, "percentage": 99.38, "elapsed_time": "20:58:25", "remaining_time": "0:07:54", "throughput": 723.9, "total_tokens": 54658704}
8149
+ {"current_steps": 39755, "total_steps": 40000, "loss": 13.5589, "lr": 4.666018753654577e-09, "epoch": 1.2724857563536265, "percentage": 99.39, "elapsed_time": "20:58:27", "remaining_time": "0:07:45", "throughput": 723.98, "total_tokens": 54665632}
8150
+ {"current_steps": 39760, "total_steps": 40000, "loss": 15.4519, "lr": 4.478276397162917e-09, "epoch": 1.272645797324115, "percentage": 99.4, "elapsed_time": "20:58:28", "remaining_time": "0:07:35", "throughput": 724.05, "total_tokens": 54672432}
8151
+ {"current_steps": 39765, "total_steps": 40000, "loss": 15.04, "lr": 4.294388664233262e-09, "epoch": 1.2728058382946035, "percentage": 99.41, "elapsed_time": "20:58:30", "remaining_time": "0:07:26", "throughput": 724.13, "total_tokens": 54679136}
8152
+ {"current_steps": 39770, "total_steps": 40000, "loss": 15.811, "lr": 4.114355583223484e-09, "epoch": 1.272965879265092, "percentage": 99.42, "elapsed_time": "20:58:32", "remaining_time": "0:07:16", "throughput": 724.19, "total_tokens": 54685456}
8153
+ {"current_steps": 39775, "total_steps": 40000, "loss": 14.494, "lr": 3.9381771818974845e-09, "epoch": 1.2731259202355802, "percentage": 99.44, "elapsed_time": "20:58:33", "remaining_time": "0:07:07", "throughput": 724.26, "total_tokens": 54691936}
8154
+ {"current_steps": 39780, "total_steps": 40000, "loss": 14.6975, "lr": 3.765853487427973e-09, "epoch": 1.2732859612060687, "percentage": 99.45, "elapsed_time": "20:58:35", "remaining_time": "0:06:57", "throughput": 724.34, "total_tokens": 54698944}
8155
+ {"current_steps": 39785, "total_steps": 40000, "loss": 13.1805, "lr": 3.5973845263825857e-09, "epoch": 1.2734460021765572, "percentage": 99.46, "elapsed_time": "20:58:36", "remaining_time": "0:06:48", "throughput": 724.42, "total_tokens": 54706176}
8156
+ {"current_steps": 39790, "total_steps": 40000, "loss": 14.3417, "lr": 3.4327703247488684e-09, "epoch": 1.2736060431470455, "percentage": 99.48, "elapsed_time": "20:58:38", "remaining_time": "0:06:38", "throughput": 724.5, "total_tokens": 54713360}
8157
+ {"current_steps": 39795, "total_steps": 40000, "loss": 15.5161, "lr": 3.2720109079037443e-09, "epoch": 1.273766084117534, "percentage": 99.49, "elapsed_time": "20:58:40", "remaining_time": "0:06:29", "throughput": 724.58, "total_tokens": 54720384}
8158
+ {"current_steps": 39800, "total_steps": 40000, "loss": 14.5777, "lr": 3.1151063006468193e-09, "epoch": 1.2739261250880225, "percentage": 99.5, "elapsed_time": "20:58:41", "remaining_time": "0:06:19", "throughput": 724.66, "total_tokens": 54727600}
8159
+ {"current_steps": 39800, "total_steps": 40000, "eval_loss": 15.15965461730957, "epoch": 1.2739261250880225, "percentage": 99.5, "elapsed_time": "21:03:56", "remaining_time": "0:06:21", "throughput": 721.65, "total_tokens": 54727600}