{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 5000,
"global_step": 29652,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.050586806960744635,
"grad_norm": 3.026057720184326,
"learning_rate": 4.9156886550654254e-05,
"loss": 7.7479,
"step": 500
},
{
"epoch": 0.10117361392148927,
"grad_norm": 2.428532361984253,
"learning_rate": 4.831377310130851e-05,
"loss": 7.602,
"step": 1000
},
{
"epoch": 0.1517604208822339,
"grad_norm": 3.12160587310791,
"learning_rate": 4.7470659651962764e-05,
"loss": 7.5609,
"step": 1500
},
{
"epoch": 0.20234722784297854,
"grad_norm": 2.7097604274749756,
"learning_rate": 4.662754620261703e-05,
"loss": 7.5615,
"step": 2000
},
{
"epoch": 0.2529340348037232,
"grad_norm": 2.684849500656128,
"learning_rate": 4.578443275327128e-05,
"loss": 7.5591,
"step": 2500
},
{
"epoch": 0.3035208417644678,
"grad_norm": 2.7275538444519043,
"learning_rate": 4.494131930392554e-05,
"loss": 7.5242,
"step": 3000
},
{
"epoch": 0.35410764872521244,
"grad_norm": 3.8471314907073975,
"learning_rate": 4.409820585457979e-05,
"loss": 7.5216,
"step": 3500
},
{
"epoch": 0.4046944556859571,
"grad_norm": 3.268939971923828,
"learning_rate": 4.325509240523405e-05,
"loss": 7.5121,
"step": 4000
},
{
"epoch": 0.45528126264670177,
"grad_norm": 4.609728813171387,
"learning_rate": 4.24119789558883e-05,
"loss": 7.4998,
"step": 4500
},
{
"epoch": 0.5058680696074463,
"grad_norm": 3.0856144428253174,
"learning_rate": 4.156886550654257e-05,
"loss": 7.4945,
"step": 5000
},
{
"epoch": 0.556454876568191,
"grad_norm": 3.188549757003784,
"learning_rate": 4.072575205719682e-05,
"loss": 7.4944,
"step": 5500
},
{
"epoch": 0.6070416835289356,
"grad_norm": 3.380218982696533,
"learning_rate": 3.988263860785108e-05,
"loss": 7.5203,
"step": 6000
},
{
"epoch": 0.6576284904896803,
"grad_norm": 3.2636780738830566,
"learning_rate": 3.903952515850533e-05,
"loss": 7.4899,
"step": 6500
},
{
"epoch": 0.7082152974504249,
"grad_norm": 3.4519433975219727,
"learning_rate": 3.819641170915959e-05,
"loss": 7.4763,
"step": 7000
},
{
"epoch": 0.7588021044111696,
"grad_norm": 3.5375866889953613,
"learning_rate": 3.735329825981384e-05,
"loss": 7.4911,
"step": 7500
},
{
"epoch": 0.8093889113719142,
"grad_norm": 3.090251922607422,
"learning_rate": 3.65101848104681e-05,
"loss": 7.4606,
"step": 8000
},
{
"epoch": 0.8599757183326588,
"grad_norm": 2.6117258071899414,
"learning_rate": 3.5667071361122356e-05,
"loss": 7.4925,
"step": 8500
},
{
"epoch": 0.9105625252934035,
"grad_norm": 3.3298494815826416,
"learning_rate": 3.482395791177661e-05,
"loss": 7.4626,
"step": 9000
},
{
"epoch": 0.9611493322541481,
"grad_norm": 3.267622232437134,
"learning_rate": 3.398084446243087e-05,
"loss": 7.4583,
"step": 9500
},
{
"epoch": 1.0117361392148927,
"grad_norm": 3.9384641647338867,
"learning_rate": 3.313773101308512e-05,
"loss": 7.471,
"step": 10000
},
{
"epoch": 1.0623229461756374,
"grad_norm": 3.3493237495422363,
"learning_rate": 3.229461756373938e-05,
"loss": 7.4833,
"step": 10500
},
{
"epoch": 1.112909753136382,
"grad_norm": 4.062368869781494,
"learning_rate": 3.145150411439363e-05,
"loss": 7.4637,
"step": 11000
},
{
"epoch": 1.1634965600971268,
"grad_norm": 3.0285532474517822,
"learning_rate": 3.0608390665047894e-05,
"loss": 7.4524,
"step": 11500
},
{
"epoch": 1.2140833670578712,
"grad_norm": 3.087231159210205,
"learning_rate": 2.976527721570215e-05,
"loss": 7.4797,
"step": 12000
},
{
"epoch": 1.264670174018616,
"grad_norm": 2.7964272499084473,
"learning_rate": 2.8922163766356404e-05,
"loss": 7.4641,
"step": 12500
},
{
"epoch": 1.3152569809793606,
"grad_norm": 3.68481183052063,
"learning_rate": 2.807905031701066e-05,
"loss": 7.452,
"step": 13000
},
{
"epoch": 1.3658437879401053,
"grad_norm": 3.3050220012664795,
"learning_rate": 2.7235936867664915e-05,
"loss": 7.4593,
"step": 13500
},
{
"epoch": 1.41643059490085,
"grad_norm": 2.939967155456543,
"learning_rate": 2.639282341831917e-05,
"loss": 7.4565,
"step": 14000
},
{
"epoch": 1.4670174018615945,
"grad_norm": 2.9299378395080566,
"learning_rate": 2.5549709968973428e-05,
"loss": 7.4769,
"step": 14500
},
{
"epoch": 1.5176042088223392,
"grad_norm": 4.469327449798584,
"learning_rate": 2.470659651962768e-05,
"loss": 7.4498,
"step": 15000
},
{
"epoch": 1.5681910157830838,
"grad_norm": 3.3183658123016357,
"learning_rate": 2.386348307028194e-05,
"loss": 7.4527,
"step": 15500
},
{
"epoch": 1.6187778227438283,
"grad_norm": 3.6595232486724854,
"learning_rate": 2.3020369620936194e-05,
"loss": 7.4346,
"step": 16000
},
{
"epoch": 1.669364629704573,
"grad_norm": 3.1423637866973877,
"learning_rate": 2.217725617159045e-05,
"loss": 7.4697,
"step": 16500
},
{
"epoch": 1.7199514366653177,
"grad_norm": 2.9798882007598877,
"learning_rate": 2.1334142722244707e-05,
"loss": 7.4587,
"step": 17000
},
{
"epoch": 1.7705382436260622,
"grad_norm": 3.496962547302246,
"learning_rate": 2.0491029272898962e-05,
"loss": 7.4842,
"step": 17500
},
{
"epoch": 1.821125050586807,
"grad_norm": 3.2860915660858154,
"learning_rate": 1.9647915823553217e-05,
"loss": 7.4505,
"step": 18000
},
{
"epoch": 1.8717118575475515,
"grad_norm": 3.7444324493408203,
"learning_rate": 1.8804802374207476e-05,
"loss": 7.467,
"step": 18500
},
{
"epoch": 1.9222986645082962,
"grad_norm": 12.527898788452148,
"learning_rate": 1.796168892486173e-05,
"loss": 7.4629,
"step": 19000
},
{
"epoch": 1.972885471469041,
"grad_norm": 3.0026357173919678,
"learning_rate": 1.7118575475515986e-05,
"loss": 7.4508,
"step": 19500
},
{
"epoch": 2.0234722784297854,
"grad_norm": 3.080428123474121,
"learning_rate": 1.6275462026170245e-05,
"loss": 7.4562,
"step": 20000
},
{
"epoch": 2.0740590853905303,
"grad_norm": 3.0115535259246826,
"learning_rate": 1.54323485768245e-05,
"loss": 7.4614,
"step": 20500
},
{
"epoch": 2.1246458923512748,
"grad_norm": 3.0066559314727783,
"learning_rate": 1.4589235127478753e-05,
"loss": 7.4522,
"step": 21000
},
{
"epoch": 2.1752326993120192,
"grad_norm": 3.1596481800079346,
"learning_rate": 1.3746121678133012e-05,
"loss": 7.4373,
"step": 21500
},
{
"epoch": 2.225819506272764,
"grad_norm": 3.6992828845977783,
"learning_rate": 1.2903008228787267e-05,
"loss": 7.4649,
"step": 22000
},
{
"epoch": 2.2764063132335086,
"grad_norm": 2.809210777282715,
"learning_rate": 1.2059894779441522e-05,
"loss": 7.4695,
"step": 22500
},
{
"epoch": 2.3269931201942535,
"grad_norm": 3.017029047012329,
"learning_rate": 1.1216781330095779e-05,
"loss": 7.439,
"step": 23000
},
{
"epoch": 2.377579927154998,
"grad_norm": 3.747455596923828,
"learning_rate": 1.0373667880750036e-05,
"loss": 7.4468,
"step": 23500
},
{
"epoch": 2.4281667341157425,
"grad_norm": 2.9909703731536865,
"learning_rate": 9.53055443140429e-06,
"loss": 7.4568,
"step": 24000
},
{
"epoch": 2.4787535410764874,
"grad_norm": 2.6719205379486084,
"learning_rate": 8.687440982058546e-06,
"loss": 7.4599,
"step": 24500
},
{
"epoch": 2.529340348037232,
"grad_norm": 3.0623087882995605,
"learning_rate": 7.844327532712801e-06,
"loss": 7.4613,
"step": 25000
},
{
"epoch": 2.5799271549979768,
"grad_norm": 3.4007935523986816,
"learning_rate": 7.001214083367058e-06,
"loss": 7.4496,
"step": 25500
},
{
"epoch": 2.6305139619587212,
"grad_norm": 3.7014873027801514,
"learning_rate": 6.158100634021314e-06,
"loss": 7.4569,
"step": 26000
},
{
"epoch": 2.6811007689194657,
"grad_norm": 2.988811492919922,
"learning_rate": 5.314987184675571e-06,
"loss": 7.4381,
"step": 26500
},
{
"epoch": 2.7316875758802106,
"grad_norm": 4.864758014678955,
"learning_rate": 4.471873735329827e-06,
"loss": 7.4846,
"step": 27000
},
{
"epoch": 2.782274382840955,
"grad_norm": 2.810575246810913,
"learning_rate": 3.628760285984082e-06,
"loss": 7.4511,
"step": 27500
},
{
"epoch": 2.8328611898017,
"grad_norm": 3.2787814140319824,
"learning_rate": 2.785646836638338e-06,
"loss": 7.4568,
"step": 28000
},
{
"epoch": 2.8834479967624445,
"grad_norm": 3.12109637260437,
"learning_rate": 1.942533387292594e-06,
"loss": 7.4372,
"step": 28500
},
{
"epoch": 2.934034803723189,
"grad_norm": 3.1420302391052246,
"learning_rate": 1.0994199379468503e-06,
"loss": 7.4484,
"step": 29000
},
{
"epoch": 2.9846216106839334,
"grad_norm": 3.052818775177002,
"learning_rate": 2.5630648860110616e-07,
"loss": 7.461,
"step": 29500
},
{
"epoch": 3.0,
"step": 29652,
"total_flos": 1.2851147000984371e+17,
"train_loss": 7.479052871051595,
"train_runtime": 34661.0306,
"train_samples_per_second": 1.711,
"train_steps_per_second": 0.855
}
],
"logging_steps": 500,
"max_steps": 29652,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 5000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.2851147000984371e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}