SWIN_Gaudi_100 / trainer_state.json
faridkarimli's picture
End of training
eb3b9d0 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 100.0,
"eval_steps": 500,
"global_step": 131300,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.38080731150038083,
"grad_norm": 1.5335801839828491,
"learning_rate": 0.000996191926884996,
"loss": 6.3554,
"max_memory_allocated (GB)": 4.18,
"memory_allocated (GB)": 2.46,
"step": 500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.7616146230007617,
"grad_norm": 1.3639589548110962,
"learning_rate": 0.0009923838537699923,
"loss": 4.0897,
"max_memory_allocated (GB)": 4.18,
"memory_allocated (GB)": 2.46,
"step": 1000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.0,
"eval_accuracy": 0.0689470393855497,
"eval_loss": 6.560576438903809,
"eval_runtime": 321.5446,
"eval_samples_per_second": 522.338,
"eval_steps_per_second": 1.023,
"max_memory_allocated (GB)": 21.24,
"memory_allocated (GB)": 2.46,
"step": 1313,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.1424219345011424,
"grad_norm": 1.2049754858016968,
"learning_rate": 0.0009885757806549886,
"loss": 3.3285,
"max_memory_allocated (GB)": 21.24,
"memory_allocated (GB)": 2.46,
"step": 1500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.5232292460015233,
"grad_norm": 1.1118249893188477,
"learning_rate": 0.0009847677075399848,
"loss": 2.9296,
"max_memory_allocated (GB)": 21.24,
"memory_allocated (GB)": 2.46,
"step": 2000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.904036557501904,
"grad_norm": 1.0413897037506104,
"learning_rate": 0.0009809596344249809,
"loss": 2.8297,
"max_memory_allocated (GB)": 21.24,
"memory_allocated (GB)": 2.46,
"step": 2500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.0,
"eval_accuracy": 0.0806466017683308,
"eval_loss": 6.380691051483154,
"eval_runtime": 296.9621,
"eval_samples_per_second": 565.577,
"eval_steps_per_second": 1.108,
"max_memory_allocated (GB)": 21.25,
"memory_allocated (GB)": 2.46,
"step": 2626,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.2848438690022848,
"grad_norm": 1.0910780429840088,
"learning_rate": 0.0009771515613099771,
"loss": 2.4255,
"max_memory_allocated (GB)": 21.25,
"memory_allocated (GB)": 2.46,
"step": 3000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.6656511805026657,
"grad_norm": 0.9615127444267273,
"learning_rate": 0.0009733434881949733,
"loss": 2.3951,
"max_memory_allocated (GB)": 21.25,
"memory_allocated (GB)": 2.46,
"step": 3500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.0,
"eval_accuracy": 0.08840463219314697,
"eval_loss": 6.42078161239624,
"eval_runtime": 295.0967,
"eval_samples_per_second": 569.152,
"eval_steps_per_second": 1.115,
"max_memory_allocated (GB)": 21.25,
"memory_allocated (GB)": 2.46,
"step": 3939,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.0464584920030466,
"grad_norm": 0.9677470326423645,
"learning_rate": 0.0009695354150799695,
"loss": 2.3282,
"max_memory_allocated (GB)": 21.25,
"memory_allocated (GB)": 2.46,
"step": 4000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.427265803503427,
"grad_norm": 0.924505889415741,
"learning_rate": 0.0009657273419649657,
"loss": 2.034,
"max_memory_allocated (GB)": 21.25,
"memory_allocated (GB)": 2.46,
"step": 4500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.808073115003808,
"grad_norm": 0.943131685256958,
"learning_rate": 0.0009619192688499619,
"loss": 2.1003,
"max_memory_allocated (GB)": 21.25,
"memory_allocated (GB)": 2.46,
"step": 5000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.0,
"eval_accuracy": 0.10144979309934209,
"eval_loss": 6.180690765380859,
"eval_runtime": 294.8382,
"eval_samples_per_second": 569.651,
"eval_steps_per_second": 1.116,
"max_memory_allocated (GB)": 21.25,
"memory_allocated (GB)": 2.46,
"step": 5252,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.188880426504189,
"grad_norm": 0.9539695978164673,
"learning_rate": 0.0009581111957349581,
"loss": 1.9654,
"max_memory_allocated (GB)": 21.25,
"memory_allocated (GB)": 2.46,
"step": 5500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.5696877380045695,
"grad_norm": 0.958723247051239,
"learning_rate": 0.0009543031226199543,
"loss": 1.8655,
"max_memory_allocated (GB)": 21.25,
"memory_allocated (GB)": 2.46,
"step": 6000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.9504950495049505,
"grad_norm": 0.9000511765480042,
"learning_rate": 0.0009504950495049505,
"loss": 1.93,
"max_memory_allocated (GB)": 21.25,
"memory_allocated (GB)": 2.46,
"step": 6500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 5.0,
"eval_accuracy": 0.10267631210740973,
"eval_loss": 6.32958459854126,
"eval_runtime": 293.9778,
"eval_samples_per_second": 571.319,
"eval_steps_per_second": 1.119,
"max_memory_allocated (GB)": 21.25,
"memory_allocated (GB)": 2.46,
"step": 6565,
"total_memory_available (GB)": 94.62
},
{
"epoch": 5.331302361005331,
"grad_norm": 0.9381927251815796,
"learning_rate": 0.0009466869763899467,
"loss": 1.6809,
"max_memory_allocated (GB)": 21.25,
"memory_allocated (GB)": 2.46,
"step": 7000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 5.712109672505712,
"grad_norm": 0.9558943510055542,
"learning_rate": 0.0009428789032749429,
"loss": 1.7486,
"max_memory_allocated (GB)": 21.25,
"memory_allocated (GB)": 2.46,
"step": 7500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 6.0,
"eval_accuracy": 0.10908279003304457,
"eval_loss": 6.237691402435303,
"eval_runtime": 294.7934,
"eval_samples_per_second": 569.738,
"eval_steps_per_second": 1.116,
"max_memory_allocated (GB)": 21.25,
"memory_allocated (GB)": 2.46,
"step": 7878,
"total_memory_available (GB)": 94.62
},
{
"epoch": 6.092916984006093,
"grad_norm": 0.8602584004402161,
"learning_rate": 0.0009390708301599391,
"loss": 1.719,
"max_memory_allocated (GB)": 21.25,
"memory_allocated (GB)": 2.46,
"step": 8000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 6.473724295506473,
"grad_norm": 0.8856430649757385,
"learning_rate": 0.0009352627570449353,
"loss": 1.5757,
"max_memory_allocated (GB)": 21.25,
"memory_allocated (GB)": 2.46,
"step": 8500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 6.854531607006854,
"grad_norm": 0.8462656140327454,
"learning_rate": 0.0009314546839299315,
"loss": 1.6498,
"max_memory_allocated (GB)": 21.25,
"memory_allocated (GB)": 2.46,
"step": 9000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 7.0,
"eval_accuracy": 0.10559971420916317,
"eval_loss": 6.480878829956055,
"eval_runtime": 293.5226,
"eval_samples_per_second": 572.205,
"eval_steps_per_second": 1.121,
"max_memory_allocated (GB)": 21.25,
"memory_allocated (GB)": 2.46,
"step": 9191,
"total_memory_available (GB)": 94.62
},
{
"epoch": 7.235338918507235,
"grad_norm": 0.8403063416481018,
"learning_rate": 0.0009276466108149277,
"loss": 1.5278,
"max_memory_allocated (GB)": 21.25,
"memory_allocated (GB)": 2.46,
"step": 9500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 7.616146230007616,
"grad_norm": 0.8911013007164001,
"learning_rate": 0.0009238385376999238,
"loss": 1.5124,
"max_memory_allocated (GB)": 21.25,
"memory_allocated (GB)": 2.46,
"step": 10000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 7.996953541507997,
"grad_norm": 0.8742530941963196,
"learning_rate": 0.00092003046458492,
"loss": 1.5722,
"max_memory_allocated (GB)": 21.25,
"memory_allocated (GB)": 2.46,
"step": 10500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 8.0,
"eval_accuracy": 0.11500104194575927,
"eval_loss": 6.268129348754883,
"eval_runtime": 294.6985,
"eval_samples_per_second": 569.921,
"eval_steps_per_second": 1.116,
"max_memory_allocated (GB)": 21.25,
"memory_allocated (GB)": 2.46,
"step": 10504,
"total_memory_available (GB)": 94.62
},
{
"epoch": 8.377760853008377,
"grad_norm": 0.8218346834182739,
"learning_rate": 0.0009162223914699162,
"loss": 1.3603,
"max_memory_allocated (GB)": 21.25,
"memory_allocated (GB)": 2.46,
"step": 11000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 8.758568164508759,
"grad_norm": 0.8631531596183777,
"learning_rate": 0.0009124143183549124,
"loss": 1.458,
"max_memory_allocated (GB)": 21.25,
"memory_allocated (GB)": 2.46,
"step": 11500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 9.0,
"eval_accuracy": 0.11367330534964723,
"eval_loss": 6.290684223175049,
"eval_runtime": 293.6278,
"eval_samples_per_second": 572.0,
"eval_steps_per_second": 1.12,
"max_memory_allocated (GB)": 21.25,
"memory_allocated (GB)": 2.46,
"step": 11817,
"total_memory_available (GB)": 94.62
},
{
"epoch": 9.139375476009139,
"grad_norm": 0.8099371790885925,
"learning_rate": 0.0009086062452399086,
"loss": 1.4071,
"max_memory_allocated (GB)": 21.25,
"memory_allocated (GB)": 2.46,
"step": 12000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 9.52018278750952,
"grad_norm": 0.9186742901802063,
"learning_rate": 0.0009047981721249048,
"loss": 1.3494,
"max_memory_allocated (GB)": 21.25,
"memory_allocated (GB)": 2.46,
"step": 12500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 9.900990099009901,
"grad_norm": 0.8492868542671204,
"learning_rate": 0.000900990099009901,
"loss": 1.4131,
"max_memory_allocated (GB)": 21.25,
"memory_allocated (GB)": 2.46,
"step": 13000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 10.0,
"eval_accuracy": 0.1148819624304129,
"eval_loss": 6.425012588500977,
"eval_runtime": 294.2316,
"eval_samples_per_second": 570.826,
"eval_steps_per_second": 1.118,
"max_memory_allocated (GB)": 21.25,
"memory_allocated (GB)": 2.46,
"step": 13130,
"total_memory_available (GB)": 94.62
},
{
"epoch": 10.281797410510281,
"grad_norm": 0.8763368725776672,
"learning_rate": 0.0008971820258948972,
"loss": 1.2825,
"max_memory_allocated (GB)": 21.25,
"memory_allocated (GB)": 2.46,
"step": 13500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 10.662604722010663,
"grad_norm": 0.8775655627250671,
"learning_rate": 0.0008933739527798934,
"loss": 1.3132,
"max_memory_allocated (GB)": 21.25,
"memory_allocated (GB)": 2.46,
"step": 14000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 11.0,
"eval_accuracy": 0.11482242267273972,
"eval_loss": 6.423275470733643,
"eval_runtime": 293.1251,
"eval_samples_per_second": 572.981,
"eval_steps_per_second": 1.122,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 14443,
"total_memory_available (GB)": 94.62
},
{
"epoch": 11.043412033511043,
"grad_norm": 0.7483058571815491,
"learning_rate": 0.0008895658796648896,
"loss": 1.3292,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 14500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 11.424219345011425,
"grad_norm": 0.8545175194740295,
"learning_rate": 0.0008857578065498858,
"loss": 1.1967,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 15000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 11.805026656511805,
"grad_norm": 0.7811135649681091,
"learning_rate": 0.000881949733434882,
"loss": 1.2835,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 15500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 12.0,
"eval_accuracy": 0.11918073293441696,
"eval_loss": 6.378399848937988,
"eval_runtime": 294.7386,
"eval_samples_per_second": 569.844,
"eval_steps_per_second": 1.116,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 15756,
"total_memory_available (GB)": 94.62
},
{
"epoch": 12.185833968012187,
"grad_norm": 0.7616448402404785,
"learning_rate": 0.0008781416603198782,
"loss": 1.225,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 16000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 12.566641279512567,
"grad_norm": 0.8137641549110413,
"learning_rate": 0.0008743335872048743,
"loss": 1.1925,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 16500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 12.947448591012947,
"grad_norm": 0.8130796551704407,
"learning_rate": 0.0008705255140898705,
"loss": 1.2414,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 17000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 13.0,
"eval_accuracy": 0.11609061951117859,
"eval_loss": 6.441605091094971,
"eval_runtime": 293.5962,
"eval_samples_per_second": 572.061,
"eval_steps_per_second": 1.121,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 17069,
"total_memory_available (GB)": 94.62
},
{
"epoch": 13.328255902513328,
"grad_norm": 0.7482279539108276,
"learning_rate": 0.0008667174409748667,
"loss": 1.1175,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 17500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 13.709063214013709,
"grad_norm": 0.8002237677574158,
"learning_rate": 0.0008629093678598629,
"loss": 1.1652,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 18000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 14.0,
"eval_accuracy": 0.11941293798934238,
"eval_loss": 6.50691556930542,
"eval_runtime": 293.871,
"eval_samples_per_second": 571.526,
"eval_steps_per_second": 1.12,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 18382,
"total_memory_available (GB)": 94.62
},
{
"epoch": 14.08987052551409,
"grad_norm": 0.7629320621490479,
"learning_rate": 0.0008591012947448591,
"loss": 1.1667,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 18500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 14.47067783701447,
"grad_norm": 0.8081715106964111,
"learning_rate": 0.0008552932216298553,
"loss": 1.0783,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 19000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 14.851485148514852,
"grad_norm": 0.8121427893638611,
"learning_rate": 0.0008514851485148515,
"loss": 1.1415,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 19500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 15.0,
"eval_accuracy": 0.12653984698282278,
"eval_loss": 6.384666442871094,
"eval_runtime": 295.5264,
"eval_samples_per_second": 568.325,
"eval_steps_per_second": 1.113,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 19695,
"total_memory_available (GB)": 94.62
},
{
"epoch": 15.232292460015232,
"grad_norm": 0.7696598768234253,
"learning_rate": 0.0008476770753998477,
"loss": 1.0709,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 20000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 15.613099771515612,
"grad_norm": 0.8182108402252197,
"learning_rate": 0.0008438690022848439,
"loss": 1.0681,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 20500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 15.993907083015994,
"grad_norm": 0.829287052154541,
"learning_rate": 0.0008400609291698401,
"loss": 1.118,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 21000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 16.0,
"eval_accuracy": 0.12646244529784764,
"eval_loss": 6.311014652252197,
"eval_runtime": 293.3977,
"eval_samples_per_second": 572.448,
"eval_steps_per_second": 1.121,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 21008,
"total_memory_available (GB)": 94.62
},
{
"epoch": 16.374714394516374,
"grad_norm": 0.8062915205955505,
"learning_rate": 0.0008362528560548363,
"loss": 0.9966,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 21500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 16.755521706016754,
"grad_norm": 0.8203598260879517,
"learning_rate": 0.0008324447829398325,
"loss": 1.065,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 22000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 17.0,
"eval_accuracy": 0.128337947664553,
"eval_loss": 6.402354717254639,
"eval_runtime": 295.2238,
"eval_samples_per_second": 568.907,
"eval_steps_per_second": 1.114,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 22321,
"total_memory_available (GB)": 94.62
},
{
"epoch": 17.136329017517138,
"grad_norm": 0.732295572757721,
"learning_rate": 0.0008286367098248287,
"loss": 1.0415,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 22500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 17.517136329017518,
"grad_norm": 0.7375497221946716,
"learning_rate": 0.0008248286367098248,
"loss": 0.9912,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 23000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 17.897943640517898,
"grad_norm": 0.7598251104354858,
"learning_rate": 0.000821020563594821,
"loss": 1.0469,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 23500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 18.0,
"eval_accuracy": 0.1317555297549939,
"eval_loss": 6.188817977905273,
"eval_runtime": 294.5179,
"eval_samples_per_second": 570.271,
"eval_steps_per_second": 1.117,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 23634,
"total_memory_available (GB)": 94.62
},
{
"epoch": 18.278750952018278,
"grad_norm": 0.7098336815834045,
"learning_rate": 0.0008172124904798172,
"loss": 0.9608,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 24000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 18.659558263518658,
"grad_norm": 0.7254419922828674,
"learning_rate": 0.0008134044173648134,
"loss": 0.978,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 24500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 19.0,
"eval_accuracy": 0.13280938346580928,
"eval_loss": 6.488752841949463,
"eval_runtime": 291.3325,
"eval_samples_per_second": 576.506,
"eval_steps_per_second": 1.129,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 24947,
"total_memory_available (GB)": 94.62
},
{
"epoch": 19.04036557501904,
"grad_norm": 0.759738028049469,
"learning_rate": 0.0008095963442498096,
"loss": 1.0032,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 25000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 19.42117288651942,
"grad_norm": 0.7547454833984375,
"learning_rate": 0.0008057882711348058,
"loss": 0.9158,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 25500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 19.801980198019802,
"grad_norm": 0.7352393269538879,
"learning_rate": 0.000801980198019802,
"loss": 0.9734,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 26000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 20.0,
"eval_accuracy": 0.13280342949004198,
"eval_loss": 6.357004165649414,
"eval_runtime": 293.5583,
"eval_samples_per_second": 572.135,
"eval_steps_per_second": 1.121,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 26260,
"total_memory_available (GB)": 94.62
},
{
"epoch": 20.182787509520182,
"grad_norm": 0.7476137280464172,
"learning_rate": 0.0007981721249047982,
"loss": 0.9462,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 26500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 20.563594821020562,
"grad_norm": 0.7356305718421936,
"learning_rate": 0.0007943640517897944,
"loss": 0.9229,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 27000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 20.944402132520946,
"grad_norm": 0.6908999085426331,
"learning_rate": 0.0007905559786747906,
"loss": 0.9602,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 27500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 21.0,
"eval_accuracy": 0.13345836682444703,
"eval_loss": 6.237915515899658,
"eval_runtime": 294.0513,
"eval_samples_per_second": 571.176,
"eval_steps_per_second": 1.119,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 27573,
"total_memory_available (GB)": 94.62
},
{
"epoch": 21.325209444021326,
"grad_norm": 0.6861099600791931,
"learning_rate": 0.0007867479055597868,
"loss": 0.8804,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 28000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 21.706016755521706,
"grad_norm": 0.7063835263252258,
"learning_rate": 0.000782939832444783,
"loss": 0.9069,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 28500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 22.0,
"eval_accuracy": 0.13340478104254117,
"eval_loss": 6.306626796722412,
"eval_runtime": 293.3464,
"eval_samples_per_second": 572.548,
"eval_steps_per_second": 1.122,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 28886,
"total_memory_available (GB)": 94.62
},
{
"epoch": 22.086824067022086,
"grad_norm": 0.6700064539909363,
"learning_rate": 0.0007791317593297792,
"loss": 0.9188,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 29000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 22.46763137852247,
"grad_norm": 0.7254114151000977,
"learning_rate": 0.0007753236862147753,
"loss": 0.8521,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 29500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 22.84843869002285,
"grad_norm": 0.7956221699714661,
"learning_rate": 0.0007715156130997715,
"loss": 0.8996,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 30000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 23.0,
"eval_accuracy": 0.13276175165967075,
"eval_loss": 6.250992774963379,
"eval_runtime": 293.8104,
"eval_samples_per_second": 571.644,
"eval_steps_per_second": 1.12,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 30199,
"total_memory_available (GB)": 94.62
},
{
"epoch": 23.22924600152323,
"grad_norm": 0.7165413498878479,
"learning_rate": 0.0007677075399847677,
"loss": 0.8606,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 30500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 23.61005331302361,
"grad_norm": 0.6925890445709229,
"learning_rate": 0.0007638994668697639,
"loss": 0.8553,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 31000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 23.99086062452399,
"grad_norm": 0.7103093266487122,
"learning_rate": 0.0007600913937547601,
"loss": 0.893,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 31500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 24.0,
"eval_accuracy": 0.13703075228483821,
"eval_loss": 6.409420490264893,
"eval_runtime": 292.9538,
"eval_samples_per_second": 573.316,
"eval_steps_per_second": 1.123,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 31512,
"total_memory_available (GB)": 94.62
},
{
"epoch": 24.371667936024373,
"grad_norm": 0.6945925951004028,
"learning_rate": 0.0007562833206397562,
"loss": 0.8036,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 32000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 24.752475247524753,
"grad_norm": 0.7803900241851807,
"learning_rate": 0.0007524752475247525,
"loss": 0.8494,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 32500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 25.0,
"eval_accuracy": 0.13862046381471227,
"eval_loss": 6.323179721832275,
"eval_runtime": 295.4271,
"eval_samples_per_second": 568.516,
"eval_steps_per_second": 1.114,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 32825,
"total_memory_available (GB)": 94.62
},
{
"epoch": 25.133282559025133,
"grad_norm": 0.6690163016319275,
"learning_rate": 0.0007486671744097486,
"loss": 0.8476,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 33000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 25.514089870525513,
"grad_norm": 0.7143478989601135,
"learning_rate": 0.0007448591012947449,
"loss": 0.8046,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 33500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 25.894897182025893,
"grad_norm": 0.7087588310241699,
"learning_rate": 0.000741051028179741,
"loss": 0.8507,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 34000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 26.0,
"eval_accuracy": 0.13611979399243845,
"eval_loss": 6.426168441772461,
"eval_runtime": 293.8071,
"eval_samples_per_second": 571.651,
"eval_steps_per_second": 1.12,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 34138,
"total_memory_available (GB)": 94.62
},
{
"epoch": 26.275704493526277,
"grad_norm": 0.6879526376724243,
"learning_rate": 0.0007372429550647373,
"loss": 0.7922,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 34500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 26.656511805026657,
"grad_norm": 0.7182181477546692,
"learning_rate": 0.0007334348819497334,
"loss": 0.8065,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 35000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 27.0,
"eval_accuracy": 0.13387514512815932,
"eval_loss": 6.415595531463623,
"eval_runtime": 294.5186,
"eval_samples_per_second": 570.27,
"eval_steps_per_second": 1.117,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 35451,
"total_memory_available (GB)": 94.62
},
{
"epoch": 27.037319116527037,
"grad_norm": 0.6910179853439331,
"learning_rate": 0.0007296268088347297,
"loss": 0.8217,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 35500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 27.418126428027417,
"grad_norm": 0.6327067017555237,
"learning_rate": 0.0007258187357197257,
"loss": 0.754,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 36000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 27.798933739527797,
"grad_norm": 0.7196256518363953,
"learning_rate": 0.000722010662604722,
"loss": 0.7956,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 36500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 28.0,
"eval_accuracy": 0.13950760620404276,
"eval_loss": 6.287784099578857,
"eval_runtime": 294.6907,
"eval_samples_per_second": 569.937,
"eval_steps_per_second": 1.116,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 36764,
"total_memory_available (GB)": 94.62
},
{
"epoch": 28.17974105102818,
"grad_norm": 0.6903337836265564,
"learning_rate": 0.0007182025894897181,
"loss": 0.7804,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 37000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 28.56054836252856,
"grad_norm": 0.7300212979316711,
"learning_rate": 0.0007143945163747144,
"loss": 0.7552,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 37500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 28.94135567402894,
"grad_norm": 0.7145921587944031,
"learning_rate": 0.0007105864432597105,
"loss": 0.7889,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 38000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 29.0,
"eval_accuracy": 0.13493495281474205,
"eval_loss": 6.49939489364624,
"eval_runtime": 294.5404,
"eval_samples_per_second": 570.227,
"eval_steps_per_second": 1.117,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 38077,
"total_memory_available (GB)": 94.62
},
{
"epoch": 29.32216298552932,
"grad_norm": 0.6388425230979919,
"learning_rate": 0.0007067783701447068,
"loss": 0.7356,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 38500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 29.702970297029704,
"grad_norm": 0.7119573354721069,
"learning_rate": 0.0007029702970297029,
"loss": 0.7645,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 39000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 30.0,
"eval_accuracy": 0.14072817123634307,
"eval_loss": 6.493895530700684,
"eval_runtime": 293.9015,
"eval_samples_per_second": 571.467,
"eval_steps_per_second": 1.119,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 39390,
"total_memory_available (GB)": 94.62
},
{
"epoch": 30.083777608530085,
"grad_norm": 0.6302276253700256,
"learning_rate": 0.0006991622239146992,
"loss": 0.7568,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 39500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 30.464584920030465,
"grad_norm": 0.6814985871315002,
"learning_rate": 0.0006953541507996953,
"loss": 0.7356,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 40000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 30.845392231530845,
"grad_norm": 0.6445801258087158,
"learning_rate": 0.0006915460776846916,
"loss": 0.7548,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 40500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 31.0,
"eval_accuracy": 0.13752493227352564,
"eval_loss": 6.484902858734131,
"eval_runtime": 294.1773,
"eval_samples_per_second": 570.931,
"eval_steps_per_second": 1.118,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 40703,
"total_memory_available (GB)": 94.62
},
{
"epoch": 31.226199543031225,
"grad_norm": 0.7047973871231079,
"learning_rate": 0.0006877380045696877,
"loss": 0.7233,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 41000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 31.60700685453161,
"grad_norm": 0.7059982419013977,
"learning_rate": 0.000683929931454684,
"loss": 0.7176,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 41500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 31.98781416603199,
"grad_norm": 0.7162789106369019,
"learning_rate": 0.0006801218583396802,
"loss": 0.7494,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 42000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 32.0,
"eval_accuracy": 0.14151409603762913,
"eval_loss": 6.554172039031982,
"eval_runtime": 293.2908,
"eval_samples_per_second": 572.657,
"eval_steps_per_second": 1.122,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 42016,
"total_memory_available (GB)": 94.62
},
{
"epoch": 32.36862147753237,
"grad_norm": 0.6640636324882507,
"learning_rate": 0.0006763137852246763,
"loss": 0.6871,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 42500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 32.74942878903275,
"grad_norm": 0.646771252155304,
"learning_rate": 0.0006725057121096724,
"loss": 0.7162,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 43000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 33.0,
"eval_accuracy": 0.14183561072906434,
"eval_loss": 6.457273483276367,
"eval_runtime": 292.6918,
"eval_samples_per_second": 573.829,
"eval_steps_per_second": 1.124,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 43329,
"total_memory_available (GB)": 94.62
},
{
"epoch": 33.13023610053313,
"grad_norm": 0.6640152931213379,
"learning_rate": 0.0006686976389946687,
"loss": 0.7121,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 43500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 33.51104341203351,
"grad_norm": 0.6674479246139526,
"learning_rate": 0.0006648895658796648,
"loss": 0.6863,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 44000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 33.89185072353389,
"grad_norm": 0.6740984320640564,
"learning_rate": 0.0006610814927646611,
"loss": 0.7109,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 44500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 34.0,
"eval_accuracy": 0.14142478640111936,
"eval_loss": 6.490988731384277,
"eval_runtime": 294.702,
"eval_samples_per_second": 569.915,
"eval_steps_per_second": 1.116,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 44642,
"total_memory_available (GB)": 94.62
},
{
"epoch": 34.272658035034276,
"grad_norm": 0.6771370768547058,
"learning_rate": 0.0006572734196496572,
"loss": 0.6806,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 45000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 34.65346534653465,
"grad_norm": 0.6320595145225525,
"learning_rate": 0.0006534653465346535,
"loss": 0.683,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 45500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 35.0,
"eval_accuracy": 0.14110922568545145,
"eval_loss": 6.431344509124756,
"eval_runtime": 294.9108,
"eval_samples_per_second": 569.511,
"eval_steps_per_second": 1.116,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 45955,
"total_memory_available (GB)": 94.62
},
{
"epoch": 35.034272658035036,
"grad_norm": 0.6397078633308411,
"learning_rate": 0.0006496572734196497,
"loss": 0.7004,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 46000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 35.41507996953541,
"grad_norm": 0.5976701378822327,
"learning_rate": 0.0006458492003046459,
"loss": 0.6579,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 46500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 35.795887281035796,
"grad_norm": 0.6323311924934387,
"learning_rate": 0.000642041127189642,
"loss": 0.6828,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 47000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 36.0,
"eval_accuracy": 0.14563424726861363,
"eval_loss": 6.305937767028809,
"eval_runtime": 293.3837,
"eval_samples_per_second": 572.476,
"eval_steps_per_second": 1.121,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 47268,
"total_memory_available (GB)": 94.62
},
{
"epoch": 36.17669459253618,
"grad_norm": 0.6800869703292847,
"learning_rate": 0.0006382330540746383,
"loss": 0.6765,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 47500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 36.557501904036556,
"grad_norm": 0.6761651039123535,
"learning_rate": 0.0006344249809596345,
"loss": 0.6576,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 48000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 36.93830921553694,
"grad_norm": 0.6531367301940918,
"learning_rate": 0.0006306169078446307,
"loss": 0.6772,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 48500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 37.0,
"eval_accuracy": 0.14638444821529575,
"eval_loss": 6.376420021057129,
"eval_runtime": 293.926,
"eval_samples_per_second": 571.419,
"eval_steps_per_second": 1.119,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 48581,
"total_memory_available (GB)": 94.62
},
{
"epoch": 37.319116527037316,
"grad_norm": 0.639673113822937,
"learning_rate": 0.0006268088347296267,
"loss": 0.6436,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 49000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 37.6999238385377,
"grad_norm": 0.6823265552520752,
"learning_rate": 0.000623000761614623,
"loss": 0.652,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 49500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 38.0,
"eval_accuracy": 0.15022476258521628,
"eval_loss": 6.34368896484375,
"eval_runtime": 293.7272,
"eval_samples_per_second": 571.806,
"eval_steps_per_second": 1.12,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 49894,
"total_memory_available (GB)": 94.62
},
{
"epoch": 38.08073115003808,
"grad_norm": 0.6552994251251221,
"learning_rate": 0.0006191926884996192,
"loss": 0.6551,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 50000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 38.46153846153846,
"grad_norm": 0.6459840536117554,
"learning_rate": 0.0006153846153846154,
"loss": 0.6256,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 50500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 38.84234577303884,
"grad_norm": 0.7152717709541321,
"learning_rate": 0.0006115765422696116,
"loss": 0.6533,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 51000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 39.0,
"eval_accuracy": 0.1470393855497008,
"eval_loss": 6.349309921264648,
"eval_runtime": 293.4965,
"eval_samples_per_second": 572.256,
"eval_steps_per_second": 1.121,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 51207,
"total_memory_available (GB)": 94.62
},
{
"epoch": 39.22315308453922,
"grad_norm": 0.6504981517791748,
"learning_rate": 0.0006077684691546078,
"loss": 0.6289,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 51500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 39.603960396039604,
"grad_norm": 0.6987379789352417,
"learning_rate": 0.000603960396039604,
"loss": 0.6319,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 52000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 39.98476770753999,
"grad_norm": 0.7198599576950073,
"learning_rate": 0.0006001523229246002,
"loss": 0.6527,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 52500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 40.0,
"eval_accuracy": 0.14814087106665477,
"eval_loss": 6.3077898025512695,
"eval_runtime": 295.0602,
"eval_samples_per_second": 569.223,
"eval_steps_per_second": 1.115,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 52520,
"total_memory_available (GB)": 94.62
},
{
"epoch": 40.365575019040364,
"grad_norm": 0.6907523274421692,
"learning_rate": 0.0005963442498095964,
"loss": 0.6132,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 53000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 40.74638233054075,
"grad_norm": 0.6374237537384033,
"learning_rate": 0.0005925361766945926,
"loss": 0.633,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 53500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 41.0,
"eval_accuracy": 0.14128784495847102,
"eval_loss": 6.535091876983643,
"eval_runtime": 294.8025,
"eval_samples_per_second": 569.72,
"eval_steps_per_second": 1.116,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 53833,
"total_memory_available (GB)": 94.62
},
{
"epoch": 41.127189642041124,
"grad_norm": 0.6018815636634827,
"learning_rate": 0.0005887281035795888,
"loss": 0.6237,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 54000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 41.50799695354151,
"grad_norm": 0.5860393643379211,
"learning_rate": 0.000584920030464585,
"loss": 0.6081,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 54500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 41.88880426504189,
"grad_norm": 0.6938556432723999,
"learning_rate": 0.0005811119573495812,
"loss": 0.6219,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 55000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 42.0,
"eval_accuracy": 0.14922449465630674,
"eval_loss": 6.377195358276367,
"eval_runtime": 294.5637,
"eval_samples_per_second": 570.182,
"eval_steps_per_second": 1.117,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 55146,
"total_memory_available (GB)": 94.62
},
{
"epoch": 42.26961157654227,
"grad_norm": 0.6243091821670532,
"learning_rate": 0.0005773038842345773,
"loss": 0.5989,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 55500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 42.65041888804265,
"grad_norm": 0.5847932696342468,
"learning_rate": 0.0005734958111195735,
"loss": 0.6053,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 56000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 43.0,
"eval_accuracy": 0.1481289631151201,
"eval_loss": 6.480849266052246,
"eval_runtime": 294.6877,
"eval_samples_per_second": 569.942,
"eval_steps_per_second": 1.116,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 56459,
"total_memory_available (GB)": 94.62
},
{
"epoch": 43.031226199543035,
"grad_norm": 0.6111719012260437,
"learning_rate": 0.0005696877380045697,
"loss": 0.6123,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 56500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 43.41203351104341,
"grad_norm": 0.6217710971832275,
"learning_rate": 0.0005658796648895659,
"loss": 0.5842,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 57000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 43.792840822543795,
"grad_norm": 0.6644548177719116,
"learning_rate": 0.0005620715917745621,
"loss": 0.5996,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 57500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 44.0,
"eval_accuracy": 0.14795034384210057,
"eval_loss": 6.565069675445557,
"eval_runtime": 294.4066,
"eval_samples_per_second": 570.486,
"eval_steps_per_second": 1.118,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 57772,
"total_memory_available (GB)": 94.62
},
{
"epoch": 44.17364813404417,
"grad_norm": 0.60784512758255,
"learning_rate": 0.0005582635186595583,
"loss": 0.5859,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 58000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 44.554455445544555,
"grad_norm": 0.6305288076400757,
"learning_rate": 0.0005544554455445545,
"loss": 0.5792,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 58500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 44.93526275704494,
"grad_norm": 0.663642168045044,
"learning_rate": 0.0005506473724295507,
"loss": 0.5974,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 59000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 45.0,
"eval_accuracy": 0.14877199249799053,
"eval_loss": 6.533756256103516,
"eval_runtime": 292.6688,
"eval_samples_per_second": 573.874,
"eval_steps_per_second": 1.124,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 59085,
"total_memory_available (GB)": 94.62
},
{
"epoch": 45.316070068545315,
"grad_norm": 0.5684943199157715,
"learning_rate": 0.0005468392993145469,
"loss": 0.5616,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 59500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 45.6968773800457,
"grad_norm": 0.6369318962097168,
"learning_rate": 0.0005430312261995431,
"loss": 0.5818,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 60000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 46.0,
"eval_accuracy": 0.15239796374028758,
"eval_loss": 6.304377555847168,
"eval_runtime": 293.353,
"eval_samples_per_second": 572.536,
"eval_steps_per_second": 1.122,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 60398,
"total_memory_available (GB)": 94.62
},
{
"epoch": 46.077684691546075,
"grad_norm": 0.7030180096626282,
"learning_rate": 0.0005392231530845393,
"loss": 0.577,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 60500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 46.45849200304646,
"grad_norm": 0.6424877643585205,
"learning_rate": 0.0005354150799695355,
"loss": 0.5506,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 61000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 46.83929931454684,
"grad_norm": 0.6298852562904358,
"learning_rate": 0.0005316070068545317,
"loss": 0.5803,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 61500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 47.0,
"eval_accuracy": 0.15137387990830878,
"eval_loss": 6.536581993103027,
"eval_runtime": 295.29,
"eval_samples_per_second": 568.78,
"eval_steps_per_second": 1.114,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 61711,
"total_memory_available (GB)": 94.62
},
{
"epoch": 47.22010662604722,
"grad_norm": 0.6673698425292969,
"learning_rate": 0.0005277989337395278,
"loss": 0.5592,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 62000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 47.6009139375476,
"grad_norm": 0.6058672070503235,
"learning_rate": 0.000523990860624524,
"loss": 0.5503,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 62500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 47.98172124904798,
"grad_norm": 0.6532467603683472,
"learning_rate": 0.0005201827875095202,
"loss": 0.573,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 63000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 48.0,
"eval_accuracy": 0.15284451192283646,
"eval_loss": 6.47830057144165,
"eval_runtime": 293.0849,
"eval_samples_per_second": 573.059,
"eval_steps_per_second": 1.123,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 63024,
"total_memory_available (GB)": 94.62
},
{
"epoch": 48.36252856054836,
"grad_norm": 0.6419522762298584,
"learning_rate": 0.0005163747143945163,
"loss": 0.5408,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 63500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 48.743335872048746,
"grad_norm": 0.6108247637748718,
"learning_rate": 0.0005125666412795126,
"loss": 0.551,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 64000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 49.0,
"eval_accuracy": 0.15404721502783483,
"eval_loss": 6.494078636169434,
"eval_runtime": 293.6217,
"eval_samples_per_second": 572.012,
"eval_steps_per_second": 1.12,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 64337,
"total_memory_available (GB)": 94.62
},
{
"epoch": 49.12414318354912,
"grad_norm": 0.5675166845321655,
"learning_rate": 0.0005087585681645087,
"loss": 0.558,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 64500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 49.504950495049506,
"grad_norm": 0.5808805823326111,
"learning_rate": 0.000504950495049505,
"loss": 0.5386,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 65000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 49.88575780654988,
"grad_norm": 0.6140856146812439,
"learning_rate": 0.0005011424219345011,
"loss": 0.5447,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 65500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 50.0,
"eval_accuracy": 0.1527849721651633,
"eval_loss": 6.451413154602051,
"eval_runtime": 293.3727,
"eval_samples_per_second": 572.497,
"eval_steps_per_second": 1.121,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 65650,
"total_memory_available (GB)": 94.62
},
{
"epoch": 50.266565118050266,
"grad_norm": 0.5688324570655823,
"learning_rate": 0.0004973343488194974,
"loss": 0.5331,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 66000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 50.64737242955065,
"grad_norm": 0.6783095598220825,
"learning_rate": 0.0004935262757044935,
"loss": 0.5326,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 66500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 51.0,
"eval_accuracy": 0.15468429043493792,
"eval_loss": 6.373196125030518,
"eval_runtime": 294.0871,
"eval_samples_per_second": 571.106,
"eval_steps_per_second": 1.119,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 66963,
"total_memory_available (GB)": 94.62
},
{
"epoch": 51.02817974105103,
"grad_norm": 0.6522232294082642,
"learning_rate": 0.0004897182025894898,
"loss": 0.5431,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 67000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 51.40898705255141,
"grad_norm": 0.5886669158935547,
"learning_rate": 0.0004859101294744859,
"loss": 0.519,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 67500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 51.78979436405179,
"grad_norm": 0.5981758832931519,
"learning_rate": 0.0004821020563594821,
"loss": 0.5307,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 68000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 52.0,
"eval_accuracy": 0.15460093477419548,
"eval_loss": 6.580268383026123,
"eval_runtime": 294.46,
"eval_samples_per_second": 570.383,
"eval_steps_per_second": 1.117,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 68276,
"total_memory_available (GB)": 94.62
},
{
"epoch": 52.17060167555217,
"grad_norm": 0.5313323736190796,
"learning_rate": 0.0004782939832444783,
"loss": 0.5281,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 68500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 52.551408987052554,
"grad_norm": 0.5757377743721008,
"learning_rate": 0.0004744859101294745,
"loss": 0.5118,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 69000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 52.93221629855293,
"grad_norm": 0.5655534863471985,
"learning_rate": 0.0004706778370144707,
"loss": 0.5265,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 69500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 53.0,
"eval_accuracy": 0.1594057932184216,
"eval_loss": 6.225406169891357,
"eval_runtime": 295.5483,
"eval_samples_per_second": 568.283,
"eval_steps_per_second": 1.113,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 69589,
"total_memory_available (GB)": 94.62
},
{
"epoch": 53.313023610053314,
"grad_norm": 0.6315691471099854,
"learning_rate": 0.00046686976389946687,
"loss": 0.5063,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 70000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 53.69383092155369,
"grad_norm": 0.660926103591919,
"learning_rate": 0.00046306169078446307,
"loss": 0.5216,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 70500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 54.0,
"eval_accuracy": 0.15738739543330058,
"eval_loss": 6.288092136383057,
"eval_runtime": 292.8381,
"eval_samples_per_second": 573.542,
"eval_steps_per_second": 1.123,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 70902,
"total_memory_available (GB)": 94.62
},
{
"epoch": 54.074638233054074,
"grad_norm": 0.5640078186988831,
"learning_rate": 0.00045925361766945927,
"loss": 0.5181,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 71000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 54.45544554455446,
"grad_norm": 0.6716725826263428,
"learning_rate": 0.00045544554455445547,
"loss": 0.4994,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 71500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 54.836252856054834,
"grad_norm": 0.5691807866096497,
"learning_rate": 0.00045163747143945167,
"loss": 0.5214,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 72000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 55.0,
"eval_accuracy": 0.15635735762555447,
"eval_loss": 6.411758899688721,
"eval_runtime": 295.5856,
"eval_samples_per_second": 568.211,
"eval_steps_per_second": 1.113,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 72215,
"total_memory_available (GB)": 94.62
},
{
"epoch": 55.21706016755522,
"grad_norm": 0.6010560989379883,
"learning_rate": 0.00044782939832444787,
"loss": 0.5047,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 72500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 55.5978674790556,
"grad_norm": 0.6005520820617676,
"learning_rate": 0.0004440213252094441,
"loss": 0.4939,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 73000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 55.97867479055598,
"grad_norm": 0.6255254745483398,
"learning_rate": 0.0004402132520944402,
"loss": 0.5163,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 73500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 56.0,
"eval_accuracy": 0.1574290732636718,
"eval_loss": 6.470302104949951,
"eval_runtime": 294.9784,
"eval_samples_per_second": 569.381,
"eval_steps_per_second": 1.115,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 73528,
"total_memory_available (GB)": 94.62
},
{
"epoch": 56.35948210205636,
"grad_norm": 0.593974769115448,
"learning_rate": 0.0004364051789794364,
"loss": 0.4895,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 74000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 56.74028941355674,
"grad_norm": 0.5752760767936707,
"learning_rate": 0.0004325971058644326,
"loss": 0.4954,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 74500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 57.0,
"eval_accuracy": 0.16022744187431157,
"eval_loss": 6.3909783363342285,
"eval_runtime": 293.5227,
"eval_samples_per_second": 572.205,
"eval_steps_per_second": 1.121,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 74841,
"total_memory_available (GB)": 94.62
},
{
"epoch": 57.12109672505712,
"grad_norm": 0.5993156433105469,
"learning_rate": 0.0004287890327494288,
"loss": 0.5,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 75000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 57.501904036557505,
"grad_norm": 0.6445265412330627,
"learning_rate": 0.000424980959634425,
"loss": 0.4888,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 75500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 57.88271134805788,
"grad_norm": 0.5880737900733948,
"learning_rate": 0.0004211728865194212,
"loss": 0.4946,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 76000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 58.0,
"eval_accuracy": 0.1607156678872317,
"eval_loss": 6.456667900085449,
"eval_runtime": 294.1379,
"eval_samples_per_second": 571.008,
"eval_steps_per_second": 1.119,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 76154,
"total_memory_available (GB)": 94.62
},
{
"epoch": 58.263518659558265,
"grad_norm": 0.6263173818588257,
"learning_rate": 0.00041736481340441737,
"loss": 0.4825,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 76500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 58.64432597105864,
"grad_norm": 0.630531907081604,
"learning_rate": 0.0004135567402894136,
"loss": 0.4764,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 77000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 59.0,
"eval_accuracy": 0.15916168021196153,
"eval_loss": 6.475037097930908,
"eval_runtime": 293.105,
"eval_samples_per_second": 573.02,
"eval_steps_per_second": 1.122,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 77467,
"total_memory_available (GB)": 94.62
},
{
"epoch": 59.025133282559025,
"grad_norm": 0.5097931623458862,
"learning_rate": 0.0004097486671744098,
"loss": 0.4917,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 77500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 59.40594059405941,
"grad_norm": 0.5600863695144653,
"learning_rate": 0.000405940594059406,
"loss": 0.469,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 78000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 59.786747905559785,
"grad_norm": 0.543627917766571,
"learning_rate": 0.0004021325209444021,
"loss": 0.4797,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 78500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 60.0,
"eval_accuracy": 0.15797088505849782,
"eval_loss": 6.5070600509643555,
"eval_runtime": 293.3914,
"eval_samples_per_second": 572.461,
"eval_steps_per_second": 1.121,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 78780,
"total_memory_available (GB)": 94.62
},
{
"epoch": 60.16755521706017,
"grad_norm": 0.5502893328666687,
"learning_rate": 0.0003983244478293983,
"loss": 0.4723,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 79000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 60.548362528560546,
"grad_norm": 0.6071276068687439,
"learning_rate": 0.0003945163747143945,
"loss": 0.4628,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 79500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 60.92916984006093,
"grad_norm": 0.534694492816925,
"learning_rate": 0.00039070830159939067,
"loss": 0.4773,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 80000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 61.0,
"eval_accuracy": 0.16487749694858742,
"eval_loss": 6.299595832824707,
"eval_runtime": 292.6075,
"eval_samples_per_second": 573.994,
"eval_steps_per_second": 1.124,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 80093,
"total_memory_available (GB)": 94.62
},
{
"epoch": 61.30997715156131,
"grad_norm": 0.5583491325378418,
"learning_rate": 0.00038690022848438687,
"loss": 0.4689,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 80500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 61.69078446306169,
"grad_norm": 0.5769256353378296,
"learning_rate": 0.0003830921553693831,
"loss": 0.4638,
"max_memory_allocated (GB)": 21.28,
"memory_allocated (GB)": 2.46,
"step": 81000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 62.0,
"eval_accuracy": 0.1581852281861213,
"eval_loss": 6.37566614151001,
"eval_runtime": 295.0035,
"eval_samples_per_second": 569.332,
"eval_steps_per_second": 1.115,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 81406,
"total_memory_available (GB)": 94.62
},
{
"epoch": 62.07159177456207,
"grad_norm": 0.6076428890228271,
"learning_rate": 0.0003792840822543793,
"loss": 0.471,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 81500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 62.45239908606245,
"grad_norm": 0.5036810040473938,
"learning_rate": 0.0003754760091393755,
"loss": 0.4594,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 82000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 62.83320639756283,
"grad_norm": 0.5345487594604492,
"learning_rate": 0.0003716679360243717,
"loss": 0.4634,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 82500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 63.0,
"eval_accuracy": 0.15785180554315145,
"eval_loss": 6.494438171386719,
"eval_runtime": 293.3816,
"eval_samples_per_second": 572.48,
"eval_steps_per_second": 1.121,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 82719,
"total_memory_available (GB)": 94.62
},
{
"epoch": 63.21401370906322,
"grad_norm": 0.5531702637672424,
"learning_rate": 0.0003678598629093678,
"loss": 0.4539,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 83000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 63.59482102056359,
"grad_norm": 0.6362270712852478,
"learning_rate": 0.000364051789794364,
"loss": 0.4442,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 83500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 63.97562833206398,
"grad_norm": 0.5610324144363403,
"learning_rate": 0.0003602437166793602,
"loss": 0.4605,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 84000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 64.0,
"eval_accuracy": 0.15728617784525617,
"eval_loss": 6.63605260848999,
"eval_runtime": 294.3948,
"eval_samples_per_second": 570.509,
"eval_steps_per_second": 1.118,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 84032,
"total_memory_available (GB)": 94.62
},
{
"epoch": 64.35643564356435,
"grad_norm": 0.5020191073417664,
"learning_rate": 0.0003564356435643564,
"loss": 0.4363,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 84500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 64.73724295506474,
"grad_norm": 0.5931971073150635,
"learning_rate": 0.00035262757044935263,
"loss": 0.4541,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 85000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 65.0,
"eval_accuracy": 0.15664910243815308,
"eval_loss": 6.532097816467285,
"eval_runtime": 294.244,
"eval_samples_per_second": 570.802,
"eval_steps_per_second": 1.118,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 85345,
"total_memory_available (GB)": 94.62
},
{
"epoch": 65.11805026656512,
"grad_norm": 0.5734322667121887,
"learning_rate": 0.00034881949733434883,
"loss": 0.4573,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 85500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 65.4988575780655,
"grad_norm": 0.597134530544281,
"learning_rate": 0.00034501142421934503,
"loss": 0.4381,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 86000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 65.87966488956587,
"grad_norm": 0.6044827103614807,
"learning_rate": 0.0003412033511043412,
"loss": 0.447,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 86500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 66.0,
"eval_accuracy": 0.1647167396028698,
"eval_loss": 6.29494047164917,
"eval_runtime": 293.9365,
"eval_samples_per_second": 571.399,
"eval_steps_per_second": 1.119,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 86658,
"total_memory_available (GB)": 94.62
},
{
"epoch": 66.26047220106626,
"grad_norm": 0.5792707800865173,
"learning_rate": 0.0003373952779893374,
"loss": 0.4338,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 87000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 66.64127951256664,
"grad_norm": 0.5685553550720215,
"learning_rate": 0.0003335872048743336,
"loss": 0.4392,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 87500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 67.0,
"eval_accuracy": 0.16156113244619094,
"eval_loss": 6.429385662078857,
"eval_runtime": 293.8448,
"eval_samples_per_second": 571.577,
"eval_steps_per_second": 1.12,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 87971,
"total_memory_available (GB)": 94.62
},
{
"epoch": 67.02208682406702,
"grad_norm": 0.5447320342063904,
"learning_rate": 0.0003297791317593298,
"loss": 0.4436,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 88000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 67.40289413556741,
"grad_norm": 0.572354257106781,
"learning_rate": 0.000325971058644326,
"loss": 0.4261,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 88500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 67.78370144706778,
"grad_norm": 0.6266507506370544,
"learning_rate": 0.0003221629855293222,
"loss": 0.4319,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 89000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 68.0,
"eval_accuracy": 0.16571105355601204,
"eval_loss": 6.468620300292969,
"eval_runtime": 294.3847,
"eval_samples_per_second": 570.529,
"eval_steps_per_second": 1.118,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 89284,
"total_memory_available (GB)": 94.62
},
{
"epoch": 68.16450875856816,
"grad_norm": 0.6514284014701843,
"learning_rate": 0.00031835491241431833,
"loss": 0.4371,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 89500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 68.54531607006855,
"grad_norm": 0.6122202277183533,
"learning_rate": 0.00031454683929931453,
"loss": 0.4232,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 90000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 68.92612338156893,
"grad_norm": 0.617365300655365,
"learning_rate": 0.00031073876618431073,
"loss": 0.4321,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 90500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 69.0,
"eval_accuracy": 0.16538953886457683,
"eval_loss": 6.5044403076171875,
"eval_runtime": 292.9492,
"eval_samples_per_second": 573.325,
"eval_steps_per_second": 1.123,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 90597,
"total_memory_available (GB)": 94.62
},
{
"epoch": 69.3069306930693,
"grad_norm": 0.5891785025596619,
"learning_rate": 0.00030693069306930693,
"loss": 0.4156,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 91000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 69.68773800456968,
"grad_norm": 0.5906224846839905,
"learning_rate": 0.00030312261995430313,
"loss": 0.4239,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 91500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 70.0,
"eval_accuracy": 0.1670447441278914,
"eval_loss": 6.288415908813477,
"eval_runtime": 294.6055,
"eval_samples_per_second": 570.101,
"eval_steps_per_second": 1.117,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 91910,
"total_memory_available (GB)": 94.62
},
{
"epoch": 70.06854531607007,
"grad_norm": 0.4991946518421173,
"learning_rate": 0.00029931454683929933,
"loss": 0.4317,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 92000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 70.44935262757045,
"grad_norm": 0.5375520586967468,
"learning_rate": 0.00029550647372429554,
"loss": 0.4111,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 92500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 70.83015993907082,
"grad_norm": 0.5114530324935913,
"learning_rate": 0.0002916984006092917,
"loss": 0.424,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 93000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 71.0,
"eval_accuracy": 0.16497276056086452,
"eval_loss": 6.455724716186523,
"eval_runtime": 293.9553,
"eval_samples_per_second": 571.362,
"eval_steps_per_second": 1.119,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 93223,
"total_memory_available (GB)": 94.62
},
{
"epoch": 71.21096725057122,
"grad_norm": 0.5219191312789917,
"learning_rate": 0.0002878903274942879,
"loss": 0.4153,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 93500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 71.59177456207159,
"grad_norm": 0.5508619546890259,
"learning_rate": 0.0002840822543792841,
"loss": 0.4138,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 94000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 71.97258187357197,
"grad_norm": 0.5840057134628296,
"learning_rate": 0.0002802741812642803,
"loss": 0.4189,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 94500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 72.0,
"eval_accuracy": 0.16434759310529606,
"eval_loss": 6.51508903503418,
"eval_runtime": 294.2848,
"eval_samples_per_second": 570.723,
"eval_steps_per_second": 1.118,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 94536,
"total_memory_available (GB)": 94.62
},
{
"epoch": 72.35338918507236,
"grad_norm": 0.545428991317749,
"learning_rate": 0.0002764661081492765,
"loss": 0.4125,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 95000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 72.73419649657274,
"grad_norm": 0.5353052616119385,
"learning_rate": 0.0002726580350342727,
"loss": 0.4056,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 95500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 73.0,
"eval_accuracy": 0.16853919204548837,
"eval_loss": 6.449789524078369,
"eval_runtime": 294.3037,
"eval_samples_per_second": 570.686,
"eval_steps_per_second": 1.118,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 95849,
"total_memory_available (GB)": 94.62
},
{
"epoch": 73.11500380807311,
"grad_norm": 0.5559654831886292,
"learning_rate": 0.00026884996191926883,
"loss": 0.4057,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 96000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 73.4958111195735,
"grad_norm": 0.562247097492218,
"learning_rate": 0.00026504188880426504,
"loss": 0.4081,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 96500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 73.87661843107388,
"grad_norm": 0.4949222207069397,
"learning_rate": 0.00026123381568926124,
"loss": 0.4113,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 97000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 74.0,
"eval_accuracy": 0.16715786966747045,
"eval_loss": 6.463613986968994,
"eval_runtime": 294.07,
"eval_samples_per_second": 571.14,
"eval_steps_per_second": 1.119,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 97162,
"total_memory_available (GB)": 94.62
},
{
"epoch": 74.25742574257426,
"grad_norm": 0.516631007194519,
"learning_rate": 0.00025742574257425744,
"loss": 0.4041,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 97500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 74.63823305407463,
"grad_norm": 0.5838146209716797,
"learning_rate": 0.00025361766945925364,
"loss": 0.4031,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 98000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 75.0,
"eval_accuracy": 0.16266261796314488,
"eval_loss": 6.646434307098389,
"eval_runtime": 293.4935,
"eval_samples_per_second": 572.261,
"eval_steps_per_second": 1.121,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 98475,
"total_memory_available (GB)": 94.62
},
{
"epoch": 75.01904036557502,
"grad_norm": 0.49288421869277954,
"learning_rate": 0.00024980959634424984,
"loss": 0.4003,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 98500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 75.3998476770754,
"grad_norm": 0.5597474575042725,
"learning_rate": 0.00024600152322924604,
"loss": 0.393,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 99000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 75.78065498857578,
"grad_norm": 0.605529248714447,
"learning_rate": 0.00024219345011424221,
"loss": 0.3965,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 99500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 76.0,
"eval_accuracy": 0.16858682385162693,
"eval_loss": 6.5632758140563965,
"eval_runtime": 293.7256,
"eval_samples_per_second": 571.809,
"eval_steps_per_second": 1.12,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 99788,
"total_memory_available (GB)": 94.62
},
{
"epoch": 76.16146230007617,
"grad_norm": 0.4536028802394867,
"learning_rate": 0.0002383853769992384,
"loss": 0.3944,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 100000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 76.54226961157654,
"grad_norm": 0.480114221572876,
"learning_rate": 0.0002345773038842346,
"loss": 0.3848,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 100500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 76.92307692307692,
"grad_norm": 0.5449275970458984,
"learning_rate": 0.0002307692307692308,
"loss": 0.393,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 101000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 77.0,
"eval_accuracy": 0.1632758774671787,
"eval_loss": 6.687775135040283,
"eval_runtime": 295.5696,
"eval_samples_per_second": 568.242,
"eval_steps_per_second": 1.113,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 101101,
"total_memory_available (GB)": 94.62
},
{
"epoch": 77.30388423457731,
"grad_norm": 0.5685235857963562,
"learning_rate": 0.000226961157654227,
"loss": 0.3919,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 101500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 77.68469154607769,
"grad_norm": 0.4742094874382019,
"learning_rate": 0.00022315308453922314,
"loss": 0.3958,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 102000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 78.0,
"eval_accuracy": 0.17422523890327765,
"eval_loss": 6.410009384155273,
"eval_runtime": 294.3708,
"eval_samples_per_second": 570.556,
"eval_steps_per_second": 1.118,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 102414,
"total_memory_available (GB)": 94.62
},
{
"epoch": 78.06549885757806,
"grad_norm": 0.5653303861618042,
"learning_rate": 0.00021934501142421934,
"loss": 0.3924,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 102500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 78.44630616907844,
"grad_norm": 0.5051060914993286,
"learning_rate": 0.00021553693830921554,
"loss": 0.3866,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 103000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 78.82711348057883,
"grad_norm": 0.5776985287666321,
"learning_rate": 0.00021172886519421171,
"loss": 0.3848,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 103500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 79.0,
"eval_accuracy": 0.17083147271590604,
"eval_loss": 6.537196159362793,
"eval_runtime": 293.4855,
"eval_samples_per_second": 572.277,
"eval_steps_per_second": 1.121,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 103727,
"total_memory_available (GB)": 94.62
},
{
"epoch": 79.20792079207921,
"grad_norm": 0.48929280042648315,
"learning_rate": 0.00020792079207920792,
"loss": 0.3856,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 104000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 79.58872810357958,
"grad_norm": 0.5744351744651794,
"learning_rate": 0.00020411271896420412,
"loss": 0.3838,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 104500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 79.96953541507997,
"grad_norm": 0.5490047335624695,
"learning_rate": 0.0002003046458492003,
"loss": 0.3785,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 105000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 80.0,
"eval_accuracy": 0.17015867345419905,
"eval_loss": 6.4460296630859375,
"eval_runtime": 295.5798,
"eval_samples_per_second": 568.222,
"eval_steps_per_second": 1.113,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 105040,
"total_memory_available (GB)": 94.62
},
{
"epoch": 80.35034272658035,
"grad_norm": 0.5375010967254639,
"learning_rate": 0.0001964965727341965,
"loss": 0.3743,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 105500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 80.73115003808073,
"grad_norm": 0.48377513885498047,
"learning_rate": 0.0001926884996191927,
"loss": 0.3709,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 106000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 81.0,
"eval_accuracy": 0.17629126849453722,
"eval_loss": 6.449659824371338,
"eval_runtime": 295.4724,
"eval_samples_per_second": 568.429,
"eval_steps_per_second": 1.113,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 106353,
"total_memory_available (GB)": 94.62
},
{
"epoch": 81.11195734958112,
"grad_norm": 0.5163039565086365,
"learning_rate": 0.00018888042650418887,
"loss": 0.3748,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 106500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 81.4927646610815,
"grad_norm": 0.44855397939682007,
"learning_rate": 0.00018507235338918507,
"loss": 0.3688,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 107000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 81.87357197258187,
"grad_norm": 0.4645147919654846,
"learning_rate": 0.00018126428027418127,
"loss": 0.3692,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 107500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 82.0,
"eval_accuracy": 0.17463606323122266,
"eval_loss": 6.449412822723389,
"eval_runtime": 293.3089,
"eval_samples_per_second": 572.621,
"eval_steps_per_second": 1.122,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 107666,
"total_memory_available (GB)": 94.62
},
{
"epoch": 82.25437928408225,
"grad_norm": 0.5181131362915039,
"learning_rate": 0.00017745620715917747,
"loss": 0.3689,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 108000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 82.63518659558264,
"grad_norm": 0.48766759037971497,
"learning_rate": 0.00017364813404417364,
"loss": 0.3667,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 108500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 83.0,
"eval_accuracy": 0.17326069482897205,
"eval_loss": 6.478708744049072,
"eval_runtime": 294.5422,
"eval_samples_per_second": 570.224,
"eval_steps_per_second": 1.117,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 108979,
"total_memory_available (GB)": 94.62
},
{
"epoch": 83.01599390708301,
"grad_norm": 0.5013307929039001,
"learning_rate": 0.00016984006092916984,
"loss": 0.3755,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 109000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 83.39680121858339,
"grad_norm": 0.4935319125652313,
"learning_rate": 0.00016603198781416605,
"loss": 0.3573,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 109500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 83.77760853008378,
"grad_norm": 0.47369396686553955,
"learning_rate": 0.00016222391469916222,
"loss": 0.3642,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 110000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 84.0,
"eval_accuracy": 0.1762198207853294,
"eval_loss": 6.379220008850098,
"eval_runtime": 295.6777,
"eval_samples_per_second": 568.034,
"eval_steps_per_second": 1.113,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 110292,
"total_memory_available (GB)": 94.62
},
{
"epoch": 84.15841584158416,
"grad_norm": 0.4918299615383148,
"learning_rate": 0.00015841584158415842,
"loss": 0.36,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 110500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 84.53922315308454,
"grad_norm": 0.4875505268573761,
"learning_rate": 0.00015460776846915462,
"loss": 0.365,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 111000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 84.92003046458493,
"grad_norm": 0.4853888750076294,
"learning_rate": 0.0001507996953541508,
"loss": 0.3648,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 111500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 85.0,
"eval_accuracy": 0.17838111398886605,
"eval_loss": 6.410529613494873,
"eval_runtime": 294.3712,
"eval_samples_per_second": 570.555,
"eval_steps_per_second": 1.118,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 111605,
"total_memory_available (GB)": 94.62
},
{
"epoch": 85.3008377760853,
"grad_norm": 0.4833332598209381,
"learning_rate": 0.000146991622239147,
"loss": 0.3548,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 112000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 85.68164508758568,
"grad_norm": 0.5711667537689209,
"learning_rate": 0.0001431835491241432,
"loss": 0.3595,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 112500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 86.0,
"eval_accuracy": 0.17176029293560774,
"eval_loss": 6.682053089141846,
"eval_runtime": 294.7944,
"eval_samples_per_second": 569.736,
"eval_steps_per_second": 1.116,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 112918,
"total_memory_available (GB)": 94.62
},
{
"epoch": 86.06245239908607,
"grad_norm": 0.503462553024292,
"learning_rate": 0.00013937547600913937,
"loss": 0.3593,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 113000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 86.44325971058645,
"grad_norm": 0.5184527635574341,
"learning_rate": 0.00013556740289413557,
"loss": 0.3451,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 113500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 86.82406702208682,
"grad_norm": 0.5371147394180298,
"learning_rate": 0.00013175932977913177,
"loss": 0.3575,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 114000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 87.0,
"eval_accuracy": 0.17632103837337382,
"eval_loss": 6.518656253814697,
"eval_runtime": 293.8373,
"eval_samples_per_second": 571.592,
"eval_steps_per_second": 1.12,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 114231,
"total_memory_available (GB)": 94.62
},
{
"epoch": 87.2048743335872,
"grad_norm": 0.5780415534973145,
"learning_rate": 0.00012795125666412798,
"loss": 0.3571,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 114500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 87.58568164508759,
"grad_norm": 0.47814422845840454,
"learning_rate": 0.00012414318354912415,
"loss": 0.3524,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 115000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 87.96648895658797,
"grad_norm": 0.529201865196228,
"learning_rate": 0.00012033511043412034,
"loss": 0.3512,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 115500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 88.0,
"eval_accuracy": 0.17518382900181595,
"eval_loss": 6.586051940917969,
"eval_runtime": 293.3082,
"eval_samples_per_second": 572.623,
"eval_steps_per_second": 1.122,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 115544,
"total_memory_available (GB)": 94.62
},
{
"epoch": 88.34729626808834,
"grad_norm": 0.5704371929168701,
"learning_rate": 0.00011652703731911652,
"loss": 0.3506,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 116000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 88.72810357958873,
"grad_norm": 0.4795701503753662,
"learning_rate": 0.00011271896420411273,
"loss": 0.3416,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 116500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 89.0,
"eval_accuracy": 0.17716054895656574,
"eval_loss": 6.533735275268555,
"eval_runtime": 295.8371,
"eval_samples_per_second": 567.728,
"eval_steps_per_second": 1.112,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 116857,
"total_memory_available (GB)": 94.62
},
{
"epoch": 89.10891089108911,
"grad_norm": 0.5546539425849915,
"learning_rate": 0.00010891089108910891,
"loss": 0.3448,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 117000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 89.48971820258949,
"grad_norm": 0.5290449857711792,
"learning_rate": 0.0001051028179741051,
"loss": 0.3446,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 117500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 89.87052551408988,
"grad_norm": 0.5648689866065979,
"learning_rate": 0.0001012947448591013,
"loss": 0.3454,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 118000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 90.0,
"eval_accuracy": 0.17577922657854783,
"eval_loss": 6.607510089874268,
"eval_runtime": 292.9613,
"eval_samples_per_second": 573.301,
"eval_steps_per_second": 1.123,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 118170,
"total_memory_available (GB)": 94.62
},
{
"epoch": 90.25133282559025,
"grad_norm": 0.5280727744102478,
"learning_rate": 9.748667174409749e-05,
"loss": 0.3411,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 118500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 90.63214013709063,
"grad_norm": 0.5048713088035583,
"learning_rate": 9.367859862909369e-05,
"loss": 0.3401,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 119000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 91.0,
"eval_accuracy": 0.1758983060938942,
"eval_loss": 6.536908149719238,
"eval_runtime": 293.2277,
"eval_samples_per_second": 572.78,
"eval_steps_per_second": 1.122,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 119483,
"total_memory_available (GB)": 94.62
},
{
"epoch": 91.012947448591,
"grad_norm": 0.4610355496406555,
"learning_rate": 8.987052551408988e-05,
"loss": 0.3417,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 119500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 91.3937547600914,
"grad_norm": 0.5393335819244385,
"learning_rate": 8.606245239908606e-05,
"loss": 0.3334,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 120000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 91.77456207159177,
"grad_norm": 0.5774939060211182,
"learning_rate": 8.225437928408227e-05,
"loss": 0.3361,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 120500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 92.0,
"eval_accuracy": 0.17773808460599566,
"eval_loss": 6.614775657653809,
"eval_runtime": 295.4182,
"eval_samples_per_second": 568.533,
"eval_steps_per_second": 1.114,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 120796,
"total_memory_available (GB)": 94.62
},
{
"epoch": 92.15536938309215,
"grad_norm": 0.5457442402839661,
"learning_rate": 7.844630616907845e-05,
"loss": 0.3276,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 121000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 92.53617669459254,
"grad_norm": 0.5042428970336914,
"learning_rate": 7.463823305407463e-05,
"loss": 0.3348,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 121500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 92.91698400609292,
"grad_norm": 0.6076750159263611,
"learning_rate": 7.083015993907083e-05,
"loss": 0.3377,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 122000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 93.0,
"eval_accuracy": 0.17985174600339376,
"eval_loss": 6.484330654144287,
"eval_runtime": 294.3563,
"eval_samples_per_second": 570.584,
"eval_steps_per_second": 1.118,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 122109,
"total_memory_available (GB)": 94.62
},
{
"epoch": 93.2977913175933,
"grad_norm": 0.5274074077606201,
"learning_rate": 6.702208682406702e-05,
"loss": 0.3318,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 122500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 93.67859862909368,
"grad_norm": 0.4592651426792145,
"learning_rate": 6.321401370906322e-05,
"loss": 0.3344,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 123000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 94.0,
"eval_accuracy": 0.1790300973475038,
"eval_loss": 6.447075843811035,
"eval_runtime": 294.1853,
"eval_samples_per_second": 570.916,
"eval_steps_per_second": 1.118,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 123422,
"total_memory_available (GB)": 94.62
},
{
"epoch": 94.05940594059406,
"grad_norm": 0.4637293815612793,
"learning_rate": 5.9405940594059404e-05,
"loss": 0.33,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 123500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 94.44021325209444,
"grad_norm": 0.4784545600414276,
"learning_rate": 5.55978674790556e-05,
"loss": 0.3299,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 124000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 94.82102056359481,
"grad_norm": 0.5497499108314514,
"learning_rate": 5.178979436405179e-05,
"loss": 0.3262,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 124500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 95.0,
"eval_accuracy": 0.1810127712780209,
"eval_loss": 6.450591087341309,
"eval_runtime": 294.1798,
"eval_samples_per_second": 570.926,
"eval_steps_per_second": 1.118,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 124735,
"total_memory_available (GB)": 94.62
},
{
"epoch": 95.2018278750952,
"grad_norm": 0.5419259071350098,
"learning_rate": 4.798172124904798e-05,
"loss": 0.3309,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 125000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 95.58263518659558,
"grad_norm": 0.49941861629486084,
"learning_rate": 4.4173648134044175e-05,
"loss": 0.3301,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 125500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 95.96344249809596,
"grad_norm": 0.4585070312023163,
"learning_rate": 4.036557501904037e-05,
"loss": 0.3228,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 126000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 96.0,
"eval_accuracy": 0.17943496769968145,
"eval_loss": 6.566529750823975,
"eval_runtime": 291.9607,
"eval_samples_per_second": 575.266,
"eval_steps_per_second": 1.127,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 126048,
"total_memory_available (GB)": 94.62
},
{
"epoch": 96.34424980959635,
"grad_norm": 0.507581353187561,
"learning_rate": 3.655750190403656e-05,
"loss": 0.3236,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 126500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 96.72505712109673,
"grad_norm": 0.5264877080917358,
"learning_rate": 3.274942878903275e-05,
"loss": 0.327,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 127000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 97.0,
"eval_accuracy": 0.17927421035396385,
"eval_loss": 6.534857749938965,
"eval_runtime": 293.1746,
"eval_samples_per_second": 572.884,
"eval_steps_per_second": 1.122,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 127361,
"total_memory_available (GB)": 94.62
},
{
"epoch": 97.1058644325971,
"grad_norm": 0.4856893718242645,
"learning_rate": 2.8941355674028942e-05,
"loss": 0.3286,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 127500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 97.48667174409749,
"grad_norm": 0.4634458124637604,
"learning_rate": 2.5133282559025133e-05,
"loss": 0.3249,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 128000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 97.86747905559787,
"grad_norm": 0.5153778791427612,
"learning_rate": 2.1325209444021324e-05,
"loss": 0.3275,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 128500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 98.0,
"eval_accuracy": 0.17994105563990354,
"eval_loss": 6.51284646987915,
"eval_runtime": 296.4245,
"eval_samples_per_second": 566.603,
"eval_steps_per_second": 1.11,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 128674,
"total_memory_available (GB)": 94.62
},
{
"epoch": 98.24828636709825,
"grad_norm": 0.5075029730796814,
"learning_rate": 1.7517136329017518e-05,
"loss": 0.3245,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 129000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 98.62909367859864,
"grad_norm": 0.462927907705307,
"learning_rate": 1.3709063214013709e-05,
"loss": 0.321,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 129500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 99.0,
"eval_accuracy": 0.1801494447917597,
"eval_loss": 6.557428359985352,
"eval_runtime": 296.1671,
"eval_samples_per_second": 567.095,
"eval_steps_per_second": 1.111,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 129987,
"total_memory_available (GB)": 94.62
},
{
"epoch": 99.00990099009901,
"grad_norm": 0.48631536960601807,
"learning_rate": 9.900990099009901e-06,
"loss": 0.3226,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 130000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 99.39070830159939,
"grad_norm": 0.4906682074069977,
"learning_rate": 6.092916984006093e-06,
"loss": 0.3186,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 130500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 99.77151561309977,
"grad_norm": 0.4988425672054291,
"learning_rate": 2.284843869002285e-06,
"loss": 0.3217,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 131000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 100.0,
"eval_accuracy": 0.17984579202762646,
"eval_loss": 6.551010608673096,
"eval_runtime": 294.715,
"eval_samples_per_second": 569.89,
"eval_steps_per_second": 1.116,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 131300,
"total_memory_available (GB)": 94.62
},
{
"epoch": 100.0,
"max_memory_allocated (GB)": 21.35,
"memory_allocated (GB)": 2.46,
"step": 131300,
"total_flos": 9.770330117395578e+21,
"total_memory_available (GB)": 94.62,
"train_loss": 0.7415650523898262,
"train_runtime": 105713.6949,
"train_samples_per_second": 635.506,
"train_steps_per_second": 1.242
}
],
"logging_steps": 500,
"max_steps": 131300,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 9.770330117395578e+21,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}