Code_Translation / trainer_state.json
wang-king's picture
Upload folder using huggingface_hub
29fc3e1 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 974,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02053915275994865,
"grad_norm": 0.6241949796676636,
"learning_rate": 9.99789342773948e-06,
"loss": 0.1517,
"step": 10
},
{
"epoch": 0.0410783055198973,
"grad_norm": 0.3109052777290344,
"learning_rate": 9.99061372843304e-06,
"loss": 0.1347,
"step": 20
},
{
"epoch": 0.06161745827984596,
"grad_norm": 0.28893721103668213,
"learning_rate": 9.978142466031648e-06,
"loss": 0.12,
"step": 30
},
{
"epoch": 0.0821566110397946,
"grad_norm": 0.2942233979701996,
"learning_rate": 9.960492613958845e-06,
"loss": 0.1187,
"step": 40
},
{
"epoch": 0.10269576379974327,
"grad_norm": 0.26697084307670593,
"learning_rate": 9.937682532746152e-06,
"loss": 0.1146,
"step": 50
},
{
"epoch": 0.12323491655969192,
"grad_norm": 0.26033908128738403,
"learning_rate": 9.90973595093323e-06,
"loss": 0.1111,
"step": 60
},
{
"epoch": 0.14377406931964057,
"grad_norm": 0.24977026879787445,
"learning_rate": 9.87668194038392e-06,
"loss": 0.1073,
"step": 70
},
{
"epoch": 0.1643132220795892,
"grad_norm": 0.2998938262462616,
"learning_rate": 9.838554886043768e-06,
"loss": 0.1077,
"step": 80
},
{
"epoch": 0.18485237483953787,
"grad_norm": 0.30642247200012207,
"learning_rate": 9.795394450170566e-06,
"loss": 0.1087,
"step": 90
},
{
"epoch": 0.20539152759948653,
"grad_norm": 0.2964343726634979,
"learning_rate": 9.747245531075069e-06,
"loss": 0.1052,
"step": 100
},
{
"epoch": 0.22593068035943517,
"grad_norm": 0.2638753056526184,
"learning_rate": 9.694158216414836e-06,
"loss": 0.1061,
"step": 110
},
{
"epoch": 0.24646983311938384,
"grad_norm": 0.2852879464626312,
"learning_rate": 9.63618773108977e-06,
"loss": 0.1024,
"step": 120
},
{
"epoch": 0.26700898587933247,
"grad_norm": 0.2863173484802246,
"learning_rate": 9.573394379793557e-06,
"loss": 0.1032,
"step": 130
},
{
"epoch": 0.28754813863928114,
"grad_norm": 0.2960529625415802,
"learning_rate": 9.505843484280783e-06,
"loss": 0.1063,
"step": 140
},
{
"epoch": 0.3080872913992298,
"grad_norm": 0.28948211669921875,
"learning_rate": 9.433605315414967e-06,
"loss": 0.1027,
"step": 150
},
{
"epoch": 0.3286264441591784,
"grad_norm": 0.2555496096611023,
"learning_rate": 9.356755020068201e-06,
"loss": 0.0995,
"step": 160
},
{
"epoch": 0.3491655969191271,
"grad_norm": 0.2884163558483124,
"learning_rate": 9.275372542948466e-06,
"loss": 0.1017,
"step": 170
},
{
"epoch": 0.36970474967907574,
"grad_norm": 0.2388547658920288,
"learning_rate": 9.189542543435909e-06,
"loss": 0.1015,
"step": 180
},
{
"epoch": 0.3902439024390244,
"grad_norm": 0.2760099768638611,
"learning_rate": 9.099354307514607e-06,
"loss": 0.099,
"step": 190
},
{
"epoch": 0.41078305519897307,
"grad_norm": 0.29273074865341187,
"learning_rate": 9.004901654891453e-06,
"loss": 0.0989,
"step": 200
},
{
"epoch": 0.4313222079589217,
"grad_norm": 0.31362560391426086,
"learning_rate": 8.906282841398747e-06,
"loss": 0.096,
"step": 210
},
{
"epoch": 0.45186136071887034,
"grad_norm": 0.27469882369041443,
"learning_rate": 8.803600456782053e-06,
"loss": 0.0957,
"step": 220
},
{
"epoch": 0.472400513478819,
"grad_norm": 0.2949489653110504,
"learning_rate": 8.696961317979622e-06,
"loss": 0.0959,
"step": 230
},
{
"epoch": 0.49293966623876767,
"grad_norm": 0.2868664264678955,
"learning_rate": 8.586476358004427e-06,
"loss": 0.0954,
"step": 240
},
{
"epoch": 0.5134788189987163,
"grad_norm": 0.28068357706069946,
"learning_rate": 8.472260510544383e-06,
"loss": 0.0958,
"step": 250
},
{
"epoch": 0.5340179717586649,
"grad_norm": 0.2683391273021698,
"learning_rate": 8.354432590400792e-06,
"loss": 0.0934,
"step": 260
},
{
"epoch": 0.5545571245186136,
"grad_norm": 0.26564258337020874,
"learning_rate": 8.23311516988942e-06,
"loss": 0.0964,
"step": 270
},
{
"epoch": 0.5750962772785623,
"grad_norm": 0.3143099844455719,
"learning_rate": 8.10843445133274e-06,
"loss": 0.094,
"step": 280
},
{
"epoch": 0.5956354300385109,
"grad_norm": 0.25333961844444275,
"learning_rate": 7.980520135776023e-06,
"loss": 0.0908,
"step": 290
},
{
"epoch": 0.6161745827984596,
"grad_norm": 0.2577582001686096,
"learning_rate": 7.849505288063808e-06,
"loss": 0.092,
"step": 300
},
{
"epoch": 0.6367137355584083,
"grad_norm": 0.2629285752773285,
"learning_rate": 7.71552619841716e-06,
"loss": 0.0889,
"step": 310
},
{
"epoch": 0.6572528883183568,
"grad_norm": 0.24951164424419403,
"learning_rate": 7.578722240655645e-06,
"loss": 0.0914,
"step": 320
},
{
"epoch": 0.6777920410783055,
"grad_norm": 0.264588862657547,
"learning_rate": 7.43923572721156e-06,
"loss": 0.0885,
"step": 330
},
{
"epoch": 0.6983311938382541,
"grad_norm": 0.2667712867259979,
"learning_rate": 7.297211761087229e-06,
"loss": 0.0918,
"step": 340
},
{
"epoch": 0.7188703465982028,
"grad_norm": 0.2499169409275055,
"learning_rate": 7.152798084909365e-06,
"loss": 0.0865,
"step": 350
},
{
"epoch": 0.7394094993581515,
"grad_norm": 0.27517884969711304,
"learning_rate": 7.006144927237505e-06,
"loss": 0.0866,
"step": 360
},
{
"epoch": 0.7599486521181001,
"grad_norm": 0.2483636885881424,
"learning_rate": 6.857404846286444e-06,
"loss": 0.0871,
"step": 370
},
{
"epoch": 0.7804878048780488,
"grad_norm": 0.30694103240966797,
"learning_rate": 6.706732571225183e-06,
"loss": 0.0884,
"step": 380
},
{
"epoch": 0.8010269576379975,
"grad_norm": 0.23266981542110443,
"learning_rate": 6.554284841217519e-06,
"loss": 0.085,
"step": 390
},
{
"epoch": 0.8215661103979461,
"grad_norm": 0.27542293071746826,
"learning_rate": 6.400220242371714e-06,
"loss": 0.0861,
"step": 400
},
{
"epoch": 0.8421052631578947,
"grad_norm": 0.2664916515350342,
"learning_rate": 6.244699042768835e-06,
"loss": 0.0852,
"step": 410
},
{
"epoch": 0.8626444159178434,
"grad_norm": 0.24229811131954193,
"learning_rate": 6.087883025741408e-06,
"loss": 0.0838,
"step": 420
},
{
"epoch": 0.883183568677792,
"grad_norm": 0.23243330419063568,
"learning_rate": 5.929935321575806e-06,
"loss": 0.0865,
"step": 430
},
{
"epoch": 0.9037227214377407,
"grad_norm": 0.2365313470363617,
"learning_rate": 5.771020237813448e-06,
"loss": 0.0837,
"step": 440
},
{
"epoch": 0.9242618741976893,
"grad_norm": 0.24980725347995758,
"learning_rate": 5.611303088327331e-06,
"loss": 0.0879,
"step": 450
},
{
"epoch": 0.944801026957638,
"grad_norm": 0.2599492371082306,
"learning_rate": 5.450950021351725e-06,
"loss": 0.0876,
"step": 460
},
{
"epoch": 0.9653401797175867,
"grad_norm": 0.27633577585220337,
"learning_rate": 5.290127846643903e-06,
"loss": 0.0851,
"step": 470
},
{
"epoch": 0.9858793324775353,
"grad_norm": 0.25800392031669617,
"learning_rate": 5.129003861957706e-06,
"loss": 0.0827,
"step": 480
},
{
"epoch": 1.0061617458279846,
"grad_norm": 0.22503100335597992,
"learning_rate": 4.967745679009474e-06,
"loss": 0.0749,
"step": 490
},
{
"epoch": 1.0267008985879333,
"grad_norm": 0.23040170967578888,
"learning_rate": 4.806521049117363e-06,
"loss": 0.0626,
"step": 500
},
{
"epoch": 1.047240051347882,
"grad_norm": 0.23996306955814362,
"learning_rate": 4.645497688695444e-06,
"loss": 0.0621,
"step": 510
},
{
"epoch": 1.0677792041078304,
"grad_norm": 0.2363160401582718,
"learning_rate": 4.484843104784106e-06,
"loss": 0.0626,
"step": 520
},
{
"epoch": 1.0883183568677792,
"grad_norm": 0.24278973042964935,
"learning_rate": 4.32472442079828e-06,
"loss": 0.0608,
"step": 530
},
{
"epoch": 1.1088575096277278,
"grad_norm": 0.2526680827140808,
"learning_rate": 4.165308202674704e-06,
"loss": 0.0626,
"step": 540
},
{
"epoch": 1.1293966623876766,
"grad_norm": 0.260307639837265,
"learning_rate": 4.006760285599146e-06,
"loss": 0.0594,
"step": 550
},
{
"epoch": 1.149935815147625,
"grad_norm": 0.2719564139842987,
"learning_rate": 3.849245601493784e-06,
"loss": 0.0563,
"step": 560
},
{
"epoch": 1.1704749679075739,
"grad_norm": 0.23317693173885345,
"learning_rate": 3.69292800744422e-06,
"loss": 0.0589,
"step": 570
},
{
"epoch": 1.1910141206675224,
"grad_norm": 0.23016999661922455,
"learning_rate": 3.5379701152446184e-06,
"loss": 0.0602,
"step": 580
},
{
"epoch": 1.2115532734274712,
"grad_norm": 0.21775227785110474,
"learning_rate": 3.384533122238267e-06,
"loss": 0.0572,
"step": 590
},
{
"epoch": 1.2320924261874198,
"grad_norm": 0.247865229845047,
"learning_rate": 3.2327766436295618e-06,
"loss": 0.0601,
"step": 600
},
{
"epoch": 1.2526315789473683,
"grad_norm": 0.23535579442977905,
"learning_rate": 3.0828585464418013e-06,
"loss": 0.0605,
"step": 610
},
{
"epoch": 1.273170731707317,
"grad_norm": 0.2479717880487442,
"learning_rate": 2.934934785293593e-06,
"loss": 0.0597,
"step": 620
},
{
"epoch": 1.2937098844672656,
"grad_norm": 0.24478904902935028,
"learning_rate": 2.7891592401646406e-06,
"loss": 0.058,
"step": 630
},
{
"epoch": 1.3142490372272144,
"grad_norm": 0.24038828909397125,
"learning_rate": 2.6456835563197194e-06,
"loss": 0.0574,
"step": 640
},
{
"epoch": 1.334788189987163,
"grad_norm": 0.24659165740013123,
"learning_rate": 2.504656986557342e-06,
"loss": 0.0591,
"step": 650
},
{
"epoch": 1.3553273427471118,
"grad_norm": 0.23365946114063263,
"learning_rate": 2.3662262359472326e-06,
"loss": 0.0587,
"step": 660
},
{
"epoch": 1.3758664955070603,
"grad_norm": 0.2665676772594452,
"learning_rate": 2.2305353092181014e-06,
"loss": 0.0592,
"step": 670
},
{
"epoch": 1.396405648267009,
"grad_norm": 0.24800005555152893,
"learning_rate": 2.09772536095451e-06,
"loss": 0.0588,
"step": 680
},
{
"epoch": 1.4169448010269576,
"grad_norm": 0.2330596148967743,
"learning_rate": 1.967934548758621e-06,
"loss": 0.0588,
"step": 690
},
{
"epoch": 1.4374839537869062,
"grad_norm": 0.2489139586687088,
"learning_rate": 1.8412978895296223e-06,
"loss": 0.0555,
"step": 700
},
{
"epoch": 1.458023106546855,
"grad_norm": 0.2403998076915741,
"learning_rate": 1.7179471190103103e-06,
"loss": 0.0568,
"step": 710
},
{
"epoch": 1.4785622593068035,
"grad_norm": 0.23150111734867096,
"learning_rate": 1.5980105547469443e-06,
"loss": 0.0584,
"step": 720
},
{
"epoch": 1.4991014120667523,
"grad_norm": 0.2429497092962265,
"learning_rate": 1.4816129626049365e-06,
"loss": 0.0578,
"step": 730
},
{
"epoch": 1.5196405648267008,
"grad_norm": 0.23500065505504608,
"learning_rate": 1.3688754269792297e-06,
"loss": 0.0579,
"step": 740
},
{
"epoch": 1.5401797175866494,
"grad_norm": 0.24009016156196594,
"learning_rate": 1.2599152248343848e-06,
"loss": 0.0567,
"step": 750
},
{
"epoch": 1.5607188703465982,
"grad_norm": 0.2493337094783783,
"learning_rate": 1.1548457037054089e-06,
"loss": 0.0581,
"step": 760
},
{
"epoch": 1.581258023106547,
"grad_norm": 0.25276979804039,
"learning_rate": 1.0537761637862198e-06,
"loss": 0.0563,
"step": 770
},
{
"epoch": 1.6017971758664955,
"grad_norm": 0.23559069633483887,
"learning_rate": 9.568117442284375e-07,
"loss": 0.0556,
"step": 780
},
{
"epoch": 1.622336328626444,
"grad_norm": 0.22181905806064606,
"learning_rate": 8.640533137687523e-07,
"loss": 0.0556,
"step": 790
},
{
"epoch": 1.6428754813863928,
"grad_norm": 0.25861266255378723,
"learning_rate": 7.755973657986648e-07,
"loss": 0.0571,
"step": 800
},
{
"epoch": 1.6634146341463416,
"grad_norm": 0.24212630093097687,
"learning_rate": 6.91535917985739e-07,
"loss": 0.0555,
"step": 810
},
{
"epoch": 1.6839537869062902,
"grad_norm": 0.23966702818870544,
"learning_rate": 6.119564165508057e-07,
"loss": 0.0553,
"step": 820
},
{
"epoch": 1.7044929396662387,
"grad_norm": 0.2524842619895935,
"learning_rate": 5.369416453006764e-07,
"loss": 0.0575,
"step": 830
},
{
"epoch": 1.7250320924261873,
"grad_norm": 0.2377074807882309,
"learning_rate": 4.665696395110081e-07,
"loss": 0.054,
"step": 840
},
{
"epoch": 1.745571245186136,
"grad_norm": 0.23431187868118286,
"learning_rate": 4.009136047489026e-07,
"loss": 0.0563,
"step": 850
},
{
"epoch": 1.7661103979460848,
"grad_norm": 0.2580386698246002,
"learning_rate": 3.400418407196826e-07,
"loss": 0.054,
"step": 860
},
{
"epoch": 1.7866495507060334,
"grad_norm": 0.23353375494480133,
"learning_rate": 2.840176702170683e-07,
"loss": 0.054,
"step": 870
},
{
"epoch": 1.807188703465982,
"grad_norm": 0.24874167144298553,
"learning_rate": 2.3289937325065902e-07,
"loss": 0.0566,
"step": 880
},
{
"epoch": 1.8277278562259307,
"grad_norm": 0.2422235757112503,
"learning_rate": 1.8674012641925642e-07,
"loss": 0.0543,
"step": 890
},
{
"epoch": 1.8482670089858795,
"grad_norm": 0.24417363107204437,
"learning_rate": 1.455879475930827e-07,
"loss": 0.057,
"step": 900
},
{
"epoch": 1.868806161745828,
"grad_norm": 0.23857787251472473,
"learning_rate": 1.0948564596244937e-07,
"loss": 0.0572,
"step": 910
},
{
"epoch": 1.8893453145057766,
"grad_norm": 0.25707000494003296,
"learning_rate": 7.847077750483489e-08,
"loss": 0.0553,
"step": 920
},
{
"epoch": 1.9098844672657251,
"grad_norm": 0.2240484207868576,
"learning_rate": 5.257560591669764e-08,
"loss": 0.0546,
"step": 930
},
{
"epoch": 1.930423620025674,
"grad_norm": 0.22742733359336853,
"learning_rate": 3.182706905067001e-08,
"loss": 0.0571,
"step": 940
},
{
"epoch": 1.9509627727856227,
"grad_norm": 0.24693480134010315,
"learning_rate": 1.624675089303529e-08,
"loss": 0.0559,
"step": 950
},
{
"epoch": 1.9715019255455712,
"grad_norm": 0.22910530865192413,
"learning_rate": 5.850859110658191e-09,
"loss": 0.0572,
"step": 960
},
{
"epoch": 1.9920410783055198,
"grad_norm": 0.24262285232543945,
"learning_rate": 6.502081907039381e-10,
"loss": 0.0544,
"step": 970
},
{
"epoch": 2.0,
"step": 974,
"total_flos": 1.635906903930726e+19,
"train_loss": 0.07772063925954106,
"train_runtime": 22087.4933,
"train_samples_per_second": 11.286,
"train_steps_per_second": 0.044
}
],
"logging_steps": 10,
"max_steps": 974,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 10000000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.635906903930726e+19,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}