{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9988536492166604,
"eval_steps": 500,
"global_step": 981,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.030569354222392053,
"grad_norm": 2.5210809779069177,
"learning_rate": 5e-06,
"loss": 1.0377,
"step": 10
},
{
"epoch": 0.061138708444784105,
"grad_norm": 8.843402948999215,
"learning_rate": 5e-06,
"loss": 0.9357,
"step": 20
},
{
"epoch": 0.09170806266717615,
"grad_norm": 5.905493309213603,
"learning_rate": 5e-06,
"loss": 0.8957,
"step": 30
},
{
"epoch": 0.12227741688956821,
"grad_norm": 1.8539641142842107,
"learning_rate": 5e-06,
"loss": 0.8741,
"step": 40
},
{
"epoch": 0.15284677111196027,
"grad_norm": 2.070310187410454,
"learning_rate": 5e-06,
"loss": 0.8547,
"step": 50
},
{
"epoch": 0.1834161253343523,
"grad_norm": 1.6459728758706702,
"learning_rate": 5e-06,
"loss": 0.8415,
"step": 60
},
{
"epoch": 0.21398547955674435,
"grad_norm": 1.0031157681260168,
"learning_rate": 5e-06,
"loss": 0.8256,
"step": 70
},
{
"epoch": 0.24455483377913642,
"grad_norm": 1.1608879789418929,
"learning_rate": 5e-06,
"loss": 0.8178,
"step": 80
},
{
"epoch": 0.2751241880015285,
"grad_norm": 0.8809818540535138,
"learning_rate": 5e-06,
"loss": 0.8075,
"step": 90
},
{
"epoch": 0.30569354222392053,
"grad_norm": 0.6485177640239032,
"learning_rate": 5e-06,
"loss": 0.7984,
"step": 100
},
{
"epoch": 0.3362628964463126,
"grad_norm": 0.6091087407506572,
"learning_rate": 5e-06,
"loss": 0.7954,
"step": 110
},
{
"epoch": 0.3668322506687046,
"grad_norm": 0.6370713623745252,
"learning_rate": 5e-06,
"loss": 0.7896,
"step": 120
},
{
"epoch": 0.39740160489109666,
"grad_norm": 0.9042512916702462,
"learning_rate": 5e-06,
"loss": 0.7887,
"step": 130
},
{
"epoch": 0.4279709591134887,
"grad_norm": 0.6657970582129965,
"learning_rate": 5e-06,
"loss": 0.7875,
"step": 140
},
{
"epoch": 0.4585403133358808,
"grad_norm": 0.6623117082135378,
"learning_rate": 5e-06,
"loss": 0.7829,
"step": 150
},
{
"epoch": 0.48910966755827284,
"grad_norm": 0.8758486633363365,
"learning_rate": 5e-06,
"loss": 0.7835,
"step": 160
},
{
"epoch": 0.5196790217806648,
"grad_norm": 0.687987022965735,
"learning_rate": 5e-06,
"loss": 0.7774,
"step": 170
},
{
"epoch": 0.550248376003057,
"grad_norm": 0.5715159943996615,
"learning_rate": 5e-06,
"loss": 0.7757,
"step": 180
},
{
"epoch": 0.580817730225449,
"grad_norm": 0.6235956778260581,
"learning_rate": 5e-06,
"loss": 0.7738,
"step": 190
},
{
"epoch": 0.6113870844478411,
"grad_norm": 0.625289072408262,
"learning_rate": 5e-06,
"loss": 0.7785,
"step": 200
},
{
"epoch": 0.6419564386702331,
"grad_norm": 0.8374831890605251,
"learning_rate": 5e-06,
"loss": 0.773,
"step": 210
},
{
"epoch": 0.6725257928926252,
"grad_norm": 0.7439050014272415,
"learning_rate": 5e-06,
"loss": 0.7733,
"step": 220
},
{
"epoch": 0.7030951471150172,
"grad_norm": 0.8205921908676899,
"learning_rate": 5e-06,
"loss": 0.7696,
"step": 230
},
{
"epoch": 0.7336645013374092,
"grad_norm": 0.5336955969561407,
"learning_rate": 5e-06,
"loss": 0.7705,
"step": 240
},
{
"epoch": 0.7642338555598013,
"grad_norm": 0.8042702021993413,
"learning_rate": 5e-06,
"loss": 0.7633,
"step": 250
},
{
"epoch": 0.7948032097821933,
"grad_norm": 0.7443163831237201,
"learning_rate": 5e-06,
"loss": 0.7642,
"step": 260
},
{
"epoch": 0.8253725640045854,
"grad_norm": 0.7100151672475864,
"learning_rate": 5e-06,
"loss": 0.7672,
"step": 270
},
{
"epoch": 0.8559419182269774,
"grad_norm": 0.8122666871045187,
"learning_rate": 5e-06,
"loss": 0.7658,
"step": 280
},
{
"epoch": 0.8865112724493696,
"grad_norm": 0.9650852170253817,
"learning_rate": 5e-06,
"loss": 0.761,
"step": 290
},
{
"epoch": 0.9170806266717616,
"grad_norm": 0.9066342061905646,
"learning_rate": 5e-06,
"loss": 0.7623,
"step": 300
},
{
"epoch": 0.9476499808941536,
"grad_norm": 0.8736567540109669,
"learning_rate": 5e-06,
"loss": 0.7605,
"step": 310
},
{
"epoch": 0.9782193351165457,
"grad_norm": 0.581848021872034,
"learning_rate": 5e-06,
"loss": 0.759,
"step": 320
},
{
"epoch": 0.9996178830722201,
"eval_loss": 0.7565266489982605,
"eval_runtime": 348.0672,
"eval_samples_per_second": 25.323,
"eval_steps_per_second": 0.396,
"step": 327
},
{
"epoch": 1.0087886893389377,
"grad_norm": 1.2716473505505381,
"learning_rate": 5e-06,
"loss": 0.8102,
"step": 330
},
{
"epoch": 1.0393580435613297,
"grad_norm": 0.6878381986732218,
"learning_rate": 5e-06,
"loss": 0.7133,
"step": 340
},
{
"epoch": 1.0699273977837218,
"grad_norm": 0.6062487967329377,
"learning_rate": 5e-06,
"loss": 0.7114,
"step": 350
},
{
"epoch": 1.100496752006114,
"grad_norm": 0.6358424479273052,
"learning_rate": 5e-06,
"loss": 0.7172,
"step": 360
},
{
"epoch": 1.131066106228506,
"grad_norm": 0.6758954139263628,
"learning_rate": 5e-06,
"loss": 0.7155,
"step": 370
},
{
"epoch": 1.161635460450898,
"grad_norm": 0.6698110186698876,
"learning_rate": 5e-06,
"loss": 0.7112,
"step": 380
},
{
"epoch": 1.19220481467329,
"grad_norm": 0.7109888242262707,
"learning_rate": 5e-06,
"loss": 0.717,
"step": 390
},
{
"epoch": 1.2227741688956821,
"grad_norm": 0.8395391271484969,
"learning_rate": 5e-06,
"loss": 0.7124,
"step": 400
},
{
"epoch": 1.253343523118074,
"grad_norm": 0.7312759566081071,
"learning_rate": 5e-06,
"loss": 0.7118,
"step": 410
},
{
"epoch": 1.2839128773404662,
"grad_norm": 0.7632165886503746,
"learning_rate": 5e-06,
"loss": 0.7158,
"step": 420
},
{
"epoch": 1.3144822315628581,
"grad_norm": 0.6457766924924516,
"learning_rate": 5e-06,
"loss": 0.7125,
"step": 430
},
{
"epoch": 1.3450515857852503,
"grad_norm": 0.6072207383876659,
"learning_rate": 5e-06,
"loss": 0.7073,
"step": 440
},
{
"epoch": 1.3756209400076425,
"grad_norm": 0.7504686700687067,
"learning_rate": 5e-06,
"loss": 0.7126,
"step": 450
},
{
"epoch": 1.4061902942300344,
"grad_norm": 0.5951557615102034,
"learning_rate": 5e-06,
"loss": 0.7153,
"step": 460
},
{
"epoch": 1.4367596484524263,
"grad_norm": 0.7769347892834471,
"learning_rate": 5e-06,
"loss": 0.7121,
"step": 470
},
{
"epoch": 1.4673290026748185,
"grad_norm": 0.6602990935189074,
"learning_rate": 5e-06,
"loss": 0.7109,
"step": 480
},
{
"epoch": 1.4978983568972106,
"grad_norm": 0.6128725764352912,
"learning_rate": 5e-06,
"loss": 0.7107,
"step": 490
},
{
"epoch": 1.5284677111196026,
"grad_norm": 0.6273289786548926,
"learning_rate": 5e-06,
"loss": 0.7163,
"step": 500
},
{
"epoch": 1.5590370653419945,
"grad_norm": 0.7489331538468206,
"learning_rate": 5e-06,
"loss": 0.7089,
"step": 510
},
{
"epoch": 1.5896064195643866,
"grad_norm": 0.5681605011723666,
"learning_rate": 5e-06,
"loss": 0.7095,
"step": 520
},
{
"epoch": 1.6201757737867788,
"grad_norm": 0.5985303729183026,
"learning_rate": 5e-06,
"loss": 0.7121,
"step": 530
},
{
"epoch": 1.650745128009171,
"grad_norm": 0.797108834271602,
"learning_rate": 5e-06,
"loss": 0.7076,
"step": 540
},
{
"epoch": 1.6813144822315629,
"grad_norm": 0.7153053885200517,
"learning_rate": 5e-06,
"loss": 0.7122,
"step": 550
},
{
"epoch": 1.7118838364539548,
"grad_norm": 0.6550919899983794,
"learning_rate": 5e-06,
"loss": 0.7081,
"step": 560
},
{
"epoch": 1.742453190676347,
"grad_norm": 0.518606008941313,
"learning_rate": 5e-06,
"loss": 0.7079,
"step": 570
},
{
"epoch": 1.7730225448987391,
"grad_norm": 0.5838363697325892,
"learning_rate": 5e-06,
"loss": 0.7099,
"step": 580
},
{
"epoch": 1.803591899121131,
"grad_norm": 0.7286231789681924,
"learning_rate": 5e-06,
"loss": 0.7114,
"step": 590
},
{
"epoch": 1.834161253343523,
"grad_norm": 0.5756454212301187,
"learning_rate": 5e-06,
"loss": 0.7097,
"step": 600
},
{
"epoch": 1.8647306075659151,
"grad_norm": 0.6043070572041148,
"learning_rate": 5e-06,
"loss": 0.7088,
"step": 610
},
{
"epoch": 1.8952999617883073,
"grad_norm": 0.6701129239334284,
"learning_rate": 5e-06,
"loss": 0.7102,
"step": 620
},
{
"epoch": 1.9258693160106994,
"grad_norm": 0.682232504464511,
"learning_rate": 5e-06,
"loss": 0.7065,
"step": 630
},
{
"epoch": 1.9564386702330914,
"grad_norm": 0.7372520465078513,
"learning_rate": 5e-06,
"loss": 0.7075,
"step": 640
},
{
"epoch": 1.9870080244554833,
"grad_norm": 0.643870845568192,
"learning_rate": 5e-06,
"loss": 0.7082,
"step": 650
},
{
"epoch": 1.9992357661444402,
"eval_loss": 0.7433667182922363,
"eval_runtime": 347.9317,
"eval_samples_per_second": 25.333,
"eval_steps_per_second": 0.397,
"step": 654
},
{
"epoch": 2.0175773786778755,
"grad_norm": 0.8491803882899337,
"learning_rate": 5e-06,
"loss": 0.7384,
"step": 660
},
{
"epoch": 2.0481467329002676,
"grad_norm": 0.6533686347265598,
"learning_rate": 5e-06,
"loss": 0.6584,
"step": 670
},
{
"epoch": 2.0787160871226593,
"grad_norm": 0.7193242535677702,
"learning_rate": 5e-06,
"loss": 0.6626,
"step": 680
},
{
"epoch": 2.1092854413450515,
"grad_norm": 0.6207686304604076,
"learning_rate": 5e-06,
"loss": 0.6584,
"step": 690
},
{
"epoch": 2.1398547955674436,
"grad_norm": 0.8022673797865767,
"learning_rate": 5e-06,
"loss": 0.6637,
"step": 700
},
{
"epoch": 2.1704241497898358,
"grad_norm": 0.7500169227153624,
"learning_rate": 5e-06,
"loss": 0.664,
"step": 710
},
{
"epoch": 2.200993504012228,
"grad_norm": 0.8712969762773869,
"learning_rate": 5e-06,
"loss": 0.6639,
"step": 720
},
{
"epoch": 2.2315628582346196,
"grad_norm": 0.8613714358485228,
"learning_rate": 5e-06,
"loss": 0.6644,
"step": 730
},
{
"epoch": 2.262132212457012,
"grad_norm": 0.947467641650837,
"learning_rate": 5e-06,
"loss": 0.6597,
"step": 740
},
{
"epoch": 2.292701566679404,
"grad_norm": 0.6868607018777386,
"learning_rate": 5e-06,
"loss": 0.6649,
"step": 750
},
{
"epoch": 2.323270920901796,
"grad_norm": 0.6251862332352117,
"learning_rate": 5e-06,
"loss": 0.6659,
"step": 760
},
{
"epoch": 2.353840275124188,
"grad_norm": 0.5757269555166172,
"learning_rate": 5e-06,
"loss": 0.663,
"step": 770
},
{
"epoch": 2.38440962934658,
"grad_norm": 0.584698830183716,
"learning_rate": 5e-06,
"loss": 0.6615,
"step": 780
},
{
"epoch": 2.414978983568972,
"grad_norm": 0.6077791824133885,
"learning_rate": 5e-06,
"loss": 0.6668,
"step": 790
},
{
"epoch": 2.4455483377913643,
"grad_norm": 0.760997414953586,
"learning_rate": 5e-06,
"loss": 0.6652,
"step": 800
},
{
"epoch": 2.476117692013756,
"grad_norm": 0.8057818067666922,
"learning_rate": 5e-06,
"loss": 0.6658,
"step": 810
},
{
"epoch": 2.506687046236148,
"grad_norm": 0.7538900658925544,
"learning_rate": 5e-06,
"loss": 0.6652,
"step": 820
},
{
"epoch": 2.5372564004585403,
"grad_norm": 0.5345492924904168,
"learning_rate": 5e-06,
"loss": 0.6667,
"step": 830
},
{
"epoch": 2.5678257546809324,
"grad_norm": 0.631444604730353,
"learning_rate": 5e-06,
"loss": 0.6675,
"step": 840
},
{
"epoch": 2.5983951089033246,
"grad_norm": 0.5462915586952234,
"learning_rate": 5e-06,
"loss": 0.6645,
"step": 850
},
{
"epoch": 2.6289644631257163,
"grad_norm": 0.54738098576822,
"learning_rate": 5e-06,
"loss": 0.6632,
"step": 860
},
{
"epoch": 2.6595338173481085,
"grad_norm": 0.5479245307940317,
"learning_rate": 5e-06,
"loss": 0.668,
"step": 870
},
{
"epoch": 2.6901031715705006,
"grad_norm": 0.573639898499825,
"learning_rate": 5e-06,
"loss": 0.668,
"step": 880
},
{
"epoch": 2.7206725257928928,
"grad_norm": 0.7814076044018249,
"learning_rate": 5e-06,
"loss": 0.6673,
"step": 890
},
{
"epoch": 2.751241880015285,
"grad_norm": 0.7321434596516128,
"learning_rate": 5e-06,
"loss": 0.6682,
"step": 900
},
{
"epoch": 2.7818112342376766,
"grad_norm": 0.6195747454218001,
"learning_rate": 5e-06,
"loss": 0.6654,
"step": 910
},
{
"epoch": 2.8123805884600688,
"grad_norm": 0.6928375671510486,
"learning_rate": 5e-06,
"loss": 0.6644,
"step": 920
},
{
"epoch": 2.842949942682461,
"grad_norm": 0.6500798961347346,
"learning_rate": 5e-06,
"loss": 0.6662,
"step": 930
},
{
"epoch": 2.8735192969048526,
"grad_norm": 0.6034455141876413,
"learning_rate": 5e-06,
"loss": 0.6663,
"step": 940
},
{
"epoch": 2.904088651127245,
"grad_norm": 0.6232438975926966,
"learning_rate": 5e-06,
"loss": 0.6658,
"step": 950
},
{
"epoch": 2.934658005349637,
"grad_norm": 0.6210983063190099,
"learning_rate": 5e-06,
"loss": 0.667,
"step": 960
},
{
"epoch": 2.965227359572029,
"grad_norm": 0.5251770576443071,
"learning_rate": 5e-06,
"loss": 0.6669,
"step": 970
},
{
"epoch": 2.9957967137944213,
"grad_norm": 0.7539461515741752,
"learning_rate": 5e-06,
"loss": 0.6679,
"step": 980
},
{
"epoch": 2.9988536492166604,
"eval_loss": 0.7435035109519958,
"eval_runtime": 349.5805,
"eval_samples_per_second": 25.213,
"eval_steps_per_second": 0.395,
"step": 981
},
{
"epoch": 2.9988536492166604,
"step": 981,
"total_flos": 1643002158120960.0,
"train_loss": 0.7275239749907475,
"train_runtime": 58192.229,
"train_samples_per_second": 8.633,
"train_steps_per_second": 0.017
}
],
"logging_steps": 10,
"max_steps": 981,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1643002158120960.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}