OpenElla3-Llama3.2B-V2 / checkpoint-186 /trainer_state.json
ItsMeDevRoland's picture
Upload folder using huggingface_hub
132841a verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.96,
"eval_steps": 50,
"global_step": 186,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08,
"grad_norm": 3.3149960041046143,
"learning_rate": 5e-06,
"logits/chosen": -0.24067819118499756,
"logits/rejected": -0.4968351423740387,
"logps/chosen": -433.0858459472656,
"logps/rejected": -68.33470153808594,
"loss": 0.1722,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.1653084754943848,
"rewards/margins": 1.9695370197296143,
"rewards/rejected": 0.19577142596244812,
"step": 5
},
{
"epoch": 0.16,
"grad_norm": 1.4687750339508057,
"learning_rate": 1e-05,
"logits/chosen": -0.23465164005756378,
"logits/rejected": -0.5149508118629456,
"logps/chosen": -433.3921813964844,
"logps/rejected": -65.57392883300781,
"loss": 0.1348,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": 2.6356241703033447,
"rewards/margins": 2.478445529937744,
"rewards/rejected": 0.15717869997024536,
"step": 10
},
{
"epoch": 0.24,
"grad_norm": 0.4518139958381653,
"learning_rate": 1.5e-05,
"logits/chosen": -0.22109150886535645,
"logits/rejected": -0.5422734022140503,
"logps/chosen": -421.8102111816406,
"logps/rejected": -68.03514099121094,
"loss": 0.0432,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.584801197052002,
"rewards/margins": 3.5185985565185547,
"rewards/rejected": 0.06620248407125473,
"step": 15
},
{
"epoch": 0.32,
"grad_norm": 0.12663038074970245,
"learning_rate": 2e-05,
"logits/chosen": -0.21289744973182678,
"logits/rejected": -0.5218192338943481,
"logps/chosen": -414.16058349609375,
"logps/rejected": -72.08072662353516,
"loss": 0.0138,
"rewards/accuracies": 1.0,
"rewards/chosen": 4.662674903869629,
"rewards/margins": 4.634940147399902,
"rewards/rejected": 0.02773415483534336,
"step": 20
},
{
"epoch": 0.4,
"grad_norm": 0.043434809893369675,
"learning_rate": 2.5e-05,
"logits/chosen": -0.1724880337715149,
"logits/rejected": -0.632266640663147,
"logps/chosen": -391.53204345703125,
"logps/rejected": -71.50444030761719,
"loss": 0.0039,
"rewards/accuracies": 1.0,
"rewards/chosen": 5.770969390869141,
"rewards/margins": 5.899745464324951,
"rewards/rejected": -0.12877611815929413,
"step": 25
},
{
"epoch": 0.48,
"grad_norm": 0.012639075517654419,
"learning_rate": 3e-05,
"logits/chosen": -0.11160198599100113,
"logits/rejected": -0.46575218439102173,
"logps/chosen": -387.0035095214844,
"logps/rejected": -68.87992095947266,
"loss": 0.0012,
"rewards/accuracies": 1.0,
"rewards/chosen": 7.1022233963012695,
"rewards/margins": 7.20810604095459,
"rewards/rejected": -0.10588403791189194,
"step": 30
},
{
"epoch": 0.56,
"grad_norm": 0.008335842750966549,
"learning_rate": 3.5e-05,
"logits/chosen": -0.16194215416908264,
"logits/rejected": -0.5091412663459778,
"logps/chosen": -380.4056701660156,
"logps/rejected": -70.05772399902344,
"loss": 0.0007,
"rewards/accuracies": 1.0,
"rewards/chosen": 7.549788475036621,
"rewards/margins": 7.717337608337402,
"rewards/rejected": -0.16754867136478424,
"step": 35
},
{
"epoch": 0.64,
"grad_norm": 0.0034919867757707834,
"learning_rate": 4e-05,
"logits/chosen": -0.12451864778995514,
"logits/rejected": -0.4964370131492615,
"logps/chosen": -379.21673583984375,
"logps/rejected": -69.48336029052734,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/chosen": 8.368996620178223,
"rewards/margins": 8.601344108581543,
"rewards/rejected": -0.23234805464744568,
"step": 40
},
{
"epoch": 0.72,
"grad_norm": 0.0023931912146508694,
"learning_rate": 4.5e-05,
"logits/chosen": -0.11491024494171143,
"logits/rejected": -0.5707582831382751,
"logps/chosen": -367.6617431640625,
"logps/rejected": -71.28264617919922,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/chosen": 8.458444595336914,
"rewards/margins": 8.877108573913574,
"rewards/rejected": -0.41866397857666016,
"step": 45
},
{
"epoch": 0.8,
"grad_norm": 0.002425891114398837,
"learning_rate": 5e-05,
"logits/chosen": -0.1450928896665573,
"logits/rejected": -0.6191288232803345,
"logps/chosen": -371.6398620605469,
"logps/rejected": -75.66804504394531,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": 8.693218231201172,
"rewards/margins": 9.168657302856445,
"rewards/rejected": -0.4754392206668854,
"step": 50
},
{
"epoch": 0.8,
"eval_logits/chosen": -0.09945501387119293,
"eval_logits/rejected": -0.5488065481185913,
"eval_logps/chosen": -366.370849609375,
"eval_logps/rejected": -72.13478088378906,
"eval_loss": 0.0001304554898524657,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": 8.9785737991333,
"eval_rewards/margins": 9.362648963928223,
"eval_rewards/rejected": -0.3840752840042114,
"eval_runtime": 31.4719,
"eval_samples_per_second": 3.177,
"eval_steps_per_second": 0.794,
"step": 50
},
{
"epoch": 0.88,
"grad_norm": 0.0031152062583714724,
"learning_rate": 5.500000000000001e-05,
"logits/chosen": -0.10387110710144043,
"logits/rejected": -0.5721521377563477,
"logps/chosen": -367.0964660644531,
"logps/rejected": -73.10216522216797,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": 8.92541217803955,
"rewards/margins": 9.32882308959961,
"rewards/rejected": -0.40341073274612427,
"step": 55
},
{
"epoch": 0.96,
"grad_norm": 0.0020932538900524378,
"learning_rate": 6e-05,
"logits/chosen": -0.09782540053129196,
"logits/rejected": -0.634772002696991,
"logps/chosen": -365.34393310546875,
"logps/rejected": -74.85542297363281,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.201251983642578,
"rewards/margins": 9.6889009475708,
"rewards/rejected": -0.48764729499816895,
"step": 60
},
{
"epoch": 1.032,
"grad_norm": 0.0024136879947036505,
"learning_rate": 6.500000000000001e-05,
"logits/chosen": -0.06437685340642929,
"logits/rejected": -0.5220383405685425,
"logps/chosen": -371.1302490234375,
"logps/rejected": -70.0087890625,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.469672203063965,
"rewards/margins": 9.84286880493164,
"rewards/rejected": -0.37319669127464294,
"step": 65
},
{
"epoch": 1.112,
"grad_norm": 0.0008256471483036876,
"learning_rate": 7e-05,
"logits/chosen": -0.10976336151361465,
"logits/rejected": -0.47059911489486694,
"logps/chosen": -359.65667724609375,
"logps/rejected": -67.18486785888672,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.607645988464355,
"rewards/margins": 10.04837703704834,
"rewards/rejected": -0.4407329559326172,
"step": 70
},
{
"epoch": 1.192,
"grad_norm": 0.003003115998581052,
"learning_rate": 7.500000000000001e-05,
"logits/chosen": -0.1105552464723587,
"logits/rejected": -0.6176896095275879,
"logps/chosen": -365.54412841796875,
"logps/rejected": -81.55340576171875,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.260071754455566,
"rewards/margins": 9.807526588439941,
"rewards/rejected": -0.547455906867981,
"step": 75
},
{
"epoch": 1.272,
"grad_norm": 0.0006993189454078674,
"learning_rate": 8e-05,
"logits/chosen": -0.10653767734766006,
"logits/rejected": -0.5204580426216125,
"logps/chosen": -364.65301513671875,
"logps/rejected": -69.9474105834961,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.50252628326416,
"rewards/margins": 9.944330215454102,
"rewards/rejected": -0.441803514957428,
"step": 80
},
{
"epoch": 1.3519999999999999,
"grad_norm": 0.0006063419277779758,
"learning_rate": 8.5e-05,
"logits/chosen": -0.09719086438417435,
"logits/rejected": -0.5659133791923523,
"logps/chosen": -360.6310119628906,
"logps/rejected": -71.894775390625,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.610506057739258,
"rewards/margins": 10.054449081420898,
"rewards/rejected": -0.4439435601234436,
"step": 85
},
{
"epoch": 1.432,
"grad_norm": 0.001517809578217566,
"learning_rate": 9e-05,
"logits/chosen": -0.06048806384205818,
"logits/rejected": -0.6155588626861572,
"logps/chosen": -361.54010009765625,
"logps/rejected": -69.9623031616211,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.503868103027344,
"rewards/margins": 9.950047492980957,
"rewards/rejected": -0.4461793303489685,
"step": 90
},
{
"epoch": 1.512,
"grad_norm": 0.0008690144750289619,
"learning_rate": 9.5e-05,
"logits/chosen": -0.09672050923109055,
"logits/rejected": -0.6058477759361267,
"logps/chosen": -362.9765625,
"logps/rejected": -75.24437713623047,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.525094032287598,
"rewards/margins": 9.997543334960938,
"rewards/rejected": -0.47244787216186523,
"step": 95
},
{
"epoch": 1.592,
"grad_norm": 0.0008889890741556883,
"learning_rate": 0.0001,
"logits/chosen": -0.09034673869609833,
"logits/rejected": -0.49319934844970703,
"logps/chosen": -358.0984802246094,
"logps/rejected": -73.61305236816406,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.614057540893555,
"rewards/margins": 10.11325740814209,
"rewards/rejected": -0.4991990625858307,
"step": 100
},
{
"epoch": 1.592,
"eval_logits/chosen": -0.08441560715436935,
"eval_logits/rejected": -0.5524138808250427,
"eval_logps/chosen": -359.42413330078125,
"eval_logps/rejected": -73.16493225097656,
"eval_loss": 5.7459325034869835e-05,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": 9.673240661621094,
"eval_rewards/margins": 10.160331726074219,
"eval_rewards/rejected": -0.48709091544151306,
"eval_runtime": 31.6934,
"eval_samples_per_second": 3.155,
"eval_steps_per_second": 0.789,
"step": 100
},
{
"epoch": 1.6720000000000002,
"grad_norm": 0.0011348004918545485,
"learning_rate": 9.418604651162792e-05,
"logits/chosen": -0.08024512976408005,
"logits/rejected": -0.5957251787185669,
"logps/chosen": -357.5785217285156,
"logps/rejected": -73.45134735107422,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.876195907592773,
"rewards/margins": 10.362418174743652,
"rewards/rejected": -0.4862229824066162,
"step": 105
},
{
"epoch": 1.752,
"grad_norm": 0.0006927254726178944,
"learning_rate": 8.837209302325582e-05,
"logits/chosen": -0.08959682285785675,
"logits/rejected": -0.5512081980705261,
"logps/chosen": -358.33837890625,
"logps/rejected": -77.51631927490234,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.615246772766113,
"rewards/margins": 10.157711029052734,
"rewards/rejected": -0.5424639582633972,
"step": 110
},
{
"epoch": 1.8319999999999999,
"grad_norm": 0.0007744388421997428,
"learning_rate": 8.255813953488373e-05,
"logits/chosen": -0.11678247153759003,
"logits/rejected": -0.6245185732841492,
"logps/chosen": -358.83660888671875,
"logps/rejected": -74.44224548339844,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.813782691955566,
"rewards/margins": 10.402308464050293,
"rewards/rejected": -0.5885262489318848,
"step": 115
},
{
"epoch": 1.912,
"grad_norm": 0.0007590119494125247,
"learning_rate": 7.674418604651163e-05,
"logits/chosen": -0.09831385314464569,
"logits/rejected": -0.6255627870559692,
"logps/chosen": -359.8316345214844,
"logps/rejected": -78.41828918457031,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.824723243713379,
"rewards/margins": 10.354299545288086,
"rewards/rejected": -0.5295764803886414,
"step": 120
},
{
"epoch": 1.992,
"grad_norm": 0.0010877439053729177,
"learning_rate": 7.093023255813955e-05,
"logits/chosen": -0.1179874986410141,
"logits/rejected": -0.5575474500656128,
"logps/chosen": -358.70001220703125,
"logps/rejected": -74.7917251586914,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.817138671875,
"rewards/margins": 10.419405937194824,
"rewards/rejected": -0.6022665500640869,
"step": 125
},
{
"epoch": 2.064,
"grad_norm": 0.0007917136535979807,
"learning_rate": 6.511627906976745e-05,
"logits/chosen": -0.07707415521144867,
"logits/rejected": -0.6612274050712585,
"logps/chosen": -361.1393127441406,
"logps/rejected": -81.81687927246094,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.78316593170166,
"rewards/margins": 10.39424991607666,
"rewards/rejected": -0.6110839247703552,
"step": 130
},
{
"epoch": 2.144,
"grad_norm": 0.0007871920824982226,
"learning_rate": 5.9302325581395356e-05,
"logits/chosen": -0.08203691989183426,
"logits/rejected": -0.5725603699684143,
"logps/chosen": -357.78302001953125,
"logps/rejected": -70.41331481933594,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.980680465698242,
"rewards/margins": 10.44549560546875,
"rewards/rejected": -0.4648161828517914,
"step": 135
},
{
"epoch": 2.224,
"grad_norm": 0.0005172414821572602,
"learning_rate": 5.348837209302326e-05,
"logits/chosen": -0.09138365834951401,
"logits/rejected": -0.5546398758888245,
"logps/chosen": -362.23577880859375,
"logps/rejected": -74.2463607788086,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.337722778320312,
"rewards/margins": 10.846572875976562,
"rewards/rejected": -0.5088496804237366,
"step": 140
},
{
"epoch": 2.304,
"grad_norm": 0.0008091035997495055,
"learning_rate": 4.7674418604651164e-05,
"logits/chosen": -0.09143027663230896,
"logits/rejected": -0.48933038115501404,
"logps/chosen": -356.3218078613281,
"logps/rejected": -74.66313171386719,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.803215026855469,
"rewards/margins": 10.368491172790527,
"rewards/rejected": -0.5652762055397034,
"step": 145
},
{
"epoch": 2.384,
"grad_norm": 0.0007813669508323073,
"learning_rate": 4.186046511627907e-05,
"logits/chosen": -0.07970213890075684,
"logits/rejected": -0.5864819288253784,
"logps/chosen": -358.19586181640625,
"logps/rejected": -73.56185913085938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.667705535888672,
"rewards/margins": 10.243247985839844,
"rewards/rejected": -0.5755430459976196,
"step": 150
},
{
"epoch": 2.384,
"eval_logits/chosen": -0.07971817255020142,
"eval_logits/rejected": -0.5524048805236816,
"eval_logps/chosen": -357.5785217285156,
"eval_logps/rejected": -73.52456665039062,
"eval_loss": 4.5410510210786015e-05,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": 9.857807159423828,
"eval_rewards/margins": 10.380863189697266,
"eval_rewards/rejected": -0.5230547189712524,
"eval_runtime": 31.5883,
"eval_samples_per_second": 3.166,
"eval_steps_per_second": 0.791,
"step": 150
},
{
"epoch": 2.464,
"grad_norm": 0.0005108030745759606,
"learning_rate": 3.604651162790698e-05,
"logits/chosen": -0.07415401935577393,
"logits/rejected": -0.5602105259895325,
"logps/chosen": -359.1539306640625,
"logps/rejected": -74.1802749633789,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.182788848876953,
"rewards/margins": 10.665776252746582,
"rewards/rejected": -0.4829869866371155,
"step": 155
},
{
"epoch": 2.544,
"grad_norm": 0.0005740431952290237,
"learning_rate": 3.0232558139534883e-05,
"logits/chosen": -0.0852808803319931,
"logits/rejected": -0.5853781700134277,
"logps/chosen": -356.70208740234375,
"logps/rejected": -72.76698303222656,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.781842231750488,
"rewards/margins": 10.317041397094727,
"rewards/rejected": -0.5351991057395935,
"step": 160
},
{
"epoch": 2.624,
"grad_norm": 0.0009486350463703275,
"learning_rate": 2.441860465116279e-05,
"logits/chosen": -0.08894743770360947,
"logits/rejected": -0.6087764501571655,
"logps/chosen": -355.6590881347656,
"logps/rejected": -72.82820129394531,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.820540428161621,
"rewards/margins": 10.366652488708496,
"rewards/rejected": -0.5461126565933228,
"step": 165
},
{
"epoch": 2.7039999999999997,
"grad_norm": 0.0009406894678249955,
"learning_rate": 1.8604651162790697e-05,
"logits/chosen": -0.09478039294481277,
"logits/rejected": -0.5880772471427917,
"logps/chosen": -358.8096923828125,
"logps/rejected": -76.46202087402344,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.935961723327637,
"rewards/margins": 10.557890892028809,
"rewards/rejected": -0.6219290494918823,
"step": 170
},
{
"epoch": 2.784,
"grad_norm": 0.0007802930776961148,
"learning_rate": 1.2790697674418606e-05,
"logits/chosen": -0.07708299160003662,
"logits/rejected": -0.5758073925971985,
"logps/chosen": -352.0008239746094,
"logps/rejected": -72.15168762207031,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.940695762634277,
"rewards/margins": 10.538742065429688,
"rewards/rejected": -0.5980448126792908,
"step": 175
},
{
"epoch": 2.864,
"grad_norm": 0.001332178944721818,
"learning_rate": 6.976744186046512e-06,
"logits/chosen": -0.1345936805009842,
"logits/rejected": -0.4655265808105469,
"logps/chosen": -358.48260498046875,
"logps/rejected": -72.2758560180664,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.618751525878906,
"rewards/margins": 10.160510063171387,
"rewards/rejected": -0.5417580604553223,
"step": 180
},
{
"epoch": 2.944,
"grad_norm": 0.000980083947069943,
"learning_rate": 1.1627906976744186e-06,
"logits/chosen": -0.10129542648792267,
"logits/rejected": -0.5999717712402344,
"logps/chosen": -360.7377014160156,
"logps/rejected": -74.36566925048828,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.746424674987793,
"rewards/margins": 10.256307601928711,
"rewards/rejected": -0.5098813772201538,
"step": 185
}
],
"logging_steps": 5,
"max_steps": 186,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}