selm_ours_iter_3 / trainer_state.json
YYYYYYibo's picture
Model save
241cf89 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9992429977289932,
"eval_steps": 500,
"global_step": 165,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"eta": 0.0010000000474974513,
"grad_norm": 18.46040839297478,
"learning_rate": 2.941176470588235e-08,
"logits/chosen": -1.823674201965332,
"logits/rejected": -1.9654152393341064,
"logps/chosen": -228.01856994628906,
"logps/pi_response": -374.7322998046875,
"logps/ref_response": -374.7322998046875,
"logps/rejected": -233.90797424316406,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.06,
"eta": 0.0010000000474974513,
"grad_norm": 17.640747860282204,
"learning_rate": 2.941176470588235e-07,
"logits/chosen": -1.9038560390472412,
"logits/rejected": -1.9730311632156372,
"logps/chosen": -247.17388916015625,
"logps/pi_response": -390.3918762207031,
"logps/ref_response": -389.3831787109375,
"logps/rejected": -256.4879150390625,
"loss": 0.6928,
"rewards/accuracies": 0.4305555522441864,
"rewards/chosen": -0.009673516266047955,
"rewards/margins": 0.001249339897185564,
"rewards/rejected": -0.010922855697572231,
"step": 10
},
{
"epoch": 0.12,
"eta": 0.0010000000474974513,
"grad_norm": 28.899289506668094,
"learning_rate": 4.994932636402031e-07,
"logits/chosen": -1.8548221588134766,
"logits/rejected": -1.8832534551620483,
"logps/chosen": -251.72506713867188,
"logps/pi_response": -406.16729736328125,
"logps/ref_response": -385.27423095703125,
"logps/rejected": -261.83319091796875,
"loss": 0.6886,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.17078793048858643,
"rewards/margins": 0.0026144247967749834,
"rewards/rejected": -0.1734023541212082,
"step": 20
},
{
"epoch": 0.18,
"eta": 0.0010000000474974513,
"grad_norm": 42.67715933224017,
"learning_rate": 4.905416503522123e-07,
"logits/chosen": -1.7043983936309814,
"logits/rejected": -1.7824294567108154,
"logps/chosen": -270.9886169433594,
"logps/pi_response": -429.6463928222656,
"logps/ref_response": -377.20526123046875,
"logps/rejected": -279.2468566894531,
"loss": 0.6933,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.38385888934135437,
"rewards/margins": 0.053137779235839844,
"rewards/rejected": -0.4369966983795166,
"step": 30
},
{
"epoch": 0.24,
"eta": 0.0010000000474974513,
"grad_norm": 15.90865641628214,
"learning_rate": 4.707922373336523e-07,
"logits/chosen": -2.0103068351745605,
"logits/rejected": -2.06811261177063,
"logps/chosen": -231.14230346679688,
"logps/pi_response": -350.94573974609375,
"logps/ref_response": -381.7485656738281,
"logps/rejected": -240.6361083984375,
"loss": 0.7047,
"rewards/accuracies": 0.4593749940395355,
"rewards/chosen": 0.17535772919654846,
"rewards/margins": -0.011809123679995537,
"rewards/rejected": 0.18716685473918915,
"step": 40
},
{
"epoch": 0.3,
"eta": 0.0010000000474974513,
"grad_norm": 15.507604404572884,
"learning_rate": 4.4113156629677313e-07,
"logits/chosen": -2.049783706665039,
"logits/rejected": -2.066466808319092,
"logps/chosen": -251.8543701171875,
"logps/pi_response": -378.17755126953125,
"logps/ref_response": -385.29022216796875,
"logps/rejected": -263.17840576171875,
"loss": 0.6992,
"rewards/accuracies": 0.503125011920929,
"rewards/chosen": 0.0042174034751951694,
"rewards/margins": 0.009917219169437885,
"rewards/rejected": -0.005699816159904003,
"step": 50
},
{
"epoch": 0.36,
"eta": 0.0010000000474974513,
"grad_norm": 26.8718224209532,
"learning_rate": 4.0289109058972283e-07,
"logits/chosen": -1.816748857498169,
"logits/rejected": -1.7844823598861694,
"logps/chosen": -294.067626953125,
"logps/pi_response": -430.2914123535156,
"logps/ref_response": -382.1808166503906,
"logps/rejected": -307.89300537109375,
"loss": 0.6871,
"rewards/accuracies": 0.534375011920929,
"rewards/chosen": -0.48830538988113403,
"rewards/margins": 0.05274411290884018,
"rewards/rejected": -0.5410495400428772,
"step": 60
},
{
"epoch": 0.42,
"eta": 0.0010000000474974513,
"grad_norm": 26.160189012700346,
"learning_rate": 3.577874068920446e-07,
"logits/chosen": -1.6282529830932617,
"logits/rejected": -1.6833488941192627,
"logps/chosen": -307.86944580078125,
"logps/pi_response": -454.16510009765625,
"logps/ref_response": -385.6980895996094,
"logps/rejected": -318.7071533203125,
"loss": 0.687,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.6023891568183899,
"rewards/margins": 0.05064737796783447,
"rewards/rejected": -0.6530365347862244,
"step": 70
},
{
"epoch": 0.48,
"eta": 0.0010000000474974513,
"grad_norm": 29.331288950657562,
"learning_rate": 3.078451980100854e-07,
"logits/chosen": -1.6993424892425537,
"logits/rejected": -1.781658411026001,
"logps/chosen": -298.09710693359375,
"logps/pi_response": -443.031982421875,
"logps/ref_response": -377.09844970703125,
"logps/rejected": -306.07000732421875,
"loss": 0.687,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.5618255138397217,
"rewards/margins": 0.033075593411922455,
"rewards/rejected": -0.5949010848999023,
"step": 80
},
{
"epoch": 0.55,
"eta": 0.0010000000474974513,
"grad_norm": 19.393381596005668,
"learning_rate": 2.553063458334059e-07,
"logits/chosen": -1.794803261756897,
"logits/rejected": -1.7129312753677368,
"logps/chosen": -291.0694274902344,
"logps/pi_response": -433.5843811035156,
"logps/ref_response": -365.1755676269531,
"logps/rejected": -299.36712646484375,
"loss": 0.6865,
"rewards/accuracies": 0.5531250238418579,
"rewards/chosen": -0.5367754697799683,
"rewards/margins": 0.03232881426811218,
"rewards/rejected": -0.5691043138504028,
"step": 90
},
{
"epoch": 0.61,
"eta": 0.0010000000474974513,
"grad_norm": 25.069097284805487,
"learning_rate": 2.0252929432814287e-07,
"logits/chosen": -1.7748645544052124,
"logits/rejected": -1.7401233911514282,
"logps/chosen": -311.4417724609375,
"logps/pi_response": -464.7859802246094,
"logps/ref_response": -385.6819763183594,
"logps/rejected": -322.92425537109375,
"loss": 0.687,
"rewards/accuracies": 0.5093749761581421,
"rewards/chosen": -0.6161566972732544,
"rewards/margins": 0.018065042793750763,
"rewards/rejected": -0.6342216730117798,
"step": 100
},
{
"epoch": 0.67,
"eta": 0.0010000000474974513,
"grad_norm": 24.497323695269376,
"learning_rate": 1.5188318011445906e-07,
"logits/chosen": -1.5538889169692993,
"logits/rejected": -1.5146961212158203,
"logps/chosen": -327.43988037109375,
"logps/pi_response": -494.04071044921875,
"logps/ref_response": -391.55242919921875,
"logps/rejected": -343.62298583984375,
"loss": 0.6812,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.8402616381645203,
"rewards/margins": 0.0964951142668724,
"rewards/rejected": -0.9367567896842957,
"step": 110
},
{
"epoch": 0.73,
"eta": 0.0010000000474974513,
"grad_norm": 26.960393670257023,
"learning_rate": 1.0564148305586295e-07,
"logits/chosen": -1.55520761013031,
"logits/rejected": -1.6926014423370361,
"logps/chosen": -302.15289306640625,
"logps/pi_response": -478.40008544921875,
"logps/ref_response": -395.8247985839844,
"logps/rejected": -316.0426940917969,
"loss": 0.6831,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.6136020421981812,
"rewards/margins": 0.05630939081311226,
"rewards/rejected": -0.6699115037918091,
"step": 120
},
{
"epoch": 0.79,
"eta": 0.0010000000474974513,
"grad_norm": 19.22478432809335,
"learning_rate": 6.587997083462196e-08,
"logits/chosen": -1.6059160232543945,
"logits/rejected": -1.7904043197631836,
"logps/chosen": -298.92108154296875,
"logps/pi_response": -464.8466796875,
"logps/ref_response": -377.23529052734375,
"logps/rejected": -310.23858642578125,
"loss": 0.6854,
"rewards/accuracies": 0.5531250238418579,
"rewards/chosen": -0.5502643585205078,
"rewards/margins": 0.04218818619847298,
"rewards/rejected": -0.5924525856971741,
"step": 130
},
{
"epoch": 0.85,
"eta": 0.0010000000474974513,
"grad_norm": 27.016415362773294,
"learning_rate": 3.438351873250492e-08,
"logits/chosen": -1.7260879278182983,
"logits/rejected": -1.7643539905548096,
"logps/chosen": -306.79022216796875,
"logps/pi_response": -471.447021484375,
"logps/ref_response": -384.4253845214844,
"logps/rejected": -320.1635437011719,
"loss": 0.6806,
"rewards/accuracies": 0.5843750238418579,
"rewards/chosen": -0.5997852087020874,
"rewards/margins": 0.06555557250976562,
"rewards/rejected": -0.665340781211853,
"step": 140
},
{
"epoch": 0.91,
"eta": 0.0010000000474974513,
"grad_norm": 21.580864096737645,
"learning_rate": 1.256598743236703e-08,
"logits/chosen": -1.6270942687988281,
"logits/rejected": -1.499762773513794,
"logps/chosen": -299.7687072753906,
"logps/pi_response": -473.4771423339844,
"logps/ref_response": -380.0408020019531,
"logps/rejected": -312.3251953125,
"loss": 0.6835,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.6862987875938416,
"rewards/margins": 0.05176978185772896,
"rewards/rejected": -0.7380686402320862,
"step": 150
},
{
"epoch": 0.97,
"eta": 0.0010000000474974513,
"grad_norm": 25.201629028850032,
"learning_rate": 1.406755487774386e-09,
"logits/chosen": -1.6800531148910522,
"logits/rejected": -1.5669732093811035,
"logps/chosen": -309.3727722167969,
"logps/pi_response": -477.26513671875,
"logps/ref_response": -381.9959411621094,
"logps/rejected": -319.76129150390625,
"loss": 0.6837,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.6621077656745911,
"rewards/margins": 0.04643635079264641,
"rewards/rejected": -0.7085440754890442,
"step": 160
},
{
"epoch": 1.0,
"step": 165,
"total_flos": 0.0,
"train_loss": 0.6879499728029425,
"train_runtime": 34730.0556,
"train_samples_per_second": 0.609,
"train_steps_per_second": 0.005
}
],
"logging_steps": 10,
"max_steps": 165,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 200,
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}