0.001_ablation_iter_3 / trainer_state.json
ShenaoZ's picture
Model save
e3b3cb7 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9968652037617555,
"eval_steps": 500,
"global_step": 159,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 3.125e-08,
"logits/chosen": -2.072277545928955,
"logits/rejected": -2.0595779418945312,
"logps/chosen": -398.8112487792969,
"logps/pi_response": -311.4126892089844,
"logps/ref_response": -311.4126892089844,
"logps/rejected": -484.30792236328125,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.06,
"learning_rate": 3.1249999999999997e-07,
"logits/chosen": -2.0879757404327393,
"logits/rejected": -1.9906916618347168,
"logps/chosen": -323.0065612792969,
"logps/pi_response": -217.33697509765625,
"logps/ref_response": -217.61642456054688,
"logps/rejected": -381.7716979980469,
"loss": 0.6868,
"rewards/accuracies": 0.4791666567325592,
"rewards/chosen": -0.005266552325338125,
"rewards/margins": 0.0116809643805027,
"rewards/rejected": -0.016947515308856964,
"step": 10
},
{
"epoch": 0.13,
"learning_rate": 4.990353313429303e-07,
"logits/chosen": -1.9731781482696533,
"logits/rejected": -1.9694221019744873,
"logps/chosen": -309.8916931152344,
"logps/pi_response": -223.656005859375,
"logps/ref_response": -227.64566040039062,
"logps/rejected": -454.9925231933594,
"loss": 0.6618,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.1710830181837082,
"rewards/margins": 0.27294591069221497,
"rewards/rejected": -0.4440288543701172,
"step": 20
},
{
"epoch": 0.19,
"learning_rate": 4.882681251368548e-07,
"logits/chosen": -1.9831979274749756,
"logits/rejected": -1.925021767616272,
"logps/chosen": -335.2173767089844,
"logps/pi_response": -229.2874755859375,
"logps/ref_response": -233.17965698242188,
"logps/rejected": -407.2176818847656,
"loss": 0.6719,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.14075350761413574,
"rewards/margins": 0.23552139103412628,
"rewards/rejected": -0.37627488374710083,
"step": 30
},
{
"epoch": 0.25,
"learning_rate": 4.6604720940421207e-07,
"logits/chosen": -2.0499300956726074,
"logits/rejected": -1.9712011814117432,
"logps/chosen": -340.423095703125,
"logps/pi_response": -247.22396850585938,
"logps/ref_response": -226.72451782226562,
"logps/rejected": -480.654296875,
"loss": 0.6482,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.30246642231941223,
"rewards/margins": 0.3026350140571594,
"rewards/rejected": -0.605101466178894,
"step": 40
},
{
"epoch": 0.31,
"learning_rate": 4.3344075855595097e-07,
"logits/chosen": -2.1223251819610596,
"logits/rejected": -1.982454538345337,
"logps/chosen": -344.250732421875,
"logps/pi_response": -275.4682312011719,
"logps/ref_response": -248.96499633789062,
"logps/rejected": -547.07177734375,
"loss": 0.6326,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.29991382360458374,
"rewards/margins": 0.4313625395298004,
"rewards/rejected": -0.7312763929367065,
"step": 50
},
{
"epoch": 0.38,
"learning_rate": 3.920161866827889e-07,
"logits/chosen": -1.8565187454223633,
"logits/rejected": -1.7435777187347412,
"logps/chosen": -347.46746826171875,
"logps/pi_response": -278.3968811035156,
"logps/ref_response": -236.6101531982422,
"logps/rejected": -519.2654418945312,
"loss": 0.6007,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": -0.46309342980384827,
"rewards/margins": 0.5743097066879272,
"rewards/rejected": -1.0374032258987427,
"step": 60
},
{
"epoch": 0.44,
"learning_rate": 3.4376480090239047e-07,
"logits/chosen": -1.473629355430603,
"logits/rejected": -1.2030349969863892,
"logps/chosen": -375.29510498046875,
"logps/pi_response": -285.02398681640625,
"logps/ref_response": -218.2779998779297,
"logps/rejected": -519.3750610351562,
"loss": 0.6111,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.7290376424789429,
"rewards/margins": 0.5281031727790833,
"rewards/rejected": -1.257140874862671,
"step": 70
},
{
"epoch": 0.5,
"learning_rate": 2.910060778827554e-07,
"logits/chosen": -1.3979953527450562,
"logits/rejected": -1.066356897354126,
"logps/chosen": -396.0669250488281,
"logps/pi_response": -294.2704162597656,
"logps/ref_response": -234.7672119140625,
"logps/rejected": -505.8048400878906,
"loss": 0.573,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.6167668104171753,
"rewards/margins": 0.3895764946937561,
"rewards/rejected": -1.0063434839248657,
"step": 80
},
{
"epoch": 0.56,
"learning_rate": 2.3627616503391812e-07,
"logits/chosen": -0.893900990486145,
"logits/rejected": -0.3373408913612366,
"logps/chosen": -409.7942810058594,
"logps/pi_response": -316.0831604003906,
"logps/ref_response": -232.30178833007812,
"logps/rejected": -567.0125122070312,
"loss": 0.5714,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.9255668520927429,
"rewards/margins": 0.6182758808135986,
"rewards/rejected": -1.5438427925109863,
"step": 90
},
{
"epoch": 0.63,
"learning_rate": 1.8220596619089573e-07,
"logits/chosen": -0.8694812059402466,
"logits/rejected": -0.5924743413925171,
"logps/chosen": -428.4925231933594,
"logps/pi_response": -329.4599304199219,
"logps/ref_response": -245.4618682861328,
"logps/rejected": -537.4727783203125,
"loss": 0.5734,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.9432543516159058,
"rewards/margins": 0.4144589900970459,
"rewards/rejected": -1.357713222503662,
"step": 100
},
{
"epoch": 0.69,
"learning_rate": 1.3139467229135998e-07,
"logits/chosen": -0.9938161969184875,
"logits/rejected": -0.45242977142333984,
"logps/chosen": -389.0281677246094,
"logps/pi_response": -314.51422119140625,
"logps/ref_response": -233.9892120361328,
"logps/rejected": -560.4173583984375,
"loss": 0.5906,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.7948281168937683,
"rewards/margins": 0.5036167502403259,
"rewards/rejected": -1.2984448671340942,
"step": 110
},
{
"epoch": 0.75,
"learning_rate": 8.628481651367875e-08,
"logits/chosen": -0.982222855091095,
"logits/rejected": -0.48237520456314087,
"logps/chosen": -397.54718017578125,
"logps/pi_response": -313.84527587890625,
"logps/ref_response": -230.01510620117188,
"logps/rejected": -565.9222412109375,
"loss": 0.5429,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.8856877088546753,
"rewards/margins": 0.6042315363883972,
"rewards/rejected": -1.4899194240570068,
"step": 120
},
{
"epoch": 0.82,
"learning_rate": 4.904486005914027e-08,
"logits/chosen": -0.8022481203079224,
"logits/rejected": -0.40596532821655273,
"logps/chosen": -407.4441833496094,
"logps/pi_response": -319.7482604980469,
"logps/ref_response": -227.49935913085938,
"logps/rejected": -588.3087768554688,
"loss": 0.5733,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.9994068145751953,
"rewards/margins": 0.6314084529876709,
"rewards/rejected": -1.6308151483535767,
"step": 130
},
{
"epoch": 0.88,
"learning_rate": 2.1464952759020856e-08,
"logits/chosen": -0.6527538299560547,
"logits/rejected": -0.13006380200386047,
"logps/chosen": -432.28009033203125,
"logps/pi_response": -334.286376953125,
"logps/ref_response": -231.6268310546875,
"logps/rejected": -581.1611328125,
"loss": 0.5507,
"rewards/accuracies": 0.75,
"rewards/chosen": -1.0939096212387085,
"rewards/margins": 0.6699361801147461,
"rewards/rejected": -1.7638458013534546,
"step": 140
},
{
"epoch": 0.94,
"learning_rate": 4.8708793644441086e-09,
"logits/chosen": -0.9013077616691589,
"logits/rejected": -0.350179523229599,
"logps/chosen": -417.94500732421875,
"logps/pi_response": -348.1318359375,
"logps/ref_response": -250.2039337158203,
"logps/rejected": -609.0900268554688,
"loss": 0.5658,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.9682818651199341,
"rewards/margins": 0.6685720086097717,
"rewards/rejected": -1.636853814125061,
"step": 150
},
{
"epoch": 1.0,
"step": 159,
"total_flos": 0.0,
"train_loss": 0.596127321135323,
"train_runtime": 4548.9667,
"train_samples_per_second": 4.48,
"train_steps_per_second": 0.035
}
],
"logging_steps": 10,
"max_steps": 159,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}