{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9999015651146766,
  "eval_steps": 500,
  "global_step": 5079,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05000492174426617,
      "grad_norm": 5.337889194488525,
      "learning_rate": 5e-07,
      "logits/chosen": -0.5339647531509399,
      "logits/rejected": -0.47227513790130615,
      "logps/chosen": -71.58138275146484,
      "logps/rejected": -13.533102989196777,
      "loss": 0.6865,
      "rewards/accuracies": 0.5871062874794006,
      "rewards/chosen": 0.012079809792339802,
      "rewards/margins": 0.013962473720312119,
      "rewards/rejected": -0.001882663695141673,
      "step": 254
    },
    {
      "epoch": 0.10000984348853234,
      "grad_norm": 5.7681660652160645,
      "learning_rate": 1e-06,
      "logits/chosen": -0.524512529373169,
      "logits/rejected": -0.470420241355896,
      "logps/chosen": -68.49125671386719,
      "logps/rejected": -14.996256828308105,
      "loss": 0.5103,
      "rewards/accuracies": 0.8449802994728088,
      "rewards/chosen": 0.4529332220554352,
      "rewards/margins": 0.5830409526824951,
      "rewards/rejected": -0.13010773062705994,
      "step": 508
    },
    {
      "epoch": 0.1500147652327985,
      "grad_norm": 1.7311837673187256,
      "learning_rate": 9.44432290527237e-07,
      "logits/chosen": -0.5406456589698792,
      "logits/rejected": -0.49012085795402527,
      "logps/chosen": -61.494964599609375,
      "logps/rejected": -28.769521713256836,
      "loss": 0.1423,
      "rewards/accuracies": 0.9788385629653931,
      "rewards/chosen": 1.3764965534210205,
      "rewards/margins": 2.9073357582092285,
      "rewards/rejected": -1.5308390855789185,
      "step": 762
    },
    {
      "epoch": 0.20001968697706468,
      "grad_norm": 4.213954925537109,
      "learning_rate": 8.888645810544738e-07,
      "logits/chosen": -0.5104743838310242,
      "logits/rejected": -0.43841081857681274,
      "logps/chosen": -56.98832702636719,
      "logps/rejected": -44.329437255859375,
      "loss": 0.0364,
      "rewards/accuracies": 0.9872047305107117,
      "rewards/chosen": 1.6219738721847534,
      "rewards/margins": 4.794076919555664,
      "rewards/rejected": -3.1721031665802,
      "step": 1016
    },
    {
      "epoch": 0.25002460872133087,
      "grad_norm": 0.14951969683170319,
      "learning_rate": 8.332968715817108e-07,
      "logits/chosen": -0.4608861804008484,
      "logits/rejected": -0.3742350935935974,
      "logps/chosen": -55.1967887878418,
      "logps/rejected": -51.54916000366211,
      "loss": 0.0258,
      "rewards/accuracies": 0.9886810779571533,
      "rewards/chosen": 1.6978679895401,
      "rewards/margins": 5.571296691894531,
      "rewards/rejected": -3.8734288215637207,
      "step": 1270
    },
    {
      "epoch": 0.300029530465597,
      "grad_norm": 0.11377181112766266,
      "learning_rate": 7.777291621089477e-07,
      "logits/chosen": -0.4032284915447235,
      "logits/rejected": -0.2789752185344696,
      "logps/chosen": -57.66849899291992,
      "logps/rejected": -57.404354095458984,
      "loss": 0.0181,
      "rewards/accuracies": 0.9936023354530334,
      "rewards/chosen": 1.815442681312561,
      "rewards/margins": 6.287877559661865,
      "rewards/rejected": -4.472434043884277,
      "step": 1524
    },
    {
      "epoch": 0.3500344522098632,
      "grad_norm": 61.85912322998047,
      "learning_rate": 7.221614526361847e-07,
      "logits/chosen": -0.39197683334350586,
      "logits/rejected": -0.2711484432220459,
      "logps/chosen": -56.51285934448242,
      "logps/rejected": -66.6546630859375,
      "loss": 0.0243,
      "rewards/accuracies": 0.9906495809555054,
      "rewards/chosen": 1.6953144073486328,
      "rewards/margins": 6.998918533325195,
      "rewards/rejected": -5.3036041259765625,
      "step": 1778
    },
    {
      "epoch": 0.40003937395412936,
      "grad_norm": 0.09933885931968689,
      "learning_rate": 6.665937431634215e-07,
      "logits/chosen": -0.35676872730255127,
      "logits/rejected": -0.21091562509536743,
      "logps/chosen": -58.933048248291016,
      "logps/rejected": -72.55093383789062,
      "loss": 0.022,
      "rewards/accuracies": 0.9906495809555054,
      "rewards/chosen": 1.709058165550232,
      "rewards/margins": 7.6529622077941895,
      "rewards/rejected": -5.943903923034668,
      "step": 2032
    },
    {
      "epoch": 0.4500442956983955,
      "grad_norm": 0.3078814446926117,
      "learning_rate": 6.110260336906585e-07,
      "logits/chosen": -0.3692930340766907,
      "logits/rejected": -0.21181651949882507,
      "logps/chosen": -56.33852767944336,
      "logps/rejected": -78.77378845214844,
      "loss": 0.0193,
      "rewards/accuracies": 0.9916338324546814,
      "rewards/chosen": 1.583296775817871,
      "rewards/margins": 8.18080997467041,
      "rewards/rejected": -6.597513675689697,
      "step": 2286
    },
    {
      "epoch": 0.5000492174426617,
      "grad_norm": 0.026140812784433365,
      "learning_rate": 5.554583242178954e-07,
      "logits/chosen": -0.36246979236602783,
      "logits/rejected": -0.21610520780086517,
      "logps/chosen": -56.37531280517578,
      "logps/rejected": -81.92805480957031,
      "loss": 0.0298,
      "rewards/accuracies": 0.9886810779571533,
      "rewards/chosen": 1.608428955078125,
      "rewards/margins": 8.545919418334961,
      "rewards/rejected": -6.9374895095825195,
      "step": 2540
    },
    {
      "epoch": 0.5500541391869278,
      "grad_norm": 0.0485980287194252,
      "learning_rate": 4.998906147451324e-07,
      "logits/chosen": -0.35004737973213196,
      "logits/rejected": -0.1877668797969818,
      "logps/chosen": -56.29869079589844,
      "logps/rejected": -84.21609497070312,
      "loss": 0.0229,
      "rewards/accuracies": 0.9901574850082397,
      "rewards/chosen": 1.6171692609786987,
      "rewards/margins": 8.883115768432617,
      "rewards/rejected": -7.265947341918945,
      "step": 2794
    },
    {
      "epoch": 0.600059060931194,
      "grad_norm": 0.13149231672286987,
      "learning_rate": 4.4432290527236927e-07,
      "logits/chosen": -0.3316061198711395,
      "logits/rejected": -0.17913725972175598,
      "logps/chosen": -57.6456413269043,
      "logps/rejected": -89.23247528076172,
      "loss": 0.0144,
      "rewards/accuracies": 0.9960629940032959,
      "rewards/chosen": 1.622791051864624,
      "rewards/margins": 9.201993942260742,
      "rewards/rejected": -7.579202651977539,
      "step": 3048
    },
    {
      "epoch": 0.6500639826754602,
      "grad_norm": 0.03225807845592499,
      "learning_rate": 3.887551957996062e-07,
      "logits/chosen": -0.3303147554397583,
      "logits/rejected": -0.16745421290397644,
      "logps/chosen": -57.618045806884766,
      "logps/rejected": -90.39539337158203,
      "loss": 0.0165,
      "rewards/accuracies": 0.9921259880065918,
      "rewards/chosen": 1.5876142978668213,
      "rewards/margins": 9.376455307006836,
      "rewards/rejected": -7.788840293884277,
      "step": 3302
    },
    {
      "epoch": 0.7000689044197264,
      "grad_norm": 0.024763241410255432,
      "learning_rate": 3.3318748632684314e-07,
      "logits/chosen": -0.3256986141204834,
      "logits/rejected": -0.1574079841375351,
      "logps/chosen": -59.10237121582031,
      "logps/rejected": -92.18179321289062,
      "loss": 0.0203,
      "rewards/accuracies": 0.9901574850082397,
      "rewards/chosen": 1.5978204011917114,
      "rewards/margins": 9.474949836730957,
      "rewards/rejected": -7.877129554748535,
      "step": 3556
    },
    {
      "epoch": 0.7500738261639925,
      "grad_norm": 0.18685077130794525,
      "learning_rate": 2.7761977685408005e-07,
      "logits/chosen": -0.31320706009864807,
      "logits/rejected": -0.14784303307533264,
      "logps/chosen": -58.15943908691406,
      "logps/rejected": -92.56378936767578,
      "loss": 0.0265,
      "rewards/accuracies": 0.9906495809555054,
      "rewards/chosen": 1.5292613506317139,
      "rewards/margins": 9.509092330932617,
      "rewards/rejected": -7.979831218719482,
      "step": 3810
    },
    {
      "epoch": 0.8000787479082587,
      "grad_norm": 0.01488853245973587,
      "learning_rate": 2.22052067381317e-07,
      "logits/chosen": -0.3247720003128052,
      "logits/rejected": -0.15560078620910645,
      "logps/chosen": -56.86127471923828,
      "logps/rejected": -93.76302337646484,
      "loss": 0.0189,
      "rewards/accuracies": 0.9931102395057678,
      "rewards/chosen": 1.586428165435791,
      "rewards/margins": 9.719764709472656,
      "rewards/rejected": -8.133337020874023,
      "step": 4064
    },
    {
      "epoch": 0.8500836696525249,
      "grad_norm": 4.113521575927734,
      "learning_rate": 1.6648435790855392e-07,
      "logits/chosen": -0.31977561116218567,
      "logits/rejected": -0.164890855550766,
      "logps/chosen": -56.98260498046875,
      "logps/rejected": -95.06165313720703,
      "loss": 0.0233,
      "rewards/accuracies": 0.9921259880065918,
      "rewards/chosen": 1.4957386255264282,
      "rewards/margins": 9.688507080078125,
      "rewards/rejected": -8.192767143249512,
      "step": 4318
    },
    {
      "epoch": 0.900088591396791,
      "grad_norm": 0.0642678439617157,
      "learning_rate": 1.1091664843579085e-07,
      "logits/chosen": -0.3185438811779022,
      "logits/rejected": -0.1604050248861313,
      "logps/chosen": -57.956459045410156,
      "logps/rejected": -96.31430053710938,
      "loss": 0.0202,
      "rewards/accuracies": 0.9916338324546814,
      "rewards/chosen": 1.4874851703643799,
      "rewards/margins": 9.749979019165039,
      "rewards/rejected": -8.262493133544922,
      "step": 4572
    },
    {
      "epoch": 0.9500935131410572,
      "grad_norm": 0.0038960117381066084,
      "learning_rate": 5.534893896302778e-08,
      "logits/chosen": -0.3140643537044525,
      "logits/rejected": -0.1565851867198944,
      "logps/chosen": -59.727909088134766,
      "logps/rejected": -95.10466003417969,
      "loss": 0.0173,
      "rewards/accuracies": 0.9936023354530334,
      "rewards/chosen": 1.604878544807434,
      "rewards/margins": 9.842850685119629,
      "rewards/rejected": -8.237971305847168,
      "step": 4826
    },
    {
      "epoch": 0.9999015651146766,
      "step": 5079,
      "total_flos": 3.074560994106409e+18,
      "train_loss": 0.08538520530960554,
      "train_runtime": 75713.4377,
      "train_samples_per_second": 0.537,
      "train_steps_per_second": 0.067
    }
  ],
  "logging_steps": 254,
  "max_steps": 5079,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.074560994106409e+18,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}