{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 13, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07692307692307693, "grad_norm": 36.832374572753906, "learning_rate": 2e-05, "logits/chosen": -1.1979809999465942, "logits/rejected": -0.8325968980789185, "logps/chosen": -14.64731216430664, "logps/rejected": -1005.949951171875, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.15384615384615385, "grad_norm": 48.96150207519531, "learning_rate": 4e-05, "logits/chosen": -1.198547124862671, "logits/rejected": -0.7667418122291565, "logps/chosen": -13.524721145629883, "logps/rejected": -1149.490966796875, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 2 }, { "epoch": 0.23076923076923078, "grad_norm": 28.16060447692871, "learning_rate": 6e-05, "logits/chosen": -1.2035918235778809, "logits/rejected": -0.7419127225875854, "logps/chosen": -13.74232006072998, "logps/rejected": -988.1054077148438, "loss": 0.5315, "rewards/accuracies": 1.0, "rewards/chosen": 0.02188706398010254, "rewards/margins": 0.3549116849899292, "rewards/rejected": -0.33302462100982666, "step": 3 }, { "epoch": 0.3076923076923077, "grad_norm": 8.096433639526367, "learning_rate": 8e-05, "logits/chosen": -1.1596912145614624, "logits/rejected": -0.7631052732467651, "logps/chosen": -12.934490203857422, "logps/rejected": -968.7999267578125, "loss": 0.0984, "rewards/accuracies": 1.0, "rewards/chosen": 0.15821895003318787, "rewards/margins": 2.3479552268981934, "rewards/rejected": -2.1897363662719727, "step": 4 }, { "epoch": 0.38461538461538464, "grad_norm": 0.0061914557591080666, "learning_rate": 0.0001, "logits/chosen": -1.0816950798034668, "logits/rejected": -0.6030597686767578, "logps/chosen": -13.906386375427246, "logps/rejected": -1000.5069580078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.49933597445487976, "rewards/margins": 10.395833969116211, "rewards/rejected": -9.89649772644043, "step": 5 }, { "epoch": 0.46153846153846156, "grad_norm": 2.919980923721255e-10, "learning_rate": 0.00012, "logits/chosen": -0.988565981388092, "logits/rejected": -0.6128067374229431, "logps/chosen": -8.973637580871582, "logps/rejected": -1372.248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.7627038955688477, "rewards/margins": 30.984756469726562, "rewards/rejected": -30.22205352783203, "step": 6 }, { "epoch": 0.5384615384615384, "grad_norm": 0.0, "learning_rate": 0.00014, "logits/chosen": -0.8455103039741516, "logits/rejected": -0.5573095679283142, "logps/chosen": -8.776264190673828, "logps/rejected": -1911.2926025390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.5577902793884277, "rewards/margins": 70.72723388671875, "rewards/rejected": -70.16944885253906, "step": 7 }, { "epoch": 0.6153846153846154, "grad_norm": 0.0, "learning_rate": 0.00016, "logits/chosen": -0.7601979970932007, "logits/rejected": -0.5386461615562439, "logps/chosen": -7.5249152183532715, "logps/rejected": -2239.0810546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.6714935302734375, "rewards/margins": 112.79618072509766, "rewards/rejected": -112.12467956542969, "step": 8 }, { "epoch": 0.6923076923076923, "grad_norm": 0.0, "learning_rate": 0.00018, "logits/chosen": -0.7337682247161865, "logits/rejected": -0.6141457557678223, "logps/chosen": -6.297776699066162, "logps/rejected": -2562.424560546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.9677034616470337, "rewards/margins": 154.79287719726562, "rewards/rejected": -153.82516479492188, "step": 9 }, { "epoch": 0.7692307692307693, "grad_norm": 0.0, "learning_rate": 0.0002, "logits/chosen": -0.7853107452392578, "logits/rejected": -0.7350925803184509, "logps/chosen": -8.834450721740723, "logps/rejected": -3320.821533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.912455677986145, "rewards/margins": 216.040771484375, "rewards/rejected": -215.12832641601562, "step": 10 }, { "epoch": 0.8461538461538461, "grad_norm": 0.0, "learning_rate": 0.00015000000000000001, "logits/chosen": -0.8751146793365479, "logits/rejected": -0.8513680696487427, "logps/chosen": -7.073147773742676, "logps/rejected": -3444.56396484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.7486312389373779, "rewards/margins": 242.8885040283203, "rewards/rejected": -242.13986206054688, "step": 11 }, { "epoch": 0.9230769230769231, "grad_norm": 0.0, "learning_rate": 5.000000000000002e-05, "logits/chosen": -1.1207599639892578, "logits/rejected": -1.0976228713989258, "logps/chosen": -9.499576568603516, "logps/rejected": -3954.97705078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.4698023200035095, "rewards/margins": 283.22412109375, "rewards/rejected": -282.75433349609375, "step": 12 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 0.0, "logits/chosen": -1.2583673000335693, "logits/rejected": -1.2936705350875854, "logps/chosen": -14.507587432861328, "logps/rejected": -4967.0595703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.08211517333984375, "rewards/margins": 360.6194763183594, "rewards/rejected": -360.70159912109375, "step": 13 } ], "logging_steps": 1, "max_steps": 13, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }