| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 10000.0, | |
| "global_step": 156, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.006418613980543576, | |
| "grad_norm": 32.048425683927725, | |
| "learning_rate": 4.999493072462126e-07, | |
| "logits/chosen": -2.115234375, | |
| "logits/rejected": -1.5445556640625, | |
| "logps/chosen": -132.65625, | |
| "logps/rejected": -181.029296875, | |
| "loss": 0.703399658203125, | |
| "rewards/accuracies": 0.439453125, | |
| "rewards/chosen": -0.015892624855041504, | |
| "rewards/margins": -0.00758051872253418, | |
| "rewards/rejected": -0.008310675621032715, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.06418613980543576, | |
| "grad_norm": 23.578875399152054, | |
| "learning_rate": 4.949476630105669e-07, | |
| "logits/chosen": -2.146253824234009, | |
| "logits/rejected": -1.5375298261642456, | |
| "logps/chosen": -127.29991149902344, | |
| "logps/rejected": -182.31988525390625, | |
| "loss": 0.6579203075832791, | |
| "rewards/accuracies": 0.5796440839767456, | |
| "rewards/chosen": 0.001607447862625122, | |
| "rewards/margins": 0.09492193162441254, | |
| "rewards/rejected": -0.09333191812038422, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.12837227961087153, | |
| "grad_norm": 16.096083288887513, | |
| "learning_rate": 4.799948609147061e-07, | |
| "logits/chosen": -2.1872315406799316, | |
| "logits/rejected": -1.5583984851837158, | |
| "logps/chosen": -127.17167663574219, | |
| "logps/rejected": -188.09335327148438, | |
| "loss": 0.5185166358947754, | |
| "rewards/accuracies": 0.7955077886581421, | |
| "rewards/chosen": 0.10318219661712646, | |
| "rewards/margins": 0.5961636304855347, | |
| "rewards/rejected": -0.49292677640914917, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.1925584194163073, | |
| "grad_norm": 16.84839050494715, | |
| "learning_rate": 4.557459664734141e-07, | |
| "logits/chosen": -2.217529296875, | |
| "logits/rejected": -1.5862548351287842, | |
| "logps/chosen": -125.22636413574219, | |
| "logps/rejected": -189.91366577148438, | |
| "loss": 0.41579198837280273, | |
| "rewards/accuracies": 0.850781261920929, | |
| "rewards/chosen": 0.16709718108177185, | |
| "rewards/margins": 1.2345550060272217, | |
| "rewards/rejected": -1.067326307296753, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.25674455922174305, | |
| "grad_norm": 8.87416271370084, | |
| "learning_rate": 4.2318108837739986e-07, | |
| "logits/chosen": -2.2668824195861816, | |
| "logits/rejected": -1.598077416419983, | |
| "logps/chosen": -127.8832015991211, | |
| "logps/rejected": -192.91796875, | |
| "loss": 0.35149335861206055, | |
| "rewards/accuracies": 0.866406261920929, | |
| "rewards/chosen": 0.12800344824790955, | |
| "rewards/margins": 1.7798080444335938, | |
| "rewards/rejected": -1.651770830154419, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.3209306990271788, | |
| "grad_norm": 8.909019678687008, | |
| "learning_rate": 3.8361645653195024e-07, | |
| "logits/chosen": -2.3679442405700684, | |
| "logits/rejected": -1.644537329673767, | |
| "logps/chosen": -127.83222961425781, | |
| "logps/rejected": -205.20272827148438, | |
| "loss": 0.30472755432128906, | |
| "rewards/accuracies": 0.8851562738418579, | |
| "rewards/chosen": 0.2481112778186798, | |
| "rewards/margins": 2.5448379516601562, | |
| "rewards/rejected": -2.296844482421875, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.3851168388326146, | |
| "grad_norm": 9.080316543515908, | |
| "learning_rate": 3.3865122176063385e-07, | |
| "logits/chosen": -2.381664991378784, | |
| "logits/rejected": -1.673553466796875, | |
| "logps/chosen": -126.41679382324219, | |
| "logps/rejected": -204.1730499267578, | |
| "loss": 0.3040948390960693, | |
| "rewards/accuracies": 0.883007824420929, | |
| "rewards/chosen": 0.16746802628040314, | |
| "rewards/margins": 2.610337734222412, | |
| "rewards/rejected": -2.443005323410034, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.44930297863805035, | |
| "grad_norm": 7.319834355840221, | |
| "learning_rate": 2.9010282021444005e-07, | |
| "logits/chosen": -2.3894896507263184, | |
| "logits/rejected": -1.654962182044983, | |
| "logps/chosen": -126.53593444824219, | |
| "logps/rejected": -207.21328735351562, | |
| "loss": 0.2680961608886719, | |
| "rewards/accuracies": 0.8951171636581421, | |
| "rewards/chosen": 0.16773858666419983, | |
| "rewards/margins": 2.727093458175659, | |
| "rewards/rejected": -2.5592041015625, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.5134891184434861, | |
| "grad_norm": 11.714525445790118, | |
| "learning_rate": 2.399335149726463e-07, | |
| "logits/chosen": -2.400561571121216, | |
| "logits/rejected": -1.6822998523712158, | |
| "logps/chosen": -126.2464828491211, | |
| "logps/rejected": -208.8488311767578, | |
| "loss": 0.27129082679748534, | |
| "rewards/accuracies": 0.8958984613418579, | |
| "rewards/chosen": 0.0786014050245285, | |
| "rewards/margins": 2.6517059803009033, | |
| "rewards/rejected": -2.5730834007263184, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.5776752582489219, | |
| "grad_norm": 13.05044359804742, | |
| "learning_rate": 1.9017108392811062e-07, | |
| "logits/chosen": -2.4099974632263184, | |
| "logits/rejected": -1.6946532726287842, | |
| "logps/chosen": -129.740234375, | |
| "logps/rejected": -212.8679656982422, | |
| "loss": 0.25001063346862795, | |
| "rewards/accuracies": 0.9019531011581421, | |
| "rewards/chosen": 0.07523002475500107, | |
| "rewards/margins": 2.694448947906494, | |
| "rewards/rejected": -2.618884325027466, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.6418613980543576, | |
| "grad_norm": 11.826565551460419, | |
| "learning_rate": 1.428268596492364e-07, | |
| "logits/chosen": -2.4135499000549316, | |
| "logits/rejected": -1.6916077136993408, | |
| "logps/chosen": -128.24374389648438, | |
| "logps/rejected": -200.87850952148438, | |
| "loss": 0.2475870132446289, | |
| "rewards/accuracies": 0.9037109613418579, | |
| "rewards/chosen": 0.13076062500476837, | |
| "rewards/margins": 2.663525342941284, | |
| "rewards/rejected": -2.5332884788513184, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.7060475378597934, | |
| "grad_norm": 19.350657646267706, | |
| "learning_rate": 9.981443394050524e-08, | |
| "logits/chosen": -2.416271924972534, | |
| "logits/rejected": -1.668573021888733, | |
| "logps/chosen": -124.5199203491211, | |
| "logps/rejected": -204.02774047851562, | |
| "loss": 0.24730167388916016, | |
| "rewards/accuracies": 0.8970702886581421, | |
| "rewards/chosen": 0.06585326045751572, | |
| "rewards/margins": 2.7098052501678467, | |
| "rewards/rejected": -2.6437134742736816, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.7702336776652292, | |
| "grad_norm": 14.914033483862706, | |
| "learning_rate": 6.28723129572247e-08, | |
| "logits/chosen": -2.42510986328125, | |
| "logits/rejected": -1.6651611328125, | |
| "logps/chosen": -129.416015625, | |
| "logps/rejected": -211.32461547851562, | |
| "loss": 0.2530521869659424, | |
| "rewards/accuracies": 0.8990234136581421, | |
| "rewards/chosen": 0.03038964234292507, | |
| "rewards/margins": 2.786761522293091, | |
| "rewards/rejected": -2.7561402320861816, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.834419817470665, | |
| "grad_norm": 12.452750762359729, | |
| "learning_rate": 3.349364905389032e-08, | |
| "logits/chosen": -2.4213013648986816, | |
| "logits/rejected": -1.6964843273162842, | |
| "logps/chosen": -126.28125, | |
| "logps/rejected": -202.9324188232422, | |
| "loss": 0.24086828231811525, | |
| "rewards/accuracies": 0.904101550579071, | |
| "rewards/chosen": 0.0428071990609169, | |
| "rewards/margins": 2.784435987472534, | |
| "rewards/rejected": -2.741345167160034, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.8986059572761007, | |
| "grad_norm": 7.337706041982024, | |
| "learning_rate": 1.2865889513213628e-08, | |
| "logits/chosen": -2.431103467941284, | |
| "logits/rejected": -1.689788818359375, | |
| "logps/chosen": -127.52030944824219, | |
| "logps/rejected": -207.2761688232422, | |
| "loss": 0.24082815647125244, | |
| "rewards/accuracies": 0.9056640863418579, | |
| "rewards/chosen": 0.055707789957523346, | |
| "rewards/margins": 2.8203492164611816, | |
| "rewards/rejected": -2.7646727561950684, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.9627920970815365, | |
| "grad_norm": 9.490755329142848, | |
| "learning_rate": 1.8227814754865067e-09, | |
| "logits/chosen": -2.4349732398986816, | |
| "logits/rejected": -1.698211669921875, | |
| "logps/chosen": -129.65625, | |
| "logps/rejected": -206.28164672851562, | |
| "loss": 0.24139628410339356, | |
| "rewards/accuracies": 0.9046875238418579, | |
| "rewards/chosen": 0.04055643081665039, | |
| "rewards/margins": 2.805835008621216, | |
| "rewards/rejected": -2.7651429176330566, | |
| "step": 150 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 156, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 10000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.2243593606337659e+19, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |