{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9998045729919874,
  "eval_steps": 500,
  "global_step": 1279,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.050029314051201874,
      "grad_norm": 6.638810634613037,
      "learning_rate": 5e-07,
      "logits/chosen": -0.5138859748840332,
      "logits/rejected": -0.4518983066082001,
      "logps/chosen": -77.28889465332031,
      "logps/rejected": -14.844705581665039,
      "loss": 0.6924,
      "rewards/accuracies": 0.50390625,
      "rewards/chosen": 0.0017984423320740461,
      "rewards/margins": 0.0019690156914293766,
      "rewards/rejected": -0.00017057315562851727,
      "step": 64
    },
    {
      "epoch": 0.10005862810240375,
      "grad_norm": 3.6703028678894043,
      "learning_rate": 1e-06,
      "logits/chosen": -0.5301803350448608,
      "logits/rejected": -0.46129050850868225,
      "logps/chosen": -77.88700866699219,
      "logps/rejected": -13.736372947692871,
      "loss": 0.6787,
      "rewards/accuracies": 0.693359375,
      "rewards/chosen": 0.026838650926947594,
      "rewards/margins": 0.02976146526634693,
      "rewards/rejected": -0.002922814106568694,
      "step": 128
    },
    {
      "epoch": 0.15008794215360563,
      "grad_norm": 4.980158805847168,
      "learning_rate": 9.44396177237185e-07,
      "logits/chosen": -0.5469677448272705,
      "logits/rejected": -0.48332178592681885,
      "logps/chosen": -77.87326049804688,
      "logps/rejected": -14.177068710327148,
      "loss": 0.6161,
      "rewards/accuracies": 0.82421875,
      "rewards/chosen": 0.15434227883815765,
      "rewards/margins": 0.17204590141773224,
      "rewards/rejected": -0.017703618854284286,
      "step": 192
    },
    {
      "epoch": 0.2001172562048075,
      "grad_norm": 2.8744795322418213,
      "learning_rate": 8.887923544743701e-07,
      "logits/chosen": -0.5636645555496216,
      "logits/rejected": -0.5051460266113281,
      "logps/chosen": -63.355735778808594,
      "logps/rejected": -12.730010032653809,
      "loss": 0.5124,
      "rewards/accuracies": 0.84375,
      "rewards/chosen": 0.3969045877456665,
      "rewards/margins": 0.5048438310623169,
      "rewards/rejected": -0.10793925076723099,
      "step": 256
    },
    {
      "epoch": 0.2501465702560094,
      "grad_norm": 2.2977707386016846,
      "learning_rate": 8.331885317115551e-07,
      "logits/chosen": -0.5301216840744019,
      "logits/rejected": -0.47033828496932983,
      "logps/chosen": -65.85761260986328,
      "logps/rejected": -15.843816757202148,
      "loss": 0.3878,
      "rewards/accuracies": 0.888671875,
      "rewards/chosen": 0.79034823179245,
      "rewards/margins": 1.061028003692627,
      "rewards/rejected": -0.2706798315048218,
      "step": 320
    },
    {
      "epoch": 0.30017588430721126,
      "grad_norm": 8.175545692443848,
      "learning_rate": 7.775847089487402e-07,
      "logits/chosen": -0.5346195697784424,
      "logits/rejected": -0.4867401123046875,
      "logps/chosen": -57.788429260253906,
      "logps/rejected": -16.67302894592285,
      "loss": 0.3424,
      "rewards/accuracies": 0.904296875,
      "rewards/chosen": 0.9831899404525757,
      "rewards/margins": 1.4113095998764038,
      "rewards/rejected": -0.42811962962150574,
      "step": 384
    },
    {
      "epoch": 0.35020519835841313,
      "grad_norm": 5.475202560424805,
      "learning_rate": 7.219808861859253e-07,
      "logits/chosen": -0.5421884059906006,
      "logits/rejected": -0.4896155595779419,
      "logps/chosen": -59.334922790527344,
      "logps/rejected": -18.624767303466797,
      "loss": 0.265,
      "rewards/accuracies": 0.970703125,
      "rewards/chosen": 1.2069406509399414,
      "rewards/margins": 1.8477783203125,
      "rewards/rejected": -0.640837550163269,
      "step": 448
    },
    {
      "epoch": 0.400234512409615,
      "grad_norm": 3.7930808067321777,
      "learning_rate": 6.663770634231103e-07,
      "logits/chosen": -0.5423855185508728,
      "logits/rejected": -0.5051375031471252,
      "logps/chosen": -58.52878952026367,
      "logps/rejected": -23.4506893157959,
      "loss": 0.182,
      "rewards/accuracies": 0.986328125,
      "rewards/chosen": 1.3336964845657349,
      "rewards/margins": 2.311509132385254,
      "rewards/rejected": -0.977812647819519,
      "step": 512
    },
    {
      "epoch": 0.45026382646081686,
      "grad_norm": 7.777218818664551,
      "learning_rate": 6.107732406602954e-07,
      "logits/chosen": -0.5627282857894897,
      "logits/rejected": -0.5052539110183716,
      "logps/chosen": -60.55673599243164,
      "logps/rejected": -26.99791717529297,
      "loss": 0.1096,
      "rewards/accuracies": 0.982421875,
      "rewards/chosen": 1.3532178401947021,
      "rewards/margins": 2.905151605606079,
      "rewards/rejected": -1.5519336462020874,
      "step": 576
    },
    {
      "epoch": 0.5002931405120188,
      "grad_norm": 0.5906669497489929,
      "learning_rate": 5.551694178974804e-07,
      "logits/chosen": -0.5611196160316467,
      "logits/rejected": -0.5089166164398193,
      "logps/chosen": -59.12473678588867,
      "logps/rejected": -32.835243225097656,
      "loss": 0.0648,
      "rewards/accuracies": 0.9921875,
      "rewards/chosen": 1.443546175956726,
      "rewards/margins": 3.489488124847412,
      "rewards/rejected": -2.0459418296813965,
      "step": 640
    },
    {
      "epoch": 0.5503224545632206,
      "grad_norm": 0.7606092095375061,
      "learning_rate": 4.995655951346655e-07,
      "logits/chosen": -0.5526726245880127,
      "logits/rejected": -0.49657124280929565,
      "logps/chosen": -57.72775650024414,
      "logps/rejected": -36.90594482421875,
      "loss": 0.0546,
      "rewards/accuracies": 0.990234375,
      "rewards/chosen": 1.4812407493591309,
      "rewards/margins": 3.8918285369873047,
      "rewards/rejected": -2.410587787628174,
      "step": 704
    },
    {
      "epoch": 0.6003517686144225,
      "grad_norm": 0.8932979702949524,
      "learning_rate": 4.4396177237185057e-07,
      "logits/chosen": -0.5280415415763855,
      "logits/rejected": -0.4753148555755615,
      "logps/chosen": -58.317237854003906,
      "logps/rejected": -40.71867370605469,
      "loss": 0.0436,
      "rewards/accuracies": 0.990234375,
      "rewards/chosen": 1.4664157629013062,
      "rewards/margins": 4.119485855102539,
      "rewards/rejected": -2.6530702114105225,
      "step": 768
    },
    {
      "epoch": 0.6503810826656244,
      "grad_norm": 1.0950371026992798,
      "learning_rate": 3.8835794960903563e-07,
      "logits/chosen": -0.523577094078064,
      "logits/rejected": -0.464932382106781,
      "logps/chosen": -58.45826721191406,
      "logps/rejected": -40.988529205322266,
      "loss": 0.0442,
      "rewards/accuracies": 0.990234375,
      "rewards/chosen": 1.5603256225585938,
      "rewards/margins": 4.361178398132324,
      "rewards/rejected": -2.8008527755737305,
      "step": 832
    },
    {
      "epoch": 0.7004103967168263,
      "grad_norm": 0.8026629686355591,
      "learning_rate": 3.327541268462207e-07,
      "logits/chosen": -0.5141459703445435,
      "logits/rejected": -0.44692087173461914,
      "logps/chosen": -59.13020324707031,
      "logps/rejected": -41.566627502441406,
      "loss": 0.038,
      "rewards/accuracies": 0.994140625,
      "rewards/chosen": 1.5455219745635986,
      "rewards/margins": 4.467495918273926,
      "rewards/rejected": -2.9219741821289062,
      "step": 896
    },
    {
      "epoch": 0.7504397107680282,
      "grad_norm": 0.05718870088458061,
      "learning_rate": 2.7715030408340575e-07,
      "logits/chosen": -0.5028055906295776,
      "logits/rejected": -0.43091291189193726,
      "logps/chosen": -56.72560501098633,
      "logps/rejected": -42.42565155029297,
      "loss": 0.0423,
      "rewards/accuracies": 0.986328125,
      "rewards/chosen": 1.564468264579773,
      "rewards/margins": 4.592702865600586,
      "rewards/rejected": -3.0282342433929443,
      "step": 960
    },
    {
      "epoch": 0.80046902481923,
      "grad_norm": 0.14965815842151642,
      "learning_rate": 2.215464813205908e-07,
      "logits/chosen": -0.510848343372345,
      "logits/rejected": -0.4440664052963257,
      "logps/chosen": -56.914100646972656,
      "logps/rejected": -42.933258056640625,
      "loss": 0.0307,
      "rewards/accuracies": 0.9921875,
      "rewards/chosen": 1.552268147468567,
      "rewards/margins": 4.621021270751953,
      "rewards/rejected": -3.068753242492676,
      "step": 1024
    },
    {
      "epoch": 0.8504983388704319,
      "grad_norm": 0.4853415787220001,
      "learning_rate": 1.6594265855777585e-07,
      "logits/chosen": -0.5129883289337158,
      "logits/rejected": -0.4405321180820465,
      "logps/chosen": -56.04991912841797,
      "logps/rejected": -43.990997314453125,
      "loss": 0.0213,
      "rewards/accuracies": 0.994140625,
      "rewards/chosen": 1.5749919414520264,
      "rewards/margins": 4.778657913208008,
      "rewards/rejected": -3.2036657333374023,
      "step": 1088
    },
    {
      "epoch": 0.9005276529216337,
      "grad_norm": 0.44261807203292847,
      "learning_rate": 1.103388357949609e-07,
      "logits/chosen": -0.5202418565750122,
      "logits/rejected": -0.4409845471382141,
      "logps/chosen": -58.534915924072266,
      "logps/rejected": -44.22764205932617,
      "loss": 0.0335,
      "rewards/accuracies": 0.98828125,
      "rewards/chosen": 1.6060659885406494,
      "rewards/margins": 4.796003341674805,
      "rewards/rejected": -3.189937114715576,
      "step": 1152
    },
    {
      "epoch": 0.9505569669728356,
      "grad_norm": 0.2634647786617279,
      "learning_rate": 5.4735013032145953e-08,
      "logits/chosen": -0.48342519998550415,
      "logits/rejected": -0.4142173230648041,
      "logps/chosen": -57.232521057128906,
      "logps/rejected": -44.50954818725586,
      "loss": 0.0315,
      "rewards/accuracies": 0.990234375,
      "rewards/chosen": 1.5877962112426758,
      "rewards/margins": 4.721351623535156,
      "rewards/rejected": -3.1335554122924805,
      "step": 1216
    },
    {
      "epoch": 0.9998045729919874,
      "step": 1279,
      "total_flos": 7.724459437129728e+17,
      "train_loss": 0.21135991807092067,
      "train_runtime": 19931.1539,
      "train_samples_per_second": 0.513,
      "train_steps_per_second": 0.064
    }
  ],
  "logging_steps": 64,
  "max_steps": 1279,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 7.724459437129728e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}