| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9945, |
| "eval_steps": 500, |
| "global_step": 153, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01, |
| "grad_norm": 14.538259596842082, |
| "learning_rate": 3.125e-08, |
| "logits/chosen": -1.485394835472107, |
| "logits/rejected": -1.5657753944396973, |
| "logps/chosen": -113.49234771728516, |
| "logps/rejected": -112.02042388916016, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 13.777860556030118, |
| "learning_rate": 3.1249999999999997e-07, |
| "logits/chosen": -1.8860422372817993, |
| "logits/rejected": -1.8673903942108154, |
| "logps/chosen": -159.92677307128906, |
| "logps/rejected": -158.13575744628906, |
| "loss": 0.693, |
| "rewards/accuracies": 0.470085471868515, |
| "rewards/chosen": -0.0027076357509940863, |
| "rewards/margins": 0.0009333029738627374, |
| "rewards/rejected": -0.003640938550233841, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 15.832935585691502, |
| "learning_rate": 4.989490450759331e-07, |
| "logits/chosen": -1.714375615119934, |
| "logits/rejected": -1.6653386354446411, |
| "logps/chosen": -179.85107421875, |
| "logps/rejected": -182.88519287109375, |
| "loss": 0.692, |
| "rewards/accuracies": 0.5692307949066162, |
| "rewards/chosen": -0.14061911404132843, |
| "rewards/margins": 0.009414789266884327, |
| "rewards/rejected": -0.15003390610218048, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 17.171119399204557, |
| "learning_rate": 4.872270441827174e-07, |
| "logits/chosen": -1.6256521940231323, |
| "logits/rejected": -1.5526025295257568, |
| "logps/chosen": -186.2281036376953, |
| "logps/rejected": -183.31361389160156, |
| "loss": 0.6942, |
| "rewards/accuracies": 0.48461538553237915, |
| "rewards/chosen": -0.32728826999664307, |
| "rewards/margins": 0.015691382810473442, |
| "rewards/rejected": -0.34297963976860046, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 23.62795629433987, |
| "learning_rate": 4.6308512113530063e-07, |
| "logits/chosen": -1.723747968673706, |
| "logits/rejected": -1.7841739654541016, |
| "logps/chosen": -193.51380920410156, |
| "logps/rejected": -202.89682006835938, |
| "loss": 0.6861, |
| "rewards/accuracies": 0.5307692289352417, |
| "rewards/chosen": -0.2831575870513916, |
| "rewards/margins": 0.013270785100758076, |
| "rewards/rejected": -0.2964283227920532, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 15.77157102463667, |
| "learning_rate": 4.277872161641681e-07, |
| "logits/chosen": -1.7303481101989746, |
| "logits/rejected": -1.65773344039917, |
| "logps/chosen": -177.57931518554688, |
| "logps/rejected": -179.11293029785156, |
| "loss": 0.6898, |
| "rewards/accuracies": 0.5269230604171753, |
| "rewards/chosen": -0.18453820049762726, |
| "rewards/margins": 0.02953496389091015, |
| "rewards/rejected": -0.21407318115234375, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 16.13100802946847, |
| "learning_rate": 3.8318133624280046e-07, |
| "logits/chosen": -1.6843277215957642, |
| "logits/rejected": -1.6873857975006104, |
| "logps/chosen": -182.8292694091797, |
| "logps/rejected": -194.4746551513672, |
| "loss": 0.6849, |
| "rewards/accuracies": 0.5538461804389954, |
| "rewards/chosen": -0.19599129259586334, |
| "rewards/margins": 0.03240448608994484, |
| "rewards/rejected": -0.22839577496051788, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 16.445288992190534, |
| "learning_rate": 3.316028034595861e-07, |
| "logits/chosen": -1.6721850633621216, |
| "logits/rejected": -1.6687787771224976, |
| "logps/chosen": -195.88442993164062, |
| "logps/rejected": -199.55630493164062, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5153846144676208, |
| "rewards/chosen": -0.2889784276485443, |
| "rewards/margins": -0.0007830683025531471, |
| "rewards/rejected": -0.2881953716278076, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 12.960135155210429, |
| "learning_rate": 2.7575199021178855e-07, |
| "logits/chosen": -1.4989523887634277, |
| "logits/rejected": -1.4164642095565796, |
| "logps/chosen": -184.61236572265625, |
| "logps/rejected": -192.20050048828125, |
| "loss": 0.6833, |
| "rewards/accuracies": 0.5692307949066162, |
| "rewards/chosen": -0.22188612818717957, |
| "rewards/margins": 0.04173959046602249, |
| "rewards/rejected": -0.26362574100494385, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 15.682163034505669, |
| "learning_rate": 2.1855294234408068e-07, |
| "logits/chosen": -1.4179878234863281, |
| "logits/rejected": -1.4031846523284912, |
| "logps/chosen": -179.42979431152344, |
| "logps/rejected": -187.13311767578125, |
| "loss": 0.6809, |
| "rewards/accuracies": 0.5730769038200378, |
| "rewards/chosen": -0.23481449484825134, |
| "rewards/margins": 0.03382309526205063, |
| "rewards/rejected": -0.26863762736320496, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 19.094980323061744, |
| "learning_rate": 1.6300029195778453e-07, |
| "logits/chosen": -1.2222946882247925, |
| "logits/rejected": -1.3037612438201904, |
| "logps/chosen": -191.2831268310547, |
| "logps/rejected": -198.2336883544922, |
| "loss": 0.6865, |
| "rewards/accuracies": 0.5153846144676208, |
| "rewards/chosen": -0.3084586560726166, |
| "rewards/margins": 0.02169790491461754, |
| "rewards/rejected": -0.3301565647125244, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 19.896508891597843, |
| "learning_rate": 1.1200247470632392e-07, |
| "logits/chosen": -1.4282666444778442, |
| "logits/rejected": -1.4667084217071533, |
| "logps/chosen": -185.49362182617188, |
| "logps/rejected": -186.56646728515625, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.5115384459495544, |
| "rewards/chosen": -0.26517340540885925, |
| "rewards/margins": 0.011802640743553638, |
| "rewards/rejected": -0.2769760489463806, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 16.948516897970652, |
| "learning_rate": 6.822945986946385e-08, |
| "logits/chosen": -1.1220929622650146, |
| "logits/rejected": -1.2350115776062012, |
| "logps/chosen": -189.1597137451172, |
| "logps/rejected": -199.32754516601562, |
| "loss": 0.6781, |
| "rewards/accuracies": 0.6307692527770996, |
| "rewards/chosen": -0.25235074758529663, |
| "rewards/margins": 0.046545807272195816, |
| "rewards/rejected": -0.29889652132987976, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 17.56220411035667, |
| "learning_rate": 3.397296523427806e-08, |
| "logits/chosen": -0.9844478368759155, |
| "logits/rejected": -1.0250178575515747, |
| "logps/chosen": -183.03431701660156, |
| "logps/rejected": -190.74391174316406, |
| "loss": 0.686, |
| "rewards/accuracies": 0.5192307829856873, |
| "rewards/chosen": -0.3220398724079132, |
| "rewards/margins": 0.022711992263793945, |
| "rewards/rejected": -0.34475192427635193, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 15.473766104576516, |
| "learning_rate": 1.1026475173977978e-08, |
| "logits/chosen": -1.361193299293518, |
| "logits/rejected": -1.2996608018875122, |
| "logps/chosen": -193.40740966796875, |
| "logps/rejected": -192.8422393798828, |
| "loss": 0.686, |
| "rewards/accuracies": 0.5384615659713745, |
| "rewards/chosen": -0.2967548072338104, |
| "rewards/margins": 0.024480195716023445, |
| "rewards/rejected": -0.3212350010871887, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 17.16639289762015, |
| "learning_rate": 5.913435276374834e-10, |
| "logits/chosen": -1.1994727849960327, |
| "logits/rejected": -1.2772407531738281, |
| "logps/chosen": -184.84437561035156, |
| "logps/rejected": -193.1089324951172, |
| "loss": 0.6824, |
| "rewards/accuracies": 0.5884615182876587, |
| "rewards/chosen": -0.3001053035259247, |
| "rewards/margins": 0.036925580352544785, |
| "rewards/rejected": -0.33703088760375977, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.99, |
| "step": 153, |
| "total_flos": 0.0, |
| "train_loss": 0.6870396254109401, |
| "train_runtime": 21840.9373, |
| "train_samples_per_second": 0.916, |
| "train_steps_per_second": 0.007 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 153, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 100, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|