{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9968652037617555,
  "eval_steps": 500,
  "global_step": 159,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 3.125e-08,
      "logits/chosen": -2.5757241249084473,
      "logits/rejected": -2.6334033012390137,
      "logps/chosen": -158.64126586914062,
      "logps/rejected": -129.17214965820312,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.06,
      "learning_rate": 3.1249999999999997e-07,
      "logits/chosen": -2.689120292663574,
      "logits/rejected": -2.7100415229797363,
      "logps/chosen": -231.9307403564453,
      "logps/rejected": -230.61669921875,
      "loss": 0.6931,
      "rewards/accuracies": 0.3541666567325592,
      "rewards/chosen": -0.005180968437343836,
      "rewards/margins": -0.0007737001869827509,
      "rewards/rejected": -0.004407268483191729,
      "step": 10
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.990353313429303e-07,
      "logits/chosen": -2.7198684215545654,
      "logits/rejected": -2.677248477935791,
      "logps/chosen": -246.5954132080078,
      "logps/rejected": -250.72412109375,
      "loss": 0.6907,
      "rewards/accuracies": 0.581250011920929,
      "rewards/chosen": -0.14306680858135223,
      "rewards/margins": 0.009777521714568138,
      "rewards/rejected": -0.15284433960914612,
      "step": 20
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.882681251368548e-07,
      "logits/chosen": -2.576653242111206,
      "logits/rejected": -2.5379045009613037,
      "logps/chosen": -247.7720489501953,
      "logps/rejected": -245.41921997070312,
      "loss": 0.6804,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -0.21431183815002441,
      "rewards/margins": 0.036602433770895004,
      "rewards/rejected": -0.2509142756462097,
      "step": 30
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.6604720940421207e-07,
      "logits/chosen": -2.4344098567962646,
      "logits/rejected": -2.4352052211761475,
      "logps/chosen": -253.05126953125,
      "logps/rejected": -293.6170654296875,
      "loss": 0.666,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": -0.521602988243103,
      "rewards/margins": 0.07865401357412338,
      "rewards/rejected": -0.6002570390701294,
      "step": 40
    },
    {
      "epoch": 0.31,
      "learning_rate": 4.3344075855595097e-07,
      "logits/chosen": -2.3450260162353516,
      "logits/rejected": -2.3353271484375,
      "logps/chosen": -307.8914794921875,
      "logps/rejected": -316.11505126953125,
      "loss": 0.6678,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -0.8211005926132202,
      "rewards/margins": 0.08438173681497574,
      "rewards/rejected": -0.9054821729660034,
      "step": 50
    },
    {
      "epoch": 0.38,
      "learning_rate": 3.920161866827889e-07,
      "logits/chosen": -2.2511789798736572,
      "logits/rejected": -2.2335622310638428,
      "logps/chosen": -282.9967956542969,
      "logps/rejected": -303.55426025390625,
      "loss": 0.6582,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": -0.6935502886772156,
      "rewards/margins": 0.16655965149402618,
      "rewards/rejected": -0.8601099848747253,
      "step": 60
    },
    {
      "epoch": 0.44,
      "learning_rate": 3.4376480090239047e-07,
      "logits/chosen": -2.2453956604003906,
      "logits/rejected": -2.226074457168579,
      "logps/chosen": -286.8125915527344,
      "logps/rejected": -300.02789306640625,
      "loss": 0.665,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.6816620826721191,
      "rewards/margins": 0.20877805352210999,
      "rewards/rejected": -0.8904401659965515,
      "step": 70
    },
    {
      "epoch": 0.5,
      "learning_rate": 2.910060778827554e-07,
      "logits/chosen": -2.2295961380004883,
      "logits/rejected": -2.209394931793213,
      "logps/chosen": -282.4059753417969,
      "logps/rejected": -335.4136657714844,
      "loss": 0.6747,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.8680456876754761,
      "rewards/margins": 0.1932743787765503,
      "rewards/rejected": -1.0613200664520264,
      "step": 80
    },
    {
      "epoch": 0.56,
      "learning_rate": 2.3627616503391812e-07,
      "logits/chosen": -2.2832415103912354,
      "logits/rejected": -2.2923521995544434,
      "logps/chosen": -280.1626892089844,
      "logps/rejected": -269.5970153808594,
      "loss": 0.6566,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -0.7220357656478882,
      "rewards/margins": 0.12294892966747284,
      "rewards/rejected": -0.8449847102165222,
      "step": 90
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.8220596619089573e-07,
      "logits/chosen": -2.2097229957580566,
      "logits/rejected": -2.204942464828491,
      "logps/chosen": -277.9078674316406,
      "logps/rejected": -311.76177978515625,
      "loss": 0.6557,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.695646345615387,
      "rewards/margins": 0.10921863466501236,
      "rewards/rejected": -0.8048648834228516,
      "step": 100
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.3139467229135998e-07,
      "logits/chosen": -2.2849056720733643,
      "logits/rejected": -2.301518201828003,
      "logps/chosen": -298.0511474609375,
      "logps/rejected": -290.65155029296875,
      "loss": 0.6557,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": -0.8137847781181335,
      "rewards/margins": 0.09719870239496231,
      "rewards/rejected": -0.9109834432601929,
      "step": 110
    },
    {
      "epoch": 0.75,
      "learning_rate": 8.628481651367875e-08,
      "logits/chosen": -2.277188777923584,
      "logits/rejected": -2.2617883682250977,
      "logps/chosen": -308.5967102050781,
      "logps/rejected": -313.9825134277344,
      "loss": 0.6536,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": -0.7052074670791626,
      "rewards/margins": 0.12203893810510635,
      "rewards/rejected": -0.827246367931366,
      "step": 120
    },
    {
      "epoch": 0.82,
      "learning_rate": 4.904486005914027e-08,
      "logits/chosen": -2.3243308067321777,
      "logits/rejected": -2.3333945274353027,
      "logps/chosen": -274.1197204589844,
      "logps/rejected": -313.65740966796875,
      "loss": 0.6559,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": -0.615770161151886,
      "rewards/margins": 0.12384297698736191,
      "rewards/rejected": -0.7396131753921509,
      "step": 130
    },
    {
      "epoch": 0.88,
      "learning_rate": 2.1464952759020856e-08,
      "logits/chosen": -2.321314573287964,
      "logits/rejected": -2.318671703338623,
      "logps/chosen": -298.8154602050781,
      "logps/rejected": -320.1452331542969,
      "loss": 0.6433,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": -0.7064296007156372,
      "rewards/margins": 0.19559960067272186,
      "rewards/rejected": -0.9020291566848755,
      "step": 140
    },
    {
      "epoch": 0.94,
      "learning_rate": 4.8708793644441086e-09,
      "logits/chosen": -2.2854580879211426,
      "logits/rejected": -2.2632009983062744,
      "logps/chosen": -282.5638732910156,
      "logps/rejected": -272.4760437011719,
      "loss": 0.6327,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": -0.6732932925224304,
      "rewards/margins": 0.1675054430961609,
      "rewards/rejected": -0.8407986760139465,
      "step": 150
    },
    {
      "epoch": 1.0,
      "step": 159,
      "total_flos": 0.0,
      "train_loss": 0.6621600247029239,
      "train_runtime": 2660.7412,
      "train_samples_per_second": 7.659,
      "train_steps_per_second": 0.06
    }
  ],
  "logging_steps": 10,
  "max_steps": 159,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}