{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 40.0,
  "eval_steps": 500,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.36,
      "forget_cf_outputs.loss": -2.683056592941284,
      "forget_loss": 2.683056592941284,
      "gated_loss": 0.50390625,
      "retain_loss": 0.2806420624256134,
      "step": 9,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 0.4,
      "grad_norm": 12.48087215423584,
      "learning_rate": 0.00396,
      "loss": 10.0439,
      "step": 10
    },
    {
      "epoch": 0.76,
      "forget_cf_outputs.loss": -2.3281075954437256,
      "forget_loss": 2.3281075954437256,
      "gated_loss": 0.447265625,
      "retain_loss": 0.30973613262176514,
      "step": 19,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 0.8,
      "grad_norm": 14.701934814453125,
      "learning_rate": 0.00392,
      "loss": 6.7906,
      "step": 20
    },
    {
      "epoch": 1.16,
      "forget_cf_outputs.loss": -1.9086629152297974,
      "forget_loss": 1.9086629152297974,
      "gated_loss": 0.12890625,
      "retain_loss": 0.25592905282974243,
      "step": 29,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 1.2,
      "grad_norm": 6.575860500335693,
      "learning_rate": 0.0038799999999999998,
      "loss": 5.0776,
      "step": 30
    },
    {
      "epoch": 1.56,
      "forget_cf_outputs.loss": -1.8418117761611938,
      "forget_loss": 1.8418117761611938,
      "gated_loss": 0.26953125,
      "retain_loss": 0.2785710096359253,
      "step": 39,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 1.6,
      "grad_norm": 16.85633087158203,
      "learning_rate": 0.00384,
      "loss": 4.3983,
      "step": 40
    },
    {
      "epoch": 1.96,
      "forget_cf_outputs.loss": -1.5592725276947021,
      "forget_loss": 1.5592725276947021,
      "gated_loss": 0.09228515625,
      "retain_loss": 0.2128174901008606,
      "step": 49,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 2.0,
      "grad_norm": 6.894440650939941,
      "learning_rate": 0.0038,
      "loss": 3.8443,
      "step": 50
    },
    {
      "epoch": 2.36,
      "forget_cf_outputs.loss": -1.8075147867202759,
      "forget_loss": 1.8075147867202759,
      "gated_loss": 0.2099609375,
      "retain_loss": 0.20431871712207794,
      "step": 59,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 2.4,
      "grad_norm": 12.495519638061523,
      "learning_rate": 0.00376,
      "loss": 3.1611,
      "step": 60
    },
    {
      "epoch": 2.76,
      "forget_cf_outputs.loss": -1.648992896080017,
      "forget_loss": 1.648992896080017,
      "gated_loss": 0.08447265625,
      "retain_loss": 0.23505711555480957,
      "step": 69,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.081427574157715,
      "learning_rate": 0.00372,
      "loss": 3.1274,
      "step": 70
    },
    {
      "epoch": 3.16,
      "forget_cf_outputs.loss": -1.7901757955551147,
      "forget_loss": 1.7901757955551147,
      "gated_loss": 0.07861328125,
      "retain_loss": 0.19908858835697174,
      "step": 79,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 3.2,
      "grad_norm": 5.019917011260986,
      "learning_rate": 0.00368,
      "loss": 2.9064,
      "step": 80
    },
    {
      "epoch": 3.56,
      "forget_cf_outputs.loss": -1.6832540035247803,
      "forget_loss": 1.6832540035247803,
      "gated_loss": 0.1572265625,
      "retain_loss": 0.31473198533058167,
      "step": 89,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 3.6,
      "grad_norm": 25.354310989379883,
      "learning_rate": 0.00364,
      "loss": 2.7559,
      "step": 90
    },
    {
      "epoch": 3.96,
      "forget_cf_outputs.loss": -1.5529592037200928,
      "forget_loss": 1.5529592037200928,
      "gated_loss": 0.0859375,
      "retain_loss": 0.21396659314632416,
      "step": 99,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 4.0,
      "grad_norm": 13.010091781616211,
      "learning_rate": 0.0036000000000000003,
      "loss": 4.6507,
      "step": 100
    },
    {
      "epoch": 4.36,
      "forget_cf_outputs.loss": -1.4749836921691895,
      "forget_loss": 1.4749836921691895,
      "gated_loss": 0.0751953125,
      "retain_loss": 0.3322639465332031,
      "step": 109,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 4.4,
      "grad_norm": 19.40593147277832,
      "learning_rate": 0.0035600000000000002,
      "loss": 2.4806,
      "step": 110
    },
    {
      "epoch": 4.76,
      "forget_cf_outputs.loss": -1.586808443069458,
      "forget_loss": 1.586808443069458,
      "gated_loss": 0.044677734375,
      "retain_loss": 0.20972619950771332,
      "step": 119,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 4.8,
      "grad_norm": 3.9136083126068115,
      "learning_rate": 0.00352,
      "loss": 2.3875,
      "step": 120
    },
    {
      "epoch": 5.16,
      "forget_cf_outputs.loss": -1.3828651905059814,
      "forget_loss": 1.3828651905059814,
      "gated_loss": 0.0308837890625,
      "retain_loss": 0.2637632191181183,
      "step": 129,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 5.2,
      "grad_norm": 2.676300525665283,
      "learning_rate": 0.00348,
      "loss": 2.3602,
      "step": 130
    },
    {
      "epoch": 5.5600000000000005,
      "forget_cf_outputs.loss": -1.353875756263733,
      "forget_loss": 1.353875756263733,
      "gated_loss": 0.03076171875,
      "retain_loss": 0.2234891653060913,
      "step": 139,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 5.6,
      "grad_norm": 2.6358110904693604,
      "learning_rate": 0.00344,
      "loss": 2.14,
      "step": 140
    },
    {
      "epoch": 5.96,
      "forget_cf_outputs.loss": -1.4662984609603882,
      "forget_loss": 1.4662984609603882,
      "gated_loss": 0.044921875,
      "retain_loss": 0.22434721887111664,
      "step": 149,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 6.0,
      "grad_norm": 5.245201587677002,
      "learning_rate": 0.0034,
      "loss": 3.8186,
      "step": 150
    },
    {
      "epoch": 6.36,
      "forget_cf_outputs.loss": -1.3115544319152832,
      "forget_loss": 1.3115544319152832,
      "gated_loss": 0.0208740234375,
      "retain_loss": 0.27212581038475037,
      "step": 159,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 6.4,
      "grad_norm": 2.41806697845459,
      "learning_rate": 0.00336,
      "loss": 2.0658,
      "step": 160
    },
    {
      "epoch": 6.76,
      "forget_cf_outputs.loss": -1.389865517616272,
      "forget_loss": 1.389865517616272,
      "gated_loss": 0.0157470703125,
      "retain_loss": 0.22109903395175934,
      "step": 169,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 6.8,
      "grad_norm": 1.6761163473129272,
      "learning_rate": 0.00332,
      "loss": 2.0436,
      "step": 170
    },
    {
      "epoch": 7.16,
      "forget_cf_outputs.loss": -1.4114540815353394,
      "forget_loss": 1.4114540815353394,
      "gated_loss": 0.01953125,
      "retain_loss": 0.28868579864501953,
      "step": 179,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 7.2,
      "grad_norm": 2.110581159591675,
      "learning_rate": 0.00328,
      "loss": 1.9892,
      "step": 180
    },
    {
      "epoch": 7.5600000000000005,
      "forget_cf_outputs.loss": -1.5645467042922974,
      "forget_loss": 1.5645467042922974,
      "gated_loss": 0.0135498046875,
      "retain_loss": 0.18681125342845917,
      "step": 189,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 7.6,
      "grad_norm": 2.8026249408721924,
      "learning_rate": 0.0032400000000000003,
      "loss": 1.9509,
      "step": 190
    },
    {
      "epoch": 7.96,
      "forget_cf_outputs.loss": -1.368323802947998,
      "forget_loss": 1.368323802947998,
      "gated_loss": 0.0142822265625,
      "retain_loss": 0.23697978258132935,
      "step": 199,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 8.0,
      "grad_norm": 1.9209412336349487,
      "learning_rate": 0.0032,
      "loss": 1.8948,
      "step": 200
    },
    {
      "epoch": 8.36,
      "forget_cf_outputs.loss": -1.199570894241333,
      "forget_loss": 1.199570894241333,
      "gated_loss": 0.00933837890625,
      "retain_loss": 0.25221553444862366,
      "step": 209,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 8.4,
      "grad_norm": 1.5988783836364746,
      "learning_rate": 0.00316,
      "loss": 1.8466,
      "step": 210
    },
    {
      "epoch": 8.76,
      "forget_cf_outputs.loss": -1.2841148376464844,
      "forget_loss": 1.2841148376464844,
      "gated_loss": 0.009765625,
      "retain_loss": 0.24307847023010254,
      "step": 219,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 8.8,
      "grad_norm": 1.7333821058273315,
      "learning_rate": 0.0031200000000000004,
      "loss": 1.817,
      "step": 220
    },
    {
      "epoch": 9.16,
      "forget_cf_outputs.loss": -1.0533504486083984,
      "forget_loss": 1.0533504486083984,
      "gated_loss": 0.005462646484375,
      "retain_loss": 0.22837677597999573,
      "step": 229,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 9.2,
      "grad_norm": 1.5669026374816895,
      "learning_rate": 0.0030800000000000003,
      "loss": 1.7654,
      "step": 230
    },
    {
      "epoch": 9.56,
      "forget_cf_outputs.loss": -1.2518203258514404,
      "forget_loss": 1.2518203258514404,
      "gated_loss": 0.0107421875,
      "retain_loss": 0.27540236711502075,
      "step": 239,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 9.6,
      "grad_norm": 1.7850066423416138,
      "learning_rate": 0.00304,
      "loss": 1.7462,
      "step": 240
    },
    {
      "epoch": 9.96,
      "forget_cf_outputs.loss": -1.2480342388153076,
      "forget_loss": 1.2480342388153076,
      "gated_loss": 0.0079345703125,
      "retain_loss": 0.2164781242609024,
      "step": 249,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 10.0,
      "grad_norm": 1.8285338878631592,
      "learning_rate": 0.003,
      "loss": 1.8334,
      "step": 250
    },
    {
      "epoch": 10.36,
      "forget_cf_outputs.loss": -1.14595365524292,
      "forget_loss": 1.14595365524292,
      "gated_loss": 0.006072998046875,
      "retain_loss": 0.22983184456825256,
      "step": 259,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 10.4,
      "grad_norm": 1.8580724000930786,
      "learning_rate": 0.00296,
      "loss": 1.6553,
      "step": 260
    },
    {
      "epoch": 10.76,
      "forget_cf_outputs.loss": -1.2968733310699463,
      "forget_loss": 1.2968733310699463,
      "gated_loss": 0.008544921875,
      "retain_loss": 0.20906659960746765,
      "step": 269,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 10.8,
      "grad_norm": 1.6227186918258667,
      "learning_rate": 0.00292,
      "loss": 1.7339,
      "step": 270
    },
    {
      "epoch": 11.16,
      "forget_cf_outputs.loss": -1.21259605884552,
      "forget_loss": 1.21259605884552,
      "gated_loss": 0.00750732421875,
      "retain_loss": 0.23951691389083862,
      "step": 279,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 11.2,
      "grad_norm": 1.3491921424865723,
      "learning_rate": 0.0028799999999999997,
      "loss": 1.6642,
      "step": 280
    },
    {
      "epoch": 11.56,
      "forget_cf_outputs.loss": -1.203460454940796,
      "forget_loss": 1.203460454940796,
      "gated_loss": 0.0087890625,
      "retain_loss": 0.23031194508075714,
      "step": 289,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 11.6,
      "grad_norm": 2.027022123336792,
      "learning_rate": 0.00284,
      "loss": 1.6413,
      "step": 290
    },
    {
      "epoch": 11.96,
      "forget_cf_outputs.loss": -1.1864365339279175,
      "forget_loss": 1.1864365339279175,
      "gated_loss": 0.01043701171875,
      "retain_loss": 0.23127436637878418,
      "step": 299,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 12.0,
      "grad_norm": 1.8040319681167603,
      "learning_rate": 0.0028,
      "loss": 1.761,
      "step": 300
    },
    {
      "epoch": 12.36,
      "forget_cf_outputs.loss": -1.227767825126648,
      "forget_loss": 1.227767825126648,
      "gated_loss": 0.00616455078125,
      "retain_loss": 0.22675465047359467,
      "step": 309,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 12.4,
      "grad_norm": 2.1620850563049316,
      "learning_rate": 0.00276,
      "loss": 1.5947,
      "step": 310
    },
    {
      "epoch": 12.76,
      "forget_cf_outputs.loss": -1.0549200773239136,
      "forget_loss": 1.0549200773239136,
      "gated_loss": 0.0054931640625,
      "retain_loss": 0.225913867354393,
      "step": 319,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 12.8,
      "grad_norm": 1.422467827796936,
      "learning_rate": 0.00272,
      "loss": 1.6839,
      "step": 320
    },
    {
      "epoch": 13.16,
      "forget_cf_outputs.loss": -0.966583251953125,
      "forget_loss": 0.966583251953125,
      "gated_loss": 0.0050048828125,
      "retain_loss": 0.21935530006885529,
      "step": 329,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 13.2,
      "grad_norm": 1.3767800331115723,
      "learning_rate": 0.00268,
      "loss": 1.6052,
      "step": 330
    },
    {
      "epoch": 13.56,
      "forget_cf_outputs.loss": -1.0747449398040771,
      "forget_loss": 1.0747449398040771,
      "gated_loss": 0.008544921875,
      "retain_loss": 0.23962758481502533,
      "step": 339,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 13.6,
      "grad_norm": 2.152151584625244,
      "learning_rate": 0.00264,
      "loss": 1.6239,
      "step": 340
    },
    {
      "epoch": 13.96,
      "forget_cf_outputs.loss": -0.9583653211593628,
      "forget_loss": 0.9583653211593628,
      "gated_loss": 0.006500244140625,
      "retain_loss": 0.21241800487041473,
      "step": 349,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 14.0,
      "grad_norm": 2.0913257598876953,
      "learning_rate": 0.0026000000000000003,
      "loss": 1.566,
      "step": 350
    },
    {
      "epoch": 14.36,
      "forget_cf_outputs.loss": -0.8475239276885986,
      "forget_loss": 0.8475239276885986,
      "gated_loss": 0.01165771484375,
      "retain_loss": 0.22438839077949524,
      "step": 359,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 14.4,
      "grad_norm": 1.8125321865081787,
      "learning_rate": 0.00256,
      "loss": 1.52,
      "step": 360
    },
    {
      "epoch": 14.76,
      "forget_cf_outputs.loss": -1.2125965356826782,
      "forget_loss": 1.2125965356826782,
      "gated_loss": 0.004974365234375,
      "retain_loss": 0.23040254414081573,
      "step": 369,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 14.8,
      "grad_norm": 1.811591386795044,
      "learning_rate": 0.00252,
      "loss": 1.5403,
      "step": 370
    },
    {
      "epoch": 15.16,
      "forget_cf_outputs.loss": -0.9788862466812134,
      "forget_loss": 0.9788862466812134,
      "gated_loss": 0.006439208984375,
      "retain_loss": 0.24656128883361816,
      "step": 379,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 15.2,
      "grad_norm": 1.5504097938537598,
      "learning_rate": 0.00248,
      "loss": 1.5363,
      "step": 380
    },
    {
      "epoch": 15.56,
      "forget_cf_outputs.loss": -0.8573880195617676,
      "forget_loss": 0.8573880195617676,
      "gated_loss": 0.007080078125,
      "retain_loss": 0.2557204067707062,
      "step": 389,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 15.6,
      "grad_norm": 1.5796666145324707,
      "learning_rate": 0.00244,
      "loss": 1.4844,
      "step": 390
    },
    {
      "epoch": 15.96,
      "forget_cf_outputs.loss": -1.0210211277008057,
      "forget_loss": 1.0210211277008057,
      "gated_loss": 0.007080078125,
      "retain_loss": 0.22587800025939941,
      "step": 399,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 16.0,
      "grad_norm": 1.902213215827942,
      "learning_rate": 0.0024,
      "loss": 1.5053,
      "step": 400
    },
    {
      "epoch": 16.36,
      "forget_cf_outputs.loss": -0.7990767955780029,
      "forget_loss": 0.7990767955780029,
      "gated_loss": 0.0064697265625,
      "retain_loss": 0.20731617510318756,
      "step": 409,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 16.4,
      "grad_norm": 1.7532883882522583,
      "learning_rate": 0.00236,
      "loss": 1.3702,
      "step": 410
    },
    {
      "epoch": 16.76,
      "forget_cf_outputs.loss": -1.08427095413208,
      "forget_loss": 1.08427095413208,
      "gated_loss": 0.005950927734375,
      "retain_loss": 0.23056094348430634,
      "step": 419,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 16.8,
      "grad_norm": 1.8573689460754395,
      "learning_rate": 0.00232,
      "loss": 1.4936,
      "step": 420
    },
    {
      "epoch": 17.16,
      "forget_cf_outputs.loss": -0.8196872472763062,
      "forget_loss": 0.8196872472763062,
      "gated_loss": 0.006805419921875,
      "retain_loss": 0.3019656836986542,
      "step": 429,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 17.2,
      "grad_norm": 1.6391781568527222,
      "learning_rate": 0.00228,
      "loss": 1.5101,
      "step": 430
    },
    {
      "epoch": 17.56,
      "forget_cf_outputs.loss": -0.8601583242416382,
      "forget_loss": 0.8601583242416382,
      "gated_loss": 0.00677490234375,
      "retain_loss": 0.30145329236984253,
      "step": 439,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 17.6,
      "grad_norm": 1.7495087385177612,
      "learning_rate": 0.0022400000000000002,
      "loss": 1.4067,
      "step": 440
    },
    {
      "epoch": 17.96,
      "forget_cf_outputs.loss": -0.9261890053749084,
      "forget_loss": 0.9261890053749084,
      "gated_loss": 0.007110595703125,
      "retain_loss": 0.2040254771709442,
      "step": 449,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 18.0,
      "grad_norm": 2.1609787940979004,
      "learning_rate": 0.0022,
      "loss": 1.3843,
      "step": 450
    },
    {
      "epoch": 18.36,
      "forget_cf_outputs.loss": -0.7474625706672668,
      "forget_loss": 0.7474625706672668,
      "gated_loss": 0.006103515625,
      "retain_loss": 0.20823714137077332,
      "step": 459,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 18.4,
      "grad_norm": 1.997226595878601,
      "learning_rate": 0.00216,
      "loss": 1.3081,
      "step": 460
    },
    {
      "epoch": 18.76,
      "forget_cf_outputs.loss": -1.0060858726501465,
      "forget_loss": 1.0060858726501465,
      "gated_loss": 0.00579833984375,
      "retain_loss": 0.23037730157375336,
      "step": 469,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 18.8,
      "grad_norm": 2.023531675338745,
      "learning_rate": 0.0021200000000000004,
      "loss": 1.4162,
      "step": 470
    },
    {
      "epoch": 19.16,
      "forget_cf_outputs.loss": -0.7942442893981934,
      "forget_loss": 0.7942442893981934,
      "gated_loss": 0.00677490234375,
      "retain_loss": 0.30291858315467834,
      "step": 479,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 19.2,
      "grad_norm": 2.0801267623901367,
      "learning_rate": 0.0020800000000000003,
      "loss": 1.4455,
      "step": 480
    },
    {
      "epoch": 19.56,
      "forget_cf_outputs.loss": -0.7663432955741882,
      "forget_loss": 0.7663432955741882,
      "gated_loss": 0.00640869140625,
      "retain_loss": 0.3002184331417084,
      "step": 489,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 19.6,
      "grad_norm": 1.6843624114990234,
      "learning_rate": 0.00204,
      "loss": 1.3348,
      "step": 490
    },
    {
      "epoch": 19.96,
      "forget_cf_outputs.loss": -0.8520928025245667,
      "forget_loss": 0.8520928025245667,
      "gated_loss": 0.006591796875,
      "retain_loss": 0.20439262688159943,
      "step": 499,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 20.0,
      "grad_norm": 2.177591323852539,
      "learning_rate": 0.002,
      "loss": 1.3115,
      "step": 500
    },
    {
      "epoch": 20.36,
      "forget_cf_outputs.loss": -0.7113033533096313,
      "forget_loss": 0.7113033533096313,
      "gated_loss": 0.0062255859375,
      "retain_loss": 0.20779718458652496,
      "step": 509,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 20.4,
      "grad_norm": 2.2269105911254883,
      "learning_rate": 0.00196,
      "loss": 1.2523,
      "step": 510
    },
    {
      "epoch": 20.76,
      "forget_cf_outputs.loss": -0.9132112860679626,
      "forget_loss": 0.9132112860679626,
      "gated_loss": 0.005615234375,
      "retain_loss": 0.23009441792964935,
      "step": 519,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 20.8,
      "grad_norm": 1.955623745918274,
      "learning_rate": 0.00192,
      "loss": 1.3478,
      "step": 520
    },
    {
      "epoch": 21.16,
      "forget_cf_outputs.loss": -0.7705625891685486,
      "forget_loss": 0.7705625891685486,
      "gated_loss": 0.00677490234375,
      "retain_loss": 0.30292266607284546,
      "step": 529,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 21.2,
      "grad_norm": 2.0390868186950684,
      "learning_rate": 0.00188,
      "loss": 1.3883,
      "step": 530
    },
    {
      "epoch": 21.56,
      "forget_cf_outputs.loss": -0.7298972606658936,
      "forget_loss": 0.7298972606658936,
      "gated_loss": 0.006195068359375,
      "retain_loss": 0.3011211156845093,
      "step": 539,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 21.6,
      "grad_norm": 1.9718871116638184,
      "learning_rate": 0.00184,
      "loss": 1.2773,
      "step": 540
    },
    {
      "epoch": 21.96,
      "forget_cf_outputs.loss": -0.7989807724952698,
      "forget_loss": 0.7989807724952698,
      "gated_loss": 0.00640869140625,
      "retain_loss": 0.20371052622795105,
      "step": 549,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 22.0,
      "grad_norm": 2.304124116897583,
      "learning_rate": 0.0018000000000000002,
      "loss": 1.2614,
      "step": 550
    },
    {
      "epoch": 22.36,
      "forget_cf_outputs.loss": -0.6765010952949524,
      "forget_loss": 0.6765010952949524,
      "gated_loss": 0.006134033203125,
      "retain_loss": 0.20700086653232574,
      "step": 559,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 22.4,
      "grad_norm": 2.3407692909240723,
      "learning_rate": 0.00176,
      "loss": 1.2052,
      "step": 560
    },
    {
      "epoch": 22.76,
      "forget_cf_outputs.loss": -0.8482251167297363,
      "forget_loss": 0.8482251167297363,
      "gated_loss": 0.00555419921875,
      "retain_loss": 0.23026637732982635,
      "step": 569,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 22.8,
      "grad_norm": 1.9227235317230225,
      "learning_rate": 0.00172,
      "loss": 1.2995,
      "step": 570
    },
    {
      "epoch": 23.16,
      "forget_cf_outputs.loss": -0.7452784776687622,
      "forget_loss": 0.7452784776687622,
      "gated_loss": 0.006744384765625,
      "retain_loss": 0.30324649810791016,
      "step": 579,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 23.2,
      "grad_norm": 2.1659557819366455,
      "learning_rate": 0.00168,
      "loss": 1.3367,
      "step": 580
    },
    {
      "epoch": 23.56,
      "forget_cf_outputs.loss": -0.6973183155059814,
      "forget_loss": 0.6973183155059814,
      "gated_loss": 0.006011962890625,
      "retain_loss": 0.30067041516304016,
      "step": 589,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 23.6,
      "grad_norm": 2.140836238861084,
      "learning_rate": 0.00164,
      "loss": 1.2336,
      "step": 590
    },
    {
      "epoch": 23.96,
      "forget_cf_outputs.loss": -0.7499862313270569,
      "forget_loss": 0.7499862313270569,
      "gated_loss": 0.006317138671875,
      "retain_loss": 0.20415130257606506,
      "step": 599,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 24.0,
      "grad_norm": 2.5653772354125977,
      "learning_rate": 0.0016,
      "loss": 1.2199,
      "step": 600
    },
    {
      "epoch": 24.36,
      "forget_cf_outputs.loss": -0.6281754374504089,
      "forget_loss": 0.6281754374504089,
      "gated_loss": 0.006103515625,
      "retain_loss": 0.2073870748281479,
      "step": 609,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 24.4,
      "grad_norm": 2.2012555599212646,
      "learning_rate": 0.0015600000000000002,
      "loss": 1.1692,
      "step": 610
    },
    {
      "epoch": 24.76,
      "forget_cf_outputs.loss": -0.7929825782775879,
      "forget_loss": 0.7929825782775879,
      "gated_loss": 0.005584716796875,
      "retain_loss": 0.23124848306179047,
      "step": 619,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 24.8,
      "grad_norm": 1.9918410778045654,
      "learning_rate": 0.00152,
      "loss": 1.2547,
      "step": 620
    },
    {
      "epoch": 25.16,
      "forget_cf_outputs.loss": -0.7136563062667847,
      "forget_loss": 0.7136563062667847,
      "gated_loss": 0.00677490234375,
      "retain_loss": 0.3019442558288574,
      "step": 629,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 25.2,
      "grad_norm": 2.17232608795166,
      "learning_rate": 0.00148,
      "loss": 1.2906,
      "step": 630
    },
    {
      "epoch": 25.56,
      "forget_cf_outputs.loss": -0.6733591556549072,
      "forget_loss": 0.6733591556549072,
      "gated_loss": 0.00604248046875,
      "retain_loss": 0.3010113835334778,
      "step": 639,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 25.6,
      "grad_norm": 2.1968352794647217,
      "learning_rate": 0.0014399999999999999,
      "loss": 1.198,
      "step": 640
    },
    {
      "epoch": 25.96,
      "forget_cf_outputs.loss": -0.6898148655891418,
      "forget_loss": 0.6898148655891418,
      "gated_loss": 0.00628662109375,
      "retain_loss": 0.20451340079307556,
      "step": 649,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 26.0,
      "grad_norm": 2.708749294281006,
      "learning_rate": 0.0014,
      "loss": 1.1784,
      "step": 650
    },
    {
      "epoch": 26.36,
      "forget_cf_outputs.loss": -0.5750948786735535,
      "forget_loss": 0.5750948786735535,
      "gated_loss": 0.006011962890625,
      "retain_loss": 0.20745757222175598,
      "step": 659,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 26.4,
      "grad_norm": 2.0222957134246826,
      "learning_rate": 0.00136,
      "loss": 1.1326,
      "step": 660
    },
    {
      "epoch": 26.76,
      "forget_cf_outputs.loss": -0.7313442826271057,
      "forget_loss": 0.7313442826271057,
      "gated_loss": 0.005523681640625,
      "retain_loss": 0.2309209704399109,
      "step": 669,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 26.8,
      "grad_norm": 1.8242149353027344,
      "learning_rate": 0.00132,
      "loss": 1.2022,
      "step": 670
    },
    {
      "epoch": 27.16,
      "forget_cf_outputs.loss": -0.6695391535758972,
      "forget_loss": 0.6695391535758972,
      "gated_loss": 0.006744384765625,
      "retain_loss": 0.30264630913734436,
      "step": 679,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 27.2,
      "grad_norm": 2.184037208557129,
      "learning_rate": 0.00128,
      "loss": 1.24,
      "step": 680
    },
    {
      "epoch": 27.56,
      "forget_cf_outputs.loss": -0.6346314549446106,
      "forget_loss": 0.6346314549446106,
      "gated_loss": 0.005950927734375,
      "retain_loss": 0.3012670874595642,
      "step": 689,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 27.6,
      "grad_norm": 2.071834087371826,
      "learning_rate": 0.00124,
      "loss": 1.1582,
      "step": 690
    },
    {
      "epoch": 27.96,
      "forget_cf_outputs.loss": -0.6438873410224915,
      "forget_loss": 0.6438873410224915,
      "gated_loss": 0.00634765625,
      "retain_loss": 0.20389188826084137,
      "step": 699,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 28.0,
      "grad_norm": 2.627547264099121,
      "learning_rate": 0.0012,
      "loss": 1.1426,
      "step": 700
    },
    {
      "epoch": 28.36,
      "forget_cf_outputs.loss": -0.5409132242202759,
      "forget_loss": 0.5409132242202759,
      "gated_loss": 0.00604248046875,
      "retain_loss": 0.20821160078048706,
      "step": 709,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 28.4,
      "grad_norm": 1.8756951093673706,
      "learning_rate": 0.00116,
      "loss": 1.0961,
      "step": 710
    },
    {
      "epoch": 28.76,
      "forget_cf_outputs.loss": -0.6844155788421631,
      "forget_loss": 0.6844155788421631,
      "gated_loss": 0.005523681640625,
      "retain_loss": 0.23081088066101074,
      "step": 719,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 28.8,
      "grad_norm": 1.882070541381836,
      "learning_rate": 0.0011200000000000001,
      "loss": 1.1636,
      "step": 720
    },
    {
      "epoch": 29.16,
      "forget_cf_outputs.loss": -0.6462154388427734,
      "forget_loss": 0.6462154388427734,
      "gated_loss": 0.0068359375,
      "retain_loss": 0.30321410298347473,
      "step": 729,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 29.2,
      "grad_norm": 2.2409791946411133,
      "learning_rate": 0.00108,
      "loss": 1.1954,
      "step": 730
    },
    {
      "epoch": 29.56,
      "forget_cf_outputs.loss": -0.5779778957366943,
      "forget_loss": 0.5779778957366943,
      "gated_loss": 0.005828857421875,
      "retain_loss": 0.301031231880188,
      "step": 739,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 29.6,
      "grad_norm": 1.9507259130477905,
      "learning_rate": 0.0010400000000000001,
      "loss": 1.1284,
      "step": 740
    },
    {
      "epoch": 29.96,
      "forget_cf_outputs.loss": -0.595399022102356,
      "forget_loss": 0.595399022102356,
      "gated_loss": 0.00628662109375,
      "retain_loss": 0.20331645011901855,
      "step": 749,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 30.0,
      "grad_norm": 2.5469954013824463,
      "learning_rate": 0.001,
      "loss": 1.1074,
      "step": 750
    },
    {
      "epoch": 30.36,
      "forget_cf_outputs.loss": -0.5063520669937134,
      "forget_loss": 0.5063520669937134,
      "gated_loss": 0.0059814453125,
      "retain_loss": 0.20728009939193726,
      "step": 759,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 30.4,
      "grad_norm": 1.9551663398742676,
      "learning_rate": 0.00096,
      "loss": 1.0691,
      "step": 760
    },
    {
      "epoch": 30.76,
      "forget_cf_outputs.loss": -0.6612439751625061,
      "forget_loss": 0.6612439751625061,
      "gated_loss": 0.005523681640625,
      "retain_loss": 0.23078040778636932,
      "step": 769,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 30.8,
      "grad_norm": 2.112478494644165,
      "learning_rate": 0.00092,
      "loss": 1.1296,
      "step": 770
    },
    {
      "epoch": 31.16,
      "forget_cf_outputs.loss": -0.6047573685646057,
      "forget_loss": 0.6047573685646057,
      "gated_loss": 0.00677490234375,
      "retain_loss": 0.3026810586452484,
      "step": 779,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 31.2,
      "grad_norm": 2.141299247741699,
      "learning_rate": 0.00088,
      "loss": 1.1566,
      "step": 780
    },
    {
      "epoch": 31.56,
      "forget_cf_outputs.loss": -0.5377554297447205,
      "forget_loss": 0.5377554297447205,
      "gated_loss": 0.005828857421875,
      "retain_loss": 0.30064091086387634,
      "step": 789,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 31.6,
      "grad_norm": 1.9649981260299683,
      "learning_rate": 0.00084,
      "loss": 1.097,
      "step": 790
    },
    {
      "epoch": 31.96,
      "forget_cf_outputs.loss": -0.5527829527854919,
      "forget_loss": 0.5527829527854919,
      "gated_loss": 0.0062255859375,
      "retain_loss": 0.20365992188453674,
      "step": 799,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 32.0,
      "grad_norm": 2.628004312515259,
      "learning_rate": 0.0008,
      "loss": 1.0695,
      "step": 800
    },
    {
      "epoch": 32.36,
      "forget_cf_outputs.loss": -0.47989267110824585,
      "forget_loss": 0.47989267110824585,
      "gated_loss": 0.005950927734375,
      "retain_loss": 0.20727092027664185,
      "step": 809,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 32.4,
      "grad_norm": 1.9921866655349731,
      "learning_rate": 0.00076,
      "loss": 1.0319,
      "step": 810
    },
    {
      "epoch": 32.76,
      "forget_cf_outputs.loss": -0.6170799136161804,
      "forget_loss": 0.6170799136161804,
      "gated_loss": 0.00555419921875,
      "retain_loss": 0.23075489699840546,
      "step": 819,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 32.8,
      "grad_norm": 2.0169899463653564,
      "learning_rate": 0.0007199999999999999,
      "loss": 1.091,
      "step": 820
    },
    {
      "epoch": 33.16,
      "forget_cf_outputs.loss": -0.5823113322257996,
      "forget_loss": 0.5823113322257996,
      "gated_loss": 0.0068359375,
      "retain_loss": 0.30245938897132874,
      "step": 829,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 33.2,
      "grad_norm": 2.2369046211242676,
      "learning_rate": 0.00068,
      "loss": 1.126,
      "step": 830
    },
    {
      "epoch": 33.56,
      "forget_cf_outputs.loss": -0.5231561660766602,
      "forget_loss": 0.5231561660766602,
      "gated_loss": 0.005767822265625,
      "retain_loss": 0.3002181351184845,
      "step": 839,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 33.6,
      "grad_norm": 2.4003753662109375,
      "learning_rate": 0.00064,
      "loss": 1.0696,
      "step": 840
    },
    {
      "epoch": 33.96,
      "forget_cf_outputs.loss": -0.4980463981628418,
      "forget_loss": 0.4980463981628418,
      "gated_loss": 0.0062255859375,
      "retain_loss": 0.20359160006046295,
      "step": 849,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 34.0,
      "grad_norm": 2.4804441928863525,
      "learning_rate": 0.0006,
      "loss": 1.0311,
      "step": 850
    },
    {
      "epoch": 34.36,
      "forget_cf_outputs.loss": -0.4477725327014923,
      "forget_loss": 0.4477725327014923,
      "gated_loss": 0.00592041015625,
      "retain_loss": 0.20766329765319824,
      "step": 859,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 34.4,
      "grad_norm": 1.8642009496688843,
      "learning_rate": 0.0005600000000000001,
      "loss": 0.9964,
      "step": 860
    },
    {
      "epoch": 34.76,
      "forget_cf_outputs.loss": -0.5641895532608032,
      "forget_loss": 0.5641895532608032,
      "gated_loss": 0.00555419921875,
      "retain_loss": 0.23121777176856995,
      "step": 869,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 34.8,
      "grad_norm": 1.8299639225006104,
      "learning_rate": 0.0005200000000000001,
      "loss": 1.055,
      "step": 870
    },
    {
      "epoch": 35.16,
      "forget_cf_outputs.loss": -0.5485031604766846,
      "forget_loss": 0.5485031604766846,
      "gated_loss": 0.006805419921875,
      "retain_loss": 0.3044523596763611,
      "step": 879,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 35.2,
      "grad_norm": 2.0533902645111084,
      "learning_rate": 0.00048,
      "loss": 1.0938,
      "step": 880
    },
    {
      "epoch": 35.56,
      "forget_cf_outputs.loss": -0.5070799589157104,
      "forget_loss": 0.5070799589157104,
      "gated_loss": 0.005828857421875,
      "retain_loss": 0.3008805215358734,
      "step": 889,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 35.6,
      "grad_norm": 2.342500925064087,
      "learning_rate": 0.00044,
      "loss": 1.0375,
      "step": 890
    },
    {
      "epoch": 35.96,
      "forget_cf_outputs.loss": -0.4470018446445465,
      "forget_loss": 0.4470018446445465,
      "gated_loss": 0.0062255859375,
      "retain_loss": 0.20359660685062408,
      "step": 899,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 36.0,
      "grad_norm": 2.555351734161377,
      "learning_rate": 0.0004,
      "loss": 0.9953,
      "step": 900
    },
    {
      "epoch": 36.36,
      "forget_cf_outputs.loss": -0.42907437682151794,
      "forget_loss": 0.42907437682151794,
      "gated_loss": 0.005950927734375,
      "retain_loss": 0.20733724534511566,
      "step": 909,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 36.4,
      "grad_norm": 1.745898962020874,
      "learning_rate": 0.00035999999999999997,
      "loss": 0.9637,
      "step": 910
    },
    {
      "epoch": 36.76,
      "forget_cf_outputs.loss": -0.5278509855270386,
      "forget_loss": 0.5278509855270386,
      "gated_loss": 0.00555419921875,
      "retain_loss": 0.23086762428283691,
      "step": 919,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 36.8,
      "grad_norm": 1.8470394611358643,
      "learning_rate": 0.00032,
      "loss": 1.0207,
      "step": 920
    },
    {
      "epoch": 37.16,
      "forget_cf_outputs.loss": -0.5195350646972656,
      "forget_loss": 0.5195350646972656,
      "gated_loss": 0.0068359375,
      "retain_loss": 0.3039552867412567,
      "step": 929,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 37.2,
      "grad_norm": 2.0206496715545654,
      "learning_rate": 0.00028000000000000003,
      "loss": 1.0645,
      "step": 930
    },
    {
      "epoch": 37.56,
      "forget_cf_outputs.loss": -0.4906051754951477,
      "forget_loss": 0.4906051754951477,
      "gated_loss": 0.005828857421875,
      "retain_loss": 0.3013906478881836,
      "step": 939,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 37.6,
      "grad_norm": 2.380035877227783,
      "learning_rate": 0.00024,
      "loss": 1.0092,
      "step": 940
    },
    {
      "epoch": 37.96,
      "forget_cf_outputs.loss": -0.41733187437057495,
      "forget_loss": 0.41733187437057495,
      "gated_loss": 0.0062255859375,
      "retain_loss": 0.20396247506141663,
      "step": 949,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 38.0,
      "grad_norm": 2.3801748752593994,
      "learning_rate": 0.0002,
      "loss": 0.9681,
      "step": 950
    },
    {
      "epoch": 38.36,
      "forget_cf_outputs.loss": -0.41866594552993774,
      "forget_loss": 0.41866594552993774,
      "gated_loss": 0.005950927734375,
      "retain_loss": 0.20748548209667206,
      "step": 959,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 38.4,
      "grad_norm": 1.8068156242370605,
      "learning_rate": 0.00016,
      "loss": 0.9408,
      "step": 960
    },
    {
      "epoch": 38.76,
      "forget_cf_outputs.loss": -0.503368079662323,
      "forget_loss": 0.503368079662323,
      "gated_loss": 0.00555419921875,
      "retain_loss": 0.2300793081521988,
      "step": 969,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 38.8,
      "grad_norm": 1.8037129640579224,
      "learning_rate": 0.00012,
      "loss": 0.9892,
      "step": 970
    },
    {
      "epoch": 39.16,
      "forget_cf_outputs.loss": -0.5070582628250122,
      "forget_loss": 0.5070582628250122,
      "gated_loss": 0.006805419921875,
      "retain_loss": 0.3026222288608551,
      "step": 979,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 39.2,
      "grad_norm": 2.055860757827759,
      "learning_rate": 8e-05,
      "loss": 1.0436,
      "step": 980
    },
    {
      "epoch": 39.56,
      "forget_cf_outputs.loss": -0.46062666177749634,
      "forget_loss": 0.46062666177749634,
      "gated_loss": 0.005828857421875,
      "retain_loss": 0.30048835277557373,
      "step": 989,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 39.6,
      "grad_norm": 2.1290202140808105,
      "learning_rate": 4e-05,
      "loss": 0.9838,
      "step": 990
    },
    {
      "epoch": 39.96,
      "forget_cf_outputs.loss": -0.39183497428894043,
      "forget_loss": 0.39183497428894043,
      "gated_loss": 0.0062255859375,
      "retain_loss": 0.20340043306350708,
      "step": 999,
      "warm_up_unlearning_weight": 1
    },
    {
      "epoch": 40.0,
      "grad_norm": 2.2874202728271484,
      "learning_rate": 0.0,
      "loss": 0.9411,
      "step": 1000
    }
  ],
  "logging_steps": 10,
  "max_steps": 1000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 40,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}