{ "best_metric": null, "best_model_checkpoint": null, "epoch": 40.0, "eval_steps": 500, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.36, "forget_cf_outputs.loss": -2.683056592941284, "forget_loss": 2.683056592941284, "gated_loss": 0.50390625, "retain_loss": 0.2806420624256134, "step": 9, "warm_up_unlearning_weight": 1 }, { "epoch": 0.4, "grad_norm": 12.48087215423584, "learning_rate": 0.00396, "loss": 10.0439, "step": 10 }, { "epoch": 0.76, "forget_cf_outputs.loss": -2.3281075954437256, "forget_loss": 2.3281075954437256, "gated_loss": 0.447265625, "retain_loss": 0.30973613262176514, "step": 19, "warm_up_unlearning_weight": 1 }, { "epoch": 0.8, "grad_norm": 14.701934814453125, "learning_rate": 0.00392, "loss": 6.7906, "step": 20 }, { "epoch": 1.16, "forget_cf_outputs.loss": -1.9086629152297974, "forget_loss": 1.9086629152297974, "gated_loss": 0.12890625, "retain_loss": 0.25592905282974243, "step": 29, "warm_up_unlearning_weight": 1 }, { "epoch": 1.2, "grad_norm": 6.575860500335693, "learning_rate": 0.0038799999999999998, "loss": 5.0776, "step": 30 }, { "epoch": 1.56, "forget_cf_outputs.loss": -1.8418117761611938, "forget_loss": 1.8418117761611938, "gated_loss": 0.26953125, "retain_loss": 0.2785710096359253, "step": 39, "warm_up_unlearning_weight": 1 }, { "epoch": 1.6, "grad_norm": 16.85633087158203, "learning_rate": 0.00384, "loss": 4.3983, "step": 40 }, { "epoch": 1.96, "forget_cf_outputs.loss": -1.5592725276947021, "forget_loss": 1.5592725276947021, "gated_loss": 0.09228515625, "retain_loss": 0.2128174901008606, "step": 49, "warm_up_unlearning_weight": 1 }, { "epoch": 2.0, "grad_norm": 6.894440650939941, "learning_rate": 0.0038, "loss": 3.8443, "step": 50 }, { "epoch": 2.36, "forget_cf_outputs.loss": -1.8075147867202759, "forget_loss": 1.8075147867202759, "gated_loss": 0.2099609375, "retain_loss": 0.20431871712207794, "step": 59, "warm_up_unlearning_weight": 1 }, { "epoch": 2.4, "grad_norm": 12.495519638061523, "learning_rate": 0.00376, "loss": 3.1611, "step": 60 }, { "epoch": 2.76, "forget_cf_outputs.loss": -1.648992896080017, "forget_loss": 1.648992896080017, "gated_loss": 0.08447265625, "retain_loss": 0.23505711555480957, "step": 69, "warm_up_unlearning_weight": 1 }, { "epoch": 2.8, "grad_norm": 4.081427574157715, "learning_rate": 0.00372, "loss": 3.1274, "step": 70 }, { "epoch": 3.16, "forget_cf_outputs.loss": -1.7901757955551147, "forget_loss": 1.7901757955551147, "gated_loss": 0.07861328125, "retain_loss": 0.19908858835697174, "step": 79, "warm_up_unlearning_weight": 1 }, { "epoch": 3.2, "grad_norm": 5.019917011260986, "learning_rate": 0.00368, "loss": 2.9064, "step": 80 }, { "epoch": 3.56, "forget_cf_outputs.loss": -1.6832540035247803, "forget_loss": 1.6832540035247803, "gated_loss": 0.1572265625, "retain_loss": 0.31473198533058167, "step": 89, "warm_up_unlearning_weight": 1 }, { "epoch": 3.6, "grad_norm": 25.354310989379883, "learning_rate": 0.00364, "loss": 2.7559, "step": 90 }, { "epoch": 3.96, "forget_cf_outputs.loss": -1.5529592037200928, "forget_loss": 1.5529592037200928, "gated_loss": 0.0859375, "retain_loss": 0.21396659314632416, "step": 99, "warm_up_unlearning_weight": 1 }, { "epoch": 4.0, "grad_norm": 13.010091781616211, "learning_rate": 0.0036000000000000003, "loss": 4.6507, "step": 100 }, { "epoch": 4.36, "forget_cf_outputs.loss": -1.4749836921691895, "forget_loss": 1.4749836921691895, "gated_loss": 0.0751953125, "retain_loss": 0.3322639465332031, "step": 109, "warm_up_unlearning_weight": 1 }, { "epoch": 4.4, "grad_norm": 19.40593147277832, "learning_rate": 0.0035600000000000002, "loss": 2.4806, "step": 110 }, { "epoch": 4.76, "forget_cf_outputs.loss": -1.586808443069458, "forget_loss": 1.586808443069458, "gated_loss": 0.044677734375, "retain_loss": 0.20972619950771332, "step": 119, "warm_up_unlearning_weight": 1 }, { "epoch": 4.8, "grad_norm": 3.9136083126068115, "learning_rate": 0.00352, "loss": 2.3875, "step": 120 }, { "epoch": 5.16, "forget_cf_outputs.loss": -1.3828651905059814, "forget_loss": 1.3828651905059814, "gated_loss": 0.0308837890625, "retain_loss": 0.2637632191181183, "step": 129, "warm_up_unlearning_weight": 1 }, { "epoch": 5.2, "grad_norm": 2.676300525665283, "learning_rate": 0.00348, "loss": 2.3602, "step": 130 }, { "epoch": 5.5600000000000005, "forget_cf_outputs.loss": -1.353875756263733, "forget_loss": 1.353875756263733, "gated_loss": 0.03076171875, "retain_loss": 0.2234891653060913, "step": 139, "warm_up_unlearning_weight": 1 }, { "epoch": 5.6, "grad_norm": 2.6358110904693604, "learning_rate": 0.00344, "loss": 2.14, "step": 140 }, { "epoch": 5.96, "forget_cf_outputs.loss": -1.4662984609603882, "forget_loss": 1.4662984609603882, "gated_loss": 0.044921875, "retain_loss": 0.22434721887111664, "step": 149, "warm_up_unlearning_weight": 1 }, { "epoch": 6.0, "grad_norm": 5.245201587677002, "learning_rate": 0.0034, "loss": 3.8186, "step": 150 }, { "epoch": 6.36, "forget_cf_outputs.loss": -1.3115544319152832, "forget_loss": 1.3115544319152832, "gated_loss": 0.0208740234375, "retain_loss": 0.27212581038475037, "step": 159, "warm_up_unlearning_weight": 1 }, { "epoch": 6.4, "grad_norm": 2.41806697845459, "learning_rate": 0.00336, "loss": 2.0658, "step": 160 }, { "epoch": 6.76, "forget_cf_outputs.loss": -1.389865517616272, "forget_loss": 1.389865517616272, "gated_loss": 0.0157470703125, "retain_loss": 0.22109903395175934, "step": 169, "warm_up_unlearning_weight": 1 }, { "epoch": 6.8, "grad_norm": 1.6761163473129272, "learning_rate": 0.00332, "loss": 2.0436, "step": 170 }, { "epoch": 7.16, "forget_cf_outputs.loss": -1.4114540815353394, "forget_loss": 1.4114540815353394, "gated_loss": 0.01953125, "retain_loss": 0.28868579864501953, "step": 179, "warm_up_unlearning_weight": 1 }, { "epoch": 7.2, "grad_norm": 2.110581159591675, "learning_rate": 0.00328, "loss": 1.9892, "step": 180 }, { "epoch": 7.5600000000000005, "forget_cf_outputs.loss": -1.5645467042922974, "forget_loss": 1.5645467042922974, "gated_loss": 0.0135498046875, "retain_loss": 0.18681125342845917, "step": 189, "warm_up_unlearning_weight": 1 }, { "epoch": 7.6, "grad_norm": 2.8026249408721924, "learning_rate": 0.0032400000000000003, "loss": 1.9509, "step": 190 }, { "epoch": 7.96, "forget_cf_outputs.loss": -1.368323802947998, "forget_loss": 1.368323802947998, "gated_loss": 0.0142822265625, "retain_loss": 0.23697978258132935, "step": 199, "warm_up_unlearning_weight": 1 }, { "epoch": 8.0, "grad_norm": 1.9209412336349487, "learning_rate": 0.0032, "loss": 1.8948, "step": 200 }, { "epoch": 8.36, "forget_cf_outputs.loss": -1.199570894241333, "forget_loss": 1.199570894241333, "gated_loss": 0.00933837890625, "retain_loss": 0.25221553444862366, "step": 209, "warm_up_unlearning_weight": 1 }, { "epoch": 8.4, "grad_norm": 1.5988783836364746, "learning_rate": 0.00316, "loss": 1.8466, "step": 210 }, { "epoch": 8.76, "forget_cf_outputs.loss": -1.2841148376464844, "forget_loss": 1.2841148376464844, "gated_loss": 0.009765625, "retain_loss": 0.24307847023010254, "step": 219, "warm_up_unlearning_weight": 1 }, { "epoch": 8.8, "grad_norm": 1.7333821058273315, "learning_rate": 0.0031200000000000004, "loss": 1.817, "step": 220 }, { "epoch": 9.16, "forget_cf_outputs.loss": -1.0533504486083984, "forget_loss": 1.0533504486083984, "gated_loss": 0.005462646484375, "retain_loss": 0.22837677597999573, "step": 229, "warm_up_unlearning_weight": 1 }, { "epoch": 9.2, "grad_norm": 1.5669026374816895, "learning_rate": 0.0030800000000000003, "loss": 1.7654, "step": 230 }, { "epoch": 9.56, "forget_cf_outputs.loss": -1.2518203258514404, "forget_loss": 1.2518203258514404, "gated_loss": 0.0107421875, "retain_loss": 0.27540236711502075, "step": 239, "warm_up_unlearning_weight": 1 }, { "epoch": 9.6, "grad_norm": 1.7850066423416138, "learning_rate": 0.00304, "loss": 1.7462, "step": 240 }, { "epoch": 9.96, "forget_cf_outputs.loss": -1.2480342388153076, "forget_loss": 1.2480342388153076, "gated_loss": 0.0079345703125, "retain_loss": 0.2164781242609024, "step": 249, "warm_up_unlearning_weight": 1 }, { "epoch": 10.0, "grad_norm": 1.8285338878631592, "learning_rate": 0.003, "loss": 1.8334, "step": 250 }, { "epoch": 10.36, "forget_cf_outputs.loss": -1.14595365524292, "forget_loss": 1.14595365524292, "gated_loss": 0.006072998046875, "retain_loss": 0.22983184456825256, "step": 259, "warm_up_unlearning_weight": 1 }, { "epoch": 10.4, "grad_norm": 1.8580724000930786, "learning_rate": 0.00296, "loss": 1.6553, "step": 260 }, { "epoch": 10.76, "forget_cf_outputs.loss": -1.2968733310699463, "forget_loss": 1.2968733310699463, "gated_loss": 0.008544921875, "retain_loss": 0.20906659960746765, "step": 269, "warm_up_unlearning_weight": 1 }, { "epoch": 10.8, "grad_norm": 1.6227186918258667, "learning_rate": 0.00292, "loss": 1.7339, "step": 270 }, { "epoch": 11.16, "forget_cf_outputs.loss": -1.21259605884552, "forget_loss": 1.21259605884552, "gated_loss": 0.00750732421875, "retain_loss": 0.23951691389083862, "step": 279, "warm_up_unlearning_weight": 1 }, { "epoch": 11.2, "grad_norm": 1.3491921424865723, "learning_rate": 0.0028799999999999997, "loss": 1.6642, "step": 280 }, { "epoch": 11.56, "forget_cf_outputs.loss": -1.203460454940796, "forget_loss": 1.203460454940796, "gated_loss": 0.0087890625, "retain_loss": 0.23031194508075714, "step": 289, "warm_up_unlearning_weight": 1 }, { "epoch": 11.6, "grad_norm": 2.027022123336792, "learning_rate": 0.00284, "loss": 1.6413, "step": 290 }, { "epoch": 11.96, "forget_cf_outputs.loss": -1.1864365339279175, "forget_loss": 1.1864365339279175, "gated_loss": 0.01043701171875, "retain_loss": 0.23127436637878418, "step": 299, "warm_up_unlearning_weight": 1 }, { "epoch": 12.0, "grad_norm": 1.8040319681167603, "learning_rate": 0.0028, "loss": 1.761, "step": 300 }, { "epoch": 12.36, "forget_cf_outputs.loss": -1.227767825126648, "forget_loss": 1.227767825126648, "gated_loss": 0.00616455078125, "retain_loss": 0.22675465047359467, "step": 309, "warm_up_unlearning_weight": 1 }, { "epoch": 12.4, "grad_norm": 2.1620850563049316, "learning_rate": 0.00276, "loss": 1.5947, "step": 310 }, { "epoch": 12.76, "forget_cf_outputs.loss": -1.0549200773239136, "forget_loss": 1.0549200773239136, "gated_loss": 0.0054931640625, "retain_loss": 0.225913867354393, "step": 319, "warm_up_unlearning_weight": 1 }, { "epoch": 12.8, "grad_norm": 1.422467827796936, "learning_rate": 0.00272, "loss": 1.6839, "step": 320 }, { "epoch": 13.16, "forget_cf_outputs.loss": -0.966583251953125, "forget_loss": 0.966583251953125, "gated_loss": 0.0050048828125, "retain_loss": 0.21935530006885529, "step": 329, "warm_up_unlearning_weight": 1 }, { "epoch": 13.2, "grad_norm": 1.3767800331115723, "learning_rate": 0.00268, "loss": 1.6052, "step": 330 }, { "epoch": 13.56, "forget_cf_outputs.loss": -1.0747449398040771, "forget_loss": 1.0747449398040771, "gated_loss": 0.008544921875, "retain_loss": 0.23962758481502533, "step": 339, "warm_up_unlearning_weight": 1 }, { "epoch": 13.6, "grad_norm": 2.152151584625244, "learning_rate": 0.00264, "loss": 1.6239, "step": 340 }, { "epoch": 13.96, "forget_cf_outputs.loss": -0.9583653211593628, "forget_loss": 0.9583653211593628, "gated_loss": 0.006500244140625, "retain_loss": 0.21241800487041473, "step": 349, "warm_up_unlearning_weight": 1 }, { "epoch": 14.0, "grad_norm": 2.0913257598876953, "learning_rate": 0.0026000000000000003, "loss": 1.566, "step": 350 }, { "epoch": 14.36, "forget_cf_outputs.loss": -0.8475239276885986, "forget_loss": 0.8475239276885986, "gated_loss": 0.01165771484375, "retain_loss": 0.22438839077949524, "step": 359, "warm_up_unlearning_weight": 1 }, { "epoch": 14.4, "grad_norm": 1.8125321865081787, "learning_rate": 0.00256, "loss": 1.52, "step": 360 }, { "epoch": 14.76, "forget_cf_outputs.loss": -1.2125965356826782, "forget_loss": 1.2125965356826782, "gated_loss": 0.004974365234375, "retain_loss": 0.23040254414081573, "step": 369, "warm_up_unlearning_weight": 1 }, { "epoch": 14.8, "grad_norm": 1.811591386795044, "learning_rate": 0.00252, "loss": 1.5403, "step": 370 }, { "epoch": 15.16, "forget_cf_outputs.loss": -0.9788862466812134, "forget_loss": 0.9788862466812134, "gated_loss": 0.006439208984375, "retain_loss": 0.24656128883361816, "step": 379, "warm_up_unlearning_weight": 1 }, { "epoch": 15.2, "grad_norm": 1.5504097938537598, "learning_rate": 0.00248, "loss": 1.5363, "step": 380 }, { "epoch": 15.56, "forget_cf_outputs.loss": -0.8573880195617676, "forget_loss": 0.8573880195617676, "gated_loss": 0.007080078125, "retain_loss": 0.2557204067707062, "step": 389, "warm_up_unlearning_weight": 1 }, { "epoch": 15.6, "grad_norm": 1.5796666145324707, "learning_rate": 0.00244, "loss": 1.4844, "step": 390 }, { "epoch": 15.96, "forget_cf_outputs.loss": -1.0210211277008057, "forget_loss": 1.0210211277008057, "gated_loss": 0.007080078125, "retain_loss": 0.22587800025939941, "step": 399, "warm_up_unlearning_weight": 1 }, { "epoch": 16.0, "grad_norm": 1.902213215827942, "learning_rate": 0.0024, "loss": 1.5053, "step": 400 }, { "epoch": 16.36, "forget_cf_outputs.loss": -0.7990767955780029, "forget_loss": 0.7990767955780029, "gated_loss": 0.0064697265625, "retain_loss": 0.20731617510318756, "step": 409, "warm_up_unlearning_weight": 1 }, { "epoch": 16.4, "grad_norm": 1.7532883882522583, "learning_rate": 0.00236, "loss": 1.3702, "step": 410 }, { "epoch": 16.76, "forget_cf_outputs.loss": -1.08427095413208, "forget_loss": 1.08427095413208, "gated_loss": 0.005950927734375, "retain_loss": 0.23056094348430634, "step": 419, "warm_up_unlearning_weight": 1 }, { "epoch": 16.8, "grad_norm": 1.8573689460754395, "learning_rate": 0.00232, "loss": 1.4936, "step": 420 }, { "epoch": 17.16, "forget_cf_outputs.loss": -0.8196872472763062, "forget_loss": 0.8196872472763062, "gated_loss": 0.006805419921875, "retain_loss": 0.3019656836986542, "step": 429, "warm_up_unlearning_weight": 1 }, { "epoch": 17.2, "grad_norm": 1.6391781568527222, "learning_rate": 0.00228, "loss": 1.5101, "step": 430 }, { "epoch": 17.56, "forget_cf_outputs.loss": -0.8601583242416382, "forget_loss": 0.8601583242416382, "gated_loss": 0.00677490234375, "retain_loss": 0.30145329236984253, "step": 439, "warm_up_unlearning_weight": 1 }, { "epoch": 17.6, "grad_norm": 1.7495087385177612, "learning_rate": 0.0022400000000000002, "loss": 1.4067, "step": 440 }, { "epoch": 17.96, "forget_cf_outputs.loss": -0.9261890053749084, "forget_loss": 0.9261890053749084, "gated_loss": 0.007110595703125, "retain_loss": 0.2040254771709442, "step": 449, "warm_up_unlearning_weight": 1 }, { "epoch": 18.0, "grad_norm": 2.1609787940979004, "learning_rate": 0.0022, "loss": 1.3843, "step": 450 }, { "epoch": 18.36, "forget_cf_outputs.loss": -0.7474625706672668, "forget_loss": 0.7474625706672668, "gated_loss": 0.006103515625, "retain_loss": 0.20823714137077332, "step": 459, "warm_up_unlearning_weight": 1 }, { "epoch": 18.4, "grad_norm": 1.997226595878601, "learning_rate": 0.00216, "loss": 1.3081, "step": 460 }, { "epoch": 18.76, "forget_cf_outputs.loss": -1.0060858726501465, "forget_loss": 1.0060858726501465, "gated_loss": 0.00579833984375, "retain_loss": 0.23037730157375336, "step": 469, "warm_up_unlearning_weight": 1 }, { "epoch": 18.8, "grad_norm": 2.023531675338745, "learning_rate": 0.0021200000000000004, "loss": 1.4162, "step": 470 }, { "epoch": 19.16, "forget_cf_outputs.loss": -0.7942442893981934, "forget_loss": 0.7942442893981934, "gated_loss": 0.00677490234375, "retain_loss": 0.30291858315467834, "step": 479, "warm_up_unlearning_weight": 1 }, { "epoch": 19.2, "grad_norm": 2.0801267623901367, "learning_rate": 0.0020800000000000003, "loss": 1.4455, "step": 480 }, { "epoch": 19.56, "forget_cf_outputs.loss": -0.7663432955741882, "forget_loss": 0.7663432955741882, "gated_loss": 0.00640869140625, "retain_loss": 0.3002184331417084, "step": 489, "warm_up_unlearning_weight": 1 }, { "epoch": 19.6, "grad_norm": 1.6843624114990234, "learning_rate": 0.00204, "loss": 1.3348, "step": 490 }, { "epoch": 19.96, "forget_cf_outputs.loss": -0.8520928025245667, "forget_loss": 0.8520928025245667, "gated_loss": 0.006591796875, "retain_loss": 0.20439262688159943, "step": 499, "warm_up_unlearning_weight": 1 }, { "epoch": 20.0, "grad_norm": 2.177591323852539, "learning_rate": 0.002, "loss": 1.3115, "step": 500 }, { "epoch": 20.36, "forget_cf_outputs.loss": -0.7113033533096313, "forget_loss": 0.7113033533096313, "gated_loss": 0.0062255859375, "retain_loss": 0.20779718458652496, "step": 509, "warm_up_unlearning_weight": 1 }, { "epoch": 20.4, "grad_norm": 2.2269105911254883, "learning_rate": 0.00196, "loss": 1.2523, "step": 510 }, { "epoch": 20.76, "forget_cf_outputs.loss": -0.9132112860679626, "forget_loss": 0.9132112860679626, "gated_loss": 0.005615234375, "retain_loss": 0.23009441792964935, "step": 519, "warm_up_unlearning_weight": 1 }, { "epoch": 20.8, "grad_norm": 1.955623745918274, "learning_rate": 0.00192, "loss": 1.3478, "step": 520 }, { "epoch": 21.16, "forget_cf_outputs.loss": -0.7705625891685486, "forget_loss": 0.7705625891685486, "gated_loss": 0.00677490234375, "retain_loss": 0.30292266607284546, "step": 529, "warm_up_unlearning_weight": 1 }, { "epoch": 21.2, "grad_norm": 2.0390868186950684, "learning_rate": 0.00188, "loss": 1.3883, "step": 530 }, { "epoch": 21.56, "forget_cf_outputs.loss": -0.7298972606658936, "forget_loss": 0.7298972606658936, "gated_loss": 0.006195068359375, "retain_loss": 0.3011211156845093, "step": 539, "warm_up_unlearning_weight": 1 }, { "epoch": 21.6, "grad_norm": 1.9718871116638184, "learning_rate": 0.00184, "loss": 1.2773, "step": 540 }, { "epoch": 21.96, "forget_cf_outputs.loss": -0.7989807724952698, "forget_loss": 0.7989807724952698, "gated_loss": 0.00640869140625, "retain_loss": 0.20371052622795105, "step": 549, "warm_up_unlearning_weight": 1 }, { "epoch": 22.0, "grad_norm": 2.304124116897583, "learning_rate": 0.0018000000000000002, "loss": 1.2614, "step": 550 }, { "epoch": 22.36, "forget_cf_outputs.loss": -0.6765010952949524, "forget_loss": 0.6765010952949524, "gated_loss": 0.006134033203125, "retain_loss": 0.20700086653232574, "step": 559, "warm_up_unlearning_weight": 1 }, { "epoch": 22.4, "grad_norm": 2.3407692909240723, "learning_rate": 0.00176, "loss": 1.2052, "step": 560 }, { "epoch": 22.76, "forget_cf_outputs.loss": -0.8482251167297363, "forget_loss": 0.8482251167297363, "gated_loss": 0.00555419921875, "retain_loss": 0.23026637732982635, "step": 569, "warm_up_unlearning_weight": 1 }, { "epoch": 22.8, "grad_norm": 1.9227235317230225, "learning_rate": 0.00172, "loss": 1.2995, "step": 570 }, { "epoch": 23.16, "forget_cf_outputs.loss": -0.7452784776687622, "forget_loss": 0.7452784776687622, "gated_loss": 0.006744384765625, "retain_loss": 0.30324649810791016, "step": 579, "warm_up_unlearning_weight": 1 }, { "epoch": 23.2, "grad_norm": 2.1659557819366455, "learning_rate": 0.00168, "loss": 1.3367, "step": 580 }, { "epoch": 23.56, "forget_cf_outputs.loss": -0.6973183155059814, "forget_loss": 0.6973183155059814, "gated_loss": 0.006011962890625, "retain_loss": 0.30067041516304016, "step": 589, "warm_up_unlearning_weight": 1 }, { "epoch": 23.6, "grad_norm": 2.140836238861084, "learning_rate": 0.00164, "loss": 1.2336, "step": 590 }, { "epoch": 23.96, "forget_cf_outputs.loss": -0.7499862313270569, "forget_loss": 0.7499862313270569, "gated_loss": 0.006317138671875, "retain_loss": 0.20415130257606506, "step": 599, "warm_up_unlearning_weight": 1 }, { "epoch": 24.0, "grad_norm": 2.5653772354125977, "learning_rate": 0.0016, "loss": 1.2199, "step": 600 }, { "epoch": 24.36, "forget_cf_outputs.loss": -0.6281754374504089, "forget_loss": 0.6281754374504089, "gated_loss": 0.006103515625, "retain_loss": 0.2073870748281479, "step": 609, "warm_up_unlearning_weight": 1 }, { "epoch": 24.4, "grad_norm": 2.2012555599212646, "learning_rate": 0.0015600000000000002, "loss": 1.1692, "step": 610 }, { "epoch": 24.76, "forget_cf_outputs.loss": -0.7929825782775879, "forget_loss": 0.7929825782775879, "gated_loss": 0.005584716796875, "retain_loss": 0.23124848306179047, "step": 619, "warm_up_unlearning_weight": 1 }, { "epoch": 24.8, "grad_norm": 1.9918410778045654, "learning_rate": 0.00152, "loss": 1.2547, "step": 620 }, { "epoch": 25.16, "forget_cf_outputs.loss": -0.7136563062667847, "forget_loss": 0.7136563062667847, "gated_loss": 0.00677490234375, "retain_loss": 0.3019442558288574, "step": 629, "warm_up_unlearning_weight": 1 }, { "epoch": 25.2, "grad_norm": 2.17232608795166, "learning_rate": 0.00148, "loss": 1.2906, "step": 630 }, { "epoch": 25.56, "forget_cf_outputs.loss": -0.6733591556549072, "forget_loss": 0.6733591556549072, "gated_loss": 0.00604248046875, "retain_loss": 0.3010113835334778, "step": 639, "warm_up_unlearning_weight": 1 }, { "epoch": 25.6, "grad_norm": 2.1968352794647217, "learning_rate": 0.0014399999999999999, "loss": 1.198, "step": 640 }, { "epoch": 25.96, "forget_cf_outputs.loss": -0.6898148655891418, "forget_loss": 0.6898148655891418, "gated_loss": 0.00628662109375, "retain_loss": 0.20451340079307556, "step": 649, "warm_up_unlearning_weight": 1 }, { "epoch": 26.0, "grad_norm": 2.708749294281006, "learning_rate": 0.0014, "loss": 1.1784, "step": 650 }, { "epoch": 26.36, "forget_cf_outputs.loss": -0.5750948786735535, "forget_loss": 0.5750948786735535, "gated_loss": 0.006011962890625, "retain_loss": 0.20745757222175598, "step": 659, "warm_up_unlearning_weight": 1 }, { "epoch": 26.4, "grad_norm": 2.0222957134246826, "learning_rate": 0.00136, "loss": 1.1326, "step": 660 }, { "epoch": 26.76, "forget_cf_outputs.loss": -0.7313442826271057, "forget_loss": 0.7313442826271057, "gated_loss": 0.005523681640625, "retain_loss": 0.2309209704399109, "step": 669, "warm_up_unlearning_weight": 1 }, { "epoch": 26.8, "grad_norm": 1.8242149353027344, "learning_rate": 0.00132, "loss": 1.2022, "step": 670 }, { "epoch": 27.16, "forget_cf_outputs.loss": -0.6695391535758972, "forget_loss": 0.6695391535758972, "gated_loss": 0.006744384765625, "retain_loss": 0.30264630913734436, "step": 679, "warm_up_unlearning_weight": 1 }, { "epoch": 27.2, "grad_norm": 2.184037208557129, "learning_rate": 0.00128, "loss": 1.24, "step": 680 }, { "epoch": 27.56, "forget_cf_outputs.loss": -0.6346314549446106, "forget_loss": 0.6346314549446106, "gated_loss": 0.005950927734375, "retain_loss": 0.3012670874595642, "step": 689, "warm_up_unlearning_weight": 1 }, { "epoch": 27.6, "grad_norm": 2.071834087371826, "learning_rate": 0.00124, "loss": 1.1582, "step": 690 }, { "epoch": 27.96, "forget_cf_outputs.loss": -0.6438873410224915, "forget_loss": 0.6438873410224915, "gated_loss": 0.00634765625, "retain_loss": 0.20389188826084137, "step": 699, "warm_up_unlearning_weight": 1 }, { "epoch": 28.0, "grad_norm": 2.627547264099121, "learning_rate": 0.0012, "loss": 1.1426, "step": 700 }, { "epoch": 28.36, "forget_cf_outputs.loss": -0.5409132242202759, "forget_loss": 0.5409132242202759, "gated_loss": 0.00604248046875, "retain_loss": 0.20821160078048706, "step": 709, "warm_up_unlearning_weight": 1 }, { "epoch": 28.4, "grad_norm": 1.8756951093673706, "learning_rate": 0.00116, "loss": 1.0961, "step": 710 }, { "epoch": 28.76, "forget_cf_outputs.loss": -0.6844155788421631, "forget_loss": 0.6844155788421631, "gated_loss": 0.005523681640625, "retain_loss": 0.23081088066101074, "step": 719, "warm_up_unlearning_weight": 1 }, { "epoch": 28.8, "grad_norm": 1.882070541381836, "learning_rate": 0.0011200000000000001, "loss": 1.1636, "step": 720 }, { "epoch": 29.16, "forget_cf_outputs.loss": -0.6462154388427734, "forget_loss": 0.6462154388427734, "gated_loss": 0.0068359375, "retain_loss": 0.30321410298347473, "step": 729, "warm_up_unlearning_weight": 1 }, { "epoch": 29.2, "grad_norm": 2.2409791946411133, "learning_rate": 0.00108, "loss": 1.1954, "step": 730 }, { "epoch": 29.56, "forget_cf_outputs.loss": -0.5779778957366943, "forget_loss": 0.5779778957366943, "gated_loss": 0.005828857421875, "retain_loss": 0.301031231880188, "step": 739, "warm_up_unlearning_weight": 1 }, { "epoch": 29.6, "grad_norm": 1.9507259130477905, "learning_rate": 0.0010400000000000001, "loss": 1.1284, "step": 740 }, { "epoch": 29.96, "forget_cf_outputs.loss": -0.595399022102356, "forget_loss": 0.595399022102356, "gated_loss": 0.00628662109375, "retain_loss": 0.20331645011901855, "step": 749, "warm_up_unlearning_weight": 1 }, { "epoch": 30.0, "grad_norm": 2.5469954013824463, "learning_rate": 0.001, "loss": 1.1074, "step": 750 }, { "epoch": 30.36, "forget_cf_outputs.loss": -0.5063520669937134, "forget_loss": 0.5063520669937134, "gated_loss": 0.0059814453125, "retain_loss": 0.20728009939193726, "step": 759, "warm_up_unlearning_weight": 1 }, { "epoch": 30.4, "grad_norm": 1.9551663398742676, "learning_rate": 0.00096, "loss": 1.0691, "step": 760 }, { "epoch": 30.76, "forget_cf_outputs.loss": -0.6612439751625061, "forget_loss": 0.6612439751625061, "gated_loss": 0.005523681640625, "retain_loss": 0.23078040778636932, "step": 769, "warm_up_unlearning_weight": 1 }, { "epoch": 30.8, "grad_norm": 2.112478494644165, "learning_rate": 0.00092, "loss": 1.1296, "step": 770 }, { "epoch": 31.16, "forget_cf_outputs.loss": -0.6047573685646057, "forget_loss": 0.6047573685646057, "gated_loss": 0.00677490234375, "retain_loss": 0.3026810586452484, "step": 779, "warm_up_unlearning_weight": 1 }, { "epoch": 31.2, "grad_norm": 2.141299247741699, "learning_rate": 0.00088, "loss": 1.1566, "step": 780 }, { "epoch": 31.56, "forget_cf_outputs.loss": -0.5377554297447205, "forget_loss": 0.5377554297447205, "gated_loss": 0.005828857421875, "retain_loss": 0.30064091086387634, "step": 789, "warm_up_unlearning_weight": 1 }, { "epoch": 31.6, "grad_norm": 1.9649981260299683, "learning_rate": 0.00084, "loss": 1.097, "step": 790 }, { "epoch": 31.96, "forget_cf_outputs.loss": -0.5527829527854919, "forget_loss": 0.5527829527854919, "gated_loss": 0.0062255859375, "retain_loss": 0.20365992188453674, "step": 799, "warm_up_unlearning_weight": 1 }, { "epoch": 32.0, "grad_norm": 2.628004312515259, "learning_rate": 0.0008, "loss": 1.0695, "step": 800 }, { "epoch": 32.36, "forget_cf_outputs.loss": -0.47989267110824585, "forget_loss": 0.47989267110824585, "gated_loss": 0.005950927734375, "retain_loss": 0.20727092027664185, "step": 809, "warm_up_unlearning_weight": 1 }, { "epoch": 32.4, "grad_norm": 1.9921866655349731, "learning_rate": 0.00076, "loss": 1.0319, "step": 810 }, { "epoch": 32.76, "forget_cf_outputs.loss": -0.6170799136161804, "forget_loss": 0.6170799136161804, "gated_loss": 0.00555419921875, "retain_loss": 0.23075489699840546, "step": 819, "warm_up_unlearning_weight": 1 }, { "epoch": 32.8, "grad_norm": 2.0169899463653564, "learning_rate": 0.0007199999999999999, "loss": 1.091, "step": 820 }, { "epoch": 33.16, "forget_cf_outputs.loss": -0.5823113322257996, "forget_loss": 0.5823113322257996, "gated_loss": 0.0068359375, "retain_loss": 0.30245938897132874, "step": 829, "warm_up_unlearning_weight": 1 }, { "epoch": 33.2, "grad_norm": 2.2369046211242676, "learning_rate": 0.00068, "loss": 1.126, "step": 830 }, { "epoch": 33.56, "forget_cf_outputs.loss": -0.5231561660766602, "forget_loss": 0.5231561660766602, "gated_loss": 0.005767822265625, "retain_loss": 0.3002181351184845, "step": 839, "warm_up_unlearning_weight": 1 }, { "epoch": 33.6, "grad_norm": 2.4003753662109375, "learning_rate": 0.00064, "loss": 1.0696, "step": 840 }, { "epoch": 33.96, "forget_cf_outputs.loss": -0.4980463981628418, "forget_loss": 0.4980463981628418, "gated_loss": 0.0062255859375, "retain_loss": 0.20359160006046295, "step": 849, "warm_up_unlearning_weight": 1 }, { "epoch": 34.0, "grad_norm": 2.4804441928863525, "learning_rate": 0.0006, "loss": 1.0311, "step": 850 }, { "epoch": 34.36, "forget_cf_outputs.loss": -0.4477725327014923, "forget_loss": 0.4477725327014923, "gated_loss": 0.00592041015625, "retain_loss": 0.20766329765319824, "step": 859, "warm_up_unlearning_weight": 1 }, { "epoch": 34.4, "grad_norm": 1.8642009496688843, "learning_rate": 0.0005600000000000001, "loss": 0.9964, "step": 860 }, { "epoch": 34.76, "forget_cf_outputs.loss": -0.5641895532608032, "forget_loss": 0.5641895532608032, "gated_loss": 0.00555419921875, "retain_loss": 0.23121777176856995, "step": 869, "warm_up_unlearning_weight": 1 }, { "epoch": 34.8, "grad_norm": 1.8299639225006104, "learning_rate": 0.0005200000000000001, "loss": 1.055, "step": 870 }, { "epoch": 35.16, "forget_cf_outputs.loss": -0.5485031604766846, "forget_loss": 0.5485031604766846, "gated_loss": 0.006805419921875, "retain_loss": 0.3044523596763611, "step": 879, "warm_up_unlearning_weight": 1 }, { "epoch": 35.2, "grad_norm": 2.0533902645111084, "learning_rate": 0.00048, "loss": 1.0938, "step": 880 }, { "epoch": 35.56, "forget_cf_outputs.loss": -0.5070799589157104, "forget_loss": 0.5070799589157104, "gated_loss": 0.005828857421875, "retain_loss": 0.3008805215358734, "step": 889, "warm_up_unlearning_weight": 1 }, { "epoch": 35.6, "grad_norm": 2.342500925064087, "learning_rate": 0.00044, "loss": 1.0375, "step": 890 }, { "epoch": 35.96, "forget_cf_outputs.loss": -0.4470018446445465, "forget_loss": 0.4470018446445465, "gated_loss": 0.0062255859375, "retain_loss": 0.20359660685062408, "step": 899, "warm_up_unlearning_weight": 1 }, { "epoch": 36.0, "grad_norm": 2.555351734161377, "learning_rate": 0.0004, "loss": 0.9953, "step": 900 }, { "epoch": 36.36, "forget_cf_outputs.loss": -0.42907437682151794, "forget_loss": 0.42907437682151794, "gated_loss": 0.005950927734375, "retain_loss": 0.20733724534511566, "step": 909, "warm_up_unlearning_weight": 1 }, { "epoch": 36.4, "grad_norm": 1.745898962020874, "learning_rate": 0.00035999999999999997, "loss": 0.9637, "step": 910 }, { "epoch": 36.76, "forget_cf_outputs.loss": -0.5278509855270386, "forget_loss": 0.5278509855270386, "gated_loss": 0.00555419921875, "retain_loss": 0.23086762428283691, "step": 919, "warm_up_unlearning_weight": 1 }, { "epoch": 36.8, "grad_norm": 1.8470394611358643, "learning_rate": 0.00032, "loss": 1.0207, "step": 920 }, { "epoch": 37.16, "forget_cf_outputs.loss": -0.5195350646972656, "forget_loss": 0.5195350646972656, "gated_loss": 0.0068359375, "retain_loss": 0.3039552867412567, "step": 929, "warm_up_unlearning_weight": 1 }, { "epoch": 37.2, "grad_norm": 2.0206496715545654, "learning_rate": 0.00028000000000000003, "loss": 1.0645, "step": 930 }, { "epoch": 37.56, "forget_cf_outputs.loss": -0.4906051754951477, "forget_loss": 0.4906051754951477, "gated_loss": 0.005828857421875, "retain_loss": 0.3013906478881836, "step": 939, "warm_up_unlearning_weight": 1 }, { "epoch": 37.6, "grad_norm": 2.380035877227783, "learning_rate": 0.00024, "loss": 1.0092, "step": 940 }, { "epoch": 37.96, "forget_cf_outputs.loss": -0.41733187437057495, "forget_loss": 0.41733187437057495, "gated_loss": 0.0062255859375, "retain_loss": 0.20396247506141663, "step": 949, "warm_up_unlearning_weight": 1 }, { "epoch": 38.0, "grad_norm": 2.3801748752593994, "learning_rate": 0.0002, "loss": 0.9681, "step": 950 }, { "epoch": 38.36, "forget_cf_outputs.loss": -0.41866594552993774, "forget_loss": 0.41866594552993774, "gated_loss": 0.005950927734375, "retain_loss": 0.20748548209667206, "step": 959, "warm_up_unlearning_weight": 1 }, { "epoch": 38.4, "grad_norm": 1.8068156242370605, "learning_rate": 0.00016, "loss": 0.9408, "step": 960 }, { "epoch": 38.76, "forget_cf_outputs.loss": -0.503368079662323, "forget_loss": 0.503368079662323, "gated_loss": 0.00555419921875, "retain_loss": 0.2300793081521988, "step": 969, "warm_up_unlearning_weight": 1 }, { "epoch": 38.8, "grad_norm": 1.8037129640579224, "learning_rate": 0.00012, "loss": 0.9892, "step": 970 }, { "epoch": 39.16, "forget_cf_outputs.loss": -0.5070582628250122, "forget_loss": 0.5070582628250122, "gated_loss": 0.006805419921875, "retain_loss": 0.3026222288608551, "step": 979, "warm_up_unlearning_weight": 1 }, { "epoch": 39.2, "grad_norm": 2.055860757827759, "learning_rate": 8e-05, "loss": 1.0436, "step": 980 }, { "epoch": 39.56, "forget_cf_outputs.loss": -0.46062666177749634, "forget_loss": 0.46062666177749634, "gated_loss": 0.005828857421875, "retain_loss": 0.30048835277557373, "step": 989, "warm_up_unlearning_weight": 1 }, { "epoch": 39.6, "grad_norm": 2.1290202140808105, "learning_rate": 4e-05, "loss": 0.9838, "step": 990 }, { "epoch": 39.96, "forget_cf_outputs.loss": -0.39183497428894043, "forget_loss": 0.39183497428894043, "gated_loss": 0.0062255859375, "retain_loss": 0.20340043306350708, "step": 999, "warm_up_unlearning_weight": 1 }, { "epoch": 40.0, "grad_norm": 2.2874202728271484, "learning_rate": 0.0, "loss": 0.9411, "step": 1000 } ], "logging_steps": 10, "max_steps": 1000, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }