grun_10 / trainer_state.json
astirex's picture
Upload 8 files
b0b3518 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 40.0,
"eval_steps": 500,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.36,
"forget_cf_outputs.loss": -2.683056592941284,
"forget_loss": 2.683056592941284,
"gated_loss": 0.50390625,
"retain_loss": 0.2806420624256134,
"step": 9,
"warm_up_unlearning_weight": 1
},
{
"epoch": 0.4,
"grad_norm": 12.48087215423584,
"learning_rate": 0.00396,
"loss": 10.0439,
"step": 10
},
{
"epoch": 0.76,
"forget_cf_outputs.loss": -2.3281075954437256,
"forget_loss": 2.3281075954437256,
"gated_loss": 0.447265625,
"retain_loss": 0.30973613262176514,
"step": 19,
"warm_up_unlearning_weight": 1
},
{
"epoch": 0.8,
"grad_norm": 14.701934814453125,
"learning_rate": 0.00392,
"loss": 6.7906,
"step": 20
},
{
"epoch": 1.16,
"forget_cf_outputs.loss": -1.9086629152297974,
"forget_loss": 1.9086629152297974,
"gated_loss": 0.12890625,
"retain_loss": 0.25592905282974243,
"step": 29,
"warm_up_unlearning_weight": 1
},
{
"epoch": 1.2,
"grad_norm": 6.575860500335693,
"learning_rate": 0.0038799999999999998,
"loss": 5.0776,
"step": 30
},
{
"epoch": 1.56,
"forget_cf_outputs.loss": -1.8418117761611938,
"forget_loss": 1.8418117761611938,
"gated_loss": 0.26953125,
"retain_loss": 0.2785710096359253,
"step": 39,
"warm_up_unlearning_weight": 1
},
{
"epoch": 1.6,
"grad_norm": 16.85633087158203,
"learning_rate": 0.00384,
"loss": 4.3983,
"step": 40
},
{
"epoch": 1.96,
"forget_cf_outputs.loss": -1.5592725276947021,
"forget_loss": 1.5592725276947021,
"gated_loss": 0.09228515625,
"retain_loss": 0.2128174901008606,
"step": 49,
"warm_up_unlearning_weight": 1
},
{
"epoch": 2.0,
"grad_norm": 6.894440650939941,
"learning_rate": 0.0038,
"loss": 3.8443,
"step": 50
},
{
"epoch": 2.36,
"forget_cf_outputs.loss": -1.8075147867202759,
"forget_loss": 1.8075147867202759,
"gated_loss": 0.2099609375,
"retain_loss": 0.20431871712207794,
"step": 59,
"warm_up_unlearning_weight": 1
},
{
"epoch": 2.4,
"grad_norm": 12.495519638061523,
"learning_rate": 0.00376,
"loss": 3.1611,
"step": 60
},
{
"epoch": 2.76,
"forget_cf_outputs.loss": -1.648992896080017,
"forget_loss": 1.648992896080017,
"gated_loss": 0.08447265625,
"retain_loss": 0.23505711555480957,
"step": 69,
"warm_up_unlearning_weight": 1
},
{
"epoch": 2.8,
"grad_norm": 4.081427574157715,
"learning_rate": 0.00372,
"loss": 3.1274,
"step": 70
},
{
"epoch": 3.16,
"forget_cf_outputs.loss": -1.7901757955551147,
"forget_loss": 1.7901757955551147,
"gated_loss": 0.07861328125,
"retain_loss": 0.19908858835697174,
"step": 79,
"warm_up_unlearning_weight": 1
},
{
"epoch": 3.2,
"grad_norm": 5.019917011260986,
"learning_rate": 0.00368,
"loss": 2.9064,
"step": 80
},
{
"epoch": 3.56,
"forget_cf_outputs.loss": -1.6832540035247803,
"forget_loss": 1.6832540035247803,
"gated_loss": 0.1572265625,
"retain_loss": 0.31473198533058167,
"step": 89,
"warm_up_unlearning_weight": 1
},
{
"epoch": 3.6,
"grad_norm": 25.354310989379883,
"learning_rate": 0.00364,
"loss": 2.7559,
"step": 90
},
{
"epoch": 3.96,
"forget_cf_outputs.loss": -1.5529592037200928,
"forget_loss": 1.5529592037200928,
"gated_loss": 0.0859375,
"retain_loss": 0.21396659314632416,
"step": 99,
"warm_up_unlearning_weight": 1
},
{
"epoch": 4.0,
"grad_norm": 13.010091781616211,
"learning_rate": 0.0036000000000000003,
"loss": 4.6507,
"step": 100
},
{
"epoch": 4.36,
"forget_cf_outputs.loss": -1.4749836921691895,
"forget_loss": 1.4749836921691895,
"gated_loss": 0.0751953125,
"retain_loss": 0.3322639465332031,
"step": 109,
"warm_up_unlearning_weight": 1
},
{
"epoch": 4.4,
"grad_norm": 19.40593147277832,
"learning_rate": 0.0035600000000000002,
"loss": 2.4806,
"step": 110
},
{
"epoch": 4.76,
"forget_cf_outputs.loss": -1.586808443069458,
"forget_loss": 1.586808443069458,
"gated_loss": 0.044677734375,
"retain_loss": 0.20972619950771332,
"step": 119,
"warm_up_unlearning_weight": 1
},
{
"epoch": 4.8,
"grad_norm": 3.9136083126068115,
"learning_rate": 0.00352,
"loss": 2.3875,
"step": 120
},
{
"epoch": 5.16,
"forget_cf_outputs.loss": -1.3828651905059814,
"forget_loss": 1.3828651905059814,
"gated_loss": 0.0308837890625,
"retain_loss": 0.2637632191181183,
"step": 129,
"warm_up_unlearning_weight": 1
},
{
"epoch": 5.2,
"grad_norm": 2.676300525665283,
"learning_rate": 0.00348,
"loss": 2.3602,
"step": 130
},
{
"epoch": 5.5600000000000005,
"forget_cf_outputs.loss": -1.353875756263733,
"forget_loss": 1.353875756263733,
"gated_loss": 0.03076171875,
"retain_loss": 0.2234891653060913,
"step": 139,
"warm_up_unlearning_weight": 1
},
{
"epoch": 5.6,
"grad_norm": 2.6358110904693604,
"learning_rate": 0.00344,
"loss": 2.14,
"step": 140
},
{
"epoch": 5.96,
"forget_cf_outputs.loss": -1.4662984609603882,
"forget_loss": 1.4662984609603882,
"gated_loss": 0.044921875,
"retain_loss": 0.22434721887111664,
"step": 149,
"warm_up_unlearning_weight": 1
},
{
"epoch": 6.0,
"grad_norm": 5.245201587677002,
"learning_rate": 0.0034,
"loss": 3.8186,
"step": 150
},
{
"epoch": 6.36,
"forget_cf_outputs.loss": -1.3115544319152832,
"forget_loss": 1.3115544319152832,
"gated_loss": 0.0208740234375,
"retain_loss": 0.27212581038475037,
"step": 159,
"warm_up_unlearning_weight": 1
},
{
"epoch": 6.4,
"grad_norm": 2.41806697845459,
"learning_rate": 0.00336,
"loss": 2.0658,
"step": 160
},
{
"epoch": 6.76,
"forget_cf_outputs.loss": -1.389865517616272,
"forget_loss": 1.389865517616272,
"gated_loss": 0.0157470703125,
"retain_loss": 0.22109903395175934,
"step": 169,
"warm_up_unlearning_weight": 1
},
{
"epoch": 6.8,
"grad_norm": 1.6761163473129272,
"learning_rate": 0.00332,
"loss": 2.0436,
"step": 170
},
{
"epoch": 7.16,
"forget_cf_outputs.loss": -1.4114540815353394,
"forget_loss": 1.4114540815353394,
"gated_loss": 0.01953125,
"retain_loss": 0.28868579864501953,
"step": 179,
"warm_up_unlearning_weight": 1
},
{
"epoch": 7.2,
"grad_norm": 2.110581159591675,
"learning_rate": 0.00328,
"loss": 1.9892,
"step": 180
},
{
"epoch": 7.5600000000000005,
"forget_cf_outputs.loss": -1.5645467042922974,
"forget_loss": 1.5645467042922974,
"gated_loss": 0.0135498046875,
"retain_loss": 0.18681125342845917,
"step": 189,
"warm_up_unlearning_weight": 1
},
{
"epoch": 7.6,
"grad_norm": 2.8026249408721924,
"learning_rate": 0.0032400000000000003,
"loss": 1.9509,
"step": 190
},
{
"epoch": 7.96,
"forget_cf_outputs.loss": -1.368323802947998,
"forget_loss": 1.368323802947998,
"gated_loss": 0.0142822265625,
"retain_loss": 0.23697978258132935,
"step": 199,
"warm_up_unlearning_weight": 1
},
{
"epoch": 8.0,
"grad_norm": 1.9209412336349487,
"learning_rate": 0.0032,
"loss": 1.8948,
"step": 200
},
{
"epoch": 8.36,
"forget_cf_outputs.loss": -1.199570894241333,
"forget_loss": 1.199570894241333,
"gated_loss": 0.00933837890625,
"retain_loss": 0.25221553444862366,
"step": 209,
"warm_up_unlearning_weight": 1
},
{
"epoch": 8.4,
"grad_norm": 1.5988783836364746,
"learning_rate": 0.00316,
"loss": 1.8466,
"step": 210
},
{
"epoch": 8.76,
"forget_cf_outputs.loss": -1.2841148376464844,
"forget_loss": 1.2841148376464844,
"gated_loss": 0.009765625,
"retain_loss": 0.24307847023010254,
"step": 219,
"warm_up_unlearning_weight": 1
},
{
"epoch": 8.8,
"grad_norm": 1.7333821058273315,
"learning_rate": 0.0031200000000000004,
"loss": 1.817,
"step": 220
},
{
"epoch": 9.16,
"forget_cf_outputs.loss": -1.0533504486083984,
"forget_loss": 1.0533504486083984,
"gated_loss": 0.005462646484375,
"retain_loss": 0.22837677597999573,
"step": 229,
"warm_up_unlearning_weight": 1
},
{
"epoch": 9.2,
"grad_norm": 1.5669026374816895,
"learning_rate": 0.0030800000000000003,
"loss": 1.7654,
"step": 230
},
{
"epoch": 9.56,
"forget_cf_outputs.loss": -1.2518203258514404,
"forget_loss": 1.2518203258514404,
"gated_loss": 0.0107421875,
"retain_loss": 0.27540236711502075,
"step": 239,
"warm_up_unlearning_weight": 1
},
{
"epoch": 9.6,
"grad_norm": 1.7850066423416138,
"learning_rate": 0.00304,
"loss": 1.7462,
"step": 240
},
{
"epoch": 9.96,
"forget_cf_outputs.loss": -1.2480342388153076,
"forget_loss": 1.2480342388153076,
"gated_loss": 0.0079345703125,
"retain_loss": 0.2164781242609024,
"step": 249,
"warm_up_unlearning_weight": 1
},
{
"epoch": 10.0,
"grad_norm": 1.8285338878631592,
"learning_rate": 0.003,
"loss": 1.8334,
"step": 250
},
{
"epoch": 10.36,
"forget_cf_outputs.loss": -1.14595365524292,
"forget_loss": 1.14595365524292,
"gated_loss": 0.006072998046875,
"retain_loss": 0.22983184456825256,
"step": 259,
"warm_up_unlearning_weight": 1
},
{
"epoch": 10.4,
"grad_norm": 1.8580724000930786,
"learning_rate": 0.00296,
"loss": 1.6553,
"step": 260
},
{
"epoch": 10.76,
"forget_cf_outputs.loss": -1.2968733310699463,
"forget_loss": 1.2968733310699463,
"gated_loss": 0.008544921875,
"retain_loss": 0.20906659960746765,
"step": 269,
"warm_up_unlearning_weight": 1
},
{
"epoch": 10.8,
"grad_norm": 1.6227186918258667,
"learning_rate": 0.00292,
"loss": 1.7339,
"step": 270
},
{
"epoch": 11.16,
"forget_cf_outputs.loss": -1.21259605884552,
"forget_loss": 1.21259605884552,
"gated_loss": 0.00750732421875,
"retain_loss": 0.23951691389083862,
"step": 279,
"warm_up_unlearning_weight": 1
},
{
"epoch": 11.2,
"grad_norm": 1.3491921424865723,
"learning_rate": 0.0028799999999999997,
"loss": 1.6642,
"step": 280
},
{
"epoch": 11.56,
"forget_cf_outputs.loss": -1.203460454940796,
"forget_loss": 1.203460454940796,
"gated_loss": 0.0087890625,
"retain_loss": 0.23031194508075714,
"step": 289,
"warm_up_unlearning_weight": 1
},
{
"epoch": 11.6,
"grad_norm": 2.027022123336792,
"learning_rate": 0.00284,
"loss": 1.6413,
"step": 290
},
{
"epoch": 11.96,
"forget_cf_outputs.loss": -1.1864365339279175,
"forget_loss": 1.1864365339279175,
"gated_loss": 0.01043701171875,
"retain_loss": 0.23127436637878418,
"step": 299,
"warm_up_unlearning_weight": 1
},
{
"epoch": 12.0,
"grad_norm": 1.8040319681167603,
"learning_rate": 0.0028,
"loss": 1.761,
"step": 300
},
{
"epoch": 12.36,
"forget_cf_outputs.loss": -1.227767825126648,
"forget_loss": 1.227767825126648,
"gated_loss": 0.00616455078125,
"retain_loss": 0.22675465047359467,
"step": 309,
"warm_up_unlearning_weight": 1
},
{
"epoch": 12.4,
"grad_norm": 2.1620850563049316,
"learning_rate": 0.00276,
"loss": 1.5947,
"step": 310
},
{
"epoch": 12.76,
"forget_cf_outputs.loss": -1.0549200773239136,
"forget_loss": 1.0549200773239136,
"gated_loss": 0.0054931640625,
"retain_loss": 0.225913867354393,
"step": 319,
"warm_up_unlearning_weight": 1
},
{
"epoch": 12.8,
"grad_norm": 1.422467827796936,
"learning_rate": 0.00272,
"loss": 1.6839,
"step": 320
},
{
"epoch": 13.16,
"forget_cf_outputs.loss": -0.966583251953125,
"forget_loss": 0.966583251953125,
"gated_loss": 0.0050048828125,
"retain_loss": 0.21935530006885529,
"step": 329,
"warm_up_unlearning_weight": 1
},
{
"epoch": 13.2,
"grad_norm": 1.3767800331115723,
"learning_rate": 0.00268,
"loss": 1.6052,
"step": 330
},
{
"epoch": 13.56,
"forget_cf_outputs.loss": -1.0747449398040771,
"forget_loss": 1.0747449398040771,
"gated_loss": 0.008544921875,
"retain_loss": 0.23962758481502533,
"step": 339,
"warm_up_unlearning_weight": 1
},
{
"epoch": 13.6,
"grad_norm": 2.152151584625244,
"learning_rate": 0.00264,
"loss": 1.6239,
"step": 340
},
{
"epoch": 13.96,
"forget_cf_outputs.loss": -0.9583653211593628,
"forget_loss": 0.9583653211593628,
"gated_loss": 0.006500244140625,
"retain_loss": 0.21241800487041473,
"step": 349,
"warm_up_unlearning_weight": 1
},
{
"epoch": 14.0,
"grad_norm": 2.0913257598876953,
"learning_rate": 0.0026000000000000003,
"loss": 1.566,
"step": 350
},
{
"epoch": 14.36,
"forget_cf_outputs.loss": -0.8475239276885986,
"forget_loss": 0.8475239276885986,
"gated_loss": 0.01165771484375,
"retain_loss": 0.22438839077949524,
"step": 359,
"warm_up_unlearning_weight": 1
},
{
"epoch": 14.4,
"grad_norm": 1.8125321865081787,
"learning_rate": 0.00256,
"loss": 1.52,
"step": 360
},
{
"epoch": 14.76,
"forget_cf_outputs.loss": -1.2125965356826782,
"forget_loss": 1.2125965356826782,
"gated_loss": 0.004974365234375,
"retain_loss": 0.23040254414081573,
"step": 369,
"warm_up_unlearning_weight": 1
},
{
"epoch": 14.8,
"grad_norm": 1.811591386795044,
"learning_rate": 0.00252,
"loss": 1.5403,
"step": 370
},
{
"epoch": 15.16,
"forget_cf_outputs.loss": -0.9788862466812134,
"forget_loss": 0.9788862466812134,
"gated_loss": 0.006439208984375,
"retain_loss": 0.24656128883361816,
"step": 379,
"warm_up_unlearning_weight": 1
},
{
"epoch": 15.2,
"grad_norm": 1.5504097938537598,
"learning_rate": 0.00248,
"loss": 1.5363,
"step": 380
},
{
"epoch": 15.56,
"forget_cf_outputs.loss": -0.8573880195617676,
"forget_loss": 0.8573880195617676,
"gated_loss": 0.007080078125,
"retain_loss": 0.2557204067707062,
"step": 389,
"warm_up_unlearning_weight": 1
},
{
"epoch": 15.6,
"grad_norm": 1.5796666145324707,
"learning_rate": 0.00244,
"loss": 1.4844,
"step": 390
},
{
"epoch": 15.96,
"forget_cf_outputs.loss": -1.0210211277008057,
"forget_loss": 1.0210211277008057,
"gated_loss": 0.007080078125,
"retain_loss": 0.22587800025939941,
"step": 399,
"warm_up_unlearning_weight": 1
},
{
"epoch": 16.0,
"grad_norm": 1.902213215827942,
"learning_rate": 0.0024,
"loss": 1.5053,
"step": 400
},
{
"epoch": 16.36,
"forget_cf_outputs.loss": -0.7990767955780029,
"forget_loss": 0.7990767955780029,
"gated_loss": 0.0064697265625,
"retain_loss": 0.20731617510318756,
"step": 409,
"warm_up_unlearning_weight": 1
},
{
"epoch": 16.4,
"grad_norm": 1.7532883882522583,
"learning_rate": 0.00236,
"loss": 1.3702,
"step": 410
},
{
"epoch": 16.76,
"forget_cf_outputs.loss": -1.08427095413208,
"forget_loss": 1.08427095413208,
"gated_loss": 0.005950927734375,
"retain_loss": 0.23056094348430634,
"step": 419,
"warm_up_unlearning_weight": 1
},
{
"epoch": 16.8,
"grad_norm": 1.8573689460754395,
"learning_rate": 0.00232,
"loss": 1.4936,
"step": 420
},
{
"epoch": 17.16,
"forget_cf_outputs.loss": -0.8196872472763062,
"forget_loss": 0.8196872472763062,
"gated_loss": 0.006805419921875,
"retain_loss": 0.3019656836986542,
"step": 429,
"warm_up_unlearning_weight": 1
},
{
"epoch": 17.2,
"grad_norm": 1.6391781568527222,
"learning_rate": 0.00228,
"loss": 1.5101,
"step": 430
},
{
"epoch": 17.56,
"forget_cf_outputs.loss": -0.8601583242416382,
"forget_loss": 0.8601583242416382,
"gated_loss": 0.00677490234375,
"retain_loss": 0.30145329236984253,
"step": 439,
"warm_up_unlearning_weight": 1
},
{
"epoch": 17.6,
"grad_norm": 1.7495087385177612,
"learning_rate": 0.0022400000000000002,
"loss": 1.4067,
"step": 440
},
{
"epoch": 17.96,
"forget_cf_outputs.loss": -0.9261890053749084,
"forget_loss": 0.9261890053749084,
"gated_loss": 0.007110595703125,
"retain_loss": 0.2040254771709442,
"step": 449,
"warm_up_unlearning_weight": 1
},
{
"epoch": 18.0,
"grad_norm": 2.1609787940979004,
"learning_rate": 0.0022,
"loss": 1.3843,
"step": 450
},
{
"epoch": 18.36,
"forget_cf_outputs.loss": -0.7474625706672668,
"forget_loss": 0.7474625706672668,
"gated_loss": 0.006103515625,
"retain_loss": 0.20823714137077332,
"step": 459,
"warm_up_unlearning_weight": 1
},
{
"epoch": 18.4,
"grad_norm": 1.997226595878601,
"learning_rate": 0.00216,
"loss": 1.3081,
"step": 460
},
{
"epoch": 18.76,
"forget_cf_outputs.loss": -1.0060858726501465,
"forget_loss": 1.0060858726501465,
"gated_loss": 0.00579833984375,
"retain_loss": 0.23037730157375336,
"step": 469,
"warm_up_unlearning_weight": 1
},
{
"epoch": 18.8,
"grad_norm": 2.023531675338745,
"learning_rate": 0.0021200000000000004,
"loss": 1.4162,
"step": 470
},
{
"epoch": 19.16,
"forget_cf_outputs.loss": -0.7942442893981934,
"forget_loss": 0.7942442893981934,
"gated_loss": 0.00677490234375,
"retain_loss": 0.30291858315467834,
"step": 479,
"warm_up_unlearning_weight": 1
},
{
"epoch": 19.2,
"grad_norm": 2.0801267623901367,
"learning_rate": 0.0020800000000000003,
"loss": 1.4455,
"step": 480
},
{
"epoch": 19.56,
"forget_cf_outputs.loss": -0.7663432955741882,
"forget_loss": 0.7663432955741882,
"gated_loss": 0.00640869140625,
"retain_loss": 0.3002184331417084,
"step": 489,
"warm_up_unlearning_weight": 1
},
{
"epoch": 19.6,
"grad_norm": 1.6843624114990234,
"learning_rate": 0.00204,
"loss": 1.3348,
"step": 490
},
{
"epoch": 19.96,
"forget_cf_outputs.loss": -0.8520928025245667,
"forget_loss": 0.8520928025245667,
"gated_loss": 0.006591796875,
"retain_loss": 0.20439262688159943,
"step": 499,
"warm_up_unlearning_weight": 1
},
{
"epoch": 20.0,
"grad_norm": 2.177591323852539,
"learning_rate": 0.002,
"loss": 1.3115,
"step": 500
},
{
"epoch": 20.36,
"forget_cf_outputs.loss": -0.7113033533096313,
"forget_loss": 0.7113033533096313,
"gated_loss": 0.0062255859375,
"retain_loss": 0.20779718458652496,
"step": 509,
"warm_up_unlearning_weight": 1
},
{
"epoch": 20.4,
"grad_norm": 2.2269105911254883,
"learning_rate": 0.00196,
"loss": 1.2523,
"step": 510
},
{
"epoch": 20.76,
"forget_cf_outputs.loss": -0.9132112860679626,
"forget_loss": 0.9132112860679626,
"gated_loss": 0.005615234375,
"retain_loss": 0.23009441792964935,
"step": 519,
"warm_up_unlearning_weight": 1
},
{
"epoch": 20.8,
"grad_norm": 1.955623745918274,
"learning_rate": 0.00192,
"loss": 1.3478,
"step": 520
},
{
"epoch": 21.16,
"forget_cf_outputs.loss": -0.7705625891685486,
"forget_loss": 0.7705625891685486,
"gated_loss": 0.00677490234375,
"retain_loss": 0.30292266607284546,
"step": 529,
"warm_up_unlearning_weight": 1
},
{
"epoch": 21.2,
"grad_norm": 2.0390868186950684,
"learning_rate": 0.00188,
"loss": 1.3883,
"step": 530
},
{
"epoch": 21.56,
"forget_cf_outputs.loss": -0.7298972606658936,
"forget_loss": 0.7298972606658936,
"gated_loss": 0.006195068359375,
"retain_loss": 0.3011211156845093,
"step": 539,
"warm_up_unlearning_weight": 1
},
{
"epoch": 21.6,
"grad_norm": 1.9718871116638184,
"learning_rate": 0.00184,
"loss": 1.2773,
"step": 540
},
{
"epoch": 21.96,
"forget_cf_outputs.loss": -0.7989807724952698,
"forget_loss": 0.7989807724952698,
"gated_loss": 0.00640869140625,
"retain_loss": 0.20371052622795105,
"step": 549,
"warm_up_unlearning_weight": 1
},
{
"epoch": 22.0,
"grad_norm": 2.304124116897583,
"learning_rate": 0.0018000000000000002,
"loss": 1.2614,
"step": 550
},
{
"epoch": 22.36,
"forget_cf_outputs.loss": -0.6765010952949524,
"forget_loss": 0.6765010952949524,
"gated_loss": 0.006134033203125,
"retain_loss": 0.20700086653232574,
"step": 559,
"warm_up_unlearning_weight": 1
},
{
"epoch": 22.4,
"grad_norm": 2.3407692909240723,
"learning_rate": 0.00176,
"loss": 1.2052,
"step": 560
},
{
"epoch": 22.76,
"forget_cf_outputs.loss": -0.8482251167297363,
"forget_loss": 0.8482251167297363,
"gated_loss": 0.00555419921875,
"retain_loss": 0.23026637732982635,
"step": 569,
"warm_up_unlearning_weight": 1
},
{
"epoch": 22.8,
"grad_norm": 1.9227235317230225,
"learning_rate": 0.00172,
"loss": 1.2995,
"step": 570
},
{
"epoch": 23.16,
"forget_cf_outputs.loss": -0.7452784776687622,
"forget_loss": 0.7452784776687622,
"gated_loss": 0.006744384765625,
"retain_loss": 0.30324649810791016,
"step": 579,
"warm_up_unlearning_weight": 1
},
{
"epoch": 23.2,
"grad_norm": 2.1659557819366455,
"learning_rate": 0.00168,
"loss": 1.3367,
"step": 580
},
{
"epoch": 23.56,
"forget_cf_outputs.loss": -0.6973183155059814,
"forget_loss": 0.6973183155059814,
"gated_loss": 0.006011962890625,
"retain_loss": 0.30067041516304016,
"step": 589,
"warm_up_unlearning_weight": 1
},
{
"epoch": 23.6,
"grad_norm": 2.140836238861084,
"learning_rate": 0.00164,
"loss": 1.2336,
"step": 590
},
{
"epoch": 23.96,
"forget_cf_outputs.loss": -0.7499862313270569,
"forget_loss": 0.7499862313270569,
"gated_loss": 0.006317138671875,
"retain_loss": 0.20415130257606506,
"step": 599,
"warm_up_unlearning_weight": 1
},
{
"epoch": 24.0,
"grad_norm": 2.5653772354125977,
"learning_rate": 0.0016,
"loss": 1.2199,
"step": 600
},
{
"epoch": 24.36,
"forget_cf_outputs.loss": -0.6281754374504089,
"forget_loss": 0.6281754374504089,
"gated_loss": 0.006103515625,
"retain_loss": 0.2073870748281479,
"step": 609,
"warm_up_unlearning_weight": 1
},
{
"epoch": 24.4,
"grad_norm": 2.2012555599212646,
"learning_rate": 0.0015600000000000002,
"loss": 1.1692,
"step": 610
},
{
"epoch": 24.76,
"forget_cf_outputs.loss": -0.7929825782775879,
"forget_loss": 0.7929825782775879,
"gated_loss": 0.005584716796875,
"retain_loss": 0.23124848306179047,
"step": 619,
"warm_up_unlearning_weight": 1
},
{
"epoch": 24.8,
"grad_norm": 1.9918410778045654,
"learning_rate": 0.00152,
"loss": 1.2547,
"step": 620
},
{
"epoch": 25.16,
"forget_cf_outputs.loss": -0.7136563062667847,
"forget_loss": 0.7136563062667847,
"gated_loss": 0.00677490234375,
"retain_loss": 0.3019442558288574,
"step": 629,
"warm_up_unlearning_weight": 1
},
{
"epoch": 25.2,
"grad_norm": 2.17232608795166,
"learning_rate": 0.00148,
"loss": 1.2906,
"step": 630
},
{
"epoch": 25.56,
"forget_cf_outputs.loss": -0.6733591556549072,
"forget_loss": 0.6733591556549072,
"gated_loss": 0.00604248046875,
"retain_loss": 0.3010113835334778,
"step": 639,
"warm_up_unlearning_weight": 1
},
{
"epoch": 25.6,
"grad_norm": 2.1968352794647217,
"learning_rate": 0.0014399999999999999,
"loss": 1.198,
"step": 640
},
{
"epoch": 25.96,
"forget_cf_outputs.loss": -0.6898148655891418,
"forget_loss": 0.6898148655891418,
"gated_loss": 0.00628662109375,
"retain_loss": 0.20451340079307556,
"step": 649,
"warm_up_unlearning_weight": 1
},
{
"epoch": 26.0,
"grad_norm": 2.708749294281006,
"learning_rate": 0.0014,
"loss": 1.1784,
"step": 650
},
{
"epoch": 26.36,
"forget_cf_outputs.loss": -0.5750948786735535,
"forget_loss": 0.5750948786735535,
"gated_loss": 0.006011962890625,
"retain_loss": 0.20745757222175598,
"step": 659,
"warm_up_unlearning_weight": 1
},
{
"epoch": 26.4,
"grad_norm": 2.0222957134246826,
"learning_rate": 0.00136,
"loss": 1.1326,
"step": 660
},
{
"epoch": 26.76,
"forget_cf_outputs.loss": -0.7313442826271057,
"forget_loss": 0.7313442826271057,
"gated_loss": 0.005523681640625,
"retain_loss": 0.2309209704399109,
"step": 669,
"warm_up_unlearning_weight": 1
},
{
"epoch": 26.8,
"grad_norm": 1.8242149353027344,
"learning_rate": 0.00132,
"loss": 1.2022,
"step": 670
},
{
"epoch": 27.16,
"forget_cf_outputs.loss": -0.6695391535758972,
"forget_loss": 0.6695391535758972,
"gated_loss": 0.006744384765625,
"retain_loss": 0.30264630913734436,
"step": 679,
"warm_up_unlearning_weight": 1
},
{
"epoch": 27.2,
"grad_norm": 2.184037208557129,
"learning_rate": 0.00128,
"loss": 1.24,
"step": 680
},
{
"epoch": 27.56,
"forget_cf_outputs.loss": -0.6346314549446106,
"forget_loss": 0.6346314549446106,
"gated_loss": 0.005950927734375,
"retain_loss": 0.3012670874595642,
"step": 689,
"warm_up_unlearning_weight": 1
},
{
"epoch": 27.6,
"grad_norm": 2.071834087371826,
"learning_rate": 0.00124,
"loss": 1.1582,
"step": 690
},
{
"epoch": 27.96,
"forget_cf_outputs.loss": -0.6438873410224915,
"forget_loss": 0.6438873410224915,
"gated_loss": 0.00634765625,
"retain_loss": 0.20389188826084137,
"step": 699,
"warm_up_unlearning_weight": 1
},
{
"epoch": 28.0,
"grad_norm": 2.627547264099121,
"learning_rate": 0.0012,
"loss": 1.1426,
"step": 700
},
{
"epoch": 28.36,
"forget_cf_outputs.loss": -0.5409132242202759,
"forget_loss": 0.5409132242202759,
"gated_loss": 0.00604248046875,
"retain_loss": 0.20821160078048706,
"step": 709,
"warm_up_unlearning_weight": 1
},
{
"epoch": 28.4,
"grad_norm": 1.8756951093673706,
"learning_rate": 0.00116,
"loss": 1.0961,
"step": 710
},
{
"epoch": 28.76,
"forget_cf_outputs.loss": -0.6844155788421631,
"forget_loss": 0.6844155788421631,
"gated_loss": 0.005523681640625,
"retain_loss": 0.23081088066101074,
"step": 719,
"warm_up_unlearning_weight": 1
},
{
"epoch": 28.8,
"grad_norm": 1.882070541381836,
"learning_rate": 0.0011200000000000001,
"loss": 1.1636,
"step": 720
},
{
"epoch": 29.16,
"forget_cf_outputs.loss": -0.6462154388427734,
"forget_loss": 0.6462154388427734,
"gated_loss": 0.0068359375,
"retain_loss": 0.30321410298347473,
"step": 729,
"warm_up_unlearning_weight": 1
},
{
"epoch": 29.2,
"grad_norm": 2.2409791946411133,
"learning_rate": 0.00108,
"loss": 1.1954,
"step": 730
},
{
"epoch": 29.56,
"forget_cf_outputs.loss": -0.5779778957366943,
"forget_loss": 0.5779778957366943,
"gated_loss": 0.005828857421875,
"retain_loss": 0.301031231880188,
"step": 739,
"warm_up_unlearning_weight": 1
},
{
"epoch": 29.6,
"grad_norm": 1.9507259130477905,
"learning_rate": 0.0010400000000000001,
"loss": 1.1284,
"step": 740
},
{
"epoch": 29.96,
"forget_cf_outputs.loss": -0.595399022102356,
"forget_loss": 0.595399022102356,
"gated_loss": 0.00628662109375,
"retain_loss": 0.20331645011901855,
"step": 749,
"warm_up_unlearning_weight": 1
},
{
"epoch": 30.0,
"grad_norm": 2.5469954013824463,
"learning_rate": 0.001,
"loss": 1.1074,
"step": 750
},
{
"epoch": 30.36,
"forget_cf_outputs.loss": -0.5063520669937134,
"forget_loss": 0.5063520669937134,
"gated_loss": 0.0059814453125,
"retain_loss": 0.20728009939193726,
"step": 759,
"warm_up_unlearning_weight": 1
},
{
"epoch": 30.4,
"grad_norm": 1.9551663398742676,
"learning_rate": 0.00096,
"loss": 1.0691,
"step": 760
},
{
"epoch": 30.76,
"forget_cf_outputs.loss": -0.6612439751625061,
"forget_loss": 0.6612439751625061,
"gated_loss": 0.005523681640625,
"retain_loss": 0.23078040778636932,
"step": 769,
"warm_up_unlearning_weight": 1
},
{
"epoch": 30.8,
"grad_norm": 2.112478494644165,
"learning_rate": 0.00092,
"loss": 1.1296,
"step": 770
},
{
"epoch": 31.16,
"forget_cf_outputs.loss": -0.6047573685646057,
"forget_loss": 0.6047573685646057,
"gated_loss": 0.00677490234375,
"retain_loss": 0.3026810586452484,
"step": 779,
"warm_up_unlearning_weight": 1
},
{
"epoch": 31.2,
"grad_norm": 2.141299247741699,
"learning_rate": 0.00088,
"loss": 1.1566,
"step": 780
},
{
"epoch": 31.56,
"forget_cf_outputs.loss": -0.5377554297447205,
"forget_loss": 0.5377554297447205,
"gated_loss": 0.005828857421875,
"retain_loss": 0.30064091086387634,
"step": 789,
"warm_up_unlearning_weight": 1
},
{
"epoch": 31.6,
"grad_norm": 1.9649981260299683,
"learning_rate": 0.00084,
"loss": 1.097,
"step": 790
},
{
"epoch": 31.96,
"forget_cf_outputs.loss": -0.5527829527854919,
"forget_loss": 0.5527829527854919,
"gated_loss": 0.0062255859375,
"retain_loss": 0.20365992188453674,
"step": 799,
"warm_up_unlearning_weight": 1
},
{
"epoch": 32.0,
"grad_norm": 2.628004312515259,
"learning_rate": 0.0008,
"loss": 1.0695,
"step": 800
},
{
"epoch": 32.36,
"forget_cf_outputs.loss": -0.47989267110824585,
"forget_loss": 0.47989267110824585,
"gated_loss": 0.005950927734375,
"retain_loss": 0.20727092027664185,
"step": 809,
"warm_up_unlearning_weight": 1
},
{
"epoch": 32.4,
"grad_norm": 1.9921866655349731,
"learning_rate": 0.00076,
"loss": 1.0319,
"step": 810
},
{
"epoch": 32.76,
"forget_cf_outputs.loss": -0.6170799136161804,
"forget_loss": 0.6170799136161804,
"gated_loss": 0.00555419921875,
"retain_loss": 0.23075489699840546,
"step": 819,
"warm_up_unlearning_weight": 1
},
{
"epoch": 32.8,
"grad_norm": 2.0169899463653564,
"learning_rate": 0.0007199999999999999,
"loss": 1.091,
"step": 820
},
{
"epoch": 33.16,
"forget_cf_outputs.loss": -0.5823113322257996,
"forget_loss": 0.5823113322257996,
"gated_loss": 0.0068359375,
"retain_loss": 0.30245938897132874,
"step": 829,
"warm_up_unlearning_weight": 1
},
{
"epoch": 33.2,
"grad_norm": 2.2369046211242676,
"learning_rate": 0.00068,
"loss": 1.126,
"step": 830
},
{
"epoch": 33.56,
"forget_cf_outputs.loss": -0.5231561660766602,
"forget_loss": 0.5231561660766602,
"gated_loss": 0.005767822265625,
"retain_loss": 0.3002181351184845,
"step": 839,
"warm_up_unlearning_weight": 1
},
{
"epoch": 33.6,
"grad_norm": 2.4003753662109375,
"learning_rate": 0.00064,
"loss": 1.0696,
"step": 840
},
{
"epoch": 33.96,
"forget_cf_outputs.loss": -0.4980463981628418,
"forget_loss": 0.4980463981628418,
"gated_loss": 0.0062255859375,
"retain_loss": 0.20359160006046295,
"step": 849,
"warm_up_unlearning_weight": 1
},
{
"epoch": 34.0,
"grad_norm": 2.4804441928863525,
"learning_rate": 0.0006,
"loss": 1.0311,
"step": 850
},
{
"epoch": 34.36,
"forget_cf_outputs.loss": -0.4477725327014923,
"forget_loss": 0.4477725327014923,
"gated_loss": 0.00592041015625,
"retain_loss": 0.20766329765319824,
"step": 859,
"warm_up_unlearning_weight": 1
},
{
"epoch": 34.4,
"grad_norm": 1.8642009496688843,
"learning_rate": 0.0005600000000000001,
"loss": 0.9964,
"step": 860
},
{
"epoch": 34.76,
"forget_cf_outputs.loss": -0.5641895532608032,
"forget_loss": 0.5641895532608032,
"gated_loss": 0.00555419921875,
"retain_loss": 0.23121777176856995,
"step": 869,
"warm_up_unlearning_weight": 1
},
{
"epoch": 34.8,
"grad_norm": 1.8299639225006104,
"learning_rate": 0.0005200000000000001,
"loss": 1.055,
"step": 870
},
{
"epoch": 35.16,
"forget_cf_outputs.loss": -0.5485031604766846,
"forget_loss": 0.5485031604766846,
"gated_loss": 0.006805419921875,
"retain_loss": 0.3044523596763611,
"step": 879,
"warm_up_unlearning_weight": 1
},
{
"epoch": 35.2,
"grad_norm": 2.0533902645111084,
"learning_rate": 0.00048,
"loss": 1.0938,
"step": 880
},
{
"epoch": 35.56,
"forget_cf_outputs.loss": -0.5070799589157104,
"forget_loss": 0.5070799589157104,
"gated_loss": 0.005828857421875,
"retain_loss": 0.3008805215358734,
"step": 889,
"warm_up_unlearning_weight": 1
},
{
"epoch": 35.6,
"grad_norm": 2.342500925064087,
"learning_rate": 0.00044,
"loss": 1.0375,
"step": 890
},
{
"epoch": 35.96,
"forget_cf_outputs.loss": -0.4470018446445465,
"forget_loss": 0.4470018446445465,
"gated_loss": 0.0062255859375,
"retain_loss": 0.20359660685062408,
"step": 899,
"warm_up_unlearning_weight": 1
},
{
"epoch": 36.0,
"grad_norm": 2.555351734161377,
"learning_rate": 0.0004,
"loss": 0.9953,
"step": 900
},
{
"epoch": 36.36,
"forget_cf_outputs.loss": -0.42907437682151794,
"forget_loss": 0.42907437682151794,
"gated_loss": 0.005950927734375,
"retain_loss": 0.20733724534511566,
"step": 909,
"warm_up_unlearning_weight": 1
},
{
"epoch": 36.4,
"grad_norm": 1.745898962020874,
"learning_rate": 0.00035999999999999997,
"loss": 0.9637,
"step": 910
},
{
"epoch": 36.76,
"forget_cf_outputs.loss": -0.5278509855270386,
"forget_loss": 0.5278509855270386,
"gated_loss": 0.00555419921875,
"retain_loss": 0.23086762428283691,
"step": 919,
"warm_up_unlearning_weight": 1
},
{
"epoch": 36.8,
"grad_norm": 1.8470394611358643,
"learning_rate": 0.00032,
"loss": 1.0207,
"step": 920
},
{
"epoch": 37.16,
"forget_cf_outputs.loss": -0.5195350646972656,
"forget_loss": 0.5195350646972656,
"gated_loss": 0.0068359375,
"retain_loss": 0.3039552867412567,
"step": 929,
"warm_up_unlearning_weight": 1
},
{
"epoch": 37.2,
"grad_norm": 2.0206496715545654,
"learning_rate": 0.00028000000000000003,
"loss": 1.0645,
"step": 930
},
{
"epoch": 37.56,
"forget_cf_outputs.loss": -0.4906051754951477,
"forget_loss": 0.4906051754951477,
"gated_loss": 0.005828857421875,
"retain_loss": 0.3013906478881836,
"step": 939,
"warm_up_unlearning_weight": 1
},
{
"epoch": 37.6,
"grad_norm": 2.380035877227783,
"learning_rate": 0.00024,
"loss": 1.0092,
"step": 940
},
{
"epoch": 37.96,
"forget_cf_outputs.loss": -0.41733187437057495,
"forget_loss": 0.41733187437057495,
"gated_loss": 0.0062255859375,
"retain_loss": 0.20396247506141663,
"step": 949,
"warm_up_unlearning_weight": 1
},
{
"epoch": 38.0,
"grad_norm": 2.3801748752593994,
"learning_rate": 0.0002,
"loss": 0.9681,
"step": 950
},
{
"epoch": 38.36,
"forget_cf_outputs.loss": -0.41866594552993774,
"forget_loss": 0.41866594552993774,
"gated_loss": 0.005950927734375,
"retain_loss": 0.20748548209667206,
"step": 959,
"warm_up_unlearning_weight": 1
},
{
"epoch": 38.4,
"grad_norm": 1.8068156242370605,
"learning_rate": 0.00016,
"loss": 0.9408,
"step": 960
},
{
"epoch": 38.76,
"forget_cf_outputs.loss": -0.503368079662323,
"forget_loss": 0.503368079662323,
"gated_loss": 0.00555419921875,
"retain_loss": 0.2300793081521988,
"step": 969,
"warm_up_unlearning_weight": 1
},
{
"epoch": 38.8,
"grad_norm": 1.8037129640579224,
"learning_rate": 0.00012,
"loss": 0.9892,
"step": 970
},
{
"epoch": 39.16,
"forget_cf_outputs.loss": -0.5070582628250122,
"forget_loss": 0.5070582628250122,
"gated_loss": 0.006805419921875,
"retain_loss": 0.3026222288608551,
"step": 979,
"warm_up_unlearning_weight": 1
},
{
"epoch": 39.2,
"grad_norm": 2.055860757827759,
"learning_rate": 8e-05,
"loss": 1.0436,
"step": 980
},
{
"epoch": 39.56,
"forget_cf_outputs.loss": -0.46062666177749634,
"forget_loss": 0.46062666177749634,
"gated_loss": 0.005828857421875,
"retain_loss": 0.30048835277557373,
"step": 989,
"warm_up_unlearning_weight": 1
},
{
"epoch": 39.6,
"grad_norm": 2.1290202140808105,
"learning_rate": 4e-05,
"loss": 0.9838,
"step": 990
},
{
"epoch": 39.96,
"forget_cf_outputs.loss": -0.39183497428894043,
"forget_loss": 0.39183497428894043,
"gated_loss": 0.0062255859375,
"retain_loss": 0.20340043306350708,
"step": 999,
"warm_up_unlearning_weight": 1
},
{
"epoch": 40.0,
"grad_norm": 2.2874202728271484,
"learning_rate": 0.0,
"loss": 0.9411,
"step": 1000
}
],
"logging_steps": 10,
"max_steps": 1000,
"num_input_tokens_seen": 0,
"num_train_epochs": 40,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}