unlearning-tofu-graddiff / trainer_state.json
MSKo's picture
Upload folder using huggingface_hub (#1)
d03996b verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.6,
"eval_steps": 500,
"global_step": 60,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0,
"forget_Q_A_PARA_Prob": 0.10203013149555773,
"forget_Q_A_PERT_Prob": 0.042262932972778805,
"forget_Q_A_Prob": 0.8794082054495811,
"forget_Q_A_ROUGE": 0.8167727170975732,
"forget_truth_ratio": 0.4771099600108043,
"model_utility": 0.6005086084956538,
"ra_Q_A_PERT_Prob": 0.006701486817293396,
"ra_Q_A_Prob": 0.016744789615659103,
"ra_Q_A_Prob_normalised": 0.4116699642309797,
"ra_Q_A_ROUGE": 0.807,
"ra_Truth_Ratio": 0.5267699222134944,
"retain_Q_A_PARA_Prob": 0.09064217003295198,
"retain_Q_A_PERT_Prob": 0.039781397786144375,
"retain_Q_A_Prob": 0.8692756203562021,
"retain_Q_A_ROUGE": 0.8015532158382885,
"retain_Truth_Ratio": 0.5168476524332276,
"step": 0,
"wf_Q_A_PERT_Prob": 0.0020705466852406146,
"wf_Q_A_Prob": 0.005700615959729778,
"wf_Q_A_Prob_normalised": 0.43773736892877796,
"wf_Q_A_ROUGE": 0.8364672364672365,
"wf_Truth_Ratio": 0.6219292647960423
},
{
"epoch": 0.8,
"grad_norm": 5.8125,
"learning_rate": 8.333333333333334e-06,
"loss": -0.02,
"step": 5
},
{
"epoch": 0.96,
"forget_Q_A_PARA_Prob": 0.09847796681104228,
"forget_Q_A_PERT_Prob": 0.040619134066480905,
"forget_Q_A_Prob": 0.864417289942503,
"forget_Q_A_ROUGE": 0.7766220569608907,
"forget_truth_ratio": 0.47638448812151046,
"model_utility": 0.5975087905014809,
"ra_Q_A_PERT_Prob": 0.006394159720402966,
"ra_Q_A_Prob": 0.015820167160964047,
"ra_Q_A_Prob_normalised": 0.41041366764582365,
"ra_Q_A_ROUGE": 0.777,
"ra_Truth_Ratio": 0.5271842454552385,
"retain_Q_A_PARA_Prob": 0.08715349238598719,
"retain_Q_A_PERT_Prob": 0.03806647144458753,
"retain_Q_A_Prob": 0.8727010402083397,
"retain_Q_A_ROUGE": 0.7914930746967861,
"retain_Truth_Ratio": 0.5175890218892292,
"step": 6,
"wf_Q_A_PERT_Prob": 0.001985204046405395,
"wf_Q_A_Prob": 0.00539462376765188,
"wf_Q_A_Prob_normalised": 0.4370356662995382,
"wf_Q_A_ROUGE": 0.827920227920228,
"wf_Truth_Ratio": 0.6231397564594454
},
{
"epoch": 1.6,
"grad_norm": 8.25,
"learning_rate": 9.25925925925926e-06,
"loss": -0.0338,
"step": 10
},
{
"epoch": 1.92,
"forget_Q_A_PARA_Prob": 0.07399103340110741,
"forget_Q_A_PERT_Prob": 0.029306427133338725,
"forget_Q_A_Prob": 0.7740213040821254,
"forget_Q_A_ROUGE": 0.6595271131334257,
"forget_truth_ratio": 0.46773561050100876,
"model_utility": 0.5893980977084695,
"ra_Q_A_PERT_Prob": 0.004266789740991801,
"ra_Q_A_Prob": 0.010450768835853523,
"ra_Q_A_Prob_normalised": 0.4032027476780875,
"ra_Q_A_ROUGE": 0.75,
"ra_Truth_Ratio": 0.5291234473401906,
"retain_Q_A_PARA_Prob": 0.0634593945366214,
"retain_Q_A_PERT_Prob": 0.026901972586099675,
"retain_Q_A_Prob": 0.8664361499994994,
"retain_Q_A_ROUGE": 0.7475174987799494,
"retain_Truth_Ratio": 0.5218488540542622,
"step": 12,
"wf_Q_A_PERT_Prob": 0.0011738253327534733,
"wf_Q_A_Prob": 0.0032587294159555527,
"wf_Q_A_Prob_normalised": 0.4264818107064885,
"wf_Q_A_ROUGE": 0.8364672364672365,
"wf_Truth_Ratio": 0.6180570007754097
},
{
"epoch": 2.4,
"grad_norm": 28.625,
"learning_rate": 8.333333333333334e-06,
"loss": -0.1547,
"step": 15
},
{
"epoch": 2.88,
"forget_Q_A_PARA_Prob": 0.029494350103268518,
"forget_Q_A_PERT_Prob": 0.010788252533970309,
"forget_Q_A_Prob": 0.42890838033519685,
"forget_Q_A_ROUGE": 0.47554562025573865,
"forget_truth_ratio": 0.4270363127789192,
"model_utility": 0.4938221340759587,
"ra_Q_A_PERT_Prob": 0.002862240351208592,
"ra_Q_A_Prob": 0.005310191345149633,
"ra_Q_A_Prob_normalised": 0.35948261719370866,
"ra_Q_A_ROUGE": 0.5386666666666666,
"ra_Truth_Ratio": 0.48806928121618726,
"retain_Q_A_PARA_Prob": 0.023005029767518864,
"retain_Q_A_PERT_Prob": 0.009349260415194934,
"retain_Q_A_Prob": 0.5418562651053072,
"retain_Q_A_ROUGE": 0.48791049224399674,
"retain_Truth_Ratio": 0.535666342753256,
"step": 18,
"wf_Q_A_PERT_Prob": 0.0006206208060901188,
"wf_Q_A_Prob": 0.0016164754898864997,
"wf_Q_A_Prob_normalised": 0.3695440150531716,
"wf_Q_A_ROUGE": 0.8015669515669516,
"wf_Truth_Ratio": 0.5486688898270423
},
{
"epoch": 3.2,
"grad_norm": 64.0,
"learning_rate": 7.4074074074074075e-06,
"loss": -0.3682,
"step": 20
},
{
"epoch": 4.0,
"grad_norm": 76.0,
"learning_rate": 6.481481481481482e-06,
"loss": -0.6335,
"step": 25
},
{
"epoch": 4.0,
"forget_Q_A_PARA_Prob": 0.022825238737423207,
"forget_Q_A_PERT_Prob": 0.008707179363070963,
"forget_Q_A_Prob": 0.32061772370303515,
"forget_Q_A_ROUGE": 0.4246722074970103,
"forget_truth_ratio": 0.4505302561799914,
"model_utility": 0.4981323727560218,
"ra_Q_A_PERT_Prob": 0.0018825544706558498,
"ra_Q_A_Prob": 0.0035706917219795286,
"ra_Q_A_Prob_normalised": 0.3627041594788875,
"ra_Q_A_ROUGE": 0.542,
"ra_Truth_Ratio": 0.5090475941918271,
"retain_Q_A_PARA_Prob": 0.019999133674391486,
"retain_Q_A_PERT_Prob": 0.008501721144224763,
"retain_Q_A_Prob": 0.5289758392423392,
"retain_Q_A_ROUGE": 0.45165674425660607,
"retain_Truth_Ratio": 0.525029996959407,
"step": 25,
"wf_Q_A_PERT_Prob": 0.00039124924958522844,
"wf_Q_A_Prob": 0.0010230127496096808,
"wf_Q_A_Prob_normalised": 0.3892502383463082,
"wf_Q_A_ROUGE": 0.8272079772079772,
"wf_Truth_Ratio": 0.5834813665954073
},
{
"epoch": 4.8,
"grad_norm": 95.5,
"learning_rate": 5.555555555555557e-06,
"loss": -1.0036,
"step": 30
},
{
"epoch": 4.96,
"forget_Q_A_PARA_Prob": 0.016297213927791744,
"forget_Q_A_PERT_Prob": 0.006429708399797391,
"forget_Q_A_Prob": 0.21623114350717515,
"forget_Q_A_ROUGE": 0.40352585788557677,
"forget_truth_ratio": 0.4535519157830674,
"model_utility": 0.4689847440154844,
"ra_Q_A_PERT_Prob": 0.0016007507892272226,
"ra_Q_A_Prob": 0.0027378251486902625,
"ra_Q_A_Prob_normalised": 0.34992650866051084,
"ra_Q_A_ROUGE": 0.4903333333333333,
"ra_Truth_Ratio": 0.49958388756148436,
"retain_Q_A_PARA_Prob": 0.014509558475438097,
"retain_Q_A_PERT_Prob": 0.0062653698918112245,
"retain_Q_A_Prob": 0.42842917014844717,
"retain_Q_A_ROUGE": 0.4159923793722353,
"retain_Truth_Ratio": 0.5252854380777774,
"step": 31,
"wf_Q_A_PERT_Prob": 0.00032658670132012314,
"wf_Q_A_Prob": 0.0008091054565828816,
"wf_Q_A_Prob_normalised": 0.38135258950349965,
"wf_Q_A_ROUGE": 0.79017094017094,
"wf_Truth_Ratio": 0.5675263659637196
},
{
"epoch": 5.6,
"grad_norm": 102.5,
"learning_rate": 4.62962962962963e-06,
"loss": -1.2174,
"step": 35
},
{
"epoch": 5.92,
"forget_Q_A_PARA_Prob": 0.013320248301934044,
"forget_Q_A_PERT_Prob": 0.005359866307460322,
"forget_Q_A_Prob": 0.1701333471563703,
"forget_Q_A_ROUGE": 0.3893579632252991,
"forget_truth_ratio": 0.45182502190223545,
"model_utility": 0.4605119280905741,
"ra_Q_A_PERT_Prob": 0.0014502067912881985,
"ra_Q_A_Prob": 0.002283854310616391,
"ra_Q_A_Prob_normalised": 0.3426354827622861,
"ra_Q_A_ROUGE": 0.5186666666666667,
"ra_Truth_Ratio": 0.49187928673425846,
"retain_Q_A_PARA_Prob": 0.012149485763020494,
"retain_Q_A_PERT_Prob": 0.005184423866649652,
"retain_Q_A_Prob": 0.3953410206688568,
"retain_Q_A_ROUGE": 0.40594121669274513,
"retain_Truth_Ratio": 0.5262515998417391,
"step": 37,
"wf_Q_A_PERT_Prob": 0.00028506373011880564,
"wf_Q_A_Prob": 0.0006574596959592552,
"wf_Q_A_Prob_normalised": 0.37066396527061546,
"wf_Q_A_ROUGE": 0.7873219373219373,
"wf_Truth_Ratio": 0.5546987766336627
},
{
"epoch": 6.4,
"grad_norm": 110.0,
"learning_rate": 3.7037037037037037e-06,
"loss": -1.4013,
"step": 40
},
{
"epoch": 6.88,
"forget_Q_A_PARA_Prob": 0.012247884544667614,
"forget_Q_A_PERT_Prob": 0.005036943114251244,
"forget_Q_A_Prob": 0.15036444320954614,
"forget_Q_A_ROUGE": 0.3864768855204044,
"forget_truth_ratio": 0.45294569247206007,
"model_utility": 0.45632718347300794,
"ra_Q_A_PERT_Prob": 0.0013401418308045029,
"ra_Q_A_Prob": 0.0021134812324058315,
"ra_Q_A_Prob_normalised": 0.3396657603929745,
"ra_Q_A_ROUGE": 0.48066666666666663,
"ra_Truth_Ratio": 0.486192937273368,
"retain_Q_A_PARA_Prob": 0.011550714041468381,
"retain_Q_A_PERT_Prob": 0.004946324555627044,
"retain_Q_A_Prob": 0.3944013632973656,
"retain_Q_A_ROUGE": 0.4117662006946269,
"retain_Truth_Ratio": 0.528704070223116,
"step": 43,
"wf_Q_A_PERT_Prob": 0.0002627398549638734,
"wf_Q_A_Prob": 0.0006088191181077826,
"wf_Q_A_Prob_normalised": 0.3683084379921478,
"wf_Q_A_ROUGE": 0.7830484330484331,
"wf_Truth_Ratio": 0.5574956770152373
},
{
"epoch": 7.2,
"grad_norm": 105.5,
"learning_rate": 2.7777777777777783e-06,
"loss": -1.6715,
"step": 45
},
{
"epoch": 8.0,
"grad_norm": 105.5,
"learning_rate": 1.8518518518518519e-06,
"loss": -1.6866,
"step": 50
},
{
"epoch": 8.0,
"forget_Q_A_PARA_Prob": 0.01165698835206058,
"forget_Q_A_PERT_Prob": 0.004784465665314599,
"forget_Q_A_Prob": 0.1409262517654861,
"forget_Q_A_ROUGE": 0.3853420125934032,
"forget_truth_ratio": 0.45286755399160056,
"model_utility": 0.45785485616455157,
"ra_Q_A_PERT_Prob": 0.0012624921512297978,
"ra_Q_A_Prob": 0.0019572545638450833,
"ra_Q_A_Prob_normalised": 0.33708947240715936,
"ra_Q_A_ROUGE": 0.5136666666666667,
"ra_Truth_Ratio": 0.4830684580107244,
"retain_Q_A_PARA_Prob": 0.011154631597355547,
"retain_Q_A_PERT_Prob": 0.00474388190690015,
"retain_Q_A_Prob": 0.39457711114082483,
"retain_Q_A_ROUGE": 0.40390020891646317,
"retain_Truth_Ratio": 0.530181033678879,
"step": 50,
"wf_Q_A_PERT_Prob": 0.00024781451680188493,
"wf_Q_A_Prob": 0.000590465893023208,
"wf_Q_A_Prob_normalised": 0.36882322011476676,
"wf_Q_A_ROUGE": 0.7915954415954417,
"wf_Truth_Ratio": 0.5547968676582311
},
{
"epoch": 8.8,
"grad_norm": 111.0,
"learning_rate": 9.259259259259259e-07,
"loss": -1.8808,
"step": 55
},
{
"epoch": 8.96,
"forget_Q_A_PARA_Prob": 0.011548278887548804,
"forget_Q_A_PERT_Prob": 0.004766597765372808,
"forget_Q_A_Prob": 0.13767883754961077,
"forget_Q_A_ROUGE": 0.38167919519324256,
"forget_truth_ratio": 0.4518345413593613,
"model_utility": 0.4572918713236023,
"ra_Q_A_PERT_Prob": 0.0012244680082650562,
"ra_Q_A_Prob": 0.001937580604701452,
"ra_Q_A_Prob_normalised": 0.33616675247851513,
"ra_Q_A_ROUGE": 0.5136666666666667,
"ra_Truth_Ratio": 0.4828442270725617,
"retain_Q_A_PARA_Prob": 0.011029426595423502,
"retain_Q_A_PERT_Prob": 0.0047029192964760744,
"retain_Q_A_Prob": 0.39323330818675456,
"retain_Q_A_ROUGE": 0.4034188604618481,
"retain_Truth_Ratio": 0.5289516408948064,
"step": 56,
"wf_Q_A_PERT_Prob": 0.0002483615086286227,
"wf_Q_A_Prob": 0.0005842194486731583,
"wf_Q_A_Prob_normalised": 0.368697058136704,
"wf_Q_A_ROUGE": 0.7915954415954417,
"wf_Truth_Ratio": 0.55536081743841
},
{
"epoch": 9.6,
"grad_norm": 112.5,
"learning_rate": 0.0,
"loss": -1.8657,
"step": 60
},
{
"epoch": 9.6,
"forget_Q_A_PARA_Prob": 0.011589295231115102,
"forget_Q_A_PERT_Prob": 0.004739246443255553,
"forget_Q_A_Prob": 0.13900133552757324,
"forget_Q_A_ROUGE": 0.384309664120033,
"forget_truth_ratio": 0.452656394665064,
"model_utility": 0.4601335429232997,
"ra_Q_A_PERT_Prob": 0.0012687713540489507,
"ra_Q_A_Prob": 0.0019638981411819146,
"ra_Q_A_Prob_normalised": 0.3378397275960433,
"ra_Q_A_ROUGE": 0.5286666666666666,
"ra_Truth_Ratio": 0.48694460359142416,
"retain_Q_A_PARA_Prob": 0.011170326338497034,
"retain_Q_A_PERT_Prob": 0.004777183987491883,
"retain_Q_A_Prob": 0.3971325243171304,
"retain_Q_A_ROUGE": 0.40513202099256107,
"retain_Truth_Ratio": 0.5295923120496997,
"step": 60,
"wf_Q_A_PERT_Prob": 0.00024279495572724724,
"wf_Q_A_Prob": 0.0005832649986044549,
"wf_Q_A_Prob_normalised": 0.37083602701806173,
"wf_Q_A_ROUGE": 0.7745014245014246,
"wf_Truth_Ratio": 0.5580379667626183
},
{
"epoch": 9.6,
"step": 60,
"total_flos": 0.0,
"train_loss": -0.9947510037571192,
"train_runtime": 1021.2635,
"train_samples_per_second": 3.917,
"train_steps_per_second": 0.059
}
],
"logging_steps": 5,
"max_steps": 60,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}