{ "best_metric": 0.028283841907978058, "best_model_checkpoint": "./bert_sensitive_columns/checkpoint-1044", "epoch": 4.0, "eval_steps": 500, "global_step": 1044, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.038314176245210725, "grad_norm": 6.37937593460083, "learning_rate": 9.923371647509579e-06, "loss": 0.7375, "step": 10 }, { "epoch": 0.07662835249042145, "grad_norm": 4.292376518249512, "learning_rate": 9.846743295019157e-06, "loss": 0.6499, "step": 20 }, { "epoch": 0.11494252873563218, "grad_norm": 6.317875385284424, "learning_rate": 9.770114942528738e-06, "loss": 0.6161, "step": 30 }, { "epoch": 0.1532567049808429, "grad_norm": 7.314127445220947, "learning_rate": 9.693486590038314e-06, "loss": 0.5928, "step": 40 }, { "epoch": 0.19157088122605365, "grad_norm": 9.533055305480957, "learning_rate": 9.616858237547894e-06, "loss": 0.5842, "step": 50 }, { "epoch": 0.22988505747126436, "grad_norm": 10.419981956481934, "learning_rate": 9.54022988505747e-06, "loss": 0.539, "step": 60 }, { "epoch": 0.2681992337164751, "grad_norm": 12.050772666931152, "learning_rate": 9.46360153256705e-06, "loss": 0.486, "step": 70 }, { "epoch": 0.3065134099616858, "grad_norm": 6.0962958335876465, "learning_rate": 9.386973180076629e-06, "loss": 0.4488, "step": 80 }, { "epoch": 0.3448275862068966, "grad_norm": 9.836843490600586, "learning_rate": 9.310344827586207e-06, "loss": 0.4442, "step": 90 }, { "epoch": 0.3831417624521073, "grad_norm": 10.53395938873291, "learning_rate": 9.233716475095786e-06, "loss": 0.389, "step": 100 }, { "epoch": 0.421455938697318, "grad_norm": 12.318860054016113, "learning_rate": 9.157088122605364e-06, "loss": 0.3499, "step": 110 }, { "epoch": 0.45977011494252873, "grad_norm": 15.045988082885742, "learning_rate": 9.080459770114942e-06, "loss": 0.3011, "step": 120 }, { "epoch": 0.49808429118773945, "grad_norm": 11.415493965148926, "learning_rate": 9.003831417624522e-06, "loss": 0.3427, "step": 130 }, { "epoch": 0.5363984674329502, "grad_norm": 12.154045104980469, "learning_rate": 8.9272030651341e-06, "loss": 0.2912, "step": 140 }, { "epoch": 0.5747126436781609, "grad_norm": 12.373332977294922, "learning_rate": 8.85057471264368e-06, "loss": 0.291, "step": 150 }, { "epoch": 0.6130268199233716, "grad_norm": 11.356131553649902, "learning_rate": 8.773946360153257e-06, "loss": 0.3067, "step": 160 }, { "epoch": 0.6513409961685823, "grad_norm": 14.210739135742188, "learning_rate": 8.697318007662836e-06, "loss": 0.1868, "step": 170 }, { "epoch": 0.6896551724137931, "grad_norm": 14.556158065795898, "learning_rate": 8.620689655172414e-06, "loss": 0.1947, "step": 180 }, { "epoch": 0.7279693486590039, "grad_norm": 6.439550399780273, "learning_rate": 8.544061302681992e-06, "loss": 0.3156, "step": 190 }, { "epoch": 0.7662835249042146, "grad_norm": 17.6083984375, "learning_rate": 8.467432950191573e-06, "loss": 0.2002, "step": 200 }, { "epoch": 0.8045977011494253, "grad_norm": 15.961342811584473, "learning_rate": 8.390804597701149e-06, "loss": 0.13, "step": 210 }, { "epoch": 0.842911877394636, "grad_norm": 31.289339065551758, "learning_rate": 8.31417624521073e-06, "loss": 0.2278, "step": 220 }, { "epoch": 0.8812260536398467, "grad_norm": 15.521924018859863, "learning_rate": 8.237547892720307e-06, "loss": 0.2059, "step": 230 }, { "epoch": 0.9195402298850575, "grad_norm": 15.326404571533203, "learning_rate": 8.160919540229886e-06, "loss": 0.2234, "step": 240 }, { "epoch": 0.9578544061302682, "grad_norm": 19.96432876586914, "learning_rate": 8.084291187739464e-06, "loss": 0.1941, "step": 250 }, { "epoch": 0.9961685823754789, "grad_norm": 18.93885040283203, "learning_rate": 8.007662835249042e-06, "loss": 0.201, "step": 260 }, { "epoch": 1.0, "eval_loss": 0.11205583065748215, "eval_runtime": 1.2974, "eval_samples_per_second": 803.139, "eval_steps_per_second": 50.871, "step": 261 }, { "epoch": 1.0344827586206897, "grad_norm": 9.7966947555542, "learning_rate": 7.93103448275862e-06, "loss": 0.1023, "step": 270 }, { "epoch": 1.0727969348659003, "grad_norm": 19.185115814208984, "learning_rate": 7.854406130268199e-06, "loss": 0.1514, "step": 280 }, { "epoch": 1.1111111111111112, "grad_norm": 15.477436065673828, "learning_rate": 7.77777777777778e-06, "loss": 0.1278, "step": 290 }, { "epoch": 1.1494252873563218, "grad_norm": 1.358453392982483, "learning_rate": 7.701149425287356e-06, "loss": 0.1164, "step": 300 }, { "epoch": 1.1877394636015326, "grad_norm": 19.57685661315918, "learning_rate": 7.624521072796936e-06, "loss": 0.1216, "step": 310 }, { "epoch": 1.2260536398467432, "grad_norm": 8.200814247131348, "learning_rate": 7.547892720306514e-06, "loss": 0.1565, "step": 320 }, { "epoch": 1.264367816091954, "grad_norm": 2.0913987159729004, "learning_rate": 7.4712643678160925e-06, "loss": 0.1394, "step": 330 }, { "epoch": 1.3026819923371646, "grad_norm": 10.067784309387207, "learning_rate": 7.394636015325672e-06, "loss": 0.133, "step": 340 }, { "epoch": 1.3409961685823755, "grad_norm": 27.741924285888672, "learning_rate": 7.318007662835249e-06, "loss": 0.1483, "step": 350 }, { "epoch": 1.3793103448275863, "grad_norm": 56.61354064941406, "learning_rate": 7.241379310344828e-06, "loss": 0.1156, "step": 360 }, { "epoch": 1.417624521072797, "grad_norm": 19.167062759399414, "learning_rate": 7.1647509578544075e-06, "loss": 0.1101, "step": 370 }, { "epoch": 1.4559386973180077, "grad_norm": 24.54031753540039, "learning_rate": 7.088122605363985e-06, "loss": 0.0848, "step": 380 }, { "epoch": 1.4942528735632183, "grad_norm": 0.2951218783855438, "learning_rate": 7.011494252873564e-06, "loss": 0.062, "step": 390 }, { "epoch": 1.5325670498084292, "grad_norm": 0.5242842435836792, "learning_rate": 6.934865900383142e-06, "loss": 0.1586, "step": 400 }, { "epoch": 1.5708812260536398, "grad_norm": 1.8837841749191284, "learning_rate": 6.858237547892721e-06, "loss": 0.0764, "step": 410 }, { "epoch": 1.6091954022988506, "grad_norm": 8.144768714904785, "learning_rate": 6.781609195402299e-06, "loss": 0.1034, "step": 420 }, { "epoch": 1.6475095785440614, "grad_norm": 25.173917770385742, "learning_rate": 6.7049808429118775e-06, "loss": 0.1133, "step": 430 }, { "epoch": 1.685823754789272, "grad_norm": 36.27584457397461, "learning_rate": 6.628352490421457e-06, "loss": 0.1259, "step": 440 }, { "epoch": 1.7241379310344827, "grad_norm": 44.28847122192383, "learning_rate": 6.551724137931035e-06, "loss": 0.0782, "step": 450 }, { "epoch": 1.7624521072796935, "grad_norm": 30.58072280883789, "learning_rate": 6.475095785440614e-06, "loss": 0.1196, "step": 460 }, { "epoch": 1.8007662835249043, "grad_norm": 1.082352876663208, "learning_rate": 6.398467432950192e-06, "loss": 0.1099, "step": 470 }, { "epoch": 1.839080459770115, "grad_norm": 0.34228336811065674, "learning_rate": 6.321839080459771e-06, "loss": 0.0765, "step": 480 }, { "epoch": 1.8773946360153255, "grad_norm": 53.12428283691406, "learning_rate": 6.24521072796935e-06, "loss": 0.1151, "step": 490 }, { "epoch": 1.9157088122605364, "grad_norm": 0.43705159425735474, "learning_rate": 6.1685823754789275e-06, "loss": 0.0485, "step": 500 }, { "epoch": 1.9540229885057472, "grad_norm": 1.2276843786239624, "learning_rate": 6.091954022988507e-06, "loss": 0.0962, "step": 510 }, { "epoch": 1.9923371647509578, "grad_norm": 13.329333305358887, "learning_rate": 6.015325670498084e-06, "loss": 0.0608, "step": 520 }, { "epoch": 2.0, "eval_loss": 0.06104712933301926, "eval_runtime": 1.1775, "eval_samples_per_second": 884.96, "eval_steps_per_second": 56.053, "step": 522 }, { "epoch": 2.0306513409961684, "grad_norm": 1.5714704990386963, "learning_rate": 5.938697318007663e-06, "loss": 0.0464, "step": 530 }, { "epoch": 2.0689655172413794, "grad_norm": 10.750398635864258, "learning_rate": 5.862068965517242e-06, "loss": 0.0518, "step": 540 }, { "epoch": 2.10727969348659, "grad_norm": 26.733074188232422, "learning_rate": 5.78544061302682e-06, "loss": 0.1253, "step": 550 }, { "epoch": 2.1455938697318007, "grad_norm": 7.556675434112549, "learning_rate": 5.708812260536399e-06, "loss": 0.0313, "step": 560 }, { "epoch": 2.1839080459770113, "grad_norm": 24.295198440551758, "learning_rate": 5.6321839080459775e-06, "loss": 0.0909, "step": 570 }, { "epoch": 2.2222222222222223, "grad_norm": 19.714115142822266, "learning_rate": 5.555555555555557e-06, "loss": 0.0387, "step": 580 }, { "epoch": 2.260536398467433, "grad_norm": 0.8200851082801819, "learning_rate": 5.478927203065134e-06, "loss": 0.0455, "step": 590 }, { "epoch": 2.2988505747126435, "grad_norm": 0.183212548494339, "learning_rate": 5.402298850574713e-06, "loss": 0.0327, "step": 600 }, { "epoch": 2.3371647509578546, "grad_norm": 61.08492660522461, "learning_rate": 5.3256704980842925e-06, "loss": 0.0621, "step": 610 }, { "epoch": 2.375478927203065, "grad_norm": 0.16310882568359375, "learning_rate": 5.24904214559387e-06, "loss": 0.0545, "step": 620 }, { "epoch": 2.413793103448276, "grad_norm": 39.58172607421875, "learning_rate": 5.172413793103449e-06, "loss": 0.0539, "step": 630 }, { "epoch": 2.4521072796934864, "grad_norm": 23.265289306640625, "learning_rate": 5.095785440613027e-06, "loss": 0.1148, "step": 640 }, { "epoch": 2.4904214559386975, "grad_norm": 0.07729102671146393, "learning_rate": 5.019157088122606e-06, "loss": 0.0281, "step": 650 }, { "epoch": 2.528735632183908, "grad_norm": 15.417061805725098, "learning_rate": 4.942528735632184e-06, "loss": 0.0086, "step": 660 }, { "epoch": 2.5670498084291187, "grad_norm": 0.08401647210121155, "learning_rate": 4.8659003831417625e-06, "loss": 0.0334, "step": 670 }, { "epoch": 2.6053639846743293, "grad_norm": 2.524700164794922, "learning_rate": 4.789272030651342e-06, "loss": 0.0206, "step": 680 }, { "epoch": 2.6436781609195403, "grad_norm": 0.24694575369358063, "learning_rate": 4.71264367816092e-06, "loss": 0.0038, "step": 690 }, { "epoch": 2.681992337164751, "grad_norm": 0.28223150968551636, "learning_rate": 4.636015325670498e-06, "loss": 0.0411, "step": 700 }, { "epoch": 2.7203065134099615, "grad_norm": 0.08240451663732529, "learning_rate": 4.5593869731800775e-06, "loss": 0.0744, "step": 710 }, { "epoch": 2.7586206896551726, "grad_norm": 10.965692520141602, "learning_rate": 4.482758620689656e-06, "loss": 0.0412, "step": 720 }, { "epoch": 2.796934865900383, "grad_norm": 0.4931705892086029, "learning_rate": 4.406130268199234e-06, "loss": 0.014, "step": 730 }, { "epoch": 2.835249042145594, "grad_norm": 0.066756471991539, "learning_rate": 4.3295019157088125e-06, "loss": 0.0271, "step": 740 }, { "epoch": 2.873563218390805, "grad_norm": 0.06784966588020325, "learning_rate": 4.252873563218391e-06, "loss": 0.0026, "step": 750 }, { "epoch": 2.9118773946360155, "grad_norm": 0.06324368715286255, "learning_rate": 4.17624521072797e-06, "loss": 0.0961, "step": 760 }, { "epoch": 2.950191570881226, "grad_norm": 28.81197166442871, "learning_rate": 4.099616858237548e-06, "loss": 0.0472, "step": 770 }, { "epoch": 2.9885057471264367, "grad_norm": 0.05656365305185318, "learning_rate": 4.022988505747127e-06, "loss": 0.0078, "step": 780 }, { "epoch": 3.0, "eval_loss": 0.028292344883084297, "eval_runtime": 1.1733, "eval_samples_per_second": 888.059, "eval_steps_per_second": 56.249, "step": 783 }, { "epoch": 3.0268199233716473, "grad_norm": 42.40280532836914, "learning_rate": 3.946360153256705e-06, "loss": 0.0335, "step": 790 }, { "epoch": 3.0651340996168583, "grad_norm": 0.05205749720335007, "learning_rate": 3.869731800766283e-06, "loss": 0.0405, "step": 800 }, { "epoch": 3.103448275862069, "grad_norm": 0.09792916476726532, "learning_rate": 3.793103448275862e-06, "loss": 0.0278, "step": 810 }, { "epoch": 3.1417624521072796, "grad_norm": 0.05216526985168457, "learning_rate": 3.7164750957854412e-06, "loss": 0.0041, "step": 820 }, { "epoch": 3.1800766283524906, "grad_norm": 0.1886385679244995, "learning_rate": 3.6398467432950196e-06, "loss": 0.0354, "step": 830 }, { "epoch": 3.218390804597701, "grad_norm": 0.0512104369699955, "learning_rate": 3.563218390804598e-06, "loss": 0.0609, "step": 840 }, { "epoch": 3.256704980842912, "grad_norm": 0.34635305404663086, "learning_rate": 3.4865900383141767e-06, "loss": 0.0506, "step": 850 }, { "epoch": 3.2950191570881224, "grad_norm": 11.31212329864502, "learning_rate": 3.409961685823755e-06, "loss": 0.0429, "step": 860 }, { "epoch": 3.3333333333333335, "grad_norm": 0.20496389269828796, "learning_rate": 3.3333333333333333e-06, "loss": 0.041, "step": 870 }, { "epoch": 3.371647509578544, "grad_norm": 9.11811351776123, "learning_rate": 3.256704980842912e-06, "loss": 0.003, "step": 880 }, { "epoch": 3.4099616858237547, "grad_norm": 39.507572174072266, "learning_rate": 3.180076628352491e-06, "loss": 0.1216, "step": 890 }, { "epoch": 3.4482758620689653, "grad_norm": 0.24006924033164978, "learning_rate": 3.103448275862069e-06, "loss": 0.0022, "step": 900 }, { "epoch": 3.4865900383141764, "grad_norm": 0.1950913518667221, "learning_rate": 3.026819923371648e-06, "loss": 0.0233, "step": 910 }, { "epoch": 3.524904214559387, "grad_norm": 0.059875085949897766, "learning_rate": 2.9501915708812262e-06, "loss": 0.0182, "step": 920 }, { "epoch": 3.5632183908045976, "grad_norm": 65.01390838623047, "learning_rate": 2.8735632183908046e-06, "loss": 0.0522, "step": 930 }, { "epoch": 3.6015325670498086, "grad_norm": 0.30775925517082214, "learning_rate": 2.796934865900383e-06, "loss": 0.0019, "step": 940 }, { "epoch": 3.6398467432950192, "grad_norm": 6.678956985473633, "learning_rate": 2.720306513409962e-06, "loss": 0.0395, "step": 950 }, { "epoch": 3.67816091954023, "grad_norm": 0.08484911918640137, "learning_rate": 2.6436781609195404e-06, "loss": 0.0071, "step": 960 }, { "epoch": 3.716475095785441, "grad_norm": 0.0435795895755291, "learning_rate": 2.567049808429119e-06, "loss": 0.01, "step": 970 }, { "epoch": 3.7547892720306515, "grad_norm": 62.425113677978516, "learning_rate": 2.4904214559386975e-06, "loss": 0.0811, "step": 980 }, { "epoch": 3.793103448275862, "grad_norm": 0.05430278554558754, "learning_rate": 2.4137931034482762e-06, "loss": 0.0021, "step": 990 }, { "epoch": 3.8314176245210727, "grad_norm": 0.05314672365784645, "learning_rate": 2.3371647509578546e-06, "loss": 0.0015, "step": 1000 }, { "epoch": 3.8697318007662833, "grad_norm": 0.04625044763088226, "learning_rate": 2.260536398467433e-06, "loss": 0.0287, "step": 1010 }, { "epoch": 3.9080459770114944, "grad_norm": 0.07921384274959564, "learning_rate": 2.1839080459770117e-06, "loss": 0.0402, "step": 1020 }, { "epoch": 3.946360153256705, "grad_norm": 0.07240170985460281, "learning_rate": 2.1072796934865904e-06, "loss": 0.03, "step": 1030 }, { "epoch": 3.9846743295019156, "grad_norm": 0.04859640449285507, "learning_rate": 2.0306513409961687e-06, "loss": 0.0063, "step": 1040 }, { "epoch": 4.0, "eval_loss": 0.028283841907978058, "eval_runtime": 1.1847, "eval_samples_per_second": 879.559, "eval_steps_per_second": 55.711, "step": 1044 } ], "logging_steps": 10, "max_steps": 1305, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 111269254200720.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }