| { | |
| "best_metric": 0.028283841907978058, | |
| "best_model_checkpoint": "./bert_sensitive_columns/checkpoint-1044", | |
| "epoch": 4.0, | |
| "eval_steps": 500, | |
| "global_step": 1044, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.038314176245210725, | |
| "grad_norm": 6.37937593460083, | |
| "learning_rate": 9.923371647509579e-06, | |
| "loss": 0.7375, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.07662835249042145, | |
| "grad_norm": 4.292376518249512, | |
| "learning_rate": 9.846743295019157e-06, | |
| "loss": 0.6499, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.11494252873563218, | |
| "grad_norm": 6.317875385284424, | |
| "learning_rate": 9.770114942528738e-06, | |
| "loss": 0.6161, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.1532567049808429, | |
| "grad_norm": 7.314127445220947, | |
| "learning_rate": 9.693486590038314e-06, | |
| "loss": 0.5928, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.19157088122605365, | |
| "grad_norm": 9.533055305480957, | |
| "learning_rate": 9.616858237547894e-06, | |
| "loss": 0.5842, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.22988505747126436, | |
| "grad_norm": 10.419981956481934, | |
| "learning_rate": 9.54022988505747e-06, | |
| "loss": 0.539, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.2681992337164751, | |
| "grad_norm": 12.050772666931152, | |
| "learning_rate": 9.46360153256705e-06, | |
| "loss": 0.486, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.3065134099616858, | |
| "grad_norm": 6.0962958335876465, | |
| "learning_rate": 9.386973180076629e-06, | |
| "loss": 0.4488, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.3448275862068966, | |
| "grad_norm": 9.836843490600586, | |
| "learning_rate": 9.310344827586207e-06, | |
| "loss": 0.4442, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.3831417624521073, | |
| "grad_norm": 10.53395938873291, | |
| "learning_rate": 9.233716475095786e-06, | |
| "loss": 0.389, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.421455938697318, | |
| "grad_norm": 12.318860054016113, | |
| "learning_rate": 9.157088122605364e-06, | |
| "loss": 0.3499, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.45977011494252873, | |
| "grad_norm": 15.045988082885742, | |
| "learning_rate": 9.080459770114942e-06, | |
| "loss": 0.3011, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.49808429118773945, | |
| "grad_norm": 11.415493965148926, | |
| "learning_rate": 9.003831417624522e-06, | |
| "loss": 0.3427, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.5363984674329502, | |
| "grad_norm": 12.154045104980469, | |
| "learning_rate": 8.9272030651341e-06, | |
| "loss": 0.2912, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.5747126436781609, | |
| "grad_norm": 12.373332977294922, | |
| "learning_rate": 8.85057471264368e-06, | |
| "loss": 0.291, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.6130268199233716, | |
| "grad_norm": 11.356131553649902, | |
| "learning_rate": 8.773946360153257e-06, | |
| "loss": 0.3067, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.6513409961685823, | |
| "grad_norm": 14.210739135742188, | |
| "learning_rate": 8.697318007662836e-06, | |
| "loss": 0.1868, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.6896551724137931, | |
| "grad_norm": 14.556158065795898, | |
| "learning_rate": 8.620689655172414e-06, | |
| "loss": 0.1947, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.7279693486590039, | |
| "grad_norm": 6.439550399780273, | |
| "learning_rate": 8.544061302681992e-06, | |
| "loss": 0.3156, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.7662835249042146, | |
| "grad_norm": 17.6083984375, | |
| "learning_rate": 8.467432950191573e-06, | |
| "loss": 0.2002, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.8045977011494253, | |
| "grad_norm": 15.961342811584473, | |
| "learning_rate": 8.390804597701149e-06, | |
| "loss": 0.13, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.842911877394636, | |
| "grad_norm": 31.289339065551758, | |
| "learning_rate": 8.31417624521073e-06, | |
| "loss": 0.2278, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.8812260536398467, | |
| "grad_norm": 15.521924018859863, | |
| "learning_rate": 8.237547892720307e-06, | |
| "loss": 0.2059, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.9195402298850575, | |
| "grad_norm": 15.326404571533203, | |
| "learning_rate": 8.160919540229886e-06, | |
| "loss": 0.2234, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.9578544061302682, | |
| "grad_norm": 19.96432876586914, | |
| "learning_rate": 8.084291187739464e-06, | |
| "loss": 0.1941, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.9961685823754789, | |
| "grad_norm": 18.93885040283203, | |
| "learning_rate": 8.007662835249042e-06, | |
| "loss": 0.201, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 0.11205583065748215, | |
| "eval_runtime": 1.2974, | |
| "eval_samples_per_second": 803.139, | |
| "eval_steps_per_second": 50.871, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 1.0344827586206897, | |
| "grad_norm": 9.7966947555542, | |
| "learning_rate": 7.93103448275862e-06, | |
| "loss": 0.1023, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.0727969348659003, | |
| "grad_norm": 19.185115814208984, | |
| "learning_rate": 7.854406130268199e-06, | |
| "loss": 0.1514, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.1111111111111112, | |
| "grad_norm": 15.477436065673828, | |
| "learning_rate": 7.77777777777778e-06, | |
| "loss": 0.1278, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.1494252873563218, | |
| "grad_norm": 1.358453392982483, | |
| "learning_rate": 7.701149425287356e-06, | |
| "loss": 0.1164, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.1877394636015326, | |
| "grad_norm": 19.57685661315918, | |
| "learning_rate": 7.624521072796936e-06, | |
| "loss": 0.1216, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.2260536398467432, | |
| "grad_norm": 8.200814247131348, | |
| "learning_rate": 7.547892720306514e-06, | |
| "loss": 0.1565, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.264367816091954, | |
| "grad_norm": 2.0913987159729004, | |
| "learning_rate": 7.4712643678160925e-06, | |
| "loss": 0.1394, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.3026819923371646, | |
| "grad_norm": 10.067784309387207, | |
| "learning_rate": 7.394636015325672e-06, | |
| "loss": 0.133, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.3409961685823755, | |
| "grad_norm": 27.741924285888672, | |
| "learning_rate": 7.318007662835249e-06, | |
| "loss": 0.1483, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.3793103448275863, | |
| "grad_norm": 56.61354064941406, | |
| "learning_rate": 7.241379310344828e-06, | |
| "loss": 0.1156, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.417624521072797, | |
| "grad_norm": 19.167062759399414, | |
| "learning_rate": 7.1647509578544075e-06, | |
| "loss": 0.1101, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.4559386973180077, | |
| "grad_norm": 24.54031753540039, | |
| "learning_rate": 7.088122605363985e-06, | |
| "loss": 0.0848, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.4942528735632183, | |
| "grad_norm": 0.2951218783855438, | |
| "learning_rate": 7.011494252873564e-06, | |
| "loss": 0.062, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.5325670498084292, | |
| "grad_norm": 0.5242842435836792, | |
| "learning_rate": 6.934865900383142e-06, | |
| "loss": 0.1586, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.5708812260536398, | |
| "grad_norm": 1.8837841749191284, | |
| "learning_rate": 6.858237547892721e-06, | |
| "loss": 0.0764, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.6091954022988506, | |
| "grad_norm": 8.144768714904785, | |
| "learning_rate": 6.781609195402299e-06, | |
| "loss": 0.1034, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.6475095785440614, | |
| "grad_norm": 25.173917770385742, | |
| "learning_rate": 6.7049808429118775e-06, | |
| "loss": 0.1133, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.685823754789272, | |
| "grad_norm": 36.27584457397461, | |
| "learning_rate": 6.628352490421457e-06, | |
| "loss": 0.1259, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.7241379310344827, | |
| "grad_norm": 44.28847122192383, | |
| "learning_rate": 6.551724137931035e-06, | |
| "loss": 0.0782, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.7624521072796935, | |
| "grad_norm": 30.58072280883789, | |
| "learning_rate": 6.475095785440614e-06, | |
| "loss": 0.1196, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.8007662835249043, | |
| "grad_norm": 1.082352876663208, | |
| "learning_rate": 6.398467432950192e-06, | |
| "loss": 0.1099, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.839080459770115, | |
| "grad_norm": 0.34228336811065674, | |
| "learning_rate": 6.321839080459771e-06, | |
| "loss": 0.0765, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.8773946360153255, | |
| "grad_norm": 53.12428283691406, | |
| "learning_rate": 6.24521072796935e-06, | |
| "loss": 0.1151, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.9157088122605364, | |
| "grad_norm": 0.43705159425735474, | |
| "learning_rate": 6.1685823754789275e-06, | |
| "loss": 0.0485, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.9540229885057472, | |
| "grad_norm": 1.2276843786239624, | |
| "learning_rate": 6.091954022988507e-06, | |
| "loss": 0.0962, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.9923371647509578, | |
| "grad_norm": 13.329333305358887, | |
| "learning_rate": 6.015325670498084e-06, | |
| "loss": 0.0608, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 0.06104712933301926, | |
| "eval_runtime": 1.1775, | |
| "eval_samples_per_second": 884.96, | |
| "eval_steps_per_second": 56.053, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 2.0306513409961684, | |
| "grad_norm": 1.5714704990386963, | |
| "learning_rate": 5.938697318007663e-06, | |
| "loss": 0.0464, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.0689655172413794, | |
| "grad_norm": 10.750398635864258, | |
| "learning_rate": 5.862068965517242e-06, | |
| "loss": 0.0518, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.10727969348659, | |
| "grad_norm": 26.733074188232422, | |
| "learning_rate": 5.78544061302682e-06, | |
| "loss": 0.1253, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.1455938697318007, | |
| "grad_norm": 7.556675434112549, | |
| "learning_rate": 5.708812260536399e-06, | |
| "loss": 0.0313, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.1839080459770113, | |
| "grad_norm": 24.295198440551758, | |
| "learning_rate": 5.6321839080459775e-06, | |
| "loss": 0.0909, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 2.2222222222222223, | |
| "grad_norm": 19.714115142822266, | |
| "learning_rate": 5.555555555555557e-06, | |
| "loss": 0.0387, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 2.260536398467433, | |
| "grad_norm": 0.8200851082801819, | |
| "learning_rate": 5.478927203065134e-06, | |
| "loss": 0.0455, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 2.2988505747126435, | |
| "grad_norm": 0.183212548494339, | |
| "learning_rate": 5.402298850574713e-06, | |
| "loss": 0.0327, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.3371647509578546, | |
| "grad_norm": 61.08492660522461, | |
| "learning_rate": 5.3256704980842925e-06, | |
| "loss": 0.0621, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 2.375478927203065, | |
| "grad_norm": 0.16310882568359375, | |
| "learning_rate": 5.24904214559387e-06, | |
| "loss": 0.0545, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 2.413793103448276, | |
| "grad_norm": 39.58172607421875, | |
| "learning_rate": 5.172413793103449e-06, | |
| "loss": 0.0539, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 2.4521072796934864, | |
| "grad_norm": 23.265289306640625, | |
| "learning_rate": 5.095785440613027e-06, | |
| "loss": 0.1148, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 2.4904214559386975, | |
| "grad_norm": 0.07729102671146393, | |
| "learning_rate": 5.019157088122606e-06, | |
| "loss": 0.0281, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 2.528735632183908, | |
| "grad_norm": 15.417061805725098, | |
| "learning_rate": 4.942528735632184e-06, | |
| "loss": 0.0086, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 2.5670498084291187, | |
| "grad_norm": 0.08401647210121155, | |
| "learning_rate": 4.8659003831417625e-06, | |
| "loss": 0.0334, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 2.6053639846743293, | |
| "grad_norm": 2.524700164794922, | |
| "learning_rate": 4.789272030651342e-06, | |
| "loss": 0.0206, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 2.6436781609195403, | |
| "grad_norm": 0.24694575369358063, | |
| "learning_rate": 4.71264367816092e-06, | |
| "loss": 0.0038, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 2.681992337164751, | |
| "grad_norm": 0.28223150968551636, | |
| "learning_rate": 4.636015325670498e-06, | |
| "loss": 0.0411, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.7203065134099615, | |
| "grad_norm": 0.08240451663732529, | |
| "learning_rate": 4.5593869731800775e-06, | |
| "loss": 0.0744, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 2.7586206896551726, | |
| "grad_norm": 10.965692520141602, | |
| "learning_rate": 4.482758620689656e-06, | |
| "loss": 0.0412, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 2.796934865900383, | |
| "grad_norm": 0.4931705892086029, | |
| "learning_rate": 4.406130268199234e-06, | |
| "loss": 0.014, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 2.835249042145594, | |
| "grad_norm": 0.066756471991539, | |
| "learning_rate": 4.3295019157088125e-06, | |
| "loss": 0.0271, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 2.873563218390805, | |
| "grad_norm": 0.06784966588020325, | |
| "learning_rate": 4.252873563218391e-06, | |
| "loss": 0.0026, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 2.9118773946360155, | |
| "grad_norm": 0.06324368715286255, | |
| "learning_rate": 4.17624521072797e-06, | |
| "loss": 0.0961, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 2.950191570881226, | |
| "grad_norm": 28.81197166442871, | |
| "learning_rate": 4.099616858237548e-06, | |
| "loss": 0.0472, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 2.9885057471264367, | |
| "grad_norm": 0.05656365305185318, | |
| "learning_rate": 4.022988505747127e-06, | |
| "loss": 0.0078, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 0.028292344883084297, | |
| "eval_runtime": 1.1733, | |
| "eval_samples_per_second": 888.059, | |
| "eval_steps_per_second": 56.249, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 3.0268199233716473, | |
| "grad_norm": 42.40280532836914, | |
| "learning_rate": 3.946360153256705e-06, | |
| "loss": 0.0335, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 3.0651340996168583, | |
| "grad_norm": 0.05205749720335007, | |
| "learning_rate": 3.869731800766283e-06, | |
| "loss": 0.0405, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 3.103448275862069, | |
| "grad_norm": 0.09792916476726532, | |
| "learning_rate": 3.793103448275862e-06, | |
| "loss": 0.0278, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 3.1417624521072796, | |
| "grad_norm": 0.05216526985168457, | |
| "learning_rate": 3.7164750957854412e-06, | |
| "loss": 0.0041, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 3.1800766283524906, | |
| "grad_norm": 0.1886385679244995, | |
| "learning_rate": 3.6398467432950196e-06, | |
| "loss": 0.0354, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 3.218390804597701, | |
| "grad_norm": 0.0512104369699955, | |
| "learning_rate": 3.563218390804598e-06, | |
| "loss": 0.0609, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 3.256704980842912, | |
| "grad_norm": 0.34635305404663086, | |
| "learning_rate": 3.4865900383141767e-06, | |
| "loss": 0.0506, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 3.2950191570881224, | |
| "grad_norm": 11.31212329864502, | |
| "learning_rate": 3.409961685823755e-06, | |
| "loss": 0.0429, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 3.3333333333333335, | |
| "grad_norm": 0.20496389269828796, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 0.041, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 3.371647509578544, | |
| "grad_norm": 9.11811351776123, | |
| "learning_rate": 3.256704980842912e-06, | |
| "loss": 0.003, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 3.4099616858237547, | |
| "grad_norm": 39.507572174072266, | |
| "learning_rate": 3.180076628352491e-06, | |
| "loss": 0.1216, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 3.4482758620689653, | |
| "grad_norm": 0.24006924033164978, | |
| "learning_rate": 3.103448275862069e-06, | |
| "loss": 0.0022, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 3.4865900383141764, | |
| "grad_norm": 0.1950913518667221, | |
| "learning_rate": 3.026819923371648e-06, | |
| "loss": 0.0233, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 3.524904214559387, | |
| "grad_norm": 0.059875085949897766, | |
| "learning_rate": 2.9501915708812262e-06, | |
| "loss": 0.0182, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 3.5632183908045976, | |
| "grad_norm": 65.01390838623047, | |
| "learning_rate": 2.8735632183908046e-06, | |
| "loss": 0.0522, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 3.6015325670498086, | |
| "grad_norm": 0.30775925517082214, | |
| "learning_rate": 2.796934865900383e-06, | |
| "loss": 0.0019, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 3.6398467432950192, | |
| "grad_norm": 6.678956985473633, | |
| "learning_rate": 2.720306513409962e-06, | |
| "loss": 0.0395, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 3.67816091954023, | |
| "grad_norm": 0.08484911918640137, | |
| "learning_rate": 2.6436781609195404e-06, | |
| "loss": 0.0071, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 3.716475095785441, | |
| "grad_norm": 0.0435795895755291, | |
| "learning_rate": 2.567049808429119e-06, | |
| "loss": 0.01, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 3.7547892720306515, | |
| "grad_norm": 62.425113677978516, | |
| "learning_rate": 2.4904214559386975e-06, | |
| "loss": 0.0811, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 3.793103448275862, | |
| "grad_norm": 0.05430278554558754, | |
| "learning_rate": 2.4137931034482762e-06, | |
| "loss": 0.0021, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 3.8314176245210727, | |
| "grad_norm": 0.05314672365784645, | |
| "learning_rate": 2.3371647509578546e-06, | |
| "loss": 0.0015, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 3.8697318007662833, | |
| "grad_norm": 0.04625044763088226, | |
| "learning_rate": 2.260536398467433e-06, | |
| "loss": 0.0287, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 3.9080459770114944, | |
| "grad_norm": 0.07921384274959564, | |
| "learning_rate": 2.1839080459770117e-06, | |
| "loss": 0.0402, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 3.946360153256705, | |
| "grad_norm": 0.07240170985460281, | |
| "learning_rate": 2.1072796934865904e-06, | |
| "loss": 0.03, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 3.9846743295019156, | |
| "grad_norm": 0.04859640449285507, | |
| "learning_rate": 2.0306513409961687e-06, | |
| "loss": 0.0063, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 0.028283841907978058, | |
| "eval_runtime": 1.1847, | |
| "eval_samples_per_second": 879.559, | |
| "eval_steps_per_second": 55.711, | |
| "step": 1044 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 1305, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 111269254200720.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |