| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.997759522031367, | |
| "eval_steps": 500, | |
| "global_step": 3513, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0, | |
| "eval_loss": 2.1641297340393066, | |
| "eval_runtime": 2.1264, | |
| "eval_samples_per_second": 9.405, | |
| "eval_steps_per_second": 0.941, | |
| "step": 0 | |
| }, | |
| { | |
| "epoch": 0.0008535154166222128, | |
| "grad_norm": 4.247310161590576, | |
| "learning_rate": 5.681818181818182e-08, | |
| "loss": 1.0032, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.008535154166222128, | |
| "grad_norm": 4.285112380981445, | |
| "learning_rate": 5.681818181818182e-07, | |
| "loss": 0.9823, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.017070308332444255, | |
| "grad_norm": 3.0184619426727295, | |
| "learning_rate": 1.1363636363636364e-06, | |
| "loss": 0.9382, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.02560546249866638, | |
| "grad_norm": 2.0186476707458496, | |
| "learning_rate": 1.7045454545454546e-06, | |
| "loss": 0.8583, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.03414061666488851, | |
| "grad_norm": 1.175138235092163, | |
| "learning_rate": 2.2727272727272728e-06, | |
| "loss": 0.764, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.04267577083111064, | |
| "grad_norm": 0.9045423269271851, | |
| "learning_rate": 2.8409090909090916e-06, | |
| "loss": 0.714, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.05121092499733276, | |
| "grad_norm": 0.8305149078369141, | |
| "learning_rate": 3.409090909090909e-06, | |
| "loss": 0.6835, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.05974607916355489, | |
| "grad_norm": 0.7908885478973389, | |
| "learning_rate": 3.9772727272727275e-06, | |
| "loss": 0.6549, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.06828123332977702, | |
| "grad_norm": 0.8054904341697693, | |
| "learning_rate": 4.5454545454545455e-06, | |
| "loss": 0.6368, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.07681638749599914, | |
| "grad_norm": 0.7891983985900879, | |
| "learning_rate": 5.113636363636364e-06, | |
| "loss": 0.631, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.08535154166222128, | |
| "grad_norm": 0.7775433659553528, | |
| "learning_rate": 5.681818181818183e-06, | |
| "loss": 0.6186, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0938866958284434, | |
| "grad_norm": 0.8340434432029724, | |
| "learning_rate": 6.25e-06, | |
| "loss": 0.6028, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.10242184999466553, | |
| "grad_norm": 0.7749195098876953, | |
| "learning_rate": 6.818181818181818e-06, | |
| "loss": 0.5965, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.11095700416088766, | |
| "grad_norm": 0.7949061393737793, | |
| "learning_rate": 7.386363636363637e-06, | |
| "loss": 0.5837, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.11949215832710978, | |
| "grad_norm": 0.8714171051979065, | |
| "learning_rate": 7.954545454545455e-06, | |
| "loss": 0.5865, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.1280273124933319, | |
| "grad_norm": 0.7890238761901855, | |
| "learning_rate": 8.522727272727273e-06, | |
| "loss": 0.5712, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.13656246665955404, | |
| "grad_norm": 0.8195155262947083, | |
| "learning_rate": 9.090909090909091e-06, | |
| "loss": 0.5774, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.14509762082577615, | |
| "grad_norm": 0.8219712972640991, | |
| "learning_rate": 9.65909090909091e-06, | |
| "loss": 0.5767, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.1536327749919983, | |
| "grad_norm": 0.8124852776527405, | |
| "learning_rate": 1.0227272727272729e-05, | |
| "loss": 0.5643, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.16216792915822043, | |
| "grad_norm": 0.8383850455284119, | |
| "learning_rate": 1.0795454545454547e-05, | |
| "loss": 0.5734, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.17070308332444256, | |
| "grad_norm": 0.7605228424072266, | |
| "learning_rate": 1.1363636363636366e-05, | |
| "loss": 0.5661, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.17923823749066467, | |
| "grad_norm": 0.8585752844810486, | |
| "learning_rate": 1.1931818181818183e-05, | |
| "loss": 0.5534, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.1877733916568868, | |
| "grad_norm": 0.8643070459365845, | |
| "learning_rate": 1.25e-05, | |
| "loss": 0.5549, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.19630854582310894, | |
| "grad_norm": 0.9826616644859314, | |
| "learning_rate": 1.306818181818182e-05, | |
| "loss": 0.5531, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.20484369998933105, | |
| "grad_norm": 0.8799106478691101, | |
| "learning_rate": 1.3636363636363637e-05, | |
| "loss": 0.5371, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.2133788541555532, | |
| "grad_norm": 0.757698118686676, | |
| "learning_rate": 1.4204545454545455e-05, | |
| "loss": 0.5459, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.22191400832177532, | |
| "grad_norm": 0.8704412579536438, | |
| "learning_rate": 1.4772727272727274e-05, | |
| "loss": 0.5369, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.23044916248799743, | |
| "grad_norm": 0.7941352725028992, | |
| "learning_rate": 1.5340909090909094e-05, | |
| "loss": 0.5359, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.23898431665421957, | |
| "grad_norm": 0.7990615367889404, | |
| "learning_rate": 1.590909090909091e-05, | |
| "loss": 0.5285, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.2475194708204417, | |
| "grad_norm": 0.7938647270202637, | |
| "learning_rate": 1.647727272727273e-05, | |
| "loss": 0.5391, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.2560546249866638, | |
| "grad_norm": 0.7677845358848572, | |
| "learning_rate": 1.7045454545454546e-05, | |
| "loss": 0.5195, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.26458977915288595, | |
| "grad_norm": 0.7977807521820068, | |
| "learning_rate": 1.7613636363636366e-05, | |
| "loss": 0.5246, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.2731249333191081, | |
| "grad_norm": 0.819622814655304, | |
| "learning_rate": 1.8181818181818182e-05, | |
| "loss": 0.5176, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.2816600874853302, | |
| "grad_norm": 0.8428648114204407, | |
| "learning_rate": 1.8750000000000002e-05, | |
| "loss": 0.513, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.2901952416515523, | |
| "grad_norm": 0.7542017102241516, | |
| "learning_rate": 1.931818181818182e-05, | |
| "loss": 0.5196, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.29873039581777444, | |
| "grad_norm": 0.8601102232933044, | |
| "learning_rate": 1.9886363636363638e-05, | |
| "loss": 0.5206, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.3072655499839966, | |
| "grad_norm": 0.7532691955566406, | |
| "learning_rate": 1.9999683918961086e-05, | |
| "loss": 0.5162, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.3158007041502187, | |
| "grad_norm": 0.7489638924598694, | |
| "learning_rate": 1.999839987398595e-05, | |
| "loss": 0.5122, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.32433585831644085, | |
| "grad_norm": 0.7671491503715515, | |
| "learning_rate": 1.9996128236743682e-05, | |
| "loss": 0.5161, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.332871012482663, | |
| "grad_norm": 0.7274801731109619, | |
| "learning_rate": 1.9992869231615323e-05, | |
| "loss": 0.5167, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.3414061666488851, | |
| "grad_norm": 0.7289360165596008, | |
| "learning_rate": 1.9988623180509206e-05, | |
| "loss": 0.5127, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.3499413208151072, | |
| "grad_norm": 0.7416843175888062, | |
| "learning_rate": 1.9983390502829168e-05, | |
| "loss": 0.5208, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.35847647498132934, | |
| "grad_norm": 0.7530054450035095, | |
| "learning_rate": 1.997717171543311e-05, | |
| "loss": 0.5011, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.3670116291475515, | |
| "grad_norm": 0.7372049689292908, | |
| "learning_rate": 1.9969967432581962e-05, | |
| "loss": 0.5091, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.3755467833137736, | |
| "grad_norm": 0.8357003927230835, | |
| "learning_rate": 1.996177836587899e-05, | |
| "loss": 0.5114, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.38408193747999575, | |
| "grad_norm": 0.8466181755065918, | |
| "learning_rate": 1.9952605324199516e-05, | |
| "loss": 0.5009, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.3926170916462179, | |
| "grad_norm": 0.7260451316833496, | |
| "learning_rate": 1.9942449213611028e-05, | |
| "loss": 0.5087, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.40115224581243997, | |
| "grad_norm": 0.6815033555030823, | |
| "learning_rate": 1.9931311037283673e-05, | |
| "loss": 0.5033, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.4096873999786621, | |
| "grad_norm": 0.706380307674408, | |
| "learning_rate": 1.9919191895391176e-05, | |
| "loss": 0.4974, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.41822255414488424, | |
| "grad_norm": 0.736223042011261, | |
| "learning_rate": 1.9906092985002163e-05, | |
| "loss": 0.4981, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.4267577083111064, | |
| "grad_norm": 0.9467535614967346, | |
| "learning_rate": 1.9892015599961927e-05, | |
| "loss": 0.5017, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.4352928624773285, | |
| "grad_norm": 0.7372705936431885, | |
| "learning_rate": 1.9876961130764624e-05, | |
| "loss": 0.5047, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.44382801664355065, | |
| "grad_norm": 0.7471463680267334, | |
| "learning_rate": 1.9860931064415934e-05, | |
| "loss": 0.5009, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.45236317080977273, | |
| "grad_norm": 0.6775240898132324, | |
| "learning_rate": 1.9843926984286165e-05, | |
| "loss": 0.5045, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.46089832497599487, | |
| "grad_norm": 0.6793776750564575, | |
| "learning_rate": 1.9825950569953884e-05, | |
| "loss": 0.4978, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.469433479142217, | |
| "grad_norm": 0.6851901412010193, | |
| "learning_rate": 1.980700359703999e-05, | |
| "loss": 0.494, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.47796863330843914, | |
| "grad_norm": 0.6855452060699463, | |
| "learning_rate": 1.9787087937032333e-05, | |
| "loss": 0.4952, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.4865037874746613, | |
| "grad_norm": 0.707453727722168, | |
| "learning_rate": 1.976620555710087e-05, | |
| "loss": 0.4949, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.4950389416408834, | |
| "grad_norm": 0.6674401760101318, | |
| "learning_rate": 1.9744358519903343e-05, | |
| "loss": 0.4863, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.5035740958071055, | |
| "grad_norm": 0.7680632472038269, | |
| "learning_rate": 1.9721548983381554e-05, | |
| "loss": 0.4882, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.5121092499733276, | |
| "grad_norm": 0.6789171099662781, | |
| "learning_rate": 1.9697779200548202e-05, | |
| "loss": 0.4848, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.5206444041395498, | |
| "grad_norm": 0.6551903486251831, | |
| "learning_rate": 1.9673051519264342e-05, | |
| "loss": 0.4951, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.5291795583057719, | |
| "grad_norm": 0.7067411541938782, | |
| "learning_rate": 1.964736838200749e-05, | |
| "loss": 0.493, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.537714712471994, | |
| "grad_norm": 0.6695910692214966, | |
| "learning_rate": 1.9620732325630342e-05, | |
| "loss": 0.4938, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.5462498666382162, | |
| "grad_norm": 0.6625940203666687, | |
| "learning_rate": 1.9593145981110223e-05, | |
| "loss": 0.4873, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.5547850208044383, | |
| "grad_norm": 0.7263137102127075, | |
| "learning_rate": 1.9564612073289192e-05, | |
| "loss": 0.4964, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.5633201749706604, | |
| "grad_norm": 0.7034249901771545, | |
| "learning_rate": 1.9535133420604905e-05, | |
| "loss": 0.4952, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.5718553291368825, | |
| "grad_norm": 0.633065402507782, | |
| "learning_rate": 1.9504712934812228e-05, | |
| "loss": 0.4982, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.5803904833031046, | |
| "grad_norm": 0.7597299814224243, | |
| "learning_rate": 1.9473353620695614e-05, | |
| "loss": 0.4839, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.5889256374693268, | |
| "grad_norm": 0.6461811065673828, | |
| "learning_rate": 1.9441058575772317e-05, | |
| "loss": 0.4853, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.5974607916355489, | |
| "grad_norm": 0.6678339242935181, | |
| "learning_rate": 1.940783098998643e-05, | |
| "loss": 0.4814, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.6059959458017711, | |
| "grad_norm": 0.6317865252494812, | |
| "learning_rate": 1.9373674145393804e-05, | |
| "loss": 0.4896, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.6145310999679932, | |
| "grad_norm": 0.6536778211593628, | |
| "learning_rate": 1.9338591415837856e-05, | |
| "loss": 0.4795, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.6230662541342153, | |
| "grad_norm": 0.653390645980835, | |
| "learning_rate": 1.9302586266616318e-05, | |
| "loss": 0.4862, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.6316014083004374, | |
| "grad_norm": 0.6988620758056641, | |
| "learning_rate": 1.9265662254138958e-05, | |
| "loss": 0.4913, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.6401365624666595, | |
| "grad_norm": 0.6603461503982544, | |
| "learning_rate": 1.922782302557628e-05, | |
| "loss": 0.4813, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.6486717166328817, | |
| "grad_norm": 0.7291558980941772, | |
| "learning_rate": 1.918907231849931e-05, | |
| "loss": 0.4843, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.6572068707991038, | |
| "grad_norm": 0.678535521030426, | |
| "learning_rate": 1.914941396051036e-05, | |
| "loss": 0.4819, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.665742024965326, | |
| "grad_norm": 0.7419494390487671, | |
| "learning_rate": 1.910885186886502e-05, | |
| "loss": 0.4759, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.674277179131548, | |
| "grad_norm": 0.7254301309585571, | |
| "learning_rate": 1.9067390050085183e-05, | |
| "loss": 0.4754, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.6828123332977702, | |
| "grad_norm": 0.6892661452293396, | |
| "learning_rate": 1.902503259956333e-05, | |
| "loss": 0.4759, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.6913474874639923, | |
| "grad_norm": 0.6353612542152405, | |
| "learning_rate": 1.8981783701157985e-05, | |
| "loss": 0.4787, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.6998826416302144, | |
| "grad_norm": 0.578580915927887, | |
| "learning_rate": 1.8937647626780473e-05, | |
| "loss": 0.4748, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.7084177957964366, | |
| "grad_norm": 0.6349884867668152, | |
| "learning_rate": 1.889262873597295e-05, | |
| "loss": 0.4817, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.7169529499626587, | |
| "grad_norm": 0.6372865438461304, | |
| "learning_rate": 1.8846731475477796e-05, | |
| "loss": 0.4811, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.7254881041288809, | |
| "grad_norm": 0.70332932472229, | |
| "learning_rate": 1.8799960378798382e-05, | |
| "loss": 0.4854, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.734023258295103, | |
| "grad_norm": 0.6345937252044678, | |
| "learning_rate": 1.8752320065751276e-05, | |
| "loss": 0.4804, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.742558412461325, | |
| "grad_norm": 0.7001857757568359, | |
| "learning_rate": 1.8703815242009927e-05, | |
| "loss": 0.4823, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.7510935666275472, | |
| "grad_norm": 0.6997294425964355, | |
| "learning_rate": 1.8654450698639845e-05, | |
| "loss": 0.4772, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.7596287207937693, | |
| "grad_norm": 0.6218723058700562, | |
| "learning_rate": 1.860423131162538e-05, | |
| "loss": 0.4741, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.7681638749599915, | |
| "grad_norm": 0.6111359000205994, | |
| "learning_rate": 1.8553162041388096e-05, | |
| "loss": 0.4724, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.7766990291262136, | |
| "grad_norm": 0.6032017469406128, | |
| "learning_rate": 1.8501247932296785e-05, | |
| "loss": 0.4769, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.7852341832924358, | |
| "grad_norm": 0.6396908164024353, | |
| "learning_rate": 1.8448494112169234e-05, | |
| "loss": 0.4747, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.7937693374586579, | |
| "grad_norm": 0.690779983997345, | |
| "learning_rate": 1.8394905791765714e-05, | |
| "loss": 0.4719, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.8023044916248799, | |
| "grad_norm": 0.6640828251838684, | |
| "learning_rate": 1.8340488264274285e-05, | |
| "loss": 0.477, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.8108396457911021, | |
| "grad_norm": 0.6492927074432373, | |
| "learning_rate": 1.8285246904787968e-05, | |
| "loss": 0.4626, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.8193747999573242, | |
| "grad_norm": 0.6751061677932739, | |
| "learning_rate": 1.8229187169773805e-05, | |
| "loss": 0.4698, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.8279099541235464, | |
| "grad_norm": 0.6475481390953064, | |
| "learning_rate": 1.8172314596533914e-05, | |
| "loss": 0.4698, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.8364451082897685, | |
| "grad_norm": 0.6659871935844421, | |
| "learning_rate": 1.8114634802658542e-05, | |
| "loss": 0.4778, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.8449802624559906, | |
| "grad_norm": 0.6479447484016418, | |
| "learning_rate": 1.8056153485471167e-05, | |
| "loss": 0.4703, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.8535154166222128, | |
| "grad_norm": 0.7254726886749268, | |
| "learning_rate": 1.7996876421465764e-05, | |
| "loss": 0.4767, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.8620505707884348, | |
| "grad_norm": 0.6294846534729004, | |
| "learning_rate": 1.7936809465736223e-05, | |
| "loss": 0.4615, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.870585724954657, | |
| "grad_norm": 0.6530686020851135, | |
| "learning_rate": 1.7875958551398023e-05, | |
| "loss": 0.4642, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.8791208791208791, | |
| "grad_norm": 0.713152289390564, | |
| "learning_rate": 1.781432968900217e-05, | |
| "loss": 0.4692, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.8876560332871013, | |
| "grad_norm": 0.7651381492614746, | |
| "learning_rate": 1.775192896594151e-05, | |
| "loss": 0.4646, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.8961911874533234, | |
| "grad_norm": 0.5913043022155762, | |
| "learning_rate": 1.7688762545849466e-05, | |
| "loss": 0.4688, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.9047263416195455, | |
| "grad_norm": 0.6034150719642639, | |
| "learning_rate": 1.7624836667991195e-05, | |
| "loss": 0.4574, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.9132614957857677, | |
| "grad_norm": 0.6374826431274414, | |
| "learning_rate": 1.7560157646647335e-05, | |
| "loss": 0.4651, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.9217966499519897, | |
| "grad_norm": 0.6261687278747559, | |
| "learning_rate": 1.749473187049028e-05, | |
| "loss": 0.4608, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.9303318041182119, | |
| "grad_norm": 0.6067594885826111, | |
| "learning_rate": 1.742856580195316e-05, | |
| "loss": 0.4592, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.938866958284434, | |
| "grad_norm": 0.6240684986114502, | |
| "learning_rate": 1.7361665976591513e-05, | |
| "loss": 0.4663, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.9474021124506561, | |
| "grad_norm": 0.6192986965179443, | |
| "learning_rate": 1.7294039002437724e-05, | |
| "loss": 0.476, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.9559372666168783, | |
| "grad_norm": 0.6121706962585449, | |
| "learning_rate": 1.7225691559348333e-05, | |
| "loss": 0.4531, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.9644724207831004, | |
| "grad_norm": 0.5956526398658752, | |
| "learning_rate": 1.715663039834421e-05, | |
| "loss": 0.4643, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.9730075749493226, | |
| "grad_norm": 0.5699427127838135, | |
| "learning_rate": 1.7086862340943745e-05, | |
| "loss": 0.4601, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.9815427291155446, | |
| "grad_norm": 0.6840018630027771, | |
| "learning_rate": 1.701639427848903e-05, | |
| "loss": 0.4555, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.9900778832817668, | |
| "grad_norm": 0.6009624004364014, | |
| "learning_rate": 1.6945233171465193e-05, | |
| "loss": 0.4632, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.9986130374479889, | |
| "grad_norm": 0.5962963104248047, | |
| "learning_rate": 1.6873386048812854e-05, | |
| "loss": 0.4702, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.0068281233329777, | |
| "grad_norm": 0.679568350315094, | |
| "learning_rate": 1.680086000723385e-05, | |
| "loss": 0.4318, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.0153632774991999, | |
| "grad_norm": 0.6293564438819885, | |
| "learning_rate": 1.672766221049025e-05, | |
| "loss": 0.4231, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.023898431665422, | |
| "grad_norm": 0.6619155406951904, | |
| "learning_rate": 1.6653799888696777e-05, | |
| "loss": 0.4251, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.032433585831644, | |
| "grad_norm": 0.6258383393287659, | |
| "learning_rate": 1.6579280337606615e-05, | |
| "loss": 0.4237, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.0409687399978662, | |
| "grad_norm": 0.6029568314552307, | |
| "learning_rate": 1.650411091789082e-05, | |
| "loss": 0.42, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.0495038941640884, | |
| "grad_norm": 0.5933378338813782, | |
| "learning_rate": 1.6428299054411212e-05, | |
| "loss": 0.4325, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.0580390483303104, | |
| "grad_norm": 0.593717098236084, | |
| "learning_rate": 1.635185223548704e-05, | |
| "loss": 0.4289, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.0665742024965326, | |
| "grad_norm": 0.699511706829071, | |
| "learning_rate": 1.627477801215528e-05, | |
| "loss": 0.4218, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.0751093566627548, | |
| "grad_norm": 0.6585178971290588, | |
| "learning_rate": 1.619708399742481e-05, | |
| "loss": 0.4222, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.083644510828977, | |
| "grad_norm": 0.6042758822441101, | |
| "learning_rate": 1.6118777865524414e-05, | |
| "loss": 0.4231, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.092179664995199, | |
| "grad_norm": 0.5816319584846497, | |
| "learning_rate": 1.6039867351144778e-05, | |
| "loss": 0.4199, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.100714819161421, | |
| "grad_norm": 0.7652620673179626, | |
| "learning_rate": 1.5960360248674478e-05, | |
| "loss": 0.428, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.1092499733276433, | |
| "grad_norm": 0.6318002343177795, | |
| "learning_rate": 1.5880264411430106e-05, | |
| "loss": 0.4263, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.1177851274938653, | |
| "grad_norm": 0.5835772752761841, | |
| "learning_rate": 1.579958775088054e-05, | |
| "loss": 0.4277, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.1263202816600875, | |
| "grad_norm": 0.6293865442276001, | |
| "learning_rate": 1.5718338235865505e-05, | |
| "loss": 0.4226, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.1348554358263097, | |
| "grad_norm": 0.6008028984069824, | |
| "learning_rate": 1.5636523891808452e-05, | |
| "loss": 0.4247, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.1433905899925318, | |
| "grad_norm": 0.6461915373802185, | |
| "learning_rate": 1.5554152799923824e-05, | |
| "loss": 0.429, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.1519257441587538, | |
| "grad_norm": 0.606767475605011, | |
| "learning_rate": 1.547123309641885e-05, | |
| "loss": 0.4268, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.160460898324976, | |
| "grad_norm": 0.637474000453949, | |
| "learning_rate": 1.538777297168991e-05, | |
| "loss": 0.4378, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.1689960524911982, | |
| "grad_norm": 0.6810766458511353, | |
| "learning_rate": 1.5303780669513472e-05, | |
| "loss": 0.42, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.1775312066574202, | |
| "grad_norm": 0.6600694060325623, | |
| "learning_rate": 1.5219264486231882e-05, | |
| "loss": 0.4181, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.1860663608236424, | |
| "grad_norm": 0.7521950006484985, | |
| "learning_rate": 1.5134232769933836e-05, | |
| "loss": 0.4326, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.1946015149898646, | |
| "grad_norm": 0.6155983805656433, | |
| "learning_rate": 1.5048693919629837e-05, | |
| "loss": 0.4263, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.2031366691560867, | |
| "grad_norm": 0.6304906606674194, | |
| "learning_rate": 1.4962656384422555e-05, | |
| "loss": 0.4321, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.2116718233223087, | |
| "grad_norm": 0.5815775990486145, | |
| "learning_rate": 1.4876128662672277e-05, | |
| "loss": 0.4248, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.220206977488531, | |
| "grad_norm": 0.619872510433197, | |
| "learning_rate": 1.4789119301157491e-05, | |
| "loss": 0.4192, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.228742131654753, | |
| "grad_norm": 0.6162813305854797, | |
| "learning_rate": 1.4701636894230655e-05, | |
| "loss": 0.4199, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.237277285820975, | |
| "grad_norm": 0.6389777064323425, | |
| "learning_rate": 1.4613690082969311e-05, | |
| "loss": 0.4239, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.2458124399871973, | |
| "grad_norm": 0.6258131265640259, | |
| "learning_rate": 1.4525287554322558e-05, | |
| "loss": 0.4229, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.2543475941534195, | |
| "grad_norm": 0.678900957107544, | |
| "learning_rate": 1.4436438040252983e-05, | |
| "loss": 0.431, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.2628827483196416, | |
| "grad_norm": 0.564929723739624, | |
| "learning_rate": 1.4347150316874179e-05, | |
| "loss": 0.4178, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.2714179024858636, | |
| "grad_norm": 0.5994879603385925, | |
| "learning_rate": 1.4257433203583876e-05, | |
| "loss": 0.4271, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.2799530566520858, | |
| "grad_norm": 0.5823336839675903, | |
| "learning_rate": 1.4167295562192808e-05, | |
| "loss": 0.4246, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.2884882108183078, | |
| "grad_norm": 0.624218225479126, | |
| "learning_rate": 1.4076746296049387e-05, | |
| "loss": 0.4278, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.29702336498453, | |
| "grad_norm": 0.6522708535194397, | |
| "learning_rate": 1.3985794349160267e-05, | |
| "loss": 0.4225, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.3055585191507522, | |
| "grad_norm": 0.5826681852340698, | |
| "learning_rate": 1.3894448705306908e-05, | |
| "loss": 0.4195, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.3140936733169744, | |
| "grad_norm": 0.6364330053329468, | |
| "learning_rate": 1.3802718387158208e-05, | |
| "loss": 0.4167, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.3226288274831965, | |
| "grad_norm": 0.6075431704521179, | |
| "learning_rate": 1.3710612455379268e-05, | |
| "loss": 0.4216, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.3311639816494185, | |
| "grad_norm": 0.6225044131278992, | |
| "learning_rate": 1.3618140007736442e-05, | |
| "loss": 0.4181, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.3396991358156407, | |
| "grad_norm": 0.5846056938171387, | |
| "learning_rate": 1.3525310178198707e-05, | |
| "loss": 0.4264, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.3482342899818627, | |
| "grad_norm": 0.6489505171775818, | |
| "learning_rate": 1.3432132136035443e-05, | |
| "loss": 0.4267, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.3567694441480849, | |
| "grad_norm": 0.7888132929801941, | |
| "learning_rate": 1.3338615084910737e-05, | |
| "loss": 0.4173, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 1.365304598314307, | |
| "grad_norm": 0.6617109775543213, | |
| "learning_rate": 1.3244768261974307e-05, | |
| "loss": 0.422, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.3738397524805293, | |
| "grad_norm": 0.5967974066734314, | |
| "learning_rate": 1.3150600936949092e-05, | |
| "loss": 0.422, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 1.3823749066467514, | |
| "grad_norm": 0.6307809352874756, | |
| "learning_rate": 1.305612241121562e-05, | |
| "loss": 0.422, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 1.3909100608129734, | |
| "grad_norm": 0.5797590017318726, | |
| "learning_rate": 1.2961342016893302e-05, | |
| "loss": 0.4168, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 1.3994452149791956, | |
| "grad_norm": 0.5861985087394714, | |
| "learning_rate": 1.2866269115918606e-05, | |
| "loss": 0.4289, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 1.4079803691454176, | |
| "grad_norm": 0.6140124797821045, | |
| "learning_rate": 1.2770913099120374e-05, | |
| "loss": 0.4294, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.4165155233116398, | |
| "grad_norm": 0.5719994306564331, | |
| "learning_rate": 1.2675283385292212e-05, | |
| "loss": 0.4254, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 1.425050677477862, | |
| "grad_norm": 0.5942404866218567, | |
| "learning_rate": 1.2579389420262151e-05, | |
| "loss": 0.4212, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 1.4335858316440842, | |
| "grad_norm": 0.6488329172134399, | |
| "learning_rate": 1.248324067595966e-05, | |
| "loss": 0.4173, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 1.4421209858103061, | |
| "grad_norm": 0.5882269144058228, | |
| "learning_rate": 1.2386846649480036e-05, | |
| "loss": 0.4206, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 1.4506561399765283, | |
| "grad_norm": 0.6740261316299438, | |
| "learning_rate": 1.2290216862146309e-05, | |
| "loss": 0.4157, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.4591912941427505, | |
| "grad_norm": 0.6225579977035522, | |
| "learning_rate": 1.2193360858568824e-05, | |
| "loss": 0.4165, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 1.4677264483089725, | |
| "grad_norm": 0.6018004417419434, | |
| "learning_rate": 1.2096288205702431e-05, | |
| "loss": 0.4225, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 1.4762616024751947, | |
| "grad_norm": 0.6020993590354919, | |
| "learning_rate": 1.1999008491901511e-05, | |
| "loss": 0.4247, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 1.4847967566414169, | |
| "grad_norm": 0.6166216731071472, | |
| "learning_rate": 1.1901531325972911e-05, | |
| "loss": 0.4227, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 1.493331910807639, | |
| "grad_norm": 0.5822474956512451, | |
| "learning_rate": 1.180386633622681e-05, | |
| "loss": 0.4132, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.5018670649738612, | |
| "grad_norm": 0.6379313468933105, | |
| "learning_rate": 1.1706023169525691e-05, | |
| "loss": 0.4213, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 1.5104022191400832, | |
| "grad_norm": 0.5989879965782166, | |
| "learning_rate": 1.160801149033147e-05, | |
| "loss": 0.4222, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 1.5189373733063052, | |
| "grad_norm": 0.6491897702217102, | |
| "learning_rate": 1.1509840979750895e-05, | |
| "loss": 0.4186, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 1.5274725274725274, | |
| "grad_norm": 0.5892138481140137, | |
| "learning_rate": 1.1411521334579288e-05, | |
| "loss": 0.4215, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 1.5360076816387496, | |
| "grad_norm": 0.5960173606872559, | |
| "learning_rate": 1.131306226634274e-05, | |
| "loss": 0.418, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.5445428358049718, | |
| "grad_norm": 0.5612832307815552, | |
| "learning_rate": 1.1214473500338862e-05, | |
| "loss": 0.4203, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 1.553077989971194, | |
| "grad_norm": 0.5860463380813599, | |
| "learning_rate": 1.1115764774676172e-05, | |
| "loss": 0.422, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 1.5616131441374161, | |
| "grad_norm": 0.6095026135444641, | |
| "learning_rate": 1.1016945839312202e-05, | |
| "loss": 0.4153, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 1.5701482983036381, | |
| "grad_norm": 0.5922375321388245, | |
| "learning_rate": 1.0918026455090447e-05, | |
| "loss": 0.4195, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 1.57868345246986, | |
| "grad_norm": 0.5867443084716797, | |
| "learning_rate": 1.0819016392776245e-05, | |
| "loss": 0.4169, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.5872186066360823, | |
| "grad_norm": 0.6018590927124023, | |
| "learning_rate": 1.0719925432091671e-05, | |
| "loss": 0.4111, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 1.5957537608023045, | |
| "grad_norm": 0.5756944417953491, | |
| "learning_rate": 1.0620763360749534e-05, | |
| "loss": 0.4189, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 1.6042889149685267, | |
| "grad_norm": 0.6248449683189392, | |
| "learning_rate": 1.0521539973486592e-05, | |
| "loss": 0.4183, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 1.6128240691347489, | |
| "grad_norm": 0.6250705122947693, | |
| "learning_rate": 1.0422265071096101e-05, | |
| "loss": 0.4233, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 1.6213592233009708, | |
| "grad_norm": 0.5823164582252502, | |
| "learning_rate": 1.0322948459459716e-05, | |
| "loss": 0.4143, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.629894377467193, | |
| "grad_norm": 0.574417769908905, | |
| "learning_rate": 1.0223599948578923e-05, | |
| "loss": 0.423, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 1.638429531633415, | |
| "grad_norm": 0.547025203704834, | |
| "learning_rate": 1.0124229351606065e-05, | |
| "loss": 0.4164, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 1.6469646857996372, | |
| "grad_norm": 0.5772645473480225, | |
| "learning_rate": 1.002484648387503e-05, | |
| "loss": 0.4116, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 1.6554998399658594, | |
| "grad_norm": 0.5808480381965637, | |
| "learning_rate": 9.925461161931758e-06, | |
| "loss": 0.4147, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 1.6640349941320816, | |
| "grad_norm": 0.5725694894790649, | |
| "learning_rate": 9.826083202564596e-06, | |
| "loss": 0.4036, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.6725701482983037, | |
| "grad_norm": 0.563442587852478, | |
| "learning_rate": 9.726722421834664e-06, | |
| "loss": 0.421, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 1.6811053024645257, | |
| "grad_norm": 0.5920106172561646, | |
| "learning_rate": 9.627388634106245e-06, | |
| "loss": 0.4214, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 1.689640456630748, | |
| "grad_norm": 0.5918905138969421, | |
| "learning_rate": 9.528091651077404e-06, | |
| "loss": 0.417, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 1.6981756107969699, | |
| "grad_norm": 0.576627790927887, | |
| "learning_rate": 9.428841280810811e-06, | |
| "loss": 0.4151, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 1.706710764963192, | |
| "grad_norm": 0.5786689519882202, | |
| "learning_rate": 9.329647326764963e-06, | |
| "loss": 0.4076, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.7152459191294143, | |
| "grad_norm": 0.621134340763092, | |
| "learning_rate": 9.23051958682584e-06, | |
| "loss": 0.4165, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 1.7237810732956365, | |
| "grad_norm": 0.5719323754310608, | |
| "learning_rate": 9.131467852339123e-06, | |
| "loss": 0.4156, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 1.7323162274618586, | |
| "grad_norm": 0.5678684711456299, | |
| "learning_rate": 9.032501907143053e-06, | |
| "loss": 0.4191, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 1.7408513816280806, | |
| "grad_norm": 0.5779105424880981, | |
| "learning_rate": 8.933631526602028e-06, | |
| "loss": 0.4046, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 1.7493865357943028, | |
| "grad_norm": 0.5759092569351196, | |
| "learning_rate": 8.834866476641048e-06, | |
| "loss": 0.4143, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.7579216899605248, | |
| "grad_norm": 0.5681161284446716, | |
| "learning_rate": 8.73621651278108e-06, | |
| "loss": 0.424, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 1.766456844126747, | |
| "grad_norm": 0.614236056804657, | |
| "learning_rate": 8.637691379175453e-06, | |
| "loss": 0.4148, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 1.7749919982929692, | |
| "grad_norm": 0.5703302621841431, | |
| "learning_rate": 8.539300807647396e-06, | |
| "loss": 0.4179, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 1.7835271524591914, | |
| "grad_norm": 0.6135383248329163, | |
| "learning_rate": 8.441054516728747e-06, | |
| "loss": 0.4173, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 1.7920623066254135, | |
| "grad_norm": 0.5604820847511292, | |
| "learning_rate": 8.342962210700043e-06, | |
| "loss": 0.4167, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.8005974607916355, | |
| "grad_norm": 0.5805196166038513, | |
| "learning_rate": 8.245033578631939e-06, | |
| "loss": 0.4232, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 1.8091326149578577, | |
| "grad_norm": 0.5662888884544373, | |
| "learning_rate": 8.147278293428199e-06, | |
| "loss": 0.4066, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 1.8176677691240797, | |
| "grad_norm": 0.581598162651062, | |
| "learning_rate": 8.049706010870241e-06, | |
| "loss": 0.416, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 1.8262029232903019, | |
| "grad_norm": 0.5490183234214783, | |
| "learning_rate": 7.952326368663404e-06, | |
| "loss": 0.4088, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 1.834738077456524, | |
| "grad_norm": 0.6272237300872803, | |
| "learning_rate": 7.855148985484946e-06, | |
| "loss": 0.4099, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.8432732316227463, | |
| "grad_norm": 0.6257729530334473, | |
| "learning_rate": 7.758183460034005e-06, | |
| "loss": 0.4068, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 1.8518083857889684, | |
| "grad_norm": 0.6253445744514465, | |
| "learning_rate": 7.661439370083456e-06, | |
| "loss": 0.4137, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 1.8603435399551904, | |
| "grad_norm": 0.6083528995513916, | |
| "learning_rate": 7.564926271533876e-06, | |
| "loss": 0.4112, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 1.8688786941214126, | |
| "grad_norm": 0.598172128200531, | |
| "learning_rate": 7.468653697469655e-06, | |
| "loss": 0.4146, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 1.8774138482876346, | |
| "grad_norm": 0.5930103659629822, | |
| "learning_rate": 7.372631157217377e-06, | |
| "loss": 0.4093, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.8859490024538568, | |
| "grad_norm": 0.5641002655029297, | |
| "learning_rate": 7.276868135406523e-06, | |
| "loss": 0.4162, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 1.894484156620079, | |
| "grad_norm": 0.5636462569236755, | |
| "learning_rate": 7.181374091032635e-06, | |
| "loss": 0.408, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 1.9030193107863012, | |
| "grad_norm": 0.6011704802513123, | |
| "learning_rate": 7.0861584565229955e-06, | |
| "loss": 0.4119, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 1.9115544649525233, | |
| "grad_norm": 0.5807620882987976, | |
| "learning_rate": 6.991230636804953e-06, | |
| "loss": 0.4056, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 1.9200896191187453, | |
| "grad_norm": 0.6091267466545105, | |
| "learning_rate": 6.896600008376926e-06, | |
| "loss": 0.4095, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.9286247732849675, | |
| "grad_norm": 0.5541015267372131, | |
| "learning_rate": 6.802275918382266e-06, | |
| "loss": 0.4085, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 1.9371599274511895, | |
| "grad_norm": 0.5683854222297668, | |
| "learning_rate": 6.708267683685978e-06, | |
| "loss": 0.4098, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 1.9456950816174117, | |
| "grad_norm": 0.6314034461975098, | |
| "learning_rate": 6.614584589954447e-06, | |
| "loss": 0.4135, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 1.9542302357836339, | |
| "grad_norm": 0.5882005095481873, | |
| "learning_rate": 6.521235890738251e-06, | |
| "loss": 0.4102, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 1.962765389949856, | |
| "grad_norm": 0.5736626386642456, | |
| "learning_rate": 6.42823080655814e-06, | |
| "loss": 0.4037, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.9713005441160782, | |
| "grad_norm": 0.5581937432289124, | |
| "learning_rate": 6.33557852399428e-06, | |
| "loss": 0.4155, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 1.9798356982823002, | |
| "grad_norm": 0.5713927745819092, | |
| "learning_rate": 6.243288194778837e-06, | |
| "loss": 0.4058, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 1.9883708524485222, | |
| "grad_norm": 0.6096407771110535, | |
| "learning_rate": 6.151368934892028e-06, | |
| "loss": 0.4096, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 1.9969060066147444, | |
| "grad_norm": 0.5943323373794556, | |
| "learning_rate": 6.059829823661692e-06, | |
| "loss": 0.413, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 2.005121092499733, | |
| "grad_norm": 0.6159135699272156, | |
| "learning_rate": 5.968679902866463e-06, | |
| "loss": 0.3891, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 2.0136562466659553, | |
| "grad_norm": 0.6119585037231445, | |
| "learning_rate": 5.877928175842682e-06, | |
| "loss": 0.3701, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 2.0221914008321775, | |
| "grad_norm": 0.5810120105743408, | |
| "learning_rate": 5.787583606595087e-06, | |
| "loss": 0.3682, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 2.0307265549983997, | |
| "grad_norm": 0.597731351852417, | |
| "learning_rate": 5.697655118911392e-06, | |
| "loss": 0.374, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 2.039261709164622, | |
| "grad_norm": 0.6231815218925476, | |
| "learning_rate": 5.608151595480844e-06, | |
| "loss": 0.3757, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 2.047796863330844, | |
| "grad_norm": 0.6622620224952698, | |
| "learning_rate": 5.51908187701683e-06, | |
| "loss": 0.3737, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.056332017497066, | |
| "grad_norm": 0.5878000855445862, | |
| "learning_rate": 5.430454761383637e-06, | |
| "loss": 0.3781, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 2.064867171663288, | |
| "grad_norm": 0.5704830884933472, | |
| "learning_rate": 5.3422790027274416e-06, | |
| "loss": 0.3695, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 2.0734023258295102, | |
| "grad_norm": 0.5505034327507019, | |
| "learning_rate": 5.254563310611631e-06, | |
| "loss": 0.362, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 2.0819374799957324, | |
| "grad_norm": 0.5436623692512512, | |
| "learning_rate": 5.167316349156495e-06, | |
| "loss": 0.3661, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 2.0904726341619546, | |
| "grad_norm": 0.5680120587348938, | |
| "learning_rate": 5.080546736183433e-06, | |
| "loss": 0.3722, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 2.099007788328177, | |
| "grad_norm": 0.6044514179229736, | |
| "learning_rate": 4.994263042363754e-06, | |
| "loss": 0.3681, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 2.107542942494399, | |
| "grad_norm": 0.5783267021179199, | |
| "learning_rate": 4.908473790372061e-06, | |
| "loss": 0.3595, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 2.1160780966606207, | |
| "grad_norm": 0.614342212677002, | |
| "learning_rate": 4.823187454044461e-06, | |
| "loss": 0.3704, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 2.124613250826843, | |
| "grad_norm": 0.5875915884971619, | |
| "learning_rate": 4.738412457541559e-06, | |
| "loss": 0.3669, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 2.133148404993065, | |
| "grad_norm": 0.6116498708724976, | |
| "learning_rate": 4.6541571745163396e-06, | |
| "loss": 0.3678, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.1416835591592873, | |
| "grad_norm": 0.5992347002029419, | |
| "learning_rate": 4.570429927287073e-06, | |
| "loss": 0.3706, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 2.1502187133255095, | |
| "grad_norm": 0.5668257474899292, | |
| "learning_rate": 4.4872389860152895e-06, | |
| "loss": 0.3759, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 2.1587538674917317, | |
| "grad_norm": 0.6104972958564758, | |
| "learning_rate": 4.404592567888871e-06, | |
| "loss": 0.3681, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 2.167289021657954, | |
| "grad_norm": 0.5686572194099426, | |
| "learning_rate": 4.322498836310398e-06, | |
| "loss": 0.3676, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 2.1758241758241756, | |
| "grad_norm": 0.5945229530334473, | |
| "learning_rate": 4.240965900090844e-06, | |
| "loss": 0.3768, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 2.184359329990398, | |
| "grad_norm": 0.6045082807540894, | |
| "learning_rate": 4.160001812648584e-06, | |
| "loss": 0.3626, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 2.19289448415662, | |
| "grad_norm": 0.5578190684318542, | |
| "learning_rate": 4.079614571213941e-06, | |
| "loss": 0.3699, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 2.201429638322842, | |
| "grad_norm": 0.6121580600738525, | |
| "learning_rate": 3.999812116039265e-06, | |
| "loss": 0.3679, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 2.2099647924890644, | |
| "grad_norm": 0.590093731880188, | |
| "learning_rate": 3.920602329614611e-06, | |
| "loss": 0.3707, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 2.2184999466552866, | |
| "grad_norm": 0.5550406575202942, | |
| "learning_rate": 3.841993035889165e-06, | |
| "loss": 0.3736, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 2.227035100821509, | |
| "grad_norm": 0.5881937742233276, | |
| "learning_rate": 3.76399199949843e-06, | |
| "loss": 0.3733, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 2.2355702549877305, | |
| "grad_norm": 0.5684910416603088, | |
| "learning_rate": 3.686606924997267e-06, | |
| "loss": 0.3645, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 2.2441054091539527, | |
| "grad_norm": 0.5846866369247437, | |
| "learning_rate": 3.6098454560988783e-06, | |
| "loss": 0.368, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 2.252640563320175, | |
| "grad_norm": 0.5647754669189453, | |
| "learning_rate": 3.533715174919817e-06, | |
| "loss": 0.3712, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 2.261175717486397, | |
| "grad_norm": 0.5805838704109192, | |
| "learning_rate": 3.4582236012310356e-06, | |
| "loss": 0.3771, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 2.2697108716526193, | |
| "grad_norm": 0.6296060681343079, | |
| "learning_rate": 3.3833781917151375e-06, | |
| "loss": 0.3736, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 2.2782460258188415, | |
| "grad_norm": 0.5701944828033447, | |
| "learning_rate": 3.3091863392298417e-06, | |
| "loss": 0.3716, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 2.2867811799850637, | |
| "grad_norm": 0.5808659195899963, | |
| "learning_rate": 3.235655372077747e-06, | |
| "loss": 0.3657, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 2.2953163341512854, | |
| "grad_norm": 0.5861009359359741, | |
| "learning_rate": 3.1627925532824855e-06, | |
| "loss": 0.3755, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 2.3038514883175076, | |
| "grad_norm": 0.5694013833999634, | |
| "learning_rate": 3.0906050798713085e-06, | |
| "loss": 0.3694, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 2.31238664248373, | |
| "grad_norm": 0.6029444932937622, | |
| "learning_rate": 3.019100082164217e-06, | |
| "loss": 0.3721, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 2.320921796649952, | |
| "grad_norm": 0.5814130306243896, | |
| "learning_rate": 2.9482846230696405e-06, | |
| "loss": 0.3652, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 2.329456950816174, | |
| "grad_norm": 0.5692527294158936, | |
| "learning_rate": 2.878165697386812e-06, | |
| "loss": 0.3667, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 2.3379921049823964, | |
| "grad_norm": 0.5681324601173401, | |
| "learning_rate": 2.8087502311148662e-06, | |
| "loss": 0.3671, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 2.346527259148618, | |
| "grad_norm": 0.5992768406867981, | |
| "learning_rate": 2.740045080768694e-06, | |
| "loss": 0.3679, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 2.3550624133148403, | |
| "grad_norm": 0.5779207944869995, | |
| "learning_rate": 2.672057032701717e-06, | |
| "loss": 0.3694, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 2.3635975674810625, | |
| "grad_norm": 0.5771894454956055, | |
| "learning_rate": 2.6047928024355617e-06, | |
| "loss": 0.3634, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 2.3721327216472847, | |
| "grad_norm": 0.5959951281547546, | |
| "learning_rate": 2.5382590339967205e-06, | |
| "loss": 0.3702, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 2.380667875813507, | |
| "grad_norm": 0.6430909037590027, | |
| "learning_rate": 2.472462299260293e-06, | |
| "loss": 0.3722, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 2.389203029979729, | |
| "grad_norm": 0.6245153546333313, | |
| "learning_rate": 2.4074090973008634e-06, | |
| "loss": 0.365, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 2.3977381841459513, | |
| "grad_norm": 0.624854564666748, | |
| "learning_rate": 2.34310585375053e-06, | |
| "loss": 0.3682, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 2.4062733383121735, | |
| "grad_norm": 0.5641084313392639, | |
| "learning_rate": 2.279558920164231e-06, | |
| "loss": 0.367, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 2.4148084924783952, | |
| "grad_norm": 0.5821437239646912, | |
| "learning_rate": 2.216774573392363e-06, | |
| "loss": 0.3663, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 2.4233436466446174, | |
| "grad_norm": 0.5898992419242859, | |
| "learning_rate": 2.1547590149607834e-06, | |
| "loss": 0.3621, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 2.4318788008108396, | |
| "grad_norm": 0.6132239103317261, | |
| "learning_rate": 2.0935183704582574e-06, | |
| "loss": 0.3689, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 2.440413954977062, | |
| "grad_norm": 0.5809346437454224, | |
| "learning_rate": 2.0330586889314073e-06, | |
| "loss": 0.3644, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 2.448949109143284, | |
| "grad_norm": 0.5714666247367859, | |
| "learning_rate": 1.973385942287207e-06, | |
| "loss": 0.3756, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 2.457484263309506, | |
| "grad_norm": 0.6122422814369202, | |
| "learning_rate": 1.914506024703112e-06, | |
| "loss": 0.3781, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 2.466019417475728, | |
| "grad_norm": 0.570672333240509, | |
| "learning_rate": 1.8564247520448654e-06, | |
| "loss": 0.3687, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 2.47455457164195, | |
| "grad_norm": 0.5583861470222473, | |
| "learning_rate": 1.79914786129203e-06, | |
| "loss": 0.3646, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 2.4830897258081723, | |
| "grad_norm": 0.5938260555267334, | |
| "learning_rate": 1.7426810099713176e-06, | |
| "loss": 0.3729, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 2.4916248799743945, | |
| "grad_norm": 0.5803694128990173, | |
| "learning_rate": 1.6870297755977772e-06, | |
| "loss": 0.3708, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 2.5001600341406167, | |
| "grad_norm": 0.5796012282371521, | |
| "learning_rate": 1.6321996551238618e-06, | |
| "loss": 0.3652, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 2.508695188306839, | |
| "grad_norm": 0.5569105744361877, | |
| "learning_rate": 1.578196064396471e-06, | |
| "loss": 0.3677, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 2.517230342473061, | |
| "grad_norm": 0.590110719203949, | |
| "learning_rate": 1.5250243376220054e-06, | |
| "loss": 0.367, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 2.5257654966392833, | |
| "grad_norm": 0.5972738862037659, | |
| "learning_rate": 1.4726897268394824e-06, | |
| "loss": 0.3654, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 2.534300650805505, | |
| "grad_norm": 0.6115542054176331, | |
| "learning_rate": 1.4211974014017448e-06, | |
| "loss": 0.3695, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 2.5428358049717272, | |
| "grad_norm": 0.5728586316108704, | |
| "learning_rate": 1.3705524474648924e-06, | |
| "loss": 0.3714, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 2.5513709591379494, | |
| "grad_norm": 0.594648540019989, | |
| "learning_rate": 1.3207598674858656e-06, | |
| "loss": 0.3722, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 2.5599061133041716, | |
| "grad_norm": 0.5523515343666077, | |
| "learning_rate": 1.2718245797283435e-06, | |
| "loss": 0.3705, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.568441267470394, | |
| "grad_norm": 0.5695915818214417, | |
| "learning_rate": 1.2237514177769373e-06, | |
| "loss": 0.3752, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 2.5769764216366156, | |
| "grad_norm": 0.583398163318634, | |
| "learning_rate": 1.1765451300597574e-06, | |
| "loss": 0.3713, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 2.5855115758028377, | |
| "grad_norm": 0.5840734243392944, | |
| "learning_rate": 1.1302103793793817e-06, | |
| "loss": 0.3661, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 2.59404672996906, | |
| "grad_norm": 0.5768590569496155, | |
| "learning_rate": 1.084751742452288e-06, | |
| "loss": 0.3686, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 2.602581884135282, | |
| "grad_norm": 0.583306610584259, | |
| "learning_rate": 1.0401737094567943e-06, | |
| "loss": 0.3704, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 2.6111170383015043, | |
| "grad_norm": 0.575630247592926, | |
| "learning_rate": 9.964806835895324e-07, | |
| "loss": 0.3663, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 2.6196521924677265, | |
| "grad_norm": 0.5804203152656555, | |
| "learning_rate": 9.536769806305269e-07, | |
| "loss": 0.372, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 2.6281873466339487, | |
| "grad_norm": 0.5700560212135315, | |
| "learning_rate": 9.117668285169112e-07, | |
| "loss": 0.3685, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 2.636722500800171, | |
| "grad_norm": 0.5414294600486755, | |
| "learning_rate": 8.707543669252971e-07, | |
| "loss": 0.3656, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 2.645257654966393, | |
| "grad_norm": 0.6020073890686035, | |
| "learning_rate": 8.306436468628886e-07, | |
| "loss": 0.3742, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 2.653792809132615, | |
| "grad_norm": 0.567762017250061, | |
| "learning_rate": 7.914386302673438e-07, | |
| "loss": 0.37, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 2.662327963298837, | |
| "grad_norm": 0.5635555386543274, | |
| "learning_rate": 7.531431896154274e-07, | |
| "loss": 0.3639, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 2.6708631174650592, | |
| "grad_norm": 0.5736328363418579, | |
| "learning_rate": 7.157611075405146e-07, | |
| "loss": 0.3678, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 2.6793982716312814, | |
| "grad_norm": 0.5781832933425903, | |
| "learning_rate": 6.792960764589584e-07, | |
| "loss": 0.3645, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 2.6879334257975036, | |
| "grad_norm": 0.5733327865600586, | |
| "learning_rate": 6.437516982053693e-07, | |
| "loss": 0.3628, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 2.6964685799637254, | |
| "grad_norm": 0.5717176198959351, | |
| "learning_rate": 6.091314836768436e-07, | |
| "loss": 0.3698, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 2.7050037341299475, | |
| "grad_norm": 0.5804247260093689, | |
| "learning_rate": 5.754388524861832e-07, | |
| "loss": 0.3593, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 2.7135388882961697, | |
| "grad_norm": 0.5856621861457825, | |
| "learning_rate": 5.426771326241043e-07, | |
| "loss": 0.3699, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 2.722074042462392, | |
| "grad_norm": 0.5695067644119263, | |
| "learning_rate": 5.108495601305341e-07, | |
| "loss": 0.3616, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 2.730609196628614, | |
| "grad_norm": 0.5347820520401001, | |
| "learning_rate": 4.799592787749674e-07, | |
| "loss": 0.367, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 2.7391443507948363, | |
| "grad_norm": 0.5704131126403809, | |
| "learning_rate": 4.500093397459293e-07, | |
| "loss": 0.3756, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 2.7476795049610585, | |
| "grad_norm": 0.578822135925293, | |
| "learning_rate": 4.210027013496054e-07, | |
| "loss": 0.3675, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 2.7562146591272807, | |
| "grad_norm": 0.5722731351852417, | |
| "learning_rate": 3.929422287176299e-07, | |
| "loss": 0.3628, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 2.764749813293503, | |
| "grad_norm": 0.5673348307609558, | |
| "learning_rate": 3.658306935240852e-07, | |
| "loss": 0.3644, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 2.7732849674597246, | |
| "grad_norm": 0.5280677080154419, | |
| "learning_rate": 3.3967077371172554e-07, | |
| "loss": 0.3641, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 2.781820121625947, | |
| "grad_norm": 0.5813700556755066, | |
| "learning_rate": 3.1446505322746735e-07, | |
| "loss": 0.3645, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 2.790355275792169, | |
| "grad_norm": 0.5796400308609009, | |
| "learning_rate": 2.902160217671568e-07, | |
| "loss": 0.3686, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 2.798890429958391, | |
| "grad_norm": 0.5714091658592224, | |
| "learning_rate": 2.669260745296509e-07, | |
| "loss": 0.3625, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 2.8074255841246134, | |
| "grad_norm": 0.606631338596344, | |
| "learning_rate": 2.445975119802324e-07, | |
| "loss": 0.3681, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 2.815960738290835, | |
| "grad_norm": 0.8842149376869202, | |
| "learning_rate": 2.2323253962338143e-07, | |
| "loss": 0.3688, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 2.8244958924570573, | |
| "grad_norm": 0.550936758518219, | |
| "learning_rate": 2.028332677849254e-07, | |
| "loss": 0.3614, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 2.8330310466232795, | |
| "grad_norm": 0.5830692648887634, | |
| "learning_rate": 1.834017114035924e-07, | |
| "loss": 0.3581, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 2.8415662007895017, | |
| "grad_norm": 0.587570071220398, | |
| "learning_rate": 1.649397898319838e-07, | |
| "loss": 0.3674, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 2.850101354955724, | |
| "grad_norm": 0.5561811327934265, | |
| "learning_rate": 1.4744932664699453e-07, | |
| "loss": 0.3646, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 2.858636509121946, | |
| "grad_norm": 0.5832992196083069, | |
| "learning_rate": 1.3093204946968307e-07, | |
| "loss": 0.3716, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 2.8671716632881683, | |
| "grad_norm": 0.5671942830085754, | |
| "learning_rate": 1.1538958979463333e-07, | |
| "loss": 0.3718, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 2.8757068174543905, | |
| "grad_norm": 0.5540612936019897, | |
| "learning_rate": 1.0082348282879351e-07, | |
| "loss": 0.3618, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 2.8842419716206122, | |
| "grad_norm": 0.5753257870674133, | |
| "learning_rate": 8.723516733984527e-08, | |
| "loss": 0.3655, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 2.8927771257868344, | |
| "grad_norm": 0.562337338924408, | |
| "learning_rate": 7.46259855140874e-08, | |
| "loss": 0.3701, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 2.9013122799530566, | |
| "grad_norm": 0.5680497884750366, | |
| "learning_rate": 6.299718282385625e-08, | |
| "loss": 0.3646, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 2.909847434119279, | |
| "grad_norm": 0.5629882216453552, | |
| "learning_rate": 5.234990790450867e-08, | |
| "loss": 0.3606, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 2.918382588285501, | |
| "grad_norm": 0.5232166647911072, | |
| "learning_rate": 4.268521244096602e-08, | |
| "loss": 0.3573, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 2.9269177424517228, | |
| "grad_norm": 0.5849005579948425, | |
| "learning_rate": 3.400405106383176e-08, | |
| "loss": 0.3646, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 2.935452896617945, | |
| "grad_norm": 0.5933673977851868, | |
| "learning_rate": 2.630728125509796e-08, | |
| "loss": 0.3677, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 2.943988050784167, | |
| "grad_norm": 0.5976866483688354, | |
| "learning_rate": 1.9595663263448638e-08, | |
| "loss": 0.3722, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 2.9525232049503893, | |
| "grad_norm": 0.5706110596656799, | |
| "learning_rate": 1.3869860029165349e-08, | |
| "loss": 0.372, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 2.9610583591166115, | |
| "grad_norm": 0.5455855131149292, | |
| "learning_rate": 9.13043711864514e-09, | |
| "loss": 0.3636, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 2.9695935132828337, | |
| "grad_norm": 0.5781861543655396, | |
| "learning_rate": 5.3778626685385695e-09, | |
| "loss": 0.3659, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 2.978128667449056, | |
| "grad_norm": 0.5493679046630859, | |
| "learning_rate": 2.6125073395066867e-09, | |
| "loss": 0.3626, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 2.986663821615278, | |
| "grad_norm": 0.5861932039260864, | |
| "learning_rate": 8.346442796092202e-10, | |
| "loss": 0.3631, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.9951989757815003, | |
| "grad_norm": 0.5658364295959473, | |
| "learning_rate": 4.444909732836955e-11, | |
| "loss": 0.37, | |
| "step": 3510 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 3513, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500.0, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.1182668447261655e+18, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |