| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.5, |
| "eval_steps": 500, |
| "global_step": 3000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "grad_norm": 0.18099799752235413, |
| "learning_rate": 3e-06, |
| "loss": 1.2088, |
| "step": 10 |
| }, |
| { |
| "grad_norm": 0.20619013905525208, |
| "learning_rate": 6.333333333333334e-06, |
| "loss": 1.1961, |
| "step": 20 |
| }, |
| { |
| "grad_norm": 0.14481662213802338, |
| "learning_rate": 9.666666666666667e-06, |
| "loss": 1.1631, |
| "step": 30 |
| }, |
| { |
| "grad_norm": 0.14480064809322357, |
| "learning_rate": 1.3000000000000001e-05, |
| "loss": 1.12, |
| "step": 40 |
| }, |
| { |
| "grad_norm": 0.1346646249294281, |
| "learning_rate": 1.6333333333333335e-05, |
| "loss": 1.0839, |
| "step": 50 |
| }, |
| { |
| "grad_norm": 0.1372521072626114, |
| "learning_rate": 1.9666666666666666e-05, |
| "loss": 1.0649, |
| "step": 60 |
| }, |
| { |
| "grad_norm": 0.2891208529472351, |
| "learning_rate": 2.3000000000000003e-05, |
| "loss": 1.0489, |
| "step": 70 |
| }, |
| { |
| "grad_norm": 0.2914903163909912, |
| "learning_rate": 2.633333333333333e-05, |
| "loss": 1.0182, |
| "step": 80 |
| }, |
| { |
| "grad_norm": 0.4698174297809601, |
| "learning_rate": 2.9666666666666672e-05, |
| "loss": 0.9654, |
| "step": 90 |
| }, |
| { |
| "grad_norm": 0.8523975014686584, |
| "learning_rate": 3.3e-05, |
| "loss": 0.9062, |
| "step": 100 |
| }, |
| { |
| "grad_norm": 0.5535483956336975, |
| "learning_rate": 3.633333333333333e-05, |
| "loss": 0.8529, |
| "step": 110 |
| }, |
| { |
| "grad_norm": 0.6099287271499634, |
| "learning_rate": 3.966666666666667e-05, |
| "loss": 0.8047, |
| "step": 120 |
| }, |
| { |
| "grad_norm": 0.6395930051803589, |
| "learning_rate": 4.3e-05, |
| "loss": 0.7568, |
| "step": 130 |
| }, |
| { |
| "grad_norm": 0.7526710033416748, |
| "learning_rate": 4.633333333333333e-05, |
| "loss": 0.7168, |
| "step": 140 |
| }, |
| { |
| "grad_norm": 0.7764474153518677, |
| "learning_rate": 4.966666666666667e-05, |
| "loss": 0.6804, |
| "step": 150 |
| }, |
| { |
| "grad_norm": 0.9331451654434204, |
| "learning_rate": 5.300000000000001e-05, |
| "loss": 0.6504, |
| "step": 160 |
| }, |
| { |
| "grad_norm": 0.702415943145752, |
| "learning_rate": 5.633333333333334e-05, |
| "loss": 0.625, |
| "step": 170 |
| }, |
| { |
| "grad_norm": 1.1818166971206665, |
| "learning_rate": 5.966666666666667e-05, |
| "loss": 0.6007, |
| "step": 180 |
| }, |
| { |
| "grad_norm": 1.1777819395065308, |
| "learning_rate": 6.3e-05, |
| "loss": 0.5718, |
| "step": 190 |
| }, |
| { |
| "grad_norm": 1.0473010540008545, |
| "learning_rate": 6.633333333333334e-05, |
| "loss": 0.5543, |
| "step": 200 |
| }, |
| { |
| "grad_norm": 0.873248815536499, |
| "learning_rate": 6.966666666666668e-05, |
| "loss": 0.5387, |
| "step": 210 |
| }, |
| { |
| "grad_norm": 0.8804641962051392, |
| "learning_rate": 7.3e-05, |
| "loss": 0.5221, |
| "step": 220 |
| }, |
| { |
| "grad_norm": 0.915578305721283, |
| "learning_rate": 7.633333333333334e-05, |
| "loss": 0.5033, |
| "step": 230 |
| }, |
| { |
| "grad_norm": 1.316872000694275, |
| "learning_rate": 7.966666666666666e-05, |
| "loss": 0.4825, |
| "step": 240 |
| }, |
| { |
| "grad_norm": 1.1146589517593384, |
| "learning_rate": 8.3e-05, |
| "loss": 0.4626, |
| "step": 250 |
| }, |
| { |
| "grad_norm": 1.5606942176818848, |
| "learning_rate": 8.633333333333334e-05, |
| "loss": 0.4351, |
| "step": 260 |
| }, |
| { |
| "grad_norm": 1.0714901685714722, |
| "learning_rate": 8.966666666666666e-05, |
| "loss": 0.4128, |
| "step": 270 |
| }, |
| { |
| "grad_norm": 1.3251256942749023, |
| "learning_rate": 9.300000000000001e-05, |
| "loss": 0.3907, |
| "step": 280 |
| }, |
| { |
| "grad_norm": 1.0007801055908203, |
| "learning_rate": 9.633333333333335e-05, |
| "loss": 0.3783, |
| "step": 290 |
| }, |
| { |
| "grad_norm": 1.3502074480056763, |
| "learning_rate": 9.966666666666667e-05, |
| "loss": 0.3597, |
| "step": 300 |
| }, |
| { |
| "grad_norm": 1.1962398290634155, |
| "learning_rate": 9.999938485971279e-05, |
| "loss": 0.3383, |
| "step": 310 |
| }, |
| { |
| "grad_norm": 1.634523630142212, |
| "learning_rate": 9.999725846827562e-05, |
| "loss": 0.3226, |
| "step": 320 |
| }, |
| { |
| "grad_norm": 1.4946134090423584, |
| "learning_rate": 9.999361329594254e-05, |
| "loss": 0.2976, |
| "step": 330 |
| }, |
| { |
| "grad_norm": 1.2499020099639893, |
| "learning_rate": 9.998844945344405e-05, |
| "loss": 0.2673, |
| "step": 340 |
| }, |
| { |
| "grad_norm": 1.481104850769043, |
| "learning_rate": 9.99817670976436e-05, |
| "loss": 0.2459, |
| "step": 350 |
| }, |
| { |
| "grad_norm": 1.554062008857727, |
| "learning_rate": 9.997356643153303e-05, |
| "loss": 0.2272, |
| "step": 360 |
| }, |
| { |
| "grad_norm": 1.8656765222549438, |
| "learning_rate": 9.996384770422629e-05, |
| "loss": 0.2146, |
| "step": 370 |
| }, |
| { |
| "grad_norm": 1.0278624296188354, |
| "learning_rate": 9.995261121095194e-05, |
| "loss": 0.1967, |
| "step": 380 |
| }, |
| { |
| "grad_norm": 1.3932757377624512, |
| "learning_rate": 9.993985729304408e-05, |
| "loss": 0.1756, |
| "step": 390 |
| }, |
| { |
| "grad_norm": 1.618746042251587, |
| "learning_rate": 9.992558633793212e-05, |
| "loss": 0.1501, |
| "step": 400 |
| }, |
| { |
| "grad_norm": 1.1260430812835693, |
| "learning_rate": 9.990979877912891e-05, |
| "loss": 0.1335, |
| "step": 410 |
| }, |
| { |
| "grad_norm": 1.0491538047790527, |
| "learning_rate": 9.989249509621759e-05, |
| "loss": 0.1259, |
| "step": 420 |
| }, |
| { |
| "grad_norm": 1.284968614578247, |
| "learning_rate": 9.987367581483705e-05, |
| "loss": 0.122, |
| "step": 430 |
| }, |
| { |
| "grad_norm": 1.0914318561553955, |
| "learning_rate": 9.985334150666592e-05, |
| "loss": 0.1172, |
| "step": 440 |
| }, |
| { |
| "grad_norm": 1.1247401237487793, |
| "learning_rate": 9.983149278940526e-05, |
| "loss": 0.1097, |
| "step": 450 |
| }, |
| { |
| "grad_norm": 1.192862868309021, |
| "learning_rate": 9.980813032675974e-05, |
| "loss": 0.1, |
| "step": 460 |
| }, |
| { |
| "grad_norm": 0.8659683465957642, |
| "learning_rate": 9.978325482841753e-05, |
| "loss": 0.1027, |
| "step": 470 |
| }, |
| { |
| "grad_norm": 1.0352890491485596, |
| "learning_rate": 9.975686705002867e-05, |
| "loss": 0.1003, |
| "step": 480 |
| }, |
| { |
| "grad_norm": 1.2678529024124146, |
| "learning_rate": 9.972896779318219e-05, |
| "loss": 0.0971, |
| "step": 490 |
| }, |
| { |
| "grad_norm": 1.1098188161849976, |
| "learning_rate": 9.969955790538175e-05, |
| "loss": 0.0992, |
| "step": 500 |
| }, |
| { |
| "grad_norm": 1.181512713432312, |
| "learning_rate": 9.966863828001982e-05, |
| "loss": 0.0965, |
| "step": 510 |
| }, |
| { |
| "grad_norm": 1.0185215473175049, |
| "learning_rate": 9.963620985635065e-05, |
| "loss": 0.0877, |
| "step": 520 |
| }, |
| { |
| "grad_norm": 1.1182595491409302, |
| "learning_rate": 9.960227361946164e-05, |
| "loss": 0.0843, |
| "step": 530 |
| }, |
| { |
| "grad_norm": 0.9148073196411133, |
| "learning_rate": 9.95668306002435e-05, |
| "loss": 0.0923, |
| "step": 540 |
| }, |
| { |
| "grad_norm": 0.9969584941864014, |
| "learning_rate": 9.952988187535886e-05, |
| "loss": 0.0849, |
| "step": 550 |
| }, |
| { |
| "grad_norm": 1.2698750495910645, |
| "learning_rate": 9.949142856720961e-05, |
| "loss": 0.0835, |
| "step": 560 |
| }, |
| { |
| "grad_norm": 1.0291186571121216, |
| "learning_rate": 9.945147184390278e-05, |
| "loss": 0.0878, |
| "step": 570 |
| }, |
| { |
| "grad_norm": 0.9522308111190796, |
| "learning_rate": 9.941001291921512e-05, |
| "loss": 0.0823, |
| "step": 580 |
| }, |
| { |
| "grad_norm": 1.0067387819290161, |
| "learning_rate": 9.936705305255612e-05, |
| "loss": 0.084, |
| "step": 590 |
| }, |
| { |
| "grad_norm": 1.1268168687820435, |
| "learning_rate": 9.932259354892984e-05, |
| "loss": 0.0824, |
| "step": 600 |
| }, |
| { |
| "grad_norm": 0.8802523016929626, |
| "learning_rate": 9.927663575889521e-05, |
| "loss": 0.0792, |
| "step": 610 |
| }, |
| { |
| "grad_norm": 1.3417941331863403, |
| "learning_rate": 9.922918107852504e-05, |
| "loss": 0.0811, |
| "step": 620 |
| }, |
| { |
| "grad_norm": 1.097968578338623, |
| "learning_rate": 9.918023094936363e-05, |
| "loss": 0.077, |
| "step": 630 |
| }, |
| { |
| "grad_norm": 1.0577588081359863, |
| "learning_rate": 9.912978685838294e-05, |
| "loss": 0.0802, |
| "step": 640 |
| }, |
| { |
| "grad_norm": 1.1714197397232056, |
| "learning_rate": 9.90778503379374e-05, |
| "loss": 0.078, |
| "step": 650 |
| }, |
| { |
| "grad_norm": 0.8812937140464783, |
| "learning_rate": 9.902442296571743e-05, |
| "loss": 0.0708, |
| "step": 660 |
| }, |
| { |
| "grad_norm": 0.9380112886428833, |
| "learning_rate": 9.896950636470147e-05, |
| "loss": 0.0803, |
| "step": 670 |
| }, |
| { |
| "grad_norm": 1.1852452754974365, |
| "learning_rate": 9.891310220310666e-05, |
| "loss": 0.0757, |
| "step": 680 |
| }, |
| { |
| "grad_norm": 1.0475136041641235, |
| "learning_rate": 9.885521219433823e-05, |
| "loss": 0.0727, |
| "step": 690 |
| }, |
| { |
| "grad_norm": 1.1130269765853882, |
| "learning_rate": 9.879583809693738e-05, |
| "loss": 0.0711, |
| "step": 700 |
| }, |
| { |
| "grad_norm": 0.9928076863288879, |
| "learning_rate": 9.873498171452789e-05, |
| "loss": 0.0703, |
| "step": 710 |
| }, |
| { |
| "grad_norm": 0.979897141456604, |
| "learning_rate": 9.867264489576135e-05, |
| "loss": 0.0687, |
| "step": 720 |
| }, |
| { |
| "grad_norm": 1.0989563465118408, |
| "learning_rate": 9.860882953426099e-05, |
| "loss": 0.0747, |
| "step": 730 |
| }, |
| { |
| "grad_norm": 1.0301982164382935, |
| "learning_rate": 9.854353756856412e-05, |
| "loss": 0.0699, |
| "step": 740 |
| }, |
| { |
| "grad_norm": 1.101908802986145, |
| "learning_rate": 9.847677098206332e-05, |
| "loss": 0.069, |
| "step": 750 |
| }, |
| { |
| "grad_norm": 0.8733094334602356, |
| "learning_rate": 9.840853180294608e-05, |
| "loss": 0.0672, |
| "step": 760 |
| }, |
| { |
| "grad_norm": 1.0546811819076538, |
| "learning_rate": 9.833882210413332e-05, |
| "loss": 0.0706, |
| "step": 770 |
| }, |
| { |
| "grad_norm": 0.8678887486457825, |
| "learning_rate": 9.826764400321633e-05, |
| "loss": 0.0702, |
| "step": 780 |
| }, |
| { |
| "grad_norm": 0.8769698739051819, |
| "learning_rate": 9.819499966239243e-05, |
| "loss": 0.0678, |
| "step": 790 |
| }, |
| { |
| "grad_norm": 1.1157478094100952, |
| "learning_rate": 9.812089128839938e-05, |
| "loss": 0.0693, |
| "step": 800 |
| }, |
| { |
| "grad_norm": 1.0164200067520142, |
| "learning_rate": 9.804532113244828e-05, |
| "loss": 0.0624, |
| "step": 810 |
| }, |
| { |
| "grad_norm": 0.915485680103302, |
| "learning_rate": 9.796829149015517e-05, |
| "loss": 0.0647, |
| "step": 820 |
| }, |
| { |
| "grad_norm": 0.8830865025520325, |
| "learning_rate": 9.788980470147132e-05, |
| "loss": 0.0613, |
| "step": 830 |
| }, |
| { |
| "grad_norm": 1.0174789428710938, |
| "learning_rate": 9.780986315061218e-05, |
| "loss": 0.0641, |
| "step": 840 |
| }, |
| { |
| "grad_norm": 0.7468952536582947, |
| "learning_rate": 9.772846926598491e-05, |
| "loss": 0.0716, |
| "step": 850 |
| }, |
| { |
| "grad_norm": 0.7474204301834106, |
| "learning_rate": 9.76456255201146e-05, |
| "loss": 0.0636, |
| "step": 860 |
| }, |
| { |
| "grad_norm": 1.0349617004394531, |
| "learning_rate": 9.756133442956923e-05, |
| "loss": 0.0612, |
| "step": 870 |
| }, |
| { |
| "grad_norm": 0.8907390236854553, |
| "learning_rate": 9.747559855488313e-05, |
| "loss": 0.0656, |
| "step": 880 |
| }, |
| { |
| "grad_norm": 0.8625577092170715, |
| "learning_rate": 9.73884205004793e-05, |
| "loss": 0.0637, |
| "step": 890 |
| }, |
| { |
| "grad_norm": 0.895968496799469, |
| "learning_rate": 9.729980291459019e-05, |
| "loss": 0.0635, |
| "step": 900 |
| }, |
| { |
| "grad_norm": 0.9742909073829651, |
| "learning_rate": 9.720974848917735e-05, |
| "loss": 0.0596, |
| "step": 910 |
| }, |
| { |
| "grad_norm": 0.7080522775650024, |
| "learning_rate": 9.711825995984957e-05, |
| "loss": 0.0604, |
| "step": 920 |
| }, |
| { |
| "grad_norm": 0.7485001087188721, |
| "learning_rate": 9.702534010577991e-05, |
| "loss": 0.0627, |
| "step": 930 |
| }, |
| { |
| "grad_norm": 0.8010299801826477, |
| "learning_rate": 9.693099174962103e-05, |
| "loss": 0.0584, |
| "step": 940 |
| }, |
| { |
| "grad_norm": 0.8207157254219055, |
| "learning_rate": 9.683521775741977e-05, |
| "loss": 0.0606, |
| "step": 950 |
| }, |
| { |
| "grad_norm": 0.7718232870101929, |
| "learning_rate": 9.673802103852979e-05, |
| "loss": 0.0586, |
| "step": 960 |
| }, |
| { |
| "grad_norm": 1.0081161260604858, |
| "learning_rate": 9.663940454552342e-05, |
| "loss": 0.0595, |
| "step": 970 |
| }, |
| { |
| "grad_norm": 0.8325558304786682, |
| "learning_rate": 9.65393712741018e-05, |
| "loss": 0.0581, |
| "step": 980 |
| }, |
| { |
| "grad_norm": 0.9128422737121582, |
| "learning_rate": 9.6437924263004e-05, |
| "loss": 0.0613, |
| "step": 990 |
| }, |
| { |
| "grad_norm": 0.8013613224029541, |
| "learning_rate": 9.63350665939146e-05, |
| "loss": 0.059, |
| "step": 1000 |
| }, |
| { |
| "grad_norm": 0.8024020791053772, |
| "learning_rate": 9.623080139137023e-05, |
| "loss": 0.0585, |
| "step": 1010 |
| }, |
| { |
| "grad_norm": 0.8608654737472534, |
| "learning_rate": 9.612513182266447e-05, |
| "loss": 0.0573, |
| "step": 1020 |
| }, |
| { |
| "grad_norm": 0.7992358803749084, |
| "learning_rate": 9.601806109775179e-05, |
| "loss": 0.0588, |
| "step": 1030 |
| }, |
| { |
| "grad_norm": 0.9951710104942322, |
| "learning_rate": 9.590959246914995e-05, |
| "loss": 0.0549, |
| "step": 1040 |
| }, |
| { |
| "grad_norm": 0.7153400778770447, |
| "learning_rate": 9.579972923184122e-05, |
| "loss": 0.0581, |
| "step": 1050 |
| }, |
| { |
| "grad_norm": 0.8274824023246765, |
| "learning_rate": 9.568847472317232e-05, |
| "loss": 0.0528, |
| "step": 1060 |
| }, |
| { |
| "grad_norm": 0.6790134906768799, |
| "learning_rate": 9.557583232275303e-05, |
| "loss": 0.0554, |
| "step": 1070 |
| }, |
| { |
| "grad_norm": 0.7177821397781372, |
| "learning_rate": 9.546180545235344e-05, |
| "loss": 0.0525, |
| "step": 1080 |
| }, |
| { |
| "grad_norm": 0.8989811539649963, |
| "learning_rate": 9.534639757580013e-05, |
| "loss": 0.0515, |
| "step": 1090 |
| }, |
| { |
| "grad_norm": 0.8031622767448425, |
| "learning_rate": 9.522961219887092e-05, |
| "loss": 0.0564, |
| "step": 1100 |
| }, |
| { |
| "grad_norm": 0.8315763473510742, |
| "learning_rate": 9.511145286918828e-05, |
| "loss": 0.0567, |
| "step": 1110 |
| }, |
| { |
| "grad_norm": 0.7631978988647461, |
| "learning_rate": 9.499192317611167e-05, |
| "loss": 0.0524, |
| "step": 1120 |
| }, |
| { |
| "grad_norm": 0.8047354817390442, |
| "learning_rate": 9.487102675062851e-05, |
| "loss": 0.0563, |
| "step": 1130 |
| }, |
| { |
| "grad_norm": 0.5823233127593994, |
| "learning_rate": 9.474876726524374e-05, |
| "loss": 0.0507, |
| "step": 1140 |
| }, |
| { |
| "grad_norm": 0.7840980887413025, |
| "learning_rate": 9.462514843386845e-05, |
| "loss": 0.0522, |
| "step": 1150 |
| }, |
| { |
| "grad_norm": 0.7950931191444397, |
| "learning_rate": 9.450017401170689e-05, |
| "loss": 0.0544, |
| "step": 1160 |
| }, |
| { |
| "grad_norm": 0.7812637090682983, |
| "learning_rate": 9.437384779514256e-05, |
| "loss": 0.0538, |
| "step": 1170 |
| }, |
| { |
| "grad_norm": 0.8743076920509338, |
| "learning_rate": 9.424617362162271e-05, |
| "loss": 0.0551, |
| "step": 1180 |
| }, |
| { |
| "grad_norm": 0.7778111100196838, |
| "learning_rate": 9.411715536954196e-05, |
| "loss": 0.0515, |
| "step": 1190 |
| }, |
| { |
| "grad_norm": 0.799373209476471, |
| "learning_rate": 9.39867969581243e-05, |
| "loss": 0.0514, |
| "step": 1200 |
| }, |
| { |
| "grad_norm": 0.7291685342788696, |
| "learning_rate": 9.385510234730415e-05, |
| "loss": 0.0524, |
| "step": 1210 |
| }, |
| { |
| "grad_norm": 0.830124020576477, |
| "learning_rate": 9.372207553760603e-05, |
| "loss": 0.0506, |
| "step": 1220 |
| }, |
| { |
| "grad_norm": 0.6252336502075195, |
| "learning_rate": 9.358772057002312e-05, |
| "loss": 0.0502, |
| "step": 1230 |
| }, |
| { |
| "grad_norm": 0.7802227735519409, |
| "learning_rate": 9.345204152589428e-05, |
| "loss": 0.0505, |
| "step": 1240 |
| }, |
| { |
| "grad_norm": 0.7094554901123047, |
| "learning_rate": 9.331504252678037e-05, |
| "loss": 0.0537, |
| "step": 1250 |
| }, |
| { |
| "grad_norm": 0.7272769808769226, |
| "learning_rate": 9.317672773433876e-05, |
| "loss": 0.0506, |
| "step": 1260 |
| }, |
| { |
| "grad_norm": 0.6488326191902161, |
| "learning_rate": 9.30371013501972e-05, |
| "loss": 0.0484, |
| "step": 1270 |
| }, |
| { |
| "grad_norm": 0.6355553865432739, |
| "learning_rate": 9.289616761582587e-05, |
| "loss": 0.0544, |
| "step": 1280 |
| }, |
| { |
| "grad_norm": 0.769917905330658, |
| "learning_rate": 9.275393081240882e-05, |
| "loss": 0.048, |
| "step": 1290 |
| }, |
| { |
| "grad_norm": 0.6785501837730408, |
| "learning_rate": 9.261039526071374e-05, |
| "loss": 0.0484, |
| "step": 1300 |
| }, |
| { |
| "grad_norm": 0.7995139360427856, |
| "learning_rate": 9.246556532096078e-05, |
| "loss": 0.05, |
| "step": 1310 |
| }, |
| { |
| "grad_norm": 0.7101492285728455, |
| "learning_rate": 9.231944539269009e-05, |
| "loss": 0.0499, |
| "step": 1320 |
| }, |
| { |
| "grad_norm": 0.6292925477027893, |
| "learning_rate": 9.217203991462815e-05, |
| "loss": 0.0499, |
| "step": 1330 |
| }, |
| { |
| "grad_norm": 0.6308528780937195, |
| "learning_rate": 9.202335336455296e-05, |
| "loss": 0.049, |
| "step": 1340 |
| }, |
| { |
| "grad_norm": 0.6749176979064941, |
| "learning_rate": 9.187339025915802e-05, |
| "loss": 0.0488, |
| "step": 1350 |
| }, |
| { |
| "grad_norm": 0.5973607897758484, |
| "learning_rate": 9.17221551539151e-05, |
| "loss": 0.0475, |
| "step": 1360 |
| }, |
| { |
| "grad_norm": 0.6545643210411072, |
| "learning_rate": 9.156965264293586e-05, |
| "loss": 0.0476, |
| "step": 1370 |
| }, |
| { |
| "grad_norm": 0.6092913746833801, |
| "learning_rate": 9.141588735883232e-05, |
| "loss": 0.0433, |
| "step": 1380 |
| }, |
| { |
| "grad_norm": 0.5947241187095642, |
| "learning_rate": 9.126086397257612e-05, |
| "loss": 0.0471, |
| "step": 1390 |
| }, |
| { |
| "grad_norm": 0.5612359046936035, |
| "learning_rate": 9.110458719335659e-05, |
| "loss": 0.0463, |
| "step": 1400 |
| }, |
| { |
| "grad_norm": 0.654656708240509, |
| "learning_rate": 9.094706176843777e-05, |
| "loss": 0.0486, |
| "step": 1410 |
| }, |
| { |
| "grad_norm": 0.7321748733520508, |
| "learning_rate": 9.078829248301417e-05, |
| "loss": 0.0451, |
| "step": 1420 |
| }, |
| { |
| "grad_norm": 0.7481226325035095, |
| "learning_rate": 9.062828416006539e-05, |
| "loss": 0.0503, |
| "step": 1430 |
| }, |
| { |
| "grad_norm": 0.6706563234329224, |
| "learning_rate": 9.046704166020961e-05, |
| "loss": 0.0472, |
| "step": 1440 |
| }, |
| { |
| "grad_norm": 0.6942538619041443, |
| "learning_rate": 9.030456988155596e-05, |
| "loss": 0.0462, |
| "step": 1450 |
| }, |
| { |
| "grad_norm": 0.65287184715271, |
| "learning_rate": 9.014087375955573e-05, |
| "loss": 0.0469, |
| "step": 1460 |
| }, |
| { |
| "grad_norm": 0.7019280195236206, |
| "learning_rate": 8.997595826685243e-05, |
| "loss": 0.0514, |
| "step": 1470 |
| }, |
| { |
| "grad_norm": 0.6150776147842407, |
| "learning_rate": 8.980982841313074e-05, |
| "loss": 0.0466, |
| "step": 1480 |
| }, |
| { |
| "grad_norm": 0.784782350063324, |
| "learning_rate": 8.964248924496435e-05, |
| "loss": 0.0434, |
| "step": 1490 |
| }, |
| { |
| "grad_norm": 0.6784024834632874, |
| "learning_rate": 8.947394584566258e-05, |
| "loss": 0.0438, |
| "step": 1500 |
| }, |
| { |
| "grad_norm": 0.5981051921844482, |
| "learning_rate": 8.930420333511606e-05, |
| "loss": 0.0427, |
| "step": 1510 |
| }, |
| { |
| "grad_norm": 0.7331579923629761, |
| "learning_rate": 8.913326686964117e-05, |
| "loss": 0.0432, |
| "step": 1520 |
| }, |
| { |
| "grad_norm": 0.6730307936668396, |
| "learning_rate": 8.89611416418234e-05, |
| "loss": 0.0424, |
| "step": 1530 |
| }, |
| { |
| "grad_norm": 0.5771387219429016, |
| "learning_rate": 8.878783288035957e-05, |
| "loss": 0.0432, |
| "step": 1540 |
| }, |
| { |
| "grad_norm": 0.7928068041801453, |
| "learning_rate": 8.86133458498991e-05, |
| "loss": 0.0475, |
| "step": 1550 |
| }, |
| { |
| "grad_norm": 0.6628245115280151, |
| "learning_rate": 8.843768585088393e-05, |
| "loss": 0.0432, |
| "step": 1560 |
| }, |
| { |
| "grad_norm": 0.7262830138206482, |
| "learning_rate": 8.82608582193877e-05, |
| "loss": 0.0451, |
| "step": 1570 |
| }, |
| { |
| "grad_norm": 0.6896581649780273, |
| "learning_rate": 8.80828683269535e-05, |
| "loss": 0.0429, |
| "step": 1580 |
| }, |
| { |
| "grad_norm": 0.6019271016120911, |
| "learning_rate": 8.790372158043074e-05, |
| "loss": 0.0416, |
| "step": 1590 |
| }, |
| { |
| "grad_norm": 0.6586809754371643, |
| "learning_rate": 8.772342342181095e-05, |
| "loss": 0.0435, |
| "step": 1600 |
| }, |
| { |
| "grad_norm": 0.741075336933136, |
| "learning_rate": 8.75419793280624e-05, |
| "loss": 0.0428, |
| "step": 1610 |
| }, |
| { |
| "grad_norm": 0.7138071656227112, |
| "learning_rate": 8.735939481096378e-05, |
| "loss": 0.0415, |
| "step": 1620 |
| }, |
| { |
| "grad_norm": 0.665623128414154, |
| "learning_rate": 8.717567541693673e-05, |
| "loss": 0.0437, |
| "step": 1630 |
| }, |
| { |
| "grad_norm": 0.6723113059997559, |
| "learning_rate": 8.699082672687734e-05, |
| "loss": 0.0442, |
| "step": 1640 |
| }, |
| { |
| "grad_norm": 0.5757609605789185, |
| "learning_rate": 8.680485435598673e-05, |
| "loss": 0.0473, |
| "step": 1650 |
| }, |
| { |
| "grad_norm": 0.646248459815979, |
| "learning_rate": 8.661776395360029e-05, |
| "loss": 0.0443, |
| "step": 1660 |
| }, |
| { |
| "grad_norm": 0.7440095543861389, |
| "learning_rate": 8.642956120301626e-05, |
| "loss": 0.0414, |
| "step": 1670 |
| }, |
| { |
| "grad_norm": 0.6682982444763184, |
| "learning_rate": 8.624025182132292e-05, |
| "loss": 0.042, |
| "step": 1680 |
| }, |
| { |
| "grad_norm": 0.6209063529968262, |
| "learning_rate": 8.604984155922506e-05, |
| "loss": 0.0422, |
| "step": 1690 |
| }, |
| { |
| "grad_norm": 0.6250181198120117, |
| "learning_rate": 8.585833620086918e-05, |
| "loss": 0.042, |
| "step": 1700 |
| }, |
| { |
| "grad_norm": 0.709252655506134, |
| "learning_rate": 8.566574156366784e-05, |
| "loss": 0.0369, |
| "step": 1710 |
| }, |
| { |
| "grad_norm": 0.783593475818634, |
| "learning_rate": 8.547206349812298e-05, |
| "loss": 0.0445, |
| "step": 1720 |
| }, |
| { |
| "grad_norm": 0.5931394100189209, |
| "learning_rate": 8.527730788764805e-05, |
| "loss": 0.0449, |
| "step": 1730 |
| }, |
| { |
| "grad_norm": 0.5985734462738037, |
| "learning_rate": 8.508148064838948e-05, |
| "loss": 0.0412, |
| "step": 1740 |
| }, |
| { |
| "grad_norm": 0.528599739074707, |
| "learning_rate": 8.488458772904684e-05, |
| "loss": 0.0398, |
| "step": 1750 |
| }, |
| { |
| "grad_norm": 0.6593722701072693, |
| "learning_rate": 8.468663511069217e-05, |
| "loss": 0.0408, |
| "step": 1760 |
| }, |
| { |
| "grad_norm": 0.5931499600410461, |
| "learning_rate": 8.448762880658825e-05, |
| "loss": 0.0414, |
| "step": 1770 |
| }, |
| { |
| "grad_norm": 0.5673992037773132, |
| "learning_rate": 8.428757486200603e-05, |
| "loss": 0.041, |
| "step": 1780 |
| }, |
| { |
| "grad_norm": 0.7802947759628296, |
| "learning_rate": 8.40864793540409e-05, |
| "loss": 0.0421, |
| "step": 1790 |
| }, |
| { |
| "grad_norm": 0.5950642228126526, |
| "learning_rate": 8.388434839142813e-05, |
| "loss": 0.0424, |
| "step": 1800 |
| }, |
| { |
| "grad_norm": 0.6841787099838257, |
| "learning_rate": 8.368118811435726e-05, |
| "loss": 0.0391, |
| "step": 1810 |
| }, |
| { |
| "grad_norm": 0.5789716839790344, |
| "learning_rate": 8.347700469428564e-05, |
| "loss": 0.0386, |
| "step": 1820 |
| }, |
| { |
| "grad_norm": 0.6306881904602051, |
| "learning_rate": 8.327180433375091e-05, |
| "loss": 0.0404, |
| "step": 1830 |
| }, |
| { |
| "grad_norm": 0.5804703831672668, |
| "learning_rate": 8.306559326618259e-05, |
| "loss": 0.0392, |
| "step": 1840 |
| }, |
| { |
| "grad_norm": 0.6453599333763123, |
| "learning_rate": 8.285837775571276e-05, |
| "loss": 0.0398, |
| "step": 1850 |
| }, |
| { |
| "grad_norm": 0.5413265228271484, |
| "learning_rate": 8.265016409698573e-05, |
| "loss": 0.0389, |
| "step": 1860 |
| }, |
| { |
| "grad_norm": 0.5259561538696289, |
| "learning_rate": 8.244095861496686e-05, |
| "loss": 0.0389, |
| "step": 1870 |
| }, |
| { |
| "grad_norm": 0.6392974853515625, |
| "learning_rate": 8.223076766475035e-05, |
| "loss": 0.0404, |
| "step": 1880 |
| }, |
| { |
| "grad_norm": 0.7087792754173279, |
| "learning_rate": 8.201959763136633e-05, |
| "loss": 0.0388, |
| "step": 1890 |
| }, |
| { |
| "grad_norm": 0.7540794610977173, |
| "learning_rate": 8.180745492958674e-05, |
| "loss": 0.0419, |
| "step": 1900 |
| }, |
| { |
| "grad_norm": 0.5628899335861206, |
| "learning_rate": 8.159434600373061e-05, |
| "loss": 0.0375, |
| "step": 1910 |
| }, |
| { |
| "grad_norm": 0.5828471779823303, |
| "learning_rate": 8.138027732746818e-05, |
| "loss": 0.0394, |
| "step": 1920 |
| }, |
| { |
| "grad_norm": 0.6918069124221802, |
| "learning_rate": 8.116525540362434e-05, |
| "loss": 0.0377, |
| "step": 1930 |
| }, |
| { |
| "grad_norm": 0.5691211819648743, |
| "learning_rate": 8.094928676398101e-05, |
| "loss": 0.0389, |
| "step": 1940 |
| }, |
| { |
| "grad_norm": 0.5968996286392212, |
| "learning_rate": 8.073237796907882e-05, |
| "loss": 0.0361, |
| "step": 1950 |
| }, |
| { |
| "grad_norm": 0.5921427011489868, |
| "learning_rate": 8.051453560801772e-05, |
| "loss": 0.0433, |
| "step": 1960 |
| }, |
| { |
| "grad_norm": 0.5701543688774109, |
| "learning_rate": 8.029576629825687e-05, |
| "loss": 0.0368, |
| "step": 1970 |
| }, |
| { |
| "grad_norm": 0.6130271553993225, |
| "learning_rate": 8.007607668541362e-05, |
| "loss": 0.0395, |
| "step": 1980 |
| }, |
| { |
| "grad_norm": 0.6060221195220947, |
| "learning_rate": 7.985547344306161e-05, |
| "loss": 0.0438, |
| "step": 1990 |
| }, |
| { |
| "grad_norm": 0.709045946598053, |
| "learning_rate": 7.963396327252812e-05, |
| "loss": 0.0414, |
| "step": 2000 |
| }, |
| { |
| "grad_norm": 0.6804901361465454, |
| "learning_rate": 7.941155290269038e-05, |
| "loss": 0.0394, |
| "step": 2010 |
| }, |
| { |
| "grad_norm": 0.5408011078834534, |
| "learning_rate": 7.918824908977123e-05, |
| "loss": 0.0367, |
| "step": 2020 |
| }, |
| { |
| "grad_norm": 0.554338812828064, |
| "learning_rate": 7.896405861713394e-05, |
| "loss": 0.0356, |
| "step": 2030 |
| }, |
| { |
| "grad_norm": 0.711392879486084, |
| "learning_rate": 7.873898829507606e-05, |
| "loss": 0.0371, |
| "step": 2040 |
| }, |
| { |
| "grad_norm": 0.6779384613037109, |
| "learning_rate": 7.851304496062254e-05, |
| "loss": 0.038, |
| "step": 2050 |
| }, |
| { |
| "grad_norm": 0.6775013208389282, |
| "learning_rate": 7.828623547731818e-05, |
| "loss": 0.038, |
| "step": 2060 |
| }, |
| { |
| "grad_norm": 0.5738393664360046, |
| "learning_rate": 7.80585667350189e-05, |
| "loss": 0.0388, |
| "step": 2070 |
| }, |
| { |
| "grad_norm": 0.5050686001777649, |
| "learning_rate": 7.783004564968263e-05, |
| "loss": 0.0381, |
| "step": 2080 |
| }, |
| { |
| "grad_norm": 0.6223453283309937, |
| "learning_rate": 7.760067916315921e-05, |
| "loss": 0.0382, |
| "step": 2090 |
| }, |
| { |
| "grad_norm": 0.6240858435630798, |
| "learning_rate": 7.737047424297941e-05, |
| "loss": 0.0345, |
| "step": 2100 |
| }, |
| { |
| "grad_norm": 0.5866036415100098, |
| "learning_rate": 7.713943788214337e-05, |
| "loss": 0.0341, |
| "step": 2110 |
| }, |
| { |
| "grad_norm": 0.6695197224617004, |
| "learning_rate": 7.690757709890812e-05, |
| "loss": 0.0354, |
| "step": 2120 |
| }, |
| { |
| "grad_norm": 0.5520651340484619, |
| "learning_rate": 7.66748989365744e-05, |
| "loss": 0.0366, |
| "step": 2130 |
| }, |
| { |
| "grad_norm": 0.5425397157669067, |
| "learning_rate": 7.644141046327271e-05, |
| "loss": 0.0339, |
| "step": 2140 |
| }, |
| { |
| "grad_norm": 0.5396847128868103, |
| "learning_rate": 7.620711877174866e-05, |
| "loss": 0.037, |
| "step": 2150 |
| }, |
| { |
| "grad_norm": 0.633583128452301, |
| "learning_rate": 7.597203097914732e-05, |
| "loss": 0.0358, |
| "step": 2160 |
| }, |
| { |
| "grad_norm": 0.5030661821365356, |
| "learning_rate": 7.573615422679726e-05, |
| "loss": 0.0372, |
| "step": 2170 |
| }, |
| { |
| "grad_norm": 0.7198052406311035, |
| "learning_rate": 7.549949567999345e-05, |
| "loss": 0.0344, |
| "step": 2180 |
| }, |
| { |
| "grad_norm": 0.5248534679412842, |
| "learning_rate": 7.526206252777968e-05, |
| "loss": 0.0382, |
| "step": 2190 |
| }, |
| { |
| "grad_norm": 0.6668030619621277, |
| "learning_rate": 7.50238619827301e-05, |
| "loss": 0.0375, |
| "step": 2200 |
| }, |
| { |
| "grad_norm": 0.6512902975082397, |
| "learning_rate": 7.478490128073022e-05, |
| "loss": 0.0365, |
| "step": 2210 |
| }, |
| { |
| "grad_norm": 0.5244461894035339, |
| "learning_rate": 7.454518768075704e-05, |
| "loss": 0.0369, |
| "step": 2220 |
| }, |
| { |
| "grad_norm": 0.5693942308425903, |
| "learning_rate": 7.430472846465856e-05, |
| "loss": 0.0344, |
| "step": 2230 |
| }, |
| { |
| "grad_norm": 0.6084948182106018, |
| "learning_rate": 7.406353093693253e-05, |
| "loss": 0.035, |
| "step": 2240 |
| }, |
| { |
| "grad_norm": 0.536939263343811, |
| "learning_rate": 7.382160242450469e-05, |
| "loss": 0.0356, |
| "step": 2250 |
| }, |
| { |
| "grad_norm": 0.5331522226333618, |
| "learning_rate": 7.357895027650598e-05, |
| "loss": 0.031, |
| "step": 2260 |
| }, |
| { |
| "grad_norm": 0.45928630232810974, |
| "learning_rate": 7.333558186404958e-05, |
| "loss": 0.0327, |
| "step": 2270 |
| }, |
| { |
| "grad_norm": 0.528089165687561, |
| "learning_rate": 7.309150458000668e-05, |
| "loss": 0.0359, |
| "step": 2280 |
| }, |
| { |
| "grad_norm": 0.581315279006958, |
| "learning_rate": 7.284672583878219e-05, |
| "loss": 0.0343, |
| "step": 2290 |
| }, |
| { |
| "grad_norm": 0.558525800704956, |
| "learning_rate": 7.260125307608929e-05, |
| "loss": 0.0367, |
| "step": 2300 |
| }, |
| { |
| "grad_norm": 0.47152066230773926, |
| "learning_rate": 7.235509374872373e-05, |
| "loss": 0.035, |
| "step": 2310 |
| }, |
| { |
| "grad_norm": 0.6010111570358276, |
| "learning_rate": 7.210825533433719e-05, |
| "loss": 0.0335, |
| "step": 2320 |
| }, |
| { |
| "grad_norm": 0.6121062636375427, |
| "learning_rate": 7.186074533121013e-05, |
| "loss": 0.0336, |
| "step": 2330 |
| }, |
| { |
| "grad_norm": 0.5350408554077148, |
| "learning_rate": 7.161257125802413e-05, |
| "loss": 0.0353, |
| "step": 2340 |
| }, |
| { |
| "grad_norm": 0.5239009857177734, |
| "learning_rate": 7.136374065363334e-05, |
| "loss": 0.037, |
| "step": 2350 |
| }, |
| { |
| "grad_norm": 0.4956763982772827, |
| "learning_rate": 7.11142610768356e-05, |
| "loss": 0.0354, |
| "step": 2360 |
| }, |
| { |
| "grad_norm": 0.5018975138664246, |
| "learning_rate": 7.086414010614276e-05, |
| "loss": 0.0338, |
| "step": 2370 |
| }, |
| { |
| "grad_norm": 0.4875252842903137, |
| "learning_rate": 7.061338533955043e-05, |
| "loss": 0.0362, |
| "step": 2380 |
| }, |
| { |
| "grad_norm": 0.47811827063560486, |
| "learning_rate": 7.036200439430725e-05, |
| "loss": 0.0376, |
| "step": 2390 |
| }, |
| { |
| "grad_norm": 0.5614078044891357, |
| "learning_rate": 7.01100049066835e-05, |
| "loss": 0.0339, |
| "step": 2400 |
| }, |
| { |
| "grad_norm": 0.6021232008934021, |
| "learning_rate": 6.985739453173903e-05, |
| "loss": 0.0372, |
| "step": 2410 |
| }, |
| { |
| "grad_norm": 0.5856548547744751, |
| "learning_rate": 6.960418094309085e-05, |
| "loss": 0.0353, |
| "step": 2420 |
| }, |
| { |
| "grad_norm": 0.46249493956565857, |
| "learning_rate": 6.93503718326799e-05, |
| "loss": 0.0334, |
| "step": 2430 |
| }, |
| { |
| "grad_norm": 0.5227417945861816, |
| "learning_rate": 6.909597491053751e-05, |
| "loss": 0.0342, |
| "step": 2440 |
| }, |
| { |
| "grad_norm": 0.607357382774353, |
| "learning_rate": 6.884099790455113e-05, |
| "loss": 0.0324, |
| "step": 2450 |
| }, |
| { |
| "grad_norm": 0.485953152179718, |
| "learning_rate": 6.858544856022952e-05, |
| "loss": 0.0348, |
| "step": 2460 |
| }, |
| { |
| "grad_norm": 0.571148157119751, |
| "learning_rate": 6.83293346404676e-05, |
| "loss": 0.0347, |
| "step": 2470 |
| }, |
| { |
| "grad_norm": 0.5217222571372986, |
| "learning_rate": 6.80726639253105e-05, |
| "loss": 0.0368, |
| "step": 2480 |
| }, |
| { |
| "grad_norm": 0.4487457573413849, |
| "learning_rate": 6.781544421171732e-05, |
| "loss": 0.0355, |
| "step": 2490 |
| }, |
| { |
| "grad_norm": 0.47729650139808655, |
| "learning_rate": 6.755768331332424e-05, |
| "loss": 0.0343, |
| "step": 2500 |
| }, |
| { |
| "grad_norm": 0.4894144535064697, |
| "learning_rate": 6.729938906020713e-05, |
| "loss": 0.0353, |
| "step": 2510 |
| }, |
| { |
| "grad_norm": 0.544179379940033, |
| "learning_rate": 6.704056929864376e-05, |
| "loss": 0.0331, |
| "step": 2520 |
| }, |
| { |
| "grad_norm": 0.6115988492965698, |
| "learning_rate": 6.67812318908754e-05, |
| "loss": 0.0326, |
| "step": 2530 |
| }, |
| { |
| "grad_norm": 0.5752000212669373, |
| "learning_rate": 6.6521384714868e-05, |
| "loss": 0.0312, |
| "step": 2540 |
| }, |
| { |
| "grad_norm": 0.47528618574142456, |
| "learning_rate": 6.626103566407295e-05, |
| "loss": 0.0331, |
| "step": 2550 |
| }, |
| { |
| "grad_norm": 0.5542522072792053, |
| "learning_rate": 6.600019264718713e-05, |
| "loss": 0.0327, |
| "step": 2560 |
| }, |
| { |
| "grad_norm": 0.5280784368515015, |
| "learning_rate": 6.573886358791285e-05, |
| "loss": 0.0347, |
| "step": 2570 |
| }, |
| { |
| "grad_norm": 0.5374977588653564, |
| "learning_rate": 6.547705642471703e-05, |
| "loss": 0.0331, |
| "step": 2580 |
| }, |
| { |
| "grad_norm": 0.3995784521102905, |
| "learning_rate": 6.521477911059008e-05, |
| "loss": 0.0287, |
| "step": 2590 |
| }, |
| { |
| "grad_norm": 0.43667104840278625, |
| "learning_rate": 6.495203961280434e-05, |
| "loss": 0.0327, |
| "step": 2600 |
| }, |
| { |
| "grad_norm": 0.5405910611152649, |
| "learning_rate": 6.468884591267204e-05, |
| "loss": 0.0325, |
| "step": 2610 |
| }, |
| { |
| "grad_norm": 0.46785178780555725, |
| "learning_rate": 6.44252060053028e-05, |
| "loss": 0.0318, |
| "step": 2620 |
| }, |
| { |
| "grad_norm": 0.45796945691108704, |
| "learning_rate": 6.416112789936086e-05, |
| "loss": 0.0331, |
| "step": 2630 |
| }, |
| { |
| "grad_norm": 0.4898403286933899, |
| "learning_rate": 6.389661961682173e-05, |
| "loss": 0.0317, |
| "step": 2640 |
| }, |
| { |
| "grad_norm": 0.5258901119232178, |
| "learning_rate": 6.363168919272846e-05, |
| "loss": 0.0317, |
| "step": 2650 |
| }, |
| { |
| "grad_norm": 0.492632120847702, |
| "learning_rate": 6.336634467494768e-05, |
| "loss": 0.0306, |
| "step": 2660 |
| }, |
| { |
| "grad_norm": 0.5009192824363708, |
| "learning_rate": 6.310059412392505e-05, |
| "loss": 0.0304, |
| "step": 2670 |
| }, |
| { |
| "grad_norm": 0.6297652721405029, |
| "learning_rate": 6.283444561244042e-05, |
| "loss": 0.0304, |
| "step": 2680 |
| }, |
| { |
| "grad_norm": 0.4868377149105072, |
| "learning_rate": 6.256790722536251e-05, |
| "loss": 0.0313, |
| "step": 2690 |
| }, |
| { |
| "grad_norm": 0.5541006922721863, |
| "learning_rate": 6.230098705940354e-05, |
| "loss": 0.0316, |
| "step": 2700 |
| }, |
| { |
| "grad_norm": 0.42766621708869934, |
| "learning_rate": 6.203369322287306e-05, |
| "loss": 0.0327, |
| "step": 2710 |
| }, |
| { |
| "grad_norm": 0.5170658826828003, |
| "learning_rate": 6.17660338354317e-05, |
| "loss": 0.0293, |
| "step": 2720 |
| }, |
| { |
| "grad_norm": 0.4898792505264282, |
| "learning_rate": 6.149801702784456e-05, |
| "loss": 0.0288, |
| "step": 2730 |
| }, |
| { |
| "grad_norm": 0.4858188033103943, |
| "learning_rate": 6.122965094173424e-05, |
| "loss": 0.031, |
| "step": 2740 |
| }, |
| { |
| "grad_norm": 0.5073441863059998, |
| "learning_rate": 6.0960943729333374e-05, |
| "loss": 0.034, |
| "step": 2750 |
| }, |
| { |
| "grad_norm": 0.4941282570362091, |
| "learning_rate": 6.069190355323717e-05, |
| "loss": 0.0305, |
| "step": 2760 |
| }, |
| { |
| "grad_norm": 0.4680149257183075, |
| "learning_rate": 6.042253858615532e-05, |
| "loss": 0.0308, |
| "step": 2770 |
| }, |
| { |
| "grad_norm": 0.4339468777179718, |
| "learning_rate": 6.015285701066382e-05, |
| "loss": 0.0333, |
| "step": 2780 |
| }, |
| { |
| "grad_norm": 0.46258655190467834, |
| "learning_rate": 5.988286701895631e-05, |
| "loss": 0.0349, |
| "step": 2790 |
| }, |
| { |
| "grad_norm": 0.490296870470047, |
| "learning_rate": 5.961257681259535e-05, |
| "loss": 0.0343, |
| "step": 2800 |
| }, |
| { |
| "grad_norm": 0.5121153593063354, |
| "learning_rate": 5.934199460226317e-05, |
| "loss": 0.0332, |
| "step": 2810 |
| }, |
| { |
| "grad_norm": 0.4576858878135681, |
| "learning_rate": 5.9071128607512285e-05, |
| "loss": 0.0308, |
| "step": 2820 |
| }, |
| { |
| "grad_norm": 0.4811716675758362, |
| "learning_rate": 5.8799987056515804e-05, |
| "loss": 0.0304, |
| "step": 2830 |
| }, |
| { |
| "grad_norm": 0.6127643585205078, |
| "learning_rate": 5.8528578185817514e-05, |
| "loss": 0.0318, |
| "step": 2840 |
| }, |
| { |
| "grad_norm": 0.48503780364990234, |
| "learning_rate": 5.825691024008162e-05, |
| "loss": 0.0294, |
| "step": 2850 |
| }, |
| { |
| "grad_norm": 0.555530846118927, |
| "learning_rate": 5.798499147184233e-05, |
| "loss": 0.0307, |
| "step": 2860 |
| }, |
| { |
| "grad_norm": 0.562579870223999, |
| "learning_rate": 5.771283014125317e-05, |
| "loss": 0.0338, |
| "step": 2870 |
| }, |
| { |
| "grad_norm": 0.5244818925857544, |
| "learning_rate": 5.7440434515836064e-05, |
| "loss": 0.0284, |
| "step": 2880 |
| }, |
| { |
| "grad_norm": 0.3919405937194824, |
| "learning_rate": 5.7167812870230094e-05, |
| "loss": 0.0305, |
| "step": 2890 |
| }, |
| { |
| "grad_norm": 0.46723607182502747, |
| "learning_rate": 5.689497348594035e-05, |
| "loss": 0.0292, |
| "step": 2900 |
| }, |
| { |
| "grad_norm": 0.47963953018188477, |
| "learning_rate": 5.662192465108613e-05, |
| "loss": 0.0303, |
| "step": 2910 |
| }, |
| { |
| "grad_norm": 0.4416669011116028, |
| "learning_rate": 5.634867466014932e-05, |
| "loss": 0.0282, |
| "step": 2920 |
| }, |
| { |
| "grad_norm": 0.3962218761444092, |
| "learning_rate": 5.607523181372234e-05, |
| "loss": 0.0308, |
| "step": 2930 |
| }, |
| { |
| "grad_norm": 0.4772116243839264, |
| "learning_rate": 5.5801604418256117e-05, |
| "loss": 0.0292, |
| "step": 2940 |
| }, |
| { |
| "grad_norm": 0.40191689133644104, |
| "learning_rate": 5.552780078580756e-05, |
| "loss": 0.0275, |
| "step": 2950 |
| }, |
| { |
| "grad_norm": 0.4422965943813324, |
| "learning_rate": 5.525382923378728e-05, |
| "loss": 0.0292, |
| "step": 2960 |
| }, |
| { |
| "grad_norm": 0.4391031563282013, |
| "learning_rate": 5.49796980847068e-05, |
| "loss": 0.0311, |
| "step": 2970 |
| }, |
| { |
| "grad_norm": 0.4302864372730255, |
| "learning_rate": 5.470541566592573e-05, |
| "loss": 0.0303, |
| "step": 2980 |
| }, |
| { |
| "grad_norm": 0.4752635359764099, |
| "learning_rate": 5.443099030939887e-05, |
| "loss": 0.0284, |
| "step": 2990 |
| }, |
| { |
| "grad_norm": 0.42526647448539734, |
| "learning_rate": 5.415643035142309e-05, |
| "loss": 0.0279, |
| "step": 3000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 6000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 1500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 128, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|