| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9998474446987032, | |
| "eval_steps": 500, | |
| "global_step": 3277, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0030511060259344014, | |
| "grad_norm": 4.5989179611206055, | |
| "learning_rate": 9e-06, | |
| "loss": 1.1046, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.006102212051868803, | |
| "grad_norm": 4.813294887542725, | |
| "learning_rate": 1.9e-05, | |
| "loss": 0.8003, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.009153318077803204, | |
| "grad_norm": 4.563686847686768, | |
| "learning_rate": 2.9e-05, | |
| "loss": 0.7886, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.012204424103737605, | |
| "grad_norm": 5.083153247833252, | |
| "learning_rate": 3.9000000000000006e-05, | |
| "loss": 0.7638, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.015255530129672006, | |
| "grad_norm": 4.403526306152344, | |
| "learning_rate": 4.9e-05, | |
| "loss": 0.7635, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.018306636155606407, | |
| "grad_norm": 3.6779983043670654, | |
| "learning_rate": 4.986055159590952e-05, | |
| "loss": 0.841, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.02135774218154081, | |
| "grad_norm": 3.7986652851104736, | |
| "learning_rate": 4.970560892469787e-05, | |
| "loss": 0.9439, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.02440884820747521, | |
| "grad_norm": 3.810528516769409, | |
| "learning_rate": 4.955066625348621e-05, | |
| "loss": 0.843, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.02745995423340961, | |
| "grad_norm": 3.7793450355529785, | |
| "learning_rate": 4.939572358227456e-05, | |
| "loss": 0.9459, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.03051106025934401, | |
| "grad_norm": 4.002682685852051, | |
| "learning_rate": 4.924078091106291e-05, | |
| "loss": 0.9242, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.033562166285278416, | |
| "grad_norm": 3.5819356441497803, | |
| "learning_rate": 4.908583823985126e-05, | |
| "loss": 0.8683, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.036613272311212815, | |
| "grad_norm": 3.728360891342163, | |
| "learning_rate": 4.893089556863961e-05, | |
| "loss": 0.9133, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.03966437833714721, | |
| "grad_norm": 4.5038981437683105, | |
| "learning_rate": 4.8775952897427956e-05, | |
| "loss": 0.8466, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.04271548436308162, | |
| "grad_norm": 3.0992960929870605, | |
| "learning_rate": 4.8621010226216305e-05, | |
| "loss": 0.8424, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.04576659038901602, | |
| "grad_norm": 3.688044786453247, | |
| "learning_rate": 4.8466067555004654e-05, | |
| "loss": 0.8283, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.04881769641495042, | |
| "grad_norm": 4.070577621459961, | |
| "learning_rate": 4.8311124883792996e-05, | |
| "loss": 0.9207, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.05186880244088482, | |
| "grad_norm": 3.9466311931610107, | |
| "learning_rate": 4.8156182212581345e-05, | |
| "loss": 0.8095, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.05491990846681922, | |
| "grad_norm": 3.0422556400299072, | |
| "learning_rate": 4.8001239541369694e-05, | |
| "loss": 0.8649, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.057971014492753624, | |
| "grad_norm": 3.414745330810547, | |
| "learning_rate": 4.784629687015804e-05, | |
| "loss": 0.8963, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.06102212051868802, | |
| "grad_norm": 3.7459030151367188, | |
| "learning_rate": 4.769135419894639e-05, | |
| "loss": 0.8352, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.06407322654462243, | |
| "grad_norm": 2.6838042736053467, | |
| "learning_rate": 4.753641152773474e-05, | |
| "loss": 0.8089, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.06712433257055683, | |
| "grad_norm": 5.595691680908203, | |
| "learning_rate": 4.738146885652309e-05, | |
| "loss": 0.8701, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.07017543859649122, | |
| "grad_norm": 3.1771981716156006, | |
| "learning_rate": 4.722652618531144e-05, | |
| "loss": 0.8714, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.07322654462242563, | |
| "grad_norm": 2.728522539138794, | |
| "learning_rate": 4.707158351409979e-05, | |
| "loss": 0.8655, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.07627765064836003, | |
| "grad_norm": 3.781033515930176, | |
| "learning_rate": 4.691664084288813e-05, | |
| "loss": 0.7953, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.07932875667429443, | |
| "grad_norm": 2.9846785068511963, | |
| "learning_rate": 4.676169817167648e-05, | |
| "loss": 0.8079, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.08237986270022883, | |
| "grad_norm": 3.5596115589141846, | |
| "learning_rate": 4.6606755500464835e-05, | |
| "loss": 0.8413, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.08543096872616324, | |
| "grad_norm": 3.188471555709839, | |
| "learning_rate": 4.645181282925318e-05, | |
| "loss": 0.8411, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.08848207475209764, | |
| "grad_norm": 4.387840270996094, | |
| "learning_rate": 4.629687015804153e-05, | |
| "loss": 0.9073, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.09153318077803203, | |
| "grad_norm": 3.842625379562378, | |
| "learning_rate": 4.6141927486829876e-05, | |
| "loss": 0.8269, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.09458428680396644, | |
| "grad_norm": 4.832952499389648, | |
| "learning_rate": 4.5986984815618225e-05, | |
| "loss": 0.9064, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.09763539282990084, | |
| "grad_norm": 2.8753535747528076, | |
| "learning_rate": 4.5832042144406574e-05, | |
| "loss": 0.8873, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.10068649885583524, | |
| "grad_norm": 2.5598573684692383, | |
| "learning_rate": 4.5677099473194916e-05, | |
| "loss": 0.8913, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.10373760488176964, | |
| "grad_norm": 2.9012229442596436, | |
| "learning_rate": 4.5522156801983265e-05, | |
| "loss": 0.7988, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.10678871090770405, | |
| "grad_norm": 2.888648748397827, | |
| "learning_rate": 4.536721413077162e-05, | |
| "loss": 0.8404, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.10983981693363844, | |
| "grad_norm": 3.794020891189575, | |
| "learning_rate": 4.521227145955996e-05, | |
| "loss": 0.8429, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.11289092295957284, | |
| "grad_norm": 3.925490617752075, | |
| "learning_rate": 4.505732878834831e-05, | |
| "loss": 0.8768, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.11594202898550725, | |
| "grad_norm": 3.4535255432128906, | |
| "learning_rate": 4.490238611713666e-05, | |
| "loss": 0.7951, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.11899313501144165, | |
| "grad_norm": 3.9904849529266357, | |
| "learning_rate": 4.474744344592501e-05, | |
| "loss": 0.8524, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.12204424103737604, | |
| "grad_norm": 2.6464405059814453, | |
| "learning_rate": 4.459250077471336e-05, | |
| "loss": 0.8194, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.12509534706331046, | |
| "grad_norm": 3.6185855865478516, | |
| "learning_rate": 4.443755810350171e-05, | |
| "loss": 0.7706, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.12814645308924486, | |
| "grad_norm": 4.222907543182373, | |
| "learning_rate": 4.428261543229005e-05, | |
| "loss": 0.7858, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.13119755911517925, | |
| "grad_norm": 2.8912553787231445, | |
| "learning_rate": 4.4127672761078406e-05, | |
| "loss": 0.7658, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.13424866514111367, | |
| "grad_norm": 2.9529941082000732, | |
| "learning_rate": 4.3972730089866755e-05, | |
| "loss": 0.8415, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.13729977116704806, | |
| "grad_norm": 3.296875238418579, | |
| "learning_rate": 4.38177874186551e-05, | |
| "loss": 0.818, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.14035087719298245, | |
| "grad_norm": 2.736250400543213, | |
| "learning_rate": 4.366284474744345e-05, | |
| "loss": 0.819, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.14340198321891687, | |
| "grad_norm": 3.2874772548675537, | |
| "learning_rate": 4.3507902076231796e-05, | |
| "loss": 0.8381, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.14645308924485126, | |
| "grad_norm": 2.9855446815490723, | |
| "learning_rate": 4.3352959405020145e-05, | |
| "loss": 0.7591, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.14950419527078565, | |
| "grad_norm": 3.37127947807312, | |
| "learning_rate": 4.3198016733808494e-05, | |
| "loss": 0.7815, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.15255530129672007, | |
| "grad_norm": 3.162848472595215, | |
| "learning_rate": 4.3043074062596836e-05, | |
| "loss": 0.8115, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.15560640732265446, | |
| "grad_norm": 3.0527734756469727, | |
| "learning_rate": 4.288813139138519e-05, | |
| "loss": 0.8871, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.15865751334858885, | |
| "grad_norm": 3.1467580795288086, | |
| "learning_rate": 4.273318872017354e-05, | |
| "loss": 0.9112, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.16170861937452327, | |
| "grad_norm": 3.414907455444336, | |
| "learning_rate": 4.257824604896188e-05, | |
| "loss": 0.8089, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.16475972540045766, | |
| "grad_norm": 3.9576640129089355, | |
| "learning_rate": 4.242330337775023e-05, | |
| "loss": 0.7847, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.16781083142639205, | |
| "grad_norm": 3.8652496337890625, | |
| "learning_rate": 4.226836070653858e-05, | |
| "loss": 0.801, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.17086193745232647, | |
| "grad_norm": 3.028764247894287, | |
| "learning_rate": 4.211341803532693e-05, | |
| "loss": 0.8323, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.17391304347826086, | |
| "grad_norm": 3.1638927459716797, | |
| "learning_rate": 4.195847536411528e-05, | |
| "loss": 0.8575, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.17696414950419528, | |
| "grad_norm": 3.3378586769104004, | |
| "learning_rate": 4.180353269290363e-05, | |
| "loss": 0.8048, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.18001525553012968, | |
| "grad_norm": 2.5703723430633545, | |
| "learning_rate": 4.164859002169198e-05, | |
| "loss": 0.8557, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.18306636155606407, | |
| "grad_norm": 4.101804733276367, | |
| "learning_rate": 4.1493647350480326e-05, | |
| "loss": 0.8358, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.18611746758199849, | |
| "grad_norm": 3.845407009124756, | |
| "learning_rate": 4.1338704679268675e-05, | |
| "loss": 0.8777, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.18916857360793288, | |
| "grad_norm": 3.370732545852661, | |
| "learning_rate": 4.118376200805702e-05, | |
| "loss": 0.8485, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.19221967963386727, | |
| "grad_norm": 2.168769121170044, | |
| "learning_rate": 4.102881933684537e-05, | |
| "loss": 0.7416, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.1952707856598017, | |
| "grad_norm": 3.819798707962036, | |
| "learning_rate": 4.087387666563372e-05, | |
| "loss": 0.9199, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.19832189168573608, | |
| "grad_norm": 3.19429612159729, | |
| "learning_rate": 4.0718933994422065e-05, | |
| "loss": 0.7747, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.20137299771167047, | |
| "grad_norm": 3.17229962348938, | |
| "learning_rate": 4.0563991323210414e-05, | |
| "loss": 0.7394, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.2044241037376049, | |
| "grad_norm": 2.316675901412964, | |
| "learning_rate": 4.040904865199876e-05, | |
| "loss": 0.7368, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.20747520976353928, | |
| "grad_norm": 3.350698471069336, | |
| "learning_rate": 4.025410598078711e-05, | |
| "loss": 0.8839, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.21052631578947367, | |
| "grad_norm": 2.5842721462249756, | |
| "learning_rate": 4.009916330957546e-05, | |
| "loss": 0.7665, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.2135774218154081, | |
| "grad_norm": 2.9108049869537354, | |
| "learning_rate": 3.99442206383638e-05, | |
| "loss": 0.8145, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.21662852784134248, | |
| "grad_norm": 2.9678006172180176, | |
| "learning_rate": 3.978927796715215e-05, | |
| "loss": 0.7719, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.21967963386727687, | |
| "grad_norm": 2.911322832107544, | |
| "learning_rate": 3.963433529594051e-05, | |
| "loss": 0.7354, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.2227307398932113, | |
| "grad_norm": 3.1599249839782715, | |
| "learning_rate": 3.947939262472885e-05, | |
| "loss": 0.7873, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.22578184591914569, | |
| "grad_norm": 2.6644296646118164, | |
| "learning_rate": 3.93244499535172e-05, | |
| "loss": 0.8311, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.2288329519450801, | |
| "grad_norm": 3.263995885848999, | |
| "learning_rate": 3.916950728230555e-05, | |
| "loss": 0.8185, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.2318840579710145, | |
| "grad_norm": 2.542343854904175, | |
| "learning_rate": 3.90145646110939e-05, | |
| "loss": 0.7769, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.2349351639969489, | |
| "grad_norm": 2.4859440326690674, | |
| "learning_rate": 3.8859621939882246e-05, | |
| "loss": 0.8328, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.2379862700228833, | |
| "grad_norm": 2.632722854614258, | |
| "learning_rate": 3.8704679268670596e-05, | |
| "loss": 0.7859, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.2410373760488177, | |
| "grad_norm": 3.209907054901123, | |
| "learning_rate": 3.854973659745894e-05, | |
| "loss": 0.8394, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.2440884820747521, | |
| "grad_norm": 2.4981114864349365, | |
| "learning_rate": 3.8394793926247294e-05, | |
| "loss": 0.8241, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.2471395881006865, | |
| "grad_norm": 2.5217268466949463, | |
| "learning_rate": 3.823985125503564e-05, | |
| "loss": 0.7501, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.2501906941266209, | |
| "grad_norm": 3.676504611968994, | |
| "learning_rate": 3.8084908583823985e-05, | |
| "loss": 0.8782, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.2532418001525553, | |
| "grad_norm": 2.651146173477173, | |
| "learning_rate": 3.7929965912612334e-05, | |
| "loss": 0.848, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.2562929061784897, | |
| "grad_norm": 3.1401736736297607, | |
| "learning_rate": 3.777502324140068e-05, | |
| "loss": 0.7991, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.2593440122044241, | |
| "grad_norm": 3.0251362323760986, | |
| "learning_rate": 3.762008057018903e-05, | |
| "loss": 0.8723, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.2623951182303585, | |
| "grad_norm": 2.1540679931640625, | |
| "learning_rate": 3.746513789897738e-05, | |
| "loss": 0.7478, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.2654462242562929, | |
| "grad_norm": 2.6065683364868164, | |
| "learning_rate": 3.731019522776572e-05, | |
| "loss": 0.7488, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.26849733028222733, | |
| "grad_norm": 2.697063684463501, | |
| "learning_rate": 3.715525255655408e-05, | |
| "loss": 0.7778, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.2715484363081617, | |
| "grad_norm": 3.4640157222747803, | |
| "learning_rate": 3.700030988534243e-05, | |
| "loss": 1.0205, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.2745995423340961, | |
| "grad_norm": 3.5426061153411865, | |
| "learning_rate": 3.684536721413077e-05, | |
| "loss": 0.762, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.2776506483600305, | |
| "grad_norm": 2.219358444213867, | |
| "learning_rate": 3.669042454291912e-05, | |
| "loss": 0.7996, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.2807017543859649, | |
| "grad_norm": 2.7404627799987793, | |
| "learning_rate": 3.6535481871707475e-05, | |
| "loss": 0.7611, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.2837528604118993, | |
| "grad_norm": 2.950205087661743, | |
| "learning_rate": 3.638053920049582e-05, | |
| "loss": 0.7685, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.28680396643783374, | |
| "grad_norm": 2.676258087158203, | |
| "learning_rate": 3.6225596529284167e-05, | |
| "loss": 0.7864, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.2898550724637681, | |
| "grad_norm": 2.5284500122070312, | |
| "learning_rate": 3.6070653858072516e-05, | |
| "loss": 0.7856, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.2929061784897025, | |
| "grad_norm": 2.2837953567504883, | |
| "learning_rate": 3.5915711186860865e-05, | |
| "loss": 0.7965, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.2959572845156369, | |
| "grad_norm": 3.223839044570923, | |
| "learning_rate": 3.5760768515649214e-05, | |
| "loss": 0.8123, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.2990083905415713, | |
| "grad_norm": 2.8053982257843018, | |
| "learning_rate": 3.560582584443756e-05, | |
| "loss": 0.7813, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.30205949656750575, | |
| "grad_norm": 2.799560785293579, | |
| "learning_rate": 3.5450883173225905e-05, | |
| "loss": 0.8418, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.30511060259344014, | |
| "grad_norm": 3.838677406311035, | |
| "learning_rate": 3.529594050201426e-05, | |
| "loss": 0.7948, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.30816170861937453, | |
| "grad_norm": 2.587244749069214, | |
| "learning_rate": 3.51409978308026e-05, | |
| "loss": 0.784, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.3112128146453089, | |
| "grad_norm": 3.555494785308838, | |
| "learning_rate": 3.498605515959095e-05, | |
| "loss": 0.7854, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.3142639206712433, | |
| "grad_norm": 2.900275468826294, | |
| "learning_rate": 3.48311124883793e-05, | |
| "loss": 0.8536, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.3173150266971777, | |
| "grad_norm": 3.058786153793335, | |
| "learning_rate": 3.467616981716765e-05, | |
| "loss": 0.8163, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.32036613272311215, | |
| "grad_norm": 3.0138072967529297, | |
| "learning_rate": 3.4521227145956e-05, | |
| "loss": 0.8534, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.32341723874904654, | |
| "grad_norm": 2.71114182472229, | |
| "learning_rate": 3.436628447474435e-05, | |
| "loss": 0.8401, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.32646834477498093, | |
| "grad_norm": 2.574453592300415, | |
| "learning_rate": 3.421134180353269e-05, | |
| "loss": 0.7596, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.3295194508009153, | |
| "grad_norm": 2.7125132083892822, | |
| "learning_rate": 3.4056399132321046e-05, | |
| "loss": 0.8042, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.3325705568268497, | |
| "grad_norm": 3.0520405769348145, | |
| "learning_rate": 3.3901456461109395e-05, | |
| "loss": 0.7713, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.3356216628527841, | |
| "grad_norm": 2.5527734756469727, | |
| "learning_rate": 3.374651378989774e-05, | |
| "loss": 0.7929, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.33867276887871856, | |
| "grad_norm": 2.1792235374450684, | |
| "learning_rate": 3.3591571118686087e-05, | |
| "loss": 0.7102, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.34172387490465295, | |
| "grad_norm": 3.192811965942383, | |
| "learning_rate": 3.3436628447474436e-05, | |
| "loss": 0.7956, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.34477498093058734, | |
| "grad_norm": 3.0279996395111084, | |
| "learning_rate": 3.3281685776262785e-05, | |
| "loss": 0.7883, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.34782608695652173, | |
| "grad_norm": 3.0758237838745117, | |
| "learning_rate": 3.3126743105051134e-05, | |
| "loss": 0.7457, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.3508771929824561, | |
| "grad_norm": 2.2491872310638428, | |
| "learning_rate": 3.297180043383948e-05, | |
| "loss": 0.7712, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.35392829900839057, | |
| "grad_norm": 2.543558120727539, | |
| "learning_rate": 3.281685776262783e-05, | |
| "loss": 0.8013, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.35697940503432496, | |
| "grad_norm": 2.996823310852051, | |
| "learning_rate": 3.266191509141618e-05, | |
| "loss": 0.7769, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.36003051106025935, | |
| "grad_norm": 2.1455206871032715, | |
| "learning_rate": 3.250697242020452e-05, | |
| "loss": 0.7778, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.36308161708619374, | |
| "grad_norm": 3.1174466609954834, | |
| "learning_rate": 3.235202974899287e-05, | |
| "loss": 0.7519, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.36613272311212813, | |
| "grad_norm": 2.6810905933380127, | |
| "learning_rate": 3.219708707778122e-05, | |
| "loss": 0.8112, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.3691838291380625, | |
| "grad_norm": 2.6105597019195557, | |
| "learning_rate": 3.204214440656957e-05, | |
| "loss": 0.7344, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.37223493516399697, | |
| "grad_norm": 3.3781626224517822, | |
| "learning_rate": 3.188720173535792e-05, | |
| "loss": 0.8401, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.37528604118993136, | |
| "grad_norm": 3.3649299144744873, | |
| "learning_rate": 3.173225906414627e-05, | |
| "loss": 0.7849, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.37833714721586575, | |
| "grad_norm": 2.6910207271575928, | |
| "learning_rate": 3.157731639293462e-05, | |
| "loss": 0.8165, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.38138825324180015, | |
| "grad_norm": 2.5760819911956787, | |
| "learning_rate": 3.1422373721722966e-05, | |
| "loss": 0.8043, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.38443935926773454, | |
| "grad_norm": 2.5498452186584473, | |
| "learning_rate": 3.1267431050511315e-05, | |
| "loss": 0.7566, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.38749046529366893, | |
| "grad_norm": 1.8285728693008423, | |
| "learning_rate": 3.111248837929966e-05, | |
| "loss": 0.7152, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.3905415713196034, | |
| "grad_norm": 2.509260654449463, | |
| "learning_rate": 3.0957545708088007e-05, | |
| "loss": 0.8489, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.39359267734553777, | |
| "grad_norm": 2.905968427658081, | |
| "learning_rate": 3.080260303687636e-05, | |
| "loss": 0.7369, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.39664378337147216, | |
| "grad_norm": 2.5083484649658203, | |
| "learning_rate": 3.0647660365664705e-05, | |
| "loss": 0.7331, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.39969488939740655, | |
| "grad_norm": 2.2442944049835205, | |
| "learning_rate": 3.0492717694453054e-05, | |
| "loss": 0.7282, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.40274599542334094, | |
| "grad_norm": 2.856735944747925, | |
| "learning_rate": 3.0337775023241406e-05, | |
| "loss": 0.7009, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.4057971014492754, | |
| "grad_norm": 2.9385952949523926, | |
| "learning_rate": 3.018283235202975e-05, | |
| "loss": 0.7225, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.4088482074752098, | |
| "grad_norm": 3.0979793071746826, | |
| "learning_rate": 3.00278896808181e-05, | |
| "loss": 0.764, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.41189931350114417, | |
| "grad_norm": 2.6863820552825928, | |
| "learning_rate": 2.9872947009606443e-05, | |
| "loss": 0.7475, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.41495041952707856, | |
| "grad_norm": 2.9140465259552, | |
| "learning_rate": 2.9718004338394795e-05, | |
| "loss": 0.7468, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.41800152555301295, | |
| "grad_norm": 2.702855348587036, | |
| "learning_rate": 2.9563061667183144e-05, | |
| "loss": 0.7444, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.42105263157894735, | |
| "grad_norm": 2.646045207977295, | |
| "learning_rate": 2.940811899597149e-05, | |
| "loss": 0.7911, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.4241037376048818, | |
| "grad_norm": 2.568760395050049, | |
| "learning_rate": 2.925317632475984e-05, | |
| "loss": 0.7815, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.4271548436308162, | |
| "grad_norm": 2.381138801574707, | |
| "learning_rate": 2.909823365354819e-05, | |
| "loss": 0.7411, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.4302059496567506, | |
| "grad_norm": 2.6686244010925293, | |
| "learning_rate": 2.8943290982336534e-05, | |
| "loss": 0.7593, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.43325705568268497, | |
| "grad_norm": 3.8091323375701904, | |
| "learning_rate": 2.8788348311124886e-05, | |
| "loss": 0.7731, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.43630816170861936, | |
| "grad_norm": 3.0860376358032227, | |
| "learning_rate": 2.8633405639913235e-05, | |
| "loss": 0.7617, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.43935926773455375, | |
| "grad_norm": 3.304323196411133, | |
| "learning_rate": 2.847846296870158e-05, | |
| "loss": 0.8343, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.4424103737604882, | |
| "grad_norm": 2.6195857524871826, | |
| "learning_rate": 2.832352029748993e-05, | |
| "loss": 0.688, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.4454614797864226, | |
| "grad_norm": 2.3896477222442627, | |
| "learning_rate": 2.8168577626278282e-05, | |
| "loss": 0.8032, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.448512585812357, | |
| "grad_norm": 2.9045519828796387, | |
| "learning_rate": 2.8013634955066625e-05, | |
| "loss": 0.8131, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.45156369183829137, | |
| "grad_norm": 2.5903823375701904, | |
| "learning_rate": 2.7858692283854977e-05, | |
| "loss": 0.7219, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.45461479786422576, | |
| "grad_norm": 2.8301548957824707, | |
| "learning_rate": 2.7703749612643326e-05, | |
| "loss": 0.7661, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.4576659038901602, | |
| "grad_norm": 2.5708298683166504, | |
| "learning_rate": 2.7548806941431672e-05, | |
| "loss": 0.7673, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.4607170099160946, | |
| "grad_norm": 2.4464125633239746, | |
| "learning_rate": 2.739386427022002e-05, | |
| "loss": 0.7328, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.463768115942029, | |
| "grad_norm": 3.007063627243042, | |
| "learning_rate": 2.7238921599008366e-05, | |
| "loss": 0.7449, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.4668192219679634, | |
| "grad_norm": 2.8109517097473145, | |
| "learning_rate": 2.7083978927796715e-05, | |
| "loss": 0.8403, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.4698703279938978, | |
| "grad_norm": 2.756274461746216, | |
| "learning_rate": 2.6929036256585068e-05, | |
| "loss": 0.786, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.47292143401983217, | |
| "grad_norm": 3.142169237136841, | |
| "learning_rate": 2.677409358537341e-05, | |
| "loss": 0.7738, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.4759725400457666, | |
| "grad_norm": 2.546050548553467, | |
| "learning_rate": 2.6619150914161763e-05, | |
| "loss": 0.7806, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.479023646071701, | |
| "grad_norm": 2.3914120197296143, | |
| "learning_rate": 2.646420824295011e-05, | |
| "loss": 0.7641, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.4820747520976354, | |
| "grad_norm": 2.156118869781494, | |
| "learning_rate": 2.6309265571738457e-05, | |
| "loss": 0.8034, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.4851258581235698, | |
| "grad_norm": 2.6585230827331543, | |
| "learning_rate": 2.6154322900526806e-05, | |
| "loss": 0.7609, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.4881769641495042, | |
| "grad_norm": 2.175424814224243, | |
| "learning_rate": 2.5999380229315155e-05, | |
| "loss": 0.6381, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.49122807017543857, | |
| "grad_norm": 3.3766794204711914, | |
| "learning_rate": 2.58444375581035e-05, | |
| "loss": 0.7187, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.494279176201373, | |
| "grad_norm": 2.665618419647217, | |
| "learning_rate": 2.568949488689185e-05, | |
| "loss": 0.7492, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.4973302822273074, | |
| "grad_norm": 2.5544931888580322, | |
| "learning_rate": 2.5534552215680202e-05, | |
| "loss": 0.7473, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.5003813882532419, | |
| "grad_norm": 2.091601848602295, | |
| "learning_rate": 2.5379609544468548e-05, | |
| "loss": 0.7364, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.5034324942791762, | |
| "grad_norm": 2.7752933502197266, | |
| "learning_rate": 2.5224666873256897e-05, | |
| "loss": 0.8124, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.5064836003051106, | |
| "grad_norm": 2.3963379859924316, | |
| "learning_rate": 2.5069724202045246e-05, | |
| "loss": 0.7213, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.509534706331045, | |
| "grad_norm": 2.0520660877227783, | |
| "learning_rate": 2.4914781530833592e-05, | |
| "loss": 0.7954, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.5125858123569794, | |
| "grad_norm": 2.9152419567108154, | |
| "learning_rate": 2.475983885962194e-05, | |
| "loss": 0.7114, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.5156369183829138, | |
| "grad_norm": 3.4632716178894043, | |
| "learning_rate": 2.460489618841029e-05, | |
| "loss": 0.7557, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.5186880244088482, | |
| "grad_norm": 3.0776329040527344, | |
| "learning_rate": 2.4449953517198636e-05, | |
| "loss": 0.7867, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.5217391304347826, | |
| "grad_norm": 2.640956163406372, | |
| "learning_rate": 2.4295010845986985e-05, | |
| "loss": 0.717, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.524790236460717, | |
| "grad_norm": 3.0681939125061035, | |
| "learning_rate": 2.4140068174775334e-05, | |
| "loss": 0.779, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.5278413424866514, | |
| "grad_norm": 3.3912854194641113, | |
| "learning_rate": 2.3985125503563683e-05, | |
| "loss": 0.7602, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.5308924485125858, | |
| "grad_norm": 3.4418396949768066, | |
| "learning_rate": 2.3830182832352028e-05, | |
| "loss": 0.7719, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.5339435545385202, | |
| "grad_norm": 2.5723013877868652, | |
| "learning_rate": 2.367524016114038e-05, | |
| "loss": 0.6693, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.5369946605644547, | |
| "grad_norm": 3.5987260341644287, | |
| "learning_rate": 2.3520297489928726e-05, | |
| "loss": 0.7202, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.540045766590389, | |
| "grad_norm": 3.0256638526916504, | |
| "learning_rate": 2.3365354818717075e-05, | |
| "loss": 0.769, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.5430968726163234, | |
| "grad_norm": 2.5137994289398193, | |
| "learning_rate": 2.3210412147505424e-05, | |
| "loss": 0.7133, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.5461479786422578, | |
| "grad_norm": 2.394585132598877, | |
| "learning_rate": 2.3055469476293773e-05, | |
| "loss": 0.7089, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.5491990846681922, | |
| "grad_norm": 2.6688249111175537, | |
| "learning_rate": 2.290052680508212e-05, | |
| "loss": 0.7819, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.5522501906941266, | |
| "grad_norm": 3.030775785446167, | |
| "learning_rate": 2.2745584133870468e-05, | |
| "loss": 0.7545, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.555301296720061, | |
| "grad_norm": 3.2252886295318604, | |
| "learning_rate": 2.2590641462658817e-05, | |
| "loss": 0.7854, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.5583524027459954, | |
| "grad_norm": 1.8850343227386475, | |
| "learning_rate": 2.2435698791447166e-05, | |
| "loss": 0.708, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.5614035087719298, | |
| "grad_norm": 3.511936902999878, | |
| "learning_rate": 2.2280756120235512e-05, | |
| "loss": 0.7384, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.5644546147978642, | |
| "grad_norm": 3.171138286590576, | |
| "learning_rate": 2.2125813449023864e-05, | |
| "loss": 0.7831, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.5675057208237986, | |
| "grad_norm": 2.5248336791992188, | |
| "learning_rate": 2.197087077781221e-05, | |
| "loss": 0.7011, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.5705568268497331, | |
| "grad_norm": 3.0410759449005127, | |
| "learning_rate": 2.181592810660056e-05, | |
| "loss": 0.7252, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.5736079328756675, | |
| "grad_norm": 3.691617012023926, | |
| "learning_rate": 2.1660985435388905e-05, | |
| "loss": 0.7585, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.5766590389016019, | |
| "grad_norm": 2.653752565383911, | |
| "learning_rate": 2.1506042764177257e-05, | |
| "loss": 0.7948, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.5797101449275363, | |
| "grad_norm": 2.0451462268829346, | |
| "learning_rate": 2.1351100092965603e-05, | |
| "loss": 0.6916, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.5827612509534706, | |
| "grad_norm": 2.8858132362365723, | |
| "learning_rate": 2.1196157421753952e-05, | |
| "loss": 0.682, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.585812356979405, | |
| "grad_norm": 2.553213596343994, | |
| "learning_rate": 2.10412147505423e-05, | |
| "loss": 0.7501, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.5888634630053394, | |
| "grad_norm": 2.1616005897521973, | |
| "learning_rate": 2.088627207933065e-05, | |
| "loss": 0.7536, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.5919145690312738, | |
| "grad_norm": 3.390302896499634, | |
| "learning_rate": 2.0731329408118995e-05, | |
| "loss": 0.787, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.5949656750572082, | |
| "grad_norm": 2.695875883102417, | |
| "learning_rate": 2.0576386736907348e-05, | |
| "loss": 0.7717, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.5980167810831426, | |
| "grad_norm": 2.517509698867798, | |
| "learning_rate": 2.0421444065695693e-05, | |
| "loss": 0.7527, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.601067887109077, | |
| "grad_norm": 2.498511552810669, | |
| "learning_rate": 2.0266501394484043e-05, | |
| "loss": 0.6925, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.6041189931350115, | |
| "grad_norm": 2.409759044647217, | |
| "learning_rate": 2.0111558723272388e-05, | |
| "loss": 0.7283, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.6071700991609459, | |
| "grad_norm": 3.212092876434326, | |
| "learning_rate": 1.995661605206074e-05, | |
| "loss": 0.6655, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.6102212051868803, | |
| "grad_norm": 2.577396869659424, | |
| "learning_rate": 1.9801673380849086e-05, | |
| "loss": 0.6565, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.6132723112128147, | |
| "grad_norm": 2.715590715408325, | |
| "learning_rate": 1.9646730709637435e-05, | |
| "loss": 0.6705, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.6163234172387491, | |
| "grad_norm": 2.6704838275909424, | |
| "learning_rate": 1.9491788038425784e-05, | |
| "loss": 0.7789, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.6193745232646835, | |
| "grad_norm": 2.4763495922088623, | |
| "learning_rate": 1.9336845367214133e-05, | |
| "loss": 0.8014, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.6224256292906178, | |
| "grad_norm": 2.754671573638916, | |
| "learning_rate": 1.918190269600248e-05, | |
| "loss": 0.7272, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.6254767353165522, | |
| "grad_norm": 3.223806858062744, | |
| "learning_rate": 1.9026960024790828e-05, | |
| "loss": 0.7103, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.6285278413424866, | |
| "grad_norm": 2.489527702331543, | |
| "learning_rate": 1.8872017353579177e-05, | |
| "loss": 0.6492, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.631578947368421, | |
| "grad_norm": 3.088130235671997, | |
| "learning_rate": 1.8717074682367526e-05, | |
| "loss": 0.6808, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.6346300533943554, | |
| "grad_norm": 3.130558967590332, | |
| "learning_rate": 1.8562132011155872e-05, | |
| "loss": 0.7399, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.6376811594202898, | |
| "grad_norm": 2.672267436981201, | |
| "learning_rate": 1.840718933994422e-05, | |
| "loss": 0.7012, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.6407322654462243, | |
| "grad_norm": 3.445889472961426, | |
| "learning_rate": 1.825224666873257e-05, | |
| "loss": 0.7329, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.6437833714721587, | |
| "grad_norm": 2.1221354007720947, | |
| "learning_rate": 1.809730399752092e-05, | |
| "loss": 0.7105, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.6468344774980931, | |
| "grad_norm": 2.5181918144226074, | |
| "learning_rate": 1.7942361326309268e-05, | |
| "loss": 0.7719, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.6498855835240275, | |
| "grad_norm": 3.100332021713257, | |
| "learning_rate": 1.7787418655097614e-05, | |
| "loss": 0.6837, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.6529366895499619, | |
| "grad_norm": 3.2809691429138184, | |
| "learning_rate": 1.7632475983885963e-05, | |
| "loss": 0.7173, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.6559877955758963, | |
| "grad_norm": 2.191570520401001, | |
| "learning_rate": 1.747753331267431e-05, | |
| "loss": 0.6494, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.6590389016018307, | |
| "grad_norm": 2.8507699966430664, | |
| "learning_rate": 1.732259064146266e-05, | |
| "loss": 0.7094, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.662090007627765, | |
| "grad_norm": 2.7249112129211426, | |
| "learning_rate": 1.7167647970251006e-05, | |
| "loss": 0.6549, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.6651411136536994, | |
| "grad_norm": 1.7811733484268188, | |
| "learning_rate": 1.7012705299039355e-05, | |
| "loss": 0.6353, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.6681922196796338, | |
| "grad_norm": 2.451258659362793, | |
| "learning_rate": 1.6857762627827704e-05, | |
| "loss": 0.7104, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.6712433257055682, | |
| "grad_norm": 2.2157397270202637, | |
| "learning_rate": 1.6702819956616053e-05, | |
| "loss": 0.7135, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.6742944317315027, | |
| "grad_norm": 2.1415586471557617, | |
| "learning_rate": 1.65478772854044e-05, | |
| "loss": 0.6959, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.6773455377574371, | |
| "grad_norm": 2.9360268115997314, | |
| "learning_rate": 1.6392934614192748e-05, | |
| "loss": 0.7511, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.6803966437833715, | |
| "grad_norm": 2.7337160110473633, | |
| "learning_rate": 1.6237991942981097e-05, | |
| "loss": 0.7181, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.6834477498093059, | |
| "grad_norm": 3.827877998352051, | |
| "learning_rate": 1.6083049271769446e-05, | |
| "loss": 0.6735, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.6864988558352403, | |
| "grad_norm": 2.284428834915161, | |
| "learning_rate": 1.5928106600557792e-05, | |
| "loss": 0.6822, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.6895499618611747, | |
| "grad_norm": 2.9292097091674805, | |
| "learning_rate": 1.5773163929346144e-05, | |
| "loss": 0.7522, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.6926010678871091, | |
| "grad_norm": 2.607977867126465, | |
| "learning_rate": 1.561822125813449e-05, | |
| "loss": 0.656, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.6956521739130435, | |
| "grad_norm": 3.1531264781951904, | |
| "learning_rate": 1.546327858692284e-05, | |
| "loss": 0.7427, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.6987032799389779, | |
| "grad_norm": 2.421393871307373, | |
| "learning_rate": 1.5308335915711188e-05, | |
| "loss": 0.6408, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.7017543859649122, | |
| "grad_norm": 3.7150917053222656, | |
| "learning_rate": 1.5153393244499537e-05, | |
| "loss": 0.7536, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.7048054919908466, | |
| "grad_norm": 2.622455358505249, | |
| "learning_rate": 1.4998450573287884e-05, | |
| "loss": 0.7359, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.7078565980167811, | |
| "grad_norm": 2.524982452392578, | |
| "learning_rate": 1.4843507902076232e-05, | |
| "loss": 0.689, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.7109077040427155, | |
| "grad_norm": 3.156085729598999, | |
| "learning_rate": 1.4688565230864582e-05, | |
| "loss": 0.7111, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.7139588100686499, | |
| "grad_norm": 2.8505616188049316, | |
| "learning_rate": 1.453362255965293e-05, | |
| "loss": 0.6698, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.7170099160945843, | |
| "grad_norm": 2.8530240058898926, | |
| "learning_rate": 1.4378679888441277e-05, | |
| "loss": 0.7196, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.7200610221205187, | |
| "grad_norm": 2.8734195232391357, | |
| "learning_rate": 1.4223737217229626e-05, | |
| "loss": 0.7656, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.7231121281464531, | |
| "grad_norm": 2.84820556640625, | |
| "learning_rate": 1.4068794546017975e-05, | |
| "loss": 0.6777, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.7261632341723875, | |
| "grad_norm": 2.4297478199005127, | |
| "learning_rate": 1.3913851874806322e-05, | |
| "loss": 0.7145, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.7292143401983219, | |
| "grad_norm": 2.734090566635132, | |
| "learning_rate": 1.375890920359467e-05, | |
| "loss": 0.6577, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.7322654462242563, | |
| "grad_norm": 2.2257931232452393, | |
| "learning_rate": 1.3603966532383019e-05, | |
| "loss": 0.6857, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.7353165522501907, | |
| "grad_norm": 2.3681576251983643, | |
| "learning_rate": 1.3449023861171368e-05, | |
| "loss": 0.7619, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.738367658276125, | |
| "grad_norm": 2.033923864364624, | |
| "learning_rate": 1.3294081189959715e-05, | |
| "loss": 0.691, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.7414187643020596, | |
| "grad_norm": 2.131638526916504, | |
| "learning_rate": 1.3139138518748064e-05, | |
| "loss": 0.7191, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.7444698703279939, | |
| "grad_norm": 2.609546661376953, | |
| "learning_rate": 1.2984195847536412e-05, | |
| "loss": 0.6737, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.7475209763539283, | |
| "grad_norm": 2.4548799991607666, | |
| "learning_rate": 1.282925317632476e-05, | |
| "loss": 0.6301, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.7505720823798627, | |
| "grad_norm": 2.644568920135498, | |
| "learning_rate": 1.267431050511311e-05, | |
| "loss": 0.6824, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.7536231884057971, | |
| "grad_norm": 2.595654249191284, | |
| "learning_rate": 1.2519367833901457e-05, | |
| "loss": 0.654, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.7566742944317315, | |
| "grad_norm": 2.7976434230804443, | |
| "learning_rate": 1.2364425162689804e-05, | |
| "loss": 0.7883, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.7597254004576659, | |
| "grad_norm": 2.874528646469116, | |
| "learning_rate": 1.2209482491478153e-05, | |
| "loss": 0.7202, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.7627765064836003, | |
| "grad_norm": 2.6641483306884766, | |
| "learning_rate": 1.20545398202665e-05, | |
| "loss": 0.6692, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.7658276125095347, | |
| "grad_norm": 2.610469102859497, | |
| "learning_rate": 1.189959714905485e-05, | |
| "loss": 0.7201, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.7688787185354691, | |
| "grad_norm": 2.2959210872650146, | |
| "learning_rate": 1.1744654477843199e-05, | |
| "loss": 0.6302, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.7719298245614035, | |
| "grad_norm": 2.454118013381958, | |
| "learning_rate": 1.1589711806631546e-05, | |
| "loss": 0.7102, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.7749809305873379, | |
| "grad_norm": 2.6348319053649902, | |
| "learning_rate": 1.1434769135419895e-05, | |
| "loss": 0.7214, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.7780320366132724, | |
| "grad_norm": 2.254340887069702, | |
| "learning_rate": 1.1279826464208242e-05, | |
| "loss": 0.6973, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.7810831426392068, | |
| "grad_norm": 2.1454594135284424, | |
| "learning_rate": 1.1124883792996592e-05, | |
| "loss": 0.7037, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.7841342486651411, | |
| "grad_norm": 2.7335195541381836, | |
| "learning_rate": 1.096994112178494e-05, | |
| "loss": 0.7059, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.7871853546910755, | |
| "grad_norm": 3.499448776245117, | |
| "learning_rate": 1.0814998450573288e-05, | |
| "loss": 0.6518, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.7902364607170099, | |
| "grad_norm": 2.517699956893921, | |
| "learning_rate": 1.0660055779361637e-05, | |
| "loss": 0.6884, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.7932875667429443, | |
| "grad_norm": 2.7374229431152344, | |
| "learning_rate": 1.0505113108149984e-05, | |
| "loss": 0.6146, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.7963386727688787, | |
| "grad_norm": 3.5942227840423584, | |
| "learning_rate": 1.0350170436938333e-05, | |
| "loss": 0.7788, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.7993897787948131, | |
| "grad_norm": 2.709524393081665, | |
| "learning_rate": 1.019522776572668e-05, | |
| "loss": 0.6843, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.8024408848207475, | |
| "grad_norm": 2.7754933834075928, | |
| "learning_rate": 1.004028509451503e-05, | |
| "loss": 0.7694, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.8054919908466819, | |
| "grad_norm": 2.025585412979126, | |
| "learning_rate": 9.885342423303379e-06, | |
| "loss": 0.6237, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.8085430968726163, | |
| "grad_norm": 2.5337514877319336, | |
| "learning_rate": 9.730399752091726e-06, | |
| "loss": 0.7284, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.8115942028985508, | |
| "grad_norm": 2.5185277462005615, | |
| "learning_rate": 9.575457080880075e-06, | |
| "loss": 0.695, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.8146453089244852, | |
| "grad_norm": 3.0368494987487793, | |
| "learning_rate": 9.420514409668422e-06, | |
| "loss": 0.7291, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.8176964149504196, | |
| "grad_norm": 2.6141366958618164, | |
| "learning_rate": 9.265571738456771e-06, | |
| "loss": 0.6768, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.820747520976354, | |
| "grad_norm": 2.2215969562530518, | |
| "learning_rate": 9.11062906724512e-06, | |
| "loss": 0.6832, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.8237986270022883, | |
| "grad_norm": 3.0464236736297607, | |
| "learning_rate": 8.955686396033468e-06, | |
| "loss": 0.7362, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.8268497330282227, | |
| "grad_norm": 2.384838104248047, | |
| "learning_rate": 8.800743724821817e-06, | |
| "loss": 0.6501, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.8299008390541571, | |
| "grad_norm": 2.2411282062530518, | |
| "learning_rate": 8.645801053610164e-06, | |
| "loss": 0.6274, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.8329519450800915, | |
| "grad_norm": 2.8559658527374268, | |
| "learning_rate": 8.490858382398513e-06, | |
| "loss": 0.6282, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.8360030511060259, | |
| "grad_norm": 2.729321002960205, | |
| "learning_rate": 8.335915711186862e-06, | |
| "loss": 0.6966, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.8390541571319603, | |
| "grad_norm": 2.846073865890503, | |
| "learning_rate": 8.18097303997521e-06, | |
| "loss": 0.6925, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.8421052631578947, | |
| "grad_norm": 2.1397573947906494, | |
| "learning_rate": 8.026030368763559e-06, | |
| "loss": 0.6435, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.8451563691838292, | |
| "grad_norm": 3.3590445518493652, | |
| "learning_rate": 7.871087697551906e-06, | |
| "loss": 0.6592, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.8482074752097636, | |
| "grad_norm": 2.7317698001861572, | |
| "learning_rate": 7.716145026340255e-06, | |
| "loss": 0.6915, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.851258581235698, | |
| "grad_norm": 2.3746912479400635, | |
| "learning_rate": 7.561202355128602e-06, | |
| "loss": 0.7039, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.8543096872616324, | |
| "grad_norm": 3.0164999961853027, | |
| "learning_rate": 7.4062596839169505e-06, | |
| "loss": 0.7024, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.8573607932875668, | |
| "grad_norm": 3.2100493907928467, | |
| "learning_rate": 7.2513170127053e-06, | |
| "loss": 0.7137, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.8604118993135011, | |
| "grad_norm": 2.1263020038604736, | |
| "learning_rate": 7.096374341493647e-06, | |
| "loss": 0.6334, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.8634630053394355, | |
| "grad_norm": 2.614156484603882, | |
| "learning_rate": 6.941431670281996e-06, | |
| "loss": 0.7299, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.8665141113653699, | |
| "grad_norm": 4.311347961425781, | |
| "learning_rate": 6.786488999070343e-06, | |
| "loss": 0.6464, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.8695652173913043, | |
| "grad_norm": 2.2194840908050537, | |
| "learning_rate": 6.631546327858692e-06, | |
| "loss": 0.7055, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.8726163234172387, | |
| "grad_norm": 2.8160476684570312, | |
| "learning_rate": 6.476603656647041e-06, | |
| "loss": 0.7233, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.8756674294431731, | |
| "grad_norm": 1.9882564544677734, | |
| "learning_rate": 6.321660985435389e-06, | |
| "loss": 0.7041, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.8787185354691075, | |
| "grad_norm": 2.381114959716797, | |
| "learning_rate": 6.166718314223738e-06, | |
| "loss": 0.6916, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.881769641495042, | |
| "grad_norm": 2.911763906478882, | |
| "learning_rate": 6.011775643012086e-06, | |
| "loss": 0.6493, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.8848207475209764, | |
| "grad_norm": 2.830087900161743, | |
| "learning_rate": 5.856832971800434e-06, | |
| "loss": 0.6497, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.8878718535469108, | |
| "grad_norm": 2.609445333480835, | |
| "learning_rate": 5.701890300588782e-06, | |
| "loss": 0.6923, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.8909229595728452, | |
| "grad_norm": 2.1896908283233643, | |
| "learning_rate": 5.5469476293771305e-06, | |
| "loss": 0.6325, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.8939740655987796, | |
| "grad_norm": 3.2182352542877197, | |
| "learning_rate": 5.392004958165479e-06, | |
| "loss": 0.6613, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.897025171624714, | |
| "grad_norm": 2.7768335342407227, | |
| "learning_rate": 5.237062286953828e-06, | |
| "loss": 0.6944, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.9000762776506483, | |
| "grad_norm": 2.458332061767578, | |
| "learning_rate": 5.082119615742176e-06, | |
| "loss": 0.6689, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.9031273836765827, | |
| "grad_norm": 2.2689878940582275, | |
| "learning_rate": 4.927176944530524e-06, | |
| "loss": 0.6729, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.9061784897025171, | |
| "grad_norm": 3.256464719772339, | |
| "learning_rate": 4.772234273318872e-06, | |
| "loss": 0.6158, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.9092295957284515, | |
| "grad_norm": 3.0589096546173096, | |
| "learning_rate": 4.6172916021072205e-06, | |
| "loss": 0.7551, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.9122807017543859, | |
| "grad_norm": 2.630323648452759, | |
| "learning_rate": 4.4623489308955695e-06, | |
| "loss": 0.6235, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.9153318077803204, | |
| "grad_norm": 2.285128593444824, | |
| "learning_rate": 4.307406259683918e-06, | |
| "loss": 0.6948, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.9183829138062548, | |
| "grad_norm": 2.4126858711242676, | |
| "learning_rate": 4.152463588472266e-06, | |
| "loss": 0.6249, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.9214340198321892, | |
| "grad_norm": 2.4043056964874268, | |
| "learning_rate": 3.997520917260614e-06, | |
| "loss": 0.6822, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.9244851258581236, | |
| "grad_norm": 2.084949254989624, | |
| "learning_rate": 3.842578246048962e-06, | |
| "loss": 0.7345, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.927536231884058, | |
| "grad_norm": 2.251657247543335, | |
| "learning_rate": 3.68763557483731e-06, | |
| "loss": 0.6048, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.9305873379099924, | |
| "grad_norm": 2.20843505859375, | |
| "learning_rate": 3.532692903625659e-06, | |
| "loss": 0.6687, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.9336384439359268, | |
| "grad_norm": 2.692772150039673, | |
| "learning_rate": 3.3777502324140072e-06, | |
| "loss": 0.651, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.9366895499618612, | |
| "grad_norm": 2.778233766555786, | |
| "learning_rate": 3.2228075612023554e-06, | |
| "loss": 0.7709, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.9397406559877955, | |
| "grad_norm": 2.2121570110321045, | |
| "learning_rate": 3.0678648899907036e-06, | |
| "loss": 0.6741, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.9427917620137299, | |
| "grad_norm": 2.814641237258911, | |
| "learning_rate": 2.9129222187790522e-06, | |
| "loss": 0.6409, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.9458428680396643, | |
| "grad_norm": 2.183281660079956, | |
| "learning_rate": 2.7579795475674004e-06, | |
| "loss": 0.6192, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.9488939740655988, | |
| "grad_norm": 1.998976230621338, | |
| "learning_rate": 2.6030368763557486e-06, | |
| "loss": 0.5924, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.9519450800915332, | |
| "grad_norm": 2.7774434089660645, | |
| "learning_rate": 2.4480942051440968e-06, | |
| "loss": 0.7145, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.9549961861174676, | |
| "grad_norm": 2.510991334915161, | |
| "learning_rate": 2.293151533932445e-06, | |
| "loss": 0.6985, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.958047292143402, | |
| "grad_norm": 2.820533037185669, | |
| "learning_rate": 2.138208862720793e-06, | |
| "loss": 0.673, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.9610983981693364, | |
| "grad_norm": 3.7463274002075195, | |
| "learning_rate": 1.9832661915091418e-06, | |
| "loss": 0.6753, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.9641495041952708, | |
| "grad_norm": 2.6098906993865967, | |
| "learning_rate": 1.82832352029749e-06, | |
| "loss": 0.6077, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.9672006102212052, | |
| "grad_norm": 3.7235538959503174, | |
| "learning_rate": 1.6733808490858381e-06, | |
| "loss": 0.6528, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.9702517162471396, | |
| "grad_norm": 2.551973342895508, | |
| "learning_rate": 1.5184381778741865e-06, | |
| "loss": 0.7123, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.973302822273074, | |
| "grad_norm": 2.60445237159729, | |
| "learning_rate": 1.363495506662535e-06, | |
| "loss": 0.6368, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.9763539282990084, | |
| "grad_norm": 2.7576358318328857, | |
| "learning_rate": 1.2085528354508833e-06, | |
| "loss": 0.6172, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.9794050343249427, | |
| "grad_norm": 2.23207426071167, | |
| "learning_rate": 1.0536101642392315e-06, | |
| "loss": 0.6227, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.9824561403508771, | |
| "grad_norm": 3.0700480937957764, | |
| "learning_rate": 8.986674930275798e-07, | |
| "loss": 0.7168, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.9855072463768116, | |
| "grad_norm": 2.6852805614471436, | |
| "learning_rate": 7.437248218159281e-07, | |
| "loss": 0.6322, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.988558352402746, | |
| "grad_norm": 2.6967718601226807, | |
| "learning_rate": 5.887821506042764e-07, | |
| "loss": 0.6357, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.9916094584286804, | |
| "grad_norm": 3.2857532501220703, | |
| "learning_rate": 4.3383947939262475e-07, | |
| "loss": 0.7225, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.9946605644546148, | |
| "grad_norm": 2.565335750579834, | |
| "learning_rate": 2.788968081809731e-07, | |
| "loss": 0.6709, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.9977116704805492, | |
| "grad_norm": 3.099731922149658, | |
| "learning_rate": 1.2395413696932136e-07, | |
| "loss": 0.7549, | |
| "step": 3270 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 3277, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.2252825778126848e+16, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |