| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9997727789138832, | |
| "eval_steps": 500, | |
| "global_step": 550, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0018177686889343332, | |
| "grad_norm": 0.1331978142261505, | |
| "learning_rate": 3.5714285714285716e-07, | |
| "loss": 1.8898, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0036355373778686664, | |
| "grad_norm": 0.13566601276397705, | |
| "learning_rate": 7.142857142857143e-07, | |
| "loss": 1.8867, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0054533060668029995, | |
| "grad_norm": 0.13576287031173706, | |
| "learning_rate": 1.0714285714285714e-06, | |
| "loss": 1.8848, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.007271074755737333, | |
| "grad_norm": 0.1231953352689743, | |
| "learning_rate": 1.4285714285714286e-06, | |
| "loss": 1.888, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.009088843444671665, | |
| "grad_norm": 0.09326394647359848, | |
| "learning_rate": 1.7857142857142859e-06, | |
| "loss": 1.8816, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.010906612133605999, | |
| "grad_norm": 0.08585168421268463, | |
| "learning_rate": 2.1428571428571427e-06, | |
| "loss": 1.8851, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.012724380822540331, | |
| "grad_norm": 0.0567106269299984, | |
| "learning_rate": 2.5e-06, | |
| "loss": 1.8799, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.014542149511474665, | |
| "grad_norm": 0.05393998324871063, | |
| "learning_rate": 2.8571428571428573e-06, | |
| "loss": 1.8785, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.016359918200409, | |
| "grad_norm": 0.05495736747980118, | |
| "learning_rate": 3.2142857142857147e-06, | |
| "loss": 1.8763, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.01817768688934333, | |
| "grad_norm": 0.05345786362886429, | |
| "learning_rate": 3.5714285714285718e-06, | |
| "loss": 1.8676, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.019995455578277664, | |
| "grad_norm": 0.047461625188589096, | |
| "learning_rate": 3.928571428571429e-06, | |
| "loss": 1.8634, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.021813224267211998, | |
| "grad_norm": 0.061344344168901443, | |
| "learning_rate": 4.2857142857142855e-06, | |
| "loss": 1.864, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.02363099295614633, | |
| "grad_norm": 0.06245123967528343, | |
| "learning_rate": 4.642857142857144e-06, | |
| "loss": 1.8647, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.025448761645080663, | |
| "grad_norm": 0.053826089948415756, | |
| "learning_rate": 5e-06, | |
| "loss": 1.8605, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.027266530334014997, | |
| "grad_norm": 0.04343092441558838, | |
| "learning_rate": 5.357142857142857e-06, | |
| "loss": 1.8648, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.02908429902294933, | |
| "grad_norm": 0.04949821159243584, | |
| "learning_rate": 5.7142857142857145e-06, | |
| "loss": 1.8536, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.03090206771188366, | |
| "grad_norm": 0.06119069084525108, | |
| "learning_rate": 6.071428571428571e-06, | |
| "loss": 1.8485, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.032719836400818, | |
| "grad_norm": 0.05067905783653259, | |
| "learning_rate": 6.4285714285714295e-06, | |
| "loss": 1.8536, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.03453760508975233, | |
| "grad_norm": 0.03722887113690376, | |
| "learning_rate": 6.785714285714287e-06, | |
| "loss": 1.8491, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.03635537377868666, | |
| "grad_norm": 0.04830312356352806, | |
| "learning_rate": 7.1428571428571436e-06, | |
| "loss": 1.8361, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.038173142467621, | |
| "grad_norm": 0.052912868559360504, | |
| "learning_rate": 7.500000000000001e-06, | |
| "loss": 1.8522, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.03999091115655533, | |
| "grad_norm": 0.04469645023345947, | |
| "learning_rate": 7.857142857142858e-06, | |
| "loss": 1.8391, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.04180867984548966, | |
| "grad_norm": 0.04048198461532593, | |
| "learning_rate": 8.214285714285714e-06, | |
| "loss": 1.8345, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.043626448534423996, | |
| "grad_norm": 0.03836997598409653, | |
| "learning_rate": 8.571428571428571e-06, | |
| "loss": 1.8342, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.04544421722335833, | |
| "grad_norm": 0.038932956755161285, | |
| "learning_rate": 8.92857142857143e-06, | |
| "loss": 1.8399, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.04726198591229266, | |
| "grad_norm": 0.041100382804870605, | |
| "learning_rate": 9.285714285714288e-06, | |
| "loss": 1.833, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.049079754601226995, | |
| "grad_norm": 0.03821416571736336, | |
| "learning_rate": 9.642857142857144e-06, | |
| "loss": 1.8342, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.050897523290161326, | |
| "grad_norm": 0.037851471453905106, | |
| "learning_rate": 1e-05, | |
| "loss": 1.8313, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.05271529197909566, | |
| "grad_norm": 0.03763577714562416, | |
| "learning_rate": 9.999909448127131e-06, | |
| "loss": 1.8291, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.054533060668029994, | |
| "grad_norm": 0.03475307673215866, | |
| "learning_rate": 9.999637795788383e-06, | |
| "loss": 1.8185, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.056350829356964324, | |
| "grad_norm": 0.03289997950196266, | |
| "learning_rate": 9.999185052823207e-06, | |
| "loss": 1.8261, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.05816859804589866, | |
| "grad_norm": 0.03243958577513695, | |
| "learning_rate": 9.99855123563029e-06, | |
| "loss": 1.8237, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.05998636673483299, | |
| "grad_norm": 0.033227939158678055, | |
| "learning_rate": 9.997736367166967e-06, | |
| "loss": 1.827, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.06180413542376732, | |
| "grad_norm": 0.03226836398243904, | |
| "learning_rate": 9.996740476948386e-06, | |
| "loss": 1.8257, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.06362190411270166, | |
| "grad_norm": 0.029187630861997604, | |
| "learning_rate": 9.995563601046434e-06, | |
| "loss": 1.819, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.065439672801636, | |
| "grad_norm": 0.026967501267790794, | |
| "learning_rate": 9.994205782088438e-06, | |
| "loss": 1.8136, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.06725744149057032, | |
| "grad_norm": 0.031199270859360695, | |
| "learning_rate": 9.99266706925562e-06, | |
| "loss": 1.8206, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.06907521017950466, | |
| "grad_norm": 0.030985625460743904, | |
| "learning_rate": 9.990947518281312e-06, | |
| "loss": 1.8281, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.070892978868439, | |
| "grad_norm": 0.02339562401175499, | |
| "learning_rate": 9.989047191448934e-06, | |
| "loss": 1.82, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.07271074755737332, | |
| "grad_norm": 0.0256453026086092, | |
| "learning_rate": 9.986966157589751e-06, | |
| "loss": 1.8079, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.07452851624630766, | |
| "grad_norm": 0.025680653750896454, | |
| "learning_rate": 9.984704492080366e-06, | |
| "loss": 1.8088, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.076346284935242, | |
| "grad_norm": 0.026331942528486252, | |
| "learning_rate": 9.982262276840002e-06, | |
| "loss": 1.8153, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.07816405362417632, | |
| "grad_norm": 0.026452744379639626, | |
| "learning_rate": 9.979639600327522e-06, | |
| "loss": 1.8082, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.07998182231311066, | |
| "grad_norm": 0.020438341423869133, | |
| "learning_rate": 9.976836557538234e-06, | |
| "loss": 1.8087, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.081799591002045, | |
| "grad_norm": 0.022149616852402687, | |
| "learning_rate": 9.973853250000449e-06, | |
| "loss": 1.8132, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.08361735969097932, | |
| "grad_norm": 0.020680025219917297, | |
| "learning_rate": 9.970689785771798e-06, | |
| "loss": 1.8077, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.08543512837991366, | |
| "grad_norm": 0.018105728551745415, | |
| "learning_rate": 9.967346279435328e-06, | |
| "loss": 1.8063, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.08725289706884799, | |
| "grad_norm": 0.020593147724866867, | |
| "learning_rate": 9.963822852095344e-06, | |
| "loss": 1.8036, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.08907066575778232, | |
| "grad_norm": 0.0193562563508749, | |
| "learning_rate": 9.960119631373023e-06, | |
| "loss": 1.8135, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.09088843444671665, | |
| "grad_norm": 0.017045950517058372, | |
| "learning_rate": 9.95623675140179e-06, | |
| "loss": 1.8115, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.09270620313565099, | |
| "grad_norm": 0.01905151829123497, | |
| "learning_rate": 9.952174352822474e-06, | |
| "loss": 1.8087, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.09452397182458531, | |
| "grad_norm": 0.019179217517375946, | |
| "learning_rate": 9.947932582778188e-06, | |
| "loss": 1.8093, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.09634174051351965, | |
| "grad_norm": 0.016135873273015022, | |
| "learning_rate": 9.943511594909024e-06, | |
| "loss": 1.8008, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.09815950920245399, | |
| "grad_norm": 0.016653183847665787, | |
| "learning_rate": 9.938911549346473e-06, | |
| "loss": 1.8075, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.09997727789138833, | |
| "grad_norm": 0.01784764975309372, | |
| "learning_rate": 9.934132612707631e-06, | |
| "loss": 1.8065, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.10179504658032265, | |
| "grad_norm": 0.01742948405444622, | |
| "learning_rate": 9.929174958089167e-06, | |
| "loss": 1.8066, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.10361281526925699, | |
| "grad_norm": 0.015608050860464573, | |
| "learning_rate": 9.924038765061042e-06, | |
| "loss": 1.8089, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.10543058395819133, | |
| "grad_norm": 0.017180059105157852, | |
| "learning_rate": 9.918724219660013e-06, | |
| "loss": 1.8063, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.10724835264712565, | |
| "grad_norm": 0.01681089587509632, | |
| "learning_rate": 9.913231514382902e-06, | |
| "loss": 1.7952, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.10906612133605999, | |
| "grad_norm": 0.016128279268741608, | |
| "learning_rate": 9.907560848179607e-06, | |
| "loss": 1.797, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.11088389002499432, | |
| "grad_norm": 0.016297221183776855, | |
| "learning_rate": 9.901712426445901e-06, | |
| "loss": 1.7966, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.11270165871392865, | |
| "grad_norm": 0.017089389264583588, | |
| "learning_rate": 9.895686461016007e-06, | |
| "loss": 1.8097, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.11451942740286299, | |
| "grad_norm": 0.01613052934408188, | |
| "learning_rate": 9.889483170154903e-06, | |
| "loss": 1.7984, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.11633719609179732, | |
| "grad_norm": 0.016225503757596016, | |
| "learning_rate": 9.883102778550434e-06, | |
| "loss": 1.8013, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.11815496478073165, | |
| "grad_norm": 0.015952223911881447, | |
| "learning_rate": 9.876545517305163e-06, | |
| "loss": 1.7993, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.11997273346966598, | |
| "grad_norm": 0.016045618802309036, | |
| "learning_rate": 9.869811623928001e-06, | |
| "loss": 1.7968, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.12179050215860032, | |
| "grad_norm": 0.015822941437363625, | |
| "learning_rate": 9.862901342325617e-06, | |
| "loss": 1.7947, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.12360827084753465, | |
| "grad_norm": 0.016080934554338455, | |
| "learning_rate": 9.855814922793583e-06, | |
| "loss": 1.8011, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.125426039536469, | |
| "grad_norm": 0.01613529957830906, | |
| "learning_rate": 9.848552622007326e-06, | |
| "loss": 1.7956, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.12724380822540332, | |
| "grad_norm": 0.01521450374275446, | |
| "learning_rate": 9.841114703012817e-06, | |
| "loss": 1.7961, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.12906157691433764, | |
| "grad_norm": 0.01613503508269787, | |
| "learning_rate": 9.83350143521706e-06, | |
| "loss": 1.7981, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.130879345603272, | |
| "grad_norm": 0.01576644368469715, | |
| "learning_rate": 9.82571309437831e-06, | |
| "loss": 1.8042, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.13269711429220632, | |
| "grad_norm": 0.017247062176465988, | |
| "learning_rate": 9.817749962596115e-06, | |
| "loss": 1.793, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.13451488298114064, | |
| "grad_norm": 0.014981955289840698, | |
| "learning_rate": 9.809612328301071e-06, | |
| "loss": 1.8074, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.136332651670075, | |
| "grad_norm": 0.0150354178622365, | |
| "learning_rate": 9.801300486244385e-06, | |
| "loss": 1.7973, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.13815042035900932, | |
| "grad_norm": 0.015270021744072437, | |
| "learning_rate": 9.792814737487207e-06, | |
| "loss": 1.7973, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.13996818904794364, | |
| "grad_norm": 0.016216879710555077, | |
| "learning_rate": 9.784155389389713e-06, | |
| "loss": 1.7986, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.141785957736878, | |
| "grad_norm": 0.015781838446855545, | |
| "learning_rate": 9.775322755599979e-06, | |
| "loss": 1.7937, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.14360372642581232, | |
| "grad_norm": 0.015398108400404453, | |
| "learning_rate": 9.766317156042615e-06, | |
| "loss": 1.7976, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.14542149511474664, | |
| "grad_norm": 0.01513028983026743, | |
| "learning_rate": 9.757138916907184e-06, | |
| "loss": 1.7915, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.147239263803681, | |
| "grad_norm": 0.015322140417993069, | |
| "learning_rate": 9.747788370636389e-06, | |
| "loss": 1.8053, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.14905703249261532, | |
| "grad_norm": 0.016009092330932617, | |
| "learning_rate": 9.738265855914014e-06, | |
| "loss": 1.7908, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.15087480118154964, | |
| "grad_norm": 0.01483672671020031, | |
| "learning_rate": 9.728571717652677e-06, | |
| "loss": 1.7888, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.152692569870484, | |
| "grad_norm": 0.014686529524624348, | |
| "learning_rate": 9.718706306981332e-06, | |
| "loss": 1.7911, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.15451033855941831, | |
| "grad_norm": 0.01669451966881752, | |
| "learning_rate": 9.708669981232542e-06, | |
| "loss": 1.8017, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.15632810724835264, | |
| "grad_norm": 0.014686268754303455, | |
| "learning_rate": 9.698463103929542e-06, | |
| "loss": 1.7979, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.158145875937287, | |
| "grad_norm": 0.01508221123367548, | |
| "learning_rate": 9.688086044773079e-06, | |
| "loss": 1.7872, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.1599636446262213, | |
| "grad_norm": 0.0154942087829113, | |
| "learning_rate": 9.677539179628005e-06, | |
| "loss": 1.794, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.16178141331515564, | |
| "grad_norm": 0.016326844692230225, | |
| "learning_rate": 9.66682289050968e-06, | |
| "loss": 1.7981, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.16359918200409, | |
| "grad_norm": 0.015189899131655693, | |
| "learning_rate": 9.655937565570124e-06, | |
| "loss": 1.7943, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.1654169506930243, | |
| "grad_norm": 0.014669873751699924, | |
| "learning_rate": 9.644883599083959e-06, | |
| "loss": 1.7873, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.16723471938195864, | |
| "grad_norm": 0.015705488622188568, | |
| "learning_rate": 9.63366139143413e-06, | |
| "loss": 1.7959, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.169052488070893, | |
| "grad_norm": 0.015006368048489094, | |
| "learning_rate": 9.622271349097413e-06, | |
| "loss": 1.7883, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.1708702567598273, | |
| "grad_norm": 0.015823103487491608, | |
| "learning_rate": 9.610713884629667e-06, | |
| "loss": 1.7864, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.17268802544876163, | |
| "grad_norm": 0.03225838020443916, | |
| "learning_rate": 9.598989416650915e-06, | |
| "loss": 1.7871, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.17450579413769599, | |
| "grad_norm": 0.015597975812852383, | |
| "learning_rate": 9.587098369830171e-06, | |
| "loss": 1.7804, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.1763235628266303, | |
| "grad_norm": 0.01537901721894741, | |
| "learning_rate": 9.575041174870062e-06, | |
| "loss": 1.7858, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.17814133151556463, | |
| "grad_norm": 0.023264285176992416, | |
| "learning_rate": 9.562818268491216e-06, | |
| "loss": 1.7823, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.17995910020449898, | |
| "grad_norm": 0.01551035512238741, | |
| "learning_rate": 9.550430093416465e-06, | |
| "loss": 1.7882, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.1817768688934333, | |
| "grad_norm": 0.015448925085365772, | |
| "learning_rate": 9.537877098354787e-06, | |
| "loss": 1.7836, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.18359463758236763, | |
| "grad_norm": 0.01610329933464527, | |
| "learning_rate": 9.525159737985066e-06, | |
| "loss": 1.7843, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.18541240627130198, | |
| "grad_norm": 0.015887994319200516, | |
| "learning_rate": 9.512278472939627e-06, | |
| "loss": 1.7835, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.1872301749602363, | |
| "grad_norm": 0.015717443078756332, | |
| "learning_rate": 9.499233769787534e-06, | |
| "loss": 1.7899, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.18904794364917063, | |
| "grad_norm": 0.01613277569413185, | |
| "learning_rate": 9.486026101017711e-06, | |
| "loss": 1.789, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.19086571233810498, | |
| "grad_norm": 0.0161016546189785, | |
| "learning_rate": 9.472655945021815e-06, | |
| "loss": 1.7885, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.1926834810270393, | |
| "grad_norm": 0.015553218312561512, | |
| "learning_rate": 9.459123786076911e-06, | |
| "loss": 1.7841, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.19450124971597363, | |
| "grad_norm": 0.01636493392288685, | |
| "learning_rate": 9.445430114327936e-06, | |
| "loss": 1.7864, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.19631901840490798, | |
| "grad_norm": 0.016063738614320755, | |
| "learning_rate": 9.431575425769938e-06, | |
| "loss": 1.7836, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.1981367870938423, | |
| "grad_norm": 0.016147315502166748, | |
| "learning_rate": 9.417560222230115e-06, | |
| "loss": 1.7786, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.19995455578277666, | |
| "grad_norm": 0.01560090109705925, | |
| "learning_rate": 9.40338501134964e-06, | |
| "loss": 1.7782, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.20177232447171098, | |
| "grad_norm": 0.015402060933411121, | |
| "learning_rate": 9.389050306565269e-06, | |
| "loss": 1.7814, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.2035900931606453, | |
| "grad_norm": 0.017125973477959633, | |
| "learning_rate": 9.374556627090749e-06, | |
| "loss": 1.7793, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.20540786184957965, | |
| "grad_norm": 0.015735799446702003, | |
| "learning_rate": 9.359904497898009e-06, | |
| "loss": 1.7872, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.20722563053851398, | |
| "grad_norm": 0.01627574861049652, | |
| "learning_rate": 9.345094449698143e-06, | |
| "loss": 1.7893, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.2090433992274483, | |
| "grad_norm": 0.014931687153875828, | |
| "learning_rate": 9.330127018922195e-06, | |
| "loss": 1.7825, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.21086116791638265, | |
| "grad_norm": 0.015015835873782635, | |
| "learning_rate": 9.315002747701716e-06, | |
| "loss": 1.77, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.21267893660531698, | |
| "grad_norm": 0.01571677438914776, | |
| "learning_rate": 9.299722183849144e-06, | |
| "loss": 1.7843, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.2144967052942513, | |
| "grad_norm": 0.014991500414907932, | |
| "learning_rate": 9.284285880837947e-06, | |
| "loss": 1.7824, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.21631447398318565, | |
| "grad_norm": 0.016052858904004097, | |
| "learning_rate": 9.268694397782585e-06, | |
| "loss": 1.7805, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.21813224267211997, | |
| "grad_norm": 0.015834221616387367, | |
| "learning_rate": 9.252948299418255e-06, | |
| "loss": 1.7855, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.2199500113610543, | |
| "grad_norm": 0.01614440232515335, | |
| "learning_rate": 9.237048156080433e-06, | |
| "loss": 1.7885, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.22176778004998865, | |
| "grad_norm": 0.01563919708132744, | |
| "learning_rate": 9.220994543684225e-06, | |
| "loss": 1.7799, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.22358554873892297, | |
| "grad_norm": 0.015689659863710403, | |
| "learning_rate": 9.2047880437035e-06, | |
| "loss": 1.7808, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.2254033174278573, | |
| "grad_norm": 0.015433340333402157, | |
| "learning_rate": 9.188429243149824e-06, | |
| "loss": 1.7769, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.22722108611679165, | |
| "grad_norm": 0.01560978963971138, | |
| "learning_rate": 9.171918734551212e-06, | |
| "loss": 1.7791, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.22903885480572597, | |
| "grad_norm": 0.016046756878495216, | |
| "learning_rate": 9.155257115930651e-06, | |
| "loss": 1.7778, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.2308566234946603, | |
| "grad_norm": 0.01664203219115734, | |
| "learning_rate": 9.138444990784455e-06, | |
| "loss": 1.7811, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.23267439218359465, | |
| "grad_norm": 0.015654807910323143, | |
| "learning_rate": 9.121482968060384e-06, | |
| "loss": 1.7841, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.23449216087252897, | |
| "grad_norm": 0.016352280974388123, | |
| "learning_rate": 9.104371662135612e-06, | |
| "loss": 1.7839, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.2363099295614633, | |
| "grad_norm": 0.016163717955350876, | |
| "learning_rate": 9.08711169279446e-06, | |
| "loss": 1.7847, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.23812769825039765, | |
| "grad_norm": 0.016361849382519722, | |
| "learning_rate": 9.069703685205945e-06, | |
| "loss": 1.7804, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.23994546693933197, | |
| "grad_norm": 0.01635843515396118, | |
| "learning_rate": 9.052148269901145e-06, | |
| "loss": 1.7811, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.2417632356282663, | |
| "grad_norm": 0.016859732568264008, | |
| "learning_rate": 9.034446082750352e-06, | |
| "loss": 1.7863, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.24358100431720064, | |
| "grad_norm": 0.016207806766033173, | |
| "learning_rate": 9.01659776494005e-06, | |
| "loss": 1.7739, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.24539877300613497, | |
| "grad_norm": 0.016936447471380234, | |
| "learning_rate": 8.998603962949674e-06, | |
| "loss": 1.7818, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.2472165416950693, | |
| "grad_norm": 0.015802595764398575, | |
| "learning_rate": 8.98046532852822e-06, | |
| "loss": 1.7836, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.24903431038400364, | |
| "grad_norm": 0.016628528013825417, | |
| "learning_rate": 8.96218251867061e-06, | |
| "loss": 1.7822, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.250852079072938, | |
| "grad_norm": 0.01642756536602974, | |
| "learning_rate": 8.943756195593916e-06, | |
| "loss": 1.7756, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.2526698477618723, | |
| "grad_norm": 0.016094859689474106, | |
| "learning_rate": 8.925187026713363e-06, | |
| "loss": 1.766, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.25448761645080664, | |
| "grad_norm": 0.015560369938611984, | |
| "learning_rate": 8.90647568461816e-06, | |
| "loss": 1.783, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.256305385139741, | |
| "grad_norm": 0.01574082300066948, | |
| "learning_rate": 8.887622847047131e-06, | |
| "loss": 1.7882, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.2581231538286753, | |
| "grad_norm": 0.01694745570421219, | |
| "learning_rate": 8.868629196864182e-06, | |
| "loss": 1.7797, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.25994092251760964, | |
| "grad_norm": 0.01562688499689102, | |
| "learning_rate": 8.84949542203355e-06, | |
| "loss": 1.7832, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.261758691206544, | |
| "grad_norm": 0.015506752766668797, | |
| "learning_rate": 8.83022221559489e-06, | |
| "loss": 1.7749, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.2635764598954783, | |
| "grad_norm": 0.017343781888484955, | |
| "learning_rate": 8.810810275638183e-06, | |
| "loss": 1.7736, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.26539422858441264, | |
| "grad_norm": 0.01597374677658081, | |
| "learning_rate": 8.791260305278434e-06, | |
| "loss": 1.7879, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.267211997273347, | |
| "grad_norm": 0.015632351860404015, | |
| "learning_rate": 8.771573012630214e-06, | |
| "loss": 1.7804, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.2690297659622813, | |
| "grad_norm": 0.01659367047250271, | |
| "learning_rate": 8.751749110782013e-06, | |
| "loss": 1.7827, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.27084753465121564, | |
| "grad_norm": 0.01651635952293873, | |
| "learning_rate": 8.731789317770407e-06, | |
| "loss": 1.7781, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.27266530334015, | |
| "grad_norm": 0.01517146173864603, | |
| "learning_rate": 8.71169435655405e-06, | |
| "loss": 1.7811, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.2744830720290843, | |
| "grad_norm": 0.015295923687517643, | |
| "learning_rate": 8.691464954987494e-06, | |
| "loss": 1.7677, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.27630084071801864, | |
| "grad_norm": 0.015585844404995441, | |
| "learning_rate": 8.671101845794816e-06, | |
| "loss": 1.7745, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.278118609406953, | |
| "grad_norm": 0.015692081302404404, | |
| "learning_rate": 8.65060576654309e-06, | |
| "loss": 1.7745, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.2799363780958873, | |
| "grad_norm": 0.015136554837226868, | |
| "learning_rate": 8.629977459615655e-06, | |
| "loss": 1.7863, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.28175414678482164, | |
| "grad_norm": 0.015603788197040558, | |
| "learning_rate": 8.609217672185246e-06, | |
| "loss": 1.7796, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.283571915473756, | |
| "grad_norm": 0.016288187354803085, | |
| "learning_rate": 8.588327156186915e-06, | |
| "loss": 1.7785, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.2853896841626903, | |
| "grad_norm": 0.016181934624910355, | |
| "learning_rate": 8.567306668290801e-06, | |
| "loss": 1.7597, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.28720745285162463, | |
| "grad_norm": 0.0157309602946043, | |
| "learning_rate": 8.546156969874723e-06, | |
| "loss": 1.7827, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.289025221540559, | |
| "grad_norm": 0.016916731372475624, | |
| "learning_rate": 8.524878826996602e-06, | |
| "loss": 1.7749, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.2908429902294933, | |
| "grad_norm": 0.015968995168805122, | |
| "learning_rate": 8.503473010366713e-06, | |
| "loss": 1.7683, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.29266075891842763, | |
| "grad_norm": 0.01594395563006401, | |
| "learning_rate": 8.481940295319772e-06, | |
| "loss": 1.7792, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.294478527607362, | |
| "grad_norm": 0.016326317563652992, | |
| "learning_rate": 8.460281461786848e-06, | |
| "loss": 1.7734, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.2962962962962963, | |
| "grad_norm": 0.016297809779644012, | |
| "learning_rate": 8.438497294267117e-06, | |
| "loss": 1.769, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.29811406498523063, | |
| "grad_norm": 0.017145946621894836, | |
| "learning_rate": 8.416588581799447e-06, | |
| "loss": 1.7767, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.299931833674165, | |
| "grad_norm": 0.016356928274035454, | |
| "learning_rate": 8.394556117933816e-06, | |
| "loss": 1.772, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.3017496023630993, | |
| "grad_norm": 0.016378790140151978, | |
| "learning_rate": 8.372400700702569e-06, | |
| "loss": 1.7701, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.30356737105203363, | |
| "grad_norm": 0.018152521923184395, | |
| "learning_rate": 8.350123132591522e-06, | |
| "loss": 1.7769, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.305385139740968, | |
| "grad_norm": 0.017259759828448296, | |
| "learning_rate": 8.327724220510873e-06, | |
| "loss": 1.7742, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.3072029084299023, | |
| "grad_norm": 0.016766058281064034, | |
| "learning_rate": 8.305204775766003e-06, | |
| "loss": 1.771, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.30902067711883663, | |
| "grad_norm": 0.017410485073924065, | |
| "learning_rate": 8.282565614028068e-06, | |
| "loss": 1.7663, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.310838445807771, | |
| "grad_norm": 0.017518077045679092, | |
| "learning_rate": 8.259807555304469e-06, | |
| "loss": 1.769, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.3126562144967053, | |
| "grad_norm": 0.017017841339111328, | |
| "learning_rate": 8.23693142390914e-06, | |
| "loss": 1.7733, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.3144739831856396, | |
| "grad_norm": 0.017034539952874184, | |
| "learning_rate": 8.213938048432697e-06, | |
| "loss": 1.7715, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.316291751874574, | |
| "grad_norm": 0.016053663566708565, | |
| "learning_rate": 8.19082826171243e-06, | |
| "loss": 1.768, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.3181095205635083, | |
| "grad_norm": 0.017002522945404053, | |
| "learning_rate": 8.167602900802121e-06, | |
| "loss": 1.7571, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.3199272892524426, | |
| "grad_norm": 0.016666986048221588, | |
| "learning_rate": 8.144262806941743e-06, | |
| "loss": 1.776, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.321745057941377, | |
| "grad_norm": 0.017756953835487366, | |
| "learning_rate": 8.120808825526983e-06, | |
| "loss": 1.7701, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.3235628266303113, | |
| "grad_norm": 0.01685352995991707, | |
| "learning_rate": 8.097241806078616e-06, | |
| "loss": 1.7697, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.3253805953192456, | |
| "grad_norm": 0.01626460626721382, | |
| "learning_rate": 8.073562602211743e-06, | |
| "loss": 1.7733, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.32719836400818, | |
| "grad_norm": 0.017634931951761246, | |
| "learning_rate": 8.049772071604864e-06, | |
| "loss": 1.7817, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.32901613269711427, | |
| "grad_norm": 0.0157694723457098, | |
| "learning_rate": 8.025871075968828e-06, | |
| "loss": 1.7667, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.3308339013860486, | |
| "grad_norm": 0.016742341220378876, | |
| "learning_rate": 8.001860481015594e-06, | |
| "loss": 1.7753, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.332651670074983, | |
| "grad_norm": 0.015466434881091118, | |
| "learning_rate": 7.977741156426901e-06, | |
| "loss": 1.7706, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.33446943876391727, | |
| "grad_norm": 0.017226146534085274, | |
| "learning_rate": 7.953513975822755e-06, | |
| "loss": 1.7665, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.3362872074528516, | |
| "grad_norm": 0.01610388606786728, | |
| "learning_rate": 7.92917981672979e-06, | |
| "loss": 1.7723, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.338104976141786, | |
| "grad_norm": 0.016837526112794876, | |
| "learning_rate": 7.904739560549475e-06, | |
| "loss": 1.7754, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.33992274483072027, | |
| "grad_norm": 0.016696933656930923, | |
| "learning_rate": 7.8801940925262e-06, | |
| "loss": 1.7707, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.3417405135196546, | |
| "grad_norm": 0.016263185068964958, | |
| "learning_rate": 7.855544301715203e-06, | |
| "loss": 1.7702, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.34355828220858897, | |
| "grad_norm": 0.01645650342106819, | |
| "learning_rate": 7.830791080950373e-06, | |
| "loss": 1.768, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.34537605089752327, | |
| "grad_norm": 0.01569991558790207, | |
| "learning_rate": 7.805935326811913e-06, | |
| "loss": 1.767, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.3471938195864576, | |
| "grad_norm": 0.015973446890711784, | |
| "learning_rate": 7.780977939593856e-06, | |
| "loss": 1.7713, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.34901158827539197, | |
| "grad_norm": 0.01654656231403351, | |
| "learning_rate": 7.755919823271466e-06, | |
| "loss": 1.7577, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.35082935696432627, | |
| "grad_norm": 0.015675723552703857, | |
| "learning_rate": 7.730761885468486e-06, | |
| "loss": 1.7732, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.3526471256532606, | |
| "grad_norm": 0.018406856805086136, | |
| "learning_rate": 7.70550503742427e-06, | |
| "loss": 1.7668, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.35446489434219497, | |
| "grad_norm": 0.016395216807723045, | |
| "learning_rate": 7.68015019396078e-06, | |
| "loss": 1.7672, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.35628266303112927, | |
| "grad_norm": 0.016013452783226967, | |
| "learning_rate": 7.654698273449435e-06, | |
| "loss": 1.7646, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.3581004317200636, | |
| "grad_norm": 0.01679440774023533, | |
| "learning_rate": 7.629150197777866e-06, | |
| "loss": 1.7612, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.35991820040899797, | |
| "grad_norm": 0.01686931401491165, | |
| "learning_rate": 7.603506892316513e-06, | |
| "loss": 1.7597, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.36173596909793226, | |
| "grad_norm": 0.017471209168434143, | |
| "learning_rate": 7.57776928588511e-06, | |
| "loss": 1.7756, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.3635537377868666, | |
| "grad_norm": 0.017604535445570946, | |
| "learning_rate": 7.551938310719043e-06, | |
| "loss": 1.7706, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.36537150647580097, | |
| "grad_norm": 0.016083979979157448, | |
| "learning_rate": 7.526014902435583e-06, | |
| "loss": 1.7689, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.36718927516473526, | |
| "grad_norm": 0.017569448798894882, | |
| "learning_rate": 7.500000000000001e-06, | |
| "loss": 1.7716, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.3690070438536696, | |
| "grad_norm": 0.018971305340528488, | |
| "learning_rate": 7.4738945456915505e-06, | |
| "loss": 1.7639, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.37082481254260397, | |
| "grad_norm": 0.017489226534962654, | |
| "learning_rate": 7.447699485069342e-06, | |
| "loss": 1.7695, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.37264258123153826, | |
| "grad_norm": 0.016599513590335846, | |
| "learning_rate": 7.421415766938098e-06, | |
| "loss": 1.758, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.3744603499204726, | |
| "grad_norm": 0.017470112070441246, | |
| "learning_rate": 7.395044343313777e-06, | |
| "loss": 1.7635, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.37627811860940696, | |
| "grad_norm": 0.01866212487220764, | |
| "learning_rate": 7.3685861693891026e-06, | |
| "loss": 1.7698, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.37809588729834126, | |
| "grad_norm": 0.016111081466078758, | |
| "learning_rate": 7.342042203498952e-06, | |
| "loss": 1.763, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.3799136559872756, | |
| "grad_norm": 0.01669992506504059, | |
| "learning_rate": 7.315413407085656e-06, | |
| "loss": 1.7614, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.38173142467620996, | |
| "grad_norm": 0.01589970290660858, | |
| "learning_rate": 7.288700744664167e-06, | |
| "loss": 1.773, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.38354919336514426, | |
| "grad_norm": 0.01591925323009491, | |
| "learning_rate": 7.261905183787136e-06, | |
| "loss": 1.7754, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.3853669620540786, | |
| "grad_norm": 0.01747284270823002, | |
| "learning_rate": 7.235027695009846e-06, | |
| "loss": 1.7721, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.38718473074301296, | |
| "grad_norm": 0.016405848786234856, | |
| "learning_rate": 7.208069251855078e-06, | |
| "loss": 1.7622, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.38900249943194726, | |
| "grad_norm": 0.01654895953834057, | |
| "learning_rate": 7.181030830777838e-06, | |
| "loss": 1.7636, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.3908202681208816, | |
| "grad_norm": 0.015662197023630142, | |
| "learning_rate": 7.153913411129993e-06, | |
| "loss": 1.7751, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.39263803680981596, | |
| "grad_norm": 0.015878858044743538, | |
| "learning_rate": 7.1267179751248005e-06, | |
| "loss": 1.7708, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.3944558054987503, | |
| "grad_norm": 0.016220899298787117, | |
| "learning_rate": 7.099445507801324e-06, | |
| "loss": 1.7679, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.3962735741876846, | |
| "grad_norm": 0.015889156609773636, | |
| "learning_rate": 7.0720969969887595e-06, | |
| "loss": 1.7657, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.39809134287661896, | |
| "grad_norm": 0.01594599336385727, | |
| "learning_rate": 7.044673433270659e-06, | |
| "loss": 1.7641, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.3999091115655533, | |
| "grad_norm": 0.015293586999177933, | |
| "learning_rate": 7.017175809949044e-06, | |
| "loss": 1.7677, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.4017268802544876, | |
| "grad_norm": 0.015891166403889656, | |
| "learning_rate": 6.98960512300843e-06, | |
| "loss": 1.7629, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.40354464894342196, | |
| "grad_norm": 0.016649074852466583, | |
| "learning_rate": 6.961962371079752e-06, | |
| "loss": 1.7655, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.4053624176323563, | |
| "grad_norm": 0.016516495496034622, | |
| "learning_rate": 6.934248555404197e-06, | |
| "loss": 1.7741, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.4071801863212906, | |
| "grad_norm": 0.01925363577902317, | |
| "learning_rate": 6.906464679796927e-06, | |
| "loss": 1.7572, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.40899795501022496, | |
| "grad_norm": 0.01634056493639946, | |
| "learning_rate": 6.878611750610731e-06, | |
| "loss": 1.759, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.4108157236991593, | |
| "grad_norm": 0.016612950712442398, | |
| "learning_rate": 6.850690776699574e-06, | |
| "loss": 1.7562, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.4126334923880936, | |
| "grad_norm": 0.01613459922373295, | |
| "learning_rate": 6.822702769382042e-06, | |
| "loss": 1.7697, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.41445126107702795, | |
| "grad_norm": 0.016045957803726196, | |
| "learning_rate": 6.79464874240473e-06, | |
| "loss": 1.7623, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.4162690297659623, | |
| "grad_norm": 0.016840225085616112, | |
| "learning_rate": 6.766529711905513e-06, | |
| "loss": 1.7742, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.4180867984548966, | |
| "grad_norm": 0.015475032851099968, | |
| "learning_rate": 6.7383466963767386e-06, | |
| "loss": 1.7644, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.41990456714383095, | |
| "grad_norm": 0.016331806778907776, | |
| "learning_rate": 6.710100716628345e-06, | |
| "loss": 1.7722, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.4217223358327653, | |
| "grad_norm": 0.016033973544836044, | |
| "learning_rate": 6.681792795750876e-06, | |
| "loss": 1.7572, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.4235401045216996, | |
| "grad_norm": 0.015963230282068253, | |
| "learning_rate": 6.653423959078435e-06, | |
| "loss": 1.7714, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.42535787321063395, | |
| "grad_norm": 0.016069794073700905, | |
| "learning_rate": 6.624995234151539e-06, | |
| "loss": 1.7702, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.4271756418995683, | |
| "grad_norm": 0.016175484284758568, | |
| "learning_rate": 6.5965076506799e-06, | |
| "loss": 1.7595, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.4289934105885026, | |
| "grad_norm": 0.017575398087501526, | |
| "learning_rate": 6.567962240505136e-06, | |
| "loss": 1.7589, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.43081117927743695, | |
| "grad_norm": 0.01609048619866371, | |
| "learning_rate": 6.539360037563384e-06, | |
| "loss": 1.7583, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.4326289479663713, | |
| "grad_norm": 0.016053223982453346, | |
| "learning_rate": 6.510702077847864e-06, | |
| "loss": 1.7593, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.4344467166553056, | |
| "grad_norm": 0.01691989041864872, | |
| "learning_rate": 6.481989399371347e-06, | |
| "loss": 1.7643, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.43626448534423995, | |
| "grad_norm": 0.017391884699463844, | |
| "learning_rate": 6.453223042128556e-06, | |
| "loss": 1.7588, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.4380822540331743, | |
| "grad_norm": 0.016525816172361374, | |
| "learning_rate": 6.424404048058501e-06, | |
| "loss": 1.7637, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.4399000227221086, | |
| "grad_norm": 0.01585998386144638, | |
| "learning_rate": 6.395533461006736e-06, | |
| "loss": 1.7652, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.44171779141104295, | |
| "grad_norm": 0.01582312397658825, | |
| "learning_rate": 6.366612326687555e-06, | |
| "loss": 1.7584, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.4435355600999773, | |
| "grad_norm": 0.01715337485074997, | |
| "learning_rate": 6.337641692646106e-06, | |
| "loss": 1.7606, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.4453533287889116, | |
| "grad_norm": 0.021504878997802734, | |
| "learning_rate": 6.308622608220457e-06, | |
| "loss": 1.762, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.44717109747784595, | |
| "grad_norm": 0.015527226962149143, | |
| "learning_rate": 6.2795561245035895e-06, | |
| "loss": 1.757, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.4489888661667803, | |
| "grad_norm": 0.017598124220967293, | |
| "learning_rate": 6.250443294305315e-06, | |
| "loss": 1.7547, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.4508066348557146, | |
| "grad_norm": 0.016357263550162315, | |
| "learning_rate": 6.221285172114156e-06, | |
| "loss": 1.7585, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.45262440354464895, | |
| "grad_norm": 0.01646249182522297, | |
| "learning_rate": 6.192082814059141e-06, | |
| "loss": 1.76, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.4544421722335833, | |
| "grad_norm": 0.016435401514172554, | |
| "learning_rate": 6.162837277871553e-06, | |
| "loss": 1.7664, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.4562599409225176, | |
| "grad_norm": 0.016678526997566223, | |
| "learning_rate": 6.133549622846625e-06, | |
| "loss": 1.7713, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.45807770961145194, | |
| "grad_norm": 0.017534134909510612, | |
| "learning_rate": 6.104220909805162e-06, | |
| "loss": 1.7589, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.4598954783003863, | |
| "grad_norm": 0.016283275559544563, | |
| "learning_rate": 6.074852201055121e-06, | |
| "loss": 1.7598, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.4617132469893206, | |
| "grad_norm": 0.017745792865753174, | |
| "learning_rate": 6.045444560353136e-06, | |
| "loss": 1.7643, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.46353101567825494, | |
| "grad_norm": 0.017753778025507927, | |
| "learning_rate": 6.015999052865982e-06, | |
| "loss": 1.7545, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.4653487843671893, | |
| "grad_norm": 0.017292464151978493, | |
| "learning_rate": 5.986516745132e-06, | |
| "loss": 1.7582, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.4671665530561236, | |
| "grad_norm": 0.01648300141096115, | |
| "learning_rate": 5.956998705022464e-06, | |
| "loss": 1.7603, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.46898432174505794, | |
| "grad_norm": 0.017090782523155212, | |
| "learning_rate": 5.927446001702899e-06, | |
| "loss": 1.7654, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.4708020904339923, | |
| "grad_norm": 0.015470580197870731, | |
| "learning_rate": 5.8978597055943585e-06, | |
| "loss": 1.7529, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.4726198591229266, | |
| "grad_norm": 0.016197843477129936, | |
| "learning_rate": 5.8682408883346535e-06, | |
| "loss": 1.7551, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.47443762781186094, | |
| "grad_norm": 0.018076961860060692, | |
| "learning_rate": 5.8385906227395304e-06, | |
| "loss": 1.7629, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.4762553965007953, | |
| "grad_norm": 0.015964508056640625, | |
| "learning_rate": 5.808909982763825e-06, | |
| "loss": 1.7668, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.4780731651897296, | |
| "grad_norm": 0.016753260046243668, | |
| "learning_rate": 5.779200043462549e-06, | |
| "loss": 1.753, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.47989093387866394, | |
| "grad_norm": 0.01664654165506363, | |
| "learning_rate": 5.749461880951966e-06, | |
| "loss": 1.7654, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.4817087025675983, | |
| "grad_norm": 0.01592446304857731, | |
| "learning_rate": 5.719696572370596e-06, | |
| "loss": 1.763, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.4835264712565326, | |
| "grad_norm": 0.016646496951580048, | |
| "learning_rate": 5.689905195840216e-06, | |
| "loss": 1.766, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.48534423994546694, | |
| "grad_norm": 0.016208553686738014, | |
| "learning_rate": 5.660088830426804e-06, | |
| "loss": 1.7551, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.4871620086344013, | |
| "grad_norm": 0.01585574448108673, | |
| "learning_rate": 5.630248556101448e-06, | |
| "loss": 1.7638, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.4889797773233356, | |
| "grad_norm": 0.016133490949869156, | |
| "learning_rate": 5.600385453701241e-06, | |
| "loss": 1.7644, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.49079754601226994, | |
| "grad_norm": 0.015675894916057587, | |
| "learning_rate": 5.570500604890124e-06, | |
| "loss": 1.7675, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.4926153147012043, | |
| "grad_norm": 0.01614633947610855, | |
| "learning_rate": 5.540595092119709e-06, | |
| "loss": 1.7636, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.4944330833901386, | |
| "grad_norm": 0.01666291244328022, | |
| "learning_rate": 5.510669998590074e-06, | |
| "loss": 1.7583, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.49625085207907293, | |
| "grad_norm": 0.016553543508052826, | |
| "learning_rate": 5.480726408210519e-06, | |
| "loss": 1.7586, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.4980686207680073, | |
| "grad_norm": 0.017047051340341568, | |
| "learning_rate": 5.450765405560328e-06, | |
| "loss": 1.7534, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.4998863894569416, | |
| "grad_norm": 0.01579987071454525, | |
| "learning_rate": 5.4207880758494545e-06, | |
| "loss": 1.7669, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.501704158145876, | |
| "grad_norm": 0.016013607382774353, | |
| "learning_rate": 5.390795504879243e-06, | |
| "loss": 1.7546, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.5035219268348102, | |
| "grad_norm": 0.015493376180529594, | |
| "learning_rate": 5.360788779003082e-06, | |
| "loss": 1.7555, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.5053396955237446, | |
| "grad_norm": 0.016125505790114403, | |
| "learning_rate": 5.330768985087059e-06, | |
| "loss": 1.7485, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.5071574642126789, | |
| "grad_norm": 0.015707215294241905, | |
| "learning_rate": 5.300737210470603e-06, | |
| "loss": 1.7556, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.5089752329016133, | |
| "grad_norm": 0.016529636457562447, | |
| "learning_rate": 5.270694542927089e-06, | |
| "loss": 1.7621, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.5107930015905476, | |
| "grad_norm": 0.015912501141428947, | |
| "learning_rate": 5.2406420706244376e-06, | |
| "loss": 1.7578, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.512610770279482, | |
| "grad_norm": 0.017320740967988968, | |
| "learning_rate": 5.2105808820857126e-06, | |
| "loss": 1.7509, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.5144285389684162, | |
| "grad_norm": 0.016190189868211746, | |
| "learning_rate": 5.180512066149682e-06, | |
| "loss": 1.7586, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.5162463076573506, | |
| "grad_norm": 0.01586255431175232, | |
| "learning_rate": 5.150436711931387e-06, | |
| "loss": 1.7618, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.5180640763462849, | |
| "grad_norm": 0.016613394021987915, | |
| "learning_rate": 5.120355908782686e-06, | |
| "loss": 1.7582, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.5198818450352193, | |
| "grad_norm": 0.016856033354997635, | |
| "learning_rate": 5.090270746252803e-06, | |
| "loss": 1.766, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.5216996137241536, | |
| "grad_norm": 0.015804223716259003, | |
| "learning_rate": 5.060182314048865e-06, | |
| "loss": 1.7548, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.523517382413088, | |
| "grad_norm": 0.01533227227628231, | |
| "learning_rate": 5.030091701996428e-06, | |
| "loss": 1.7508, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.5253351511020222, | |
| "grad_norm": 0.017301153391599655, | |
| "learning_rate": 5e-06, | |
| "loss": 1.7508, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.5271529197909566, | |
| "grad_norm": 0.016463877633213997, | |
| "learning_rate": 4.9699082980035735e-06, | |
| "loss": 1.7612, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.5289706884798909, | |
| "grad_norm": 0.017038939520716667, | |
| "learning_rate": 4.939817685951135e-06, | |
| "loss": 1.7557, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.5307884571688253, | |
| "grad_norm": 0.01651296392083168, | |
| "learning_rate": 4.909729253747197e-06, | |
| "loss": 1.7555, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.5326062258577596, | |
| "grad_norm": 0.01751718856394291, | |
| "learning_rate": 4.879644091217317e-06, | |
| "loss": 1.7524, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.534423994546694, | |
| "grad_norm": 0.016333656385540962, | |
| "learning_rate": 4.8495632880686155e-06, | |
| "loss": 1.7452, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.5362417632356282, | |
| "grad_norm": 0.016173357143998146, | |
| "learning_rate": 4.819487933850319e-06, | |
| "loss": 1.7611, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.5380595319245626, | |
| "grad_norm": 0.016298582777380943, | |
| "learning_rate": 4.789419117914288e-06, | |
| "loss": 1.752, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.5398773006134969, | |
| "grad_norm": 0.017157401889562607, | |
| "learning_rate": 4.759357929375563e-06, | |
| "loss": 1.7518, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.5416950693024313, | |
| "grad_norm": 0.01661343313753605, | |
| "learning_rate": 4.729305457072913e-06, | |
| "loss": 1.7637, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.5435128379913656, | |
| "grad_norm": 0.016558021306991577, | |
| "learning_rate": 4.699262789529396e-06, | |
| "loss": 1.7511, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.5453306066803, | |
| "grad_norm": 0.016143113374710083, | |
| "learning_rate": 4.6692310149129425e-06, | |
| "loss": 1.7562, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.5471483753692342, | |
| "grad_norm": 0.01550297997891903, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7592, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.5489661440581686, | |
| "grad_norm": 0.016153663396835327, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7495, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.5507839127471029, | |
| "grad_norm": 0.017202477902173996, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7564, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.5526016814360373, | |
| "grad_norm": 0.01577403023838997, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7635, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.5544194501249716, | |
| "grad_norm": 0.016280407086014748, | |
| "learning_rate": 1e-05, | |
| "loss": 1.748, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.556237218813906, | |
| "grad_norm": 0.016771433874964714, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7467, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.5580549875028402, | |
| "grad_norm": 0.01556472573429346, | |
| "learning_rate": 1e-05, | |
| "loss": 1.751, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.5598727561917746, | |
| "grad_norm": 0.01656194217503071, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7605, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.5616905248807089, | |
| "grad_norm": 0.017003118991851807, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7516, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.5635082935696433, | |
| "grad_norm": 0.016028909012675285, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7557, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.5653260622585776, | |
| "grad_norm": 0.016611898317933083, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7548, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.567143830947512, | |
| "grad_norm": 0.01619804836809635, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7569, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.5689615996364462, | |
| "grad_norm": 0.01763117127120495, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7499, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.5707793683253806, | |
| "grad_norm": 0.017052598297595978, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7628, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.5725971370143149, | |
| "grad_norm": 0.015606777742505074, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7695, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.5744149057032493, | |
| "grad_norm": 0.017086924985051155, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7573, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.5762326743921836, | |
| "grad_norm": 0.01597212627530098, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7672, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.578050443081118, | |
| "grad_norm": 0.016126353293657303, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7481, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.5798682117700522, | |
| "grad_norm": 0.016764555126428604, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7543, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.5816859804589866, | |
| "grad_norm": 0.016383804380893707, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7595, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.5835037491479209, | |
| "grad_norm": 0.016328634694218636, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7624, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.5853215178368553, | |
| "grad_norm": 0.017615774646401405, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7633, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.5871392865257896, | |
| "grad_norm": 0.016653137281537056, | |
| "learning_rate": 1e-05, | |
| "loss": 1.753, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.588957055214724, | |
| "grad_norm": 0.016418032348155975, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7553, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.5907748239036582, | |
| "grad_norm": 0.01667468063533306, | |
| "learning_rate": 1e-05, | |
| "loss": 1.759, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.5925925925925926, | |
| "grad_norm": 0.015785276889801025, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7545, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.5944103612815269, | |
| "grad_norm": 0.017045632004737854, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7569, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.5962281299704613, | |
| "grad_norm": 0.016107341274619102, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7551, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.5980458986593956, | |
| "grad_norm": 0.016075948253273964, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7489, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.59986366734833, | |
| "grad_norm": 0.015299948863685131, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7584, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.6016814360372642, | |
| "grad_norm": 0.01539833564311266, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7484, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.6034992047261986, | |
| "grad_norm": 0.016403749585151672, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7549, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.6053169734151329, | |
| "grad_norm": 0.017300885170698166, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7503, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.6071347421040673, | |
| "grad_norm": 0.01626763306558132, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7613, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.6089525107930016, | |
| "grad_norm": 0.01677662320435047, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7539, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.610770279481936, | |
| "grad_norm": 0.017275378108024597, | |
| "learning_rate": 1e-05, | |
| "loss": 1.752, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.6125880481708702, | |
| "grad_norm": 0.015787243843078613, | |
| "learning_rate": 1e-05, | |
| "loss": 1.753, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.6144058168598046, | |
| "grad_norm": 0.016181068494915962, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7574, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.6162235855487389, | |
| "grad_norm": 0.01625332608819008, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7552, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.6180413542376733, | |
| "grad_norm": 0.01715734228491783, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7538, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.6198591229266076, | |
| "grad_norm": 0.018199391663074493, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7589, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.621676891615542, | |
| "grad_norm": 0.01592421531677246, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7514, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.6234946603044762, | |
| "grad_norm": 0.015030477195978165, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7578, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.6253124289934106, | |
| "grad_norm": 0.01609027571976185, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7528, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.6271301976823449, | |
| "grad_norm": 0.015512831509113312, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7511, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.6289479663712793, | |
| "grad_norm": 0.015017388388514519, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7504, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.6307657350602136, | |
| "grad_norm": 0.01578696072101593, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7545, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.632583503749148, | |
| "grad_norm": 0.015417453832924366, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7481, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.6344012724380823, | |
| "grad_norm": 0.015762289986014366, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7614, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.6362190411270165, | |
| "grad_norm": 0.01597565785050392, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7497, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.6380368098159509, | |
| "grad_norm": 0.01767154410481453, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7537, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.6398545785048853, | |
| "grad_norm": 0.01671607419848442, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7456, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.6416723471938196, | |
| "grad_norm": 0.015792865306138992, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7494, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.643490115882754, | |
| "grad_norm": 0.017053868621587753, | |
| "learning_rate": 1e-05, | |
| "loss": 1.743, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.6453078845716883, | |
| "grad_norm": 0.015672611072659492, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7478, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.6471256532606225, | |
| "grad_norm": 0.01585494540631771, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7535, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.6489434219495569, | |
| "grad_norm": 0.016009824350476265, | |
| "learning_rate": 1e-05, | |
| "loss": 1.759, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.6507611906384912, | |
| "grad_norm": 0.015507341362535954, | |
| "learning_rate": 1e-05, | |
| "loss": 1.755, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.6525789593274256, | |
| "grad_norm": 0.01644650474190712, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7597, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.65439672801636, | |
| "grad_norm": 0.016472771763801575, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7468, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.6562144967052943, | |
| "grad_norm": 0.016300657764077187, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7468, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.6580322653942285, | |
| "grad_norm": 0.016034092754125595, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7477, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.6598500340831629, | |
| "grad_norm": 0.01675514504313469, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7643, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.6616678027720972, | |
| "grad_norm": 0.016840513795614243, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7514, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.6634855714610316, | |
| "grad_norm": 0.017041552811861038, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7581, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.665303340149966, | |
| "grad_norm": 0.016030827537178993, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7455, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.6671211088389003, | |
| "grad_norm": 0.016785001382231712, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7483, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.6689388775278345, | |
| "grad_norm": 0.017177637666463852, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7512, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.6707566462167689, | |
| "grad_norm": 0.015744341537356377, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7528, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.6725744149057032, | |
| "grad_norm": 0.015531038865447044, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7446, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.6743921835946376, | |
| "grad_norm": 0.016207581385970116, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7533, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.676209952283572, | |
| "grad_norm": 0.016298890113830566, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7512, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.6780277209725063, | |
| "grad_norm": 0.016354553401470184, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7533, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.6798454896614405, | |
| "grad_norm": 0.01599087379872799, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7468, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.6816632583503749, | |
| "grad_norm": 0.015880877152085304, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7514, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.6834810270393092, | |
| "grad_norm": 0.016650687903165817, | |
| "learning_rate": 1e-05, | |
| "loss": 1.746, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.6852987957282436, | |
| "grad_norm": 0.0163528211414814, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7472, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.6871165644171779, | |
| "grad_norm": 0.01636846549808979, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7445, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.6889343331061123, | |
| "grad_norm": 0.016309088096022606, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7575, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.6907521017950465, | |
| "grad_norm": 0.01691536419093609, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7478, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.6925698704839809, | |
| "grad_norm": 0.01824839785695076, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7577, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.6943876391729152, | |
| "grad_norm": 0.01665637642145157, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7516, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.6962054078618496, | |
| "grad_norm": 0.015938177704811096, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7488, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.6980231765507839, | |
| "grad_norm": 0.01706807129085064, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7545, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.6998409452397183, | |
| "grad_norm": 0.01841641589999199, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7533, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.7016587139286525, | |
| "grad_norm": 0.01596180908381939, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7521, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.7034764826175869, | |
| "grad_norm": 0.016269559040665627, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7548, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.7052942513065212, | |
| "grad_norm": 0.01708034798502922, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7443, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.7071120199954556, | |
| "grad_norm": 0.01742040552198887, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7515, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.7089297886843899, | |
| "grad_norm": 0.017336854711174965, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7478, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.7107475573733243, | |
| "grad_norm": 0.016049761325120926, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7487, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.7125653260622585, | |
| "grad_norm": 0.017974358052015305, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7539, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.7143830947511929, | |
| "grad_norm": 0.01644211634993553, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7488, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.7162008634401272, | |
| "grad_norm": 0.018557770177721977, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7448, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.7180186321290616, | |
| "grad_norm": 0.01734108291566372, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7399, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.7198364008179959, | |
| "grad_norm": 0.01636637933552265, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7566, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.7216541695069303, | |
| "grad_norm": 0.01724686101078987, | |
| "learning_rate": 1e-05, | |
| "loss": 1.751, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.7234719381958645, | |
| "grad_norm": 0.01744897849857807, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7474, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.7252897068847989, | |
| "grad_norm": 0.017034457996487617, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7492, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.7271074755737332, | |
| "grad_norm": 0.016682956367731094, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7571, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.7289252442626676, | |
| "grad_norm": 0.016139404848217964, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7426, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.7307430129516019, | |
| "grad_norm": 0.01789063960313797, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7564, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.7325607816405363, | |
| "grad_norm": 0.017030801624059677, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7495, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.7343785503294705, | |
| "grad_norm": 0.02051538974046707, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7479, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.7361963190184049, | |
| "grad_norm": 0.016426604241132736, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7475, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.7380140877073392, | |
| "grad_norm": 0.016485676169395447, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7517, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.7398318563962736, | |
| "grad_norm": 0.017329517751932144, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7556, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.7416496250852079, | |
| "grad_norm": 0.0165878776460886, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7394, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.7434673937741423, | |
| "grad_norm": 0.016505807638168335, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7483, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.7452851624630765, | |
| "grad_norm": 0.016942374408245087, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7483, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.7471029311520109, | |
| "grad_norm": 0.01690479926764965, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7522, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.7489206998409452, | |
| "grad_norm": 0.016314556822180748, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7478, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.7507384685298796, | |
| "grad_norm": 0.016368621960282326, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7475, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.7525562372188139, | |
| "grad_norm": 0.01776360534131527, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7523, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.7543740059077483, | |
| "grad_norm": 0.01603596657514572, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7422, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.7561917745966825, | |
| "grad_norm": 0.015459864400327206, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7484, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.7580095432856169, | |
| "grad_norm": 0.018278229981660843, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7543, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.7598273119745512, | |
| "grad_norm": 0.016482891514897346, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7511, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.7616450806634856, | |
| "grad_norm": 0.0158072616904974, | |
| "learning_rate": 1e-05, | |
| "loss": 1.747, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.7634628493524199, | |
| "grad_norm": 0.01595921255648136, | |
| "learning_rate": 1e-05, | |
| "loss": 1.741, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.7652806180413543, | |
| "grad_norm": 0.01587016135454178, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7427, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.7670983867302885, | |
| "grad_norm": 0.017007585614919662, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7413, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.7689161554192229, | |
| "grad_norm": 0.015775319188833237, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7483, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.7707339241081572, | |
| "grad_norm": 0.015736114233732224, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7463, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.7725516927970916, | |
| "grad_norm": 0.01561545580625534, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7482, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.7743694614860259, | |
| "grad_norm": 0.01614650897681713, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7517, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.7761872301749603, | |
| "grad_norm": 0.016477441415190697, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7437, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.7780049988638945, | |
| "grad_norm": 0.01549589540809393, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7479, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.7798227675528289, | |
| "grad_norm": 0.015598030760884285, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7438, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.7816405362417632, | |
| "grad_norm": 0.01621238701045513, | |
| "learning_rate": 1e-05, | |
| "loss": 1.743, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.7834583049306976, | |
| "grad_norm": 0.015526995062828064, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7521, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.7852760736196319, | |
| "grad_norm": 0.01634833589196205, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7489, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.7870938423085663, | |
| "grad_norm": 0.01686246506869793, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7483, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.7889116109975006, | |
| "grad_norm": 0.01572590321302414, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7454, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.7907293796864349, | |
| "grad_norm": 0.016653846949338913, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7447, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.7925471483753692, | |
| "grad_norm": 0.016530562192201614, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7465, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.7943649170643036, | |
| "grad_norm": 0.016080396249890327, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7437, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.7961826857532379, | |
| "grad_norm": 0.016825426369905472, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7432, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.7980004544421723, | |
| "grad_norm": 0.01737258955836296, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7363, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.7998182231311066, | |
| "grad_norm": 0.015955086797475815, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7509, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.8016359918200409, | |
| "grad_norm": 0.016994798555970192, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7446, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.8034537605089752, | |
| "grad_norm": 0.0163293294608593, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7491, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.8052715291979096, | |
| "grad_norm": 0.016241351142525673, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7408, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.8070892978868439, | |
| "grad_norm": 0.03442993760108948, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7485, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.8089070665757783, | |
| "grad_norm": 0.01715024746954441, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7507, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.8107248352647126, | |
| "grad_norm": 0.016102071851491928, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7508, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.8125426039536469, | |
| "grad_norm": 0.018684349954128265, | |
| "learning_rate": 1e-05, | |
| "loss": 1.745, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.8143603726425812, | |
| "grad_norm": 0.01681571640074253, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7564, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.8161781413315156, | |
| "grad_norm": 0.01673213019967079, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7491, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.8179959100204499, | |
| "grad_norm": 0.01589960604906082, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7534, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.8198136787093843, | |
| "grad_norm": 0.018107162788510323, | |
| "learning_rate": 1e-05, | |
| "loss": 1.734, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.8216314473983186, | |
| "grad_norm": 0.016370611265301704, | |
| "learning_rate": 1e-05, | |
| "loss": 1.748, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.8234492160872529, | |
| "grad_norm": 0.01715346798300743, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7581, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.8252669847761872, | |
| "grad_norm": 0.016535120084881783, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7483, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.8270847534651216, | |
| "grad_norm": 0.01683277077972889, | |
| "learning_rate": 1e-05, | |
| "loss": 1.753, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.8289025221540559, | |
| "grad_norm": 0.016108205541968346, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7509, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.8307202908429903, | |
| "grad_norm": 0.01758972927927971, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7421, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.8325380595319246, | |
| "grad_norm": 0.016740551218390465, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7531, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.8343558282208589, | |
| "grad_norm": 0.017136069014668465, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7453, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.8361735969097932, | |
| "grad_norm": 0.018268654122948647, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7468, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.8379913655987276, | |
| "grad_norm": 0.01658778078854084, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7496, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.8398091342876619, | |
| "grad_norm": 0.016633301973342896, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7485, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.8416269029765963, | |
| "grad_norm": 0.016990309581160545, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7405, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.8434446716655306, | |
| "grad_norm": 0.01661493442952633, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7464, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.8452624403544649, | |
| "grad_norm": 0.01699172891676426, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7564, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.8470802090433992, | |
| "grad_norm": 0.016703175380825996, | |
| "learning_rate": 1e-05, | |
| "loss": 1.745, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.8488979777323336, | |
| "grad_norm": 0.01694013550877571, | |
| "learning_rate": 1e-05, | |
| "loss": 1.741, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.8507157464212679, | |
| "grad_norm": 0.017576703801751137, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7553, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.8525335151102023, | |
| "grad_norm": 0.016727445647120476, | |
| "learning_rate": 1e-05, | |
| "loss": 1.734, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.8543512837991366, | |
| "grad_norm": 0.015813367441296577, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7443, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.8561690524880708, | |
| "grad_norm": 0.01609817147254944, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7496, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.8579868211770052, | |
| "grad_norm": 0.01648952253162861, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7444, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.8598045898659396, | |
| "grad_norm": 0.016997788101434708, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7436, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.8616223585548739, | |
| "grad_norm": 0.016397470608353615, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7488, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.8634401272438083, | |
| "grad_norm": 0.01654043421149254, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7406, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.8652578959327426, | |
| "grad_norm": 0.016180653125047684, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7463, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.8670756646216768, | |
| "grad_norm": 0.016773954033851624, | |
| "learning_rate": 1e-05, | |
| "loss": 1.751, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.8688934333106112, | |
| "grad_norm": 0.01736517809331417, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7402, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.8707112019995455, | |
| "grad_norm": 0.01888013258576393, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7457, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.8725289706884799, | |
| "grad_norm": 0.018337909132242203, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7453, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.8743467393774143, | |
| "grad_norm": 0.01563389040529728, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7386, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.8761645080663486, | |
| "grad_norm": 0.017023077234625816, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7412, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.8779822767552828, | |
| "grad_norm": 0.01671590842306614, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7462, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.8798000454442172, | |
| "grad_norm": 0.019904915243387222, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7443, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.8816178141331515, | |
| "grad_norm": 0.01728987693786621, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7345, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.8834355828220859, | |
| "grad_norm": 0.019658857956528664, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7425, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.8852533515110202, | |
| "grad_norm": 0.01688159443438053, | |
| "learning_rate": 1e-05, | |
| "loss": 1.746, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.8870711201999546, | |
| "grad_norm": 0.01599729433655739, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7327, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.8888888888888888, | |
| "grad_norm": 0.016897086054086685, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7385, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.8907066575778232, | |
| "grad_norm": 0.016169127076864243, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7405, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.8925244262667575, | |
| "grad_norm": 0.01634543016552925, | |
| "learning_rate": 1e-05, | |
| "loss": 1.748, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.8943421949556919, | |
| "grad_norm": 0.016616657376289368, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7465, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.8961599636446262, | |
| "grad_norm": 0.016464397311210632, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7331, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.8979777323335606, | |
| "grad_norm": 0.017165830358862877, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7383, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.8997955010224948, | |
| "grad_norm": 0.016248662024736404, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7416, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.9016132697114292, | |
| "grad_norm": 0.01670646481215954, | |
| "learning_rate": 1e-05, | |
| "loss": 1.742, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.9034310384003635, | |
| "grad_norm": 0.016594985499978065, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7397, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.9052488070892979, | |
| "grad_norm": 0.016361333429813385, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7511, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.9070665757782322, | |
| "grad_norm": 0.016266893595457077, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7468, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.9088843444671666, | |
| "grad_norm": 0.017031649127602577, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7327, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.9107021131561008, | |
| "grad_norm": 0.016959581524133682, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7454, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.9125198818450352, | |
| "grad_norm": 0.07533946633338928, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7476, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.9143376505339695, | |
| "grad_norm": 0.01766197197139263, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7461, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.9161554192229039, | |
| "grad_norm": 0.01663908362388611, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7361, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.9179731879118382, | |
| "grad_norm": 0.02057843655347824, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7441, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.9197909566007726, | |
| "grad_norm": 0.017909778282046318, | |
| "learning_rate": 1e-05, | |
| "loss": 1.742, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.9216087252897068, | |
| "grad_norm": 0.017638977617025375, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7391, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.9234264939786412, | |
| "grad_norm": 0.018523376435041428, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7405, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.9252442626675755, | |
| "grad_norm": 0.01635800302028656, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7458, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.9270620313565099, | |
| "grad_norm": 0.01763818971812725, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7351, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.9288798000454442, | |
| "grad_norm": 0.017338305711746216, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7397, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.9306975687343786, | |
| "grad_norm": 0.01771395467221737, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7471, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.9325153374233128, | |
| "grad_norm": 0.017642149701714516, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7454, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.9343331061122472, | |
| "grad_norm": 0.017685122787952423, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7375, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.9361508748011815, | |
| "grad_norm": 0.017887357622385025, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7394, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.9379686434901159, | |
| "grad_norm": 0.01899501495063305, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7452, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.9397864121790502, | |
| "grad_norm": 0.017754577100276947, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7441, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.9416041808679846, | |
| "grad_norm": 0.01811014860868454, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7417, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.9434219495569189, | |
| "grad_norm": 0.01806728169322014, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7428, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.9452397182458532, | |
| "grad_norm": 0.018700286746025085, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7345, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.9470574869347875, | |
| "grad_norm": 0.01722894422709942, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7362, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.9488752556237219, | |
| "grad_norm": 0.016884060576558113, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7355, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.9506930243126562, | |
| "grad_norm": 0.017119232565164566, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7468, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.9525107930015906, | |
| "grad_norm": 0.017567407339811325, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7422, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.9543285616905249, | |
| "grad_norm": 0.017188768833875656, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7393, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.9561463303794592, | |
| "grad_norm": 0.016574783250689507, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7341, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.9579640990683935, | |
| "grad_norm": 0.020617837086319923, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7428, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.9597818677573279, | |
| "grad_norm": 0.018011432141065598, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7496, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.9615996364462622, | |
| "grad_norm": 0.018056875094771385, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7413, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.9634174051351966, | |
| "grad_norm": 0.018342627212405205, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7395, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.9652351738241309, | |
| "grad_norm": 0.022182267159223557, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7342, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.9670529425130652, | |
| "grad_norm": 0.01826542802155018, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7384, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.9688707112019995, | |
| "grad_norm": 0.01716247759759426, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7425, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.9706884798909339, | |
| "grad_norm": 0.017304804176092148, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7521, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.9725062485798682, | |
| "grad_norm": 0.01794220507144928, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7455, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.9743240172688026, | |
| "grad_norm": 0.017633073031902313, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7509, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.9761417859577369, | |
| "grad_norm": 0.016983771696686745, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7392, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.9779595546466712, | |
| "grad_norm": 0.01743633858859539, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7341, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.9797773233356055, | |
| "grad_norm": 0.017662547528743744, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7367, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.9815950920245399, | |
| "grad_norm": 0.01701057143509388, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7423, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.9834128607134742, | |
| "grad_norm": 0.017070814967155457, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7429, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.9852306294024086, | |
| "grad_norm": 0.01704619824886322, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7348, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.9870483980913429, | |
| "grad_norm": 0.017563099041581154, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7382, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.9888661667802772, | |
| "grad_norm": 0.01661253347992897, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7412, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.9906839354692115, | |
| "grad_norm": 0.016802560538053513, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7287, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.9925017041581459, | |
| "grad_norm": 0.01623694598674774, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7345, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.9943194728470802, | |
| "grad_norm": 0.01796470768749714, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7282, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.9961372415360146, | |
| "grad_norm": 0.016037970781326294, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7358, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.9979550102249489, | |
| "grad_norm": 0.016084497794508934, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7371, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.9997727789138832, | |
| "grad_norm": 0.016458775848150253, | |
| "learning_rate": 1e-05, | |
| "loss": 1.7397, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.9997727789138832, | |
| "step": 550, | |
| "total_flos": 2868807299235840.0, | |
| "train_loss": 0.7944060720096935, | |
| "train_runtime": 47914.0805, | |
| "train_samples_per_second": 2.939, | |
| "train_steps_per_second": 0.011 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 550, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2868807299235840.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |