| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.000806234883096, |
| "eval_steps": 233, |
| "global_step": 931, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0010749798441279225, |
| "grad_norm": 17.287843704223633, |
| "learning_rate": 2e-05, |
| "loss": 18.567, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0010749798441279225, |
| "eval_loss": 4.0416483879089355, |
| "eval_runtime": 6.371, |
| "eval_samples_per_second": 61.529, |
| "eval_steps_per_second": 30.765, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.002149959688255845, |
| "grad_norm": 19.48370933532715, |
| "learning_rate": 4e-05, |
| "loss": 18.1211, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0032249395323837677, |
| "grad_norm": 14.203254699707031, |
| "learning_rate": 6e-05, |
| "loss": 14.5618, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.00429991937651169, |
| "grad_norm": 16.90338897705078, |
| "learning_rate": 8e-05, |
| "loss": 17.2773, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.005374899220639613, |
| "grad_norm": 10.459429740905762, |
| "learning_rate": 0.0001, |
| "loss": 14.4675, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0064498790647675355, |
| "grad_norm": 16.3222713470459, |
| "learning_rate": 0.00012, |
| "loss": 16.8006, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.007524858908895459, |
| "grad_norm": 17.23369789123535, |
| "learning_rate": 0.00014, |
| "loss": 14.2422, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.00859983875302338, |
| "grad_norm": 18.750120162963867, |
| "learning_rate": 0.00016, |
| "loss": 17.4167, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.009674818597151304, |
| "grad_norm": 12.818103790283203, |
| "learning_rate": 0.00018, |
| "loss": 13.2251, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.010749798441279226, |
| "grad_norm": 15.632926940917969, |
| "learning_rate": 0.0002, |
| "loss": 14.3551, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.011824778285407149, |
| "grad_norm": 10.966743469238281, |
| "learning_rate": 0.00019999941823167997, |
| "loss": 12.1329, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.012899758129535071, |
| "grad_norm": 10.447884559631348, |
| "learning_rate": 0.00019999767293348887, |
| "loss": 11.0189, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.013974737973662993, |
| "grad_norm": 14.069694519042969, |
| "learning_rate": 0.00019999476412573398, |
| "loss": 12.6871, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.015049717817790917, |
| "grad_norm": 17.129362106323242, |
| "learning_rate": 0.0001999906918422603, |
| "loss": 12.774, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.01612469766191884, |
| "grad_norm": 12.345664978027344, |
| "learning_rate": 0.00019998545613045035, |
| "loss": 10.1907, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.01719967750604676, |
| "grad_norm": 12.960017204284668, |
| "learning_rate": 0.00019997905705122353, |
| "loss": 9.124, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.018274657350174684, |
| "grad_norm": 17.12679672241211, |
| "learning_rate": 0.0001999714946790355, |
| "loss": 11.1126, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.019349637194302608, |
| "grad_norm": 13.355628967285156, |
| "learning_rate": 0.0001999627691018772, |
| "loss": 9.9217, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.02042461703843053, |
| "grad_norm": 14.47995376586914, |
| "learning_rate": 0.00019995288042127393, |
| "loss": 9.8122, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.021499596882558453, |
| "grad_norm": 15.26109504699707, |
| "learning_rate": 0.00019994182875228417, |
| "loss": 9.1869, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.022574576726686373, |
| "grad_norm": 19.227262496948242, |
| "learning_rate": 0.00019992961422349805, |
| "loss": 8.0937, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.023649556570814297, |
| "grad_norm": 15.230995178222656, |
| "learning_rate": 0.00019991623697703613, |
| "loss": 8.5341, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.02472453641494222, |
| "grad_norm": 16.82895278930664, |
| "learning_rate": 0.00019990169716854758, |
| "loss": 9.1735, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.025799516259070142, |
| "grad_norm": 22.261363983154297, |
| "learning_rate": 0.00019988599496720836, |
| "loss": 8.5753, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.026874496103198066, |
| "grad_norm": 14.500650405883789, |
| "learning_rate": 0.0001998691305557194, |
| "loss": 8.7694, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.027949475947325986, |
| "grad_norm": 13.450296401977539, |
| "learning_rate": 0.00019985110413030425, |
| "loss": 7.6744, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.02902445579145391, |
| "grad_norm": 11.800576210021973, |
| "learning_rate": 0.00019983191590070703, |
| "loss": 6.7168, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.030099435635581834, |
| "grad_norm": 15.437250137329102, |
| "learning_rate": 0.00019981156609018977, |
| "loss": 7.8992, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.031174415479709755, |
| "grad_norm": 13.048258781433105, |
| "learning_rate": 0.00019979005493552996, |
| "loss": 7.4647, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.03224939532383768, |
| "grad_norm": 17.663209915161133, |
| "learning_rate": 0.00019976738268701784, |
| "loss": 7.6277, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0333243751679656, |
| "grad_norm": 17.522117614746094, |
| "learning_rate": 0.00019974354960845326, |
| "loss": 7.3131, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.03439935501209352, |
| "grad_norm": 16.121286392211914, |
| "learning_rate": 0.00019971855597714284, |
| "loss": 7.1682, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.035474334856221444, |
| "grad_norm": 12.511422157287598, |
| "learning_rate": 0.00019969240208389665, |
| "loss": 6.4537, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.03654931470034937, |
| "grad_norm": 14.760931015014648, |
| "learning_rate": 0.00019966508823302483, |
| "loss": 6.8972, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.03762429454447729, |
| "grad_norm": 16.834484100341797, |
| "learning_rate": 0.00019963661474233402, |
| "loss": 8.2614, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.038699274388605216, |
| "grad_norm": 13.5601224899292, |
| "learning_rate": 0.0001996069819431237, |
| "loss": 6.4588, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.03977425423273313, |
| "grad_norm": 14.121377944946289, |
| "learning_rate": 0.00019957619018018242, |
| "loss": 6.057, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.04084923407686106, |
| "grad_norm": 14.331984519958496, |
| "learning_rate": 0.00019954423981178354, |
| "loss": 5.9236, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.04192421392098898, |
| "grad_norm": 14.163195610046387, |
| "learning_rate": 0.00019951113120968134, |
| "loss": 6.0719, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.042999193765116905, |
| "grad_norm": 13.852533340454102, |
| "learning_rate": 0.00019947686475910655, |
| "loss": 5.6034, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.04407417360924483, |
| "grad_norm": 14.488564491271973, |
| "learning_rate": 0.00019944144085876184, |
| "loss": 7.0848, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.045149153453372746, |
| "grad_norm": 11.431620597839355, |
| "learning_rate": 0.0001994048599208173, |
| "loss": 5.5335, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.04622413329750067, |
| "grad_norm": 13.871944427490234, |
| "learning_rate": 0.00019936712237090553, |
| "loss": 5.8063, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.047299113141628595, |
| "grad_norm": 18.87192726135254, |
| "learning_rate": 0.00019932822864811677, |
| "loss": 6.0023, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.04837409298575652, |
| "grad_norm": 12.797957420349121, |
| "learning_rate": 0.00019928817920499375, |
| "loss": 5.546, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.04944907282988444, |
| "grad_norm": 14.95291519165039, |
| "learning_rate": 0.00019924697450752633, |
| "loss": 6.1613, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.05052405267401236, |
| "grad_norm": 18.501853942871094, |
| "learning_rate": 0.00019920461503514635, |
| "loss": 6.1402, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.051599032518140284, |
| "grad_norm": 19.192930221557617, |
| "learning_rate": 0.0001991611012807218, |
| "loss": 5.7711, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.05267401236226821, |
| "grad_norm": 23.865346908569336, |
| "learning_rate": 0.00019911643375055107, |
| "loss": 6.6772, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.05374899220639613, |
| "grad_norm": 27.616785049438477, |
| "learning_rate": 0.00019907061296435728, |
| "loss": 6.6335, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.054823972050524056, |
| "grad_norm": 20.929126739501953, |
| "learning_rate": 0.0001990236394552821, |
| "loss": 6.4005, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.05589895189465197, |
| "grad_norm": 10.98021411895752, |
| "learning_rate": 0.00019897551376987948, |
| "loss": 4.4406, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.0569739317387799, |
| "grad_norm": 12.884988784790039, |
| "learning_rate": 0.00019892623646810943, |
| "loss": 4.7416, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.05804891158290782, |
| "grad_norm": 14.663339614868164, |
| "learning_rate": 0.0001988758081233314, |
| "loss": 5.6428, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.059123891427035745, |
| "grad_norm": 13.638205528259277, |
| "learning_rate": 0.00019882422932229765, |
| "loss": 6.3548, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.06019887127116367, |
| "grad_norm": 15.025946617126465, |
| "learning_rate": 0.00019877150066514645, |
| "loss": 4.9333, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.061273851115291586, |
| "grad_norm": 20.201622009277344, |
| "learning_rate": 0.000198717622765395, |
| "loss": 6.5034, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.06234883095941951, |
| "grad_norm": 14.798491477966309, |
| "learning_rate": 0.00019866259624993246, |
| "loss": 4.757, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.06342381080354743, |
| "grad_norm": 15.673213005065918, |
| "learning_rate": 0.00019860642175901247, |
| "loss": 7.0599, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.06449879064767536, |
| "grad_norm": 18.45370864868164, |
| "learning_rate": 0.00019854909994624582, |
| "loss": 6.7934, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.06557377049180328, |
| "grad_norm": 15.39392375946045, |
| "learning_rate": 0.0001984906314785928, |
| "loss": 5.5127, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.0666487503359312, |
| "grad_norm": 16.213571548461914, |
| "learning_rate": 0.00019843101703635548, |
| "loss": 4.8815, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.06772373018005913, |
| "grad_norm": 20.046100616455078, |
| "learning_rate": 0.00019837025731316967, |
| "loss": 5.3901, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.06879871002418704, |
| "grad_norm": 16.978891372680664, |
| "learning_rate": 0.0001983083530159971, |
| "loss": 5.7858, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.06987368986831496, |
| "grad_norm": 17.5430965423584, |
| "learning_rate": 0.00019824530486511687, |
| "loss": 6.2824, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.07094866971244289, |
| "grad_norm": 15.383797645568848, |
| "learning_rate": 0.00019818111359411737, |
| "loss": 4.4531, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.07202364955657081, |
| "grad_norm": 16.83544921875, |
| "learning_rate": 0.00019811577994988754, |
| "loss": 6.4399, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.07309862940069874, |
| "grad_norm": 22.3226261138916, |
| "learning_rate": 0.00019804930469260828, |
| "loss": 7.8473, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.07417360924482666, |
| "grad_norm": 18.50650978088379, |
| "learning_rate": 0.00019798168859574356, |
| "loss": 6.8441, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.07524858908895458, |
| "grad_norm": 17.836515426635742, |
| "learning_rate": 0.00019791293244603142, |
| "loss": 5.3271, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.07632356893308251, |
| "grad_norm": 16.706695556640625, |
| "learning_rate": 0.00019784303704347488, |
| "loss": 5.4312, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.07739854877721043, |
| "grad_norm": 18.03818130493164, |
| "learning_rate": 0.00019777200320133254, |
| "loss": 5.9135, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.07847352862133836, |
| "grad_norm": 11.856945991516113, |
| "learning_rate": 0.00019769983174610918, |
| "loss": 5.6232, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.07954850846546627, |
| "grad_norm": 17.87145233154297, |
| "learning_rate": 0.00019762652351754616, |
| "loss": 4.9234, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.08062348830959419, |
| "grad_norm": 16.913291931152344, |
| "learning_rate": 0.00019755207936861155, |
| "loss": 6.6548, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.08169846815372211, |
| "grad_norm": 12.137495040893555, |
| "learning_rate": 0.00019747650016549027, |
| "loss": 4.1446, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.08277344799785004, |
| "grad_norm": 15.74571704864502, |
| "learning_rate": 0.00019739978678757412, |
| "loss": 6.0891, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.08384842784197796, |
| "grad_norm": 13.001721382141113, |
| "learning_rate": 0.0001973219401274513, |
| "loss": 4.2512, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.08492340768610589, |
| "grad_norm": 20.199098587036133, |
| "learning_rate": 0.00019724296109089622, |
| "loss": 6.0262, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.08599838753023381, |
| "grad_norm": 16.964731216430664, |
| "learning_rate": 0.00019716285059685892, |
| "loss": 4.7964, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.08707336737436173, |
| "grad_norm": 15.965873718261719, |
| "learning_rate": 0.0001970816095774544, |
| "loss": 5.7548, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.08814834721848966, |
| "grad_norm": 22.0924129486084, |
| "learning_rate": 0.00019699923897795163, |
| "loss": 7.1131, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.08922332706261758, |
| "grad_norm": 16.394224166870117, |
| "learning_rate": 0.0001969157397567627, |
| "loss": 5.7294, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.09029830690674549, |
| "grad_norm": 14.458036422729492, |
| "learning_rate": 0.0001968311128854317, |
| "loss": 5.4966, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.09137328675087342, |
| "grad_norm": 12.637007713317871, |
| "learning_rate": 0.00019674535934862325, |
| "loss": 3.4551, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.09244826659500134, |
| "grad_norm": 13.059622764587402, |
| "learning_rate": 0.00019665848014411118, |
| "loss": 5.1353, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.09352324643912927, |
| "grad_norm": 20.794404983520508, |
| "learning_rate": 0.00019657047628276688, |
| "loss": 4.9761, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.09459822628325719, |
| "grad_norm": 15.221658706665039, |
| "learning_rate": 0.00019648134878854747, |
| "loss": 4.8321, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.09567320612738511, |
| "grad_norm": 14.838767051696777, |
| "learning_rate": 0.0001963910986984841, |
| "loss": 4.5949, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.09674818597151304, |
| "grad_norm": 12.857973098754883, |
| "learning_rate": 0.00019629972706266952, |
| "loss": 4.0017, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.09782316581564096, |
| "grad_norm": 15.524980545043945, |
| "learning_rate": 0.00019620723494424627, |
| "loss": 4.57, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.09889814565976889, |
| "grad_norm": 11.060179710388184, |
| "learning_rate": 0.000196113623419394, |
| "loss": 4.1406, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.0999731255038968, |
| "grad_norm": 18.566953659057617, |
| "learning_rate": 0.00019601889357731713, |
| "loss": 4.3026, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.10104810534802472, |
| "grad_norm": 13.795799255371094, |
| "learning_rate": 0.00019592304652023206, |
| "loss": 3.585, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.10212308519215264, |
| "grad_norm": 17.49445343017578, |
| "learning_rate": 0.0001958260833633544, |
| "loss": 5.1268, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.10319806503628057, |
| "grad_norm": 13.12109661102295, |
| "learning_rate": 0.00019572800523488609, |
| "loss": 4.2585, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.10427304488040849, |
| "grad_norm": 15.185480117797852, |
| "learning_rate": 0.00019562881327600198, |
| "loss": 4.8719, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.10534802472453642, |
| "grad_norm": 11.378191947937012, |
| "learning_rate": 0.00019552850864083693, |
| "loss": 4.2474, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.10642300456866434, |
| "grad_norm": 17.479673385620117, |
| "learning_rate": 0.0001954270924964721, |
| "loss": 4.1351, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.10749798441279226, |
| "grad_norm": 20.179716110229492, |
| "learning_rate": 0.0001953245660229215, |
| "loss": 4.218, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.10857296425692019, |
| "grad_norm": 15.806692123413086, |
| "learning_rate": 0.00019522093041311815, |
| "loss": 5.7112, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.10964794410104811, |
| "grad_norm": 13.089418411254883, |
| "learning_rate": 0.00019511618687290043, |
| "loss": 3.3798, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.11072292394517602, |
| "grad_norm": 15.82168197631836, |
| "learning_rate": 0.00019501033662099778, |
| "loss": 5.123, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.11179790378930395, |
| "grad_norm": 17.98412322998047, |
| "learning_rate": 0.00019490338088901666, |
| "loss": 4.6133, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.11287288363343187, |
| "grad_norm": 13.41305160522461, |
| "learning_rate": 0.0001947953209214262, |
| "loss": 4.4088, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.1139478634775598, |
| "grad_norm": 17.843494415283203, |
| "learning_rate": 0.00019468615797554374, |
| "loss": 3.5071, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.11502284332168772, |
| "grad_norm": 17.681631088256836, |
| "learning_rate": 0.00019457589332152008, |
| "loss": 5.0372, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.11609782316581564, |
| "grad_norm": 17.937023162841797, |
| "learning_rate": 0.00019446452824232492, |
| "loss": 4.3635, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.11717280300994357, |
| "grad_norm": 21.669342041015625, |
| "learning_rate": 0.00019435206403373178, |
| "loss": 5.2923, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.11824778285407149, |
| "grad_norm": 18.59075927734375, |
| "learning_rate": 0.00019423850200430293, |
| "loss": 4.7142, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.11932276269819941, |
| "grad_norm": 19.25830841064453, |
| "learning_rate": 0.00019412384347537414, |
| "loss": 5.0176, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.12039774254232734, |
| "grad_norm": 21.377017974853516, |
| "learning_rate": 0.00019400808978103947, |
| "loss": 5.0599, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.12147272238645525, |
| "grad_norm": 14.341522216796875, |
| "learning_rate": 0.0001938912422681355, |
| "loss": 4.9352, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.12254770223058317, |
| "grad_norm": 15.528069496154785, |
| "learning_rate": 0.00019377330229622595, |
| "loss": 5.6631, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.1236226820747111, |
| "grad_norm": 18.492849349975586, |
| "learning_rate": 0.0001936542712375855, |
| "loss": 4.6148, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.12469766191883902, |
| "grad_norm": 20.3253116607666, |
| "learning_rate": 0.0001935341504771842, |
| "loss": 4.5666, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.12577264176296696, |
| "grad_norm": 14.674714088439941, |
| "learning_rate": 0.00019341294141267108, |
| "loss": 4.8294, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.12684762160709487, |
| "grad_norm": 10.383010864257812, |
| "learning_rate": 0.00019329064545435803, |
| "loss": 4.0049, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.12792260145122278, |
| "grad_norm": 15.706880569458008, |
| "learning_rate": 0.00019316726402520334, |
| "loss": 4.5301, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.12899758129535072, |
| "grad_norm": 14.116766929626465, |
| "learning_rate": 0.0001930427985607951, |
| "loss": 4.21, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.13007256113947863, |
| "grad_norm": 12.008879661560059, |
| "learning_rate": 0.00019291725050933468, |
| "loss": 3.6814, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.13114754098360656, |
| "grad_norm": 15.146964073181152, |
| "learning_rate": 0.00019279062133161957, |
| "loss": 4.0279, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.13222252082773447, |
| "grad_norm": 15.026398658752441, |
| "learning_rate": 0.0001926629125010267, |
| "loss": 4.0473, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.1332975006718624, |
| "grad_norm": 15.858999252319336, |
| "learning_rate": 0.00019253412550349509, |
| "loss": 4.3264, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.13437248051599032, |
| "grad_norm": 18.721647262573242, |
| "learning_rate": 0.00019240426183750865, |
| "loss": 4.4262, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.13544746036011826, |
| "grad_norm": 19.55031394958496, |
| "learning_rate": 0.0001922733230140787, |
| "loss": 5.5739, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.13652244020424617, |
| "grad_norm": 10.331392288208008, |
| "learning_rate": 0.00019214131055672647, |
| "loss": 3.5695, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.13759742004837408, |
| "grad_norm": 19.27557373046875, |
| "learning_rate": 0.0001920082260014652, |
| "loss": 5.4195, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.13867239989250202, |
| "grad_norm": 21.552522659301758, |
| "learning_rate": 0.0001918740708967825, |
| "loss": 4.1473, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.13974737973662993, |
| "grad_norm": 19.07160186767578, |
| "learning_rate": 0.0001917388468036222, |
| "loss": 4.3624, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.14082235958075787, |
| "grad_norm": 24.328269958496094, |
| "learning_rate": 0.0001916025552953661, |
| "loss": 4.5408, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.14189733942488578, |
| "grad_norm": 22.924718856811523, |
| "learning_rate": 0.00019146519795781587, |
| "loss": 4.2812, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.14297231926901371, |
| "grad_norm": 15.937036514282227, |
| "learning_rate": 0.00019132677638917449, |
| "loss": 4.6842, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.14404729911314162, |
| "grad_norm": 14.515525817871094, |
| "learning_rate": 0.00019118729220002755, |
| "loss": 3.2523, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.14512227895726956, |
| "grad_norm": 19.804189682006836, |
| "learning_rate": 0.00019104674701332476, |
| "loss": 4.5473, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.14619725880139747, |
| "grad_norm": 13.662827491760254, |
| "learning_rate": 0.00019090514246436087, |
| "loss": 4.1841, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.14727223864552538, |
| "grad_norm": 22.40411376953125, |
| "learning_rate": 0.00019076248020075665, |
| "loss": 6.2449, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.14834721848965332, |
| "grad_norm": 15.33782958984375, |
| "learning_rate": 0.00019061876188243982, |
| "loss": 2.8611, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.14942219833378123, |
| "grad_norm": 20.899106979370117, |
| "learning_rate": 0.00019047398918162572, |
| "loss": 5.3855, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.15049717817790917, |
| "grad_norm": 16.781774520874023, |
| "learning_rate": 0.00019032816378279768, |
| "loss": 4.2343, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.15157215802203708, |
| "grad_norm": 15.55665397644043, |
| "learning_rate": 0.00019018128738268773, |
| "loss": 4.5545, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.15264713786616502, |
| "grad_norm": 22.28097152709961, |
| "learning_rate": 0.00019003336169025654, |
| "loss": 5.1255, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.15372211771029293, |
| "grad_norm": 14.668632507324219, |
| "learning_rate": 0.00018988438842667375, |
| "loss": 5.7869, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.15479709755442086, |
| "grad_norm": 21.854108810424805, |
| "learning_rate": 0.00018973436932529793, |
| "loss": 5.1173, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.15587207739854878, |
| "grad_norm": 16.630081176757812, |
| "learning_rate": 0.00018958330613165622, |
| "loss": 4.251, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.1569470572426767, |
| "grad_norm": 16.389333724975586, |
| "learning_rate": 0.00018943120060342425, |
| "loss": 4.6531, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.15802203708680462, |
| "grad_norm": 14.595359802246094, |
| "learning_rate": 0.0001892780545104056, |
| "loss": 4.1349, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.15909701693093253, |
| "grad_norm": 18.753944396972656, |
| "learning_rate": 0.00018912386963451113, |
| "loss": 4.0963, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.16017199677506047, |
| "grad_norm": 15.209190368652344, |
| "learning_rate": 0.00018896864776973837, |
| "loss": 3.6522, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.16124697661918838, |
| "grad_norm": 20.873994827270508, |
| "learning_rate": 0.00018881239072215063, |
| "loss": 5.3913, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.16232195646331632, |
| "grad_norm": 12.859075546264648, |
| "learning_rate": 0.00018865510030985588, |
| "loss": 2.6075, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.16339693630744423, |
| "grad_norm": 21.292451858520508, |
| "learning_rate": 0.00018849677836298568, |
| "loss": 4.9356, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.16447191615157217, |
| "grad_norm": 14.94565200805664, |
| "learning_rate": 0.00018833742672367393, |
| "loss": 3.804, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.16554689599570008, |
| "grad_norm": 20.21578025817871, |
| "learning_rate": 0.00018817704724603536, |
| "loss": 5.3554, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.16662187583982802, |
| "grad_norm": 18.05601692199707, |
| "learning_rate": 0.00018801564179614388, |
| "loss": 4.6274, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.16769685568395593, |
| "grad_norm": 13.378555297851562, |
| "learning_rate": 0.00018785321225201108, |
| "loss": 3.8398, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.16877183552808384, |
| "grad_norm": 13.038491249084473, |
| "learning_rate": 0.00018768976050356426, |
| "loss": 3.7924, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.16984681537221177, |
| "grad_norm": 10.797876358032227, |
| "learning_rate": 0.00018752528845262433, |
| "loss": 3.273, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.17092179521633968, |
| "grad_norm": 12.845779418945312, |
| "learning_rate": 0.00018735979801288392, |
| "loss": 3.7228, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.17199677506046762, |
| "grad_norm": 12.737683296203613, |
| "learning_rate": 0.00018719329110988486, |
| "loss": 4.2175, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.17307175490459553, |
| "grad_norm": 16.20618438720703, |
| "learning_rate": 0.00018702576968099608, |
| "loss": 3.4056, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.17414673474872347, |
| "grad_norm": 19.91802978515625, |
| "learning_rate": 0.00018685723567539068, |
| "loss": 4.6, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.17522171459285138, |
| "grad_norm": 15.743769645690918, |
| "learning_rate": 0.00018668769105402365, |
| "loss": 3.5829, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.17629669443697932, |
| "grad_norm": 16.444740295410156, |
| "learning_rate": 0.00018651713778960875, |
| "loss": 4.4017, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.17737167428110723, |
| "grad_norm": 15.459141731262207, |
| "learning_rate": 0.0001863455778665957, |
| "loss": 4.1357, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.17844665412523517, |
| "grad_norm": 18.962736129760742, |
| "learning_rate": 0.00018617301328114705, |
| "loss": 4.5289, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.17952163396936308, |
| "grad_norm": 17.9486083984375, |
| "learning_rate": 0.000185999446041115, |
| "loss": 4.1333, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.18059661381349099, |
| "grad_norm": 12.445462226867676, |
| "learning_rate": 0.00018582487816601797, |
| "loss": 3.7512, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.18167159365761892, |
| "grad_norm": 19.503494262695312, |
| "learning_rate": 0.00018564931168701712, |
| "loss": 4.716, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.18274657350174683, |
| "grad_norm": 31.339258193969727, |
| "learning_rate": 0.00018547274864689285, |
| "loss": 6.1173, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.18382155334587477, |
| "grad_norm": 10.792606353759766, |
| "learning_rate": 0.00018529519110002077, |
| "loss": 3.1399, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.18489653319000268, |
| "grad_norm": 16.1004695892334, |
| "learning_rate": 0.00018511664111234798, |
| "loss": 3.8947, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.18597151303413062, |
| "grad_norm": 11.773107528686523, |
| "learning_rate": 0.00018493710076136898, |
| "loss": 3.0606, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.18704649287825853, |
| "grad_norm": 12.119939804077148, |
| "learning_rate": 0.00018475657213610166, |
| "loss": 2.9083, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.18812147272238647, |
| "grad_norm": 17.70090103149414, |
| "learning_rate": 0.0001845750573370626, |
| "loss": 5.4718, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.18919645256651438, |
| "grad_norm": 15.901100158691406, |
| "learning_rate": 0.00018439255847624303, |
| "loss": 5.1192, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.1902714324106423, |
| "grad_norm": 14.755876541137695, |
| "learning_rate": 0.00018420907767708407, |
| "loss": 3.7262, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.19134641225477023, |
| "grad_norm": 20.44917869567871, |
| "learning_rate": 0.00018402461707445205, |
| "loss": 4.4912, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.19242139209889814, |
| "grad_norm": 12.053194046020508, |
| "learning_rate": 0.00018383917881461366, |
| "loss": 3.2561, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.19349637194302607, |
| "grad_norm": 16.65236473083496, |
| "learning_rate": 0.000183652765055211, |
| "loss": 3.8193, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.19457135178715398, |
| "grad_norm": 18.52997589111328, |
| "learning_rate": 0.00018346537796523645, |
| "loss": 5.1119, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.19564633163128192, |
| "grad_norm": 20.083873748779297, |
| "learning_rate": 0.0001832770197250075, |
| "loss": 3.7478, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.19672131147540983, |
| "grad_norm": 16.985313415527344, |
| "learning_rate": 0.00018308769252614124, |
| "loss": 4.1994, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.19779629131953777, |
| "grad_norm": 20.08932113647461, |
| "learning_rate": 0.00018289739857152903, |
| "loss": 5.1951, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.19887127116366568, |
| "grad_norm": 19.779159545898438, |
| "learning_rate": 0.00018270614007531076, |
| "loss": 3.849, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.1999462510077936, |
| "grad_norm": 17.439552307128906, |
| "learning_rate": 0.00018251391926284906, |
| "loss": 3.9962, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.20102123085192153, |
| "grad_norm": 22.15019416809082, |
| "learning_rate": 0.0001823207383707036, |
| "loss": 5.3047, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.20209621069604944, |
| "grad_norm": 12.635649681091309, |
| "learning_rate": 0.00018212659964660476, |
| "loss": 2.8466, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.20317119054017738, |
| "grad_norm": 21.744354248046875, |
| "learning_rate": 0.00018193150534942778, |
| "loss": 4.3091, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.2042461703843053, |
| "grad_norm": 26.043798446655273, |
| "learning_rate": 0.00018173545774916627, |
| "loss": 3.7433, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.20532115022843322, |
| "grad_norm": 13.015897750854492, |
| "learning_rate": 0.00018153845912690587, |
| "loss": 4.0063, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.20639613007256113, |
| "grad_norm": 21.41050148010254, |
| "learning_rate": 0.00018134051177479777, |
| "loss": 3.7365, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.20747110991668907, |
| "grad_norm": 20.63169288635254, |
| "learning_rate": 0.00018114161799603193, |
| "loss": 3.8878, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.20854608976081698, |
| "grad_norm": 18.148544311523438, |
| "learning_rate": 0.00018094178010481034, |
| "loss": 3.4437, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.20962106960494492, |
| "grad_norm": 15.158918380737305, |
| "learning_rate": 0.00018074100042632005, |
| "loss": 3.2009, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.21069604944907283, |
| "grad_norm": 14.152005195617676, |
| "learning_rate": 0.00018053928129670624, |
| "loss": 3.4912, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.21177102929320074, |
| "grad_norm": 13.470719337463379, |
| "learning_rate": 0.00018033662506304485, |
| "loss": 3.3799, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.21284600913732868, |
| "grad_norm": 16.506755828857422, |
| "learning_rate": 0.00018013303408331543, |
| "loss": 2.9757, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.2139209889814566, |
| "grad_norm": 15.626431465148926, |
| "learning_rate": 0.00017992851072637364, |
| "loss": 4.4908, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.21499596882558453, |
| "grad_norm": 13.705154418945312, |
| "learning_rate": 0.00017972305737192366, |
| "loss": 3.9591, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.21607094866971244, |
| "grad_norm": 17.541763305664062, |
| "learning_rate": 0.00017951667641049053, |
| "loss": 3.2296, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.21714592851384037, |
| "grad_norm": 17.75946044921875, |
| "learning_rate": 0.0001793093702433924, |
| "loss": 3.4873, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.21822090835796829, |
| "grad_norm": 21.355859756469727, |
| "learning_rate": 0.0001791011412827124, |
| "loss": 5.255, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.21929588820209622, |
| "grad_norm": 11.548168182373047, |
| "learning_rate": 0.00017889199195127086, |
| "loss": 3.1538, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.22037086804622413, |
| "grad_norm": 22.31789207458496, |
| "learning_rate": 0.00017868192468259686, |
| "loss": 4.0628, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.22144584789035204, |
| "grad_norm": 13.790849685668945, |
| "learning_rate": 0.00017847094192090005, |
| "loss": 3.399, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.22252082773447998, |
| "grad_norm": 9.146944046020508, |
| "learning_rate": 0.00017825904612104215, |
| "loss": 2.4616, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.2235958075786079, |
| "grad_norm": 14.422385215759277, |
| "learning_rate": 0.00017804623974850844, |
| "loss": 3.5906, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.22467078742273583, |
| "grad_norm": 16.845298767089844, |
| "learning_rate": 0.00017783252527937905, |
| "loss": 4.7812, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.22574576726686374, |
| "grad_norm": 24.236703872680664, |
| "learning_rate": 0.0001776179052003001, |
| "loss": 5.1536, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.22682074711099168, |
| "grad_norm": 21.463781356811523, |
| "learning_rate": 0.00017740238200845485, |
| "loss": 5.0244, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.2278957269551196, |
| "grad_norm": 18.184011459350586, |
| "learning_rate": 0.00017718595821153462, |
| "loss": 5.0591, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.22897070679924753, |
| "grad_norm": 16.528148651123047, |
| "learning_rate": 0.0001769686363277096, |
| "loss": 3.7127, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.23004568664337544, |
| "grad_norm": 16.78187370300293, |
| "learning_rate": 0.0001767504188855995, |
| "loss": 4.499, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.23112066648750335, |
| "grad_norm": 19.419116973876953, |
| "learning_rate": 0.00017653130842424427, |
| "loss": 3.4537, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.23219564633163128, |
| "grad_norm": 14.738585472106934, |
| "learning_rate": 0.00017631130749307436, |
| "loss": 3.7363, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.2332706261757592, |
| "grad_norm": 16.021595001220703, |
| "learning_rate": 0.0001760904186518812, |
| "loss": 4.2678, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.23434560601988713, |
| "grad_norm": 16.80089569091797, |
| "learning_rate": 0.00017586864447078742, |
| "loss": 4.2492, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.23542058586401504, |
| "grad_norm": 17.562274932861328, |
| "learning_rate": 0.0001756459875302169, |
| "loss": 4.4658, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.23649556570814298, |
| "grad_norm": 13.105591773986816, |
| "learning_rate": 0.0001754224504208647, |
| "loss": 3.8664, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.2375705455522709, |
| "grad_norm": 11.499171257019043, |
| "learning_rate": 0.00017519803574366698, |
| "loss": 3.7275, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.23864552539639883, |
| "grad_norm": 14.275655746459961, |
| "learning_rate": 0.00017497274610977072, |
| "loss": 3.9924, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.23972050524052674, |
| "grad_norm": 20.798551559448242, |
| "learning_rate": 0.00017474658414050342, |
| "loss": 4.1779, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.24079548508465468, |
| "grad_norm": 17.76445770263672, |
| "learning_rate": 0.0001745195524673424, |
| "loss": 4.5601, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.24187046492878259, |
| "grad_norm": 15.017122268676758, |
| "learning_rate": 0.00017429165373188438, |
| "loss": 3.23, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.2429454447729105, |
| "grad_norm": 17.35443878173828, |
| "learning_rate": 0.00017406289058581465, |
| "loss": 4.0901, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.24402042461703843, |
| "grad_norm": 17.933059692382812, |
| "learning_rate": 0.00017383326569087623, |
| "loss": 4.353, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.24509540446116634, |
| "grad_norm": 16.176124572753906, |
| "learning_rate": 0.0001736027817188389, |
| "loss": 4.1159, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.24617038430529428, |
| "grad_norm": 18.375131607055664, |
| "learning_rate": 0.00017337144135146817, |
| "loss": 4.673, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.2472453641494222, |
| "grad_norm": 14.222498893737793, |
| "learning_rate": 0.00017313924728049393, |
| "loss": 3.5181, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.24832034399355013, |
| "grad_norm": 14.064003944396973, |
| "learning_rate": 0.00017290620220757928, |
| "loss": 3.0101, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.24939532383767804, |
| "grad_norm": 14.68502140045166, |
| "learning_rate": 0.00017267230884428905, |
| "loss": 2.8587, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.25047030368180595, |
| "grad_norm": 15.439518928527832, |
| "learning_rate": 0.0001724375699120582, |
| "loss": 3.5475, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.25047030368180595, |
| "eval_loss": 0.9161850214004517, |
| "eval_runtime": 5.6189, |
| "eval_samples_per_second": 69.765, |
| "eval_steps_per_second": 34.882, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.2515452835259339, |
| "grad_norm": 16.505876541137695, |
| "learning_rate": 0.0001722019881421602, |
| "loss": 3.5136, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.2526202633700618, |
| "grad_norm": 11.073945999145508, |
| "learning_rate": 0.0001719655662756753, |
| "loss": 2.7815, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.25369524321418974, |
| "grad_norm": 18.41621208190918, |
| "learning_rate": 0.00017172830706345854, |
| "loss": 2.9592, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.25477022305831765, |
| "grad_norm": 17.0198974609375, |
| "learning_rate": 0.00017149021326610776, |
| "loss": 3.414, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.25584520290244556, |
| "grad_norm": 18.122941970825195, |
| "learning_rate": 0.00017125128765393155, |
| "loss": 3.9874, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.2569201827465735, |
| "grad_norm": 16.342615127563477, |
| "learning_rate": 0.00017101153300691694, |
| "loss": 4.0199, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.25799516259070143, |
| "grad_norm": 17.71503257751465, |
| "learning_rate": 0.00017077095211469708, |
| "loss": 3.5693, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.25907014243482934, |
| "grad_norm": 14.306414604187012, |
| "learning_rate": 0.00017052954777651883, |
| "loss": 3.7534, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.26014512227895725, |
| "grad_norm": 19.34443473815918, |
| "learning_rate": 0.00017028732280121008, |
| "loss": 4.534, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.2612201021230852, |
| "grad_norm": 19.286706924438477, |
| "learning_rate": 0.00017004428000714722, |
| "loss": 4.4074, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.26229508196721313, |
| "grad_norm": 14.999690055847168, |
| "learning_rate": 0.00016980042222222217, |
| "loss": 3.5992, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.26337006181134104, |
| "grad_norm": 12.448857307434082, |
| "learning_rate": 0.0001695557522838096, |
| "loss": 3.4263, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.26444504165546895, |
| "grad_norm": 14.967903137207031, |
| "learning_rate": 0.00016931027303873392, |
| "loss": 3.3476, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.26552002149959686, |
| "grad_norm": 16.7852725982666, |
| "learning_rate": 0.00016906398734323606, |
| "loss": 3.4892, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.2665950013437248, |
| "grad_norm": 12.612491607666016, |
| "learning_rate": 0.00016881689806294036, |
| "loss": 3.6491, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.26766998118785273, |
| "grad_norm": 23.1019287109375, |
| "learning_rate": 0.00016856900807282114, |
| "loss": 4.2173, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.26874496103198064, |
| "grad_norm": 13.499241828918457, |
| "learning_rate": 0.00016832032025716921, |
| "loss": 4.167, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.26981994087610855, |
| "grad_norm": 17.28055191040039, |
| "learning_rate": 0.00016807083750955846, |
| "loss": 3.9582, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.2708949207202365, |
| "grad_norm": 21.756202697753906, |
| "learning_rate": 0.00016782056273281207, |
| "loss": 4.9348, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.27196990056436443, |
| "grad_norm": 19.564050674438477, |
| "learning_rate": 0.00016756949883896876, |
| "loss": 4.5317, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.27304488040849234, |
| "grad_norm": 17.771780014038086, |
| "learning_rate": 0.0001673176487492489, |
| "loss": 4.1873, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.27411986025262025, |
| "grad_norm": 20.27021598815918, |
| "learning_rate": 0.00016706501539402063, |
| "loss": 4.289, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.27519484009674816, |
| "grad_norm": 14.25924301147461, |
| "learning_rate": 0.0001668116017127655, |
| "loss": 3.5322, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.2762698199408761, |
| "grad_norm": 10.925680160522461, |
| "learning_rate": 0.0001665574106540446, |
| "loss": 3.296, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.27734479978500404, |
| "grad_norm": 17.243980407714844, |
| "learning_rate": 0.0001663024451754641, |
| "loss": 3.4127, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.27841977962913195, |
| "grad_norm": 11.710211753845215, |
| "learning_rate": 0.00016604670824364067, |
| "loss": 3.4865, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.27949475947325986, |
| "grad_norm": 15.336100578308105, |
| "learning_rate": 0.00016579020283416724, |
| "loss": 3.9446, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.2805697393173878, |
| "grad_norm": 13.642552375793457, |
| "learning_rate": 0.00016553293193157824, |
| "loss": 3.8352, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.28164471916151573, |
| "grad_norm": 15.919568061828613, |
| "learning_rate": 0.0001652748985293149, |
| "loss": 2.8705, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.28271969900564364, |
| "grad_norm": 16.444141387939453, |
| "learning_rate": 0.00016501610562969033, |
| "loss": 3.1671, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.28379467884977155, |
| "grad_norm": 21.732051849365234, |
| "learning_rate": 0.00016475655624385483, |
| "loss": 3.4969, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.28486965869389946, |
| "grad_norm": 18.68508529663086, |
| "learning_rate": 0.00016449625339176054, |
| "loss": 4.4919, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.28594463853802743, |
| "grad_norm": 18.98984146118164, |
| "learning_rate": 0.00016423520010212656, |
| "loss": 3.9249, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.28701961838215534, |
| "grad_norm": 9.13257122039795, |
| "learning_rate": 0.00016397339941240355, |
| "loss": 2.1867, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.28809459822628325, |
| "grad_norm": 19.376365661621094, |
| "learning_rate": 0.00016371085436873845, |
| "loss": 4.499, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.28916957807041116, |
| "grad_norm": 14.490234375, |
| "learning_rate": 0.00016344756802593905, |
| "loss": 3.1082, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.2902445579145391, |
| "grad_norm": 14.469581604003906, |
| "learning_rate": 0.00016318354344743843, |
| "loss": 3.027, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.29131953775866704, |
| "grad_norm": 19.300182342529297, |
| "learning_rate": 0.00016291878370525926, |
| "loss": 3.7289, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.29239451760279495, |
| "grad_norm": 17.35890007019043, |
| "learning_rate": 0.00016265329187997818, |
| "loss": 4.74, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.29346949744692286, |
| "grad_norm": 13.944664001464844, |
| "learning_rate": 0.00016238707106068983, |
| "loss": 3.7071, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.29454447729105077, |
| "grad_norm": 13.07278060913086, |
| "learning_rate": 0.00016212012434497103, |
| "loss": 3.3124, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.29561945713517873, |
| "grad_norm": 14.615857124328613, |
| "learning_rate": 0.00016185245483884457, |
| "loss": 3.766, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.29669443697930664, |
| "grad_norm": 15.86770248413086, |
| "learning_rate": 0.0001615840656567433, |
| "loss": 4.3149, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.29776941682343455, |
| "grad_norm": 16.81600570678711, |
| "learning_rate": 0.0001613149599214736, |
| "loss": 4.2699, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.29884439666756246, |
| "grad_norm": 16.2762393951416, |
| "learning_rate": 0.00016104514076417935, |
| "loss": 3.4487, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.2999193765116904, |
| "grad_norm": 15.317776679992676, |
| "learning_rate": 0.00016077461132430533, |
| "loss": 4.2894, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.30099435635581834, |
| "grad_norm": 14.264720916748047, |
| "learning_rate": 0.00016050337474956067, |
| "loss": 3.7338, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.30206933619994625, |
| "grad_norm": 16.04495620727539, |
| "learning_rate": 0.00016023143419588228, |
| "loss": 3.7665, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.30314431604407416, |
| "grad_norm": 13.27541446685791, |
| "learning_rate": 0.0001599587928273982, |
| "loss": 3.0945, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.3042192958882021, |
| "grad_norm": 20.908628463745117, |
| "learning_rate": 0.0001596854538163906, |
| "loss": 3.5324, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.30529427573233003, |
| "grad_norm": 22.76654815673828, |
| "learning_rate": 0.000159411420343259, |
| "loss": 4.0003, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.30636925557645794, |
| "grad_norm": 15.583036422729492, |
| "learning_rate": 0.00015913669559648334, |
| "loss": 4.1793, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.30744423542058585, |
| "grad_norm": 19.990497589111328, |
| "learning_rate": 0.00015886128277258662, |
| "loss": 4.0978, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.30851921526471376, |
| "grad_norm": 15.330724716186523, |
| "learning_rate": 0.00015858518507609804, |
| "loss": 3.8858, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.30959419510884173, |
| "grad_norm": 18.55107307434082, |
| "learning_rate": 0.00015830840571951543, |
| "loss": 3.3965, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.31066917495296964, |
| "grad_norm": 16.54085350036621, |
| "learning_rate": 0.000158030947923268, |
| "loss": 3.9363, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.31174415479709755, |
| "grad_norm": 16.187206268310547, |
| "learning_rate": 0.00015775281491567887, |
| "loss": 3.9742, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.31281913464122546, |
| "grad_norm": 24.83617401123047, |
| "learning_rate": 0.00015747400993292756, |
| "loss": 3.765, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.3138941144853534, |
| "grad_norm": 19.85297393798828, |
| "learning_rate": 0.0001571945362190121, |
| "loss": 4.4267, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.31496909432948134, |
| "grad_norm": 17.904691696166992, |
| "learning_rate": 0.0001569143970257116, |
| "loss": 3.953, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.31604407417360925, |
| "grad_norm": 14.280094146728516, |
| "learning_rate": 0.00015663359561254823, |
| "loss": 4.2375, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.31711905401773716, |
| "grad_norm": 20.711565017700195, |
| "learning_rate": 0.00015635213524674928, |
| "loss": 3.4025, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.31819403386186507, |
| "grad_norm": 17.165376663208008, |
| "learning_rate": 0.00015607001920320927, |
| "loss": 4.1795, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.31926901370599303, |
| "grad_norm": 14.331582069396973, |
| "learning_rate": 0.0001557872507644517, |
| "loss": 3.846, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.32034399355012094, |
| "grad_norm": 13.659123420715332, |
| "learning_rate": 0.000155503833220591, |
| "loss": 3.6604, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.32141897339424885, |
| "grad_norm": 23.412158966064453, |
| "learning_rate": 0.0001552197698692941, |
| "loss": 4.6184, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.32249395323837676, |
| "grad_norm": 13.751562118530273, |
| "learning_rate": 0.00015493506401574218, |
| "loss": 2.5928, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.32356893308250473, |
| "grad_norm": 19.892213821411133, |
| "learning_rate": 0.00015464971897259219, |
| "loss": 4.3952, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.32464391292663264, |
| "grad_norm": 15.620636940002441, |
| "learning_rate": 0.00015436373805993825, |
| "loss": 4.1718, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.32571889277076055, |
| "grad_norm": 25.407278060913086, |
| "learning_rate": 0.00015407712460527304, |
| "loss": 3.3582, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.32679387261488846, |
| "grad_norm": 15.422714233398438, |
| "learning_rate": 0.0001537898819434491, |
| "loss": 4.1691, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.32786885245901637, |
| "grad_norm": 15.950149536132812, |
| "learning_rate": 0.00015350201341664014, |
| "loss": 3.1559, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.32894383230314433, |
| "grad_norm": 16.09635353088379, |
| "learning_rate": 0.00015321352237430185, |
| "loss": 3.5173, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.33001881214727224, |
| "grad_norm": 14.290729522705078, |
| "learning_rate": 0.00015292441217313324, |
| "loss": 3.5474, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.33109379199140015, |
| "grad_norm": 12.314271926879883, |
| "learning_rate": 0.00015263468617703743, |
| "loss": 3.81, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.33216877183552806, |
| "grad_norm": 12.49377155303955, |
| "learning_rate": 0.0001523443477570826, |
| "loss": 3.0498, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.33324375167965603, |
| "grad_norm": 14.970670700073242, |
| "learning_rate": 0.00015205340029146255, |
| "loss": 3.8808, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.33431873152378394, |
| "grad_norm": 21.925323486328125, |
| "learning_rate": 0.0001517618471654577, |
| "loss": 3.9662, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.33539371136791185, |
| "grad_norm": 15.550761222839355, |
| "learning_rate": 0.0001514696917713955, |
| "loss": 3.0303, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.33646869121203976, |
| "grad_norm": 19.790069580078125, |
| "learning_rate": 0.00015117693750861096, |
| "loss": 3.3957, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.33754367105616767, |
| "grad_norm": 16.23335075378418, |
| "learning_rate": 0.00015088358778340725, |
| "loss": 3.386, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.33861865090029564, |
| "grad_norm": 16.61414337158203, |
| "learning_rate": 0.00015058964600901583, |
| "loss": 3.6885, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.33969363074442355, |
| "grad_norm": 14.917675018310547, |
| "learning_rate": 0.00015029511560555708, |
| "loss": 3.1788, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.34076861058855146, |
| "grad_norm": 16.645130157470703, |
| "learning_rate": 0.00015000000000000001, |
| "loss": 3.0699, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.34184359043267937, |
| "grad_norm": 14.486559867858887, |
| "learning_rate": 0.0001497043026261229, |
| "loss": 2.9101, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.34291857027680733, |
| "grad_norm": 20.343292236328125, |
| "learning_rate": 0.00014940802692447306, |
| "loss": 3.3769, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.34399355012093524, |
| "grad_norm": 12.295371055603027, |
| "learning_rate": 0.00014911117634232678, |
| "loss": 3.1421, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.34506852996506315, |
| "grad_norm": 16.151826858520508, |
| "learning_rate": 0.00014881375433364936, |
| "loss": 3.8197, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.34614350980919106, |
| "grad_norm": 17.043066024780273, |
| "learning_rate": 0.0001485157643590549, |
| "loss": 3.6181, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.347218489653319, |
| "grad_norm": 22.119674682617188, |
| "learning_rate": 0.00014821720988576585, |
| "loss": 5.0421, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.34829346949744694, |
| "grad_norm": 15.809839248657227, |
| "learning_rate": 0.00014791809438757296, |
| "loss": 3.9497, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.34936844934157485, |
| "grad_norm": 12.98931884765625, |
| "learning_rate": 0.00014761842134479463, |
| "loss": 3.346, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.35044342918570276, |
| "grad_norm": 17.794139862060547, |
| "learning_rate": 0.00014731819424423651, |
| "loss": 3.7576, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.35151840902983067, |
| "grad_norm": 12.563948631286621, |
| "learning_rate": 0.00014701741657915094, |
| "loss": 3.4395, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.35259338887395864, |
| "grad_norm": 21.27597999572754, |
| "learning_rate": 0.0001467160918491962, |
| "loss": 5.3584, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.35366836871808655, |
| "grad_norm": 17.49233055114746, |
| "learning_rate": 0.00014641422356039604, |
| "loss": 3.3713, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.35474334856221446, |
| "grad_norm": 18.22296142578125, |
| "learning_rate": 0.00014611181522509846, |
| "loss": 2.7666, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.35581832840634237, |
| "grad_norm": 19.422439575195312, |
| "learning_rate": 0.00014580887036193537, |
| "loss": 4.8558, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.35689330825047033, |
| "grad_norm": 14.737062454223633, |
| "learning_rate": 0.0001455053924957812, |
| "loss": 3.5708, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.35796828809459824, |
| "grad_norm": 12.72256088256836, |
| "learning_rate": 0.0001452013851577121, |
| "loss": 2.6484, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.35904326793872615, |
| "grad_norm": 15.688788414001465, |
| "learning_rate": 0.00014489685188496488, |
| "loss": 3.2307, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.36011824778285406, |
| "grad_norm": 12.274057388305664, |
| "learning_rate": 0.0001445917962208957, |
| "loss": 3.0441, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.36119322762698197, |
| "grad_norm": 16.341550827026367, |
| "learning_rate": 0.000144286221714939, |
| "loss": 3.9343, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.36226820747110994, |
| "grad_norm": 18.544235229492188, |
| "learning_rate": 0.00014398013192256615, |
| "loss": 3.5993, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.36334318731523785, |
| "grad_norm": 15.311888694763184, |
| "learning_rate": 0.000143673530405244, |
| "loss": 3.3302, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.36441816715936576, |
| "grad_norm": 13.064177513122559, |
| "learning_rate": 0.00014336642073039358, |
| "loss": 2.9194, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.36549314700349367, |
| "grad_norm": 17.951086044311523, |
| "learning_rate": 0.00014305880647134847, |
| "loss": 3.0824, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.36656812684762163, |
| "grad_norm": 23.24056625366211, |
| "learning_rate": 0.00014275069120731323, |
| "loss": 5.6002, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.36764310669174954, |
| "grad_norm": 14.519164085388184, |
| "learning_rate": 0.0001424420785233219, |
| "loss": 3.6317, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.36871808653587745, |
| "grad_norm": 13.191924095153809, |
| "learning_rate": 0.00014213297201019618, |
| "loss": 3.3136, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.36979306638000536, |
| "grad_norm": 11.676475524902344, |
| "learning_rate": 0.0001418233752645035, |
| "loss": 3.3879, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.3708680462241333, |
| "grad_norm": 12.395687103271484, |
| "learning_rate": 0.00014151329188851554, |
| "loss": 3.2117, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.37194302606826124, |
| "grad_norm": 15.743541717529297, |
| "learning_rate": 0.0001412027254901659, |
| "loss": 3.259, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.37301800591238915, |
| "grad_norm": 19.783029556274414, |
| "learning_rate": 0.0001408916796830085, |
| "loss": 4.3767, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.37409298575651706, |
| "grad_norm": 17.02685546875, |
| "learning_rate": 0.0001405801580861752, |
| "loss": 3.9807, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.37516796560064497, |
| "grad_norm": 14.147680282592773, |
| "learning_rate": 0.00014026816432433399, |
| "loss": 3.1309, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.37624294544477294, |
| "grad_norm": 14.738043785095215, |
| "learning_rate": 0.00013995570202764656, |
| "loss": 2.9714, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.37731792528890085, |
| "grad_norm": 17.08428382873535, |
| "learning_rate": 0.0001396427748317262, |
| "loss": 3.1942, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.37839290513302876, |
| "grad_norm": 15.932269096374512, |
| "learning_rate": 0.00013932938637759555, |
| "loss": 3.6591, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.37946788497715667, |
| "grad_norm": 19.795040130615234, |
| "learning_rate": 0.00013901554031164404, |
| "loss": 4.9021, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.3805428648212846, |
| "grad_norm": 17.933841705322266, |
| "learning_rate": 0.0001387012402855857, |
| "loss": 4.1404, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.38161784466541254, |
| "grad_norm": 24.006519317626953, |
| "learning_rate": 0.00013838648995641645, |
| "loss": 5.4279, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.38269282450954045, |
| "grad_norm": 18.881019592285156, |
| "learning_rate": 0.0001380712929863717, |
| "loss": 4.1727, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.38376780435366836, |
| "grad_norm": 18.83523178100586, |
| "learning_rate": 0.00013775565304288372, |
| "loss": 4.6629, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.3848427841977963, |
| "grad_norm": 12.512502670288086, |
| "learning_rate": 0.00013743957379853884, |
| "loss": 2.8857, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.38591776404192424, |
| "grad_norm": 18.216079711914062, |
| "learning_rate": 0.00013712305893103492, |
| "loss": 2.8652, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.38699274388605215, |
| "grad_norm": 16.71242332458496, |
| "learning_rate": 0.00013680611212313841, |
| "loss": 3.6149, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.38806772373018006, |
| "grad_norm": 21.3747501373291, |
| "learning_rate": 0.0001364887370626416, |
| "loss": 3.4502, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.38914270357430797, |
| "grad_norm": 16.120399475097656, |
| "learning_rate": 0.0001361709374423195, |
| "loss": 2.8048, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.3902176834184359, |
| "grad_norm": 17.512006759643555, |
| "learning_rate": 0.00013585271695988718, |
| "loss": 3.7296, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.39129266326256384, |
| "grad_norm": 15.320964813232422, |
| "learning_rate": 0.00013553407931795662, |
| "loss": 3.4195, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.39236764310669175, |
| "grad_norm": 15.187535285949707, |
| "learning_rate": 0.0001352150282239934, |
| "loss": 2.9683, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.39344262295081966, |
| "grad_norm": 23.12388038635254, |
| "learning_rate": 0.000134895567390274, |
| "loss": 3.8934, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.3945176027949476, |
| "grad_norm": 19.0565128326416, |
| "learning_rate": 0.00013457570053384226, |
| "loss": 3.7732, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.39559258263907554, |
| "grad_norm": 13.927467346191406, |
| "learning_rate": 0.00013425543137646624, |
| "loss": 3.1398, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.39666756248320345, |
| "grad_norm": 17.896343231201172, |
| "learning_rate": 0.00013393476364459493, |
| "loss": 3.4939, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.39774254232733136, |
| "grad_norm": 14.155213356018066, |
| "learning_rate": 0.00013361370106931486, |
| "loss": 3.9665, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.39881752217145927, |
| "grad_norm": 10.331148147583008, |
| "learning_rate": 0.00013329224738630678, |
| "loss": 2.5528, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.3998925020155872, |
| "grad_norm": 16.241106033325195, |
| "learning_rate": 0.00013297040633580202, |
| "loss": 3.4909, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.40096748185971515, |
| "grad_norm": 21.060741424560547, |
| "learning_rate": 0.00013264818166253917, |
| "loss": 4.3651, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.40204246170384306, |
| "grad_norm": 19.373760223388672, |
| "learning_rate": 0.00013232557711572032, |
| "loss": 3.4165, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.40311744154797097, |
| "grad_norm": 16.87083625793457, |
| "learning_rate": 0.00013200259644896762, |
| "loss": 3.7119, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.4041924213920989, |
| "grad_norm": 15.271788597106934, |
| "learning_rate": 0.00013167924342027945, |
| "loss": 2.9353, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.40526740123622684, |
| "grad_norm": 18.673574447631836, |
| "learning_rate": 0.00013135552179198678, |
| "loss": 3.4936, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.40634238108035475, |
| "grad_norm": 14.075126647949219, |
| "learning_rate": 0.00013103143533070937, |
| "loss": 3.1212, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.40741736092448266, |
| "grad_norm": 15.539584159851074, |
| "learning_rate": 0.00013070698780731193, |
| "loss": 3.0859, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.4084923407686106, |
| "grad_norm": 14.371941566467285, |
| "learning_rate": 0.0001303821829968603, |
| "loss": 3.601, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.4095673206127385, |
| "grad_norm": 18.58356285095215, |
| "learning_rate": 0.00013005702467857742, |
| "loss": 4.0211, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.41064230045686645, |
| "grad_norm": 16.63950538635254, |
| "learning_rate": 0.00012973151663579947, |
| "loss": 3.3913, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.41171728030099436, |
| "grad_norm": 14.509610176086426, |
| "learning_rate": 0.0001294056626559318, |
| "loss": 2.8095, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.41279226014512227, |
| "grad_norm": 12.644404411315918, |
| "learning_rate": 0.0001290794665304049, |
| "loss": 2.9944, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.4138672399892502, |
| "grad_norm": 11.41877555847168, |
| "learning_rate": 0.00012875293205463016, |
| "loss": 2.5601, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.41494221983337815, |
| "grad_norm": 23.640005111694336, |
| "learning_rate": 0.00012842606302795585, |
| "loss": 4.2101, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.41601719967750606, |
| "grad_norm": 14.225778579711914, |
| "learning_rate": 0.00012809886325362287, |
| "loss": 3.0655, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.41709217952163397, |
| "grad_norm": 22.691972732543945, |
| "learning_rate": 0.0001277713365387205, |
| "loss": 4.6002, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.4181671593657619, |
| "grad_norm": 18.885147094726562, |
| "learning_rate": 0.00012744348669414203, |
| "loss": 4.323, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.41924213920988984, |
| "grad_norm": 17.81749153137207, |
| "learning_rate": 0.00012711531753454056, |
| "loss": 3.2404, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.42031711905401775, |
| "grad_norm": 15.428961753845215, |
| "learning_rate": 0.0001267868328782845, |
| "loss": 2.9677, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.42139209889814566, |
| "grad_norm": 12.817580223083496, |
| "learning_rate": 0.00012645803654741318, |
| "loss": 3.8399, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.42246707874227357, |
| "grad_norm": 11.081942558288574, |
| "learning_rate": 0.00012612893236759238, |
| "loss": 2.4973, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.4235420585864015, |
| "grad_norm": 17.789241790771484, |
| "learning_rate": 0.0001257995241680698, |
| "loss": 3.9219, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.42461703843052945, |
| "grad_norm": 17.394941329956055, |
| "learning_rate": 0.00012546981578163058, |
| "loss": 3.189, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.42569201827465736, |
| "grad_norm": 17.376474380493164, |
| "learning_rate": 0.00012513981104455256, |
| "loss": 3.4426, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.42676699811878527, |
| "grad_norm": 16.225542068481445, |
| "learning_rate": 0.00012480951379656175, |
| "loss": 4.0302, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.4278419779629132, |
| "grad_norm": 17.361696243286133, |
| "learning_rate": 0.00012447892788078772, |
| "loss": 3.3795, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.42891695780704114, |
| "grad_norm": 14.174507141113281, |
| "learning_rate": 0.0001241480571437187, |
| "loss": 3.9256, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.42999193765116905, |
| "grad_norm": 17.67365074157715, |
| "learning_rate": 0.00012381690543515693, |
| "loss": 4.5173, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.43106691749529696, |
| "grad_norm": 14.265713691711426, |
| "learning_rate": 0.00012348547660817385, |
| "loss": 2.7096, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.4321418973394249, |
| "grad_norm": 13.033537864685059, |
| "learning_rate": 0.00012315377451906537, |
| "loss": 2.5288, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.4332168771835528, |
| "grad_norm": 9.724547386169434, |
| "learning_rate": 0.00012282180302730682, |
| "loss": 2.2101, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.43429185702768075, |
| "grad_norm": 11.268912315368652, |
| "learning_rate": 0.00012248956599550804, |
| "loss": 1.9814, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.43536683687180866, |
| "grad_norm": 11.205545425415039, |
| "learning_rate": 0.00012215706728936875, |
| "loss": 2.8051, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.43644181671593657, |
| "grad_norm": 13.20801830291748, |
| "learning_rate": 0.00012182431077763317, |
| "loss": 3.1918, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.4375167965600645, |
| "grad_norm": 19.06052589416504, |
| "learning_rate": 0.00012149130033204525, |
| "loss": 3.7518, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.43859177640419245, |
| "grad_norm": 17.768795013427734, |
| "learning_rate": 0.00012115803982730352, |
| "loss": 3.7953, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.43966675624832036, |
| "grad_norm": 20.065616607666016, |
| "learning_rate": 0.00012082453314101607, |
| "loss": 4.4466, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.44074173609244827, |
| "grad_norm": 10.540583610534668, |
| "learning_rate": 0.00012049078415365543, |
| "loss": 2.42, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.4418167159365762, |
| "grad_norm": 18.175241470336914, |
| "learning_rate": 0.00012015679674851328, |
| "loss": 4.3468, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.4428916957807041, |
| "grad_norm": 14.195399284362793, |
| "learning_rate": 0.00011982257481165546, |
| "loss": 3.681, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.44396667562483205, |
| "grad_norm": 16.479141235351562, |
| "learning_rate": 0.00011948812223187675, |
| "loss": 3.6083, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.44504165546895996, |
| "grad_norm": 16.942975997924805, |
| "learning_rate": 0.0001191534429006554, |
| "loss": 3.8114, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.4461166353130879, |
| "grad_norm": 12.2439546585083, |
| "learning_rate": 0.00011881854071210805, |
| "loss": 2.9306, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.4471916151572158, |
| "grad_norm": 16.4477481842041, |
| "learning_rate": 0.00011848341956294437, |
| "loss": 3.2065, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.44826659500134375, |
| "grad_norm": 15.710596084594727, |
| "learning_rate": 0.00011814808335242173, |
| "loss": 2.4522, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.44934157484547166, |
| "grad_norm": 12.070144653320312, |
| "learning_rate": 0.00011781253598229982, |
| "loss": 2.3715, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.45041655468959957, |
| "grad_norm": 12.033697128295898, |
| "learning_rate": 0.00011747678135679521, |
| "loss": 3.0672, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.4514915345337275, |
| "grad_norm": 16.81391716003418, |
| "learning_rate": 0.00011714082338253603, |
| "loss": 2.8255, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.4525665143778554, |
| "grad_norm": 15.085043907165527, |
| "learning_rate": 0.00011680466596851635, |
| "loss": 3.1703, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.45364149422198335, |
| "grad_norm": 19.121475219726562, |
| "learning_rate": 0.0001164683130260509, |
| "loss": 4.1685, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.45471647406611126, |
| "grad_norm": 18.101137161254883, |
| "learning_rate": 0.00011613176846872937, |
| "loss": 3.6327, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.4557914539102392, |
| "grad_norm": 12.256120681762695, |
| "learning_rate": 0.00011579503621237102, |
| "loss": 3.4243, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.4568664337543671, |
| "grad_norm": 10.51490306854248, |
| "learning_rate": 0.00011545812017497901, |
| "loss": 3.4243, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.45794141359849505, |
| "grad_norm": 22.243715286254883, |
| "learning_rate": 0.00011512102427669488, |
| "loss": 4.0067, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.45901639344262296, |
| "grad_norm": 10.290740966796875, |
| "learning_rate": 0.00011478375243975296, |
| "loss": 2.5808, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.46009137328675087, |
| "grad_norm": 13.278031349182129, |
| "learning_rate": 0.00011444630858843461, |
| "loss": 2.8027, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.4611663531308788, |
| "grad_norm": 14.611438751220703, |
| "learning_rate": 0.0001141086966490227, |
| "loss": 2.9664, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.4622413329750067, |
| "grad_norm": 16.739944458007812, |
| "learning_rate": 0.00011377092054975584, |
| "loss": 2.5552, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.46331631281913466, |
| "grad_norm": 16.316120147705078, |
| "learning_rate": 0.0001134329842207827, |
| "loss": 3.7572, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.46439129266326257, |
| "grad_norm": 18.96500587463379, |
| "learning_rate": 0.0001130948915941163, |
| "loss": 4.1539, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.4654662725073905, |
| "grad_norm": 11.103170394897461, |
| "learning_rate": 0.00011275664660358818, |
| "loss": 2.3669, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.4665412523515184, |
| "grad_norm": 16.899873733520508, |
| "learning_rate": 0.00011241825318480281, |
| "loss": 3.4796, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.46761623219564635, |
| "grad_norm": 17.01308822631836, |
| "learning_rate": 0.00011207971527509158, |
| "loss": 3.6144, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.46869121203977426, |
| "grad_norm": 13.122427940368652, |
| "learning_rate": 0.00011174103681346711, |
| "loss": 3.1118, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.4697661918839022, |
| "grad_norm": 14.372381210327148, |
| "learning_rate": 0.00011140222174057734, |
| "loss": 3.7498, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.4708411717280301, |
| "grad_norm": 18.45073699951172, |
| "learning_rate": 0.00011106327399865988, |
| "loss": 3.9332, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.47191615157215805, |
| "grad_norm": 15.843331336975098, |
| "learning_rate": 0.00011072419753149586, |
| "loss": 4.2044, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.47299113141628596, |
| "grad_norm": 14.005358695983887, |
| "learning_rate": 0.00011038499628436416, |
| "loss": 3.1071, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.47406611126041387, |
| "grad_norm": 15.334671974182129, |
| "learning_rate": 0.00011004567420399563, |
| "loss": 3.2788, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.4751410911045418, |
| "grad_norm": 12.769185066223145, |
| "learning_rate": 0.00010970623523852699, |
| "loss": 2.3523, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.4762160709486697, |
| "grad_norm": 17.81171417236328, |
| "learning_rate": 0.00010936668333745499, |
| "loss": 2.9305, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.47729105079279766, |
| "grad_norm": 12.29996395111084, |
| "learning_rate": 0.0001090270224515904, |
| "loss": 2.219, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.47836603063692557, |
| "grad_norm": 14.522968292236328, |
| "learning_rate": 0.00010868725653301206, |
| "loss": 3.0004, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.4794410104810535, |
| "grad_norm": 13.94498062133789, |
| "learning_rate": 0.00010834738953502095, |
| "loss": 3.0531, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.4805159903251814, |
| "grad_norm": 15.410560607910156, |
| "learning_rate": 0.0001080074254120941, |
| "loss": 2.3937, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.48159097016930935, |
| "grad_norm": 10.539775848388672, |
| "learning_rate": 0.00010766736811983865, |
| "loss": 2.817, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.48266595001343726, |
| "grad_norm": 16.945173263549805, |
| "learning_rate": 0.00010732722161494579, |
| "loss": 2.8102, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.48374092985756517, |
| "grad_norm": 15.775056838989258, |
| "learning_rate": 0.00010698698985514475, |
| "loss": 3.7962, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.4848159097016931, |
| "grad_norm": 16.637723922729492, |
| "learning_rate": 0.0001066466767991567, |
| "loss": 3.2478, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.485890889545821, |
| "grad_norm": 25.035099029541016, |
| "learning_rate": 0.00010630628640664874, |
| "loss": 5.1592, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.48696586938994896, |
| "grad_norm": 21.126035690307617, |
| "learning_rate": 0.00010596582263818781, |
| "loss": 3.2066, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.48804084923407687, |
| "grad_norm": 16.712631225585938, |
| "learning_rate": 0.00010562528945519463, |
| "loss": 3.5551, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.4891158290782048, |
| "grad_norm": 24.561006546020508, |
| "learning_rate": 0.00010528469081989749, |
| "loss": 4.7308, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.4901908089223327, |
| "grad_norm": 23.1239013671875, |
| "learning_rate": 0.00010494403069528634, |
| "loss": 3.2912, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.49126578876646065, |
| "grad_norm": 14.793785095214844, |
| "learning_rate": 0.00010460331304506657, |
| "loss": 2.3428, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.49234076861058856, |
| "grad_norm": 14.798579216003418, |
| "learning_rate": 0.00010426254183361286, |
| "loss": 3.0977, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.4934157484547165, |
| "grad_norm": 16.173603057861328, |
| "learning_rate": 0.00010392172102592313, |
| "loss": 3.4115, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.4944907282988444, |
| "grad_norm": 24.551992416381836, |
| "learning_rate": 0.00010358085458757232, |
| "loss": 3.7147, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.4955657081429723, |
| "grad_norm": 18.784887313842773, |
| "learning_rate": 0.00010323994648466638, |
| "loss": 4.1496, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.49664068798710026, |
| "grad_norm": 14.511931419372559, |
| "learning_rate": 0.00010289900068379595, |
| "loss": 3.409, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.49771566783122817, |
| "grad_norm": 17.688566207885742, |
| "learning_rate": 0.00010255802115199033, |
| "loss": 1.9911, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.4987906476753561, |
| "grad_norm": 18.56838607788086, |
| "learning_rate": 0.00010221701185667141, |
| "loss": 3.1941, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.499865627519484, |
| "grad_norm": 13.474034309387207, |
| "learning_rate": 0.00010187597676560718, |
| "loss": 3.0192, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.5009406073636119, |
| "grad_norm": 12.874210357666016, |
| "learning_rate": 0.00010153491984686593, |
| "loss": 1.5535, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.5009406073636119, |
| "eval_loss": 0.7541670799255371, |
| "eval_runtime": 5.5547, |
| "eval_samples_per_second": 70.571, |
| "eval_steps_per_second": 35.286, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.5020155872077399, |
| "grad_norm": 14.241241455078125, |
| "learning_rate": 0.0001011938450687699, |
| "loss": 2.6351, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.5030905670518678, |
| "grad_norm": 15.222787857055664, |
| "learning_rate": 0.00010085275639984904, |
| "loss": 3.6475, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.5041655468959957, |
| "grad_norm": 12.280945777893066, |
| "learning_rate": 0.00010051165780879504, |
| "loss": 2.5128, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.5052405267401237, |
| "grad_norm": 14.948744773864746, |
| "learning_rate": 0.00010017055326441494, |
| "loss": 2.9114, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.5063155065842515, |
| "grad_norm": 17.295438766479492, |
| "learning_rate": 9.982944673558508e-05, |
| "loss": 3.4193, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.5073904864283795, |
| "grad_norm": 18.69068145751953, |
| "learning_rate": 9.9488342191205e-05, |
| "loss": 4.0637, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.5084654662725074, |
| "grad_norm": 17.060588836669922, |
| "learning_rate": 9.914724360015099e-05, |
| "loss": 3.1746, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.5095404461166353, |
| "grad_norm": 13.83548641204834, |
| "learning_rate": 9.880615493123012e-05, |
| "loss": 2.757, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.5106154259607633, |
| "grad_norm": 12.663851737976074, |
| "learning_rate": 9.846508015313408e-05, |
| "loss": 2.7217, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.5116904058048911, |
| "grad_norm": 14.959012985229492, |
| "learning_rate": 9.812402323439284e-05, |
| "loss": 3.3345, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.5127653856490191, |
| "grad_norm": 14.708747863769531, |
| "learning_rate": 9.778298814332863e-05, |
| "loss": 2.8671, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.513840365493147, |
| "grad_norm": 10.635260581970215, |
| "learning_rate": 9.744197884800969e-05, |
| "loss": 2.6943, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.5149153453372749, |
| "grad_norm": 14.44356632232666, |
| "learning_rate": 9.710099931620408e-05, |
| "loss": 2.1394, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.5159903251814029, |
| "grad_norm": 9.918120384216309, |
| "learning_rate": 9.676005351533366e-05, |
| "loss": 2.2011, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.5170653050255307, |
| "grad_norm": 17.1123046875, |
| "learning_rate": 9.64191454124277e-05, |
| "loss": 3.2862, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.5181402848696587, |
| "grad_norm": 13.601349830627441, |
| "learning_rate": 9.60782789740769e-05, |
| "loss": 2.3652, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.5192152647137867, |
| "grad_norm": 16.888429641723633, |
| "learning_rate": 9.573745816638716e-05, |
| "loss": 2.7698, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.5202902445579145, |
| "grad_norm": 16.15688705444336, |
| "learning_rate": 9.539668695493344e-05, |
| "loss": 2.6367, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.5213652244020425, |
| "grad_norm": 16.414520263671875, |
| "learning_rate": 9.505596930471367e-05, |
| "loss": 3.9243, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.5224402042461704, |
| "grad_norm": 13.967657089233398, |
| "learning_rate": 9.471530918010253e-05, |
| "loss": 3.1243, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.5235151840902983, |
| "grad_norm": 15.140684127807617, |
| "learning_rate": 9.43747105448054e-05, |
| "loss": 2.659, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.5245901639344263, |
| "grad_norm": 13.074856758117676, |
| "learning_rate": 9.40341773618122e-05, |
| "loss": 3.3224, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.5256651437785541, |
| "grad_norm": 15.123608589172363, |
| "learning_rate": 9.369371359335128e-05, |
| "loss": 3.385, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.5267401236226821, |
| "grad_norm": 15.648529052734375, |
| "learning_rate": 9.335332320084331e-05, |
| "loss": 2.8329, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.52781510346681, |
| "grad_norm": 15.20040225982666, |
| "learning_rate": 9.301301014485528e-05, |
| "loss": 3.5456, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.5288900833109379, |
| "grad_norm": 23.638113021850586, |
| "learning_rate": 9.267277838505423e-05, |
| "loss": 4.8434, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.5299650631550659, |
| "grad_norm": 11.857388496398926, |
| "learning_rate": 9.233263188016138e-05, |
| "loss": 2.2761, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.5310400429991937, |
| "grad_norm": 12.123178482055664, |
| "learning_rate": 9.199257458790591e-05, |
| "loss": 3.0025, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.5321150228433217, |
| "grad_norm": 11.024534225463867, |
| "learning_rate": 9.165261046497907e-05, |
| "loss": 2.265, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.5331900026874496, |
| "grad_norm": 16.32103157043457, |
| "learning_rate": 9.131274346698796e-05, |
| "loss": 3.5393, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.5342649825315775, |
| "grad_norm": 25.771560668945312, |
| "learning_rate": 9.097297754840962e-05, |
| "loss": 3.8375, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.5353399623757055, |
| "grad_norm": 12.820847511291504, |
| "learning_rate": 9.063331666254503e-05, |
| "loss": 2.6361, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.5364149422198333, |
| "grad_norm": 12.816265106201172, |
| "learning_rate": 9.029376476147302e-05, |
| "loss": 2.2486, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.5374899220639613, |
| "grad_norm": 9.368247032165527, |
| "learning_rate": 8.995432579600439e-05, |
| "loss": 2.5467, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.5385649019080893, |
| "grad_norm": 16.314271926879883, |
| "learning_rate": 8.961500371563585e-05, |
| "loss": 3.1917, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.5396398817522171, |
| "grad_norm": 17.877838134765625, |
| "learning_rate": 8.927580246850418e-05, |
| "loss": 3.636, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.5407148615963451, |
| "grad_norm": 15.371697425842285, |
| "learning_rate": 8.893672600134013e-05, |
| "loss": 4.3843, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.541789841440473, |
| "grad_norm": 14.944757461547852, |
| "learning_rate": 8.859777825942267e-05, |
| "loss": 2.2189, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.5428648212846009, |
| "grad_norm": 17.238252639770508, |
| "learning_rate": 8.825896318653293e-05, |
| "loss": 2.525, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.5439398011287289, |
| "grad_norm": 14.758814811706543, |
| "learning_rate": 8.792028472490844e-05, |
| "loss": 3.1758, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.5450147809728567, |
| "grad_norm": 9.887633323669434, |
| "learning_rate": 8.758174681519721e-05, |
| "loss": 2.2908, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.5460897608169847, |
| "grad_norm": 13.68622875213623, |
| "learning_rate": 8.724335339641184e-05, |
| "loss": 2.105, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.5471647406611126, |
| "grad_norm": 12.679695129394531, |
| "learning_rate": 8.690510840588373e-05, |
| "loss": 2.1756, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.5482397205052405, |
| "grad_norm": 14.024535179138184, |
| "learning_rate": 8.656701577921732e-05, |
| "loss": 3.0431, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.5493147003493685, |
| "grad_norm": 12.965935707092285, |
| "learning_rate": 8.622907945024417e-05, |
| "loss": 2.1099, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.5503896801934963, |
| "grad_norm": 19.419710159301758, |
| "learning_rate": 8.589130335097732e-05, |
| "loss": 3.3639, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.5514646600376243, |
| "grad_norm": 18.27731704711914, |
| "learning_rate": 8.55536914115654e-05, |
| "loss": 3.5416, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.5525396398817523, |
| "grad_norm": 16.81820297241211, |
| "learning_rate": 8.521624756024705e-05, |
| "loss": 3.8419, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.5536146197258801, |
| "grad_norm": 14.465489387512207, |
| "learning_rate": 8.487897572330513e-05, |
| "loss": 2.3487, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.5546895995700081, |
| "grad_norm": 11.499032974243164, |
| "learning_rate": 8.454187982502101e-05, |
| "loss": 2.6283, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.555764579414136, |
| "grad_norm": 21.029111862182617, |
| "learning_rate": 8.4204963787629e-05, |
| "loss": 4.7078, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.5568395592582639, |
| "grad_norm": 18.131210327148438, |
| "learning_rate": 8.386823153127064e-05, |
| "loss": 3.6223, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.5579145391023919, |
| "grad_norm": 15.802128791809082, |
| "learning_rate": 8.353168697394913e-05, |
| "loss": 2.6126, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.5589895189465197, |
| "grad_norm": 16.3378849029541, |
| "learning_rate": 8.319533403148367e-05, |
| "loss": 2.8075, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.5600644987906477, |
| "grad_norm": 13.895936012268066, |
| "learning_rate": 8.285917661746401e-05, |
| "loss": 2.7503, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.5611394786347756, |
| "grad_norm": 13.693531036376953, |
| "learning_rate": 8.25232186432048e-05, |
| "loss": 2.9142, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.5622144584789035, |
| "grad_norm": 14.982376098632812, |
| "learning_rate": 8.218746401770022e-05, |
| "loss": 3.0101, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.5632894383230315, |
| "grad_norm": 18.6697998046875, |
| "learning_rate": 8.185191664757828e-05, |
| "loss": 3.6426, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.5643644181671593, |
| "grad_norm": 12.057175636291504, |
| "learning_rate": 8.151658043705565e-05, |
| "loss": 2.9482, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.5654393980112873, |
| "grad_norm": 14.079970359802246, |
| "learning_rate": 8.118145928789199e-05, |
| "loss": 3.1769, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.5665143778554153, |
| "grad_norm": 12.814187049865723, |
| "learning_rate": 8.084655709934462e-05, |
| "loss": 2.472, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.5675893576995431, |
| "grad_norm": 10.802642822265625, |
| "learning_rate": 8.051187776812326e-05, |
| "loss": 2.0466, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.5686643375436711, |
| "grad_norm": 12.31850528717041, |
| "learning_rate": 8.017742518834454e-05, |
| "loss": 2.4457, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.5697393173877989, |
| "grad_norm": 13.78878116607666, |
| "learning_rate": 7.984320325148675e-05, |
| "loss": 2.6326, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.5708142972319269, |
| "grad_norm": 9.785225868225098, |
| "learning_rate": 7.950921584634461e-05, |
| "loss": 2.8243, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.5718892770760549, |
| "grad_norm": 16.127605438232422, |
| "learning_rate": 7.917546685898391e-05, |
| "loss": 3.5011, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.5729642569201827, |
| "grad_norm": 20.46214485168457, |
| "learning_rate": 7.884196017269648e-05, |
| "loss": 2.5311, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.5740392367643107, |
| "grad_norm": 13.586955070495605, |
| "learning_rate": 7.850869966795476e-05, |
| "loss": 2.8393, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.5751142166084386, |
| "grad_norm": 16.564584732055664, |
| "learning_rate": 7.817568922236682e-05, |
| "loss": 2.3696, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.5761891964525665, |
| "grad_norm": 21.446279525756836, |
| "learning_rate": 7.784293271063124e-05, |
| "loss": 4.4285, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.5772641762966945, |
| "grad_norm": 18.923242568969727, |
| "learning_rate": 7.751043400449197e-05, |
| "loss": 3.2939, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.5783391561408223, |
| "grad_norm": 16.000579833984375, |
| "learning_rate": 7.717819697269321e-05, |
| "loss": 3.8915, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.5794141359849503, |
| "grad_norm": 11.695368766784668, |
| "learning_rate": 7.684622548093461e-05, |
| "loss": 2.5856, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.5804891158290783, |
| "grad_norm": 15.072840690612793, |
| "learning_rate": 7.651452339182613e-05, |
| "loss": 2.8462, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.5815640956732061, |
| "grad_norm": 18.407136917114258, |
| "learning_rate": 7.618309456484308e-05, |
| "loss": 2.5811, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.5826390755173341, |
| "grad_norm": 17.274293899536133, |
| "learning_rate": 7.58519428562813e-05, |
| "loss": 3.2455, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.5837140553614619, |
| "grad_norm": 15.445805549621582, |
| "learning_rate": 7.552107211921229e-05, |
| "loss": 3.3812, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.5847890352055899, |
| "grad_norm": 16.473222732543945, |
| "learning_rate": 7.519048620343825e-05, |
| "loss": 2.9544, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.5858640150497179, |
| "grad_norm": 16.593351364135742, |
| "learning_rate": 7.486018895544748e-05, |
| "loss": 3.8982, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.5869389948938457, |
| "grad_norm": 18.829208374023438, |
| "learning_rate": 7.453018421836946e-05, |
| "loss": 2.933, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.5880139747379737, |
| "grad_norm": 21.860137939453125, |
| "learning_rate": 7.420047583193019e-05, |
| "loss": 3.5987, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.5890889545821015, |
| "grad_norm": 15.280534744262695, |
| "learning_rate": 7.387106763240763e-05, |
| "loss": 2.8143, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.5901639344262295, |
| "grad_norm": 15.49770736694336, |
| "learning_rate": 7.354196345258683e-05, |
| "loss": 2.4214, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.5912389142703575, |
| "grad_norm": 15.602472305297852, |
| "learning_rate": 7.32131671217155e-05, |
| "loss": 3.4367, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.5923138941144853, |
| "grad_norm": 14.388925552368164, |
| "learning_rate": 7.288468246545946e-05, |
| "loss": 3.4037, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.5933888739586133, |
| "grad_norm": 19.788185119628906, |
| "learning_rate": 7.255651330585797e-05, |
| "loss": 3.3473, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.5944638538027412, |
| "grad_norm": 13.820460319519043, |
| "learning_rate": 7.222866346127953e-05, |
| "loss": 2.5223, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.5955388336468691, |
| "grad_norm": 16.7007999420166, |
| "learning_rate": 7.190113674637714e-05, |
| "loss": 2.8172, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.5966138134909971, |
| "grad_norm": 16.35044288635254, |
| "learning_rate": 7.157393697204416e-05, |
| "loss": 2.9871, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.5976887933351249, |
| "grad_norm": 13.284764289855957, |
| "learning_rate": 7.124706794536983e-05, |
| "loss": 3.6496, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.5987637731792529, |
| "grad_norm": 12.027867317199707, |
| "learning_rate": 7.09205334695951e-05, |
| "loss": 2.2565, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.5998387530233809, |
| "grad_norm": 17.299827575683594, |
| "learning_rate": 7.059433734406818e-05, |
| "loss": 2.7168, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.6009137328675087, |
| "grad_norm": 17.865158081054688, |
| "learning_rate": 7.026848336420054e-05, |
| "loss": 3.6538, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.6019887127116367, |
| "grad_norm": 12.184990882873535, |
| "learning_rate": 6.99429753214226e-05, |
| "loss": 1.9531, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.6030636925557645, |
| "grad_norm": 15.416332244873047, |
| "learning_rate": 6.961781700313972e-05, |
| "loss": 2.8138, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.6041386723998925, |
| "grad_norm": 16.23126220703125, |
| "learning_rate": 6.929301219268805e-05, |
| "loss": 2.6759, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.6052136522440205, |
| "grad_norm": 21.727291107177734, |
| "learning_rate": 6.896856466929062e-05, |
| "loss": 3.2578, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.6062886320881483, |
| "grad_norm": 9.553668975830078, |
| "learning_rate": 6.86444782080132e-05, |
| "loss": 2.1244, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.6073636119322763, |
| "grad_norm": 21.076509475708008, |
| "learning_rate": 6.832075657972054e-05, |
| "loss": 3.2957, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.6084385917764042, |
| "grad_norm": 11.683152198791504, |
| "learning_rate": 6.799740355103239e-05, |
| "loss": 2.4247, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.6095135716205321, |
| "grad_norm": 13.204034805297852, |
| "learning_rate": 6.76744228842797e-05, |
| "loss": 2.9528, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.6105885514646601, |
| "grad_norm": 14.521734237670898, |
| "learning_rate": 6.735181833746086e-05, |
| "loss": 2.5978, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.6116635313087879, |
| "grad_norm": 17.30651092529297, |
| "learning_rate": 6.702959366419801e-05, |
| "loss": 3.5166, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.6127385111529159, |
| "grad_norm": 15.7838716506958, |
| "learning_rate": 6.670775261369325e-05, |
| "loss": 2.6126, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.6138134909970439, |
| "grad_norm": 13.338095664978027, |
| "learning_rate": 6.638629893068515e-05, |
| "loss": 3.1597, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.6148884708411717, |
| "grad_norm": 16.482463836669922, |
| "learning_rate": 6.60652363554051e-05, |
| "loss": 2.4673, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.6159634506852997, |
| "grad_norm": 13.602096557617188, |
| "learning_rate": 6.574456862353377e-05, |
| "loss": 2.7441, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.6170384305294275, |
| "grad_norm": 15.007840156555176, |
| "learning_rate": 6.542429946615774e-05, |
| "loss": 2.7128, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.6181134103735555, |
| "grad_norm": 11.352252006530762, |
| "learning_rate": 6.510443260972599e-05, |
| "loss": 3.2629, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.6191883902176835, |
| "grad_norm": 14.53456974029541, |
| "learning_rate": 6.47849717760066e-05, |
| "loss": 1.8599, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.6202633700618113, |
| "grad_norm": 12.216171264648438, |
| "learning_rate": 6.446592068204341e-05, |
| "loss": 2.2911, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.6213383499059393, |
| "grad_norm": 15.579245567321777, |
| "learning_rate": 6.41472830401128e-05, |
| "loss": 2.4855, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.6224133297500671, |
| "grad_norm": 19.280820846557617, |
| "learning_rate": 6.382906255768051e-05, |
| "loss": 4.8336, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.6234883095941951, |
| "grad_norm": 19.063934326171875, |
| "learning_rate": 6.351126293735843e-05, |
| "loss": 2.7687, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.6245632894383231, |
| "grad_norm": 13.213850975036621, |
| "learning_rate": 6.319388787686158e-05, |
| "loss": 3.1479, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.6256382692824509, |
| "grad_norm": 19.5723934173584, |
| "learning_rate": 6.287694106896509e-05, |
| "loss": 4.7255, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.6267132491265789, |
| "grad_norm": 12.059754371643066, |
| "learning_rate": 6.256042620146119e-05, |
| "loss": 2.5616, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.6277882289707069, |
| "grad_norm": 10.33979320526123, |
| "learning_rate": 6.224434695711631e-05, |
| "loss": 1.6791, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.6288632088148347, |
| "grad_norm": 12.472517967224121, |
| "learning_rate": 6.19287070136283e-05, |
| "loss": 2.8273, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.6299381886589627, |
| "grad_norm": 13.145999908447266, |
| "learning_rate": 6.16135100435836e-05, |
| "loss": 2.4934, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.6310131685030905, |
| "grad_norm": 12.09467887878418, |
| "learning_rate": 6.129875971441434e-05, |
| "loss": 2.5874, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.6320881483472185, |
| "grad_norm": 12.754231452941895, |
| "learning_rate": 6.0984459688356e-05, |
| "loss": 3.2328, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.6331631281913465, |
| "grad_norm": 20.645830154418945, |
| "learning_rate": 6.0670613622404496e-05, |
| "loss": 3.202, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.6342381080354743, |
| "grad_norm": 13.244977951049805, |
| "learning_rate": 6.035722516827382e-05, |
| "loss": 2.1665, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.6353130878796023, |
| "grad_norm": 17.669931411743164, |
| "learning_rate": 6.004429797235349e-05, |
| "loss": 2.6613, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.6363880677237301, |
| "grad_norm": 10.736635208129883, |
| "learning_rate": 5.973183567566605e-05, |
| "loss": 2.4063, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.6374630475678581, |
| "grad_norm": 18.680782318115234, |
| "learning_rate": 5.9419841913824824e-05, |
| "loss": 2.6796, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.6385380274119861, |
| "grad_norm": 19.364147186279297, |
| "learning_rate": 5.9108320316991536e-05, |
| "loss": 3.8844, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.6396130072561139, |
| "grad_norm": 13.238618850708008, |
| "learning_rate": 5.879727450983412e-05, |
| "loss": 3.3821, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.6406879871002419, |
| "grad_norm": 21.127750396728516, |
| "learning_rate": 5.848670811148451e-05, |
| "loss": 3.8302, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.6417629669443697, |
| "grad_norm": 19.388427734375, |
| "learning_rate": 5.817662473549651e-05, |
| "loss": 2.4551, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.6428379467884977, |
| "grad_norm": 13.296785354614258, |
| "learning_rate": 5.786702798980388e-05, |
| "loss": 2.7313, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.6439129266326257, |
| "grad_norm": 13.282078742980957, |
| "learning_rate": 5.755792147667811e-05, |
| "loss": 2.6865, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.6449879064767535, |
| "grad_norm": 16.131546020507812, |
| "learning_rate": 5.7249308792686815e-05, |
| "loss": 2.9787, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.6460628863208815, |
| "grad_norm": 13.292814254760742, |
| "learning_rate": 5.6941193528651596e-05, |
| "loss": 2.7872, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.6471378661650095, |
| "grad_norm": 13.973624229431152, |
| "learning_rate": 5.663357926960644e-05, |
| "loss": 2.6566, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.6482128460091373, |
| "grad_norm": 14.040596961975098, |
| "learning_rate": 5.6326469594756034e-05, |
| "loss": 3.0928, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.6492878258532653, |
| "grad_norm": 17.149208068847656, |
| "learning_rate": 5.6019868077433876e-05, |
| "loss": 3.2098, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.6503628056973931, |
| "grad_norm": 14.205854415893555, |
| "learning_rate": 5.5713778285061046e-05, |
| "loss": 2.1934, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.6514377855415211, |
| "grad_norm": 20.856319427490234, |
| "learning_rate": 5.540820377910435e-05, |
| "loss": 4.2625, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.6525127653856491, |
| "grad_norm": 19.353553771972656, |
| "learning_rate": 5.5103148115035195e-05, |
| "loss": 3.3242, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.6535877452297769, |
| "grad_norm": 13.190366744995117, |
| "learning_rate": 5.479861484228794e-05, |
| "loss": 2.3837, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.6546627250739049, |
| "grad_norm": 16.302879333496094, |
| "learning_rate": 5.449460750421883e-05, |
| "loss": 3.2505, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.6557377049180327, |
| "grad_norm": 13.280342102050781, |
| "learning_rate": 5.419112963806468e-05, |
| "loss": 1.8674, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.6568126847621607, |
| "grad_norm": 21.006906509399414, |
| "learning_rate": 5.388818477490154e-05, |
| "loss": 3.6557, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.6578876646062887, |
| "grad_norm": 19.307729721069336, |
| "learning_rate": 5.358577643960403e-05, |
| "loss": 2.2382, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.6589626444504165, |
| "grad_norm": 11.849048614501953, |
| "learning_rate": 5.328390815080381e-05, |
| "loss": 2.0229, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.6600376242945445, |
| "grad_norm": 11.787457466125488, |
| "learning_rate": 5.2982583420849116e-05, |
| "loss": 2.6637, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.6611126041386725, |
| "grad_norm": 15.777995109558105, |
| "learning_rate": 5.268180575576352e-05, |
| "loss": 2.893, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.6621875839828003, |
| "grad_norm": 19.96381378173828, |
| "learning_rate": 5.238157865520539e-05, |
| "loss": 3.2706, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.6632625638269283, |
| "grad_norm": 20.699359893798828, |
| "learning_rate": 5.208190561242708e-05, |
| "loss": 2.6676, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.6643375436710561, |
| "grad_norm": 14.418145179748535, |
| "learning_rate": 5.178279011423417e-05, |
| "loss": 2.6929, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.6654125235151841, |
| "grad_norm": 12.969032287597656, |
| "learning_rate": 5.148423564094517e-05, |
| "loss": 2.5543, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.6664875033593121, |
| "grad_norm": 25.956907272338867, |
| "learning_rate": 5.118624566635066e-05, |
| "loss": 3.7616, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.6675624832034399, |
| "grad_norm": 16.259069442749023, |
| "learning_rate": 5.0888823657673266e-05, |
| "loss": 2.6596, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.6686374630475679, |
| "grad_norm": 15.737533569335938, |
| "learning_rate": 5.059197307552698e-05, |
| "loss": 3.4037, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.6697124428916957, |
| "grad_norm": 15.57932186126709, |
| "learning_rate": 5.0295697373877096e-05, |
| "loss": 3.3037, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.6707874227358237, |
| "grad_norm": 16.854917526245117, |
| "learning_rate": 5.000000000000002e-05, |
| "loss": 2.7998, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.6718624025799517, |
| "grad_norm": 20.23788070678711, |
| "learning_rate": 4.9704884394442964e-05, |
| "loss": 2.6882, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.6729373824240795, |
| "grad_norm": 13.206673622131348, |
| "learning_rate": 4.941035399098418e-05, |
| "loss": 2.8392, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.6740123622682075, |
| "grad_norm": 11.313912391662598, |
| "learning_rate": 4.911641221659279e-05, |
| "loss": 2.6687, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.6750873421123353, |
| "grad_norm": 17.33076286315918, |
| "learning_rate": 4.8823062491389094e-05, |
| "loss": 2.5485, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.6761623219564633, |
| "grad_norm": 17.54669952392578, |
| "learning_rate": 4.853030822860455e-05, |
| "loss": 3.4131, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.6772373018005913, |
| "grad_norm": 14.568754196166992, |
| "learning_rate": 4.823815283454235e-05, |
| "loss": 3.2074, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.6783122816447191, |
| "grad_norm": 14.021873474121094, |
| "learning_rate": 4.794659970853749e-05, |
| "loss": 2.3931, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.6793872614888471, |
| "grad_norm": 15.752306938171387, |
| "learning_rate": 4.765565224291743e-05, |
| "loss": 2.4038, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.6804622413329751, |
| "grad_norm": 17.143024444580078, |
| "learning_rate": 4.7365313822962576e-05, |
| "loss": 2.8179, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.6815372211771029, |
| "grad_norm": 18.36078643798828, |
| "learning_rate": 4.707558782686677e-05, |
| "loss": 2.4392, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.6826122010212309, |
| "grad_norm": 17.252010345458984, |
| "learning_rate": 4.67864776256982e-05, |
| "loss": 2.6834, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.6836871808653587, |
| "grad_norm": 25.259824752807617, |
| "learning_rate": 4.64979865833599e-05, |
| "loss": 3.477, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.6847621607094867, |
| "grad_norm": 14.85805606842041, |
| "learning_rate": 4.621011805655093e-05, |
| "loss": 2.3259, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.6858371405536147, |
| "grad_norm": 14.022159576416016, |
| "learning_rate": 4.592287539472701e-05, |
| "loss": 2.6373, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.6869121203977425, |
| "grad_norm": 13.847736358642578, |
| "learning_rate": 4.563626194006178e-05, |
| "loss": 2.5128, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.6879871002418705, |
| "grad_norm": 12.617048263549805, |
| "learning_rate": 4.535028102740785e-05, |
| "loss": 2.6718, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.6890620800859983, |
| "grad_norm": 15.006109237670898, |
| "learning_rate": 4.5064935984257826e-05, |
| "loss": 3.0097, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.6901370599301263, |
| "grad_norm": 18.60867691040039, |
| "learning_rate": 4.478023013070595e-05, |
| "loss": 3.4391, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.6912120397742543, |
| "grad_norm": 18.005126953125, |
| "learning_rate": 4.449616677940903e-05, |
| "loss": 2.8679, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.6922870196183821, |
| "grad_norm": 15.206576347351074, |
| "learning_rate": 4.421274923554835e-05, |
| "loss": 2.5725, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.6933619994625101, |
| "grad_norm": 10.55386734008789, |
| "learning_rate": 4.392998079679076e-05, |
| "loss": 2.5634, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.694436979306638, |
| "grad_norm": 17.997459411621094, |
| "learning_rate": 4.364786475325072e-05, |
| "loss": 4.0269, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.6955119591507659, |
| "grad_norm": 17.598691940307617, |
| "learning_rate": 4.33664043874518e-05, |
| "loss": 3.0651, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.6965869389948939, |
| "grad_norm": 19.84808349609375, |
| "learning_rate": 4.30856029742884e-05, |
| "loss": 3.4776, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.6976619188390217, |
| "grad_norm": 19.29751968383789, |
| "learning_rate": 4.280546378098792e-05, |
| "loss": 3.4422, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.6987368986831497, |
| "grad_norm": 19.02079963684082, |
| "learning_rate": 4.252599006707245e-05, |
| "loss": 2.9825, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.6998118785272777, |
| "grad_norm": 17.91227912902832, |
| "learning_rate": 4.224718508432113e-05, |
| "loss": 2.7378, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.7008868583714055, |
| "grad_norm": 20.459753036499023, |
| "learning_rate": 4.196905207673201e-05, |
| "loss": 3.7669, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.7019618382155335, |
| "grad_norm": 12.434891700744629, |
| "learning_rate": 4.16915942804846e-05, |
| "loss": 2.6119, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.7030368180596613, |
| "grad_norm": 17.53223419189453, |
| "learning_rate": 4.141481492390197e-05, |
| "loss": 2.7561, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.7041117979037893, |
| "grad_norm": 15.005716323852539, |
| "learning_rate": 4.113871722741337e-05, |
| "loss": 2.5018, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.7051867777479173, |
| "grad_norm": 19.277578353881836, |
| "learning_rate": 4.08633044035167e-05, |
| "loss": 3.2063, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.7062617575920451, |
| "grad_norm": 18.722848892211914, |
| "learning_rate": 4.058857965674101e-05, |
| "loss": 2.4138, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.7073367374361731, |
| "grad_norm": 12.36088752746582, |
| "learning_rate": 4.031454618360945e-05, |
| "loss": 2.4916, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.708411717280301, |
| "grad_norm": 18.981199264526367, |
| "learning_rate": 4.0041207172601826e-05, |
| "loss": 3.578, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.7094866971244289, |
| "grad_norm": 15.9765625, |
| "learning_rate": 3.976856580411774e-05, |
| "loss": 2.896, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.7105616769685569, |
| "grad_norm": 10.438156127929688, |
| "learning_rate": 3.9496625250439344e-05, |
| "loss": 2.5757, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.7116366568126847, |
| "grad_norm": 15.564522743225098, |
| "learning_rate": 3.922538867569466e-05, |
| "loss": 2.7255, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.7127116366568127, |
| "grad_norm": 10.465269088745117, |
| "learning_rate": 3.8954859235820664e-05, |
| "loss": 1.888, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.7137866165009407, |
| "grad_norm": 10.99638843536377, |
| "learning_rate": 3.8685040078526415e-05, |
| "loss": 2.6292, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.7148615963450685, |
| "grad_norm": 14.457728385925293, |
| "learning_rate": 3.841593434325675e-05, |
| "loss": 2.3995, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.7159365761891965, |
| "grad_norm": 17.016172409057617, |
| "learning_rate": 3.814754516115544e-05, |
| "loss": 2.8181, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.7170115560333243, |
| "grad_norm": 18.437854766845703, |
| "learning_rate": 3.787987565502902e-05, |
| "loss": 2.1409, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.7180865358774523, |
| "grad_norm": 19.192140579223633, |
| "learning_rate": 3.761292893931019e-05, |
| "loss": 3.0362, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.7191615157215803, |
| "grad_norm": 18.337635040283203, |
| "learning_rate": 3.734670812002183e-05, |
| "loss": 3.1711, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.7202364955657081, |
| "grad_norm": 12.289979934692383, |
| "learning_rate": 3.708121629474077e-05, |
| "loss": 2.4776, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.7213114754098361, |
| "grad_norm": 16.519603729248047, |
| "learning_rate": 3.681645655256159e-05, |
| "loss": 2.5979, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.7223864552539639, |
| "grad_norm": 16.15471076965332, |
| "learning_rate": 3.655243197406097e-05, |
| "loss": 3.1705, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.7234614350980919, |
| "grad_norm": 14.030595779418945, |
| "learning_rate": 3.628914563126156e-05, |
| "loss": 2.5392, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.7245364149422199, |
| "grad_norm": 14.324786186218262, |
| "learning_rate": 3.6026600587596484e-05, |
| "loss": 3.4405, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.7256113947863477, |
| "grad_norm": 14.356512069702148, |
| "learning_rate": 3.576479989787345e-05, |
| "loss": 2.7351, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.7266863746304757, |
| "grad_norm": 22.63331413269043, |
| "learning_rate": 3.550374660823949e-05, |
| "loss": 4.4579, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.7277613544746036, |
| "grad_norm": 16.112449645996094, |
| "learning_rate": 3.52434437561452e-05, |
| "loss": 3.2883, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.7288363343187315, |
| "grad_norm": 15.933297157287598, |
| "learning_rate": 3.4983894370309665e-05, |
| "loss": 3.1271, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.7299113141628595, |
| "grad_norm": 18.30646514892578, |
| "learning_rate": 3.472510147068515e-05, |
| "loss": 3.1462, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.7309862940069873, |
| "grad_norm": 14.52500057220459, |
| "learning_rate": 3.446706806842177e-05, |
| "loss": 3.5975, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.7320612738511153, |
| "grad_norm": 19.05596160888672, |
| "learning_rate": 3.420979716583279e-05, |
| "loss": 3.3156, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.7331362536952433, |
| "grad_norm": 17.002887725830078, |
| "learning_rate": 3.395329175635935e-05, |
| "loss": 2.8059, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.7342112335393711, |
| "grad_norm": 13.947765350341797, |
| "learning_rate": 3.369755482453594e-05, |
| "loss": 2.3958, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.7352862133834991, |
| "grad_norm": 14.15211009979248, |
| "learning_rate": 3.344258934595539e-05, |
| "loss": 2.7893, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.7363611932276269, |
| "grad_norm": 16.130531311035156, |
| "learning_rate": 3.31883982872345e-05, |
| "loss": 2.4694, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.7374361730717549, |
| "grad_norm": 19.640533447265625, |
| "learning_rate": 3.2934984605979424e-05, |
| "loss": 3.6272, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.7385111529158829, |
| "grad_norm": 16.92746925354004, |
| "learning_rate": 3.268235125075111e-05, |
| "loss": 3.1489, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.7395861327600107, |
| "grad_norm": 13.478626251220703, |
| "learning_rate": 3.243050116103128e-05, |
| "loss": 4.0042, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.7406611126041387, |
| "grad_norm": 16.014270782470703, |
| "learning_rate": 3.217943726718795e-05, |
| "loss": 2.8463, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.7417360924482665, |
| "grad_norm": 10.819585800170898, |
| "learning_rate": 3.1929162490441565e-05, |
| "loss": 2.3828, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.7428110722923945, |
| "grad_norm": 13.190820693969727, |
| "learning_rate": 3.16796797428308e-05, |
| "loss": 2.5904, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.7438860521365225, |
| "grad_norm": 16.281742095947266, |
| "learning_rate": 3.1430991927178866e-05, |
| "loss": 2.2573, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.7449610319806503, |
| "grad_norm": 17.6481990814209, |
| "learning_rate": 3.1183101937059647e-05, |
| "loss": 2.8613, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.7460360118247783, |
| "grad_norm": 15.284998893737793, |
| "learning_rate": 3.093601265676393e-05, |
| "loss": 3.2382, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.7471109916689062, |
| "grad_norm": 15.303094863891602, |
| "learning_rate": 3.068972696126611e-05, |
| "loss": 2.2713, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.7481859715130341, |
| "grad_norm": 17.59407615661621, |
| "learning_rate": 3.044424771619041e-05, |
| "loss": 2.976, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.7492609513571621, |
| "grad_norm": 15.894051551818848, |
| "learning_rate": 3.0199577777777875e-05, |
| "loss": 3.3442, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.7503359312012899, |
| "grad_norm": 13.673702239990234, |
| "learning_rate": 2.9955719992852804e-05, |
| "loss": 2.5122, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.7514109110454179, |
| "grad_norm": 16.84321403503418, |
| "learning_rate": 2.9712677198789916e-05, |
| "loss": 2.9834, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.7514109110454179, |
| "eval_loss": 0.6793206930160522, |
| "eval_runtime": 5.5979, |
| "eval_samples_per_second": 70.026, |
| "eval_steps_per_second": 35.013, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.7524858908895459, |
| "grad_norm": 13.654646873474121, |
| "learning_rate": 2.9470452223481204e-05, |
| "loss": 2.9831, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.7535608707336737, |
| "grad_norm": 9.951108932495117, |
| "learning_rate": 2.922904788530293e-05, |
| "loss": 2.2996, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.7546358505778017, |
| "grad_norm": 9.867563247680664, |
| "learning_rate": 2.8988466993083097e-05, |
| "loss": 2.1596, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.7557108304219295, |
| "grad_norm": 14.348727226257324, |
| "learning_rate": 2.8748712346068464e-05, |
| "loss": 2.8225, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.7567858102660575, |
| "grad_norm": 14.35478687286377, |
| "learning_rate": 2.8509786733892264e-05, |
| "loss": 2.9542, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.7578607901101855, |
| "grad_norm": 23.562644958496094, |
| "learning_rate": 2.827169293654147e-05, |
| "loss": 4.984, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.7589357699543133, |
| "grad_norm": 11.014055252075195, |
| "learning_rate": 2.8034433724324715e-05, |
| "loss": 2.2337, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.7600107497984413, |
| "grad_norm": 14.4437837600708, |
| "learning_rate": 2.77980118578398e-05, |
| "loss": 2.8005, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.7610857296425692, |
| "grad_norm": 14.065092086791992, |
| "learning_rate": 2.7562430087941814e-05, |
| "loss": 3.0488, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.7621607094866971, |
| "grad_norm": 18.38102912902832, |
| "learning_rate": 2.7327691155710976e-05, |
| "loss": 2.857, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.7632356893308251, |
| "grad_norm": 15.708161354064941, |
| "learning_rate": 2.7093797792420728e-05, |
| "loss": 3.3492, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.7643106691749529, |
| "grad_norm": 13.264737129211426, |
| "learning_rate": 2.68607527195061e-05, |
| "loss": 2.2699, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.7653856490190809, |
| "grad_norm": 22.40721893310547, |
| "learning_rate": 2.6628558648531843e-05, |
| "loss": 3.6175, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.7664606288632088, |
| "grad_norm": 16.45822525024414, |
| "learning_rate": 2.639721828116112e-05, |
| "loss": 1.7208, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.7675356087073367, |
| "grad_norm": 15.185565948486328, |
| "learning_rate": 2.6166734309123787e-05, |
| "loss": 2.439, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.7686105885514647, |
| "grad_norm": 13.136150360107422, |
| "learning_rate": 2.5937109414185366e-05, |
| "loss": 2.9833, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.7696855683955925, |
| "grad_norm": 13.081589698791504, |
| "learning_rate": 2.5708346268115647e-05, |
| "loss": 2.4198, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.7707605482397205, |
| "grad_norm": 10.561399459838867, |
| "learning_rate": 2.5480447532657624e-05, |
| "loss": 2.3794, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.7718355280838485, |
| "grad_norm": 15.137680053710938, |
| "learning_rate": 2.525341585949662e-05, |
| "loss": 2.6444, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.7729105079279763, |
| "grad_norm": 18.32636070251465, |
| "learning_rate": 2.5027253890229285e-05, |
| "loss": 2.978, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.7739854877721043, |
| "grad_norm": 14.093949317932129, |
| "learning_rate": 2.4801964256333053e-05, |
| "loss": 2.4359, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.7750604676162322, |
| "grad_norm": 15.987081527709961, |
| "learning_rate": 2.457754957913532e-05, |
| "loss": 3.6221, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.7761354474603601, |
| "grad_norm": 14.7132568359375, |
| "learning_rate": 2.4354012469783094e-05, |
| "loss": 2.7112, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.7772104273044881, |
| "grad_norm": 15.25096321105957, |
| "learning_rate": 2.4131355529212573e-05, |
| "loss": 2.8029, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.7782854071486159, |
| "grad_norm": 11.852306365966797, |
| "learning_rate": 2.3909581348118805e-05, |
| "loss": 2.6288, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.7793603869927439, |
| "grad_norm": 14.22082805633545, |
| "learning_rate": 2.368869250692567e-05, |
| "loss": 2.6691, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.7804353668368718, |
| "grad_norm": 14.895218849182129, |
| "learning_rate": 2.346869157575574e-05, |
| "loss": 2.223, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.7815103466809997, |
| "grad_norm": 11.507866859436035, |
| "learning_rate": 2.324958111440051e-05, |
| "loss": 2.4171, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.7825853265251277, |
| "grad_norm": 10.818052291870117, |
| "learning_rate": 2.3031363672290406e-05, |
| "loss": 2.4578, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.7836603063692555, |
| "grad_norm": 14.49646282196045, |
| "learning_rate": 2.28140417884654e-05, |
| "loss": 3.3386, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.7847352862133835, |
| "grad_norm": 16.65618896484375, |
| "learning_rate": 2.2597617991545162e-05, |
| "loss": 2.4579, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.7858102660575115, |
| "grad_norm": 21.370105743408203, |
| "learning_rate": 2.2382094799699917e-05, |
| "loss": 2.0147, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.7868852459016393, |
| "grad_norm": 18.23256492614746, |
| "learning_rate": 2.2167474720620974e-05, |
| "loss": 2.8713, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.7879602257457673, |
| "grad_norm": 13.314383506774902, |
| "learning_rate": 2.1953760251491563e-05, |
| "loss": 2.5137, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.7890352055898951, |
| "grad_norm": 13.909300804138184, |
| "learning_rate": 2.174095387895786e-05, |
| "loss": 2.7543, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.7901101854340231, |
| "grad_norm": 12.180556297302246, |
| "learning_rate": 2.152905807909995e-05, |
| "loss": 2.0877, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.7911851652781511, |
| "grad_norm": 22.62627410888672, |
| "learning_rate": 2.131807531740315e-05, |
| "loss": 4.0689, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.7922601451222789, |
| "grad_norm": 15.548689842224121, |
| "learning_rate": 2.1108008048729145e-05, |
| "loss": 3.4416, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.7933351249664069, |
| "grad_norm": 17.072410583496094, |
| "learning_rate": 2.0898858717287594e-05, |
| "loss": 4.3289, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.7944101048105348, |
| "grad_norm": 12.466560363769531, |
| "learning_rate": 2.0690629756607648e-05, |
| "loss": 2.6264, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.7954850846546627, |
| "grad_norm": 10.822991371154785, |
| "learning_rate": 2.0483323589509483e-05, |
| "loss": 1.9285, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.7965600644987907, |
| "grad_norm": 16.706153869628906, |
| "learning_rate": 2.0276942628076378e-05, |
| "loss": 3.2333, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.7976350443429185, |
| "grad_norm": 13.810402870178223, |
| "learning_rate": 2.0071489273626376e-05, |
| "loss": 2.5904, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.7987100241870465, |
| "grad_norm": 15.366273880004883, |
| "learning_rate": 1.9866965916684587e-05, |
| "loss": 2.8742, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.7997850040311744, |
| "grad_norm": 11.5558500289917, |
| "learning_rate": 1.966337493695516e-05, |
| "loss": 2.3341, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.8008599838753023, |
| "grad_norm": 17.35418701171875, |
| "learning_rate": 1.9460718703293768e-05, |
| "loss": 2.6252, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.8019349637194303, |
| "grad_norm": 11.60128402709961, |
| "learning_rate": 1.925899957367996e-05, |
| "loss": 2.109, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.8030099435635581, |
| "grad_norm": 12.664255142211914, |
| "learning_rate": 1.9058219895189666e-05, |
| "loss": 2.339, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.8040849234076861, |
| "grad_norm": 16.977569580078125, |
| "learning_rate": 1.8858382003968078e-05, |
| "loss": 2.3301, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.8051599032518141, |
| "grad_norm": 12.833663940429688, |
| "learning_rate": 1.8659488225202226e-05, |
| "loss": 2.461, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.8062348830959419, |
| "grad_norm": 14.8052339553833, |
| "learning_rate": 1.846154087309414e-05, |
| "loss": 2.9448, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.8073098629400699, |
| "grad_norm": 16.044788360595703, |
| "learning_rate": 1.826454225083375e-05, |
| "loss": 2.9095, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.8083848427841978, |
| "grad_norm": 24.449602127075195, |
| "learning_rate": 1.8068494650572243e-05, |
| "loss": 3.3211, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.8094598226283257, |
| "grad_norm": 18.429533004760742, |
| "learning_rate": 1.787340035339524e-05, |
| "loss": 3.7572, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.8105348024724537, |
| "grad_norm": 18.038867950439453, |
| "learning_rate": 1.7679261629296408e-05, |
| "loss": 3.5666, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.8116097823165815, |
| "grad_norm": 17.882884979248047, |
| "learning_rate": 1.7486080737150945e-05, |
| "loss": 3.4553, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.8126847621607095, |
| "grad_norm": 18.42115592956543, |
| "learning_rate": 1.7293859924689258e-05, |
| "loss": 2.3743, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.8137597420048374, |
| "grad_norm": 10.389144897460938, |
| "learning_rate": 1.7102601428470987e-05, |
| "loss": 1.7921, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.8148347218489653, |
| "grad_norm": 19.917224884033203, |
| "learning_rate": 1.691230747385878e-05, |
| "loss": 4.1469, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.8159097016930933, |
| "grad_norm": 22.9906005859375, |
| "learning_rate": 1.672298027499254e-05, |
| "loss": 3.5433, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.8169846815372211, |
| "grad_norm": 9.970109939575195, |
| "learning_rate": 1.653462203476356e-05, |
| "loss": 1.9898, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.8180596613813491, |
| "grad_norm": 15.049894332885742, |
| "learning_rate": 1.6347234944789014e-05, |
| "loss": 3.014, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.819134641225477, |
| "grad_norm": 18.622976303100586, |
| "learning_rate": 1.6160821185386364e-05, |
| "loss": 2.6812, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.8202096210696049, |
| "grad_norm": 18.029285430908203, |
| "learning_rate": 1.5975382925547965e-05, |
| "loss": 2.8567, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.8212846009137329, |
| "grad_norm": 17.624483108520508, |
| "learning_rate": 1.5790922322915958e-05, |
| "loss": 3.039, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.8223595807578608, |
| "grad_norm": 21.21023178100586, |
| "learning_rate": 1.5607441523756993e-05, |
| "loss": 3.2504, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.8234345606019887, |
| "grad_norm": 15.159942626953125, |
| "learning_rate": 1.5424942662937435e-05, |
| "loss": 2.2915, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.8245095404461167, |
| "grad_norm": 16.94089126586914, |
| "learning_rate": 1.5243427863898364e-05, |
| "loss": 2.781, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.8255845202902445, |
| "grad_norm": 14.287386894226074, |
| "learning_rate": 1.5062899238631e-05, |
| "loss": 2.2052, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.8266595001343725, |
| "grad_norm": 23.06917953491211, |
| "learning_rate": 1.4883358887652044e-05, |
| "loss": 4.0856, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.8277344799785004, |
| "grad_norm": 14.637825012207031, |
| "learning_rate": 1.4704808899979239e-05, |
| "loss": 2.954, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.8288094598226283, |
| "grad_norm": 9.044286727905273, |
| "learning_rate": 1.4527251353107163e-05, |
| "loss": 1.8635, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.8298844396667563, |
| "grad_norm": 17.238901138305664, |
| "learning_rate": 1.4350688312982864e-05, |
| "loss": 2.5819, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.8309594195108841, |
| "grad_norm": 11.831186294555664, |
| "learning_rate": 1.4175121833982052e-05, |
| "loss": 2.4721, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.8320343993550121, |
| "grad_norm": 21.131061553955078, |
| "learning_rate": 1.4000553958885021e-05, |
| "loss": 4.1583, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.83310937919914, |
| "grad_norm": 15.86319637298584, |
| "learning_rate": 1.3826986718852952e-05, |
| "loss": 2.5534, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.8341843590432679, |
| "grad_norm": 16.642955780029297, |
| "learning_rate": 1.365442213340432e-05, |
| "loss": 2.3907, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.8352593388873959, |
| "grad_norm": 19.222917556762695, |
| "learning_rate": 1.3482862210391245e-05, |
| "loss": 3.667, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.8363343187315238, |
| "grad_norm": 18.172245025634766, |
| "learning_rate": 1.3312308945976348e-05, |
| "loss": 3.029, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.8374092985756517, |
| "grad_norm": 15.490896224975586, |
| "learning_rate": 1.3142764324609303e-05, |
| "loss": 3.208, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.8384842784197797, |
| "grad_norm": 18.96816062927246, |
| "learning_rate": 1.2974230319003944e-05, |
| "loss": 3.4618, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.8395592582639075, |
| "grad_norm": 21.89808464050293, |
| "learning_rate": 1.2806708890115138e-05, |
| "loss": 4.5078, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.8406342381080355, |
| "grad_norm": 14.64857006072998, |
| "learning_rate": 1.2640201987116117e-05, |
| "loss": 3.2866, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.8417092179521634, |
| "grad_norm": 15.451770782470703, |
| "learning_rate": 1.2474711547375683e-05, |
| "loss": 2.3176, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.8427841977962913, |
| "grad_norm": 12.625808715820312, |
| "learning_rate": 1.2310239496435749e-05, |
| "loss": 2.557, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.8438591776404193, |
| "grad_norm": 16.126930236816406, |
| "learning_rate": 1.2146787747988919e-05, |
| "loss": 2.9173, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.8449341574845471, |
| "grad_norm": 16.86185646057129, |
| "learning_rate": 1.1984358203856116e-05, |
| "loss": 2.4147, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.8460091373286751, |
| "grad_norm": 17.082571029663086, |
| "learning_rate": 1.1822952753964667e-05, |
| "loss": 2.9913, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.847084117172803, |
| "grad_norm": 15.173123359680176, |
| "learning_rate": 1.1662573276326061e-05, |
| "loss": 2.8246, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.8481590970169309, |
| "grad_norm": 17.791603088378906, |
| "learning_rate": 1.1503221637014327e-05, |
| "loss": 3.7321, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.8492340768610589, |
| "grad_norm": 16.170446395874023, |
| "learning_rate": 1.134489969014414e-05, |
| "loss": 3.1235, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.8503090567051867, |
| "grad_norm": 13.791912078857422, |
| "learning_rate": 1.1187609277849376e-05, |
| "loss": 2.7921, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.8513840365493147, |
| "grad_norm": 12.047784805297852, |
| "learning_rate": 1.1031352230261637e-05, |
| "loss": 1.8449, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.8524590163934426, |
| "grad_norm": 21.286800384521484, |
| "learning_rate": 1.0876130365488878e-05, |
| "loss": 3.6973, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.8535339962375705, |
| "grad_norm": 15.110943794250488, |
| "learning_rate": 1.072194548959442e-05, |
| "loss": 3.3045, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.8546089760816985, |
| "grad_norm": 11.98826789855957, |
| "learning_rate": 1.0568799396575746e-05, |
| "loss": 2.831, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.8556839559258264, |
| "grad_norm": 18.719282150268555, |
| "learning_rate": 1.0416693868343797e-05, |
| "loss": 3.2608, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.8567589357699543, |
| "grad_norm": 14.92679500579834, |
| "learning_rate": 1.0265630674702076e-05, |
| "loss": 2.494, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.8578339156140823, |
| "grad_norm": 12.818703651428223, |
| "learning_rate": 1.0115611573326233e-05, |
| "loss": 2.6008, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.8589088954582101, |
| "grad_norm": 17.74081039428711, |
| "learning_rate": 9.966638309743482e-06, |
| "loss": 3.2788, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.8599838753023381, |
| "grad_norm": 13.859757423400879, |
| "learning_rate": 9.818712617312287e-06, |
| "loss": 2.3176, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.861058855146466, |
| "grad_norm": 26.295276641845703, |
| "learning_rate": 9.671836217202334e-06, |
| "loss": 3.3959, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.8621338349905939, |
| "grad_norm": 15.147491455078125, |
| "learning_rate": 9.52601081837431e-06, |
| "loss": 3.1437, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.8632088148347219, |
| "grad_norm": 12.152910232543945, |
| "learning_rate": 9.381238117560187e-06, |
| "loss": 2.4838, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.8642837946788497, |
| "grad_norm": 28.314746856689453, |
| "learning_rate": 9.237519799243355e-06, |
| "loss": 3.2608, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.8653587745229777, |
| "grad_norm": 14.662104606628418, |
| "learning_rate": 9.094857535639156e-06, |
| "loss": 2.9242, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.8664337543671056, |
| "grad_norm": 13.321383476257324, |
| "learning_rate": 8.95325298667523e-06, |
| "loss": 2.8041, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.8675087342112335, |
| "grad_norm": 16.60413360595703, |
| "learning_rate": 8.812707799972442e-06, |
| "loss": 2.5522, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.8685837140553615, |
| "grad_norm": 16.389780044555664, |
| "learning_rate": 8.673223610825531e-06, |
| "loss": 2.647, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.8696586938994894, |
| "grad_norm": 10.362971305847168, |
| "learning_rate": 8.53480204218412e-06, |
| "loss": 2.1054, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.8707336737436173, |
| "grad_norm": 18.633100509643555, |
| "learning_rate": 8.397444704633906e-06, |
| "loss": 3.8698, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.8718086535877452, |
| "grad_norm": 14.801193237304688, |
| "learning_rate": 8.261153196377814e-06, |
| "loss": 2.1947, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.8728836334318731, |
| "grad_norm": 16.298856735229492, |
| "learning_rate": 8.1259291032175e-06, |
| "loss": 3.4508, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.8739586132760011, |
| "grad_norm": 17.139835357666016, |
| "learning_rate": 7.991773998534802e-06, |
| "loss": 2.6625, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.875033593120129, |
| "grad_norm": 14.451316833496094, |
| "learning_rate": 7.858689443273547e-06, |
| "loss": 2.5462, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.8761085729642569, |
| "grad_norm": 19.52531623840332, |
| "learning_rate": 7.72667698592131e-06, |
| "loss": 2.7614, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.8771835528083849, |
| "grad_norm": 23.297826766967773, |
| "learning_rate": 7.595738162491383e-06, |
| "loss": 2.0079, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.8782585326525127, |
| "grad_norm": 16.850067138671875, |
| "learning_rate": 7.465874496504943e-06, |
| "loss": 3.0355, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.8793335124966407, |
| "grad_norm": 19.124881744384766, |
| "learning_rate": 7.337087498973327e-06, |
| "loss": 3.1917, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.8804084923407686, |
| "grad_norm": 16.559707641601562, |
| "learning_rate": 7.209378668380451e-06, |
| "loss": 2.6744, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.8814834721848965, |
| "grad_norm": 11.441034317016602, |
| "learning_rate": 7.0827494906653526e-06, |
| "loss": 2.2681, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.8825584520290245, |
| "grad_norm": 18.97810935974121, |
| "learning_rate": 6.957201439204897e-06, |
| "loss": 3.6872, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.8836334318731524, |
| "grad_norm": 15.341508865356445, |
| "learning_rate": 6.832735974796689e-06, |
| "loss": 2.0165, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.8847084117172803, |
| "grad_norm": 16.359432220458984, |
| "learning_rate": 6.7093545456419886e-06, |
| "loss": 3.3588, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.8857833915614082, |
| "grad_norm": 16.616304397583008, |
| "learning_rate": 6.5870585873289425e-06, |
| "loss": 3.643, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.8868583714055361, |
| "grad_norm": 14.876137733459473, |
| "learning_rate": 6.4658495228158146e-06, |
| "loss": 2.8213, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.8879333512496641, |
| "grad_norm": 11.03225326538086, |
| "learning_rate": 6.345728762414505e-06, |
| "loss": 2.1051, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.889008331093792, |
| "grad_norm": 17.549360275268555, |
| "learning_rate": 6.226697703774076e-06, |
| "loss": 3.0634, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.8900833109379199, |
| "grad_norm": 12.46760368347168, |
| "learning_rate": 6.108757731864489e-06, |
| "loss": 2.7482, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.8911582907820479, |
| "grad_norm": 11.985771179199219, |
| "learning_rate": 5.99191021896055e-06, |
| "loss": 2.4109, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.8922332706261757, |
| "grad_norm": 15.262474060058594, |
| "learning_rate": 5.876156524625864e-06, |
| "loss": 3.0215, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.8933082504703037, |
| "grad_norm": 12.595239639282227, |
| "learning_rate": 5.7614979956971075e-06, |
| "loss": 2.726, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.8943832303144316, |
| "grad_norm": 18.540124893188477, |
| "learning_rate": 5.647935966268225e-06, |
| "loss": 1.9021, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.8954582101585595, |
| "grad_norm": 12.652937889099121, |
| "learning_rate": 5.5354717576750816e-06, |
| "loss": 2.366, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.8965331900026875, |
| "grad_norm": 13.983992576599121, |
| "learning_rate": 5.424106678479945e-06, |
| "loss": 2.7013, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.8976081698468154, |
| "grad_norm": 15.04990291595459, |
| "learning_rate": 5.313842024456306e-06, |
| "loss": 2.3414, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.8986831496909433, |
| "grad_norm": 18.419647216796875, |
| "learning_rate": 5.204679078573827e-06, |
| "loss": 3.8336, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.8997581295350712, |
| "grad_norm": 17.847749710083008, |
| "learning_rate": 5.096619110983347e-06, |
| "loss": 2.9503, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.9008331093791991, |
| "grad_norm": 15.034934043884277, |
| "learning_rate": 4.9896633790022405e-06, |
| "loss": 2.6579, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.9019080892233271, |
| "grad_norm": 16.45047950744629, |
| "learning_rate": 4.883813127099579e-06, |
| "loss": 3.8603, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.902983069067455, |
| "grad_norm": 14.44510269165039, |
| "learning_rate": 4.779069586881857e-06, |
| "loss": 2.7965, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.9040580489115829, |
| "grad_norm": 14.151533126831055, |
| "learning_rate": 4.675433977078547e-06, |
| "loss": 2.3626, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.9051330287557108, |
| "grad_norm": 20.559478759765625, |
| "learning_rate": 4.572907503527923e-06, |
| "loss": 2.3899, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.9062080085998387, |
| "grad_norm": 11.681385040283203, |
| "learning_rate": 4.471491359163094e-06, |
| "loss": 2.3098, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.9072829884439667, |
| "grad_norm": 16.471866607666016, |
| "learning_rate": 4.3711867239980335e-06, |
| "loss": 2.6948, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.9083579682880946, |
| "grad_norm": 10.209484100341797, |
| "learning_rate": 4.271994765113952e-06, |
| "loss": 2.1588, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.9094329481322225, |
| "grad_norm": 15.725363731384277, |
| "learning_rate": 4.173916636645591e-06, |
| "loss": 2.8127, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.9105079279763505, |
| "grad_norm": 15.666772842407227, |
| "learning_rate": 4.0769534797679645e-06, |
| "loss": 2.667, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.9115829078204783, |
| "grad_norm": 15.909188270568848, |
| "learning_rate": 3.9811064226828895e-06, |
| "loss": 3.317, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.9126578876646063, |
| "grad_norm": 11.588101387023926, |
| "learning_rate": 3.8863765806060105e-06, |
| "loss": 1.9476, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.9137328675087342, |
| "grad_norm": 10.225213050842285, |
| "learning_rate": 3.7927650557537555e-06, |
| "loss": 2.1618, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.9148078473528621, |
| "grad_norm": 19.92328643798828, |
| "learning_rate": 3.7002729373304957e-06, |
| "loss": 2.6487, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.9158828271969901, |
| "grad_norm": 21.881338119506836, |
| "learning_rate": 3.6089013015159433e-06, |
| "loss": 4.0831, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.916957807041118, |
| "grad_norm": 16.8553524017334, |
| "learning_rate": 3.5186512114525282e-06, |
| "loss": 3.4859, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.9180327868852459, |
| "grad_norm": 15.485822677612305, |
| "learning_rate": 3.4295237172331516e-06, |
| "loss": 2.7058, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.9191077667293738, |
| "grad_norm": 11.821995735168457, |
| "learning_rate": 3.3415198558888305e-06, |
| "loss": 2.4467, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.9201827465735017, |
| "grad_norm": 10.078314781188965, |
| "learning_rate": 3.2546406513767504e-06, |
| "loss": 1.897, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.9212577264176297, |
| "grad_norm": 13.78282356262207, |
| "learning_rate": 3.1688871145683086e-06, |
| "loss": 2.9327, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.9223327062617576, |
| "grad_norm": 17.97776222229004, |
| "learning_rate": 3.0842602432373024e-06, |
| "loss": 2.6016, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.9234076861058855, |
| "grad_norm": 17.566068649291992, |
| "learning_rate": 3.0007610220483927e-06, |
| "loss": 2.7243, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.9244826659500134, |
| "grad_norm": 12.550614356994629, |
| "learning_rate": 2.918390422545614e-06, |
| "loss": 2.2397, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.9255576457941413, |
| "grad_norm": 26.070829391479492, |
| "learning_rate": 2.8371494031410704e-06, |
| "loss": 3.6046, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.9266326256382693, |
| "grad_norm": 20.401790618896484, |
| "learning_rate": 2.757038909103793e-06, |
| "loss": 2.7354, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.9277076054823972, |
| "grad_norm": 10.504318237304688, |
| "learning_rate": 2.6780598725487214e-06, |
| "loss": 2.4165, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.9287825853265251, |
| "grad_norm": 12.440912246704102, |
| "learning_rate": 2.6002132124258947e-06, |
| "loss": 2.0556, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.9298575651706531, |
| "grad_norm": 15.771154403686523, |
| "learning_rate": 2.5234998345097238e-06, |
| "loss": 3.0948, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.930932545014781, |
| "grad_norm": 13.663370132446289, |
| "learning_rate": 2.4479206313884784e-06, |
| "loss": 2.6857, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.9320075248589089, |
| "grad_norm": 12.99375057220459, |
| "learning_rate": 2.3734764824538515e-06, |
| "loss": 2.46, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.9330825047030368, |
| "grad_norm": 10.756906509399414, |
| "learning_rate": 2.300168253890833e-06, |
| "loss": 2.2951, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.9341574845471647, |
| "grad_norm": 15.61550521850586, |
| "learning_rate": 2.2279967986674756e-06, |
| "loss": 2.947, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.9352324643912927, |
| "grad_norm": 17.821706771850586, |
| "learning_rate": 2.1569629565251546e-06, |
| "loss": 2.9358, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.9363074442354206, |
| "grad_norm": 13.073902130126953, |
| "learning_rate": 2.0870675539686023e-06, |
| "loss": 2.665, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.9373824240795485, |
| "grad_norm": 16.576457977294922, |
| "learning_rate": 2.0183114042564567e-06, |
| "loss": 3.3876, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.9384574039236764, |
| "grad_norm": 12.26412296295166, |
| "learning_rate": 1.9506953073917365e-06, |
| "loss": 2.3773, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.9395323837678043, |
| "grad_norm": 15.137334823608398, |
| "learning_rate": 1.8842200501124618e-06, |
| "loss": 2.353, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.9406073636119323, |
| "grad_norm": 11.213092803955078, |
| "learning_rate": 1.818886405882636e-06, |
| "loss": 1.8275, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.9416823434560602, |
| "grad_norm": 12.951726913452148, |
| "learning_rate": 1.7546951348831441e-06, |
| "loss": 2.4801, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.9427573233001881, |
| "grad_norm": 9.286628723144531, |
| "learning_rate": 1.6916469840029369e-06, |
| "loss": 1.8095, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.9438323031443161, |
| "grad_norm": 16.789724349975586, |
| "learning_rate": 1.6297426868303378e-06, |
| "loss": 2.2362, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.944907282988444, |
| "grad_norm": 9.946615219116211, |
| "learning_rate": 1.5689829636445496e-06, |
| "loss": 2.0806, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.9459822628325719, |
| "grad_norm": 10.376627922058105, |
| "learning_rate": 1.5093685214072174e-06, |
| "loss": 2.7637, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.9470572426766998, |
| "grad_norm": 10.574774742126465, |
| "learning_rate": 1.4509000537541895e-06, |
| "loss": 2.6576, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.9481322225208277, |
| "grad_norm": 19.413841247558594, |
| "learning_rate": 1.3935782409875476e-06, |
| "loss": 3.1755, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.9492072023649557, |
| "grad_norm": 24.996675491333008, |
| "learning_rate": 1.337403750067545e-06, |
| "loss": 4.3345, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.9502821822090836, |
| "grad_norm": 15.001642227172852, |
| "learning_rate": 1.2823772346050034e-06, |
| "loss": 3.8014, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.9513571620532115, |
| "grad_norm": 20.97924041748047, |
| "learning_rate": 1.2284993348535723e-06, |
| "loss": 3.1809, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.9524321418973394, |
| "grad_norm": 12.326496124267578, |
| "learning_rate": 1.1757706777023592e-06, |
| "loss": 2.0848, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.9535071217414673, |
| "grad_norm": 16.77161979675293, |
| "learning_rate": 1.1241918766686122e-06, |
| "loss": 2.5849, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.9545821015855953, |
| "grad_norm": 18.30064582824707, |
| "learning_rate": 1.0737635318905704e-06, |
| "loss": 2.8427, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.9556570814297232, |
| "grad_norm": 16.708187103271484, |
| "learning_rate": 1.0244862301205249e-06, |
| "loss": 3.3412, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.9567320612738511, |
| "grad_norm": 9.98150634765625, |
| "learning_rate": 9.763605447179137e-07, |
| "loss": 1.9673, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.957807041117979, |
| "grad_norm": 16.982563018798828, |
| "learning_rate": 9.293870356427259e-07, |
| "loss": 2.7158, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.958882020962107, |
| "grad_norm": 14.91385555267334, |
| "learning_rate": 8.835662494489638e-07, |
| "loss": 2.4724, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.9599570008062349, |
| "grad_norm": 11.67471981048584, |
| "learning_rate": 8.388987192782472e-07, |
| "loss": 2.0277, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.9610319806503628, |
| "grad_norm": 17.66205596923828, |
| "learning_rate": 7.953849648536516e-07, |
| "loss": 3.0247, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.9621069604944907, |
| "grad_norm": 12.348430633544922, |
| "learning_rate": 7.53025492473669e-07, |
| "loss": 2.1941, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.9631819403386187, |
| "grad_norm": 11.82780647277832, |
| "learning_rate": 7.118207950062905e-07, |
| "loss": 1.9668, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.9642569201827466, |
| "grad_norm": 11.388262748718262, |
| "learning_rate": 6.717713518832325e-07, |
| "loss": 1.7028, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.9653319000268745, |
| "grad_norm": 20.69382095336914, |
| "learning_rate": 6.328776290944749e-07, |
| "loss": 2.4699, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.9664068798710024, |
| "grad_norm": 16.286813735961914, |
| "learning_rate": 5.9514007918271e-07, |
| "loss": 2.5732, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.9674818597151303, |
| "grad_norm": 16.142581939697266, |
| "learning_rate": 5.585591412381797e-07, |
| "loss": 3.1468, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.9685568395592583, |
| "grad_norm": 21.596254348754883, |
| "learning_rate": 5.231352408934686e-07, |
| "loss": 3.7028, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.9696318194033862, |
| "grad_norm": 10.433320999145508, |
| "learning_rate": 4.88868790318675e-07, |
| "loss": 2.0005, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.9707067992475141, |
| "grad_norm": 20.765460968017578, |
| "learning_rate": 4.557601882164808e-07, |
| "loss": 3.7903, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.971781779091642, |
| "grad_norm": 18.080036163330078, |
| "learning_rate": 4.2380981981759994e-07, |
| "loss": 3.3676, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.97285675893577, |
| "grad_norm": 14.852317810058594, |
| "learning_rate": 3.930180568762931e-07, |
| "loss": 2.6543, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.9739317387798979, |
| "grad_norm": 18.983341217041016, |
| "learning_rate": 3.633852576659935e-07, |
| "loss": 3.433, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.9750067186240258, |
| "grad_norm": 14.545281410217285, |
| "learning_rate": 3.3491176697517667e-07, |
| "loss": 2.89, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.9760816984681537, |
| "grad_norm": 10.29315185546875, |
| "learning_rate": 3.0759791610335267e-07, |
| "loss": 2.0145, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.9771566783122816, |
| "grad_norm": 21.322124481201172, |
| "learning_rate": 2.81444022857158e-07, |
| "loss": 3.5667, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.9782316581564096, |
| "grad_norm": 13.829008102416992, |
| "learning_rate": 2.5645039154675864e-07, |
| "loss": 2.6689, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.9793066380005375, |
| "grad_norm": 15.563042640686035, |
| "learning_rate": 2.3261731298217514e-07, |
| "loss": 2.4102, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.9803816178446654, |
| "grad_norm": 17.429529190063477, |
| "learning_rate": 2.099450644700407e-07, |
| "loss": 2.4791, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.9814565976887933, |
| "grad_norm": 15.122339248657227, |
| "learning_rate": 1.8843390981024834e-07, |
| "loss": 2.1212, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.9825315775329213, |
| "grad_norm": 17.688457489013672, |
| "learning_rate": 1.6808409929298663e-07, |
| "loss": 2.279, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.9836065573770492, |
| "grad_norm": 17.063894271850586, |
| "learning_rate": 1.488958696957421e-07, |
| "loss": 3.7765, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.9846815372211771, |
| "grad_norm": 10.769682884216309, |
| "learning_rate": 1.3086944428060132e-07, |
| "loss": 2.1393, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.985756517065305, |
| "grad_norm": 10.24843692779541, |
| "learning_rate": 1.1400503279163088e-07, |
| "loss": 2.2815, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.986831496909433, |
| "grad_norm": 13.591650009155273, |
| "learning_rate": 9.83028314524348e-08, |
| "loss": 2.809, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.9879064767535609, |
| "grad_norm": 13.300793647766113, |
| "learning_rate": 8.376302296387861e-08, |
| "loss": 2.9568, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.9889814565976888, |
| "grad_norm": 16.980257034301758, |
| "learning_rate": 7.038577650195777e-08, |
| "loss": 2.0696, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.9900564364418167, |
| "grad_norm": 17.34079360961914, |
| "learning_rate": 5.8171247715854696e-08, |
| "loss": 3.1045, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.9911314162859446, |
| "grad_norm": 18.76333999633789, |
| "learning_rate": 4.711957872606254e-08, |
| "loss": 2.967, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.9922063961300726, |
| "grad_norm": 15.434213638305664, |
| "learning_rate": 3.7230898122808665e-08, |
| "loss": 3.0553, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.9932813759742005, |
| "grad_norm": 20.012191772460938, |
| "learning_rate": 2.850532096452252e-08, |
| "loss": 3.6372, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.9943563558183284, |
| "grad_norm": 14.739432334899902, |
| "learning_rate": 2.0942948776481175e-08, |
| "loss": 1.9846, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.9954313356624563, |
| "grad_norm": 14.258848190307617, |
| "learning_rate": 1.4543869549665801e-08, |
| "loss": 2.9188, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.9965063155065843, |
| "grad_norm": 14.79078483581543, |
| "learning_rate": 9.308157739706946e-09, |
| "loss": 2.4791, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.9975812953507122, |
| "grad_norm": 17.466236114501953, |
| "learning_rate": 5.23587426601857e-09, |
| "loss": 2.1995, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.9986562751948401, |
| "grad_norm": 19.64586067199707, |
| "learning_rate": 2.327066511120801e-09, |
| "loss": 2.872, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.999731255038968, |
| "grad_norm": 12.937053680419922, |
| "learning_rate": 5.817683200515233e-10, |
| "loss": 2.6175, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.000806234883096, |
| "grad_norm": 16.165599822998047, |
| "learning_rate": 0.0, |
| "loss": 2.2581, |
| "step": 931 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 931, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 233, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7022441851256832.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|