{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9958417169684776,
  "eval_steps": 500,
  "global_step": 232,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004292421193829644,
      "grad_norm": 3.342827936598552,
      "learning_rate": 1.4285714285714286e-06,
      "loss": 0.679,
      "step": 1
    },
    {
      "epoch": 0.008584842387659289,
      "grad_norm": 2.896501931914046,
      "learning_rate": 2.8571428571428573e-06,
      "loss": 0.6709,
      "step": 2
    },
    {
      "epoch": 0.012877263581488933,
      "grad_norm": 3.3718581451101177,
      "learning_rate": 4.2857142857142855e-06,
      "loss": 0.6619,
      "step": 3
    },
    {
      "epoch": 0.017169684775318578,
      "grad_norm": 2.529609856453942,
      "learning_rate": 5.7142857142857145e-06,
      "loss": 0.7753,
      "step": 4
    },
    {
      "epoch": 0.021462105969148222,
      "grad_norm": 2.4044812444703103,
      "learning_rate": 7.1428571428571436e-06,
      "loss": 0.7833,
      "step": 5
    },
    {
      "epoch": 0.025754527162977867,
      "grad_norm": 2.152377713157304,
      "learning_rate": 8.571428571428571e-06,
      "loss": 0.7411,
      "step": 6
    },
    {
      "epoch": 0.03004694835680751,
      "grad_norm": 7.163921239673759,
      "learning_rate": 1e-05,
      "loss": 0.6751,
      "step": 7
    },
    {
      "epoch": 0.034339369550637155,
      "grad_norm": 1.9811371567636475,
      "learning_rate": 9.999512620046523e-06,
      "loss": 0.6019,
      "step": 8
    },
    {
      "epoch": 0.0386317907444668,
      "grad_norm": 2.2991427332066663,
      "learning_rate": 9.998050575201772e-06,
      "loss": 0.6195,
      "step": 9
    },
    {
      "epoch": 0.042924211938296444,
      "grad_norm": 2.633277122254043,
      "learning_rate": 9.995614150494293e-06,
      "loss": 0.6773,
      "step": 10
    },
    {
      "epoch": 0.04721663313212609,
      "grad_norm": 0.8137858553380385,
      "learning_rate": 9.992203820909906e-06,
      "loss": 0.44,
      "step": 11
    },
    {
      "epoch": 0.05150905432595573,
      "grad_norm": 2.5789476047477495,
      "learning_rate": 9.987820251299121e-06,
      "loss": 0.6856,
      "step": 12
    },
    {
      "epoch": 0.05580147551978538,
      "grad_norm": 2.325022123665024,
      "learning_rate": 9.982464296247523e-06,
      "loss": 0.6573,
      "step": 13
    },
    {
      "epoch": 0.06009389671361502,
      "grad_norm": 2.1016133484431814,
      "learning_rate": 9.976136999909156e-06,
      "loss": 0.5795,
      "step": 14
    },
    {
      "epoch": 0.06438631790744467,
      "grad_norm": 2.203575792232459,
      "learning_rate": 9.968839595802982e-06,
      "loss": 0.5512,
      "step": 15
    },
    {
      "epoch": 0.06867873910127431,
      "grad_norm": 1.7649770138174061,
      "learning_rate": 9.960573506572391e-06,
      "loss": 0.5991,
      "step": 16
    },
    {
      "epoch": 0.07297116029510396,
      "grad_norm": 2.125017710581645,
      "learning_rate": 9.951340343707852e-06,
      "loss": 0.6961,
      "step": 17
    },
    {
      "epoch": 0.0772635814889336,
      "grad_norm": 1.9648109979705357,
      "learning_rate": 9.941141907232766e-06,
      "loss": 0.6274,
      "step": 18
    },
    {
      "epoch": 0.08155600268276325,
      "grad_norm": 2.1430042253940997,
      "learning_rate": 9.929980185352525e-06,
      "loss": 0.5933,
      "step": 19
    },
    {
      "epoch": 0.08584842387659289,
      "grad_norm": 2.187065629596637,
      "learning_rate": 9.91785735406693e-06,
      "loss": 0.6537,
      "step": 20
    },
    {
      "epoch": 0.09014084507042254,
      "grad_norm": 2.0411377847693366,
      "learning_rate": 9.904775776745959e-06,
      "loss": 0.5277,
      "step": 21
    },
    {
      "epoch": 0.09443326626425218,
      "grad_norm": 1.9222476925913479,
      "learning_rate": 9.890738003669029e-06,
      "loss": 0.6009,
      "step": 22
    },
    {
      "epoch": 0.09872568745808183,
      "grad_norm": 2.202338617043183,
      "learning_rate": 9.875746771527817e-06,
      "loss": 0.6808,
      "step": 23
    },
    {
      "epoch": 0.10301810865191147,
      "grad_norm": 1.8704298128204246,
      "learning_rate": 9.859805002892733e-06,
      "loss": 0.5834,
      "step": 24
    },
    {
      "epoch": 0.10731052984574112,
      "grad_norm": 2.26606517803316,
      "learning_rate": 9.842915805643156e-06,
      "loss": 0.6416,
      "step": 25
    },
    {
      "epoch": 0.11160295103957076,
      "grad_norm": 1.957724168370316,
      "learning_rate": 9.825082472361558e-06,
      "loss": 0.5565,
      "step": 26
    },
    {
      "epoch": 0.1158953722334004,
      "grad_norm": 2.7304041309488367,
      "learning_rate": 9.806308479691595e-06,
      "loss": 0.6941,
      "step": 27
    },
    {
      "epoch": 0.12018779342723004,
      "grad_norm": 2.252924472572446,
      "learning_rate": 9.786597487660336e-06,
      "loss": 0.6806,
      "step": 28
    },
    {
      "epoch": 0.1244802146210597,
      "grad_norm": 2.208805819097276,
      "learning_rate": 9.765953338964736e-06,
      "loss": 0.6123,
      "step": 29
    },
    {
      "epoch": 0.12877263581488935,
      "grad_norm": 2.0330562196938238,
      "learning_rate": 9.744380058222483e-06,
      "loss": 0.5966,
      "step": 30
    },
    {
      "epoch": 0.13306505700871898,
      "grad_norm": 2.4286898255231533,
      "learning_rate": 9.721881851187406e-06,
      "loss": 0.6148,
      "step": 31
    },
    {
      "epoch": 0.13735747820254862,
      "grad_norm": 1.911339918779555,
      "learning_rate": 9.698463103929542e-06,
      "loss": 0.5662,
      "step": 32
    },
    {
      "epoch": 0.14164989939637826,
      "grad_norm": 2.0543803346952583,
      "learning_rate": 9.674128381980073e-06,
      "loss": 0.6602,
      "step": 33
    },
    {
      "epoch": 0.14594232059020792,
      "grad_norm": 2.086982956085146,
      "learning_rate": 9.648882429441258e-06,
      "loss": 0.5432,
      "step": 34
    },
    {
      "epoch": 0.15023474178403756,
      "grad_norm": 2.0901966575723474,
      "learning_rate": 9.622730168061568e-06,
      "loss": 0.5274,
      "step": 35
    },
    {
      "epoch": 0.1545271629778672,
      "grad_norm": 4.381472469442544,
      "learning_rate": 9.595676696276173e-06,
      "loss": 0.583,
      "step": 36
    },
    {
      "epoch": 0.15881958417169684,
      "grad_norm": 2.3265871885088503,
      "learning_rate": 9.567727288213005e-06,
      "loss": 0.5623,
      "step": 37
    },
    {
      "epoch": 0.1631120053655265,
      "grad_norm": 2.0021378156582963,
      "learning_rate": 9.538887392664544e-06,
      "loss": 0.6318,
      "step": 38
    },
    {
      "epoch": 0.16740442655935614,
      "grad_norm": 1.0984958441170714,
      "learning_rate": 9.50916263202557e-06,
      "loss": 0.4548,
      "step": 39
    },
    {
      "epoch": 0.17169684775318578,
      "grad_norm": 1.8318936065536409,
      "learning_rate": 9.478558801197065e-06,
      "loss": 0.5985,
      "step": 40
    },
    {
      "epoch": 0.17598926894701541,
      "grad_norm": 2.7994275799684076,
      "learning_rate": 9.44708186645649e-06,
      "loss": 0.5926,
      "step": 41
    },
    {
      "epoch": 0.18028169014084508,
      "grad_norm": 2.072220455177539,
      "learning_rate": 9.414737964294636e-06,
      "loss": 0.5318,
      "step": 42
    },
    {
      "epoch": 0.18457411133467472,
      "grad_norm": 2.2812531791006645,
      "learning_rate": 9.381533400219319e-06,
      "loss": 0.5948,
      "step": 43
    },
    {
      "epoch": 0.18886653252850436,
      "grad_norm": 1.8766396474219533,
      "learning_rate": 9.347474647526095e-06,
      "loss": 0.5514,
      "step": 44
    },
    {
      "epoch": 0.193158953722334,
      "grad_norm": 1.9908306217286644,
      "learning_rate": 9.312568346036288e-06,
      "loss": 0.5282,
      "step": 45
    },
    {
      "epoch": 0.19745137491616366,
      "grad_norm": 2.2942225048556284,
      "learning_rate": 9.276821300802535e-06,
      "loss": 0.5875,
      "step": 46
    },
    {
      "epoch": 0.2017437961099933,
      "grad_norm": 3.1824151916415087,
      "learning_rate": 9.24024048078213e-06,
      "loss": 0.6144,
      "step": 47
    },
    {
      "epoch": 0.20603621730382293,
      "grad_norm": 0.992721768550404,
      "learning_rate": 9.202833017478421e-06,
      "loss": 0.4847,
      "step": 48
    },
    {
      "epoch": 0.21032863849765257,
      "grad_norm": 1.9310743502843328,
      "learning_rate": 9.164606203550498e-06,
      "loss": 0.5974,
      "step": 49
    },
    {
      "epoch": 0.21462105969148224,
      "grad_norm": 2.1730863843947392,
      "learning_rate": 9.125567491391476e-06,
      "loss": 0.6293,
      "step": 50
    },
    {
      "epoch": 0.21891348088531187,
      "grad_norm": 0.8680338310564839,
      "learning_rate": 9.085724491675642e-06,
      "loss": 0.4757,
      "step": 51
    },
    {
      "epoch": 0.2232059020791415,
      "grad_norm": 2.1915863963781916,
      "learning_rate": 9.045084971874738e-06,
      "loss": 0.5925,
      "step": 52
    },
    {
      "epoch": 0.22749832327297115,
      "grad_norm": 2.3492931220336195,
      "learning_rate": 9.003656854743667e-06,
      "loss": 0.6402,
      "step": 53
    },
    {
      "epoch": 0.2317907444668008,
      "grad_norm": 0.8164522682092396,
      "learning_rate": 8.961448216775955e-06,
      "loss": 0.4382,
      "step": 54
    },
    {
      "epoch": 0.23608316566063045,
      "grad_norm": 2.2933091603858418,
      "learning_rate": 8.9184672866292e-06,
      "loss": 0.5367,
      "step": 55
    },
    {
      "epoch": 0.2403755868544601,
      "grad_norm": 2.051540581160434,
      "learning_rate": 8.874722443520898e-06,
      "loss": 0.5736,
      "step": 56
    },
    {
      "epoch": 0.24466800804828973,
      "grad_norm": 1.9508118237454382,
      "learning_rate": 8.83022221559489e-06,
      "loss": 0.6196,
      "step": 57
    },
    {
      "epoch": 0.2489604292421194,
      "grad_norm": 2.4166528722303036,
      "learning_rate": 8.784975278258783e-06,
      "loss": 0.5279,
      "step": 58
    },
    {
      "epoch": 0.25325285043594903,
      "grad_norm": 1.9719959484618856,
      "learning_rate": 8.73899045249266e-06,
      "loss": 0.5248,
      "step": 59
    },
    {
      "epoch": 0.2575452716297787,
      "grad_norm": 2.088094168914141,
      "learning_rate": 8.692276703129421e-06,
      "loss": 0.5436,
      "step": 60
    },
    {
      "epoch": 0.2618376928236083,
      "grad_norm": 2.347294342285649,
      "learning_rate": 8.644843137107058e-06,
      "loss": 0.7,
      "step": 61
    },
    {
      "epoch": 0.26613011401743797,
      "grad_norm": 2.152662276659629,
      "learning_rate": 8.596699001693257e-06,
      "loss": 0.5052,
      "step": 62
    },
    {
      "epoch": 0.2704225352112676,
      "grad_norm": 2.1260026022790552,
      "learning_rate": 8.547853682682605e-06,
      "loss": 0.6121,
      "step": 63
    },
    {
      "epoch": 0.27471495640509724,
      "grad_norm": 1.9148541801577235,
      "learning_rate": 8.498316702566828e-06,
      "loss": 0.5516,
      "step": 64
    },
    {
      "epoch": 0.2790073775989269,
      "grad_norm": 2.134479085245887,
      "learning_rate": 8.44809771867835e-06,
      "loss": 0.6297,
      "step": 65
    },
    {
      "epoch": 0.2832997987927565,
      "grad_norm": 1.776446730314099,
      "learning_rate": 8.397206521307584e-06,
      "loss": 0.5337,
      "step": 66
    },
    {
      "epoch": 0.2875922199865862,
      "grad_norm": 1.955780895559264,
      "learning_rate": 8.345653031794292e-06,
      "loss": 0.6187,
      "step": 67
    },
    {
      "epoch": 0.29188464118041585,
      "grad_norm": 2.065677438153802,
      "learning_rate": 8.293447300593402e-06,
      "loss": 0.4712,
      "step": 68
    },
    {
      "epoch": 0.29617706237424546,
      "grad_norm": 1.929637625719691,
      "learning_rate": 8.240599505315656e-06,
      "loss": 0.5638,
      "step": 69
    },
    {
      "epoch": 0.3004694835680751,
      "grad_norm": 2.1234613246255294,
      "learning_rate": 8.18711994874345e-06,
      "loss": 0.5622,
      "step": 70
    },
    {
      "epoch": 0.3047619047619048,
      "grad_norm": 2.4974658951008935,
      "learning_rate": 8.133019056822303e-06,
      "loss": 0.5656,
      "step": 71
    },
    {
      "epoch": 0.3090543259557344,
      "grad_norm": 2.1148121496211028,
      "learning_rate": 8.078307376628292e-06,
      "loss": 0.5706,
      "step": 72
    },
    {
      "epoch": 0.31334674714956406,
      "grad_norm": 1.9637387645317304,
      "learning_rate": 8.022995574311876e-06,
      "loss": 0.6431,
      "step": 73
    },
    {
      "epoch": 0.3176391683433937,
      "grad_norm": 2.2321260500467996,
      "learning_rate": 7.967094433018508e-06,
      "loss": 0.6038,
      "step": 74
    },
    {
      "epoch": 0.32193158953722334,
      "grad_norm": 2.1333068986028136,
      "learning_rate": 7.910614850786448e-06,
      "loss": 0.5666,
      "step": 75
    },
    {
      "epoch": 0.326224010731053,
      "grad_norm": 2.195983244813881,
      "learning_rate": 7.85356783842216e-06,
      "loss": 0.5877,
      "step": 76
    },
    {
      "epoch": 0.3305164319248826,
      "grad_norm": 2.9185529975089644,
      "learning_rate": 7.795964517353734e-06,
      "loss": 0.5221,
      "step": 77
    },
    {
      "epoch": 0.3348088531187123,
      "grad_norm": 2.1356064453519363,
      "learning_rate": 7.737816117462752e-06,
      "loss": 0.5536,
      "step": 78
    },
    {
      "epoch": 0.33910127431254194,
      "grad_norm": 0.9888358843543892,
      "learning_rate": 7.679133974894984e-06,
      "loss": 0.427,
      "step": 79
    },
    {
      "epoch": 0.34339369550637155,
      "grad_norm": 0.9532274184167417,
      "learning_rate": 7.619929529850397e-06,
      "loss": 0.4607,
      "step": 80
    },
    {
      "epoch": 0.3476861167002012,
      "grad_norm": 1.969695168470127,
      "learning_rate": 7.560214324352858e-06,
      "loss": 0.525,
      "step": 81
    },
    {
      "epoch": 0.35197853789403083,
      "grad_norm": 2.2331654792734272,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.6041,
      "step": 82
    },
    {
      "epoch": 0.3562709590878605,
      "grad_norm": 2.4884565474083606,
      "learning_rate": 7.4392982956936644e-06,
      "loss": 0.5886,
      "step": 83
    },
    {
      "epoch": 0.36056338028169016,
      "grad_norm": 1.8314120774170615,
      "learning_rate": 7.378121045351378e-06,
      "loss": 0.5349,
      "step": 84
    },
    {
      "epoch": 0.36485580147551977,
      "grad_norm": 2.1762807073421127,
      "learning_rate": 7.31648017559931e-06,
      "loss": 0.5606,
      "step": 85
    },
    {
      "epoch": 0.36914822266934944,
      "grad_norm": 2.422109142275802,
      "learning_rate": 7.254387703447154e-06,
      "loss": 0.5431,
      "step": 86
    },
    {
      "epoch": 0.3734406438631791,
      "grad_norm": 1.9079552738678454,
      "learning_rate": 7.191855733945388e-06,
      "loss": 0.5553,
      "step": 87
    },
    {
      "epoch": 0.3777330650570087,
      "grad_norm": 1.760125442274873,
      "learning_rate": 7.128896457825364e-06,
      "loss": 0.5808,
      "step": 88
    },
    {
      "epoch": 0.3820254862508384,
      "grad_norm": 0.9979331285164651,
      "learning_rate": 7.06552214912271e-06,
      "loss": 0.4579,
      "step": 89
    },
    {
      "epoch": 0.386317907444668,
      "grad_norm": 1.8496753131089991,
      "learning_rate": 7.0017451627844765e-06,
      "loss": 0.591,
      "step": 90
    },
    {
      "epoch": 0.39061032863849765,
      "grad_norm": 2.3343608471053265,
      "learning_rate": 6.9375779322605154e-06,
      "loss": 0.6091,
      "step": 91
    },
    {
      "epoch": 0.3949027498323273,
      "grad_norm": 1.9668213618430554,
      "learning_rate": 6.873032967079562e-06,
      "loss": 0.6944,
      "step": 92
    },
    {
      "epoch": 0.3991951710261569,
      "grad_norm": 1.8725242772507493,
      "learning_rate": 6.808122850410461e-06,
      "loss": 0.6055,
      "step": 93
    },
    {
      "epoch": 0.4034875922199866,
      "grad_norm": 1.9966003945224369,
      "learning_rate": 6.7428602366090764e-06,
      "loss": 0.5595,
      "step": 94
    },
    {
      "epoch": 0.40778001341381626,
      "grad_norm": 3.053074851635477,
      "learning_rate": 6.677257848751276e-06,
      "loss": 0.5857,
      "step": 95
    },
    {
      "epoch": 0.41207243460764587,
      "grad_norm": 2.010221226340498,
      "learning_rate": 6.611328476152557e-06,
      "loss": 0.5995,
      "step": 96
    },
    {
      "epoch": 0.41636485580147553,
      "grad_norm": 1.742641730594434,
      "learning_rate": 6.545084971874738e-06,
      "loss": 0.5389,
      "step": 97
    },
    {
      "epoch": 0.42065727699530514,
      "grad_norm": 0.7858607450939203,
      "learning_rate": 6.4785402502202345e-06,
      "loss": 0.4598,
      "step": 98
    },
    {
      "epoch": 0.4249496981891348,
      "grad_norm": 1.9026874984032676,
      "learning_rate": 6.411707284214384e-06,
      "loss": 0.5824,
      "step": 99
    },
    {
      "epoch": 0.42924211938296447,
      "grad_norm": 1.7972924537621116,
      "learning_rate": 6.344599103076329e-06,
      "loss": 0.5605,
      "step": 100
    },
    {
      "epoch": 0.4335345405767941,
      "grad_norm": 1.6566505050926905,
      "learning_rate": 6.277228789678953e-06,
      "loss": 0.55,
      "step": 101
    },
    {
      "epoch": 0.43782696177062375,
      "grad_norm": 0.7871200662869098,
      "learning_rate": 6.209609477998339e-06,
      "loss": 0.4487,
      "step": 102
    },
    {
      "epoch": 0.4421193829644534,
      "grad_norm": 2.118924742242862,
      "learning_rate": 6.141754350553279e-06,
      "loss": 0.5791,
      "step": 103
    },
    {
      "epoch": 0.446411804158283,
      "grad_norm": 1.9417216221368725,
      "learning_rate": 6.073676635835317e-06,
      "loss": 0.5321,
      "step": 104
    },
    {
      "epoch": 0.4507042253521127,
      "grad_norm": 0.8301362013675532,
      "learning_rate": 6.005389605729824e-06,
      "loss": 0.471,
      "step": 105
    },
    {
      "epoch": 0.4549966465459423,
      "grad_norm": 1.9216842351031653,
      "learning_rate": 5.936906572928625e-06,
      "loss": 0.4981,
      "step": 106
    },
    {
      "epoch": 0.45928906773977196,
      "grad_norm": 2.1309977101502913,
      "learning_rate": 5.8682408883346535e-06,
      "loss": 0.523,
      "step": 107
    },
    {
      "epoch": 0.4635814889336016,
      "grad_norm": 6.307531137311205,
      "learning_rate": 5.799405938459175e-06,
      "loss": 0.5631,
      "step": 108
    },
    {
      "epoch": 0.46787391012743124,
      "grad_norm": 0.8161016261331794,
      "learning_rate": 5.730415142812059e-06,
      "loss": 0.4739,
      "step": 109
    },
    {
      "epoch": 0.4721663313212609,
      "grad_norm": 1.9009887448608773,
      "learning_rate": 5.661281951285613e-06,
      "loss": 0.6272,
      "step": 110
    },
    {
      "epoch": 0.47645875251509057,
      "grad_norm": 2.2577462172723233,
      "learning_rate": 5.592019841532507e-06,
      "loss": 0.5796,
      "step": 111
    },
    {
      "epoch": 0.4807511737089202,
      "grad_norm": 2.1012817228049814,
      "learning_rate": 5.522642316338268e-06,
      "loss": 0.6074,
      "step": 112
    },
    {
      "epoch": 0.48504359490274984,
      "grad_norm": 3.169512060210574,
      "learning_rate": 5.453162900988902e-06,
      "loss": 0.5817,
      "step": 113
    },
    {
      "epoch": 0.48933601609657945,
      "grad_norm": 0.7965146219233805,
      "learning_rate": 5.383595140634093e-06,
      "loss": 0.4628,
      "step": 114
    },
    {
      "epoch": 0.4936284372904091,
      "grad_norm": 2.528449046345642,
      "learning_rate": 5.3139525976465675e-06,
      "loss": 0.6113,
      "step": 115
    },
    {
      "epoch": 0.4979208584842388,
      "grad_norm": 2.169410644155486,
      "learning_rate": 5.244248848978067e-06,
      "loss": 0.6655,
      "step": 116
    },
    {
      "epoch": 0.5022132796780684,
      "grad_norm": 2.114218695353453,
      "learning_rate": 5.174497483512506e-06,
      "loss": 0.5438,
      "step": 117
    },
    {
      "epoch": 0.5065057008718981,
      "grad_norm": 2.322736697640736,
      "learning_rate": 5.1047120994167855e-06,
      "loss": 0.619,
      "step": 118
    },
    {
      "epoch": 0.5107981220657277,
      "grad_norm": 1.9355642603619068,
      "learning_rate": 5.034906301489808e-06,
      "loss": 0.568,
      "step": 119
    },
    {
      "epoch": 0.5150905432595574,
      "grad_norm": 0.8275929454164218,
      "learning_rate": 4.965093698510192e-06,
      "loss": 0.4642,
      "step": 120
    },
    {
      "epoch": 0.5193829644533869,
      "grad_norm": 0.7826606670717038,
      "learning_rate": 4.895287900583216e-06,
      "loss": 0.428,
      "step": 121
    },
    {
      "epoch": 0.5236753856472166,
      "grad_norm": 2.227565392356624,
      "learning_rate": 4.825502516487497e-06,
      "loss": 0.616,
      "step": 122
    },
    {
      "epoch": 0.5279678068410463,
      "grad_norm": 1.9049065560319947,
      "learning_rate": 4.755751151021934e-06,
      "loss": 0.6396,
      "step": 123
    },
    {
      "epoch": 0.5322602280348759,
      "grad_norm": 2.360278324447153,
      "learning_rate": 4.686047402353433e-06,
      "loss": 0.581,
      "step": 124
    },
    {
      "epoch": 0.5365526492287056,
      "grad_norm": 2.1436744795832463,
      "learning_rate": 4.6164048593659076e-06,
      "loss": 0.6027,
      "step": 125
    },
    {
      "epoch": 0.5408450704225352,
      "grad_norm": 2.0687980774005856,
      "learning_rate": 4.546837099011101e-06,
      "loss": 0.5455,
      "step": 126
    },
    {
      "epoch": 0.5451374916163648,
      "grad_norm": 2.094975644434325,
      "learning_rate": 4.477357683661734e-06,
      "loss": 0.5659,
      "step": 127
    },
    {
      "epoch": 0.5494299128101945,
      "grad_norm": 2.183849966879906,
      "learning_rate": 4.4079801584674955e-06,
      "loss": 0.578,
      "step": 128
    },
    {
      "epoch": 0.5537223340040242,
      "grad_norm": 1.8848396606946802,
      "learning_rate": 4.3387180487143875e-06,
      "loss": 0.5052,
      "step": 129
    },
    {
      "epoch": 0.5580147551978538,
      "grad_norm": 0.843007824971089,
      "learning_rate": 4.269584857187942e-06,
      "loss": 0.4997,
      "step": 130
    },
    {
      "epoch": 0.5623071763916835,
      "grad_norm": 2.763950937739874,
      "learning_rate": 4.200594061540827e-06,
      "loss": 0.5819,
      "step": 131
    },
    {
      "epoch": 0.566599597585513,
      "grad_norm": 1.7630340741332657,
      "learning_rate": 4.131759111665349e-06,
      "loss": 0.604,
      "step": 132
    },
    {
      "epoch": 0.5708920187793427,
      "grad_norm": 3.2094078977714897,
      "learning_rate": 4.063093427071376e-06,
      "loss": 0.6265,
      "step": 133
    },
    {
      "epoch": 0.5751844399731724,
      "grad_norm": 1.8312046959677455,
      "learning_rate": 3.994610394270178e-06,
      "loss": 0.5885,
      "step": 134
    },
    {
      "epoch": 0.579476861167002,
      "grad_norm": 1.8491871843778356,
      "learning_rate": 3.926323364164684e-06,
      "loss": 0.634,
      "step": 135
    },
    {
      "epoch": 0.5837692823608317,
      "grad_norm": 2.6473671182169167,
      "learning_rate": 3.8582456494467214e-06,
      "loss": 0.6355,
      "step": 136
    },
    {
      "epoch": 0.5880617035546613,
      "grad_norm": 2.999849822112049,
      "learning_rate": 3.790390522001662e-06,
      "loss": 0.529,
      "step": 137
    },
    {
      "epoch": 0.5923541247484909,
      "grad_norm": 1.9445772581815945,
      "learning_rate": 3.7227712103210485e-06,
      "loss": 0.5575,
      "step": 138
    },
    {
      "epoch": 0.5966465459423206,
      "grad_norm": 1.8104560751827103,
      "learning_rate": 3.655400896923672e-06,
      "loss": 0.5254,
      "step": 139
    },
    {
      "epoch": 0.6009389671361502,
      "grad_norm": 2.3671152557639346,
      "learning_rate": 3.5882927157856175e-06,
      "loss": 0.5583,
      "step": 140
    },
    {
      "epoch": 0.6052313883299799,
      "grad_norm": 2.074346620633345,
      "learning_rate": 3.521459749779769e-06,
      "loss": 0.6084,
      "step": 141
    },
    {
      "epoch": 0.6095238095238096,
      "grad_norm": 2.035892537869217,
      "learning_rate": 3.4549150281252635e-06,
      "loss": 0.5832,
      "step": 142
    },
    {
      "epoch": 0.6138162307176391,
      "grad_norm": 2.1099675037548966,
      "learning_rate": 3.3886715238474454e-06,
      "loss": 0.5579,
      "step": 143
    },
    {
      "epoch": 0.6181086519114688,
      "grad_norm": 2.0468759829486443,
      "learning_rate": 3.322742151248726e-06,
      "loss": 0.5848,
      "step": 144
    },
    {
      "epoch": 0.6224010731052985,
      "grad_norm": 1.9674918076912449,
      "learning_rate": 3.2571397633909252e-06,
      "loss": 0.5398,
      "step": 145
    },
    {
      "epoch": 0.6266934942991281,
      "grad_norm": 1.7459454556549392,
      "learning_rate": 3.1918771495895395e-06,
      "loss": 0.6756,
      "step": 146
    },
    {
      "epoch": 0.6309859154929578,
      "grad_norm": 0.7735085423697842,
      "learning_rate": 3.12696703292044e-06,
      "loss": 0.4194,
      "step": 147
    },
    {
      "epoch": 0.6352783366867873,
      "grad_norm": 1.820298479603609,
      "learning_rate": 3.0624220677394854e-06,
      "loss": 0.5858,
      "step": 148
    },
    {
      "epoch": 0.639570757880617,
      "grad_norm": 3.1630846211682,
      "learning_rate": 2.9982548372155264e-06,
      "loss": 0.5303,
      "step": 149
    },
    {
      "epoch": 0.6438631790744467,
      "grad_norm": 2.413654673008136,
      "learning_rate": 2.934477850877292e-06,
      "loss": 0.5315,
      "step": 150
    },
    {
      "epoch": 0.6481556002682763,
      "grad_norm": 2.508188149902217,
      "learning_rate": 2.871103542174637e-06,
      "loss": 0.5468,
      "step": 151
    },
    {
      "epoch": 0.652448021462106,
      "grad_norm": 1.801696332460669,
      "learning_rate": 2.8081442660546126e-06,
      "loss": 0.5817,
      "step": 152
    },
    {
      "epoch": 0.6567404426559356,
      "grad_norm": 2.26133822829944,
      "learning_rate": 2.7456122965528475e-06,
      "loss": 0.522,
      "step": 153
    },
    {
      "epoch": 0.6610328638497652,
      "grad_norm": 2.4471397871687834,
      "learning_rate": 2.683519824400693e-06,
      "loss": 0.5892,
      "step": 154
    },
    {
      "epoch": 0.6653252850435949,
      "grad_norm": 1.9131453194028882,
      "learning_rate": 2.6218789546486235e-06,
      "loss": 0.5554,
      "step": 155
    },
    {
      "epoch": 0.6696177062374246,
      "grad_norm": 2.4034600764131606,
      "learning_rate": 2.560701704306336e-06,
      "loss": 0.5969,
      "step": 156
    },
    {
      "epoch": 0.6739101274312542,
      "grad_norm": 2.1929545250468423,
      "learning_rate": 2.5000000000000015e-06,
      "loss": 0.5703,
      "step": 157
    },
    {
      "epoch": 0.6782025486250839,
      "grad_norm": 2.2198350864402348,
      "learning_rate": 2.4397856756471435e-06,
      "loss": 0.5812,
      "step": 158
    },
    {
      "epoch": 0.6824949698189134,
      "grad_norm": 2.0128371852734332,
      "learning_rate": 2.380070470149605e-06,
      "loss": 0.6307,
      "step": 159
    },
    {
      "epoch": 0.6867873910127431,
      "grad_norm": 2.217606056909539,
      "learning_rate": 2.320866025105016e-06,
      "loss": 0.5601,
      "step": 160
    },
    {
      "epoch": 0.6910798122065728,
      "grad_norm": 2.0541472609321065,
      "learning_rate": 2.2621838825372496e-06,
      "loss": 0.6326,
      "step": 161
    },
    {
      "epoch": 0.6953722334004024,
      "grad_norm": 1.7762192766929534,
      "learning_rate": 2.204035482646267e-06,
      "loss": 0.501,
      "step": 162
    },
    {
      "epoch": 0.6996646545942321,
      "grad_norm": 2.4025706210449322,
      "learning_rate": 2.146432161577842e-06,
      "loss": 0.5587,
      "step": 163
    },
    {
      "epoch": 0.7039570757880617,
      "grad_norm": 3.686601475649348,
      "learning_rate": 2.0893851492135536e-06,
      "loss": 0.5732,
      "step": 164
    },
    {
      "epoch": 0.7082494969818913,
      "grad_norm": 1.8617954191026609,
      "learning_rate": 2.0329055669814936e-06,
      "loss": 0.5018,
      "step": 165
    },
    {
      "epoch": 0.712541918175721,
      "grad_norm": 3.1015155924797804,
      "learning_rate": 1.977004425688126e-06,
      "loss": 0.6083,
      "step": 166
    },
    {
      "epoch": 0.7168343393695507,
      "grad_norm": 11.133598367985865,
      "learning_rate": 1.9216926233717087e-06,
      "loss": 0.5543,
      "step": 167
    },
    {
      "epoch": 0.7211267605633803,
      "grad_norm": 1.8192465630367278,
      "learning_rate": 1.8669809431776991e-06,
      "loss": 0.5863,
      "step": 168
    },
    {
      "epoch": 0.7254191817572099,
      "grad_norm": 2.0788969216539757,
      "learning_rate": 1.8128800512565514e-06,
      "loss": 0.5291,
      "step": 169
    },
    {
      "epoch": 0.7297116029510395,
      "grad_norm": 1.8368930044145042,
      "learning_rate": 1.7594004946843458e-06,
      "loss": 0.5378,
      "step": 170
    },
    {
      "epoch": 0.7340040241448692,
      "grad_norm": 2.0502782524895315,
      "learning_rate": 1.7065526994065973e-06,
      "loss": 0.5478,
      "step": 171
    },
    {
      "epoch": 0.7382964453386989,
      "grad_norm": 1.7763584764334766,
      "learning_rate": 1.6543469682057105e-06,
      "loss": 0.5448,
      "step": 172
    },
    {
      "epoch": 0.7425888665325285,
      "grad_norm": 1.7918310960966537,
      "learning_rate": 1.6027934786924187e-06,
      "loss": 0.6076,
      "step": 173
    },
    {
      "epoch": 0.7468812877263582,
      "grad_norm": 2.3563140946836523,
      "learning_rate": 1.551902281321651e-06,
      "loss": 0.565,
      "step": 174
    },
    {
      "epoch": 0.7511737089201878,
      "grad_norm": 1.8987524052104983,
      "learning_rate": 1.5016832974331725e-06,
      "loss": 0.5367,
      "step": 175
    },
    {
      "epoch": 0.7554661301140174,
      "grad_norm": 1.9189932438009185,
      "learning_rate": 1.4521463173173966e-06,
      "loss": 0.5435,
      "step": 176
    },
    {
      "epoch": 0.7597585513078471,
      "grad_norm": 2.42896938145852,
      "learning_rate": 1.4033009983067454e-06,
      "loss": 0.5141,
      "step": 177
    },
    {
      "epoch": 0.7640509725016768,
      "grad_norm": 2.5249548327367766,
      "learning_rate": 1.3551568628929434e-06,
      "loss": 0.674,
      "step": 178
    },
    {
      "epoch": 0.7683433936955064,
      "grad_norm": 2.806361559097513,
      "learning_rate": 1.3077232968705805e-06,
      "loss": 0.5517,
      "step": 179
    },
    {
      "epoch": 0.772635814889336,
      "grad_norm": 2.2153752757917555,
      "learning_rate": 1.2610095475073415e-06,
      "loss": 0.6408,
      "step": 180
    },
    {
      "epoch": 0.7769282360831656,
      "grad_norm": 2.5628201933255124,
      "learning_rate": 1.2150247217412186e-06,
      "loss": 0.5957,
      "step": 181
    },
    {
      "epoch": 0.7812206572769953,
      "grad_norm": 2.079321530887709,
      "learning_rate": 1.1697777844051105e-06,
      "loss": 0.6155,
      "step": 182
    },
    {
      "epoch": 0.785513078470825,
      "grad_norm": 3.2999932447363416,
      "learning_rate": 1.1252775564791023e-06,
      "loss": 0.5515,
      "step": 183
    },
    {
      "epoch": 0.7898054996646546,
      "grad_norm": 0.7905984900012323,
      "learning_rate": 1.0815327133708015e-06,
      "loss": 0.4313,
      "step": 184
    },
    {
      "epoch": 0.7940979208584842,
      "grad_norm": 1.9368152867312256,
      "learning_rate": 1.0385517832240472e-06,
      "loss": 0.6071,
      "step": 185
    },
    {
      "epoch": 0.7983903420523139,
      "grad_norm": 2.366580720258878,
      "learning_rate": 9.963431452563331e-07,
      "loss": 0.5578,
      "step": 186
    },
    {
      "epoch": 0.8026827632461435,
      "grad_norm": 2.3822261396548576,
      "learning_rate": 9.549150281252633e-07,
      "loss": 0.499,
      "step": 187
    },
    {
      "epoch": 0.8069751844399732,
      "grad_norm": 1.7417985644400509,
      "learning_rate": 9.142755083243577e-07,
      "loss": 0.5696,
      "step": 188
    },
    {
      "epoch": 0.8112676056338028,
      "grad_norm": 3.9103317258184376,
      "learning_rate": 8.744325086085248e-07,
      "loss": 0.5079,
      "step": 189
    },
    {
      "epoch": 0.8155600268276325,
      "grad_norm": 3.0678763221012586,
      "learning_rate": 8.353937964495029e-07,
      "loss": 0.5418,
      "step": 190
    },
    {
      "epoch": 0.8198524480214621,
      "grad_norm": 2.042653243276808,
      "learning_rate": 7.971669825215789e-07,
      "loss": 0.5748,
      "step": 191
    },
    {
      "epoch": 0.8241448692152917,
      "grad_norm": 2.39821396059648,
      "learning_rate": 7.597595192178702e-07,
      "loss": 0.5428,
      "step": 192
    },
    {
      "epoch": 0.8284372904091214,
      "grad_norm": 2.762719441820346,
      "learning_rate": 7.23178699197467e-07,
      "loss": 0.5673,
      "step": 193
    },
    {
      "epoch": 0.8327297116029511,
      "grad_norm": 1.9044906325399893,
      "learning_rate": 6.874316539637127e-07,
      "loss": 0.5932,
      "step": 194
    },
    {
      "epoch": 0.8370221327967807,
      "grad_norm": 1.856435008918489,
      "learning_rate": 6.52525352473905e-07,
      "loss": 0.5787,
      "step": 195
    },
    {
      "epoch": 0.8413145539906103,
      "grad_norm": 2.3652417447261995,
      "learning_rate": 6.184665997806832e-07,
      "loss": 0.5167,
      "step": 196
    },
    {
      "epoch": 0.84560697518444,
      "grad_norm": 2.723770526598716,
      "learning_rate": 5.852620357053651e-07,
      "loss": 0.571,
      "step": 197
    },
    {
      "epoch": 0.8498993963782696,
      "grad_norm": 2.4817466195220437,
      "learning_rate": 5.529181335435124e-07,
      "loss": 0.58,
      "step": 198
    },
    {
      "epoch": 0.8541918175720993,
      "grad_norm": 2.0455723374907397,
      "learning_rate": 5.214411988029355e-07,
      "loss": 0.5313,
      "step": 199
    },
    {
      "epoch": 0.8584842387659289,
      "grad_norm": 1.9318379229380933,
      "learning_rate": 4.908373679744316e-07,
      "loss": 0.5439,
      "step": 200
    },
    {
      "epoch": 0.8627766599597585,
      "grad_norm": 0.7959253630064103,
      "learning_rate": 4.6111260733545714e-07,
      "loss": 0.4454,
      "step": 201
    },
    {
      "epoch": 0.8670690811535882,
      "grad_norm": 1.9913907969524394,
      "learning_rate": 4.322727117869951e-07,
      "loss": 0.5008,
      "step": 202
    },
    {
      "epoch": 0.8713615023474178,
      "grad_norm": 2.1458695326057082,
      "learning_rate": 4.043233037238281e-07,
      "loss": 0.5459,
      "step": 203
    },
    {
      "epoch": 0.8756539235412475,
      "grad_norm": 1.9619666249914482,
      "learning_rate": 3.772698319384349e-07,
      "loss": 0.4999,
      "step": 204
    },
    {
      "epoch": 0.8799463447350772,
      "grad_norm": 4.3401484961367744,
      "learning_rate": 3.511175705587433e-07,
      "loss": 0.5758,
      "step": 205
    },
    {
      "epoch": 0.8842387659289068,
      "grad_norm": 2.5430076829964174,
      "learning_rate": 3.258716180199278e-07,
      "loss": 0.5761,
      "step": 206
    },
    {
      "epoch": 0.8885311871227364,
      "grad_norm": 0.767231711493243,
      "learning_rate": 3.015368960704584e-07,
      "loss": 0.4614,
      "step": 207
    },
    {
      "epoch": 0.892823608316566,
      "grad_norm": 2.193097249503329,
      "learning_rate": 2.7811814881259503e-07,
      "loss": 0.6247,
      "step": 208
    },
    {
      "epoch": 0.8971160295103957,
      "grad_norm": 1.9331493033717462,
      "learning_rate": 2.556199417775174e-07,
      "loss": 0.56,
      "step": 209
    },
    {
      "epoch": 0.9014084507042254,
      "grad_norm": 2.057966480432949,
      "learning_rate": 2.3404666103526542e-07,
      "loss": 0.5618,
      "step": 210
    },
    {
      "epoch": 0.905700871898055,
      "grad_norm": 2.351596162489054,
      "learning_rate": 2.134025123396638e-07,
      "loss": 0.4834,
      "step": 211
    },
    {
      "epoch": 0.9099932930918846,
      "grad_norm": 1.9120169610557864,
      "learning_rate": 1.9369152030840553e-07,
      "loss": 0.5541,
      "step": 212
    },
    {
      "epoch": 0.9142857142857143,
      "grad_norm": 2.2049446195768763,
      "learning_rate": 1.7491752763844294e-07,
      "loss": 0.5889,
      "step": 213
    },
    {
      "epoch": 0.9185781354795439,
      "grad_norm": 2.0434712402893145,
      "learning_rate": 1.5708419435684463e-07,
      "loss": 0.5638,
      "step": 214
    },
    {
      "epoch": 0.9228705566733736,
      "grad_norm": 2.1757829918674916,
      "learning_rate": 1.4019499710726913e-07,
      "loss": 0.642,
      "step": 215
    },
    {
      "epoch": 0.9271629778672033,
      "grad_norm": 1.9838844707633234,
      "learning_rate": 1.2425322847218368e-07,
      "loss": 0.6747,
      "step": 216
    },
    {
      "epoch": 0.9314553990610329,
      "grad_norm": 2.3027124358123836,
      "learning_rate": 1.0926199633097156e-07,
      "loss": 0.5337,
      "step": 217
    },
    {
      "epoch": 0.9357478202548625,
      "grad_norm": 1.8834569228162623,
      "learning_rate": 9.522422325404234e-08,
      "loss": 0.4917,
      "step": 218
    },
    {
      "epoch": 0.9400402414486921,
      "grad_norm": 1.8818328820882426,
      "learning_rate": 8.214264593307097e-08,
      "loss": 0.5806,
      "step": 219
    },
    {
      "epoch": 0.9443326626425218,
      "grad_norm": 2.121602640833575,
      "learning_rate": 7.001981464747565e-08,
      "loss": 0.6032,
      "step": 220
    },
    {
      "epoch": 0.9486250838363515,
      "grad_norm": 1.7145222003788387,
      "learning_rate": 5.8858092767236084e-08,
      "loss": 0.5455,
      "step": 221
    },
    {
      "epoch": 0.9529175050301811,
      "grad_norm": 1.9516038348847442,
      "learning_rate": 4.865965629214819e-08,
      "loss": 0.4984,
      "step": 222
    },
    {
      "epoch": 0.9572099262240107,
      "grad_norm": 2.156767335001454,
      "learning_rate": 3.9426493427611177e-08,
      "loss": 0.5284,
      "step": 223
    },
    {
      "epoch": 0.9615023474178404,
      "grad_norm": 1.8089215543018735,
      "learning_rate": 3.1160404197018155e-08,
      "loss": 0.5852,
      "step": 224
    },
    {
      "epoch": 0.96579476861167,
      "grad_norm": 3.2819358158442062,
      "learning_rate": 2.386300009084408e-08,
      "loss": 0.535,
      "step": 225
    },
    {
      "epoch": 0.9700871898054997,
      "grad_norm": 1.7304415336384433,
      "learning_rate": 1.753570375247815e-08,
      "loss": 0.5152,
      "step": 226
    },
    {
      "epoch": 0.9743796109993293,
      "grad_norm": 2.7057456233183785,
      "learning_rate": 1.2179748700879013e-08,
      "loss": 0.5592,
      "step": 227
    },
    {
      "epoch": 0.9786720321931589,
      "grad_norm": 2.0005265863891872,
      "learning_rate": 7.796179090094891e-09,
      "loss": 0.548,
      "step": 228
    },
    {
      "epoch": 0.9829644533869886,
      "grad_norm": 3.129587457460373,
      "learning_rate": 4.385849505708084e-09,
      "loss": 0.5154,
      "step": 229
    },
    {
      "epoch": 0.9872568745808182,
      "grad_norm": 2.082407038336782,
      "learning_rate": 1.9494247982282386e-09,
      "loss": 0.5263,
      "step": 230
    },
    {
      "epoch": 0.9915492957746479,
      "grad_norm": 2.035928140228953,
      "learning_rate": 4.87379953478806e-10,
      "loss": 0.5001,
      "step": 231
    },
    {
      "epoch": 0.9958417169684776,
      "grad_norm": 1.9493320834510575,
      "learning_rate": 0.0,
      "loss": 0.5539,
      "step": 232
    },
    {
      "epoch": 0.9958417169684776,
      "step": 232,
      "total_flos": 57740051464192.0,
      "train_loss": 0.5700135146235598,
      "train_runtime": 22473.305,
      "train_samples_per_second": 1.327,
      "train_steps_per_second": 0.01
    }
  ],
| "logging_steps": 1.0, | |
| "max_steps": 232, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 5000.0, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 57740051464192.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |