| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 5.0, | |
| "eval_steps": 500, | |
| "global_step": 47455, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00010536297545042672, | |
| "grad_norm": 8.704546928405762, | |
| "learning_rate": 0.0, | |
| "loss": 3.1115, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.010536297545042672, | |
| "grad_norm": 0.45912429690361023, | |
| "learning_rate": 2.085967130214918e-05, | |
| "loss": 2.4969, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.021072595090085345, | |
| "grad_norm": 0.45048093795776367, | |
| "learning_rate": 4.193004635482512e-05, | |
| "loss": 2.1901, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.03160889263512802, | |
| "grad_norm": 0.4515824615955353, | |
| "learning_rate": 6.300042140750104e-05, | |
| "loss": 2.1404, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.04214519018017069, | |
| "grad_norm": 0.44400346279144287, | |
| "learning_rate": 8.4070796460177e-05, | |
| "loss": 2.1252, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.05268148772521336, | |
| "grad_norm": 0.43594038486480713, | |
| "learning_rate": 0.00010514117151285294, | |
| "loss": 2.1131, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.06321778527025604, | |
| "grad_norm": 0.48596614599227905, | |
| "learning_rate": 0.00012621154656552885, | |
| "loss": 2.1137, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.0737540828152987, | |
| "grad_norm": 0.4419753849506378, | |
| "learning_rate": 0.0001472819216182048, | |
| "loss": 2.1132, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.08429038036034138, | |
| "grad_norm": 0.4662840962409973, | |
| "learning_rate": 0.00016835229667088076, | |
| "loss": 2.0996, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.09482667790538404, | |
| "grad_norm": 0.5108239054679871, | |
| "learning_rate": 0.00018942267172355668, | |
| "loss": 2.097, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.10536297545042672, | |
| "grad_norm": 0.4582119584083557, | |
| "learning_rate": 0.00021049304677623262, | |
| "loss": 2.1024, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.1158992729954694, | |
| "grad_norm": 0.4549405574798584, | |
| "learning_rate": 0.00023156342182890856, | |
| "loss": 2.0953, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.12643557054051208, | |
| "grad_norm": 0.5161290168762207, | |
| "learning_rate": 0.0002526337968815845, | |
| "loss": 2.0932, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.13697186808555473, | |
| "grad_norm": 0.5365496277809143, | |
| "learning_rate": 0.00027370417193426044, | |
| "loss": 2.0952, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.1475081656305974, | |
| "grad_norm": 0.5388957858085632, | |
| "learning_rate": 0.0002947745469869364, | |
| "loss": 2.0908, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.15804446317564008, | |
| "grad_norm": 0.46142223477363586, | |
| "learning_rate": 0.00031584492203961227, | |
| "loss": 2.0942, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.16858076072068276, | |
| "grad_norm": 0.46338245272636414, | |
| "learning_rate": 0.00033691529709228824, | |
| "loss": 2.0844, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.17911705826572544, | |
| "grad_norm": 0.48414379358291626, | |
| "learning_rate": 0.0003579856721449642, | |
| "loss": 2.1006, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.1896533558107681, | |
| "grad_norm": 0.5171504020690918, | |
| "learning_rate": 0.0003790560471976401, | |
| "loss": 2.0942, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.20018965335581076, | |
| "grad_norm": 0.4922062158584595, | |
| "learning_rate": 0.0004001264222503161, | |
| "loss": 2.0904, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.21072595090085344, | |
| "grad_norm": 0.6023987531661987, | |
| "learning_rate": 0.000421196797302992, | |
| "loss": 2.0961, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.22126224844589612, | |
| "grad_norm": 0.5345625281333923, | |
| "learning_rate": 0.0004422671723556679, | |
| "loss": 2.0952, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.2317985459909388, | |
| "grad_norm": 0.5566565990447998, | |
| "learning_rate": 0.0004633375474083439, | |
| "loss": 2.0977, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.24233484353598145, | |
| "grad_norm": 0.5522327423095703, | |
| "learning_rate": 0.00048440792246101985, | |
| "loss": 2.1047, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.25287114108102415, | |
| "grad_norm": 0.5047522783279419, | |
| "learning_rate": 0.0004997116365733552, | |
| "loss": 2.0979, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.2634074386260668, | |
| "grad_norm": 0.4784170687198639, | |
| "learning_rate": 0.0004986025464708753, | |
| "loss": 2.102, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.27394373617110945, | |
| "grad_norm": 0.5242518782615662, | |
| "learning_rate": 0.0004974934563683953, | |
| "loss": 2.0998, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.28448003371615216, | |
| "grad_norm": 0.47221043705940247, | |
| "learning_rate": 0.0004963843662659154, | |
| "loss": 2.1038, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.2950163312611948, | |
| "grad_norm": 0.6364756226539612, | |
| "learning_rate": 0.0004952752761634355, | |
| "loss": 2.0921, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.3055526288062375, | |
| "grad_norm": 0.4995081424713135, | |
| "learning_rate": 0.0004941661860609556, | |
| "loss": 2.0924, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.31608892635128016, | |
| "grad_norm": 0.4774039685726166, | |
| "learning_rate": 0.0004930570959584757, | |
| "loss": 2.0911, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.3266252238963228, | |
| "grad_norm": 0.4661494195461273, | |
| "learning_rate": 0.0004919480058559958, | |
| "loss": 2.0936, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.3371615214413655, | |
| "grad_norm": 0.4524199366569519, | |
| "learning_rate": 0.0004908389157535158, | |
| "loss": 2.0885, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.34769781898640817, | |
| "grad_norm": 0.5359546542167664, | |
| "learning_rate": 0.0004897298256510359, | |
| "loss": 2.0836, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.3582341165314509, | |
| "grad_norm": 0.46531638503074646, | |
| "learning_rate": 0.0004886207355485559, | |
| "loss": 2.0732, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.3687704140764935, | |
| "grad_norm": 0.5188313722610474, | |
| "learning_rate": 0.0004875116454460761, | |
| "loss": 2.0701, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.3793067116215362, | |
| "grad_norm": 0.4912554621696472, | |
| "learning_rate": 0.0004864025553435961, | |
| "loss": 2.0734, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.3898430091665789, | |
| "grad_norm": 0.46518808603286743, | |
| "learning_rate": 0.00048529346524111617, | |
| "loss": 2.0673, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.40037930671162153, | |
| "grad_norm": 0.46551960706710815, | |
| "learning_rate": 0.00048418437513863627, | |
| "loss": 2.0733, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.41091560425666424, | |
| "grad_norm": 0.543953001499176, | |
| "learning_rate": 0.0004830752850361563, | |
| "loss": 2.066, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.4214519018017069, | |
| "grad_norm": 0.5043941140174866, | |
| "learning_rate": 0.0004819661949336764, | |
| "loss": 2.0662, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.43198819934674954, | |
| "grad_norm": 0.4602905511856079, | |
| "learning_rate": 0.00048085710483119647, | |
| "loss": 2.0704, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.44252449689179224, | |
| "grad_norm": 0.5024540424346924, | |
| "learning_rate": 0.0004797480147287166, | |
| "loss": 2.0654, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.4530607944368349, | |
| "grad_norm": 0.48672983050346375, | |
| "learning_rate": 0.00047863892462623667, | |
| "loss": 2.0572, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.4635970919818776, | |
| "grad_norm": 0.44031229615211487, | |
| "learning_rate": 0.0004775298345237567, | |
| "loss": 2.0672, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.47413338952692025, | |
| "grad_norm": 0.46178367733955383, | |
| "learning_rate": 0.0004764207444212768, | |
| "loss": 2.0617, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.4846696870719629, | |
| "grad_norm": 0.43033888936042786, | |
| "learning_rate": 0.00047531165431879686, | |
| "loss": 2.0545, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.4952059846170056, | |
| "grad_norm": 0.47262778878211975, | |
| "learning_rate": 0.00047420256421631696, | |
| "loss": 2.0581, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.5057422821620483, | |
| "grad_norm": 0.5511783957481384, | |
| "learning_rate": 0.000473093474113837, | |
| "loss": 2.0497, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.5162785797070909, | |
| "grad_norm": 0.5176393985748291, | |
| "learning_rate": 0.00047198438401135705, | |
| "loss": 2.0466, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.5268148772521336, | |
| "grad_norm": 0.4723651111125946, | |
| "learning_rate": 0.00047087529390887716, | |
| "loss": 2.0468, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.5373511747971763, | |
| "grad_norm": 0.4407116174697876, | |
| "learning_rate": 0.00046976620380639726, | |
| "loss": 2.0551, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.5478874723422189, | |
| "grad_norm": 0.4954802989959717, | |
| "learning_rate": 0.00046865711370391736, | |
| "loss": 2.0418, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.5584237698872616, | |
| "grad_norm": 0.43379735946655273, | |
| "learning_rate": 0.0004675480236014374, | |
| "loss": 2.0455, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.5689600674323043, | |
| "grad_norm": 0.4137374758720398, | |
| "learning_rate": 0.00046643893349895745, | |
| "loss": 2.0435, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.5794963649773469, | |
| "grad_norm": 0.42562806606292725, | |
| "learning_rate": 0.00046532984339647755, | |
| "loss": 2.0462, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.5900326625223896, | |
| "grad_norm": 0.49963149428367615, | |
| "learning_rate": 0.0004642207532939976, | |
| "loss": 2.0461, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.6005689600674323, | |
| "grad_norm": 0.4317498803138733, | |
| "learning_rate": 0.0004631116631915177, | |
| "loss": 2.043, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.611105257612475, | |
| "grad_norm": 0.48041588068008423, | |
| "learning_rate": 0.00046200257308903775, | |
| "loss": 2.0407, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.6216415551575176, | |
| "grad_norm": 0.4867211878299713, | |
| "learning_rate": 0.0004608934829865578, | |
| "loss": 2.0399, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.6321778527025603, | |
| "grad_norm": 0.4728844165802002, | |
| "learning_rate": 0.0004597843928840779, | |
| "loss": 2.0481, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.642714150247603, | |
| "grad_norm": 0.42306768894195557, | |
| "learning_rate": 0.000458675302781598, | |
| "loss": 2.036, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.6532504477926456, | |
| "grad_norm": 0.4628433883190155, | |
| "learning_rate": 0.0004575662126791181, | |
| "loss": 2.0316, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.6637867453376883, | |
| "grad_norm": 0.5507206916809082, | |
| "learning_rate": 0.00045645712257663814, | |
| "loss": 2.037, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.674323042882731, | |
| "grad_norm": 0.5245053172111511, | |
| "learning_rate": 0.00045534803247415824, | |
| "loss": 2.0331, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.6848593404277736, | |
| "grad_norm": 0.4395572543144226, | |
| "learning_rate": 0.0004542389423716783, | |
| "loss": 2.0319, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.6953956379728163, | |
| "grad_norm": 0.4458150565624237, | |
| "learning_rate": 0.00045312985226919833, | |
| "loss": 2.0262, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.705931935517859, | |
| "grad_norm": 0.4589666724205017, | |
| "learning_rate": 0.00045202076216671844, | |
| "loss": 2.0281, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.7164682330629017, | |
| "grad_norm": 0.49729079008102417, | |
| "learning_rate": 0.0004509116720642385, | |
| "loss": 2.0254, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.7270045306079443, | |
| "grad_norm": 0.41776230931282043, | |
| "learning_rate": 0.0004498025819617586, | |
| "loss": 2.0274, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.737540828152987, | |
| "grad_norm": 0.5071027278900146, | |
| "learning_rate": 0.0004486934918592787, | |
| "loss": 2.0226, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.7480771256980298, | |
| "grad_norm": 0.47906801104545593, | |
| "learning_rate": 0.00044758440175679873, | |
| "loss": 2.0225, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.7586134232430723, | |
| "grad_norm": 0.501970648765564, | |
| "learning_rate": 0.00044647531165431883, | |
| "loss": 2.0212, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.769149720788115, | |
| "grad_norm": 0.4116053879261017, | |
| "learning_rate": 0.0004453662215518389, | |
| "loss": 2.0259, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.7796860183331578, | |
| "grad_norm": 0.4501636028289795, | |
| "learning_rate": 0.000444257131449359, | |
| "loss": 2.0205, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.7902223158782004, | |
| "grad_norm": 0.5196821093559265, | |
| "learning_rate": 0.000443148041346879, | |
| "loss": 2.0216, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.8007586134232431, | |
| "grad_norm": 0.4288316071033478, | |
| "learning_rate": 0.00044203895124439907, | |
| "loss": 2.0178, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.8112949109682858, | |
| "grad_norm": 0.41823074221611023, | |
| "learning_rate": 0.00044092986114191917, | |
| "loss": 2.0131, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.8218312085133285, | |
| "grad_norm": 0.4197762608528137, | |
| "learning_rate": 0.0004398207710394392, | |
| "loss": 2.0147, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.8323675060583711, | |
| "grad_norm": 0.4367753267288208, | |
| "learning_rate": 0.0004387116809369593, | |
| "loss": 2.024, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.8429038036034138, | |
| "grad_norm": 0.43104997277259827, | |
| "learning_rate": 0.0004376025908344794, | |
| "loss": 2.0123, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.8534401011484565, | |
| "grad_norm": 0.4320082664489746, | |
| "learning_rate": 0.0004364935007319995, | |
| "loss": 2.0126, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.8639763986934991, | |
| "grad_norm": 0.4402988851070404, | |
| "learning_rate": 0.00043538441062951957, | |
| "loss": 2.0127, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.8745126962385418, | |
| "grad_norm": 0.4331250786781311, | |
| "learning_rate": 0.0004342753205270396, | |
| "loss": 2.0138, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.8850489937835845, | |
| "grad_norm": 0.43004143238067627, | |
| "learning_rate": 0.0004331662304245597, | |
| "loss": 2.0034, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.8955852913286271, | |
| "grad_norm": 0.4509132504463196, | |
| "learning_rate": 0.00043205714032207976, | |
| "loss": 2.0058, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.9061215888736698, | |
| "grad_norm": 0.43472710251808167, | |
| "learning_rate": 0.00043094805021959986, | |
| "loss": 2.0042, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.9166578864187125, | |
| "grad_norm": 0.5285255908966064, | |
| "learning_rate": 0.0004298389601171199, | |
| "loss": 2.0018, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.9271941839637552, | |
| "grad_norm": 0.40384572744369507, | |
| "learning_rate": 0.00042872987001463996, | |
| "loss": 2.0079, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.9377304815087978, | |
| "grad_norm": 0.4634927213191986, | |
| "learning_rate": 0.0004276207799121601, | |
| "loss": 1.9999, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.9482667790538405, | |
| "grad_norm": 0.4585327208042145, | |
| "learning_rate": 0.00042651168980968016, | |
| "loss": 2.0076, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.9588030765988832, | |
| "grad_norm": 0.44724905490875244, | |
| "learning_rate": 0.00042540259970720026, | |
| "loss": 2.0055, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.9693393741439258, | |
| "grad_norm": 0.4930990934371948, | |
| "learning_rate": 0.0004242935096047203, | |
| "loss": 2.0034, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.9798756716889685, | |
| "grad_norm": 0.4466867744922638, | |
| "learning_rate": 0.00042318441950224035, | |
| "loss": 2.0046, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.9904119692340112, | |
| "grad_norm": 0.44049832224845886, | |
| "learning_rate": 0.00042207532939976045, | |
| "loss": 2.0015, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 1.000948266779054, | |
| "grad_norm": 0.4658033549785614, | |
| "learning_rate": 0.0004209662392972805, | |
| "loss": 1.9938, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.0114845643240966, | |
| "grad_norm": 0.45460689067840576, | |
| "learning_rate": 0.0004198571491948006, | |
| "loss": 1.9694, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 1.022020861869139, | |
| "grad_norm": 0.43489739298820496, | |
| "learning_rate": 0.00041874805909232065, | |
| "loss": 1.9684, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 1.0325571594141818, | |
| "grad_norm": 0.4363148510456085, | |
| "learning_rate": 0.0004176389689898407, | |
| "loss": 1.961, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 1.0430934569592245, | |
| "grad_norm": 0.41610002517700195, | |
| "learning_rate": 0.00041652987888736085, | |
| "loss": 1.9694, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 1.0536297545042672, | |
| "grad_norm": 0.39003250002861023, | |
| "learning_rate": 0.0004154207887848809, | |
| "loss": 1.9636, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.06416605204931, | |
| "grad_norm": 0.36780601739883423, | |
| "learning_rate": 0.000414311698682401, | |
| "loss": 1.9686, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 1.0747023495943526, | |
| "grad_norm": 0.4296736419200897, | |
| "learning_rate": 0.00041320260857992104, | |
| "loss": 1.9668, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 1.0852386471393953, | |
| "grad_norm": 0.45763176679611206, | |
| "learning_rate": 0.00041209351847744114, | |
| "loss": 1.9639, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 1.0957749446844378, | |
| "grad_norm": 0.41805505752563477, | |
| "learning_rate": 0.0004109844283749612, | |
| "loss": 1.9633, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 1.1063112422294805, | |
| "grad_norm": 0.42308661341667175, | |
| "learning_rate": 0.00040987533827248124, | |
| "loss": 1.9745, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.1168475397745232, | |
| "grad_norm": 0.4240245223045349, | |
| "learning_rate": 0.00040876624817000134, | |
| "loss": 1.9689, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 1.127383837319566, | |
| "grad_norm": 0.40365278720855713, | |
| "learning_rate": 0.0004076571580675214, | |
| "loss": 1.9653, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 1.1379201348646086, | |
| "grad_norm": 0.4099302887916565, | |
| "learning_rate": 0.00040654806796504154, | |
| "loss": 1.958, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 1.1484564324096513, | |
| "grad_norm": 0.4134521186351776, | |
| "learning_rate": 0.0004054389778625616, | |
| "loss": 1.9686, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 1.158992729954694, | |
| "grad_norm": 0.40292927622795105, | |
| "learning_rate": 0.00040432988776008163, | |
| "loss": 1.9627, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.1695290274997365, | |
| "grad_norm": 0.4272337555885315, | |
| "learning_rate": 0.00040322079765760173, | |
| "loss": 1.9655, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 1.1800653250447792, | |
| "grad_norm": 0.43145930767059326, | |
| "learning_rate": 0.0004021117075551218, | |
| "loss": 1.963, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 1.190601622589822, | |
| "grad_norm": 0.39788371324539185, | |
| "learning_rate": 0.0004010026174526419, | |
| "loss": 1.9616, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 1.2011379201348646, | |
| "grad_norm": 0.45902547240257263, | |
| "learning_rate": 0.0003998935273501619, | |
| "loss": 1.9589, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 1.2116742176799074, | |
| "grad_norm": 0.4540606141090393, | |
| "learning_rate": 0.00039878443724768197, | |
| "loss": 1.958, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 1.2222105152249498, | |
| "grad_norm": 0.4402179419994354, | |
| "learning_rate": 0.0003976753471452021, | |
| "loss": 1.9555, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 1.2327468127699925, | |
| "grad_norm": 0.389726459980011, | |
| "learning_rate": 0.0003965662570427221, | |
| "loss": 1.9512, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 1.2432831103150352, | |
| "grad_norm": 0.440833181142807, | |
| "learning_rate": 0.0003954571669402423, | |
| "loss": 1.9561, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 1.253819407860078, | |
| "grad_norm": 0.3972662091255188, | |
| "learning_rate": 0.0003943480768377623, | |
| "loss": 1.965, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 1.2643557054051207, | |
| "grad_norm": 0.41316962242126465, | |
| "learning_rate": 0.00039323898673528237, | |
| "loss": 1.9522, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.2748920029501634, | |
| "grad_norm": 0.41109901666641235, | |
| "learning_rate": 0.00039212989663280247, | |
| "loss": 1.9553, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 1.285428300495206, | |
| "grad_norm": 0.4357900023460388, | |
| "learning_rate": 0.0003910208065303225, | |
| "loss": 1.9513, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 1.2959645980402485, | |
| "grad_norm": 0.3943662941455841, | |
| "learning_rate": 0.0003899117164278426, | |
| "loss": 1.9611, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 1.3065008955852913, | |
| "grad_norm": 0.39483174681663513, | |
| "learning_rate": 0.00038880262632536266, | |
| "loss": 1.9478, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 1.317037193130334, | |
| "grad_norm": 0.43672600388526917, | |
| "learning_rate": 0.00038769353622288276, | |
| "loss": 1.9485, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.3275734906753767, | |
| "grad_norm": 0.42754313349723816, | |
| "learning_rate": 0.0003865844461204028, | |
| "loss": 1.9463, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 1.3381097882204194, | |
| "grad_norm": 0.41211095452308655, | |
| "learning_rate": 0.0003854753560179229, | |
| "loss": 1.95, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 1.348646085765462, | |
| "grad_norm": 0.3844158947467804, | |
| "learning_rate": 0.000384366265915443, | |
| "loss": 1.9426, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 1.3591823833105048, | |
| "grad_norm": 0.4544881582260132, | |
| "learning_rate": 0.00038325717581296306, | |
| "loss": 1.951, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 1.3697186808555473, | |
| "grad_norm": 0.4058513641357422, | |
| "learning_rate": 0.00038214808571048316, | |
| "loss": 1.9521, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.38025497840059, | |
| "grad_norm": 0.38905027508735657, | |
| "learning_rate": 0.0003810389956080032, | |
| "loss": 1.9534, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 1.3907912759456327, | |
| "grad_norm": 0.4224783182144165, | |
| "learning_rate": 0.00037992990550552325, | |
| "loss": 1.9485, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 1.4013275734906754, | |
| "grad_norm": 0.3894629180431366, | |
| "learning_rate": 0.00037882081540304335, | |
| "loss": 1.9459, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 1.411863871035718, | |
| "grad_norm": 0.4435978829860687, | |
| "learning_rate": 0.0003777117253005634, | |
| "loss": 1.9428, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 1.4224001685807608, | |
| "grad_norm": 0.4090045690536499, | |
| "learning_rate": 0.0003766026351980835, | |
| "loss": 1.951, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.4329364661258035, | |
| "grad_norm": 0.4192126989364624, | |
| "learning_rate": 0.00037549354509560355, | |
| "loss": 1.9498, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 1.443472763670846, | |
| "grad_norm": 0.399774968624115, | |
| "learning_rate": 0.00037438445499312365, | |
| "loss": 1.9463, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 1.4540090612158887, | |
| "grad_norm": 0.3659054636955261, | |
| "learning_rate": 0.00037327536489064375, | |
| "loss": 1.9549, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 1.4645453587609314, | |
| "grad_norm": 0.385452538728714, | |
| "learning_rate": 0.0003721662747881638, | |
| "loss": 1.9472, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 1.475081656305974, | |
| "grad_norm": 0.3904755413532257, | |
| "learning_rate": 0.0003710571846856839, | |
| "loss": 1.9438, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.4856179538510168, | |
| "grad_norm": 0.3969903290271759, | |
| "learning_rate": 0.00036994809458320394, | |
| "loss": 1.941, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 1.4961542513960593, | |
| "grad_norm": 0.4201650321483612, | |
| "learning_rate": 0.000368839004480724, | |
| "loss": 1.9451, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 1.5066905489411022, | |
| "grad_norm": 0.3867323100566864, | |
| "learning_rate": 0.0003677299143782441, | |
| "loss": 1.9463, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 1.5172268464861447, | |
| "grad_norm": 0.40658488869667053, | |
| "learning_rate": 0.00036662082427576414, | |
| "loss": 1.9461, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 1.5277631440311874, | |
| "grad_norm": 0.39837929606437683, | |
| "learning_rate": 0.00036551173417328424, | |
| "loss": 1.9517, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 1.53829944157623, | |
| "grad_norm": 0.42312178015708923, | |
| "learning_rate": 0.00036440264407080434, | |
| "loss": 1.9351, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 1.5488357391212728, | |
| "grad_norm": 0.4057867228984833, | |
| "learning_rate": 0.00036329355396832444, | |
| "loss": 1.9403, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 1.5593720366663155, | |
| "grad_norm": 0.39428508281707764, | |
| "learning_rate": 0.0003621844638658445, | |
| "loss": 1.9484, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 1.569908334211358, | |
| "grad_norm": 0.381671279668808, | |
| "learning_rate": 0.00036107537376336453, | |
| "loss": 1.9399, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 1.580444631756401, | |
| "grad_norm": 0.4080953598022461, | |
| "learning_rate": 0.00035996628366088463, | |
| "loss": 1.9316, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.5909809293014434, | |
| "grad_norm": 0.3612942397594452, | |
| "learning_rate": 0.0003588571935584047, | |
| "loss": 1.9337, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 1.6015172268464861, | |
| "grad_norm": 0.37906691431999207, | |
| "learning_rate": 0.0003577481034559248, | |
| "loss": 1.9338, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 1.6120535243915288, | |
| "grad_norm": 0.4057066738605499, | |
| "learning_rate": 0.0003566390133534448, | |
| "loss": 1.9399, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 1.6225898219365715, | |
| "grad_norm": 0.396557480096817, | |
| "learning_rate": 0.0003555299232509649, | |
| "loss": 1.9472, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 1.6331261194816142, | |
| "grad_norm": 0.37647131085395813, | |
| "learning_rate": 0.000354420833148485, | |
| "loss": 1.9368, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 1.6436624170266567, | |
| "grad_norm": 0.3920493721961975, | |
| "learning_rate": 0.0003533117430460051, | |
| "loss": 1.9407, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 1.6541987145716996, | |
| "grad_norm": 0.39372900128364563, | |
| "learning_rate": 0.0003522026529435252, | |
| "loss": 1.9327, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 1.6647350121167421, | |
| "grad_norm": 0.3832472264766693, | |
| "learning_rate": 0.0003510935628410452, | |
| "loss": 1.9365, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 1.6752713096617848, | |
| "grad_norm": 0.3669210970401764, | |
| "learning_rate": 0.00034998447273856527, | |
| "loss": 1.9323, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 1.6858076072068275, | |
| "grad_norm": 0.37810054421424866, | |
| "learning_rate": 0.00034887538263608537, | |
| "loss": 1.93, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.6963439047518702, | |
| "grad_norm": 0.3972882330417633, | |
| "learning_rate": 0.0003477662925336054, | |
| "loss": 1.9299, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 1.706880202296913, | |
| "grad_norm": 0.39600399136543274, | |
| "learning_rate": 0.0003466572024311255, | |
| "loss": 1.9337, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 1.7174164998419554, | |
| "grad_norm": 0.367546021938324, | |
| "learning_rate": 0.00034554811232864556, | |
| "loss": 1.934, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 1.7279527973869984, | |
| "grad_norm": 0.43116411566734314, | |
| "learning_rate": 0.00034443902222616566, | |
| "loss": 1.9296, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 1.7384890949320408, | |
| "grad_norm": 0.41438373923301697, | |
| "learning_rate": 0.00034332993212368577, | |
| "loss": 1.9304, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 1.7490253924770836, | |
| "grad_norm": 0.387265145778656, | |
| "learning_rate": 0.0003422208420212058, | |
| "loss": 1.9273, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 1.7595616900221263, | |
| "grad_norm": 0.3982371687889099, | |
| "learning_rate": 0.0003411117519187259, | |
| "loss": 1.9338, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 1.7700979875671687, | |
| "grad_norm": 0.3915503919124603, | |
| "learning_rate": 0.00034000266181624596, | |
| "loss": 1.9305, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 1.7806342851122117, | |
| "grad_norm": 0.38060539960861206, | |
| "learning_rate": 0.00033889357171376606, | |
| "loss": 1.927, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 1.7911705826572542, | |
| "grad_norm": 0.4222376048564911, | |
| "learning_rate": 0.0003377844816112861, | |
| "loss": 1.9311, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.801706880202297, | |
| "grad_norm": 0.3746761381626129, | |
| "learning_rate": 0.00033667539150880615, | |
| "loss": 1.9269, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 1.8122431777473396, | |
| "grad_norm": 0.3764290511608124, | |
| "learning_rate": 0.00033556630140632625, | |
| "loss": 1.9239, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 1.8227794752923823, | |
| "grad_norm": 0.3536926209926605, | |
| "learning_rate": 0.0003344572113038463, | |
| "loss": 1.9312, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 1.833315772837425, | |
| "grad_norm": 0.3796480596065521, | |
| "learning_rate": 0.0003333481212013664, | |
| "loss": 1.9229, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 1.8438520703824675, | |
| "grad_norm": 0.3728596866130829, | |
| "learning_rate": 0.0003322390310988865, | |
| "loss": 1.9248, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 1.8543883679275104, | |
| "grad_norm": 0.3622676432132721, | |
| "learning_rate": 0.00033112994099640655, | |
| "loss": 1.9274, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 1.8649246654725529, | |
| "grad_norm": 0.3914555013179779, | |
| "learning_rate": 0.00033002085089392665, | |
| "loss": 1.917, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 1.8754609630175956, | |
| "grad_norm": 0.3367026448249817, | |
| "learning_rate": 0.0003289117607914467, | |
| "loss": 1.9213, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 1.8859972605626383, | |
| "grad_norm": 0.41049453616142273, | |
| "learning_rate": 0.0003278026706889668, | |
| "loss": 1.921, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 1.896533558107681, | |
| "grad_norm": 0.38005101680755615, | |
| "learning_rate": 0.00032669358058648684, | |
| "loss": 1.9188, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.9070698556527237, | |
| "grad_norm": 0.3855360150337219, | |
| "learning_rate": 0.0003255844904840069, | |
| "loss": 1.9224, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 1.9176061531977662, | |
| "grad_norm": 0.3764369487762451, | |
| "learning_rate": 0.000324475400381527, | |
| "loss": 1.9221, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 1.928142450742809, | |
| "grad_norm": 0.3933279514312744, | |
| "learning_rate": 0.00032336631027904704, | |
| "loss": 1.9233, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 1.9386787482878516, | |
| "grad_norm": 0.3530935049057007, | |
| "learning_rate": 0.0003222572201765672, | |
| "loss": 1.9218, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 1.9492150458328943, | |
| "grad_norm": 0.36857885122299194, | |
| "learning_rate": 0.00032114813007408724, | |
| "loss": 1.9211, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 1.959751343377937, | |
| "grad_norm": 0.3870936930179596, | |
| "learning_rate": 0.00032003903997160734, | |
| "loss": 1.919, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 1.9702876409229797, | |
| "grad_norm": 0.38852736353874207, | |
| "learning_rate": 0.0003189299498691274, | |
| "loss": 1.9137, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 1.9808239384680224, | |
| "grad_norm": 0.3802979290485382, | |
| "learning_rate": 0.00031782085976664743, | |
| "loss": 1.9238, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 1.9913602360130649, | |
| "grad_norm": 0.39477866888046265, | |
| "learning_rate": 0.00031671176966416753, | |
| "loss": 1.9226, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 2.001896533558108, | |
| "grad_norm": 0.39578545093536377, | |
| "learning_rate": 0.0003156026795616876, | |
| "loss": 1.9067, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 2.0124328311031503, | |
| "grad_norm": 0.3758637607097626, | |
| "learning_rate": 0.0003144935894592077, | |
| "loss": 1.8889, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 2.0229691286481932, | |
| "grad_norm": 0.3424636125564575, | |
| "learning_rate": 0.00031338449935672773, | |
| "loss": 1.881, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 2.0335054261932357, | |
| "grad_norm": 0.3473268151283264, | |
| "learning_rate": 0.0003122754092542478, | |
| "loss": 1.8824, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 2.044041723738278, | |
| "grad_norm": 0.34891676902770996, | |
| "learning_rate": 0.00031116631915176793, | |
| "loss": 1.8876, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 2.054578021283321, | |
| "grad_norm": 0.40848681330680847, | |
| "learning_rate": 0.000310057229049288, | |
| "loss": 1.8804, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 2.0651143188283636, | |
| "grad_norm": 0.3565325140953064, | |
| "learning_rate": 0.0003089481389468081, | |
| "loss": 1.8846, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 2.0756506163734065, | |
| "grad_norm": 0.3714432418346405, | |
| "learning_rate": 0.0003078390488443281, | |
| "loss": 1.8952, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 2.086186913918449, | |
| "grad_norm": 0.39024487137794495, | |
| "learning_rate": 0.00030672995874184817, | |
| "loss": 1.8886, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 2.096723211463492, | |
| "grad_norm": 0.37265217304229736, | |
| "learning_rate": 0.00030562086863936827, | |
| "loss": 1.8815, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 2.1072595090085344, | |
| "grad_norm": 0.4258386194705963, | |
| "learning_rate": 0.0003045117785368883, | |
| "loss": 1.8797, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 2.117795806553577, | |
| "grad_norm": 0.3775697350502014, | |
| "learning_rate": 0.0003034026884344084, | |
| "loss": 1.8863, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 2.12833210409862, | |
| "grad_norm": 0.3451697826385498, | |
| "learning_rate": 0.00030229359833192846, | |
| "loss": 1.8812, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 2.1388684016436623, | |
| "grad_norm": 0.3747578561306, | |
| "learning_rate": 0.00030118450822944857, | |
| "loss": 1.8884, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 2.1494046991887052, | |
| "grad_norm": 0.35056072473526, | |
| "learning_rate": 0.00030007541812696867, | |
| "loss": 1.8721, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 2.1599409967337477, | |
| "grad_norm": 0.3892049491405487, | |
| "learning_rate": 0.0002989663280244887, | |
| "loss": 1.8869, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 2.1704772942787907, | |
| "grad_norm": 0.4040903151035309, | |
| "learning_rate": 0.0002978572379220088, | |
| "loss": 1.8773, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 2.181013591823833, | |
| "grad_norm": 0.4122794568538666, | |
| "learning_rate": 0.00029674814781952886, | |
| "loss": 1.8858, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 2.1915498893688756, | |
| "grad_norm": 0.38314470648765564, | |
| "learning_rate": 0.00029563905771704896, | |
| "loss": 1.8887, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 2.2020861869139186, | |
| "grad_norm": 0.3841986358165741, | |
| "learning_rate": 0.000294529967614569, | |
| "loss": 1.8886, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 2.212622484458961, | |
| "grad_norm": 0.3989698588848114, | |
| "learning_rate": 0.00029342087751208905, | |
| "loss": 1.8876, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 2.223158782004004, | |
| "grad_norm": 0.3878525495529175, | |
| "learning_rate": 0.00029231178740960915, | |
| "loss": 1.8831, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 2.2336950795490464, | |
| "grad_norm": 0.36871328949928284, | |
| "learning_rate": 0.0002912026973071292, | |
| "loss": 1.8869, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 2.244231377094089, | |
| "grad_norm": 0.3922217786312103, | |
| "learning_rate": 0.00029009360720464936, | |
| "loss": 1.8867, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 2.254767674639132, | |
| "grad_norm": 0.37641048431396484, | |
| "learning_rate": 0.0002889845171021694, | |
| "loss": 1.8813, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 2.2653039721841743, | |
| "grad_norm": 0.3834270238876343, | |
| "learning_rate": 0.00028787542699968945, | |
| "loss": 1.8858, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 2.2758402697292173, | |
| "grad_norm": 0.3613283336162567, | |
| "learning_rate": 0.00028676633689720955, | |
| "loss": 1.8788, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 2.2863765672742598, | |
| "grad_norm": 0.3932812511920929, | |
| "learning_rate": 0.0002856572467947296, | |
| "loss": 1.8841, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 2.2969128648193027, | |
| "grad_norm": 0.380537748336792, | |
| "learning_rate": 0.0002845481566922497, | |
| "loss": 1.8867, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 2.307449162364345, | |
| "grad_norm": 0.35902804136276245, | |
| "learning_rate": 0.00028343906658976974, | |
| "loss": 1.8925, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 2.317985459909388, | |
| "grad_norm": 0.3631201386451721, | |
| "learning_rate": 0.0002823299764872898, | |
| "loss": 1.8779, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 2.3285217574544306, | |
| "grad_norm": 0.3709360361099243, | |
| "learning_rate": 0.0002812208863848099, | |
| "loss": 1.877, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 2.339058054999473, | |
| "grad_norm": 0.35048261284828186, | |
| "learning_rate": 0.00028011179628233, | |
| "loss": 1.8717, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 2.349594352544516, | |
| "grad_norm": 0.35067349672317505, | |
| "learning_rate": 0.0002790027061798501, | |
| "loss": 1.8778, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 2.3601306500895585, | |
| "grad_norm": 0.3626950681209564, | |
| "learning_rate": 0.00027789361607737014, | |
| "loss": 1.886, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 2.370666947634601, | |
| "grad_norm": 0.35151103138923645, | |
| "learning_rate": 0.00027678452597489024, | |
| "loss": 1.8776, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 2.381203245179644, | |
| "grad_norm": 0.3527145981788635, | |
| "learning_rate": 0.0002756754358724103, | |
| "loss": 1.8786, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 2.3917395427246864, | |
| "grad_norm": 0.3571159541606903, | |
| "learning_rate": 0.00027456634576993033, | |
| "loss": 1.8704, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 2.4022758402697293, | |
| "grad_norm": 0.35839220881462097, | |
| "learning_rate": 0.00027345725566745043, | |
| "loss": 1.8815, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 2.4128121378147718, | |
| "grad_norm": 0.3516599237918854, | |
| "learning_rate": 0.0002723481655649705, | |
| "loss": 1.8745, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 2.4233484353598147, | |
| "grad_norm": 0.37703123688697815, | |
| "learning_rate": 0.0002712390754624906, | |
| "loss": 1.8717, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 2.433884732904857, | |
| "grad_norm": 0.35914528369903564, | |
| "learning_rate": 0.00027012998536001063, | |
| "loss": 1.8751, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 2.4444210304498997, | |
| "grad_norm": 0.379916787147522, | |
| "learning_rate": 0.00026902089525753073, | |
| "loss": 1.8694, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 2.4549573279949426, | |
| "grad_norm": 0.38764089345932007, | |
| "learning_rate": 0.00026791180515505083, | |
| "loss": 1.8762, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 2.465493625539985, | |
| "grad_norm": 0.3425200879573822, | |
| "learning_rate": 0.0002668027150525709, | |
| "loss": 1.8765, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 2.476029923085028, | |
| "grad_norm": 0.37601912021636963, | |
| "learning_rate": 0.000265693624950091, | |
| "loss": 1.8751, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 2.4865662206300705, | |
| "grad_norm": 0.3854159414768219, | |
| "learning_rate": 0.000264584534847611, | |
| "loss": 1.8746, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 2.4971025181751134, | |
| "grad_norm": 0.402798593044281, | |
| "learning_rate": 0.00026347544474513107, | |
| "loss": 1.8758, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 2.507638815720156, | |
| "grad_norm": 0.3488067388534546, | |
| "learning_rate": 0.00026236635464265117, | |
| "loss": 1.8823, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 2.5181751132651984, | |
| "grad_norm": 0.38071927428245544, | |
| "learning_rate": 0.0002612572645401712, | |
| "loss": 1.8746, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 2.5287114108102413, | |
| "grad_norm": 0.3481471538543701, | |
| "learning_rate": 0.0002601481744376913, | |
| "loss": 1.8787, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 2.539247708355284, | |
| "grad_norm": 0.34442374110221863, | |
| "learning_rate": 0.0002590390843352114, | |
| "loss": 1.88, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 2.5497840059003267, | |
| "grad_norm": 0.34286609292030334, | |
| "learning_rate": 0.00025792999423273147, | |
| "loss": 1.8711, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 2.560320303445369, | |
| "grad_norm": 0.3455844819545746, | |
| "learning_rate": 0.00025682090413025157, | |
| "loss": 1.8692, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 2.570856600990412, | |
| "grad_norm": 0.3363890051841736, | |
| "learning_rate": 0.0002557118140277716, | |
| "loss": 1.8723, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 2.5813928985354546, | |
| "grad_norm": 0.3758355677127838, | |
| "learning_rate": 0.0002546027239252917, | |
| "loss": 1.8786, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 2.591929196080497, | |
| "grad_norm": 0.3661966621875763, | |
| "learning_rate": 0.00025349363382281176, | |
| "loss": 1.8742, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 2.60246549362554, | |
| "grad_norm": 0.3269520103931427, | |
| "learning_rate": 0.00025238454372033186, | |
| "loss": 1.8765, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 2.6130017911705825, | |
| "grad_norm": 0.37588828802108765, | |
| "learning_rate": 0.0002512754536178519, | |
| "loss": 1.8755, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 2.6235380887156254, | |
| "grad_norm": 0.34371519088745117, | |
| "learning_rate": 0.00025016636351537195, | |
| "loss": 1.8689, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 2.634074386260668, | |
| "grad_norm": 0.3703347444534302, | |
| "learning_rate": 0.00024905727341289206, | |
| "loss": 1.869, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.644610683805711, | |
| "grad_norm": 0.3689127266407013, | |
| "learning_rate": 0.00024794818331041216, | |
| "loss": 1.8681, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 2.6551469813507533, | |
| "grad_norm": 0.3827933371067047, | |
| "learning_rate": 0.0002468390932079322, | |
| "loss": 1.8693, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 2.665683278895796, | |
| "grad_norm": 0.3681269586086273, | |
| "learning_rate": 0.0002457300031054523, | |
| "loss": 1.8668, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 2.6762195764408387, | |
| "grad_norm": 0.3521827757358551, | |
| "learning_rate": 0.00024462091300297235, | |
| "loss": 1.872, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 2.6867558739858812, | |
| "grad_norm": 0.35968610644340515, | |
| "learning_rate": 0.00024351182290049245, | |
| "loss": 1.868, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 2.697292171530924, | |
| "grad_norm": 0.34900325536727905, | |
| "learning_rate": 0.0002424027327980125, | |
| "loss": 1.8639, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 2.7078284690759666, | |
| "grad_norm": 0.36115318536758423, | |
| "learning_rate": 0.00024129364269553257, | |
| "loss": 1.8666, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 2.7183647666210096, | |
| "grad_norm": 0.3598721921443939, | |
| "learning_rate": 0.00024018455259305267, | |
| "loss": 1.8588, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 2.728901064166052, | |
| "grad_norm": 0.3527396619319916, | |
| "learning_rate": 0.00023907546249057275, | |
| "loss": 1.8626, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 2.7394373617110945, | |
| "grad_norm": 0.3464626967906952, | |
| "learning_rate": 0.00023796637238809282, | |
| "loss": 1.8724, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 2.7499736592561375, | |
| "grad_norm": 0.36689963936805725, | |
| "learning_rate": 0.0002368572822856129, | |
| "loss": 1.8658, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 2.76050995680118, | |
| "grad_norm": 0.3785768151283264, | |
| "learning_rate": 0.00023574819218313297, | |
| "loss": 1.8642, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 2.771046254346223, | |
| "grad_norm": 0.3481883704662323, | |
| "learning_rate": 0.00023463910208065304, | |
| "loss": 1.8561, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 2.7815825518912654, | |
| "grad_norm": 0.36630862951278687, | |
| "learning_rate": 0.00023353001197817311, | |
| "loss": 1.862, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 2.7921188494363083, | |
| "grad_norm": 0.35414576530456543, | |
| "learning_rate": 0.0002324209218756932, | |
| "loss": 1.8676, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 2.8026551469813508, | |
| "grad_norm": 0.3922441601753235, | |
| "learning_rate": 0.00023131183177321326, | |
| "loss": 1.8709, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 2.8131914445263932, | |
| "grad_norm": 0.34433358907699585, | |
| "learning_rate": 0.00023020274167073334, | |
| "loss": 1.8676, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 2.823727742071436, | |
| "grad_norm": 0.32512736320495605, | |
| "learning_rate": 0.0002290936515682534, | |
| "loss": 1.8694, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 2.8342640396164787, | |
| "grad_norm": 0.3611021041870117, | |
| "learning_rate": 0.00022798456146577348, | |
| "loss": 1.8686, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 2.8448003371615216, | |
| "grad_norm": 0.34630611538887024, | |
| "learning_rate": 0.00022687547136329356, | |
| "loss": 1.8628, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 2.855336634706564, | |
| "grad_norm": 0.34372755885124207, | |
| "learning_rate": 0.00022576638126081363, | |
| "loss": 1.8613, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 2.865872932251607, | |
| "grad_norm": 0.3749391436576843, | |
| "learning_rate": 0.00022465729115833373, | |
| "loss": 1.8725, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 2.8764092297966495, | |
| "grad_norm": 0.3814404606819153, | |
| "learning_rate": 0.00022354820105585378, | |
| "loss": 1.8627, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 2.886945527341692, | |
| "grad_norm": 0.35840287804603577, | |
| "learning_rate": 0.00022243911095337385, | |
| "loss": 1.8606, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 2.897481824886735, | |
| "grad_norm": 0.3533620834350586, | |
| "learning_rate": 0.00022133002085089392, | |
| "loss": 1.8665, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 2.9080181224317774, | |
| "grad_norm": 0.3550478518009186, | |
| "learning_rate": 0.000220220930748414, | |
| "loss": 1.8587, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 2.9185544199768203, | |
| "grad_norm": 0.3665110468864441, | |
| "learning_rate": 0.0002191118406459341, | |
| "loss": 1.8655, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 2.929090717521863, | |
| "grad_norm": 0.3647795021533966, | |
| "learning_rate": 0.00021800275054345415, | |
| "loss": 1.8555, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 2.9396270150669057, | |
| "grad_norm": 0.34207072854042053, | |
| "learning_rate": 0.00021689366044097422, | |
| "loss": 1.8601, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 2.950163312611948, | |
| "grad_norm": 0.3422704339027405, | |
| "learning_rate": 0.0002157845703384943, | |
| "loss": 1.8553, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 2.9606996101569907, | |
| "grad_norm": 0.3600524961948395, | |
| "learning_rate": 0.0002146754802360144, | |
| "loss": 1.8597, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 2.9712359077020336, | |
| "grad_norm": 0.35774359107017517, | |
| "learning_rate": 0.00021356639013353447, | |
| "loss": 1.86, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 2.981772205247076, | |
| "grad_norm": 0.3582908511161804, | |
| "learning_rate": 0.00021245730003105454, | |
| "loss": 1.8591, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 2.9923085027921186, | |
| "grad_norm": 0.36876824498176575, | |
| "learning_rate": 0.0002113482099285746, | |
| "loss": 1.8655, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 3.0028448003371615, | |
| "grad_norm": 0.3600168526172638, | |
| "learning_rate": 0.00021023911982609466, | |
| "loss": 1.8473, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 3.013381097882204, | |
| "grad_norm": 0.33718979358673096, | |
| "learning_rate": 0.00020913002972361476, | |
| "loss": 1.8256, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 3.023917395427247, | |
| "grad_norm": 0.3321118950843811, | |
| "learning_rate": 0.00020802093962113484, | |
| "loss": 1.8251, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 3.0344536929722894, | |
| "grad_norm": 0.34264570474624634, | |
| "learning_rate": 0.0002069118495186549, | |
| "loss": 1.831, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 3.0449899905173323, | |
| "grad_norm": 0.3522898852825165, | |
| "learning_rate": 0.00020580275941617496, | |
| "loss": 1.8249, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 3.055526288062375, | |
| "grad_norm": 0.38659289479255676, | |
| "learning_rate": 0.00020469366931369503, | |
| "loss": 1.829, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 3.0660625856074177, | |
| "grad_norm": 0.3475963771343231, | |
| "learning_rate": 0.00020358457921121513, | |
| "loss": 1.8287, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 3.07659888315246, | |
| "grad_norm": 0.37323230504989624, | |
| "learning_rate": 0.0002024754891087352, | |
| "loss": 1.827, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 3.0871351806975027, | |
| "grad_norm": 0.3953257203102112, | |
| "learning_rate": 0.00020136639900625528, | |
| "loss": 1.8303, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 3.0976714782425456, | |
| "grad_norm": 0.34784358739852905, | |
| "learning_rate": 0.00020025730890377535, | |
| "loss": 1.8225, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 3.108207775787588, | |
| "grad_norm": 0.3565751314163208, | |
| "learning_rate": 0.0001991482188012954, | |
| "loss": 1.8292, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 3.118744073332631, | |
| "grad_norm": 0.368730753660202, | |
| "learning_rate": 0.0001980391286988155, | |
| "loss": 1.8357, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 3.1292803708776735, | |
| "grad_norm": 0.37354937195777893, | |
| "learning_rate": 0.00019693003859633557, | |
| "loss": 1.8276, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 3.1398166684227165, | |
| "grad_norm": 0.3472649157047272, | |
| "learning_rate": 0.00019582094849385565, | |
| "loss": 1.8335, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 3.150352965967759, | |
| "grad_norm": 0.35036763548851013, | |
| "learning_rate": 0.00019471185839137572, | |
| "loss": 1.8276, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 3.1608892635128014, | |
| "grad_norm": 0.3752099573612213, | |
| "learning_rate": 0.0001936027682888958, | |
| "loss": 1.8308, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 3.1714255610578443, | |
| "grad_norm": 0.337298184633255, | |
| "learning_rate": 0.00019249367818641587, | |
| "loss": 1.8268, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 3.181961858602887, | |
| "grad_norm": 0.3451649844646454, | |
| "learning_rate": 0.00019138458808393594, | |
| "loss": 1.825, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 3.1924981561479298, | |
| "grad_norm": 0.36679157614707947, | |
| "learning_rate": 0.00019027549798145602, | |
| "loss": 1.8389, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 3.2030344536929722, | |
| "grad_norm": 0.34255459904670715, | |
| "learning_rate": 0.0001891664078789761, | |
| "loss": 1.8321, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 3.213570751238015, | |
| "grad_norm": 0.36408087611198425, | |
| "learning_rate": 0.0001880573177764962, | |
| "loss": 1.8324, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 3.2241070487830576, | |
| "grad_norm": 0.32933005690574646, | |
| "learning_rate": 0.00018694822767401624, | |
| "loss": 1.8256, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 3.2346433463281, | |
| "grad_norm": 0.37449416518211365, | |
| "learning_rate": 0.0001858391375715363, | |
| "loss": 1.8332, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 3.245179643873143, | |
| "grad_norm": 0.32968634366989136, | |
| "learning_rate": 0.00018473004746905638, | |
| "loss": 1.8247, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 3.2557159414181855, | |
| "grad_norm": 0.3492085635662079, | |
| "learning_rate": 0.00018362095736657646, | |
| "loss": 1.8339, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 3.2662522389632285, | |
| "grad_norm": 0.37141090631484985, | |
| "learning_rate": 0.00018251186726409656, | |
| "loss": 1.8332, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 3.276788536508271, | |
| "grad_norm": 0.3904590308666229, | |
| "learning_rate": 0.0001814027771616166, | |
| "loss": 1.827, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 3.2873248340533134, | |
| "grad_norm": 0.3764263987541199, | |
| "learning_rate": 0.00018029368705913668, | |
| "loss": 1.827, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 3.2978611315983564, | |
| "grad_norm": 0.36718282103538513, | |
| "learning_rate": 0.00017918459695665675, | |
| "loss": 1.828, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 3.308397429143399, | |
| "grad_norm": 0.33118733763694763, | |
| "learning_rate": 0.00017807550685417683, | |
| "loss": 1.8304, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 3.3189337266884418, | |
| "grad_norm": 0.3702305853366852, | |
| "learning_rate": 0.00017696641675169693, | |
| "loss": 1.8313, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 3.3294700242334843, | |
| "grad_norm": 0.3547195792198181, | |
| "learning_rate": 0.000175857326649217, | |
| "loss": 1.8306, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 3.340006321778527, | |
| "grad_norm": 0.3350249230861664, | |
| "learning_rate": 0.00017474823654673705, | |
| "loss": 1.8327, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 3.3505426193235697, | |
| "grad_norm": 0.34737563133239746, | |
| "learning_rate": 0.00017363914644425712, | |
| "loss": 1.8256, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 3.361078916868612, | |
| "grad_norm": 0.3753857910633087, | |
| "learning_rate": 0.00017253005634177722, | |
| "loss": 1.8304, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 3.371615214413655, | |
| "grad_norm": 0.34666532278060913, | |
| "learning_rate": 0.0001714209662392973, | |
| "loss": 1.835, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 3.3821515119586976, | |
| "grad_norm": 0.3317427933216095, | |
| "learning_rate": 0.00017031187613681737, | |
| "loss": 1.8231, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 3.3926878095037405, | |
| "grad_norm": 0.33654922246932983, | |
| "learning_rate": 0.00016920278603433742, | |
| "loss": 1.8272, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 3.403224107048783, | |
| "grad_norm": 0.35222548246383667, | |
| "learning_rate": 0.0001680936959318575, | |
| "loss": 1.8254, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 3.413760404593826, | |
| "grad_norm": 0.3511573374271393, | |
| "learning_rate": 0.0001669846058293776, | |
| "loss": 1.8297, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 3.4242967021388684, | |
| "grad_norm": 0.35278716683387756, | |
| "learning_rate": 0.00016587551572689766, | |
| "loss": 1.8269, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 3.434832999683911, | |
| "grad_norm": 0.3196614682674408, | |
| "learning_rate": 0.00016476642562441774, | |
| "loss": 1.8183, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 3.445369297228954, | |
| "grad_norm": 0.3310936987400055, | |
| "learning_rate": 0.0001636573355219378, | |
| "loss": 1.8234, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 3.4559055947739963, | |
| "grad_norm": 0.35424286127090454, | |
| "learning_rate": 0.00016254824541945786, | |
| "loss": 1.8306, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 3.466441892319039, | |
| "grad_norm": 0.3745037913322449, | |
| "learning_rate": 0.00016143915531697796, | |
| "loss": 1.8313, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 3.4769781898640817, | |
| "grad_norm": 0.3382411599159241, | |
| "learning_rate": 0.00016033006521449803, | |
| "loss": 1.8225, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 3.4875144874091246, | |
| "grad_norm": 0.33086690306663513, | |
| "learning_rate": 0.0001592209751120181, | |
| "loss": 1.8208, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 3.498050784954167, | |
| "grad_norm": 0.3586762249469757, | |
| "learning_rate": 0.00015811188500953818, | |
| "loss": 1.8255, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 3.5085870824992096, | |
| "grad_norm": 0.3511541187763214, | |
| "learning_rate": 0.00015700279490705825, | |
| "loss": 1.8259, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 3.5191233800442525, | |
| "grad_norm": 0.3497931659221649, | |
| "learning_rate": 0.00015589370480457833, | |
| "loss": 1.8226, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 3.529659677589295, | |
| "grad_norm": 0.35156911611557007, | |
| "learning_rate": 0.0001547846147020984, | |
| "loss": 1.8231, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 3.540195975134338, | |
| "grad_norm": 0.34975793957710266, | |
| "learning_rate": 0.00015367552459961847, | |
| "loss": 1.824, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 3.5507322726793804, | |
| "grad_norm": 0.3560537099838257, | |
| "learning_rate": 0.00015256643449713855, | |
| "loss": 1.8284, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 3.5612685702244233, | |
| "grad_norm": 0.37322962284088135, | |
| "learning_rate": 0.00015145734439465865, | |
| "loss": 1.8229, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 3.571804867769466, | |
| "grad_norm": 0.3404606878757477, | |
| "learning_rate": 0.0001503482542921787, | |
| "loss": 1.8295, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 3.5823411653145083, | |
| "grad_norm": 0.3346281349658966, | |
| "learning_rate": 0.00014923916418969877, | |
| "loss": 1.8221, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 3.5928774628595512, | |
| "grad_norm": 0.3319614827632904, | |
| "learning_rate": 0.00014813007408721884, | |
| "loss": 1.8225, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 3.6034137604045937, | |
| "grad_norm": 0.3317611515522003, | |
| "learning_rate": 0.00014702098398473892, | |
| "loss": 1.8175, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 3.6139500579496366, | |
| "grad_norm": 0.3446439206600189, | |
| "learning_rate": 0.00014591189388225902, | |
| "loss": 1.8283, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 3.624486355494679, | |
| "grad_norm": 0.32466185092926025, | |
| "learning_rate": 0.0001448028037797791, | |
| "loss": 1.8201, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 3.635022653039722, | |
| "grad_norm": 0.3251676559448242, | |
| "learning_rate": 0.00014369371367729914, | |
| "loss": 1.8269, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 3.6455589505847645, | |
| "grad_norm": 0.3591017723083496, | |
| "learning_rate": 0.0001425846235748192, | |
| "loss": 1.8202, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 3.656095248129807, | |
| "grad_norm": 0.34030893445014954, | |
| "learning_rate": 0.00014147553347233928, | |
| "loss": 1.8185, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 3.66663154567485, | |
| "grad_norm": 0.35147637128829956, | |
| "learning_rate": 0.00014036644336985939, | |
| "loss": 1.8252, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 3.6771678432198924, | |
| "grad_norm": 0.3547748327255249, | |
| "learning_rate": 0.00013925735326737946, | |
| "loss": 1.8142, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 3.6877041407649354, | |
| "grad_norm": 0.3361000716686249, | |
| "learning_rate": 0.0001381482631648995, | |
| "loss": 1.8235, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 3.698240438309978, | |
| "grad_norm": 0.3312234580516815, | |
| "learning_rate": 0.00013703917306241958, | |
| "loss": 1.8267, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 3.7087767358550208, | |
| "grad_norm": 0.36078423261642456, | |
| "learning_rate": 0.00013593008295993965, | |
| "loss": 1.8192, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 3.7193130334000633, | |
| "grad_norm": 0.32330262660980225, | |
| "learning_rate": 0.00013482099285745975, | |
| "loss": 1.8228, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 3.7298493309451057, | |
| "grad_norm": 0.34211012721061707, | |
| "learning_rate": 0.00013371190275497983, | |
| "loss": 1.8207, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 3.7403856284901487, | |
| "grad_norm": 0.34478235244750977, | |
| "learning_rate": 0.0001326028126524999, | |
| "loss": 1.8221, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 3.750921926035191, | |
| "grad_norm": 0.3438977301120758, | |
| "learning_rate": 0.00013149372255001995, | |
| "loss": 1.8214, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 3.7614582235802336, | |
| "grad_norm": 0.3275744616985321, | |
| "learning_rate": 0.00013038463244754005, | |
| "loss": 1.8153, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 3.7719945211252766, | |
| "grad_norm": 0.35410231351852417, | |
| "learning_rate": 0.00012927554234506012, | |
| "loss": 1.8144, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 3.7825308186703195, | |
| "grad_norm": 0.3045212924480438, | |
| "learning_rate": 0.0001281664522425802, | |
| "loss": 1.8162, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 3.793067116215362, | |
| "grad_norm": 0.32530274987220764, | |
| "learning_rate": 0.00012705736214010027, | |
| "loss": 1.8212, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 3.8036034137604045, | |
| "grad_norm": 0.35284802317619324, | |
| "learning_rate": 0.00012594827203762032, | |
| "loss": 1.8217, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 3.8141397113054474, | |
| "grad_norm": 0.35002532601356506, | |
| "learning_rate": 0.00012483918193514042, | |
| "loss": 1.8179, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 3.82467600885049, | |
| "grad_norm": 0.33642175793647766, | |
| "learning_rate": 0.0001237300918326605, | |
| "loss": 1.8136, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 3.8352123063955323, | |
| "grad_norm": 0.3203926086425781, | |
| "learning_rate": 0.00012262100173018056, | |
| "loss": 1.8189, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 3.8457486039405753, | |
| "grad_norm": 0.3277607560157776, | |
| "learning_rate": 0.00012151191162770062, | |
| "loss": 1.813, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 3.856284901485618, | |
| "grad_norm": 0.3415702283382416, | |
| "learning_rate": 0.00012040282152522071, | |
| "loss": 1.8157, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 3.8668211990306607, | |
| "grad_norm": 0.33326780796051025, | |
| "learning_rate": 0.00011929373142274079, | |
| "loss": 1.8144, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 3.877357496575703, | |
| "grad_norm": 0.3394588530063629, | |
| "learning_rate": 0.00011818464132026086, | |
| "loss": 1.8069, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 3.887893794120746, | |
| "grad_norm": 0.38374754786491394, | |
| "learning_rate": 0.00011707555121778093, | |
| "loss": 1.8076, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 3.8984300916657886, | |
| "grad_norm": 0.34460264444351196, | |
| "learning_rate": 0.00011596646111530102, | |
| "loss": 1.8129, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 3.908966389210831, | |
| "grad_norm": 0.3361436724662781, | |
| "learning_rate": 0.00011485737101282108, | |
| "loss": 1.8105, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 3.919502686755874, | |
| "grad_norm": 0.35143253207206726, | |
| "learning_rate": 0.00011374828091034115, | |
| "loss": 1.8184, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 3.930038984300917, | |
| "grad_norm": 0.34239351749420166, | |
| "learning_rate": 0.00011263919080786124, | |
| "loss": 1.8061, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 3.9405752818459594, | |
| "grad_norm": 0.3523593246936798, | |
| "learning_rate": 0.0001115301007053813, | |
| "loss": 1.8092, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 3.951111579391002, | |
| "grad_norm": 0.36350205540657043, | |
| "learning_rate": 0.00011042101060290139, | |
| "loss": 1.8094, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 3.961647876936045, | |
| "grad_norm": 0.3419075906276703, | |
| "learning_rate": 0.00010931192050042146, | |
| "loss": 1.8077, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 3.9721841744810873, | |
| "grad_norm": 0.3350605070590973, | |
| "learning_rate": 0.00010820283039794154, | |
| "loss": 1.8115, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 3.9827204720261298, | |
| "grad_norm": 0.33970579504966736, | |
| "learning_rate": 0.00010709374029546161, | |
| "loss": 1.8171, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 3.9932567695711727, | |
| "grad_norm": 0.36339592933654785, | |
| "learning_rate": 0.00010598465019298167, | |
| "loss": 1.8073, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 4.003793067116216, | |
| "grad_norm": 0.33541393280029297, | |
| "learning_rate": 0.00010487556009050176, | |
| "loss": 1.7981, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 4.014329364661258, | |
| "grad_norm": 0.36207860708236694, | |
| "learning_rate": 0.00010376646998802183, | |
| "loss": 1.7909, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 4.024865662206301, | |
| "grad_norm": 0.34258803725242615, | |
| "learning_rate": 0.0001026573798855419, | |
| "loss": 1.7793, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 4.0354019597513435, | |
| "grad_norm": 0.34286418557167053, | |
| "learning_rate": 0.00010154828978306198, | |
| "loss": 1.7894, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 4.0459382572963865, | |
| "grad_norm": 0.3334041237831116, | |
| "learning_rate": 0.00010043919968058205, | |
| "loss": 1.7841, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 4.0564745548414285, | |
| "grad_norm": 0.3277220129966736, | |
| "learning_rate": 9.933010957810213e-05, | |
| "loss": 1.7851, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 4.067010852386471, | |
| "grad_norm": 0.3734584450721741, | |
| "learning_rate": 9.82210194756222e-05, | |
| "loss": 1.7889, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 4.077547149931514, | |
| "grad_norm": 0.3457617461681366, | |
| "learning_rate": 9.711192937314229e-05, | |
| "loss": 1.792, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 4.088083447476556, | |
| "grad_norm": 0.35224205255508423, | |
| "learning_rate": 9.600283927066235e-05, | |
| "loss": 1.7906, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 4.098619745021599, | |
| "grad_norm": 0.3286111652851105, | |
| "learning_rate": 9.489374916818243e-05, | |
| "loss": 1.7812, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 4.109156042566642, | |
| "grad_norm": 0.32292017340660095, | |
| "learning_rate": 9.37846590657025e-05, | |
| "loss": 1.7875, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 4.119692340111685, | |
| "grad_norm": 0.33784738183021545, | |
| "learning_rate": 9.267556896322257e-05, | |
| "loss": 1.785, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 4.130228637656727, | |
| "grad_norm": 0.33517780900001526, | |
| "learning_rate": 9.156647886074265e-05, | |
| "loss": 1.7926, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 4.14076493520177, | |
| "grad_norm": 0.340833842754364, | |
| "learning_rate": 9.045738875826271e-05, | |
| "loss": 1.7875, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 4.151301232746813, | |
| "grad_norm": 0.3653368353843689, | |
| "learning_rate": 8.93482986557828e-05, | |
| "loss": 1.7843, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 4.161837530291855, | |
| "grad_norm": 0.3394693434238434, | |
| "learning_rate": 8.823920855330288e-05, | |
| "loss": 1.7804, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 4.172373827836898, | |
| "grad_norm": 0.3323003947734833, | |
| "learning_rate": 8.713011845082295e-05, | |
| "loss": 1.7848, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 4.182910125381941, | |
| "grad_norm": 0.35341712832450867, | |
| "learning_rate": 8.602102834834302e-05, | |
| "loss": 1.7833, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 4.193446422926984, | |
| "grad_norm": 0.3553250730037689, | |
| "learning_rate": 8.49119382458631e-05, | |
| "loss": 1.7844, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 4.203982720472026, | |
| "grad_norm": 0.3491000831127167, | |
| "learning_rate": 8.380284814338317e-05, | |
| "loss": 1.783, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 4.214519018017069, | |
| "grad_norm": 0.36473289132118225, | |
| "learning_rate": 8.269375804090324e-05, | |
| "loss": 1.7873, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 4.225055315562112, | |
| "grad_norm": 0.3357420563697815, | |
| "learning_rate": 8.158466793842332e-05, | |
| "loss": 1.7909, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 4.235591613107154, | |
| "grad_norm": 0.33982038497924805, | |
| "learning_rate": 8.047557783594339e-05, | |
| "loss": 1.7877, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 4.246127910652197, | |
| "grad_norm": 0.33362457156181335, | |
| "learning_rate": 7.936648773346347e-05, | |
| "loss": 1.7878, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 4.25666420819724, | |
| "grad_norm": 0.33826008439064026, | |
| "learning_rate": 7.825739763098354e-05, | |
| "loss": 1.7949, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 4.267200505742283, | |
| "grad_norm": 0.3940160572528839, | |
| "learning_rate": 7.714830752850361e-05, | |
| "loss": 1.7886, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 4.277736803287325, | |
| "grad_norm": 0.33485040068626404, | |
| "learning_rate": 7.60392174260237e-05, | |
| "loss": 1.7837, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 4.288273100832368, | |
| "grad_norm": 0.3465060591697693, | |
| "learning_rate": 7.493012732354376e-05, | |
| "loss": 1.7794, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 4.2988093983774105, | |
| "grad_norm": 0.3455548584461212, | |
| "learning_rate": 7.382103722106385e-05, | |
| "loss": 1.7877, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 4.3093456959224525, | |
| "grad_norm": 0.33163055777549744, | |
| "learning_rate": 7.271194711858392e-05, | |
| "loss": 1.7861, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 4.3198819934674955, | |
| "grad_norm": 0.34442830085754395, | |
| "learning_rate": 7.160285701610398e-05, | |
| "loss": 1.7861, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 4.330418291012538, | |
| "grad_norm": 0.3633157014846802, | |
| "learning_rate": 7.049376691362407e-05, | |
| "loss": 1.7842, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 4.340954588557581, | |
| "grad_norm": 0.3231643736362457, | |
| "learning_rate": 6.938467681114414e-05, | |
| "loss": 1.7833, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 4.351490886102623, | |
| "grad_norm": 0.36037677526474, | |
| "learning_rate": 6.827558670866422e-05, | |
| "loss": 1.7836, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 4.362027183647666, | |
| "grad_norm": 0.3292723000049591, | |
| "learning_rate": 6.716649660618429e-05, | |
| "loss": 1.7806, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 4.372563481192709, | |
| "grad_norm": 0.37054258584976196, | |
| "learning_rate": 6.605740650370436e-05, | |
| "loss": 1.79, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 4.383099778737751, | |
| "grad_norm": 0.3358231782913208, | |
| "learning_rate": 6.494831640122444e-05, | |
| "loss": 1.7882, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 4.393636076282794, | |
| "grad_norm": 0.3368220031261444, | |
| "learning_rate": 6.383922629874451e-05, | |
| "loss": 1.7812, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 4.404172373827837, | |
| "grad_norm": 0.34333834052085876, | |
| "learning_rate": 6.273013619626458e-05, | |
| "loss": 1.7837, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 4.41470867137288, | |
| "grad_norm": 0.3434154987335205, | |
| "learning_rate": 6.162104609378466e-05, | |
| "loss": 1.7858, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 4.425244968917922, | |
| "grad_norm": 0.35153815150260925, | |
| "learning_rate": 6.051195599130473e-05, | |
| "loss": 1.7759, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 4.435781266462965, | |
| "grad_norm": 0.3414738178253174, | |
| "learning_rate": 5.940286588882481e-05, | |
| "loss": 1.7827, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 4.446317564008008, | |
| "grad_norm": 0.3285759687423706, | |
| "learning_rate": 5.8293775786344886e-05, | |
| "loss": 1.7826, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 4.45685386155305, | |
| "grad_norm": 0.35258546471595764, | |
| "learning_rate": 5.718468568386496e-05, | |
| "loss": 1.7883, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 4.467390159098093, | |
| "grad_norm": 0.33706724643707275, | |
| "learning_rate": 5.607559558138503e-05, | |
| "loss": 1.7786, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 4.477926456643136, | |
| "grad_norm": 0.3357242941856384, | |
| "learning_rate": 5.496650547890511e-05, | |
| "loss": 1.7904, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 4.488462754188178, | |
| "grad_norm": 0.3552809953689575, | |
| "learning_rate": 5.385741537642518e-05, | |
| "loss": 1.7858, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 4.498999051733221, | |
| "grad_norm": 0.3606029450893402, | |
| "learning_rate": 5.2748325273945254e-05, | |
| "loss": 1.7767, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 4.509535349278264, | |
| "grad_norm": 0.3668212592601776, | |
| "learning_rate": 5.163923517146533e-05, | |
| "loss": 1.7841, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 4.520071646823307, | |
| "grad_norm": 0.34113767743110657, | |
| "learning_rate": 5.053014506898541e-05, | |
| "loss": 1.7777, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 4.530607944368349, | |
| "grad_norm": 0.33344870805740356, | |
| "learning_rate": 4.942105496650548e-05, | |
| "loss": 1.7789, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 4.541144241913392, | |
| "grad_norm": 0.34441855549812317, | |
| "learning_rate": 4.8311964864025556e-05, | |
| "loss": 1.786, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 4.5516805394584345, | |
| "grad_norm": 0.3361603617668152, | |
| "learning_rate": 4.720287476154563e-05, | |
| "loss": 1.7835, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 4.5622168370034775, | |
| "grad_norm": 0.3377070426940918, | |
| "learning_rate": 4.60937846590657e-05, | |
| "loss": 1.7842, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 4.5727531345485195, | |
| "grad_norm": 0.3532165288925171, | |
| "learning_rate": 4.4984694556585777e-05, | |
| "loss": 1.7848, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 4.583289432093562, | |
| "grad_norm": 0.35418322682380676, | |
| "learning_rate": 4.387560445410585e-05, | |
| "loss": 1.7854, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 4.593825729638605, | |
| "grad_norm": 0.33272701501846313, | |
| "learning_rate": 4.276651435162593e-05, | |
| "loss": 1.7754, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 4.604362027183647, | |
| "grad_norm": 0.36113685369491577, | |
| "learning_rate": 4.1657424249146004e-05, | |
| "loss": 1.7752, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 4.61489832472869, | |
| "grad_norm": 0.34041377902030945, | |
| "learning_rate": 4.054833414666607e-05, | |
| "loss": 1.774, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 4.625434622273733, | |
| "grad_norm": 0.3422810435295105, | |
| "learning_rate": 3.943924404418615e-05, | |
| "loss": 1.7832, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 4.635970919818776, | |
| "grad_norm": 0.3397616744041443, | |
| "learning_rate": 3.8330153941706225e-05, | |
| "loss": 1.78, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 4.646507217363818, | |
| "grad_norm": 0.3389655649662018, | |
| "learning_rate": 3.72210638392263e-05, | |
| "loss": 1.7771, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 4.657043514908861, | |
| "grad_norm": 0.3590547442436218, | |
| "learning_rate": 3.611197373674637e-05, | |
| "loss": 1.7838, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 4.667579812453904, | |
| "grad_norm": 0.33880913257598877, | |
| "learning_rate": 3.500288363426645e-05, | |
| "loss": 1.7708, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 4.678116109998946, | |
| "grad_norm": 0.3376372456550598, | |
| "learning_rate": 3.389379353178653e-05, | |
| "loss": 1.7767, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 4.688652407543989, | |
| "grad_norm": 0.3335518538951874, | |
| "learning_rate": 3.2784703429306594e-05, | |
| "loss": 1.7784, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 4.699188705089032, | |
| "grad_norm": 0.37929996848106384, | |
| "learning_rate": 3.167561332682667e-05, | |
| "loss": 1.7714, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 4.709725002634074, | |
| "grad_norm": 0.3256159722805023, | |
| "learning_rate": 3.056652322434675e-05, | |
| "loss": 1.7824, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 4.720261300179117, | |
| "grad_norm": 0.34018459916114807, | |
| "learning_rate": 2.9457433121866822e-05, | |
| "loss": 1.7821, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 4.73079759772416, | |
| "grad_norm": 0.3662751317024231, | |
| "learning_rate": 2.8348343019386895e-05, | |
| "loss": 1.7799, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 4.741333895269202, | |
| "grad_norm": 0.32580700516700745, | |
| "learning_rate": 2.723925291690697e-05, | |
| "loss": 1.7801, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 4.751870192814245, | |
| "grad_norm": 0.3326426148414612, | |
| "learning_rate": 2.6130162814427046e-05, | |
| "loss": 1.7824, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 4.762406490359288, | |
| "grad_norm": 0.3480491042137146, | |
| "learning_rate": 2.502107271194712e-05, | |
| "loss": 1.7738, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 4.772942787904331, | |
| "grad_norm": 0.3338908553123474, | |
| "learning_rate": 2.3911982609467194e-05, | |
| "loss": 1.7809, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 4.783479085449373, | |
| "grad_norm": 0.35016825795173645, | |
| "learning_rate": 2.2802892506987267e-05, | |
| "loss": 1.7798, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 4.794015382994416, | |
| "grad_norm": 0.35119980573654175, | |
| "learning_rate": 2.1693802404507344e-05, | |
| "loss": 1.7772, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 4.804551680539459, | |
| "grad_norm": 0.34869563579559326, | |
| "learning_rate": 2.0584712302027415e-05, | |
| "loss": 1.7834, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 4.815087978084501, | |
| "grad_norm": 0.3165900409221649, | |
| "learning_rate": 1.9475622199547492e-05, | |
| "loss": 1.7766, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 4.8256242756295435, | |
| "grad_norm": 0.33901646733283997, | |
| "learning_rate": 1.836653209706757e-05, | |
| "loss": 1.7781, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 4.8361605731745865, | |
| "grad_norm": 0.34397250413894653, | |
| "learning_rate": 1.725744199458764e-05, | |
| "loss": 1.7773, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 4.846696870719629, | |
| "grad_norm": 0.3640625476837158, | |
| "learning_rate": 1.6148351892107716e-05, | |
| "loss": 1.7775, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 4.857233168264671, | |
| "grad_norm": 0.3395892381668091, | |
| "learning_rate": 1.503926178962779e-05, | |
| "loss": 1.7817, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 4.867769465809714, | |
| "grad_norm": 0.3353815972805023, | |
| "learning_rate": 1.3930171687147865e-05, | |
| "loss": 1.7759, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 4.878305763354757, | |
| "grad_norm": 0.34299150109291077, | |
| "learning_rate": 1.2821081584667939e-05, | |
| "loss": 1.779, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 4.888842060899799, | |
| "grad_norm": 0.34803491830825806, | |
| "learning_rate": 1.1711991482188014e-05, | |
| "loss": 1.7787, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 4.899378358444842, | |
| "grad_norm": 0.3452516198158264, | |
| "learning_rate": 1.0602901379708088e-05, | |
| "loss": 1.7822, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 4.909914655989885, | |
| "grad_norm": 0.32334357500076294, | |
| "learning_rate": 9.493811277228162e-06, | |
| "loss": 1.7774, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 4.920450953534928, | |
| "grad_norm": 0.34011390805244446, | |
| "learning_rate": 8.384721174748237e-06, | |
| "loss": 1.7788, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 4.93098725107997, | |
| "grad_norm": 0.3399524688720703, | |
| "learning_rate": 7.2756310722683116e-06, | |
| "loss": 1.778, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 4.941523548625013, | |
| "grad_norm": 0.33615124225616455, | |
| "learning_rate": 6.166540969788386e-06, | |
| "loss": 1.7771, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 4.952059846170056, | |
| "grad_norm": 0.3466767966747284, | |
| "learning_rate": 5.05745086730846e-06, | |
| "loss": 1.7774, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 4.962596143715098, | |
| "grad_norm": 0.33684036135673523, | |
| "learning_rate": 3.948360764828534e-06, | |
| "loss": 1.7735, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 4.973132441260141, | |
| "grad_norm": 0.3275541663169861, | |
| "learning_rate": 2.8392706623486093e-06, | |
| "loss": 1.773, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 4.983668738805184, | |
| "grad_norm": 0.3321060240268707, | |
| "learning_rate": 1.7301805598686838e-06, | |
| "loss": 1.7764, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 4.994205036350227, | |
| "grad_norm": 0.3356621265411377, | |
| "learning_rate": 6.210904573887583e-07, | |
| "loss": 1.7762, | |
| "step": 47400 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 47455, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.0383510041641344e+17, | |
| "train_batch_size": 128, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |