| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.8650519031141869, |
| "eval_steps": 500, |
| "global_step": 1500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0005767012687427913, |
| "grad_norm": 0.5134636163711548, |
| "learning_rate": 0.0, |
| "loss": 1.6129628419876099, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0011534025374855825, |
| "grad_norm": 0.45678019523620605, |
| "learning_rate": 4e-05, |
| "loss": 1.713558554649353, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0017301038062283738, |
| "grad_norm": 0.6324027180671692, |
| "learning_rate": 8e-05, |
| "loss": 1.9871511459350586, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.002306805074971165, |
| "grad_norm": 0.5307025909423828, |
| "learning_rate": 0.00012, |
| "loss": 1.6862211227416992, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0028835063437139563, |
| "grad_norm": 0.616538941860199, |
| "learning_rate": 0.00016, |
| "loss": 2.1033642292022705, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0034602076124567475, |
| "grad_norm": 0.7627953290939331, |
| "learning_rate": 0.0002, |
| "loss": 2.150984764099121, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.004036908881199538, |
| "grad_norm": 0.8402333855628967, |
| "learning_rate": 0.00019996151625938042, |
| "loss": 2.0197458267211914, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.00461361014994233, |
| "grad_norm": 3.813333034515381, |
| "learning_rate": 0.00019992303251876084, |
| "loss": 2.396656036376953, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.005190311418685121, |
| "grad_norm": 0.9861733913421631, |
| "learning_rate": 0.00019988454877814126, |
| "loss": 2.1392970085144043, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0057670126874279125, |
| "grad_norm": 0.7931668758392334, |
| "learning_rate": 0.00019984606503752164, |
| "loss": 1.8062304258346558, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.006343713956170703, |
| "grad_norm": 0.8828097581863403, |
| "learning_rate": 0.00019980758129690206, |
| "loss": 1.76358962059021, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.006920415224913495, |
| "grad_norm": 0.7205682396888733, |
| "learning_rate": 0.00019976909755628247, |
| "loss": 1.3197358846664429, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.007497116493656286, |
| "grad_norm": 1.2321408987045288, |
| "learning_rate": 0.0001997306138156629, |
| "loss": 1.7697328329086304, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.008073817762399077, |
| "grad_norm": 0.9804911613464355, |
| "learning_rate": 0.0001996921300750433, |
| "loss": 1.7214155197143555, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.00865051903114187, |
| "grad_norm": 0.9436901807785034, |
| "learning_rate": 0.00019965364633442372, |
| "loss": 1.6395944356918335, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.00922722029988466, |
| "grad_norm": 1.6564269065856934, |
| "learning_rate": 0.00019961516259380414, |
| "loss": 1.8607707023620605, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.00980392156862745, |
| "grad_norm": 1.0676305294036865, |
| "learning_rate": 0.00019957667885318455, |
| "loss": 1.4897263050079346, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.010380622837370242, |
| "grad_norm": 0.9889469146728516, |
| "learning_rate": 0.00019953819511256494, |
| "loss": 1.7445942163467407, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.010957324106113034, |
| "grad_norm": 0.8717456459999084, |
| "learning_rate": 0.00019949971137194535, |
| "loss": 1.4854474067687988, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.011534025374855825, |
| "grad_norm": 1.110196590423584, |
| "learning_rate": 0.00019946122763132577, |
| "loss": 1.32136869430542, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.012110726643598616, |
| "grad_norm": 0.7795314192771912, |
| "learning_rate": 0.00019942274389070618, |
| "loss": 1.7199318408966064, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.012687427912341407, |
| "grad_norm": 0.7504187822341919, |
| "learning_rate": 0.0001993842601500866, |
| "loss": 1.2975201606750488, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.0132641291810842, |
| "grad_norm": 0.8012252449989319, |
| "learning_rate": 0.00019934577640946702, |
| "loss": 1.2630457878112793, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.01384083044982699, |
| "grad_norm": 0.9531145691871643, |
| "learning_rate": 0.00019930729266884743, |
| "loss": 1.6974424123764038, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.01441753171856978, |
| "grad_norm": 1.020970106124878, |
| "learning_rate": 0.00019926880892822785, |
| "loss": 1.294957160949707, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.014994232987312572, |
| "grad_norm": 1.7608129978179932, |
| "learning_rate": 0.00019923032518760823, |
| "loss": 1.801735520362854, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.015570934256055362, |
| "grad_norm": 0.9601960182189941, |
| "learning_rate": 0.00019919184144698865, |
| "loss": 1.4538304805755615, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.016147635524798153, |
| "grad_norm": 0.7025886178016663, |
| "learning_rate": 0.00019915335770636906, |
| "loss": 1.1746238470077515, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.016724336793540944, |
| "grad_norm": 0.8506267666816711, |
| "learning_rate": 0.00019911487396574948, |
| "loss": 1.1891943216323853, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.01730103806228374, |
| "grad_norm": 0.9117224216461182, |
| "learning_rate": 0.0001990763902251299, |
| "loss": 1.4325735569000244, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.01787773933102653, |
| "grad_norm": 0.8756442070007324, |
| "learning_rate": 0.0001990379064845103, |
| "loss": 1.3962581157684326, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.01845444059976932, |
| "grad_norm": 1.0293549299240112, |
| "learning_rate": 0.00019899942274389073, |
| "loss": 1.4936443567276, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.01903114186851211, |
| "grad_norm": 0.8239012360572815, |
| "learning_rate": 0.00019896093900327114, |
| "loss": 1.1294159889221191, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.0196078431372549, |
| "grad_norm": 0.6293753385543823, |
| "learning_rate": 0.00019892245526265153, |
| "loss": 1.219704031944275, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.020184544405997693, |
| "grad_norm": 0.9778785109519958, |
| "learning_rate": 0.00019888397152203194, |
| "loss": 1.3405961990356445, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.020761245674740483, |
| "grad_norm": 0.9916248917579651, |
| "learning_rate": 0.00019884548778141236, |
| "loss": 1.7191007137298584, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.021337946943483274, |
| "grad_norm": 0.9758312106132507, |
| "learning_rate": 0.00019880700404079277, |
| "loss": 1.2949879169464111, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.02191464821222607, |
| "grad_norm": 0.7310605645179749, |
| "learning_rate": 0.0001987685203001732, |
| "loss": 1.282931923866272, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.02249134948096886, |
| "grad_norm": 0.6537899374961853, |
| "learning_rate": 0.0001987300365595536, |
| "loss": 1.4050456285476685, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.02306805074971165, |
| "grad_norm": 0.6727839708328247, |
| "learning_rate": 0.00019869155281893402, |
| "loss": 1.3566672801971436, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.02364475201845444, |
| "grad_norm": 0.6026540994644165, |
| "learning_rate": 0.00019865306907831444, |
| "loss": 1.6914572715759277, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.02422145328719723, |
| "grad_norm": 0.7345203161239624, |
| "learning_rate": 0.00019861458533769482, |
| "loss": 1.3210856914520264, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.024798154555940023, |
| "grad_norm": 1.7062476873397827, |
| "learning_rate": 0.00019857610159707524, |
| "loss": 1.6727783679962158, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.025374855824682813, |
| "grad_norm": 0.7726621627807617, |
| "learning_rate": 0.00019853761785645565, |
| "loss": 1.7425484657287598, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.025951557093425604, |
| "grad_norm": 0.6947644948959351, |
| "learning_rate": 0.00019849913411583607, |
| "loss": 1.0628504753112793, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.0265282583621684, |
| "grad_norm": 0.7833652496337891, |
| "learning_rate": 0.00019846065037521649, |
| "loss": 1.4800021648406982, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.02710495963091119, |
| "grad_norm": 0.8065851926803589, |
| "learning_rate": 0.0001984221666345969, |
| "loss": 1.2809616327285767, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.02768166089965398, |
| "grad_norm": 1.044630527496338, |
| "learning_rate": 0.00019838368289397732, |
| "loss": 1.602962851524353, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.02825836216839677, |
| "grad_norm": 0.5969672203063965, |
| "learning_rate": 0.00019834519915335773, |
| "loss": 1.5166534185409546, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.02883506343713956, |
| "grad_norm": 0.848512589931488, |
| "learning_rate": 0.00019830671541273812, |
| "loss": 1.442568063735962, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.029411764705882353, |
| "grad_norm": 0.5782500505447388, |
| "learning_rate": 0.00019826823167211853, |
| "loss": 1.3492627143859863, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.029988465974625143, |
| "grad_norm": 0.850151777267456, |
| "learning_rate": 0.00019822974793149895, |
| "loss": 1.5313668251037598, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.030565167243367934, |
| "grad_norm": 0.613896906375885, |
| "learning_rate": 0.00019819126419087937, |
| "loss": 1.0709185600280762, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.031141868512110725, |
| "grad_norm": 0.9450347423553467, |
| "learning_rate": 0.00019815278045025978, |
| "loss": 1.5562160015106201, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.031718569780853516, |
| "grad_norm": 0.9424428939819336, |
| "learning_rate": 0.0001981142967096402, |
| "loss": 1.764065146446228, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.03229527104959631, |
| "grad_norm": 0.9744471311569214, |
| "learning_rate": 0.0001980758129690206, |
| "loss": 0.9400297403335571, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.0328719723183391, |
| "grad_norm": 0.7247487902641296, |
| "learning_rate": 0.00019803732922840103, |
| "loss": 1.572107195854187, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.03344867358708189, |
| "grad_norm": 0.6125597357749939, |
| "learning_rate": 0.00019799884548778141, |
| "loss": 1.2189209461212158, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.034025374855824686, |
| "grad_norm": 1.0781699419021606, |
| "learning_rate": 0.00019796036174716183, |
| "loss": 1.3933414220809937, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.03460207612456748, |
| "grad_norm": 0.8329439759254456, |
| "learning_rate": 0.00019792187800654224, |
| "loss": 1.4748475551605225, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.03517877739331027, |
| "grad_norm": 0.7766849398612976, |
| "learning_rate": 0.00019788339426592266, |
| "loss": 1.4775745868682861, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.03575547866205306, |
| "grad_norm": 0.7776947021484375, |
| "learning_rate": 0.00019784491052530308, |
| "loss": 1.4959548711776733, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.03633217993079585, |
| "grad_norm": 0.7114179134368896, |
| "learning_rate": 0.0001978064267846835, |
| "loss": 1.4756664037704468, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.03690888119953864, |
| "grad_norm": 0.675800621509552, |
| "learning_rate": 0.0001977679430440639, |
| "loss": 1.4753670692443848, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.03748558246828143, |
| "grad_norm": 1.5709729194641113, |
| "learning_rate": 0.00019772945930344432, |
| "loss": 1.5947999954223633, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.03806228373702422, |
| "grad_norm": 0.7363697290420532, |
| "learning_rate": 0.0001976909755628247, |
| "loss": 1.2786856889724731, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.03863898500576701, |
| "grad_norm": 0.8212243318557739, |
| "learning_rate": 0.00019765249182220512, |
| "loss": 1.3553478717803955, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.0392156862745098, |
| "grad_norm": 0.6724039912223816, |
| "learning_rate": 0.00019761400808158554, |
| "loss": 1.3045082092285156, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.039792387543252594, |
| "grad_norm": 1.0372695922851562, |
| "learning_rate": 0.00019757552434096596, |
| "loss": 1.5149048566818237, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.040369088811995385, |
| "grad_norm": 0.7058703303337097, |
| "learning_rate": 0.00019753704060034637, |
| "loss": 1.2227076292037964, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.040945790080738176, |
| "grad_norm": 0.8637105226516724, |
| "learning_rate": 0.00019749855685972679, |
| "loss": 1.0762852430343628, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.04152249134948097, |
| "grad_norm": 0.8108904957771301, |
| "learning_rate": 0.0001974600731191072, |
| "loss": 1.4130628108978271, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.04209919261822376, |
| "grad_norm": 1.2491207122802734, |
| "learning_rate": 0.00019742158937848762, |
| "loss": 1.7983347177505493, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.04267589388696655, |
| "grad_norm": 1.1523128747940063, |
| "learning_rate": 0.000197383105637868, |
| "loss": 1.5859603881835938, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.04325259515570934, |
| "grad_norm": 0.7240892648696899, |
| "learning_rate": 0.00019734462189724842, |
| "loss": 1.4029178619384766, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.04382929642445214, |
| "grad_norm": 0.7445366978645325, |
| "learning_rate": 0.00019730613815662884, |
| "loss": 1.351811170578003, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.04440599769319493, |
| "grad_norm": 0.9881113767623901, |
| "learning_rate": 0.00019726765441600925, |
| "loss": 1.437370777130127, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.04498269896193772, |
| "grad_norm": 1.0404249429702759, |
| "learning_rate": 0.00019722917067538967, |
| "loss": 1.0401325225830078, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.04555940023068051, |
| "grad_norm": 0.998892605304718, |
| "learning_rate": 0.00019719068693477008, |
| "loss": 1.2733221054077148, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.0461361014994233, |
| "grad_norm": 1.0299255847930908, |
| "learning_rate": 0.0001971522031941505, |
| "loss": 1.8878190517425537, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.04671280276816609, |
| "grad_norm": 0.6168495416641235, |
| "learning_rate": 0.0001971137194535309, |
| "loss": 1.3375468254089355, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.04728950403690888, |
| "grad_norm": 0.645830512046814, |
| "learning_rate": 0.0001970752357129113, |
| "loss": 0.986657440662384, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.04786620530565167, |
| "grad_norm": 0.7971145510673523, |
| "learning_rate": 0.00019703675197229172, |
| "loss": 1.3205912113189697, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.04844290657439446, |
| "grad_norm": 0.6297418475151062, |
| "learning_rate": 0.00019699826823167213, |
| "loss": 1.3360888957977295, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.049019607843137254, |
| "grad_norm": 0.9845420718193054, |
| "learning_rate": 0.00019695978449105255, |
| "loss": 1.4006659984588623, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.049596309111880045, |
| "grad_norm": 0.73700350522995, |
| "learning_rate": 0.00019692130075043296, |
| "loss": 1.1298922300338745, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.050173010380622836, |
| "grad_norm": 0.7659608721733093, |
| "learning_rate": 0.00019688281700981338, |
| "loss": 1.2487225532531738, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.05074971164936563, |
| "grad_norm": 0.7576966285705566, |
| "learning_rate": 0.0001968443332691938, |
| "loss": 1.346827507019043, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.05132641291810842, |
| "grad_norm": 0.6777650117874146, |
| "learning_rate": 0.0001968058495285742, |
| "loss": 1.9484481811523438, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.05190311418685121, |
| "grad_norm": 0.9935969114303589, |
| "learning_rate": 0.0001967673657879546, |
| "loss": 1.1737089157104492, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.052479815455594, |
| "grad_norm": 1.0581051111221313, |
| "learning_rate": 0.000196728882047335, |
| "loss": 1.2755905389785767, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.0530565167243368, |
| "grad_norm": 0.8372200131416321, |
| "learning_rate": 0.00019669039830671543, |
| "loss": 1.7988427877426147, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.05363321799307959, |
| "grad_norm": 0.8300452828407288, |
| "learning_rate": 0.00019665191456609584, |
| "loss": 0.9904743432998657, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.05420991926182238, |
| "grad_norm": 0.6703553199768066, |
| "learning_rate": 0.00019661343082547626, |
| "loss": 1.2092053890228271, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.05478662053056517, |
| "grad_norm": 0.703804075717926, |
| "learning_rate": 0.00019657494708485667, |
| "loss": 1.1028215885162354, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.05536332179930796, |
| "grad_norm": 0.8232657313346863, |
| "learning_rate": 0.0001965364633442371, |
| "loss": 1.3875727653503418, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.05594002306805075, |
| "grad_norm": 0.6119164824485779, |
| "learning_rate": 0.00019649797960361747, |
| "loss": 1.161183476448059, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.05651672433679354, |
| "grad_norm": 0.7460926175117493, |
| "learning_rate": 0.0001964594958629979, |
| "loss": 1.3667285442352295, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.05709342560553633, |
| "grad_norm": 0.6345133185386658, |
| "learning_rate": 0.0001964210121223783, |
| "loss": 1.1740115880966187, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.05767012687427912, |
| "grad_norm": 0.800463080406189, |
| "learning_rate": 0.00019638252838175872, |
| "loss": 1.1274670362472534, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.058246828143021914, |
| "grad_norm": 0.6817663311958313, |
| "learning_rate": 0.00019634404464113914, |
| "loss": 1.2432150840759277, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.058823529411764705, |
| "grad_norm": 0.7663673162460327, |
| "learning_rate": 0.00019630556090051955, |
| "loss": 1.2066948413848877, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.059400230680507496, |
| "grad_norm": 1.0259535312652588, |
| "learning_rate": 0.00019626707715989997, |
| "loss": 1.3713116645812988, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.05997693194925029, |
| "grad_norm": 0.6617158055305481, |
| "learning_rate": 0.00019622859341928038, |
| "loss": 1.0320123434066772, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.06055363321799308, |
| "grad_norm": 1.0050235986709595, |
| "learning_rate": 0.00019619010967866077, |
| "loss": 1.5375267267227173, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.06113033448673587, |
| "grad_norm": 0.5563177466392517, |
| "learning_rate": 0.00019615162593804119, |
| "loss": 0.9102802276611328, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.06170703575547866, |
| "grad_norm": 0.9994164109230042, |
| "learning_rate": 0.0001961131421974216, |
| "loss": 1.6505589485168457, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.06228373702422145, |
| "grad_norm": 0.907625675201416, |
| "learning_rate": 0.00019607465845680202, |
| "loss": 1.6013598442077637, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.06286043829296424, |
| "grad_norm": 1.0009554624557495, |
| "learning_rate": 0.00019603617471618243, |
| "loss": 1.0403454303741455, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.06343713956170703, |
| "grad_norm": 0.8243467807769775, |
| "learning_rate": 0.00019599769097556285, |
| "loss": 1.5382654666900635, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.06401384083044982, |
| "grad_norm": 1.0160003900527954, |
| "learning_rate": 0.00019595920723494326, |
| "loss": 1.2732863426208496, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.06459054209919261, |
| "grad_norm": 0.608269453048706, |
| "learning_rate": 0.00019592072349432368, |
| "loss": 1.070478916168213, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.0651672433679354, |
| "grad_norm": 0.7176778316497803, |
| "learning_rate": 0.00019588223975370406, |
| "loss": 1.302718162536621, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.0657439446366782, |
| "grad_norm": 0.551771879196167, |
| "learning_rate": 0.00019584375601308448, |
| "loss": 0.9242706894874573, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.06632064590542099, |
| "grad_norm": 0.9680222868919373, |
| "learning_rate": 0.0001958052722724649, |
| "loss": 1.9658548831939697, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.06689734717416378, |
| "grad_norm": 0.8025707602500916, |
| "learning_rate": 0.0001957667885318453, |
| "loss": 1.5753577947616577, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.06747404844290658, |
| "grad_norm": 0.7211287021636963, |
| "learning_rate": 0.00019572830479122573, |
| "loss": 1.3677327632904053, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.06805074971164937, |
| "grad_norm": 0.7547542452812195, |
| "learning_rate": 0.00019568982105060614, |
| "loss": 1.507096767425537, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.06862745098039216, |
| "grad_norm": 0.6146650314331055, |
| "learning_rate": 0.00019565133730998656, |
| "loss": 1.1320711374282837, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.06920415224913495, |
| "grad_norm": 0.7611070275306702, |
| "learning_rate": 0.00019561285356936697, |
| "loss": 1.207049012184143, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.06978085351787774, |
| "grad_norm": 0.714883029460907, |
| "learning_rate": 0.00019557436982874736, |
| "loss": 1.3823729753494263, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.07035755478662054, |
| "grad_norm": 0.6768732666969299, |
| "learning_rate": 0.00019553588608812778, |
| "loss": 1.3038188219070435, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.07093425605536333, |
| "grad_norm": 0.6013675332069397, |
| "learning_rate": 0.0001954974023475082, |
| "loss": 1.056199073791504, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.07151095732410612, |
| "grad_norm": 0.8240784406661987, |
| "learning_rate": 0.0001954589186068886, |
| "loss": 1.4242757558822632, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.07208765859284891, |
| "grad_norm": 0.6539785265922546, |
| "learning_rate": 0.00019542043486626902, |
| "loss": 1.161075472831726, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.0726643598615917, |
| "grad_norm": 0.6347744464874268, |
| "learning_rate": 0.00019538195112564944, |
| "loss": 1.179503321647644, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.07324106113033449, |
| "grad_norm": 0.7294688820838928, |
| "learning_rate": 0.00019534346738502985, |
| "loss": 1.2521535158157349, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.07381776239907728, |
| "grad_norm": 0.6087843179702759, |
| "learning_rate": 0.00019530498364441027, |
| "loss": 1.0938013792037964, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.07439446366782007, |
| "grad_norm": 1.116716980934143, |
| "learning_rate": 0.00019526649990379066, |
| "loss": 1.74098539352417, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.07497116493656286, |
| "grad_norm": 0.7590331435203552, |
| "learning_rate": 0.00019522801616317107, |
| "loss": 1.2943538427352905, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.07554786620530565, |
| "grad_norm": 0.9142744541168213, |
| "learning_rate": 0.00019518953242255149, |
| "loss": 1.0948201417922974, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.07612456747404844, |
| "grad_norm": 0.8165064454078674, |
| "learning_rate": 0.0001951510486819319, |
| "loss": 1.5152888298034668, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.07670126874279123, |
| "grad_norm": 0.8904751539230347, |
| "learning_rate": 0.00019511256494131232, |
| "loss": 1.3492425680160522, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.07727797001153403, |
| "grad_norm": 0.632338285446167, |
| "learning_rate": 0.00019507408120069273, |
| "loss": 1.1460604667663574, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.07785467128027682, |
| "grad_norm": 0.6621445417404175, |
| "learning_rate": 0.00019503559746007315, |
| "loss": 1.153398871421814, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.0784313725490196, |
| "grad_norm": 0.928593635559082, |
| "learning_rate": 0.00019499711371945356, |
| "loss": 1.4575080871582031, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.0790080738177624, |
| "grad_norm": 0.9125704765319824, |
| "learning_rate": 0.00019495862997883395, |
| "loss": 1.176555871963501, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.07958477508650519, |
| "grad_norm": 0.7735126614570618, |
| "learning_rate": 0.00019492014623821437, |
| "loss": 1.3028615713119507, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.08016147635524798, |
| "grad_norm": 1.4182281494140625, |
| "learning_rate": 0.00019488166249759478, |
| "loss": 1.7123095989227295, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.08073817762399077, |
| "grad_norm": 0.957777738571167, |
| "learning_rate": 0.0001948431787569752, |
| "loss": 1.2952847480773926, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.08131487889273356, |
| "grad_norm": 0.6284865140914917, |
| "learning_rate": 0.0001948046950163556, |
| "loss": 1.063300609588623, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.08189158016147635, |
| "grad_norm": 1.020240068435669, |
| "learning_rate": 0.00019476621127573603, |
| "loss": 1.0956578254699707, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.08246828143021914, |
| "grad_norm": 0.9629870057106018, |
| "learning_rate": 0.00019472772753511644, |
| "loss": 1.6626744270324707, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.08304498269896193, |
| "grad_norm": 0.723129391670227, |
| "learning_rate": 0.00019468924379449686, |
| "loss": 1.5930454730987549, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.08362168396770472, |
| "grad_norm": 0.6031758785247803, |
| "learning_rate": 0.00019465076005387725, |
| "loss": 1.3550267219543457, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.08419838523644751, |
| "grad_norm": 0.6608120799064636, |
| "learning_rate": 0.00019461227631325766, |
| "loss": 1.091226577758789, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.0847750865051903, |
| "grad_norm": 0.8583825826644897, |
| "learning_rate": 0.00019457379257263808, |
| "loss": 1.2840064764022827, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.0853517877739331, |
| "grad_norm": 0.6371753215789795, |
| "learning_rate": 0.0001945353088320185, |
| "loss": 1.0223405361175537, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.08592848904267589, |
| "grad_norm": 0.6101475954055786, |
| "learning_rate": 0.0001944968250913989, |
| "loss": 1.2935165166854858, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.08650519031141868, |
| "grad_norm": 0.8921840190887451, |
| "learning_rate": 0.00019445834135077932, |
| "loss": 1.3194819688796997, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.08708189158016148, |
| "grad_norm": 1.0423651933670044, |
| "learning_rate": 0.0001944198576101597, |
| "loss": 1.162503957748413, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.08765859284890427, |
| "grad_norm": 0.9011998772621155, |
| "learning_rate": 0.00019438137386954013, |
| "loss": 1.4854192733764648, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.08823529411764706, |
| "grad_norm": 0.6850185990333557, |
| "learning_rate": 0.00019434289012892054, |
| "loss": 1.2653287649154663, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.08881199538638986, |
| "grad_norm": 0.5742697715759277, |
| "learning_rate": 0.00019430440638830093, |
| "loss": 1.1639142036437988, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.08938869665513265, |
| "grad_norm": 0.5625914931297302, |
| "learning_rate": 0.00019426592264768134, |
| "loss": 1.0387107133865356, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.08996539792387544, |
| "grad_norm": 0.7183355689048767, |
| "learning_rate": 0.00019422743890706176, |
| "loss": 1.211965799331665, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.09054209919261823, |
| "grad_norm": 0.8835011124610901, |
| "learning_rate": 0.00019418895516644217, |
| "loss": 1.0958670377731323, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.09111880046136102, |
| "grad_norm": 0.6885069608688354, |
| "learning_rate": 0.0001941504714258226, |
| "loss": 1.297393798828125, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.09169550173010381, |
| "grad_norm": 0.7518923878669739, |
| "learning_rate": 0.000194111987685203, |
| "loss": 1.1739790439605713, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.0922722029988466, |
| "grad_norm": 0.8452180027961731, |
| "learning_rate": 0.00019407350394458342, |
| "loss": 1.2312185764312744, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.09284890426758939, |
| "grad_norm": 0.8018324971199036, |
| "learning_rate": 0.00019403502020396384, |
| "loss": 1.392999291419983, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.09342560553633218, |
| "grad_norm": 0.743302583694458, |
| "learning_rate": 0.00019399653646334422, |
| "loss": 1.1602349281311035, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.09400230680507497, |
| "grad_norm": 0.551163911819458, |
| "learning_rate": 0.00019395805272272464, |
| "loss": 1.0061742067337036, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.09457900807381776, |
| "grad_norm": 0.6732088327407837, |
| "learning_rate": 0.00019391956898210505, |
| "loss": 1.2422168254852295, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.09515570934256055, |
| "grad_norm": 0.6432737708091736, |
| "learning_rate": 0.00019388108524148547, |
| "loss": 0.8992981910705566, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.09573241061130335, |
| "grad_norm": 0.893099308013916, |
| "learning_rate": 0.00019384260150086589, |
| "loss": 1.4426004886627197, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.09630911188004614, |
| "grad_norm": 0.7915064692497253, |
| "learning_rate": 0.0001938041177602463, |
| "loss": 1.1332988739013672, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.09688581314878893, |
| "grad_norm": 0.7785482406616211, |
| "learning_rate": 0.00019376563401962672, |
| "loss": 1.1662797927856445, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.09746251441753172, |
| "grad_norm": 0.7676025032997131, |
| "learning_rate": 0.00019372715027900713, |
| "loss": 1.276615858078003, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.09803921568627451, |
| "grad_norm": 0.7058248519897461, |
| "learning_rate": 0.00019368866653838752, |
| "loss": 1.2280982732772827, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.0986159169550173, |
| "grad_norm": 0.7814574241638184, |
| "learning_rate": 0.00019365018279776793, |
| "loss": 1.6545538902282715, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.09919261822376009, |
| "grad_norm": 0.5429863333702087, |
| "learning_rate": 0.00019361169905714835, |
| "loss": 1.047904133796692, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.09976931949250288, |
| "grad_norm": 0.7021914124488831, |
| "learning_rate": 0.00019357321531652876, |
| "loss": 1.3578035831451416, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.10034602076124567, |
| "grad_norm": 0.7608473896980286, |
| "learning_rate": 0.00019353473157590918, |
| "loss": 1.3332273960113525, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.10092272202998846, |
| "grad_norm": 0.8988219499588013, |
| "learning_rate": 0.0001934962478352896, |
| "loss": 1.5955560207366943, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.10149942329873125, |
| "grad_norm": 0.8784334659576416, |
| "learning_rate": 0.00019345776409467, |
| "loss": 1.4267313480377197, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.10207612456747404, |
| "grad_norm": 0.9006462097167969, |
| "learning_rate": 0.00019341928035405043, |
| "loss": 1.2960124015808105, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.10265282583621683, |
| "grad_norm": 0.7736122608184814, |
| "learning_rate": 0.00019338079661343081, |
| "loss": 1.3841434717178345, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.10322952710495963, |
| "grad_norm": 0.8202458620071411, |
| "learning_rate": 0.00019334231287281123, |
| "loss": 1.2962226867675781, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.10380622837370242, |
| "grad_norm": 0.743390679359436, |
| "learning_rate": 0.00019330382913219164, |
| "loss": 1.010484218597412, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.10438292964244521, |
| "grad_norm": 0.7926476001739502, |
| "learning_rate": 0.00019326534539157206, |
| "loss": 1.45333731174469, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.104959630911188, |
| "grad_norm": 0.527367889881134, |
| "learning_rate": 0.00019322686165095248, |
| "loss": 0.7763160467147827, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.10553633217993079, |
| "grad_norm": 1.0006170272827148, |
| "learning_rate": 0.0001931883779103329, |
| "loss": 1.089290738105774, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.1061130334486736, |
| "grad_norm": 0.7497840523719788, |
| "learning_rate": 0.0001931498941697133, |
| "loss": 1.1641783714294434, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.10668973471741638, |
| "grad_norm": 0.6732814908027649, |
| "learning_rate": 0.00019311141042909372, |
| "loss": 1.0954653024673462, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.10726643598615918, |
| "grad_norm": 0.7817464470863342, |
| "learning_rate": 0.0001930729266884741, |
| "loss": 1.5050190687179565, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.10784313725490197, |
| "grad_norm": 0.813869297504425, |
| "learning_rate": 0.00019303444294785452, |
| "loss": 1.5048751831054688, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.10841983852364476, |
| "grad_norm": 0.6368386745452881, |
| "learning_rate": 0.00019299595920723494, |
| "loss": 1.0601242780685425, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.10899653979238755, |
| "grad_norm": 0.817610502243042, |
| "learning_rate": 0.00019295747546661536, |
| "loss": 1.2267041206359863, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.10957324106113034, |
| "grad_norm": 0.768892228603363, |
| "learning_rate": 0.00019291899172599577, |
| "loss": 1.0935152769088745, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.11014994232987313, |
| "grad_norm": 0.8072124123573303, |
| "learning_rate": 0.00019288050798537619, |
| "loss": 1.5566798448562622, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.11072664359861592, |
| "grad_norm": 0.7275574803352356, |
| "learning_rate": 0.0001928420242447566, |
| "loss": 1.5278323888778687, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.11130334486735871, |
| "grad_norm": 0.6448370814323425, |
| "learning_rate": 0.00019280354050413702, |
| "loss": 1.2096084356307983, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.1118800461361015, |
| "grad_norm": 0.9334590435028076, |
| "learning_rate": 0.0001927650567635174, |
| "loss": 1.2487378120422363, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.11245674740484429, |
| "grad_norm": 0.6830427646636963, |
| "learning_rate": 0.00019272657302289782, |
| "loss": 1.3567012548446655, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.11303344867358708, |
| "grad_norm": 0.9035089612007141, |
| "learning_rate": 0.00019268808928227823, |
| "loss": 1.1751577854156494, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.11361014994232987, |
| "grad_norm": 0.5569579005241394, |
| "learning_rate": 0.00019264960554165865, |
| "loss": 1.0159823894500732, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.11418685121107267, |
| "grad_norm": 0.6232113838195801, |
| "learning_rate": 0.00019261112180103907, |
| "loss": 1.0779603719711304, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.11476355247981546, |
| "grad_norm": 0.7666590213775635, |
| "learning_rate": 0.00019257263806041948, |
| "loss": 1.2052793502807617, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.11534025374855825, |
| "grad_norm": 0.6218665242195129, |
| "learning_rate": 0.0001925341543197999, |
| "loss": 1.2699958086013794, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.11591695501730104, |
| "grad_norm": 0.6059345006942749, |
| "learning_rate": 0.0001924956705791803, |
| "loss": 1.0522977113723755, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.11649365628604383, |
| "grad_norm": 0.6952403783798218, |
| "learning_rate": 0.0001924571868385607, |
| "loss": 1.3461261987686157, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.11707035755478662, |
| "grad_norm": 0.7097076177597046, |
| "learning_rate": 0.00019241870309794111, |
| "loss": 1.0901520252227783, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.11764705882352941, |
| "grad_norm": 1.3426554203033447, |
| "learning_rate": 0.00019238021935732153, |
| "loss": 1.8886399269104004, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.1182237600922722, |
| "grad_norm": 1.00478196144104, |
| "learning_rate": 0.00019234173561670195, |
| "loss": 1.2172045707702637, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.11880046136101499, |
| "grad_norm": 0.8586134314537048, |
| "learning_rate": 0.00019230325187608236, |
| "loss": 1.0469045639038086, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.11937716262975778, |
| "grad_norm": 0.7872591018676758, |
| "learning_rate": 0.00019226476813546278, |
| "loss": 1.1137733459472656, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.11995386389850057, |
| "grad_norm": 0.8721824884414673, |
| "learning_rate": 0.0001922262843948432, |
| "loss": 1.3743940591812134, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.12053056516724336, |
| "grad_norm": 0.6212759613990784, |
| "learning_rate": 0.0001921878006542236, |
| "loss": 0.900457501411438, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.12110726643598616, |
| "grad_norm": 1.0083750486373901, |
| "learning_rate": 0.000192149316913604, |
| "loss": 1.339089035987854, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.12168396770472895, |
| "grad_norm": 0.794417142868042, |
| "learning_rate": 0.0001921108331729844, |
| "loss": 1.194704532623291, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.12226066897347174, |
| "grad_norm": 1.1438184976577759, |
| "learning_rate": 0.00019207234943236483, |
| "loss": 1.3168675899505615, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.12283737024221453, |
| "grad_norm": 0.5655554533004761, |
| "learning_rate": 0.00019203386569174524, |
| "loss": 1.008853793144226, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.12341407151095732, |
| "grad_norm": 0.7868179082870483, |
| "learning_rate": 0.00019199538195112566, |
| "loss": 1.3174118995666504, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.12399077277970011, |
| "grad_norm": 0.6736404299736023, |
| "learning_rate": 0.00019195689821050607, |
| "loss": 1.054055094718933, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.1245674740484429, |
| "grad_norm": 0.7425172328948975, |
| "learning_rate": 0.00019191841446988649, |
| "loss": 1.2892072200775146, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.1251441753171857, |
| "grad_norm": 0.7724793553352356, |
| "learning_rate": 0.00019187993072926687, |
| "loss": 1.3278907537460327, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.12572087658592848, |
| "grad_norm": 0.7415600419044495, |
| "learning_rate": 0.0001918414469886473, |
| "loss": 1.1893579959869385, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.12629757785467127, |
| "grad_norm": 0.8178536295890808, |
| "learning_rate": 0.0001918029632480277, |
| "loss": 1.3486452102661133, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.12687427912341406, |
| "grad_norm": 0.803683340549469, |
| "learning_rate": 0.00019176447950740812, |
| "loss": 1.297539234161377, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.12745098039215685, |
| "grad_norm": 0.6226982474327087, |
| "learning_rate": 0.00019172599576678854, |
| "loss": 1.0952314138412476, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.12802768166089964, |
| "grad_norm": 0.652317225933075, |
| "learning_rate": 0.00019168751202616895, |
| "loss": 0.9360387325286865, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.12860438292964244, |
| "grad_norm": 0.8147749900817871, |
| "learning_rate": 0.00019164902828554937, |
| "loss": 1.0632787942886353, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.12918108419838523, |
| "grad_norm": 0.9202223420143127, |
| "learning_rate": 0.00019161054454492978, |
| "loss": 1.3678290843963623, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.12975778546712802, |
| "grad_norm": 1.1951165199279785, |
| "learning_rate": 0.00019157206080431017, |
| "loss": 1.2670767307281494, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.1303344867358708, |
| "grad_norm": 0.7266793847084045, |
| "learning_rate": 0.00019153357706369058, |
| "loss": 1.1158084869384766, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.1309111880046136, |
| "grad_norm": 0.6181395649909973, |
| "learning_rate": 0.000191495093323071, |
| "loss": 1.1156044006347656, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.1314878892733564, |
| "grad_norm": 0.7921776175498962, |
| "learning_rate": 0.00019145660958245142, |
| "loss": 1.001752257347107, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.13206459054209918, |
| "grad_norm": 0.5998401045799255, |
| "learning_rate": 0.00019141812584183183, |
| "loss": 0.7688826322555542, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.13264129181084197, |
| "grad_norm": 0.7660285234451294, |
| "learning_rate": 0.00019137964210121225, |
| "loss": 1.2462745904922485, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.13321799307958476, |
| "grad_norm": 0.7925796508789062, |
| "learning_rate": 0.00019134115836059266, |
| "loss": 1.1053651571273804, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.13379469434832755, |
| "grad_norm": 0.6407649517059326, |
| "learning_rate": 0.00019130267461997308, |
| "loss": 0.8710946440696716, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.13437139561707034, |
| "grad_norm": 0.7516645789146423, |
| "learning_rate": 0.00019126419087935346, |
| "loss": 1.009436011314392, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.13494809688581316, |
| "grad_norm": 0.5998948216438293, |
| "learning_rate": 0.00019122570713873388, |
| "loss": 1.0309457778930664, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.13552479815455595, |
| "grad_norm": 1.1897567510604858, |
| "learning_rate": 0.0001911872233981143, |
| "loss": 0.9930981397628784, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.13610149942329874, |
| "grad_norm": 0.7404462695121765, |
| "learning_rate": 0.0001911487396574947, |
| "loss": 1.1489670276641846, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.13667820069204153, |
| "grad_norm": 0.7168471813201904, |
| "learning_rate": 0.00019111025591687513, |
| "loss": 1.202157735824585, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.13725490196078433, |
| "grad_norm": 0.7502639293670654, |
| "learning_rate": 0.00019107177217625554, |
| "loss": 1.022951364517212, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.13783160322952712, |
| "grad_norm": 0.6795151233673096, |
| "learning_rate": 0.00019103328843563596, |
| "loss": 1.1194236278533936, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.1384083044982699, |
| "grad_norm": 0.7620200514793396, |
| "learning_rate": 0.00019099480469501637, |
| "loss": 0.8411365747451782, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.1389850057670127, |
| "grad_norm": 0.6618032455444336, |
| "learning_rate": 0.00019095632095439676, |
| "loss": 0.7801553606987, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.1395617070357555, |
| "grad_norm": 0.9366044402122498, |
| "learning_rate": 0.00019091783721377718, |
| "loss": 1.0621672868728638, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.14013840830449828, |
| "grad_norm": 1.0874788761138916, |
| "learning_rate": 0.0001908793534731576, |
| "loss": 1.6787068843841553, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.14071510957324107, |
| "grad_norm": 0.8962084054946899, |
| "learning_rate": 0.000190840869732538, |
| "loss": 1.1922732591629028, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.14129181084198386, |
| "grad_norm": 0.7039315700531006, |
| "learning_rate": 0.00019080238599191842, |
| "loss": 1.177897334098816, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.14186851211072665, |
| "grad_norm": 0.9172819256782532, |
| "learning_rate": 0.00019076390225129884, |
| "loss": 1.3276829719543457, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.14244521337946944, |
| "grad_norm": 1.002533197402954, |
| "learning_rate": 0.00019072541851067925, |
| "loss": 1.11848783493042, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.14302191464821223, |
| "grad_norm": 0.9164738059043884, |
| "learning_rate": 0.00019068693477005967, |
| "loss": 0.7153259515762329, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.14359861591695502, |
| "grad_norm": 0.7163867354393005, |
| "learning_rate": 0.00019064845102944006, |
| "loss": 1.206921100616455, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.14417531718569782, |
| "grad_norm": 0.8200199604034424, |
| "learning_rate": 0.00019060996728882047, |
| "loss": 0.9798004031181335, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.1447520184544406, |
| "grad_norm": 0.9806034564971924, |
| "learning_rate": 0.00019057148354820089, |
| "loss": 1.0969898700714111, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.1453287197231834, |
| "grad_norm": 1.0849624872207642, |
| "learning_rate": 0.0001905329998075813, |
| "loss": 1.2618253231048584, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.1459054209919262, |
| "grad_norm": 0.8736698031425476, |
| "learning_rate": 0.00019049451606696172, |
| "loss": 1.1534979343414307, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.14648212226066898, |
| "grad_norm": 0.6748337745666504, |
| "learning_rate": 0.00019045603232634213, |
| "loss": 0.9178370237350464, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.14705882352941177, |
| "grad_norm": 0.8655548691749573, |
| "learning_rate": 0.00019041754858572255, |
| "loss": 1.157179355621338, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.14763552479815456, |
| "grad_norm": 0.7558174133300781, |
| "learning_rate": 0.00019037906484510296, |
| "loss": 0.7844438552856445, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.14821222606689735, |
| "grad_norm": 0.8278117179870605, |
| "learning_rate": 0.00019034058110448335, |
| "loss": 1.4085724353790283, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.14878892733564014, |
| "grad_norm": 0.9563509225845337, |
| "learning_rate": 0.00019030209736386377, |
| "loss": 1.244802713394165, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.14936562860438293, |
| "grad_norm": 0.8018333315849304, |
| "learning_rate": 0.00019026361362324418, |
| "loss": 0.801522970199585, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.14994232987312572, |
| "grad_norm": 0.555248498916626, |
| "learning_rate": 0.0001902251298826246, |
| "loss": 0.8989696502685547, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.15051903114186851, |
| "grad_norm": 0.5092940926551819, |
| "learning_rate": 0.000190186646142005, |
| "loss": 0.8229849338531494, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.1510957324106113, |
| "grad_norm": 0.614162266254425, |
| "learning_rate": 0.00019014816240138543, |
| "loss": 1.14143705368042, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.1516724336793541, |
| "grad_norm": 0.7050411701202393, |
| "learning_rate": 0.00019010967866076584, |
| "loss": 1.2602849006652832, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.1522491349480969, |
| "grad_norm": 0.8917875289916992, |
| "learning_rate": 0.00019007119492014626, |
| "loss": 1.2684617042541504, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.15282583621683968, |
| "grad_norm": 0.7177139520645142, |
| "learning_rate": 0.00019003271117952665, |
| "loss": 0.664681077003479, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.15340253748558247, |
| "grad_norm": 0.7513463497161865, |
| "learning_rate": 0.00018999422743890706, |
| "loss": 0.9689874649047852, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.15397923875432526, |
| "grad_norm": 0.8350100517272949, |
| "learning_rate": 0.00018995574369828748, |
| "loss": 1.222740888595581, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.15455594002306805, |
| "grad_norm": 1.152787685394287, |
| "learning_rate": 0.0001899172599576679, |
| "loss": 1.0707926750183105, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.15513264129181084, |
| "grad_norm": 0.7810789346694946, |
| "learning_rate": 0.0001898787762170483, |
| "loss": 1.1552890539169312, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.15570934256055363, |
| "grad_norm": 0.864863395690918, |
| "learning_rate": 0.00018984029247642872, |
| "loss": 1.2455859184265137, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.15628604382929642, |
| "grad_norm": 0.578794002532959, |
| "learning_rate": 0.00018980180873580914, |
| "loss": 0.9284070730209351, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.1568627450980392, |
| "grad_norm": 0.9245108962059021, |
| "learning_rate": 0.00018976332499518955, |
| "loss": 0.8936307430267334, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.157439446366782, |
| "grad_norm": 1.022964358329773, |
| "learning_rate": 0.00018972484125456994, |
| "loss": 1.2052812576293945, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.1580161476355248, |
| "grad_norm": 0.6136555075645447, |
| "learning_rate": 0.00018968635751395036, |
| "loss": 0.9395220875740051, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.15859284890426759, |
| "grad_norm": 0.49354949593544006, |
| "learning_rate": 0.00018964787377333077, |
| "loss": 0.7979940176010132, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.15916955017301038, |
| "grad_norm": 0.8118260502815247, |
| "learning_rate": 0.00018960939003271119, |
| "loss": 1.3310189247131348, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.15974625144175317, |
| "grad_norm": 0.7864040732383728, |
| "learning_rate": 0.0001895709062920916, |
| "loss": 0.995107889175415, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.16032295271049596, |
| "grad_norm": 0.7795019149780273, |
| "learning_rate": 0.00018953242255147202, |
| "loss": 1.031097412109375, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.16089965397923875, |
| "grad_norm": 0.7358199954032898, |
| "learning_rate": 0.00018949393881085243, |
| "loss": 1.2151832580566406, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.16147635524798154, |
| "grad_norm": 0.592187225818634, |
| "learning_rate": 0.00018945545507023285, |
| "loss": 1.18082857131958, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.16205305651672433, |
| "grad_norm": 0.6349275708198547, |
| "learning_rate": 0.00018941697132961324, |
| "loss": 1.0011241436004639, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.16262975778546712, |
| "grad_norm": 0.827673614025116, |
| "learning_rate": 0.00018937848758899365, |
| "loss": 1.1634137630462646, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.1632064590542099, |
| "grad_norm": 0.7459465861320496, |
| "learning_rate": 0.00018934000384837407, |
| "loss": 1.2054771184921265, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.1637831603229527, |
| "grad_norm": 0.8688679337501526, |
| "learning_rate": 0.00018930152010775448, |
| "loss": 1.5523681640625, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.1643598615916955, |
| "grad_norm": 0.5501953959465027, |
| "learning_rate": 0.0001892630363671349, |
| "loss": 0.8807846903800964, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.16493656286043828, |
| "grad_norm": 0.9370623230934143, |
| "learning_rate": 0.0001892245526265153, |
| "loss": 1.480832815170288, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.16551326412918108, |
| "grad_norm": 0.824664831161499, |
| "learning_rate": 0.00018918606888589573, |
| "loss": 1.1490377187728882, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.16608996539792387, |
| "grad_norm": 0.6960827708244324, |
| "learning_rate": 0.00018914758514527614, |
| "loss": 0.9883493185043335, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.16666666666666666, |
| "grad_norm": 0.5384089946746826, |
| "learning_rate": 0.00018910910140465653, |
| "loss": 0.9772455096244812, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.16724336793540945, |
| "grad_norm": 0.5826528072357178, |
| "learning_rate": 0.00018907061766403695, |
| "loss": 0.80659019947052, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.16782006920415224, |
| "grad_norm": 0.8662609457969666, |
| "learning_rate": 0.00018903213392341736, |
| "loss": 1.438920497894287, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.16839677047289503, |
| "grad_norm": 0.8694437742233276, |
| "learning_rate": 0.00018899365018279778, |
| "loss": 1.594082236289978, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.16897347174163782, |
| "grad_norm": 0.9895355701446533, |
| "learning_rate": 0.0001889551664421782, |
| "loss": 1.1623947620391846, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.1695501730103806, |
| "grad_norm": 0.7757118940353394, |
| "learning_rate": 0.0001889166827015586, |
| "loss": 1.2969348430633545, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.1701268742791234, |
| "grad_norm": 1.1235777139663696, |
| "learning_rate": 0.00018887819896093902, |
| "loss": 1.5447598695755005, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.1707035755478662, |
| "grad_norm": 0.5995392799377441, |
| "learning_rate": 0.00018883971522031944, |
| "loss": 1.1860620975494385, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.17128027681660898, |
| "grad_norm": 0.7350177764892578, |
| "learning_rate": 0.00018880123147969983, |
| "loss": 1.1964070796966553, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.17185697808535177, |
| "grad_norm": 0.7769676446914673, |
| "learning_rate": 0.00018876274773908024, |
| "loss": 0.9732775688171387, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.17243367935409457, |
| "grad_norm": 1.0317054986953735, |
| "learning_rate": 0.00018872426399846066, |
| "loss": 1.1931625604629517, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.17301038062283736, |
| "grad_norm": 0.855571985244751, |
| "learning_rate": 0.00018868578025784107, |
| "loss": 1.2726032733917236, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.17358708189158017, |
| "grad_norm": 1.0038337707519531, |
| "learning_rate": 0.0001886472965172215, |
| "loss": 1.3021737337112427, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.17416378316032297, |
| "grad_norm": 1.05097496509552, |
| "learning_rate": 0.0001886088127766019, |
| "loss": 1.6369917392730713, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.17474048442906576, |
| "grad_norm": 0.6620575189590454, |
| "learning_rate": 0.00018857032903598232, |
| "loss": 1.0873693227767944, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.17531718569780855, |
| "grad_norm": 0.8430469036102295, |
| "learning_rate": 0.0001885318452953627, |
| "loss": 1.1750123500823975, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.17589388696655134, |
| "grad_norm": 0.8181238174438477, |
| "learning_rate": 0.00018849336155474312, |
| "loss": 1.3522461652755737, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.17647058823529413, |
| "grad_norm": 0.6994307041168213, |
| "learning_rate": 0.00018845487781412354, |
| "loss": 1.327797293663025, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.17704728950403692, |
| "grad_norm": 0.7090145349502563, |
| "learning_rate": 0.00018841639407350395, |
| "loss": 1.3075491189956665, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.1776239907727797, |
| "grad_norm": 0.7612029314041138, |
| "learning_rate": 0.00018837791033288437, |
| "loss": 1.0585792064666748, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.1782006920415225, |
| "grad_norm": 0.8592241406440735, |
| "learning_rate": 0.00018833942659226478, |
| "loss": 0.6441008448600769, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.1787773933102653, |
| "grad_norm": 1.0303255319595337, |
| "learning_rate": 0.0001883009428516452, |
| "loss": 1.520599365234375, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.17935409457900808, |
| "grad_norm": 0.80874103307724, |
| "learning_rate": 0.0001882624591110256, |
| "loss": 0.902335524559021, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.17993079584775087, |
| "grad_norm": 0.7039778232574463, |
| "learning_rate": 0.000188223975370406, |
| "loss": 1.0226070880889893, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.18050749711649366, |
| "grad_norm": 0.7102690935134888, |
| "learning_rate": 0.00018818549162978642, |
| "loss": 1.0590555667877197, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.18108419838523646, |
| "grad_norm": 1.0405141115188599, |
| "learning_rate": 0.00018814700788916683, |
| "loss": 1.4237335920333862, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.18166089965397925, |
| "grad_norm": 0.6633170247077942, |
| "learning_rate": 0.00018810852414854725, |
| "loss": 0.9277420043945312, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.18223760092272204, |
| "grad_norm": 0.6740328073501587, |
| "learning_rate": 0.00018807004040792766, |
| "loss": 1.053580403327942, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.18281430219146483, |
| "grad_norm": 0.6842854619026184, |
| "learning_rate": 0.00018803155666730808, |
| "loss": 1.0379540920257568, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.18339100346020762, |
| "grad_norm": 0.6766674518585205, |
| "learning_rate": 0.0001879930729266885, |
| "loss": 0.9214432835578918, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.1839677047289504, |
| "grad_norm": 0.8358355164527893, |
| "learning_rate": 0.0001879545891860689, |
| "loss": 1.069684624671936, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.1845444059976932, |
| "grad_norm": 0.9044516086578369, |
| "learning_rate": 0.0001879161054454493, |
| "loss": 1.4757916927337646, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.185121107266436, |
| "grad_norm": 0.9662521481513977, |
| "learning_rate": 0.0001878776217048297, |
| "loss": 1.3449480533599854, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.18569780853517878, |
| "grad_norm": 0.8681714534759521, |
| "learning_rate": 0.00018783913796421013, |
| "loss": 1.2057011127471924, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.18627450980392157, |
| "grad_norm": 0.7318335175514221, |
| "learning_rate": 0.00018780065422359054, |
| "loss": 1.276970386505127, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.18685121107266436, |
| "grad_norm": 0.798865556716919, |
| "learning_rate": 0.00018776217048297096, |
| "loss": 1.1334099769592285, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.18742791234140715, |
| "grad_norm": 0.6787270903587341, |
| "learning_rate": 0.00018772368674235137, |
| "loss": 1.0829839706420898, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.18800461361014995, |
| "grad_norm": 0.705894947052002, |
| "learning_rate": 0.0001876852030017318, |
| "loss": 1.3146710395812988, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.18858131487889274, |
| "grad_norm": 0.7403978705406189, |
| "learning_rate": 0.0001876467192611122, |
| "loss": 0.7811852693557739, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.18915801614763553, |
| "grad_norm": 0.8138331770896912, |
| "learning_rate": 0.0001876082355204926, |
| "loss": 1.3800559043884277, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.18973471741637832, |
| "grad_norm": 1.0053505897521973, |
| "learning_rate": 0.000187569751779873, |
| "loss": 1.502892017364502, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.1903114186851211, |
| "grad_norm": 1.2905986309051514, |
| "learning_rate": 0.00018753126803925342, |
| "loss": 1.6044906377792358, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.1908881199538639, |
| "grad_norm": 0.7266846299171448, |
| "learning_rate": 0.00018749278429863384, |
| "loss": 0.8269582390785217, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.1914648212226067, |
| "grad_norm": 0.9892683029174805, |
| "learning_rate": 0.00018745430055801425, |
| "loss": 1.2374012470245361, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.19204152249134948, |
| "grad_norm": 0.8026344180107117, |
| "learning_rate": 0.00018741581681739467, |
| "loss": 0.9166598916053772, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.19261822376009227, |
| "grad_norm": 0.7790790796279907, |
| "learning_rate": 0.00018737733307677508, |
| "loss": 0.8837241530418396, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.19319492502883506, |
| "grad_norm": 0.8625907897949219, |
| "learning_rate": 0.0001873388493361555, |
| "loss": 1.0963804721832275, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.19377162629757785, |
| "grad_norm": 0.8408490419387817, |
| "learning_rate": 0.00018730036559553589, |
| "loss": 1.2887423038482666, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.19434832756632064, |
| "grad_norm": 0.8141940236091614, |
| "learning_rate": 0.0001872618818549163, |
| "loss": 1.234419584274292, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.19492502883506344, |
| "grad_norm": 0.7913158535957336, |
| "learning_rate": 0.00018722339811429672, |
| "loss": 0.8931217193603516, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.19550173010380623, |
| "grad_norm": 0.9377291202545166, |
| "learning_rate": 0.00018718491437367713, |
| "loss": 1.1958264112472534, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.19607843137254902, |
| "grad_norm": 1.1096664667129517, |
| "learning_rate": 0.00018714643063305755, |
| "loss": 0.871677041053772, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.1966551326412918, |
| "grad_norm": 0.7379001379013062, |
| "learning_rate": 0.00018710794689243796, |
| "loss": 0.9309886693954468, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.1972318339100346, |
| "grad_norm": 0.738572895526886, |
| "learning_rate": 0.00018706946315181838, |
| "loss": 1.065298080444336, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.1978085351787774, |
| "grad_norm": 0.8279491066932678, |
| "learning_rate": 0.0001870309794111988, |
| "loss": 1.0682514905929565, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.19838523644752018, |
| "grad_norm": 0.9108213782310486, |
| "learning_rate": 0.00018699249567057918, |
| "loss": 1.2043181657791138, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.19896193771626297, |
| "grad_norm": 0.9347065687179565, |
| "learning_rate": 0.0001869540119299596, |
| "loss": 1.5744340419769287, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.19953863898500576, |
| "grad_norm": 0.5783383250236511, |
| "learning_rate": 0.00018691552818934, |
| "loss": 0.7808327674865723, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.20011534025374855, |
| "grad_norm": 0.661321759223938, |
| "learning_rate": 0.00018687704444872043, |
| "loss": 0.9458237290382385, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.20069204152249134, |
| "grad_norm": 0.5592895746231079, |
| "learning_rate": 0.00018683856070810084, |
| "loss": 0.8761368989944458, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.20126874279123413, |
| "grad_norm": 0.6626494526863098, |
| "learning_rate": 0.00018680007696748126, |
| "loss": 0.9110841751098633, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.20184544405997693, |
| "grad_norm": 0.8392354249954224, |
| "learning_rate": 0.00018676159322686167, |
| "loss": 1.234721302986145, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.20242214532871972, |
| "grad_norm": 0.5596436262130737, |
| "learning_rate": 0.0001867231094862421, |
| "loss": 0.837221622467041, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.2029988465974625, |
| "grad_norm": 0.5023308992385864, |
| "learning_rate": 0.00018668462574562248, |
| "loss": 0.7079763412475586, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.2035755478662053, |
| "grad_norm": 0.7946610450744629, |
| "learning_rate": 0.0001866461420050029, |
| "loss": 1.3043620586395264, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.2041522491349481, |
| "grad_norm": 0.8124772310256958, |
| "learning_rate": 0.0001866076582643833, |
| "loss": 1.1276662349700928, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.20472895040369088, |
| "grad_norm": 0.5195242166519165, |
| "learning_rate": 0.00018656917452376372, |
| "loss": 0.737315833568573, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.20530565167243367, |
| "grad_norm": 0.7146646976470947, |
| "learning_rate": 0.00018653069078314414, |
| "loss": 1.0838680267333984, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.20588235294117646, |
| "grad_norm": 0.7928506135940552, |
| "learning_rate": 0.00018649220704252455, |
| "loss": 1.2697861194610596, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.20645905420991925, |
| "grad_norm": 0.6152468919754028, |
| "learning_rate": 0.00018645372330190497, |
| "loss": 0.9355758428573608, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.20703575547866204, |
| "grad_norm": 1.0809266567230225, |
| "learning_rate": 0.00018641523956128538, |
| "loss": 1.9420266151428223, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.20761245674740483, |
| "grad_norm": 0.59016352891922, |
| "learning_rate": 0.00018637675582066577, |
| "loss": 0.9944459199905396, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.20818915801614762, |
| "grad_norm": 0.7870339751243591, |
| "learning_rate": 0.0001863382720800462, |
| "loss": 1.0614302158355713, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.20876585928489041, |
| "grad_norm": 0.7203708291053772, |
| "learning_rate": 0.0001862997883394266, |
| "loss": 0.9602723717689514, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.2093425605536332, |
| "grad_norm": 0.532341480255127, |
| "learning_rate": 0.00018626130459880702, |
| "loss": 0.8718068599700928, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.209919261822376, |
| "grad_norm": 0.9565883278846741, |
| "learning_rate": 0.00018622282085818743, |
| "loss": 1.278198480606079, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.2104959630911188, |
| "grad_norm": 0.7197461724281311, |
| "learning_rate": 0.00018618433711756785, |
| "loss": 1.3148860931396484, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.21107266435986158, |
| "grad_norm": 0.6119058728218079, |
| "learning_rate": 0.00018614585337694826, |
| "loss": 0.9266935586929321, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.2116493656286044, |
| "grad_norm": 0.9047015309333801, |
| "learning_rate": 0.00018610736963632868, |
| "loss": 1.1473264694213867, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.2122260668973472, |
| "grad_norm": 0.6796925663948059, |
| "learning_rate": 0.00018606888589570907, |
| "loss": 1.0393201112747192, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.21280276816608998, |
| "grad_norm": 0.6059300303459167, |
| "learning_rate": 0.00018603040215508948, |
| "loss": 1.001380443572998, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.21337946943483277, |
| "grad_norm": 0.6669148206710815, |
| "learning_rate": 0.0001859919184144699, |
| "loss": 0.8133573532104492, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.21395617070357556, |
| "grad_norm": 0.6025424003601074, |
| "learning_rate": 0.0001859534346738503, |
| "loss": 0.9277598261833191, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.21453287197231835, |
| "grad_norm": 0.8728757500648499, |
| "learning_rate": 0.00018591495093323073, |
| "loss": 1.1496421098709106, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.21510957324106114, |
| "grad_norm": 0.587089478969574, |
| "learning_rate": 0.00018587646719261114, |
| "loss": 0.8672431707382202, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.21568627450980393, |
| "grad_norm": 0.7482187747955322, |
| "learning_rate": 0.00018583798345199156, |
| "loss": 1.0713750123977661, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.21626297577854672, |
| "grad_norm": 0.8591217398643494, |
| "learning_rate": 0.00018579949971137197, |
| "loss": 1.4045636653900146, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.21683967704728951, |
| "grad_norm": 0.7630711793899536, |
| "learning_rate": 0.00018576101597075236, |
| "loss": 0.9842856526374817, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.2174163783160323, |
| "grad_norm": 1.2762526273727417, |
| "learning_rate": 0.00018572253223013278, |
| "loss": 1.5381450653076172, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.2179930795847751, |
| "grad_norm": 0.7234092950820923, |
| "learning_rate": 0.0001856840484895132, |
| "loss": 1.0782972574234009, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.2185697808535179, |
| "grad_norm": 0.8868815898895264, |
| "learning_rate": 0.0001856455647488936, |
| "loss": 0.9910011291503906, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.21914648212226068, |
| "grad_norm": 0.5880477428436279, |
| "learning_rate": 0.00018560708100827402, |
| "loss": 0.9178383946418762, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.21972318339100347, |
| "grad_norm": 0.7115210294723511, |
| "learning_rate": 0.00018556859726765444, |
| "loss": 1.3695993423461914, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.22029988465974626, |
| "grad_norm": 0.9036445617675781, |
| "learning_rate": 0.00018553011352703485, |
| "loss": 1.049261212348938, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.22087658592848905, |
| "grad_norm": 1.044411540031433, |
| "learning_rate": 0.00018549162978641527, |
| "loss": 1.272240400314331, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.22145328719723184, |
| "grad_norm": 0.6363574862480164, |
| "learning_rate": 0.00018545314604579566, |
| "loss": 1.0237360000610352, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.22202998846597463, |
| "grad_norm": 0.7671105861663818, |
| "learning_rate": 0.00018541466230517607, |
| "loss": 0.9970401525497437, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.22260668973471742, |
| "grad_norm": 1.170229434967041, |
| "learning_rate": 0.0001853761785645565, |
| "loss": 1.5654575824737549, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.2231833910034602, |
| "grad_norm": 0.9486715793609619, |
| "learning_rate": 0.0001853376948239369, |
| "loss": 1.8445625305175781, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.223760092272203, |
| "grad_norm": 0.7049561142921448, |
| "learning_rate": 0.00018529921108331732, |
| "loss": 1.147915005683899, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.2243367935409458, |
| "grad_norm": 0.7626886963844299, |
| "learning_rate": 0.00018526072734269773, |
| "loss": 0.9354770183563232, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.22491349480968859, |
| "grad_norm": 0.8018368482589722, |
| "learning_rate": 0.00018522224360207815, |
| "loss": 1.0617220401763916, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.22549019607843138, |
| "grad_norm": 0.7590807676315308, |
| "learning_rate": 0.00018518375986145854, |
| "loss": 0.9120303988456726, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.22606689734717417, |
| "grad_norm": 0.6623148918151855, |
| "learning_rate": 0.00018514527612083895, |
| "loss": 0.7569756507873535, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.22664359861591696, |
| "grad_norm": 0.5547282099723816, |
| "learning_rate": 0.00018510679238021937, |
| "loss": 0.7989190816879272, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.22722029988465975, |
| "grad_norm": 0.5765286087989807, |
| "learning_rate": 0.00018506830863959978, |
| "loss": 0.6133571863174438, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.22779700115340254, |
| "grad_norm": 0.8331816792488098, |
| "learning_rate": 0.0001850298248989802, |
| "loss": 1.1577847003936768, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.22837370242214533, |
| "grad_norm": 0.7655069231987, |
| "learning_rate": 0.0001849913411583606, |
| "loss": 1.0809553861618042, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.22895040369088812, |
| "grad_norm": 0.7397854924201965, |
| "learning_rate": 0.00018495285741774103, |
| "loss": 0.9830250144004822, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.2295271049596309, |
| "grad_norm": 0.6970857381820679, |
| "learning_rate": 0.00018491437367712144, |
| "loss": 0.8101853132247925, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.2301038062283737, |
| "grad_norm": 0.5724602937698364, |
| "learning_rate": 0.00018487588993650183, |
| "loss": 0.70196932554245, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.2306805074971165, |
| "grad_norm": 0.9593637585639954, |
| "learning_rate": 0.00018483740619588225, |
| "loss": 0.9378552436828613, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.23125720876585928, |
| "grad_norm": 0.7079650163650513, |
| "learning_rate": 0.00018479892245526266, |
| "loss": 0.8764985799789429, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.23183391003460208, |
| "grad_norm": 0.7374391555786133, |
| "learning_rate": 0.00018476043871464308, |
| "loss": 0.8556146025657654, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.23241061130334487, |
| "grad_norm": 0.6992713809013367, |
| "learning_rate": 0.0001847219549740235, |
| "loss": 0.9657334089279175, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.23298731257208766, |
| "grad_norm": 0.8299751281738281, |
| "learning_rate": 0.0001846834712334039, |
| "loss": 1.2171483039855957, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.23356401384083045, |
| "grad_norm": 0.5866743922233582, |
| "learning_rate": 0.00018464498749278432, |
| "loss": 0.9809523820877075, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.23414071510957324, |
| "grad_norm": 0.8412980437278748, |
| "learning_rate": 0.00018460650375216474, |
| "loss": 1.1848514080047607, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.23471741637831603, |
| "grad_norm": 0.7566470503807068, |
| "learning_rate": 0.00018456802001154513, |
| "loss": 1.0939483642578125, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.23529411764705882, |
| "grad_norm": 0.787800669670105, |
| "learning_rate": 0.00018452953627092554, |
| "loss": 1.2347867488861084, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.2358708189158016, |
| "grad_norm": 0.8511201739311218, |
| "learning_rate": 0.00018449105253030596, |
| "loss": 0.9385696053504944, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.2364475201845444, |
| "grad_norm": 0.9360937476158142, |
| "learning_rate": 0.00018445256878968637, |
| "loss": 1.3519483804702759, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.2370242214532872, |
| "grad_norm": 0.556093692779541, |
| "learning_rate": 0.0001844140850490668, |
| "loss": 0.8482391238212585, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.23760092272202998, |
| "grad_norm": 0.6390929818153381, |
| "learning_rate": 0.0001843756013084472, |
| "loss": 1.0374037027359009, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.23817762399077277, |
| "grad_norm": 0.5385326743125916, |
| "learning_rate": 0.00018433711756782762, |
| "loss": 0.8951395750045776, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.23875432525951557, |
| "grad_norm": 0.7417898774147034, |
| "learning_rate": 0.00018429863382720803, |
| "loss": 1.1854356527328491, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.23933102652825836, |
| "grad_norm": 0.7092972993850708, |
| "learning_rate": 0.00018426015008658842, |
| "loss": 1.2556312084197998, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.23990772779700115, |
| "grad_norm": 0.6026037931442261, |
| "learning_rate": 0.00018422166634596884, |
| "loss": 0.8205006718635559, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.24048442906574394, |
| "grad_norm": 0.7460249662399292, |
| "learning_rate": 0.00018418318260534925, |
| "loss": 0.9955434203147888, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.24106113033448673, |
| "grad_norm": 0.6313579082489014, |
| "learning_rate": 0.00018414469886472967, |
| "loss": 1.15024995803833, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.24163783160322952, |
| "grad_norm": 0.7596423029899597, |
| "learning_rate": 0.00018410621512411008, |
| "loss": 1.196816325187683, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.2422145328719723, |
| "grad_norm": 0.7336683869361877, |
| "learning_rate": 0.0001840677313834905, |
| "loss": 1.0791605710983276, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.2427912341407151, |
| "grad_norm": 0.6802041530609131, |
| "learning_rate": 0.00018402924764287091, |
| "loss": 0.8439788222312927, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.2433679354094579, |
| "grad_norm": 0.9311268329620361, |
| "learning_rate": 0.00018399076390225133, |
| "loss": 1.4188232421875, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.24394463667820068, |
| "grad_norm": 0.9715989232063293, |
| "learning_rate": 0.00018395228016163172, |
| "loss": 1.149898648262024, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.24452133794694347, |
| "grad_norm": 0.6722977161407471, |
| "learning_rate": 0.00018391379642101213, |
| "loss": 1.0626373291015625, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.24509803921568626, |
| "grad_norm": 0.9417729377746582, |
| "learning_rate": 0.00018387531268039255, |
| "loss": 1.277899980545044, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.24567474048442905, |
| "grad_norm": 0.8700136542320251, |
| "learning_rate": 0.00018383682893977296, |
| "loss": 1.106884479522705, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.24625144175317185, |
| "grad_norm": 0.71380615234375, |
| "learning_rate": 0.00018379834519915338, |
| "loss": 1.1928266286849976, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.24682814302191464, |
| "grad_norm": 0.7276275157928467, |
| "learning_rate": 0.0001837598614585338, |
| "loss": 1.2448585033416748, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.24740484429065743, |
| "grad_norm": 0.8795212507247925, |
| "learning_rate": 0.0001837213777179142, |
| "loss": 1.317166805267334, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.24798154555940022, |
| "grad_norm": 0.9904524087905884, |
| "learning_rate": 0.00018368289397729462, |
| "loss": 1.166348934173584, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.248558246828143, |
| "grad_norm": 0.7632173299789429, |
| "learning_rate": 0.000183644410236675, |
| "loss": 1.5664170980453491, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.2491349480968858, |
| "grad_norm": 0.8291054964065552, |
| "learning_rate": 0.00018360592649605543, |
| "loss": 1.4953291416168213, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.2497116493656286, |
| "grad_norm": 0.6445023417472839, |
| "learning_rate": 0.00018356744275543584, |
| "loss": 0.8673335313796997, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.2502883506343714, |
| "grad_norm": 1.2072186470031738, |
| "learning_rate": 0.00018352895901481626, |
| "loss": 1.59421968460083, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.2508650519031142, |
| "grad_norm": 0.7409680485725403, |
| "learning_rate": 0.00018349047527419667, |
| "loss": 1.0224432945251465, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.25144175317185696, |
| "grad_norm": 0.8207524418830872, |
| "learning_rate": 0.0001834519915335771, |
| "loss": 1.276658058166504, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.2520184544405998, |
| "grad_norm": 0.8591949343681335, |
| "learning_rate": 0.0001834135077929575, |
| "loss": 1.1319093704223633, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.25259515570934254, |
| "grad_norm": 0.6689372658729553, |
| "learning_rate": 0.00018337502405233792, |
| "loss": 0.9691576361656189, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.25317185697808536, |
| "grad_norm": 0.9033296704292297, |
| "learning_rate": 0.0001833365403117183, |
| "loss": 1.4272680282592773, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.2537485582468281, |
| "grad_norm": 0.6959604620933533, |
| "learning_rate": 0.0001832980565710987, |
| "loss": 1.1449182033538818, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.25432525951557095, |
| "grad_norm": 0.6695550680160522, |
| "learning_rate": 0.0001832595728304791, |
| "loss": 1.0492792129516602, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.2549019607843137, |
| "grad_norm": 0.710794985294342, |
| "learning_rate": 0.00018322108908985953, |
| "loss": 0.9534090757369995, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.2554786620530565, |
| "grad_norm": 0.6955594420433044, |
| "learning_rate": 0.00018318260534923994, |
| "loss": 0.8743690252304077, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.2560553633217993, |
| "grad_norm": 0.6831961274147034, |
| "learning_rate": 0.00018314412160862036, |
| "loss": 1.3500818014144897, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.2566320645905421, |
| "grad_norm": 0.7839577198028564, |
| "learning_rate": 0.00018310563786800077, |
| "loss": 1.0105950832366943, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.25720876585928487, |
| "grad_norm": 0.8791704773902893, |
| "learning_rate": 0.0001830671541273812, |
| "loss": 1.2243623733520508, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.2577854671280277, |
| "grad_norm": 0.7005860209465027, |
| "learning_rate": 0.0001830286703867616, |
| "loss": 1.077842354774475, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.25836216839677045, |
| "grad_norm": 0.822964072227478, |
| "learning_rate": 0.000182990186646142, |
| "loss": 1.2265344858169556, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.25893886966551327, |
| "grad_norm": 0.773158609867096, |
| "learning_rate": 0.0001829517029055224, |
| "loss": 0.8715431690216064, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.25951557093425603, |
| "grad_norm": 0.8603456616401672, |
| "learning_rate": 0.00018291321916490282, |
| "loss": 0.9889146089553833, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.26009227220299885, |
| "grad_norm": 0.8188443779945374, |
| "learning_rate": 0.00018287473542428324, |
| "loss": 0.8885264992713928, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.2606689734717416, |
| "grad_norm": 1.0877407789230347, |
| "learning_rate": 0.00018283625168366365, |
| "loss": 1.0748121738433838, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.26124567474048443, |
| "grad_norm": 0.5481402277946472, |
| "learning_rate": 0.00018279776794304407, |
| "loss": 0.807957649230957, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.2618223760092272, |
| "grad_norm": 0.8591419458389282, |
| "learning_rate": 0.00018275928420242448, |
| "loss": 1.3057336807250977, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.26239907727797, |
| "grad_norm": 0.7936019897460938, |
| "learning_rate": 0.0001827208004618049, |
| "loss": 1.185962200164795, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.2629757785467128, |
| "grad_norm": 0.6581904888153076, |
| "learning_rate": 0.00018268231672118529, |
| "loss": 0.8275895118713379, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.2635524798154556, |
| "grad_norm": 0.831302285194397, |
| "learning_rate": 0.0001826438329805657, |
| "loss": 1.299217939376831, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.26412918108419836, |
| "grad_norm": 0.6771467924118042, |
| "learning_rate": 0.00018260534923994612, |
| "loss": 0.8427085876464844, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.2647058823529412, |
| "grad_norm": 0.7914313077926636, |
| "learning_rate": 0.00018256686549932653, |
| "loss": 1.369484305381775, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.26528258362168394, |
| "grad_norm": 0.5916578769683838, |
| "learning_rate": 0.00018252838175870695, |
| "loss": 0.6241229772567749, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.26585928489042676, |
| "grad_norm": 0.6836418509483337, |
| "learning_rate": 0.00018248989801808736, |
| "loss": 0.8050651550292969, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.2664359861591695, |
| "grad_norm": 0.7545502185821533, |
| "learning_rate": 0.00018245141427746778, |
| "loss": 0.7911585569381714, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.26701268742791234, |
| "grad_norm": 0.6010773181915283, |
| "learning_rate": 0.0001824129305368482, |
| "loss": 1.1161192655563354, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.2675893886966551, |
| "grad_norm": 0.813204824924469, |
| "learning_rate": 0.00018237444679622858, |
| "loss": 1.096695065498352, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.2681660899653979, |
| "grad_norm": 0.91140216588974, |
| "learning_rate": 0.000182335963055609, |
| "loss": 1.4385195970535278, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.2687427912341407, |
| "grad_norm": 0.9745720624923706, |
| "learning_rate": 0.0001822974793149894, |
| "loss": 1.3157883882522583, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.2693194925028835, |
| "grad_norm": 0.4999851584434509, |
| "learning_rate": 0.00018225899557436983, |
| "loss": 0.6729867458343506, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.2698961937716263, |
| "grad_norm": 0.9021291732788086, |
| "learning_rate": 0.00018222051183375024, |
| "loss": 1.0553233623504639, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.2704728950403691, |
| "grad_norm": 0.8061716556549072, |
| "learning_rate": 0.00018218202809313066, |
| "loss": 1.3081198930740356, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.2710495963091119, |
| "grad_norm": 0.6820981502532959, |
| "learning_rate": 0.00018214354435251107, |
| "loss": 0.9388906359672546, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.27162629757785467, |
| "grad_norm": 1.0991320610046387, |
| "learning_rate": 0.0001821050606118915, |
| "loss": 1.528028964996338, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.2722029988465975, |
| "grad_norm": 0.7934592962265015, |
| "learning_rate": 0.00018206657687127188, |
| "loss": 1.2054097652435303, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.27277970011534025, |
| "grad_norm": 0.7113450765609741, |
| "learning_rate": 0.0001820280931306523, |
| "loss": 1.0254576206207275, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.27335640138408307, |
| "grad_norm": 0.7593767046928406, |
| "learning_rate": 0.0001819896093900327, |
| "loss": 1.284333348274231, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.27393310265282583, |
| "grad_norm": 1.006116509437561, |
| "learning_rate": 0.00018195112564941312, |
| "loss": 1.3650097846984863, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.27450980392156865, |
| "grad_norm": 0.8706763982772827, |
| "learning_rate": 0.00018191264190879354, |
| "loss": 1.6067880392074585, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.2750865051903114, |
| "grad_norm": 0.7428901195526123, |
| "learning_rate": 0.00018187415816817395, |
| "loss": 1.373342514038086, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.27566320645905423, |
| "grad_norm": 0.8846433162689209, |
| "learning_rate": 0.00018183567442755437, |
| "loss": 1.5520777702331543, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.276239907727797, |
| "grad_norm": 0.8808581829071045, |
| "learning_rate": 0.00018179719068693478, |
| "loss": 1.1342291831970215, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.2768166089965398, |
| "grad_norm": 0.7310512065887451, |
| "learning_rate": 0.00018175870694631517, |
| "loss": 0.7762906551361084, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.2773933102652826, |
| "grad_norm": 0.8467727303504944, |
| "learning_rate": 0.0001817202232056956, |
| "loss": 0.990180253982544, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.2779700115340254, |
| "grad_norm": 0.642230212688446, |
| "learning_rate": 0.000181681739465076, |
| "loss": 0.845292329788208, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.27854671280276816, |
| "grad_norm": 0.7775582075119019, |
| "learning_rate": 0.00018164325572445642, |
| "loss": 1.279380202293396, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.279123414071511, |
| "grad_norm": 0.6477130651473999, |
| "learning_rate": 0.00018160477198383683, |
| "loss": 0.8197907209396362, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.27970011534025374, |
| "grad_norm": 0.6508778929710388, |
| "learning_rate": 0.00018156628824321725, |
| "loss": 0.9538026452064514, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.28027681660899656, |
| "grad_norm": 0.9379159212112427, |
| "learning_rate": 0.00018152780450259766, |
| "loss": 1.2874410152435303, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.2808535178777393, |
| "grad_norm": 0.8014243245124817, |
| "learning_rate": 0.00018148932076197808, |
| "loss": 1.364856481552124, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.28143021914648214, |
| "grad_norm": 1.0049822330474854, |
| "learning_rate": 0.00018145083702135847, |
| "loss": 1.3461369276046753, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.2820069204152249, |
| "grad_norm": 0.8764071464538574, |
| "learning_rate": 0.00018141235328073888, |
| "loss": 1.549091100692749, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.2825836216839677, |
| "grad_norm": 0.6743770241737366, |
| "learning_rate": 0.0001813738695401193, |
| "loss": 0.8718385696411133, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.2831603229527105, |
| "grad_norm": 0.8501721024513245, |
| "learning_rate": 0.0001813353857994997, |
| "loss": 0.9592713117599487, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.2837370242214533, |
| "grad_norm": 0.6727166771888733, |
| "learning_rate": 0.00018129690205888013, |
| "loss": 1.0024611949920654, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.28431372549019607, |
| "grad_norm": 0.7949026226997375, |
| "learning_rate": 0.00018125841831826054, |
| "loss": 0.889624297618866, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.2848904267589389, |
| "grad_norm": 0.8814200758934021, |
| "learning_rate": 0.00018121993457764096, |
| "loss": 1.7483818531036377, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.28546712802768165, |
| "grad_norm": 0.6116936206817627, |
| "learning_rate": 0.00018118145083702137, |
| "loss": 1.097643256187439, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.28604382929642447, |
| "grad_norm": 0.6951889395713806, |
| "learning_rate": 0.00018114296709640176, |
| "loss": 0.9292160272598267, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.28662053056516723, |
| "grad_norm": 0.9138390421867371, |
| "learning_rate": 0.00018110448335578218, |
| "loss": 1.174808144569397, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.28719723183391005, |
| "grad_norm": 0.6442549824714661, |
| "learning_rate": 0.0001810659996151626, |
| "loss": 0.9390018582344055, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.2877739331026528, |
| "grad_norm": 0.9683842658996582, |
| "learning_rate": 0.000181027515874543, |
| "loss": 1.4045450687408447, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.28835063437139563, |
| "grad_norm": 0.7444068193435669, |
| "learning_rate": 0.00018098903213392342, |
| "loss": 0.9792321920394897, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.2889273356401384, |
| "grad_norm": 0.7402380108833313, |
| "learning_rate": 0.00018095054839330384, |
| "loss": 1.231440782546997, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.2895040369088812, |
| "grad_norm": 0.7022894024848938, |
| "learning_rate": 0.00018091206465268425, |
| "loss": 0.856300950050354, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.290080738177624, |
| "grad_norm": 0.7641032338142395, |
| "learning_rate": 0.00018087358091206467, |
| "loss": 0.9729149341583252, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.2906574394463668, |
| "grad_norm": 0.9500510096549988, |
| "learning_rate": 0.00018083509717144506, |
| "loss": 1.2449204921722412, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.29123414071510956, |
| "grad_norm": 0.6954758763313293, |
| "learning_rate": 0.00018079661343082547, |
| "loss": 0.8000816106796265, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.2918108419838524, |
| "grad_norm": 0.7313628196716309, |
| "learning_rate": 0.0001807581296902059, |
| "loss": 1.233512282371521, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.29238754325259514, |
| "grad_norm": 0.8792680501937866, |
| "learning_rate": 0.0001807196459495863, |
| "loss": 1.092308521270752, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.29296424452133796, |
| "grad_norm": 0.6230028867721558, |
| "learning_rate": 0.00018068116220896672, |
| "loss": 0.7719423174858093, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.2935409457900807, |
| "grad_norm": 0.8965409398078918, |
| "learning_rate": 0.00018064267846834713, |
| "loss": 1.576930284500122, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.29411764705882354, |
| "grad_norm": 0.756908118724823, |
| "learning_rate": 0.00018060419472772755, |
| "loss": 0.9762069582939148, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.2946943483275663, |
| "grad_norm": 0.7524373531341553, |
| "learning_rate": 0.00018056571098710794, |
| "loss": 0.9206646680831909, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.2952710495963091, |
| "grad_norm": 0.9292136430740356, |
| "learning_rate": 0.00018052722724648835, |
| "loss": 1.534470558166504, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.2958477508650519, |
| "grad_norm": 1.0442750453948975, |
| "learning_rate": 0.00018048874350586877, |
| "loss": 1.2520341873168945, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.2964244521337947, |
| "grad_norm": 0.8131316900253296, |
| "learning_rate": 0.00018045025976524918, |
| "loss": 1.5056309700012207, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.29700115340253747, |
| "grad_norm": 0.7711693048477173, |
| "learning_rate": 0.0001804117760246296, |
| "loss": 1.2189143896102905, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.2975778546712803, |
| "grad_norm": 0.6610523462295532, |
| "learning_rate": 0.00018037329228401, |
| "loss": 1.1120340824127197, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.29815455594002305, |
| "grad_norm": 0.7343090772628784, |
| "learning_rate": 0.00018033480854339043, |
| "loss": 1.0496878623962402, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.29873125720876587, |
| "grad_norm": 0.6952423453330994, |
| "learning_rate": 0.00018029632480277084, |
| "loss": 1.0725046396255493, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.29930795847750863, |
| "grad_norm": 1.0385462045669556, |
| "learning_rate": 0.00018025784106215123, |
| "loss": 1.3104898929595947, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.29988465974625145, |
| "grad_norm": 0.6035030484199524, |
| "learning_rate": 0.00018021935732153165, |
| "loss": 0.7342404127120972, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.3004613610149942, |
| "grad_norm": 0.5726889371871948, |
| "learning_rate": 0.00018018087358091206, |
| "loss": 0.9352455139160156, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.30103806228373703, |
| "grad_norm": 0.5148364305496216, |
| "learning_rate": 0.00018014238984029248, |
| "loss": 0.8527913093566895, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.3016147635524798, |
| "grad_norm": 0.8307221531867981, |
| "learning_rate": 0.0001801039060996729, |
| "loss": 1.180746078491211, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.3021914648212226, |
| "grad_norm": 0.8560492396354675, |
| "learning_rate": 0.0001800654223590533, |
| "loss": 1.4329997301101685, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.3027681660899654, |
| "grad_norm": 0.5972908139228821, |
| "learning_rate": 0.00018002693861843372, |
| "loss": 0.7385514974594116, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.3033448673587082, |
| "grad_norm": 0.5159963965415955, |
| "learning_rate": 0.00017998845487781414, |
| "loss": 0.646453320980072, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.30392156862745096, |
| "grad_norm": 0.9237578511238098, |
| "learning_rate": 0.00017994997113719453, |
| "loss": 1.442482590675354, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.3044982698961938, |
| "grad_norm": 0.9341033697128296, |
| "learning_rate": 0.00017991148739657494, |
| "loss": 1.3850878477096558, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.30507497116493654, |
| "grad_norm": 0.5422039031982422, |
| "learning_rate": 0.00017987300365595536, |
| "loss": 0.6736562252044678, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.30565167243367936, |
| "grad_norm": 0.6220455765724182, |
| "learning_rate": 0.00017983451991533577, |
| "loss": 0.7528645992279053, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.3062283737024221, |
| "grad_norm": 0.8073663115501404, |
| "learning_rate": 0.0001797960361747162, |
| "loss": 1.2123267650604248, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.30680507497116494, |
| "grad_norm": 0.5491252541542053, |
| "learning_rate": 0.0001797575524340966, |
| "loss": 0.5903505086898804, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.3073817762399077, |
| "grad_norm": 1.9019479751586914, |
| "learning_rate": 0.00017971906869347702, |
| "loss": 1.4316587448120117, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.3079584775086505, |
| "grad_norm": 0.45649734139442444, |
| "learning_rate": 0.00017968058495285743, |
| "loss": 0.659195065498352, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.30853517877739334, |
| "grad_norm": 0.7406135201454163, |
| "learning_rate": 0.00017964210121223782, |
| "loss": 1.0346477031707764, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.3091118800461361, |
| "grad_norm": 0.9768670201301575, |
| "learning_rate": 0.00017960361747161824, |
| "loss": 1.584676742553711, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.3096885813148789, |
| "grad_norm": 0.7869756817817688, |
| "learning_rate": 0.00017956513373099865, |
| "loss": 1.0404967069625854, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.3102652825836217, |
| "grad_norm": 0.6868966221809387, |
| "learning_rate": 0.00017952664999037907, |
| "loss": 0.8878238201141357, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.3108419838523645, |
| "grad_norm": 0.7594157457351685, |
| "learning_rate": 0.00017948816624975948, |
| "loss": 1.0191287994384766, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.31141868512110726, |
| "grad_norm": 0.8346229195594788, |
| "learning_rate": 0.0001794496825091399, |
| "loss": 1.021256923675537, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.3119953863898501, |
| "grad_norm": 1.0493948459625244, |
| "learning_rate": 0.00017941119876852031, |
| "loss": 1.0015616416931152, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.31257208765859285, |
| "grad_norm": 0.62034010887146, |
| "learning_rate": 0.00017937271502790073, |
| "loss": 0.9237149357795715, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.31314878892733566, |
| "grad_norm": 0.7169587016105652, |
| "learning_rate": 0.00017933423128728112, |
| "loss": 0.8658795356750488, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.3137254901960784, |
| "grad_norm": 0.7205992341041565, |
| "learning_rate": 0.00017929574754666153, |
| "loss": 1.1227588653564453, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.31430219146482125, |
| "grad_norm": 0.7573957443237305, |
| "learning_rate": 0.00017925726380604195, |
| "loss": 0.9638352394104004, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.314878892733564, |
| "grad_norm": 0.981253981590271, |
| "learning_rate": 0.00017921878006542236, |
| "loss": 1.0400216579437256, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.3154555940023068, |
| "grad_norm": 0.6763452291488647, |
| "learning_rate": 0.00017918029632480278, |
| "loss": 1.0069935321807861, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.3160322952710496, |
| "grad_norm": 0.5641304850578308, |
| "learning_rate": 0.0001791418125841832, |
| "loss": 0.7099517583847046, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.3166089965397924, |
| "grad_norm": 0.542838454246521, |
| "learning_rate": 0.0001791033288435636, |
| "loss": 0.7347281575202942, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.31718569780853517, |
| "grad_norm": 0.6865650415420532, |
| "learning_rate": 0.00017906484510294402, |
| "loss": 0.9269914031028748, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.317762399077278, |
| "grad_norm": 0.6794233322143555, |
| "learning_rate": 0.0001790263613623244, |
| "loss": 0.8624827861785889, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.31833910034602075, |
| "grad_norm": 0.9417468905448914, |
| "learning_rate": 0.00017898787762170483, |
| "loss": 1.2194072008132935, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.31891580161476357, |
| "grad_norm": 0.8551915287971497, |
| "learning_rate": 0.00017894939388108524, |
| "loss": 1.1121107339859009, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.31949250288350634, |
| "grad_norm": 1.0210304260253906, |
| "learning_rate": 0.00017891091014046566, |
| "loss": 1.3061752319335938, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.32006920415224915, |
| "grad_norm": 0.9833082556724548, |
| "learning_rate": 0.00017887242639984607, |
| "loss": 1.3157097101211548, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.3206459054209919, |
| "grad_norm": 0.8534771203994751, |
| "learning_rate": 0.0001788339426592265, |
| "loss": 1.1443736553192139, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.32122260668973474, |
| "grad_norm": 0.5206373929977417, |
| "learning_rate": 0.0001787954589186069, |
| "loss": 0.9210702776908875, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.3217993079584775, |
| "grad_norm": 0.9890329837799072, |
| "learning_rate": 0.00017875697517798732, |
| "loss": 1.1474642753601074, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.3223760092272203, |
| "grad_norm": 1.033987045288086, |
| "learning_rate": 0.0001787184914373677, |
| "loss": 1.3469852209091187, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.3229527104959631, |
| "grad_norm": 0.5397274494171143, |
| "learning_rate": 0.00017868000769674812, |
| "loss": 0.8606307506561279, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.3235294117647059, |
| "grad_norm": 0.7607125639915466, |
| "learning_rate": 0.00017864152395612854, |
| "loss": 1.5313308238983154, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.32410611303344866, |
| "grad_norm": 0.8187709450721741, |
| "learning_rate": 0.00017860304021550895, |
| "loss": 1.2671842575073242, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.3246828143021915, |
| "grad_norm": 0.8652257919311523, |
| "learning_rate": 0.00017856455647488937, |
| "loss": 1.0011459589004517, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.32525951557093424, |
| "grad_norm": 0.8205957412719727, |
| "learning_rate": 0.00017852607273426978, |
| "loss": 0.9995499849319458, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.32583621683967706, |
| "grad_norm": 0.8630533814430237, |
| "learning_rate": 0.0001784875889936502, |
| "loss": 1.119580864906311, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.3264129181084198, |
| "grad_norm": 0.6678904294967651, |
| "learning_rate": 0.00017844910525303061, |
| "loss": 0.9301247596740723, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.32698961937716264, |
| "grad_norm": 0.7211806774139404, |
| "learning_rate": 0.000178410621512411, |
| "loss": 1.3346351385116577, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.3275663206459054, |
| "grad_norm": 0.6392566561698914, |
| "learning_rate": 0.00017837213777179142, |
| "loss": 0.6997557878494263, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.3281430219146482, |
| "grad_norm": 0.8357546329498291, |
| "learning_rate": 0.00017833365403117183, |
| "loss": 1.3044462203979492, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.328719723183391, |
| "grad_norm": 0.7778827548027039, |
| "learning_rate": 0.00017829517029055225, |
| "loss": 0.9234685897827148, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.3292964244521338, |
| "grad_norm": 0.7168182730674744, |
| "learning_rate": 0.00017825668654993266, |
| "loss": 1.532446265220642, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.32987312572087657, |
| "grad_norm": 1.016398549079895, |
| "learning_rate": 0.00017821820280931308, |
| "loss": 1.4056748151779175, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.3304498269896194, |
| "grad_norm": 0.8056113719940186, |
| "learning_rate": 0.0001781797190686935, |
| "loss": 1.0595710277557373, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.33102652825836215, |
| "grad_norm": 0.6588327884674072, |
| "learning_rate": 0.0001781412353280739, |
| "loss": 0.849087655544281, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.33160322952710497, |
| "grad_norm": 0.7659177184104919, |
| "learning_rate": 0.0001781027515874543, |
| "loss": 1.1442945003509521, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.33217993079584773, |
| "grad_norm": 0.8960584402084351, |
| "learning_rate": 0.0001780642678468347, |
| "loss": 1.2777467966079712, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.33275663206459055, |
| "grad_norm": 0.8990175724029541, |
| "learning_rate": 0.00017802578410621513, |
| "loss": 1.0199333429336548, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 0.7010089159011841, |
| "learning_rate": 0.00017798730036559554, |
| "loss": 1.2177313566207886, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.33391003460207613, |
| "grad_norm": 0.8779993057250977, |
| "learning_rate": 0.00017794881662497596, |
| "loss": 0.8511064648628235, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.3344867358708189, |
| "grad_norm": 0.8380318880081177, |
| "learning_rate": 0.00017791033288435637, |
| "loss": 1.0792275667190552, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.3350634371395617, |
| "grad_norm": 0.7335569858551025, |
| "learning_rate": 0.0001778718491437368, |
| "loss": 1.0502758026123047, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.3356401384083045, |
| "grad_norm": 0.7759366631507874, |
| "learning_rate": 0.0001778333654031172, |
| "loss": 1.000847578048706, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.3362168396770473, |
| "grad_norm": 0.565648078918457, |
| "learning_rate": 0.0001777948816624976, |
| "loss": 0.7337483167648315, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.33679354094579006, |
| "grad_norm": 0.8646697998046875, |
| "learning_rate": 0.000177756397921878, |
| "loss": 1.2806568145751953, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.3373702422145329, |
| "grad_norm": 0.9556112289428711, |
| "learning_rate": 0.00017771791418125842, |
| "loss": 1.1648443937301636, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.33794694348327564, |
| "grad_norm": 0.6629974842071533, |
| "learning_rate": 0.00017767943044063884, |
| "loss": 1.0415198802947998, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.33852364475201846, |
| "grad_norm": 0.5972018837928772, |
| "learning_rate": 0.00017764094670001925, |
| "loss": 0.6916914582252502, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.3391003460207612, |
| "grad_norm": 0.7391757965087891, |
| "learning_rate": 0.00017760246295939967, |
| "loss": 1.194846510887146, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.33967704728950404, |
| "grad_norm": 0.7234671711921692, |
| "learning_rate": 0.00017756397921878008, |
| "loss": 0.9572672247886658, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.3402537485582468, |
| "grad_norm": 0.6949688792228699, |
| "learning_rate": 0.0001775254954781605, |
| "loss": 0.9968490600585938, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.3408304498269896, |
| "grad_norm": 0.9384737610816956, |
| "learning_rate": 0.0001774870117375409, |
| "loss": 1.106278896331787, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.3414071510957324, |
| "grad_norm": 0.8691385388374329, |
| "learning_rate": 0.0001774485279969213, |
| "loss": 0.8517290353775024, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.3419838523644752, |
| "grad_norm": 0.6864728331565857, |
| "learning_rate": 0.00017741004425630172, |
| "loss": 0.9280612468719482, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.34256055363321797, |
| "grad_norm": 0.7656051516532898, |
| "learning_rate": 0.00017737156051568213, |
| "loss": 1.0975104570388794, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.3431372549019608, |
| "grad_norm": 0.6587508916854858, |
| "learning_rate": 0.00017733307677506255, |
| "loss": 0.9575508236885071, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.34371395617070355, |
| "grad_norm": 0.8466372489929199, |
| "learning_rate": 0.00017729459303444296, |
| "loss": 1.2343617677688599, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.34429065743944637, |
| "grad_norm": 1.0839906930923462, |
| "learning_rate": 0.00017725610929382338, |
| "loss": 1.3552396297454834, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.34486735870818913, |
| "grad_norm": 0.7300306558609009, |
| "learning_rate": 0.00017721762555320377, |
| "loss": 1.0701713562011719, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.34544405997693195, |
| "grad_norm": 0.737766683101654, |
| "learning_rate": 0.00017717914181258418, |
| "loss": 1.0968977212905884, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.3460207612456747, |
| "grad_norm": 0.749933660030365, |
| "learning_rate": 0.0001771406580719646, |
| "loss": 1.3320926427841187, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.34659746251441753, |
| "grad_norm": 1.0226854085922241, |
| "learning_rate": 0.000177102174331345, |
| "loss": 1.5281516313552856, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.34717416378316035, |
| "grad_norm": 0.5458315014839172, |
| "learning_rate": 0.00017706369059072543, |
| "loss": 0.6243756413459778, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.3477508650519031, |
| "grad_norm": 0.6592231392860413, |
| "learning_rate": 0.00017702520685010584, |
| "loss": 1.007111668586731, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.34832756632064593, |
| "grad_norm": 0.7599675059318542, |
| "learning_rate": 0.00017698672310948626, |
| "loss": 1.059772253036499, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.3489042675893887, |
| "grad_norm": 0.7249642610549927, |
| "learning_rate": 0.00017694823936886667, |
| "loss": 1.0405762195587158, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.3494809688581315, |
| "grad_norm": 0.6669758558273315, |
| "learning_rate": 0.00017690975562824706, |
| "loss": 0.8157357573509216, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.3500576701268743, |
| "grad_norm": 1.0521658658981323, |
| "learning_rate": 0.00017687127188762748, |
| "loss": 1.3226133584976196, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.3506343713956171, |
| "grad_norm": 1.190586805343628, |
| "learning_rate": 0.0001768327881470079, |
| "loss": 0.9668002724647522, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.35121107266435986, |
| "grad_norm": 0.7342950105667114, |
| "learning_rate": 0.0001767943044063883, |
| "loss": 1.0137907266616821, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.3517877739331027, |
| "grad_norm": 0.8390425443649292, |
| "learning_rate": 0.00017675582066576872, |
| "loss": 1.2452900409698486, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.35236447520184544, |
| "grad_norm": 0.7040269374847412, |
| "learning_rate": 0.00017671733692514914, |
| "loss": 1.1274709701538086, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.35294117647058826, |
| "grad_norm": 0.6184991002082825, |
| "learning_rate": 0.00017667885318452955, |
| "loss": 0.8320228457450867, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.353517877739331, |
| "grad_norm": 0.9174041748046875, |
| "learning_rate": 0.00017664036944390997, |
| "loss": 1.0515730381011963, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.35409457900807384, |
| "grad_norm": 0.8032795786857605, |
| "learning_rate": 0.00017660188570329036, |
| "loss": 0.9692851901054382, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.3546712802768166, |
| "grad_norm": 0.854794979095459, |
| "learning_rate": 0.00017656340196267077, |
| "loss": 0.9672110676765442, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.3552479815455594, |
| "grad_norm": 0.8945924043655396, |
| "learning_rate": 0.0001765249182220512, |
| "loss": 1.1629329919815063, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.3558246828143022, |
| "grad_norm": 0.8737151622772217, |
| "learning_rate": 0.0001764864344814316, |
| "loss": 1.022585153579712, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.356401384083045, |
| "grad_norm": 0.7043283581733704, |
| "learning_rate": 0.00017644795074081202, |
| "loss": 1.825275182723999, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.35697808535178777, |
| "grad_norm": 0.81025230884552, |
| "learning_rate": 0.00017640946700019243, |
| "loss": 1.1937224864959717, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.3575547866205306, |
| "grad_norm": 0.6064541339874268, |
| "learning_rate": 0.00017637098325957285, |
| "loss": 1.144992709159851, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.35813148788927335, |
| "grad_norm": 0.7281432747840881, |
| "learning_rate": 0.00017633249951895326, |
| "loss": 0.8976823091506958, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.35870818915801617, |
| "grad_norm": 0.7124044895172119, |
| "learning_rate": 0.00017629401577833365, |
| "loss": 0.9814664721488953, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.35928489042675893, |
| "grad_norm": 0.7080062031745911, |
| "learning_rate": 0.00017625553203771407, |
| "loss": 0.8040327429771423, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.35986159169550175, |
| "grad_norm": 0.9307262897491455, |
| "learning_rate": 0.00017621704829709448, |
| "loss": 1.1769636869430542, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.3604382929642445, |
| "grad_norm": 0.6040496230125427, |
| "learning_rate": 0.0001761785645564749, |
| "loss": 0.8058497905731201, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.36101499423298733, |
| "grad_norm": 0.6352747678756714, |
| "learning_rate": 0.00017614008081585531, |
| "loss": 1.0901957750320435, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.3615916955017301, |
| "grad_norm": 1.0686722993850708, |
| "learning_rate": 0.00017610159707523573, |
| "loss": 1.0280206203460693, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.3621683967704729, |
| "grad_norm": 0.823551595211029, |
| "learning_rate": 0.00017606311333461614, |
| "loss": 1.1255362033843994, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.3627450980392157, |
| "grad_norm": 0.8719285726547241, |
| "learning_rate": 0.00017602462959399656, |
| "loss": 1.1470766067504883, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.3633217993079585, |
| "grad_norm": 0.8169400691986084, |
| "learning_rate": 0.00017598614585337695, |
| "loss": 1.0567045211791992, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.36389850057670126, |
| "grad_norm": 1.0707166194915771, |
| "learning_rate": 0.00017594766211275736, |
| "loss": 1.3314507007598877, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.3644752018454441, |
| "grad_norm": 0.6268380284309387, |
| "learning_rate": 0.00017590917837213778, |
| "loss": 1.100555419921875, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.36505190311418684, |
| "grad_norm": 0.7382054328918457, |
| "learning_rate": 0.0001758706946315182, |
| "loss": 0.9670585989952087, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.36562860438292966, |
| "grad_norm": 1.193224310874939, |
| "learning_rate": 0.0001758322108908986, |
| "loss": 1.3042614459991455, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.3662053056516724, |
| "grad_norm": 1.0091503858566284, |
| "learning_rate": 0.00017579372715027902, |
| "loss": 1.3520644903182983, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.36678200692041524, |
| "grad_norm": 0.6810548901557922, |
| "learning_rate": 0.00017575524340965944, |
| "loss": 0.8741036653518677, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.367358708189158, |
| "grad_norm": 0.7155483365058899, |
| "learning_rate": 0.00017571675966903986, |
| "loss": 0.8751124143600464, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.3679354094579008, |
| "grad_norm": 1.0436261892318726, |
| "learning_rate": 0.00017567827592842024, |
| "loss": 1.5696821212768555, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.3685121107266436, |
| "grad_norm": 0.9394407868385315, |
| "learning_rate": 0.00017563979218780066, |
| "loss": 0.8675939440727234, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.3690888119953864, |
| "grad_norm": 1.4341135025024414, |
| "learning_rate": 0.00017560130844718107, |
| "loss": 1.498160481452942, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.36966551326412916, |
| "grad_norm": 1.006375789642334, |
| "learning_rate": 0.0001755628247065615, |
| "loss": 1.2490055561065674, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.370242214532872, |
| "grad_norm": 0.6104082465171814, |
| "learning_rate": 0.0001755243409659419, |
| "loss": 0.8300263285636902, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.37081891580161475, |
| "grad_norm": 0.8571838736534119, |
| "learning_rate": 0.00017548585722532232, |
| "loss": 1.146481990814209, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.37139561707035756, |
| "grad_norm": 0.6824607253074646, |
| "learning_rate": 0.00017544737348470273, |
| "loss": 1.2418452501296997, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.3719723183391003, |
| "grad_norm": 1.0891611576080322, |
| "learning_rate": 0.00017540888974408315, |
| "loss": 1.2160457372665405, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.37254901960784315, |
| "grad_norm": 0.6260281801223755, |
| "learning_rate": 0.00017537040600346354, |
| "loss": 0.8934881091117859, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.3731257208765859, |
| "grad_norm": 0.8351913690567017, |
| "learning_rate": 0.00017533192226284395, |
| "loss": 1.5422282218933105, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.3737024221453287, |
| "grad_norm": 0.7572267055511475, |
| "learning_rate": 0.00017529343852222437, |
| "loss": 1.2659950256347656, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.3742791234140715, |
| "grad_norm": 0.7712565064430237, |
| "learning_rate": 0.00017525495478160478, |
| "loss": 1.2143782377243042, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.3748558246828143, |
| "grad_norm": 0.6880773305892944, |
| "learning_rate": 0.0001752164710409852, |
| "loss": 1.0878217220306396, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.3754325259515571, |
| "grad_norm": 0.8996551632881165, |
| "learning_rate": 0.00017517798730036561, |
| "loss": 0.9668335914611816, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.3760092272202999, |
| "grad_norm": 0.921444296836853, |
| "learning_rate": 0.00017513950355974603, |
| "loss": 1.1585900783538818, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.37658592848904265, |
| "grad_norm": 0.8658480048179626, |
| "learning_rate": 0.00017510101981912645, |
| "loss": 1.1533393859863281, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.3771626297577855, |
| "grad_norm": 0.6665229797363281, |
| "learning_rate": 0.00017506253607850683, |
| "loss": 0.8233336210250854, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.37773933102652824, |
| "grad_norm": 0.746337890625, |
| "learning_rate": 0.00017502405233788725, |
| "loss": 1.099341630935669, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.37831603229527105, |
| "grad_norm": 0.8498716354370117, |
| "learning_rate": 0.00017498556859726766, |
| "loss": 1.333115577697754, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.3788927335640138, |
| "grad_norm": 0.7371817827224731, |
| "learning_rate": 0.00017494708485664808, |
| "loss": 1.05489182472229, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.37946943483275664, |
| "grad_norm": 0.7369913458824158, |
| "learning_rate": 0.0001749086011160285, |
| "loss": 0.7275075912475586, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.3800461361014994, |
| "grad_norm": 1.4918899536132812, |
| "learning_rate": 0.0001748701173754089, |
| "loss": 1.2430638074874878, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.3806228373702422, |
| "grad_norm": 0.686100423336029, |
| "learning_rate": 0.00017483163363478933, |
| "loss": 0.7841339707374573, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.381199538638985, |
| "grad_norm": 0.7799985408782959, |
| "learning_rate": 0.00017479314989416974, |
| "loss": 1.1784673929214478, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.3817762399077278, |
| "grad_norm": 0.7435747385025024, |
| "learning_rate": 0.00017475466615355013, |
| "loss": 1.180450439453125, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.38235294117647056, |
| "grad_norm": 0.7358818650245667, |
| "learning_rate": 0.00017471618241293054, |
| "loss": 0.9987742900848389, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.3829296424452134, |
| "grad_norm": 0.7353511452674866, |
| "learning_rate": 0.00017467769867231096, |
| "loss": 1.1325185298919678, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.38350634371395614, |
| "grad_norm": 0.7735626697540283, |
| "learning_rate": 0.00017463921493169137, |
| "loss": 1.0828659534454346, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.38408304498269896, |
| "grad_norm": 0.6293249130249023, |
| "learning_rate": 0.0001746007311910718, |
| "loss": 0.9253727793693542, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.3846597462514418, |
| "grad_norm": 0.6271319389343262, |
| "learning_rate": 0.0001745622474504522, |
| "loss": 0.7645162343978882, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.38523644752018454, |
| "grad_norm": 0.6632966995239258, |
| "learning_rate": 0.00017452376370983262, |
| "loss": 0.9796670079231262, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.38581314878892736, |
| "grad_norm": 0.8829965591430664, |
| "learning_rate": 0.00017448527996921304, |
| "loss": 0.9777094721794128, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.3863898500576701, |
| "grad_norm": 0.7675085663795471, |
| "learning_rate": 0.00017444679622859342, |
| "loss": 1.0497252941131592, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.38696655132641294, |
| "grad_norm": 0.9194138050079346, |
| "learning_rate": 0.00017440831248797384, |
| "loss": 1.0992257595062256, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.3875432525951557, |
| "grad_norm": 1.0398883819580078, |
| "learning_rate": 0.00017436982874735425, |
| "loss": 1.25284743309021, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.3881199538638985, |
| "grad_norm": 0.5921796560287476, |
| "learning_rate": 0.00017433134500673467, |
| "loss": 0.6763097047805786, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.3886966551326413, |
| "grad_norm": 1.0226387977600098, |
| "learning_rate": 0.00017429286126611508, |
| "loss": 1.3273173570632935, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.3892733564013841, |
| "grad_norm": 0.5252590179443359, |
| "learning_rate": 0.0001742543775254955, |
| "loss": 0.6646312475204468, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.38985005767012687, |
| "grad_norm": 0.600639820098877, |
| "learning_rate": 0.00017421589378487592, |
| "loss": 0.7095688581466675, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.3904267589388697, |
| "grad_norm": 0.7131365537643433, |
| "learning_rate": 0.00017417741004425633, |
| "loss": 1.2200595140457153, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.39100346020761245, |
| "grad_norm": 0.9018159508705139, |
| "learning_rate": 0.00017413892630363672, |
| "loss": 0.9669409394264221, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.39158016147635527, |
| "grad_norm": 0.9841684103012085, |
| "learning_rate": 0.00017410044256301713, |
| "loss": 1.028241515159607, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.39215686274509803, |
| "grad_norm": 0.9678821563720703, |
| "learning_rate": 0.00017406195882239755, |
| "loss": 1.3122403621673584, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.39273356401384085, |
| "grad_norm": 0.6439565420150757, |
| "learning_rate": 0.00017402347508177796, |
| "loss": 0.8441326022148132, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.3933102652825836, |
| "grad_norm": 0.8460219502449036, |
| "learning_rate": 0.00017398499134115838, |
| "loss": 1.193575382232666, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.39388696655132643, |
| "grad_norm": 0.8068860769271851, |
| "learning_rate": 0.0001739465076005388, |
| "loss": 1.209285020828247, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.3944636678200692, |
| "grad_norm": 0.6420811414718628, |
| "learning_rate": 0.0001739080238599192, |
| "loss": 0.9203285574913025, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.395040369088812, |
| "grad_norm": 1.1171250343322754, |
| "learning_rate": 0.0001738695401192996, |
| "loss": 1.5638062953948975, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.3956170703575548, |
| "grad_norm": 0.7218726873397827, |
| "learning_rate": 0.00017383105637868001, |
| "loss": 1.1434835195541382, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.3961937716262976, |
| "grad_norm": 0.9958249926567078, |
| "learning_rate": 0.00017379257263806043, |
| "loss": 0.7441573143005371, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.39677047289504036, |
| "grad_norm": 0.8222061395645142, |
| "learning_rate": 0.00017375408889744084, |
| "loss": 1.2088245153427124, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.3973471741637832, |
| "grad_norm": 0.5759637355804443, |
| "learning_rate": 0.00017371560515682126, |
| "loss": 0.9504674077033997, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.39792387543252594, |
| "grad_norm": 0.8157130479812622, |
| "learning_rate": 0.00017367712141620168, |
| "loss": 1.319948673248291, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.39850057670126876, |
| "grad_norm": 0.7266381978988647, |
| "learning_rate": 0.0001736386376755821, |
| "loss": 0.8739478588104248, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.3990772779700115, |
| "grad_norm": 0.644598126411438, |
| "learning_rate": 0.0001736001539349625, |
| "loss": 0.9521651864051819, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.39965397923875434, |
| "grad_norm": 0.5922922492027283, |
| "learning_rate": 0.0001735616701943429, |
| "loss": 0.7051569223403931, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.4002306805074971, |
| "grad_norm": 0.6880702972412109, |
| "learning_rate": 0.0001735231864537233, |
| "loss": 1.1202598810195923, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.4008073817762399, |
| "grad_norm": 1.1836776733398438, |
| "learning_rate": 0.00017348470271310372, |
| "loss": 1.2588169574737549, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.4013840830449827, |
| "grad_norm": 0.965606689453125, |
| "learning_rate": 0.00017344621897248414, |
| "loss": 0.7970831990242004, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.4019607843137255, |
| "grad_norm": 0.8883787989616394, |
| "learning_rate": 0.00017340773523186456, |
| "loss": 1.6653708219528198, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.40253748558246827, |
| "grad_norm": 0.7349938750267029, |
| "learning_rate": 0.00017336925149124497, |
| "loss": 0.7324041724205017, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.4031141868512111, |
| "grad_norm": 1.0731885433197021, |
| "learning_rate": 0.00017333076775062539, |
| "loss": 0.9731301069259644, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.40369088811995385, |
| "grad_norm": 0.8691738843917847, |
| "learning_rate": 0.0001732922840100058, |
| "loss": 1.0968525409698486, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.40426758938869667, |
| "grad_norm": 0.921116292476654, |
| "learning_rate": 0.0001732538002693862, |
| "loss": 1.3427119255065918, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.40484429065743943, |
| "grad_norm": 0.8539203405380249, |
| "learning_rate": 0.0001732153165287666, |
| "loss": 1.2618871927261353, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.40542099192618225, |
| "grad_norm": 0.6238696575164795, |
| "learning_rate": 0.00017317683278814702, |
| "loss": 0.7679486274719238, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.405997693194925, |
| "grad_norm": 0.6827321648597717, |
| "learning_rate": 0.00017313834904752743, |
| "loss": 0.9498722553253174, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.40657439446366783, |
| "grad_norm": 0.9637985229492188, |
| "learning_rate": 0.00017309986530690785, |
| "loss": 1.2945339679718018, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.4071510957324106, |
| "grad_norm": 0.6361503601074219, |
| "learning_rate": 0.00017306138156628827, |
| "loss": 1.2040516138076782, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.4077277970011534, |
| "grad_norm": 0.713758647441864, |
| "learning_rate": 0.00017302289782566868, |
| "loss": 1.1285666227340698, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.4083044982698962, |
| "grad_norm": 1.0620390176773071, |
| "learning_rate": 0.0001729844140850491, |
| "loss": 1.2117018699645996, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.408881199538639, |
| "grad_norm": 0.6957300305366516, |
| "learning_rate": 0.00017294593034442948, |
| "loss": 1.2091706991195679, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.40945790080738176, |
| "grad_norm": 0.4594845771789551, |
| "learning_rate": 0.0001729074466038099, |
| "loss": 3.3324732780456543, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.4100346020761246, |
| "grad_norm": 0.8902932405471802, |
| "learning_rate": 0.00017286896286319031, |
| "loss": 1.1579055786132812, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.41061130334486734, |
| "grad_norm": 0.7140578031539917, |
| "learning_rate": 0.00017283047912257073, |
| "loss": 0.877116858959198, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.41118800461361016, |
| "grad_norm": 0.8449535369873047, |
| "learning_rate": 0.00017279199538195115, |
| "loss": 1.2400063276290894, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.4117647058823529, |
| "grad_norm": 1.0700358152389526, |
| "learning_rate": 0.00017275351164133156, |
| "loss": 1.1401453018188477, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.41234140715109574, |
| "grad_norm": 0.6705982685089111, |
| "learning_rate": 0.00017271502790071198, |
| "loss": 0.8326209783554077, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.4129181084198385, |
| "grad_norm": 0.7149010896682739, |
| "learning_rate": 0.0001726765441600924, |
| "loss": 1.0872998237609863, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.4134948096885813, |
| "grad_norm": 0.46808966994285583, |
| "learning_rate": 0.00017263806041947278, |
| "loss": 0.6795035004615784, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.4140715109573241, |
| "grad_norm": 0.8606752157211304, |
| "learning_rate": 0.0001725995766788532, |
| "loss": 1.0544252395629883, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.4146482122260669, |
| "grad_norm": 0.5839232802391052, |
| "learning_rate": 0.0001725610929382336, |
| "loss": 0.7785719633102417, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.41522491349480967, |
| "grad_norm": 0.8700772523880005, |
| "learning_rate": 0.00017252260919761403, |
| "loss": 0.988602340221405, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.4158016147635525, |
| "grad_norm": 0.9886090159416199, |
| "learning_rate": 0.00017248412545699444, |
| "loss": 1.3493539094924927, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.41637831603229525, |
| "grad_norm": 0.9088316559791565, |
| "learning_rate": 0.00017244564171637486, |
| "loss": 1.0131090879440308, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.41695501730103807, |
| "grad_norm": 0.9066189527511597, |
| "learning_rate": 0.00017240715797575527, |
| "loss": 1.2530944347381592, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.41753171856978083, |
| "grad_norm": 0.7733665704727173, |
| "learning_rate": 0.00017236867423513569, |
| "loss": 1.1255629062652588, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.41810841983852365, |
| "grad_norm": 0.609832763671875, |
| "learning_rate": 0.00017233019049451607, |
| "loss": 0.7514859437942505, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.4186851211072664, |
| "grad_norm": 0.6903802752494812, |
| "learning_rate": 0.0001722917067538965, |
| "loss": 0.8925538063049316, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.41926182237600923, |
| "grad_norm": 0.7692581415176392, |
| "learning_rate": 0.0001722532230132769, |
| "loss": 1.103420376777649, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.419838523644752, |
| "grad_norm": 0.7881311774253845, |
| "learning_rate": 0.0001722147392726573, |
| "loss": 1.3109550476074219, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.4204152249134948, |
| "grad_norm": 0.6949164271354675, |
| "learning_rate": 0.0001721762555320377, |
| "loss": 1.0904300212860107, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.4209919261822376, |
| "grad_norm": 0.6746834516525269, |
| "learning_rate": 0.00017213777179141812, |
| "loss": 1.240382194519043, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.4215686274509804, |
| "grad_norm": 0.8831079602241516, |
| "learning_rate": 0.00017209928805079854, |
| "loss": 1.546260118484497, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.42214532871972316, |
| "grad_norm": 0.917523205280304, |
| "learning_rate": 0.00017206080431017895, |
| "loss": 1.3464173078536987, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.422722029988466, |
| "grad_norm": 0.729640007019043, |
| "learning_rate": 0.00017202232056955937, |
| "loss": 0.9092597961425781, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.4232987312572088, |
| "grad_norm": 0.9597057104110718, |
| "learning_rate": 0.00017198383682893976, |
| "loss": 1.449595332145691, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.42387543252595156, |
| "grad_norm": 0.570996880531311, |
| "learning_rate": 0.00017194535308832017, |
| "loss": 0.660990297794342, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.4244521337946944, |
| "grad_norm": 0.8485130071640015, |
| "learning_rate": 0.0001719068693477006, |
| "loss": 1.009351372718811, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.42502883506343714, |
| "grad_norm": 1.1340487003326416, |
| "learning_rate": 0.000171868385607081, |
| "loss": 1.186898946762085, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.42560553633217996, |
| "grad_norm": 0.9666796326637268, |
| "learning_rate": 0.00017182990186646142, |
| "loss": 1.3713027238845825, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.4261822376009227, |
| "grad_norm": 0.8104447722434998, |
| "learning_rate": 0.00017179141812584183, |
| "loss": 0.7822756767272949, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.42675893886966554, |
| "grad_norm": 0.7587509155273438, |
| "learning_rate": 0.00017175293438522225, |
| "loss": 1.1129992008209229, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.4273356401384083, |
| "grad_norm": 0.854256272315979, |
| "learning_rate": 0.00017171445064460266, |
| "loss": 1.1753698587417603, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.4279123414071511, |
| "grad_norm": 0.7335513234138489, |
| "learning_rate": 0.00017167596690398305, |
| "loss": 1.1233677864074707, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.4284890426758939, |
| "grad_norm": 1.1383814811706543, |
| "learning_rate": 0.00017163748316336347, |
| "loss": 1.6328407526016235, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.4290657439446367, |
| "grad_norm": 0.5805800557136536, |
| "learning_rate": 0.00017159899942274388, |
| "loss": 0.8374234437942505, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.42964244521337946, |
| "grad_norm": 0.5744853615760803, |
| "learning_rate": 0.0001715605156821243, |
| "loss": 0.7072418332099915, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.4302191464821223, |
| "grad_norm": 1.0968151092529297, |
| "learning_rate": 0.00017152203194150471, |
| "loss": 0.9308477640151978, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.43079584775086505, |
| "grad_norm": 0.7771037220954895, |
| "learning_rate": 0.00017148354820088513, |
| "loss": 1.0803910493850708, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.43137254901960786, |
| "grad_norm": 0.760296106338501, |
| "learning_rate": 0.00017144506446026554, |
| "loss": 0.9416469931602478, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.43194925028835063, |
| "grad_norm": 0.8478863835334778, |
| "learning_rate": 0.00017140658071964596, |
| "loss": 1.0037909746170044, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.43252595155709345, |
| "grad_norm": 0.802010715007782, |
| "learning_rate": 0.00017136809697902635, |
| "loss": 1.2789827585220337, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.4331026528258362, |
| "grad_norm": 0.7146703004837036, |
| "learning_rate": 0.00017132961323840676, |
| "loss": 0.925313413143158, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.43367935409457903, |
| "grad_norm": 1.1419707536697388, |
| "learning_rate": 0.00017129112949778718, |
| "loss": 1.3266316652297974, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.4342560553633218, |
| "grad_norm": 0.5337522029876709, |
| "learning_rate": 0.0001712526457571676, |
| "loss": 0.8182927966117859, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.4348327566320646, |
| "grad_norm": 0.7067147493362427, |
| "learning_rate": 0.000171214162016548, |
| "loss": 1.01529061794281, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.4354094579008074, |
| "grad_norm": 0.8742361664772034, |
| "learning_rate": 0.00017117567827592842, |
| "loss": 0.9216449856758118, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.4359861591695502, |
| "grad_norm": 1.0121413469314575, |
| "learning_rate": 0.00017113719453530884, |
| "loss": 1.5315768718719482, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.43656286043829295, |
| "grad_norm": 0.970582127571106, |
| "learning_rate": 0.00017109871079468925, |
| "loss": 1.1701881885528564, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.4371395617070358, |
| "grad_norm": 0.8317894339561462, |
| "learning_rate": 0.00017106022705406964, |
| "loss": 1.1619702577590942, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.43771626297577854, |
| "grad_norm": 0.6935670375823975, |
| "learning_rate": 0.00017102174331345006, |
| "loss": 1.0018664598464966, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.43829296424452135, |
| "grad_norm": 1.0123279094696045, |
| "learning_rate": 0.00017098325957283047, |
| "loss": 1.1231794357299805, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.4388696655132641, |
| "grad_norm": 0.7619280219078064, |
| "learning_rate": 0.0001709447758322109, |
| "loss": 1.0395662784576416, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.43944636678200694, |
| "grad_norm": 0.8570308089256287, |
| "learning_rate": 0.0001709062920915913, |
| "loss": 1.4022446870803833, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.4400230680507497, |
| "grad_norm": 1.178285837173462, |
| "learning_rate": 0.00017086780835097172, |
| "loss": 1.5245153903961182, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.4405997693194925, |
| "grad_norm": 0.876589298248291, |
| "learning_rate": 0.00017082932461035213, |
| "loss": 1.482165813446045, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.4411764705882353, |
| "grad_norm": 0.8614532947540283, |
| "learning_rate": 0.00017079084086973255, |
| "loss": 1.312232255935669, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.4417531718569781, |
| "grad_norm": 0.6772201061248779, |
| "learning_rate": 0.00017075235712911294, |
| "loss": 1.1610076427459717, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.44232987312572086, |
| "grad_norm": 0.805927038192749, |
| "learning_rate": 0.00017071387338849335, |
| "loss": 1.3874244689941406, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.4429065743944637, |
| "grad_norm": 0.5419954061508179, |
| "learning_rate": 0.00017067538964787377, |
| "loss": 0.7610808610916138, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.44348327566320644, |
| "grad_norm": 0.773598313331604, |
| "learning_rate": 0.00017063690590725418, |
| "loss": 0.8612810373306274, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.44405997693194926, |
| "grad_norm": 0.6376165151596069, |
| "learning_rate": 0.0001705984221666346, |
| "loss": 0.8417828679084778, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.444636678200692, |
| "grad_norm": 0.6870789527893066, |
| "learning_rate": 0.00017055993842601501, |
| "loss": 1.1764918565750122, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.44521337946943484, |
| "grad_norm": 0.5562968254089355, |
| "learning_rate": 0.00017052145468539543, |
| "loss": 0.8358933925628662, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.4457900807381776, |
| "grad_norm": 0.602963924407959, |
| "learning_rate": 0.00017048297094477585, |
| "loss": 1.197677731513977, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.4463667820069204, |
| "grad_norm": 1.0190907716751099, |
| "learning_rate": 0.00017044448720415623, |
| "loss": 1.4355199337005615, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.4469434832756632, |
| "grad_norm": 0.633346676826477, |
| "learning_rate": 0.00017040600346353665, |
| "loss": 0.7924656867980957, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.447520184544406, |
| "grad_norm": 0.797099232673645, |
| "learning_rate": 0.00017036751972291706, |
| "loss": 1.2302619218826294, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.44809688581314877, |
| "grad_norm": 0.7166492938995361, |
| "learning_rate": 0.00017032903598229748, |
| "loss": 1.063340187072754, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.4486735870818916, |
| "grad_norm": 0.9511370062828064, |
| "learning_rate": 0.0001702905522416779, |
| "loss": 0.8998168706893921, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.44925028835063435, |
| "grad_norm": 0.8487029075622559, |
| "learning_rate": 0.0001702520685010583, |
| "loss": 1.1850653886795044, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.44982698961937717, |
| "grad_norm": 1.0267854928970337, |
| "learning_rate": 0.00017021358476043873, |
| "loss": 1.246724009513855, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.45040369088811993, |
| "grad_norm": 1.155428409576416, |
| "learning_rate": 0.00017017510101981914, |
| "loss": 1.539854884147644, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.45098039215686275, |
| "grad_norm": 0.6774823069572449, |
| "learning_rate": 0.00017013661727919953, |
| "loss": 0.7472063302993774, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.4515570934256055, |
| "grad_norm": 0.7500667572021484, |
| "learning_rate": 0.00017009813353857994, |
| "loss": 0.9946876168251038, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.45213379469434833, |
| "grad_norm": 0.7643426656723022, |
| "learning_rate": 0.00017005964979796036, |
| "loss": 0.8451071977615356, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.4527104959630911, |
| "grad_norm": 0.721379816532135, |
| "learning_rate": 0.00017002116605734077, |
| "loss": 0.9988998174667358, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.4532871972318339, |
| "grad_norm": 0.8850287199020386, |
| "learning_rate": 0.0001699826823167212, |
| "loss": 0.9789897203445435, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.4538638985005767, |
| "grad_norm": 1.0076375007629395, |
| "learning_rate": 0.0001699441985761016, |
| "loss": 1.3830417394638062, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.4544405997693195, |
| "grad_norm": 0.6105207204818726, |
| "learning_rate": 0.00016990571483548202, |
| "loss": 0.8870081901550293, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.45501730103806226, |
| "grad_norm": 0.7732753157615662, |
| "learning_rate": 0.00016986723109486244, |
| "loss": 0.9958963990211487, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.4555940023068051, |
| "grad_norm": 0.9871165156364441, |
| "learning_rate": 0.00016982874735424282, |
| "loss": 1.1141139268875122, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.45617070357554784, |
| "grad_norm": 0.7117231488227844, |
| "learning_rate": 0.00016979026361362324, |
| "loss": 1.0168585777282715, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.45674740484429066, |
| "grad_norm": 0.6954454183578491, |
| "learning_rate": 0.00016975177987300365, |
| "loss": 0.9319931268692017, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.4573241061130334, |
| "grad_norm": 0.6463753581047058, |
| "learning_rate": 0.00016971329613238407, |
| "loss": 0.9734832644462585, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.45790080738177624, |
| "grad_norm": 0.7156365513801575, |
| "learning_rate": 0.00016967481239176448, |
| "loss": 1.0014495849609375, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.458477508650519, |
| "grad_norm": 0.8648508787155151, |
| "learning_rate": 0.0001696363286511449, |
| "loss": 1.3907616138458252, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.4590542099192618, |
| "grad_norm": 0.8066338300704956, |
| "learning_rate": 0.00016959784491052532, |
| "loss": 1.0530327558517456, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.4596309111880046, |
| "grad_norm": 0.8617266416549683, |
| "learning_rate": 0.00016955936116990573, |
| "loss": 1.7989249229431152, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.4602076124567474, |
| "grad_norm": 0.7956259250640869, |
| "learning_rate": 0.00016952087742928612, |
| "loss": 0.928198516368866, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.46078431372549017, |
| "grad_norm": 0.8778709173202515, |
| "learning_rate": 0.00016948239368866653, |
| "loss": 0.9466978907585144, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.461361014994233, |
| "grad_norm": 0.8518659472465515, |
| "learning_rate": 0.00016944390994804695, |
| "loss": 1.0593540668487549, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.4619377162629758, |
| "grad_norm": 0.79550701379776, |
| "learning_rate": 0.00016940542620742736, |
| "loss": 1.1164321899414062, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.46251441753171857, |
| "grad_norm": 1.0006239414215088, |
| "learning_rate": 0.00016936694246680778, |
| "loss": 1.160499930381775, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.4630911188004614, |
| "grad_norm": 0.8525403738021851, |
| "learning_rate": 0.0001693284587261882, |
| "loss": 1.0770652294158936, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.46366782006920415, |
| "grad_norm": 0.6851354837417603, |
| "learning_rate": 0.0001692899749855686, |
| "loss": 1.0310590267181396, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.46424452133794697, |
| "grad_norm": 0.6831552386283875, |
| "learning_rate": 0.000169251491244949, |
| "loss": 1.0782524347305298, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.46482122260668973, |
| "grad_norm": 0.8892863988876343, |
| "learning_rate": 0.00016921300750432941, |
| "loss": 1.3154478073120117, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.46539792387543255, |
| "grad_norm": 0.6863577961921692, |
| "learning_rate": 0.00016917452376370983, |
| "loss": 0.5912436842918396, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.4659746251441753, |
| "grad_norm": 0.8612192869186401, |
| "learning_rate": 0.00016913604002309024, |
| "loss": 1.0140503644943237, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.46655132641291813, |
| "grad_norm": 0.6565495729446411, |
| "learning_rate": 0.00016909755628247066, |
| "loss": 0.8388250470161438, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.4671280276816609, |
| "grad_norm": 0.5729434490203857, |
| "learning_rate": 0.00016905907254185107, |
| "loss": 0.8662521839141846, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.4677047289504037, |
| "grad_norm": 0.8261442184448242, |
| "learning_rate": 0.0001690205888012315, |
| "loss": 1.1527458429336548, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.4682814302191465, |
| "grad_norm": 0.6182582974433899, |
| "learning_rate": 0.0001689821050606119, |
| "loss": 0.7817882895469666, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.4688581314878893, |
| "grad_norm": 0.5987662672996521, |
| "learning_rate": 0.0001689436213199923, |
| "loss": 0.864625871181488, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.46943483275663206, |
| "grad_norm": 0.8617327809333801, |
| "learning_rate": 0.0001689051375793727, |
| "loss": 1.1531751155853271, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.4700115340253749, |
| "grad_norm": 0.8277755379676819, |
| "learning_rate": 0.00016886665383875312, |
| "loss": 0.928108811378479, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.47058823529411764, |
| "grad_norm": 0.7510029673576355, |
| "learning_rate": 0.00016882817009813354, |
| "loss": 1.0068414211273193, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.47116493656286046, |
| "grad_norm": 0.8691316246986389, |
| "learning_rate": 0.00016878968635751395, |
| "loss": 1.0941516160964966, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.4717416378316032, |
| "grad_norm": 0.581984281539917, |
| "learning_rate": 0.00016875120261689437, |
| "loss": 0.6039727926254272, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.47231833910034604, |
| "grad_norm": 0.7486310005187988, |
| "learning_rate": 0.00016871271887627479, |
| "loss": 1.140452265739441, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.4728950403690888, |
| "grad_norm": 0.8794305324554443, |
| "learning_rate": 0.0001686742351356552, |
| "loss": 1.2717854976654053, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.4734717416378316, |
| "grad_norm": 0.8812481164932251, |
| "learning_rate": 0.0001686357513950356, |
| "loss": 0.9813717007637024, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.4740484429065744, |
| "grad_norm": 0.9091891646385193, |
| "learning_rate": 0.000168597267654416, |
| "loss": 1.2938401699066162, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.4746251441753172, |
| "grad_norm": 0.9045780301094055, |
| "learning_rate": 0.00016855878391379642, |
| "loss": 1.312792181968689, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.47520184544405997, |
| "grad_norm": 0.8430265784263611, |
| "learning_rate": 0.00016852030017317683, |
| "loss": 1.2679914236068726, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.4757785467128028, |
| "grad_norm": 0.6870001554489136, |
| "learning_rate": 0.00016848181643255725, |
| "loss": 0.970576822757721, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.47635524798154555, |
| "grad_norm": 0.8256406188011169, |
| "learning_rate": 0.00016844333269193767, |
| "loss": 1.302760362625122, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.47693194925028837, |
| "grad_norm": 0.7057660222053528, |
| "learning_rate": 0.00016840484895131808, |
| "loss": 0.9811574220657349, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.47750865051903113, |
| "grad_norm": 0.8487821817398071, |
| "learning_rate": 0.0001683663652106985, |
| "loss": 1.0537941455841064, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.47808535178777395, |
| "grad_norm": 0.7474492788314819, |
| "learning_rate": 0.00016832788147007888, |
| "loss": 0.856541633605957, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.4786620530565167, |
| "grad_norm": 0.9228368401527405, |
| "learning_rate": 0.0001682893977294593, |
| "loss": 1.0505741834640503, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.47923875432525953, |
| "grad_norm": 0.9288182854652405, |
| "learning_rate": 0.00016825091398883971, |
| "loss": 1.3584654331207275, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.4798154555940023, |
| "grad_norm": 1.4403129816055298, |
| "learning_rate": 0.00016821243024822013, |
| "loss": 1.911801815032959, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.4803921568627451, |
| "grad_norm": 0.6283893585205078, |
| "learning_rate": 0.00016817394650760055, |
| "loss": 0.8583131432533264, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.4809688581314879, |
| "grad_norm": 0.6910902261734009, |
| "learning_rate": 0.00016813546276698096, |
| "loss": 1.3508315086364746, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.4815455594002307, |
| "grad_norm": 0.6606875658035278, |
| "learning_rate": 0.00016809697902636138, |
| "loss": 1.0815465450286865, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.48212226066897346, |
| "grad_norm": 0.8546112775802612, |
| "learning_rate": 0.0001680584952857418, |
| "loss": 1.2201032638549805, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.4826989619377163, |
| "grad_norm": 0.9130816459655762, |
| "learning_rate": 0.00016802001154512218, |
| "loss": 1.208343744277954, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.48327566320645904, |
| "grad_norm": 0.7690496444702148, |
| "learning_rate": 0.0001679815278045026, |
| "loss": 1.0452954769134521, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.48385236447520186, |
| "grad_norm": 0.7210266590118408, |
| "learning_rate": 0.000167943044063883, |
| "loss": 0.7897384166717529, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.4844290657439446, |
| "grad_norm": 0.5705054402351379, |
| "learning_rate": 0.00016790456032326342, |
| "loss": 0.8288441896438599, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.48500576701268744, |
| "grad_norm": 0.6143510341644287, |
| "learning_rate": 0.00016786607658264384, |
| "loss": 0.8081311583518982, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.4855824682814302, |
| "grad_norm": 0.7222305536270142, |
| "learning_rate": 0.00016782759284202426, |
| "loss": 1.1107532978057861, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.486159169550173, |
| "grad_norm": 0.6712546944618225, |
| "learning_rate": 0.00016778910910140467, |
| "loss": 0.8375999927520752, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.4867358708189158, |
| "grad_norm": 0.9085020422935486, |
| "learning_rate": 0.00016775062536078509, |
| "loss": 0.9624453186988831, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.4873125720876586, |
| "grad_norm": 0.773102879524231, |
| "learning_rate": 0.00016771214162016547, |
| "loss": 1.0454928874969482, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.48788927335640137, |
| "grad_norm": 0.5635338425636292, |
| "learning_rate": 0.0001676736578795459, |
| "loss": 0.7329631447792053, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.4884659746251442, |
| "grad_norm": 0.8183399438858032, |
| "learning_rate": 0.0001676351741389263, |
| "loss": 0.859244704246521, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.48904267589388695, |
| "grad_norm": 0.7920128107070923, |
| "learning_rate": 0.00016759669039830672, |
| "loss": 0.9889219403266907, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.48961937716262977, |
| "grad_norm": 1.1391570568084717, |
| "learning_rate": 0.00016755820665768714, |
| "loss": 1.146942138671875, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.49019607843137253, |
| "grad_norm": 0.6648845076560974, |
| "learning_rate": 0.00016751972291706755, |
| "loss": 0.7090552449226379, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.49077277970011535, |
| "grad_norm": 0.7156478762626648, |
| "learning_rate": 0.00016748123917644797, |
| "loss": 0.7772218585014343, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.4913494809688581, |
| "grad_norm": 0.7279021739959717, |
| "learning_rate": 0.00016744275543582838, |
| "loss": 1.0468722581863403, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.49192618223760093, |
| "grad_norm": 1.0862352848052979, |
| "learning_rate": 0.00016740427169520877, |
| "loss": 1.3199949264526367, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.4925028835063437, |
| "grad_norm": 0.5989871025085449, |
| "learning_rate": 0.00016736578795458918, |
| "loss": 0.7066143751144409, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.4930795847750865, |
| "grad_norm": 0.88418048620224, |
| "learning_rate": 0.0001673273042139696, |
| "loss": 0.9679941534996033, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.4936562860438293, |
| "grad_norm": 0.7538619637489319, |
| "learning_rate": 0.00016728882047335002, |
| "loss": 0.906350314617157, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.4942329873125721, |
| "grad_norm": 1.0406384468078613, |
| "learning_rate": 0.00016725033673273043, |
| "loss": 1.0761326551437378, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.49480968858131485, |
| "grad_norm": 0.9118819236755371, |
| "learning_rate": 0.00016721185299211085, |
| "loss": 1.449715495109558, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.4953863898500577, |
| "grad_norm": 0.7859880328178406, |
| "learning_rate": 0.00016717336925149126, |
| "loss": 1.0066848993301392, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.49596309111880044, |
| "grad_norm": 0.7971929907798767, |
| "learning_rate": 0.00016713488551087168, |
| "loss": 1.0836429595947266, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.49653979238754326, |
| "grad_norm": 0.7688129544258118, |
| "learning_rate": 0.00016709640177025206, |
| "loss": 0.8990678191184998, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.497116493656286, |
| "grad_norm": 0.6911450028419495, |
| "learning_rate": 0.00016705791802963248, |
| "loss": 0.9118435382843018, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.49769319492502884, |
| "grad_norm": 0.9296817183494568, |
| "learning_rate": 0.0001670194342890129, |
| "loss": 1.0580615997314453, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.4982698961937716, |
| "grad_norm": 0.5820940732955933, |
| "learning_rate": 0.0001669809505483933, |
| "loss": 0.6944743394851685, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.4988465974625144, |
| "grad_norm": 0.9766574501991272, |
| "learning_rate": 0.00016694246680777373, |
| "loss": 1.4097439050674438, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.4994232987312572, |
| "grad_norm": 0.658211350440979, |
| "learning_rate": 0.00016690398306715414, |
| "loss": 0.7773644924163818, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.7480500340461731, |
| "learning_rate": 0.00016686549932653456, |
| "loss": 1.1536113023757935, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.5005767012687428, |
| "grad_norm": 0.5885343551635742, |
| "learning_rate": 0.00016682701558591497, |
| "loss": 0.5359970927238464, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.5011534025374856, |
| "grad_norm": 0.7808444499969482, |
| "learning_rate": 0.00016678853184529536, |
| "loss": 0.6940274834632874, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.5017301038062284, |
| "grad_norm": 0.8007370233535767, |
| "learning_rate": 0.00016675004810467577, |
| "loss": 1.3268241882324219, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.5023068050749712, |
| "grad_norm": 0.6729685068130493, |
| "learning_rate": 0.0001667115643640562, |
| "loss": 0.9482746124267578, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.5028835063437139, |
| "grad_norm": 0.648239016532898, |
| "learning_rate": 0.0001666730806234366, |
| "loss": 0.9904931783676147, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.5034602076124568, |
| "grad_norm": 0.7997180223464966, |
| "learning_rate": 0.00016663459688281702, |
| "loss": 1.0594019889831543, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.5040369088811996, |
| "grad_norm": 0.8298223614692688, |
| "learning_rate": 0.00016659611314219744, |
| "loss": 0.9604882597923279, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.5046136101499423, |
| "grad_norm": 0.8724483251571655, |
| "learning_rate": 0.00016655762940157785, |
| "loss": 1.0515791177749634, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.5051903114186851, |
| "grad_norm": 0.7477858662605286, |
| "learning_rate": 0.00016651914566095827, |
| "loss": 1.0346887111663818, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.505767012687428, |
| "grad_norm": 0.6524494886398315, |
| "learning_rate": 0.00016648066192033865, |
| "loss": 0.8699806928634644, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.5063437139561707, |
| "grad_norm": 0.7959410548210144, |
| "learning_rate": 0.00016644217817971907, |
| "loss": 1.0138338804244995, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.5069204152249135, |
| "grad_norm": 0.7872818112373352, |
| "learning_rate": 0.00016640369443909949, |
| "loss": 1.0084038972854614, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.5074971164936563, |
| "grad_norm": 0.9153385758399963, |
| "learning_rate": 0.0001663652106984799, |
| "loss": 0.9120053052902222, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.5080738177623991, |
| "grad_norm": 0.8691549301147461, |
| "learning_rate": 0.00016632672695786032, |
| "loss": 0.9792031645774841, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.5086505190311419, |
| "grad_norm": 0.7193480730056763, |
| "learning_rate": 0.00016628824321724073, |
| "loss": 0.9441159963607788, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.5092272202998847, |
| "grad_norm": 0.5675065517425537, |
| "learning_rate": 0.00016624975947662115, |
| "loss": 0.7550349235534668, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.5098039215686274, |
| "grad_norm": 0.45122864842414856, |
| "learning_rate": 0.00016621127573600156, |
| "loss": 0.494687020778656, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.5103806228373703, |
| "grad_norm": 0.5535047650337219, |
| "learning_rate": 0.00016617279199538195, |
| "loss": 1.0048768520355225, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.510957324106113, |
| "grad_norm": 1.1627446413040161, |
| "learning_rate": 0.00016613430825476237, |
| "loss": 1.3231415748596191, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.5115340253748558, |
| "grad_norm": 0.5924594402313232, |
| "learning_rate": 0.00016609582451414278, |
| "loss": 0.8373284339904785, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.5121107266435986, |
| "grad_norm": 1.071594476699829, |
| "learning_rate": 0.0001660573407735232, |
| "loss": 1.1695808172225952, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.5126874279123415, |
| "grad_norm": 0.7243885397911072, |
| "learning_rate": 0.0001660188570329036, |
| "loss": 0.9688019156455994, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.5132641291810842, |
| "grad_norm": 0.7857576012611389, |
| "learning_rate": 0.00016598037329228403, |
| "loss": 0.9062821269035339, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.513840830449827, |
| "grad_norm": 0.6501168012619019, |
| "learning_rate": 0.00016594188955166444, |
| "loss": 0.7230191230773926, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.5144175317185697, |
| "grad_norm": 0.7679166197776794, |
| "learning_rate": 0.00016590340581104483, |
| "loss": 0.9849987030029297, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.5149942329873126, |
| "grad_norm": 0.5687773823738098, |
| "learning_rate": 0.00016586492207042524, |
| "loss": 0.5315793752670288, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.5155709342560554, |
| "grad_norm": 0.5201639533042908, |
| "learning_rate": 0.00016582643832980566, |
| "loss": 0.833229660987854, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.5161476355247981, |
| "grad_norm": 0.9703792333602905, |
| "learning_rate": 0.00016578795458918608, |
| "loss": 1.2787346839904785, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.5167243367935409, |
| "grad_norm": 0.5964572429656982, |
| "learning_rate": 0.0001657494708485665, |
| "loss": 0.8054360151290894, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.5173010380622838, |
| "grad_norm": 0.8156993389129639, |
| "learning_rate": 0.0001657109871079469, |
| "loss": 1.1183547973632812, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.5178777393310265, |
| "grad_norm": 0.9944779276847839, |
| "learning_rate": 0.00016567250336732732, |
| "loss": 1.4230319261550903, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.5184544405997693, |
| "grad_norm": 0.6466273069381714, |
| "learning_rate": 0.00016563401962670774, |
| "loss": 0.9248323440551758, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.5190311418685121, |
| "grad_norm": 0.6486216187477112, |
| "learning_rate": 0.00016559553588608812, |
| "loss": 0.8279266357421875, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.5196078431372549, |
| "grad_norm": 0.8492687940597534, |
| "learning_rate": 0.00016555705214546854, |
| "loss": 1.1167151927947998, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.5201845444059977, |
| "grad_norm": 0.7403521537780762, |
| "learning_rate": 0.00016551856840484896, |
| "loss": 0.9129210710525513, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.5207612456747405, |
| "grad_norm": 0.9525539875030518, |
| "learning_rate": 0.00016548008466422937, |
| "loss": 1.0805696249008179, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.5213379469434832, |
| "grad_norm": 0.6410759091377258, |
| "learning_rate": 0.00016544160092360979, |
| "loss": 0.7183154821395874, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.5219146482122261, |
| "grad_norm": 0.9240155816078186, |
| "learning_rate": 0.0001654031171829902, |
| "loss": 1.2977594137191772, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.5224913494809689, |
| "grad_norm": 0.5909906625747681, |
| "learning_rate": 0.00016536463344237062, |
| "loss": 0.8771336078643799, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.5230680507497116, |
| "grad_norm": 0.6739245653152466, |
| "learning_rate": 0.00016532614970175103, |
| "loss": 0.9435271620750427, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.5236447520184544, |
| "grad_norm": 0.7840787172317505, |
| "learning_rate": 0.00016528766596113142, |
| "loss": 0.9116816520690918, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.5242214532871973, |
| "grad_norm": 0.7001404762268066, |
| "learning_rate": 0.00016524918222051184, |
| "loss": 0.7686711549758911, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.52479815455594, |
| "grad_norm": 0.7492363452911377, |
| "learning_rate": 0.00016521069847989225, |
| "loss": 0.894406795501709, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.5253748558246828, |
| "grad_norm": 0.6643780469894409, |
| "learning_rate": 0.00016517221473927267, |
| "loss": 0.9077553153038025, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.5259515570934256, |
| "grad_norm": 0.6426498889923096, |
| "learning_rate": 0.00016513373099865308, |
| "loss": 0.7784804701805115, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.5265282583621684, |
| "grad_norm": 0.6445097923278809, |
| "learning_rate": 0.0001650952472580335, |
| "loss": 0.8351481556892395, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.5271049596309112, |
| "grad_norm": 0.9749622344970703, |
| "learning_rate": 0.0001650567635174139, |
| "loss": 1.3779326677322388, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.527681660899654, |
| "grad_norm": 1.0297281742095947, |
| "learning_rate": 0.00016501827977679433, |
| "loss": 1.4258373975753784, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.5282583621683967, |
| "grad_norm": 0.8116568326950073, |
| "learning_rate": 0.00016497979603617472, |
| "loss": 1.120481252670288, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.5288350634371396, |
| "grad_norm": 0.8832195401191711, |
| "learning_rate": 0.00016494131229555513, |
| "loss": 1.0475956201553345, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.5294117647058824, |
| "grad_norm": 0.7668746709823608, |
| "learning_rate": 0.00016490282855493555, |
| "loss": 0.9356057643890381, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.5299884659746251, |
| "grad_norm": 0.7938312292098999, |
| "learning_rate": 0.00016486434481431596, |
| "loss": 1.0766160488128662, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.5305651672433679, |
| "grad_norm": 0.6379091739654541, |
| "learning_rate": 0.00016482586107369638, |
| "loss": 0.8664296865463257, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.5311418685121108, |
| "grad_norm": 0.5966930389404297, |
| "learning_rate": 0.0001647873773330768, |
| "loss": 0.7848939299583435, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.5317185697808535, |
| "grad_norm": 0.7270369529724121, |
| "learning_rate": 0.0001647488935924572, |
| "loss": 0.8690502643585205, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.5322952710495963, |
| "grad_norm": 0.7373891472816467, |
| "learning_rate": 0.00016471040985183762, |
| "loss": 0.9187401533126831, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.532871972318339, |
| "grad_norm": 0.6114344596862793, |
| "learning_rate": 0.000164671926111218, |
| "loss": 0.7336284518241882, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.5334486735870819, |
| "grad_norm": 0.7629640102386475, |
| "learning_rate": 0.00016463344237059843, |
| "loss": 1.0568023920059204, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.5340253748558247, |
| "grad_norm": 0.5172185897827148, |
| "learning_rate": 0.00016459495862997884, |
| "loss": 0.6043404936790466, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.5346020761245674, |
| "grad_norm": 0.6732125282287598, |
| "learning_rate": 0.00016455647488935926, |
| "loss": 0.7869133353233337, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.5351787773933102, |
| "grad_norm": 0.993881344795227, |
| "learning_rate": 0.00016451799114873967, |
| "loss": 1.3750996589660645, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.5357554786620531, |
| "grad_norm": 0.6748846173286438, |
| "learning_rate": 0.0001644795074081201, |
| "loss": 0.7957302331924438, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.5363321799307958, |
| "grad_norm": 0.5961597561836243, |
| "learning_rate": 0.0001644410236675005, |
| "loss": 0.817986786365509, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.5369088811995386, |
| "grad_norm": 0.8336942195892334, |
| "learning_rate": 0.00016440253992688092, |
| "loss": 1.071876883506775, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.5374855824682814, |
| "grad_norm": 0.8322470784187317, |
| "learning_rate": 0.0001643640561862613, |
| "loss": 0.9675548672676086, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.5380622837370242, |
| "grad_norm": 0.8054575324058533, |
| "learning_rate": 0.00016432557244564172, |
| "loss": 1.0018256902694702, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.538638985005767, |
| "grad_norm": 0.7546166181564331, |
| "learning_rate": 0.00016428708870502214, |
| "loss": 0.9199832677841187, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.5392156862745098, |
| "grad_norm": 0.6384134292602539, |
| "learning_rate": 0.00016424860496440255, |
| "loss": 0.5693946480751038, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.5397923875432526, |
| "grad_norm": 0.8509575128555298, |
| "learning_rate": 0.00016421012122378297, |
| "loss": 1.3604402542114258, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.5403690888119954, |
| "grad_norm": 1.0863171815872192, |
| "learning_rate": 0.00016417163748316338, |
| "loss": 1.441767692565918, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.5409457900807382, |
| "grad_norm": 0.7680332064628601, |
| "learning_rate": 0.0001641331537425438, |
| "loss": 0.8990482091903687, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.5415224913494809, |
| "grad_norm": 0.9804447889328003, |
| "learning_rate": 0.0001640946700019242, |
| "loss": 1.0421537160873413, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.5420991926182238, |
| "grad_norm": 1.0693145990371704, |
| "learning_rate": 0.0001640561862613046, |
| "loss": 1.1600146293640137, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.5426758938869666, |
| "grad_norm": 0.8488958477973938, |
| "learning_rate": 0.00016401770252068502, |
| "loss": 1.2710307836532593, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.5432525951557093, |
| "grad_norm": 1.048317313194275, |
| "learning_rate": 0.00016397921878006543, |
| "loss": 0.8453274369239807, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.5438292964244521, |
| "grad_norm": 0.7326422929763794, |
| "learning_rate": 0.00016394073503944585, |
| "loss": 1.0167326927185059, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.544405997693195, |
| "grad_norm": 0.877862274646759, |
| "learning_rate": 0.00016390225129882626, |
| "loss": 0.9589974880218506, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.5449826989619377, |
| "grad_norm": 0.8096463680267334, |
| "learning_rate": 0.00016386376755820668, |
| "loss": 0.8364965915679932, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.5455594002306805, |
| "grad_norm": 0.9232637882232666, |
| "learning_rate": 0.0001638252838175871, |
| "loss": 0.9332213997840881, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.5461361014994233, |
| "grad_norm": 0.7885507941246033, |
| "learning_rate": 0.0001637868000769675, |
| "loss": 1.0532820224761963, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.5467128027681661, |
| "grad_norm": 0.914097249507904, |
| "learning_rate": 0.0001637483163363479, |
| "loss": 0.8059665560722351, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.5472895040369089, |
| "grad_norm": 0.8124399781227112, |
| "learning_rate": 0.0001637098325957283, |
| "loss": 0.7342300415039062, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.5478662053056517, |
| "grad_norm": 0.8677952289581299, |
| "learning_rate": 0.00016367134885510873, |
| "loss": 1.2200864553451538, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.5484429065743944, |
| "grad_norm": 0.8235622048377991, |
| "learning_rate": 0.00016363286511448914, |
| "loss": 1.2276276350021362, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.5490196078431373, |
| "grad_norm": 0.8734779953956604, |
| "learning_rate": 0.00016359438137386956, |
| "loss": 1.481785535812378, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.5495963091118801, |
| "grad_norm": 0.7058696746826172, |
| "learning_rate": 0.00016355589763324997, |
| "loss": 0.8971320390701294, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.5501730103806228, |
| "grad_norm": 0.7818495035171509, |
| "learning_rate": 0.0001635174138926304, |
| "loss": 0.9900298118591309, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.5507497116493656, |
| "grad_norm": 0.9933992028236389, |
| "learning_rate": 0.0001634789301520108, |
| "loss": 1.377812147140503, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.5513264129181085, |
| "grad_norm": 0.6487358808517456, |
| "learning_rate": 0.0001634404464113912, |
| "loss": 0.8082116842269897, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.5519031141868512, |
| "grad_norm": 0.7896233201026917, |
| "learning_rate": 0.0001634019626707716, |
| "loss": 0.8894538879394531, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.552479815455594, |
| "grad_norm": 0.5499460697174072, |
| "learning_rate": 0.00016336347893015202, |
| "loss": 0.7779909372329712, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.5530565167243368, |
| "grad_norm": 0.7304683327674866, |
| "learning_rate": 0.00016332499518953244, |
| "loss": 0.9466789960861206, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.5536332179930796, |
| "grad_norm": 0.8766285181045532, |
| "learning_rate": 0.00016328651144891285, |
| "loss": 0.654015064239502, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.5542099192618224, |
| "grad_norm": 0.5168980956077576, |
| "learning_rate": 0.00016324802770829327, |
| "loss": 0.7942756414413452, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.5547866205305652, |
| "grad_norm": 0.8975361585617065, |
| "learning_rate": 0.00016320954396767368, |
| "loss": 1.1166660785675049, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.5553633217993079, |
| "grad_norm": 0.559033215045929, |
| "learning_rate": 0.0001631710602270541, |
| "loss": 0.7238450050354004, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.5559400230680508, |
| "grad_norm": 0.5114202499389648, |
| "learning_rate": 0.00016313257648643449, |
| "loss": 0.8229402303695679, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.5565167243367936, |
| "grad_norm": 0.8146692514419556, |
| "learning_rate": 0.0001630940927458149, |
| "loss": 0.9510258436203003, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.5570934256055363, |
| "grad_norm": 0.7686490416526794, |
| "learning_rate": 0.00016305560900519532, |
| "loss": 1.3754280805587769, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.5576701268742791, |
| "grad_norm": 0.6895797252655029, |
| "learning_rate": 0.00016301712526457573, |
| "loss": 0.9850455522537231, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.558246828143022, |
| "grad_norm": 0.6049807667732239, |
| "learning_rate": 0.00016297864152395615, |
| "loss": 0.6829259395599365, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.5588235294117647, |
| "grad_norm": 0.7376249432563782, |
| "learning_rate": 0.00016294015778333656, |
| "loss": 0.7787905931472778, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.5594002306805075, |
| "grad_norm": 0.5940505862236023, |
| "learning_rate": 0.00016290167404271698, |
| "loss": 0.7658302783966064, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.5599769319492502, |
| "grad_norm": 0.8353221416473389, |
| "learning_rate": 0.0001628631903020974, |
| "loss": 1.0191570520401, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.5605536332179931, |
| "grad_norm": 0.6136527061462402, |
| "learning_rate": 0.00016282470656147778, |
| "loss": 0.9413414001464844, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.5611303344867359, |
| "grad_norm": 0.64887535572052, |
| "learning_rate": 0.0001627862228208582, |
| "loss": 0.763261616230011, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.5617070357554786, |
| "grad_norm": 0.8027318716049194, |
| "learning_rate": 0.0001627477390802386, |
| "loss": 1.1142311096191406, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.5622837370242214, |
| "grad_norm": 0.6630944609642029, |
| "learning_rate": 0.00016270925533961903, |
| "loss": 0.8240130543708801, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.5628604382929643, |
| "grad_norm": 0.7404500246047974, |
| "learning_rate": 0.00016267077159899944, |
| "loss": 0.9690840244293213, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.563437139561707, |
| "grad_norm": 1.0134172439575195, |
| "learning_rate": 0.00016263228785837986, |
| "loss": 1.4774882793426514, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.5640138408304498, |
| "grad_norm": 0.8651242256164551, |
| "learning_rate": 0.00016259380411776027, |
| "loss": 0.898904025554657, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.5645905420991926, |
| "grad_norm": 0.6225872039794922, |
| "learning_rate": 0.00016255532037714066, |
| "loss": 1.149839162826538, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.5651672433679354, |
| "grad_norm": 0.5773558020591736, |
| "learning_rate": 0.00016251683663652108, |
| "loss": 0.516633152961731, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.5657439446366782, |
| "grad_norm": 0.6350861191749573, |
| "learning_rate": 0.0001624783528959015, |
| "loss": 1.0271410942077637, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.566320645905421, |
| "grad_norm": 0.8134899139404297, |
| "learning_rate": 0.0001624398691552819, |
| "loss": 0.8847084045410156, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.5668973471741637, |
| "grad_norm": 0.793136477470398, |
| "learning_rate": 0.00016240138541466232, |
| "loss": 1.0517855882644653, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.5674740484429066, |
| "grad_norm": 0.6838855743408203, |
| "learning_rate": 0.00016236290167404274, |
| "loss": 0.9592060446739197, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.5680507497116494, |
| "grad_norm": 0.77060467004776, |
| "learning_rate": 0.00016232441793342315, |
| "loss": 1.1476876735687256, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.5686274509803921, |
| "grad_norm": 0.6759986281394958, |
| "learning_rate": 0.00016228593419280357, |
| "loss": 0.9518548846244812, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.5692041522491349, |
| "grad_norm": 0.6088658571243286, |
| "learning_rate": 0.00016224745045218396, |
| "loss": 0.6659010648727417, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.5697808535178778, |
| "grad_norm": 0.9436719417572021, |
| "learning_rate": 0.00016220896671156437, |
| "loss": 1.1346865892410278, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.5703575547866205, |
| "grad_norm": 1.0091006755828857, |
| "learning_rate": 0.0001621704829709448, |
| "loss": 1.1687716245651245, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.5709342560553633, |
| "grad_norm": 0.9080367684364319, |
| "learning_rate": 0.0001621319992303252, |
| "loss": 1.0989638566970825, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.5715109573241061, |
| "grad_norm": 0.7519204020500183, |
| "learning_rate": 0.00016209351548970562, |
| "loss": 1.3017445802688599, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.5720876585928489, |
| "grad_norm": 0.545911431312561, |
| "learning_rate": 0.00016205503174908603, |
| "loss": 0.7622886300086975, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.5726643598615917, |
| "grad_norm": 0.9163870215415955, |
| "learning_rate": 0.00016201654800846645, |
| "loss": 1.2744814157485962, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.5732410611303345, |
| "grad_norm": 0.7644914388656616, |
| "learning_rate": 0.00016197806426784686, |
| "loss": 0.9071030616760254, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.5738177623990772, |
| "grad_norm": 0.761933445930481, |
| "learning_rate": 0.00016193958052722725, |
| "loss": 1.0261884927749634, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.5743944636678201, |
| "grad_norm": 0.5850253701210022, |
| "learning_rate": 0.00016190109678660767, |
| "loss": 0.8700547814369202, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.5749711649365629, |
| "grad_norm": 0.8303119540214539, |
| "learning_rate": 0.00016186261304598808, |
| "loss": 0.7401360273361206, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.5755478662053056, |
| "grad_norm": 0.8335464000701904, |
| "learning_rate": 0.0001618241293053685, |
| "loss": 1.058925986289978, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.5761245674740484, |
| "grad_norm": 0.6967325806617737, |
| "learning_rate": 0.0001617856455647489, |
| "loss": 1.3550879955291748, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.5767012687427913, |
| "grad_norm": 1.0509662628173828, |
| "learning_rate": 0.00016174716182412933, |
| "loss": 1.3809900283813477, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.577277970011534, |
| "grad_norm": 0.7688459157943726, |
| "learning_rate": 0.00016170867808350974, |
| "loss": 0.7888709306716919, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.5778546712802768, |
| "grad_norm": 1.4081027507781982, |
| "learning_rate": 0.00016167019434289016, |
| "loss": 0.8922286033630371, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.5784313725490197, |
| "grad_norm": 0.8513575196266174, |
| "learning_rate": 0.00016163171060227055, |
| "loss": 0.9064381718635559, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.5790080738177624, |
| "grad_norm": 0.8020631670951843, |
| "learning_rate": 0.00016159322686165096, |
| "loss": 1.0038318634033203, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.5795847750865052, |
| "grad_norm": 0.6308439373970032, |
| "learning_rate": 0.00016155474312103138, |
| "loss": 1.0535993576049805, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.580161476355248, |
| "grad_norm": 0.9487643837928772, |
| "learning_rate": 0.0001615162593804118, |
| "loss": 1.0733325481414795, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.5807381776239908, |
| "grad_norm": 0.5813226699829102, |
| "learning_rate": 0.0001614777756397922, |
| "loss": 0.6475256085395813, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.5813148788927336, |
| "grad_norm": 0.8787825703620911, |
| "learning_rate": 0.00016143929189917262, |
| "loss": 1.2669293880462646, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.5818915801614764, |
| "grad_norm": 0.5114219784736633, |
| "learning_rate": 0.00016140080815855304, |
| "loss": 0.5243850946426392, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.5824682814302191, |
| "grad_norm": 0.9315117597579956, |
| "learning_rate": 0.00016136232441793345, |
| "loss": 1.0958704948425293, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.583044982698962, |
| "grad_norm": 0.7866684794425964, |
| "learning_rate": 0.00016132384067731384, |
| "loss": 1.0202006101608276, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.5836216839677048, |
| "grad_norm": 0.9690834283828735, |
| "learning_rate": 0.00016128535693669426, |
| "loss": 0.7898403406143188, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.5841983852364475, |
| "grad_norm": 1.17559015750885, |
| "learning_rate": 0.00016124687319607467, |
| "loss": 1.0564637184143066, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.5847750865051903, |
| "grad_norm": 0.9403568506240845, |
| "learning_rate": 0.0001612083894554551, |
| "loss": 1.1451847553253174, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.5853517877739332, |
| "grad_norm": 0.7303722500801086, |
| "learning_rate": 0.0001611699057148355, |
| "loss": 1.143730878829956, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.5859284890426759, |
| "grad_norm": 0.9661723375320435, |
| "learning_rate": 0.00016113142197421592, |
| "loss": 1.1612937450408936, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.5865051903114187, |
| "grad_norm": 0.9506820440292358, |
| "learning_rate": 0.0001610929382335963, |
| "loss": 1.3300495147705078, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.5870818915801614, |
| "grad_norm": 0.9524713754653931, |
| "learning_rate": 0.00016105445449297672, |
| "loss": 1.4797887802124023, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.5876585928489043, |
| "grad_norm": 0.8756133317947388, |
| "learning_rate": 0.00016101597075235714, |
| "loss": 1.0017035007476807, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.5882352941176471, |
| "grad_norm": 0.8561094403266907, |
| "learning_rate": 0.00016097748701173752, |
| "loss": 1.4500423669815063, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.5888119953863898, |
| "grad_norm": 0.7503087520599365, |
| "learning_rate": 0.00016093900327111794, |
| "loss": 1.0606659650802612, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.5893886966551326, |
| "grad_norm": 0.5415161848068237, |
| "learning_rate": 0.00016090051953049836, |
| "loss": 0.6421483159065247, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.5899653979238755, |
| "grad_norm": 0.6148718595504761, |
| "learning_rate": 0.00016086203578987877, |
| "loss": 0.94537353515625, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.5905420991926182, |
| "grad_norm": 0.7274061441421509, |
| "learning_rate": 0.00016082355204925919, |
| "loss": 1.1045122146606445, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.591118800461361, |
| "grad_norm": 1.0995570421218872, |
| "learning_rate": 0.0001607850683086396, |
| "loss": 1.0006502866744995, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.5916955017301038, |
| "grad_norm": 0.6411669850349426, |
| "learning_rate": 0.00016074658456802002, |
| "loss": 0.8185054063796997, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.5922722029988466, |
| "grad_norm": 0.8972517848014832, |
| "learning_rate": 0.00016070810082740043, |
| "loss": 1.0834156274795532, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.5928489042675894, |
| "grad_norm": 1.3362998962402344, |
| "learning_rate": 0.00016066961708678082, |
| "loss": 1.3157958984375, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.5934256055363322, |
| "grad_norm": 0.9085165858268738, |
| "learning_rate": 0.00016063113334616124, |
| "loss": 1.0817850828170776, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.5940023068050749, |
| "grad_norm": 1.028162956237793, |
| "learning_rate": 0.00016059264960554165, |
| "loss": 1.324896216392517, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.5945790080738178, |
| "grad_norm": 0.6264161467552185, |
| "learning_rate": 0.00016055416586492207, |
| "loss": 0.7769796848297119, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.5951557093425606, |
| "grad_norm": 0.6027923822402954, |
| "learning_rate": 0.00016051568212430248, |
| "loss": 0.7691771388053894, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.5957324106113033, |
| "grad_norm": 1.1957632303237915, |
| "learning_rate": 0.0001604771983836829, |
| "loss": 1.5915735960006714, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.5963091118800461, |
| "grad_norm": 0.8243029713630676, |
| "learning_rate": 0.0001604387146430633, |
| "loss": 1.4467861652374268, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.596885813148789, |
| "grad_norm": 0.9241074919700623, |
| "learning_rate": 0.00016040023090244373, |
| "loss": 1.2037115097045898, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.5974625144175317, |
| "grad_norm": 0.7573208212852478, |
| "learning_rate": 0.00016036174716182411, |
| "loss": 1.111187219619751, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.5980392156862745, |
| "grad_norm": 0.9766779541969299, |
| "learning_rate": 0.00016032326342120453, |
| "loss": 1.3394712209701538, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.5986159169550173, |
| "grad_norm": 0.7223910093307495, |
| "learning_rate": 0.00016028477968058495, |
| "loss": 0.9714270830154419, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.5991926182237601, |
| "grad_norm": 0.8372020721435547, |
| "learning_rate": 0.00016024629593996536, |
| "loss": 0.9755414724349976, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.5997693194925029, |
| "grad_norm": 1.060224175453186, |
| "learning_rate": 0.00016020781219934578, |
| "loss": 1.0653870105743408, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.6003460207612457, |
| "grad_norm": 1.0068564414978027, |
| "learning_rate": 0.0001601693284587262, |
| "loss": 1.1695475578308105, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.6009227220299884, |
| "grad_norm": 0.8202903866767883, |
| "learning_rate": 0.0001601308447181066, |
| "loss": 1.430415391921997, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.6014994232987313, |
| "grad_norm": 0.6556461453437805, |
| "learning_rate": 0.00016009236097748702, |
| "loss": 0.6565566658973694, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.6020761245674741, |
| "grad_norm": 1.0711745023727417, |
| "learning_rate": 0.0001600538772368674, |
| "loss": 1.4629727602005005, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.6026528258362168, |
| "grad_norm": 0.857792317867279, |
| "learning_rate": 0.00016001539349624783, |
| "loss": 1.375361442565918, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.6032295271049596, |
| "grad_norm": 0.8610656261444092, |
| "learning_rate": 0.00015997690975562824, |
| "loss": 1.319663166999817, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.6038062283737025, |
| "grad_norm": 0.5466272830963135, |
| "learning_rate": 0.00015993842601500866, |
| "loss": 0.9326815009117126, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.6043829296424452, |
| "grad_norm": 0.5424578189849854, |
| "learning_rate": 0.00015989994227438907, |
| "loss": 0.8943756818771362, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.604959630911188, |
| "grad_norm": 1.0392166376113892, |
| "learning_rate": 0.00015986145853376949, |
| "loss": 1.1610779762268066, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.6055363321799307, |
| "grad_norm": 0.7397944331169128, |
| "learning_rate": 0.0001598229747931499, |
| "loss": 0.9297494888305664, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.6061130334486736, |
| "grad_norm": 0.7921435832977295, |
| "learning_rate": 0.00015978449105253032, |
| "loss": 0.9271104335784912, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.6066897347174164, |
| "grad_norm": 1.0713645219802856, |
| "learning_rate": 0.0001597460073119107, |
| "loss": 1.429350733757019, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.6072664359861591, |
| "grad_norm": 0.7312497496604919, |
| "learning_rate": 0.00015970752357129112, |
| "loss": 0.9167627096176147, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.6078431372549019, |
| "grad_norm": 0.7499086260795593, |
| "learning_rate": 0.00015966903983067154, |
| "loss": 0.7258137464523315, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.6084198385236448, |
| "grad_norm": 0.7300564646720886, |
| "learning_rate": 0.00015963055609005195, |
| "loss": 1.058071494102478, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.6089965397923875, |
| "grad_norm": 0.652527928352356, |
| "learning_rate": 0.00015959207234943237, |
| "loss": 0.6544615030288696, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.6095732410611303, |
| "grad_norm": 0.7193166613578796, |
| "learning_rate": 0.00015955358860881278, |
| "loss": 0.7395502328872681, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.6101499423298731, |
| "grad_norm": 0.7402684092521667, |
| "learning_rate": 0.0001595151048681932, |
| "loss": 0.8958665728569031, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.610726643598616, |
| "grad_norm": 1.0471738576889038, |
| "learning_rate": 0.0001594766211275736, |
| "loss": 1.383862018585205, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.6113033448673587, |
| "grad_norm": 0.926358699798584, |
| "learning_rate": 0.000159438137386954, |
| "loss": 1.3329360485076904, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.6118800461361015, |
| "grad_norm": 1.3576291799545288, |
| "learning_rate": 0.00015939965364633442, |
| "loss": 1.4153847694396973, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.6124567474048442, |
| "grad_norm": 1.043614387512207, |
| "learning_rate": 0.00015936116990571483, |
| "loss": 1.1355584859848022, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.6130334486735871, |
| "grad_norm": 0.6180047988891602, |
| "learning_rate": 0.00015932268616509525, |
| "loss": 0.7877006530761719, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.6136101499423299, |
| "grad_norm": 1.188005805015564, |
| "learning_rate": 0.00015928420242447566, |
| "loss": 1.185757040977478, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.6141868512110726, |
| "grad_norm": 0.6937184929847717, |
| "learning_rate": 0.00015924571868385608, |
| "loss": 0.8133529424667358, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.6147635524798154, |
| "grad_norm": 0.5152422785758972, |
| "learning_rate": 0.0001592072349432365, |
| "loss": 0.6955524682998657, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.6153402537485583, |
| "grad_norm": 0.8295215964317322, |
| "learning_rate": 0.0001591687512026169, |
| "loss": 0.9180642366409302, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.615916955017301, |
| "grad_norm": 1.131622314453125, |
| "learning_rate": 0.0001591302674619973, |
| "loss": 1.2194663286209106, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.6164936562860438, |
| "grad_norm": 0.744301438331604, |
| "learning_rate": 0.0001590917837213777, |
| "loss": 0.9852138161659241, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.6170703575547867, |
| "grad_norm": 0.7841970920562744, |
| "learning_rate": 0.00015905329998075813, |
| "loss": 1.302487850189209, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.6176470588235294, |
| "grad_norm": 0.6610711216926575, |
| "learning_rate": 0.00015901481624013854, |
| "loss": 0.8427870273590088, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.6182237600922722, |
| "grad_norm": 0.9735661745071411, |
| "learning_rate": 0.00015897633249951896, |
| "loss": 1.1720025539398193, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.618800461361015, |
| "grad_norm": 0.6673301458358765, |
| "learning_rate": 0.00015893784875889937, |
| "loss": 1.0172441005706787, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.6193771626297578, |
| "grad_norm": 1.0327497720718384, |
| "learning_rate": 0.0001588993650182798, |
| "loss": 1.168729305267334, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.6199538638985006, |
| "grad_norm": 0.6887943744659424, |
| "learning_rate": 0.0001588608812776602, |
| "loss": 0.9284838438034058, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.6205305651672434, |
| "grad_norm": 0.6660910844802856, |
| "learning_rate": 0.0001588223975370406, |
| "loss": 1.1769919395446777, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.6211072664359861, |
| "grad_norm": 0.7416674494743347, |
| "learning_rate": 0.000158783913796421, |
| "loss": 0.750725269317627, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.621683967704729, |
| "grad_norm": 0.6302111148834229, |
| "learning_rate": 0.00015874543005580142, |
| "loss": 0.8207563161849976, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.6222606689734718, |
| "grad_norm": 0.720021665096283, |
| "learning_rate": 0.00015870694631518184, |
| "loss": 1.133636474609375, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.6228373702422145, |
| "grad_norm": 0.9188029170036316, |
| "learning_rate": 0.00015866846257456225, |
| "loss": 1.5215458869934082, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.6234140715109573, |
| "grad_norm": 0.7337254881858826, |
| "learning_rate": 0.00015862997883394267, |
| "loss": 0.9544572830200195, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.6239907727797002, |
| "grad_norm": 1.0431314706802368, |
| "learning_rate": 0.00015859149509332308, |
| "loss": 1.0790281295776367, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.6245674740484429, |
| "grad_norm": 0.6344501376152039, |
| "learning_rate": 0.0001585530113527035, |
| "loss": 0.9151628017425537, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.6251441753171857, |
| "grad_norm": 1.332190752029419, |
| "learning_rate": 0.00015851452761208389, |
| "loss": 1.5466241836547852, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.6257208765859285, |
| "grad_norm": 0.7802074551582336, |
| "learning_rate": 0.0001584760438714643, |
| "loss": 1.1575053930282593, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.6262975778546713, |
| "grad_norm": 0.5755362510681152, |
| "learning_rate": 0.00015843756013084472, |
| "loss": 0.6923443078994751, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.6268742791234141, |
| "grad_norm": 0.8710469007492065, |
| "learning_rate": 0.00015839907639022513, |
| "loss": 1.0893003940582275, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.6274509803921569, |
| "grad_norm": 0.6689137816429138, |
| "learning_rate": 0.00015836059264960555, |
| "loss": 0.9777762293815613, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.6280276816608996, |
| "grad_norm": 0.9923802614212036, |
| "learning_rate": 0.00015832210890898596, |
| "loss": 1.2578145265579224, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.6286043829296425, |
| "grad_norm": 0.7596067190170288, |
| "learning_rate": 0.00015828362516836638, |
| "loss": 1.0804511308670044, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.6291810841983853, |
| "grad_norm": 0.9255754947662354, |
| "learning_rate": 0.0001582451414277468, |
| "loss": 1.2536742687225342, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.629757785467128, |
| "grad_norm": 0.6089752912521362, |
| "learning_rate": 0.00015820665768712718, |
| "loss": 0.8234043121337891, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.6303344867358708, |
| "grad_norm": 0.8412203192710876, |
| "learning_rate": 0.0001581681739465076, |
| "loss": 0.8689320683479309, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.6309111880046137, |
| "grad_norm": 0.6300414204597473, |
| "learning_rate": 0.000158129690205888, |
| "loss": 0.8836315274238586, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.6314878892733564, |
| "grad_norm": 0.8622999787330627, |
| "learning_rate": 0.00015809120646526843, |
| "loss": 0.8355990648269653, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.6320645905420992, |
| "grad_norm": 1.0277838706970215, |
| "learning_rate": 0.00015805272272464884, |
| "loss": 1.0228278636932373, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.6326412918108419, |
| "grad_norm": 0.7297544479370117, |
| "learning_rate": 0.00015801423898402926, |
| "loss": 0.9207032918930054, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.6332179930795848, |
| "grad_norm": 0.6923787593841553, |
| "learning_rate": 0.00015797575524340967, |
| "loss": 0.8914310932159424, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.6337946943483276, |
| "grad_norm": 0.984605073928833, |
| "learning_rate": 0.00015793727150279006, |
| "loss": 1.030419945716858, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.6343713956170703, |
| "grad_norm": 0.7933477759361267, |
| "learning_rate": 0.00015789878776217048, |
| "loss": 0.8263508081436157, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.6349480968858131, |
| "grad_norm": 0.6690862774848938, |
| "learning_rate": 0.0001578603040215509, |
| "loss": 0.8062323927879333, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.635524798154556, |
| "grad_norm": 1.1080838441848755, |
| "learning_rate": 0.0001578218202809313, |
| "loss": 1.0695234537124634, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.6361014994232987, |
| "grad_norm": 0.7373805046081543, |
| "learning_rate": 0.00015778333654031172, |
| "loss": 0.7782353162765503, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.6366782006920415, |
| "grad_norm": 0.9623069167137146, |
| "learning_rate": 0.00015774485279969214, |
| "loss": 1.299721121788025, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.6372549019607843, |
| "grad_norm": 0.8447510004043579, |
| "learning_rate": 0.00015770636905907255, |
| "loss": 0.751670241355896, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.6378316032295271, |
| "grad_norm": 0.7200034260749817, |
| "learning_rate": 0.00015766788531845297, |
| "loss": 0.8565016388893127, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.6384083044982699, |
| "grad_norm": 0.791018545627594, |
| "learning_rate": 0.00015762940157783336, |
| "loss": 1.014164924621582, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.6389850057670127, |
| "grad_norm": 0.7488639950752258, |
| "learning_rate": 0.00015759091783721377, |
| "loss": 0.7353352904319763, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.6395617070357554, |
| "grad_norm": 0.6376444697380066, |
| "learning_rate": 0.00015755243409659419, |
| "loss": 0.8452020287513733, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.6401384083044983, |
| "grad_norm": 0.7400408387184143, |
| "learning_rate": 0.0001575139503559746, |
| "loss": 0.8612061738967896, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.6407151095732411, |
| "grad_norm": 0.630378007888794, |
| "learning_rate": 0.00015747546661535502, |
| "loss": 0.8225241899490356, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.6412918108419838, |
| "grad_norm": 0.7687711715698242, |
| "learning_rate": 0.00015743698287473543, |
| "loss": 1.0129132270812988, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.6418685121107266, |
| "grad_norm": 0.8225964903831482, |
| "learning_rate": 0.00015739849913411585, |
| "loss": 1.0317823886871338, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.6424452133794695, |
| "grad_norm": 0.8062997460365295, |
| "learning_rate": 0.00015736001539349626, |
| "loss": 1.2668901681900024, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.6430219146482122, |
| "grad_norm": 0.7937533855438232, |
| "learning_rate": 0.00015732153165287665, |
| "loss": 0.5984291434288025, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.643598615916955, |
| "grad_norm": 0.6556064486503601, |
| "learning_rate": 0.00015728304791225707, |
| "loss": 0.6811074018478394, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.6441753171856978, |
| "grad_norm": 0.6815225481987, |
| "learning_rate": 0.00015724456417163748, |
| "loss": 0.8315191268920898, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.6447520184544406, |
| "grad_norm": 0.8624749779701233, |
| "learning_rate": 0.0001572060804310179, |
| "loss": 1.024225115776062, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.6453287197231834, |
| "grad_norm": 0.9867150187492371, |
| "learning_rate": 0.0001571675966903983, |
| "loss": 1.1838812828063965, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.6459054209919262, |
| "grad_norm": 0.9800993204116821, |
| "learning_rate": 0.00015712911294977873, |
| "loss": 1.0964932441711426, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.6464821222606689, |
| "grad_norm": 0.6755380034446716, |
| "learning_rate": 0.00015709062920915914, |
| "loss": 0.6732958555221558, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.6470588235294118, |
| "grad_norm": 0.6237842440605164, |
| "learning_rate": 0.00015705214546853956, |
| "loss": 0.769539475440979, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.6476355247981546, |
| "grad_norm": 0.9327729344367981, |
| "learning_rate": 0.00015701366172791995, |
| "loss": 1.2593892812728882, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.6482122260668973, |
| "grad_norm": 0.7165786623954773, |
| "learning_rate": 0.00015697517798730036, |
| "loss": 0.8721244931221008, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.6487889273356401, |
| "grad_norm": 0.7718213200569153, |
| "learning_rate": 0.00015693669424668078, |
| "loss": 0.9298558235168457, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.649365628604383, |
| "grad_norm": 0.7327983975410461, |
| "learning_rate": 0.0001568982105060612, |
| "loss": 0.9947003722190857, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.6499423298731257, |
| "grad_norm": 0.8242558240890503, |
| "learning_rate": 0.0001568597267654416, |
| "loss": 1.3076270818710327, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.6505190311418685, |
| "grad_norm": 0.5866062641143799, |
| "learning_rate": 0.00015682124302482202, |
| "loss": 0.7161552309989929, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.6510957324106112, |
| "grad_norm": 0.690351665019989, |
| "learning_rate": 0.00015678275928420244, |
| "loss": 0.7334930896759033, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.6516724336793541, |
| "grad_norm": 0.7475882172584534, |
| "learning_rate": 0.00015674427554358285, |
| "loss": 0.8960260152816772, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.6522491349480969, |
| "grad_norm": 0.7973214983940125, |
| "learning_rate": 0.00015670579180296324, |
| "loss": 0.9681750535964966, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.6528258362168397, |
| "grad_norm": 0.7747503519058228, |
| "learning_rate": 0.00015666730806234366, |
| "loss": 1.051071047782898, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.6534025374855824, |
| "grad_norm": 0.6149755120277405, |
| "learning_rate": 0.00015662882432172407, |
| "loss": 1.0745124816894531, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.6539792387543253, |
| "grad_norm": 0.8245506286621094, |
| "learning_rate": 0.0001565903405811045, |
| "loss": 1.3383489847183228, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.654555940023068, |
| "grad_norm": 0.754502534866333, |
| "learning_rate": 0.0001565518568404849, |
| "loss": 0.709721028804779, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.6551326412918108, |
| "grad_norm": 0.5991480946540833, |
| "learning_rate": 0.00015651337309986532, |
| "loss": 0.6601396203041077, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.6557093425605537, |
| "grad_norm": 0.7160611152648926, |
| "learning_rate": 0.00015647488935924573, |
| "loss": 1.244566559791565, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.6562860438292965, |
| "grad_norm": 0.6996898055076599, |
| "learning_rate": 0.00015643640561862615, |
| "loss": 0.7976762056350708, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.6568627450980392, |
| "grad_norm": 1.1391624212265015, |
| "learning_rate": 0.00015639792187800654, |
| "loss": 1.1150181293487549, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.657439446366782, |
| "grad_norm": 0.6305305361747742, |
| "learning_rate": 0.00015635943813738695, |
| "loss": 0.9086626768112183, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.6580161476355249, |
| "grad_norm": 1.1590427160263062, |
| "learning_rate": 0.00015632095439676737, |
| "loss": 1.2399204969406128, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.6585928489042676, |
| "grad_norm": 0.6845443844795227, |
| "learning_rate": 0.00015628247065614778, |
| "loss": 0.9434126019477844, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.6591695501730104, |
| "grad_norm": 0.8011909127235413, |
| "learning_rate": 0.0001562439869155282, |
| "loss": 0.9793667197227478, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.6597462514417531, |
| "grad_norm": 0.7350550293922424, |
| "learning_rate": 0.0001562055031749086, |
| "loss": 1.27531099319458, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.660322952710496, |
| "grad_norm": 0.9062415361404419, |
| "learning_rate": 0.00015616701943428903, |
| "loss": 0.9977236986160278, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.6608996539792388, |
| "grad_norm": 0.8427753448486328, |
| "learning_rate": 0.00015612853569366944, |
| "loss": 1.3097494840621948, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.6614763552479815, |
| "grad_norm": 0.7309291958808899, |
| "learning_rate": 0.00015609005195304983, |
| "loss": 1.1841623783111572, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.6620530565167243, |
| "grad_norm": 0.8518312573432922, |
| "learning_rate": 0.00015605156821243025, |
| "loss": 1.0959196090698242, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.6626297577854672, |
| "grad_norm": 0.7902095317840576, |
| "learning_rate": 0.00015601308447181066, |
| "loss": 1.186163067817688, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.6632064590542099, |
| "grad_norm": 0.8482567071914673, |
| "learning_rate": 0.00015597460073119108, |
| "loss": 0.9569811820983887, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.6637831603229527, |
| "grad_norm": 0.5328805446624756, |
| "learning_rate": 0.0001559361169905715, |
| "loss": 0.6388610005378723, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.6643598615916955, |
| "grad_norm": 0.6060228943824768, |
| "learning_rate": 0.0001558976332499519, |
| "loss": 0.7743721008300781, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.6649365628604383, |
| "grad_norm": 0.615100085735321, |
| "learning_rate": 0.00015585914950933232, |
| "loss": 0.8808379769325256, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.6655132641291811, |
| "grad_norm": 1.1238489151000977, |
| "learning_rate": 0.00015582066576871274, |
| "loss": 1.2252037525177002, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.6660899653979239, |
| "grad_norm": 0.8212980628013611, |
| "learning_rate": 0.00015578218202809313, |
| "loss": 1.0264016389846802, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 0.8575494885444641, |
| "learning_rate": 0.00015574369828747354, |
| "loss": 0.9453893899917603, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.6672433679354095, |
| "grad_norm": 0.8559103608131409, |
| "learning_rate": 0.00015570521454685396, |
| "loss": 1.01399564743042, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.6678200692041523, |
| "grad_norm": 0.8769490122795105, |
| "learning_rate": 0.00015566673080623437, |
| "loss": 1.1861730813980103, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.668396770472895, |
| "grad_norm": 0.5112201571464539, |
| "learning_rate": 0.0001556282470656148, |
| "loss": 0.6198689341545105, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.6689734717416378, |
| "grad_norm": 0.6346172094345093, |
| "learning_rate": 0.0001555897633249952, |
| "loss": 0.757227897644043, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.6695501730103807, |
| "grad_norm": 0.7918882966041565, |
| "learning_rate": 0.00015555127958437562, |
| "loss": 0.7224777936935425, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.6701268742791234, |
| "grad_norm": 0.5124825835227966, |
| "learning_rate": 0.00015551279584375603, |
| "loss": 0.7446980476379395, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.6707035755478662, |
| "grad_norm": 0.6950685977935791, |
| "learning_rate": 0.00015547431210313642, |
| "loss": 0.8628665804862976, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.671280276816609, |
| "grad_norm": 0.8380517363548279, |
| "learning_rate": 0.00015543582836251684, |
| "loss": 1.0211181640625, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.6718569780853518, |
| "grad_norm": 0.732266902923584, |
| "learning_rate": 0.00015539734462189725, |
| "loss": 0.7137742042541504, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.6724336793540946, |
| "grad_norm": 0.7325503826141357, |
| "learning_rate": 0.00015535886088127767, |
| "loss": 1.0089268684387207, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.6730103806228374, |
| "grad_norm": 0.8091567158699036, |
| "learning_rate": 0.00015532037714065808, |
| "loss": 1.0261311531066895, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.6735870818915801, |
| "grad_norm": 0.8078528642654419, |
| "learning_rate": 0.0001552818934000385, |
| "loss": 1.0196332931518555, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.674163783160323, |
| "grad_norm": 0.5558749437332153, |
| "learning_rate": 0.0001552434096594189, |
| "loss": 0.8882730007171631, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.6747404844290658, |
| "grad_norm": 0.7303665280342102, |
| "learning_rate": 0.00015520492591879933, |
| "loss": 0.9657995700836182, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.6753171856978085, |
| "grad_norm": 0.7512165904045105, |
| "learning_rate": 0.00015516644217817972, |
| "loss": 1.0741921663284302, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.6758938869665513, |
| "grad_norm": 0.7227686047554016, |
| "learning_rate": 0.0001549788542868128, |
| "loss": 1.0935313701629639, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.6764705882352942, |
| "grad_norm": 0.9613728523254395, |
| "learning_rate": 0.00015494040753556324, |
| "loss": 1.0458366870880127, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.6770472895040369, |
| "grad_norm": 0.7592456936836243, |
| "learning_rate": 0.00015490196078431375, |
| "loss": 1.048318862915039, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.6776239907727797, |
| "grad_norm": 0.6358122229576111, |
| "learning_rate": 0.0001548635140330642, |
| "loss": 0.9271713495254517, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.6782006920415224, |
| "grad_norm": 0.6779629588127136, |
| "learning_rate": 0.0001548250672818147, |
| "loss": 0.8732894062995911, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.6787773933102653, |
| "grad_norm": 0.7252342700958252, |
| "learning_rate": 0.00015478662053056518, |
| "loss": 1.016528606414795, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.6793540945790081, |
| "grad_norm": 0.5252419710159302, |
| "learning_rate": 0.00015474817377931566, |
| "loss": 0.6656200885772705, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.6799307958477508, |
| "grad_norm": 0.7480099201202393, |
| "learning_rate": 0.00015470972702806614, |
| "loss": 0.9825901389122009, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.6805074971164936, |
| "grad_norm": 0.5403528809547424, |
| "learning_rate": 0.00015467128027681662, |
| "loss": 0.8263649344444275, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.6810841983852365, |
| "grad_norm": 0.909685492515564, |
| "learning_rate": 0.0001546328335255671, |
| "loss": 1.1039624214172363, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.6816608996539792, |
| "grad_norm": 0.6782054305076599, |
| "learning_rate": 0.00015459438677431757, |
| "loss": 0.8667647242546082, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.682237600922722, |
| "grad_norm": 0.9437413811683655, |
| "learning_rate": 0.00015455594002306805, |
| "loss": 1.0089085102081299, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.6828143021914648, |
| "grad_norm": 0.631352424621582, |
| "learning_rate": 0.00015451749327181856, |
| "loss": 0.8900731801986694, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.6833910034602076, |
| "grad_norm": 0.9895037412643433, |
| "learning_rate": 0.000154479046520569, |
| "loss": 1.4409505128860474, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.6839677047289504, |
| "grad_norm": 0.655288815498352, |
| "learning_rate": 0.00015444059976931951, |
| "loss": 0.8149420022964478, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.6845444059976932, |
| "grad_norm": 0.906093418598175, |
| "learning_rate": 0.00015440215301806997, |
| "loss": 1.440996527671814, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.6851211072664359, |
| "grad_norm": 0.7067789435386658, |
| "learning_rate": 0.00015436370626682047, |
| "loss": 0.6415053009986877, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.6856978085351788, |
| "grad_norm": 0.7950546741485596, |
| "learning_rate": 0.00015432525951557095, |
| "loss": 1.0555880069732666, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.6862745098039216, |
| "grad_norm": 0.7521815299987793, |
| "learning_rate": 0.00015428681276432143, |
| "loss": 1.0289030075073242, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.6868512110726643, |
| "grad_norm": 0.8053890466690063, |
| "learning_rate": 0.0001542483660130719, |
| "loss": 1.0104256868362427, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.6874279123414071, |
| "grad_norm": 0.8960652351379395, |
| "learning_rate": 0.00015420991926182238, |
| "loss": 1.3124630451202393, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.68800461361015, |
| "grad_norm": 0.6445242762565613, |
| "learning_rate": 0.00015417147251057286, |
| "loss": 0.7147958278656006, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.6885813148788927, |
| "grad_norm": 0.8771377801895142, |
| "learning_rate": 0.00015413302575932334, |
| "loss": 1.1068731546401978, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.6891580161476355, |
| "grad_norm": 0.746562659740448, |
| "learning_rate": 0.00015409457900807382, |
| "loss": 0.8577734231948853, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.6897347174163783, |
| "grad_norm": 0.8225957155227661, |
| "learning_rate": 0.00015405613225682432, |
| "loss": 1.137495994567871, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.6903114186851211, |
| "grad_norm": 1.2180874347686768, |
| "learning_rate": 0.00015401768550557478, |
| "loss": 1.3055964708328247, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.6908881199538639, |
| "grad_norm": 0.8417837619781494, |
| "learning_rate": 0.00015397923875432528, |
| "loss": 0.719217836856842, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.6914648212226067, |
| "grad_norm": 0.5893595218658447, |
| "learning_rate": 0.00015394079200307573, |
| "loss": 0.7719886302947998, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.6920415224913494, |
| "grad_norm": 0.6734403371810913, |
| "learning_rate": 0.00015390234525182624, |
| "loss": 0.960877537727356, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.6926182237600923, |
| "grad_norm": 0.7350678443908691, |
| "learning_rate": 0.00015386389850057672, |
| "loss": 1.039952278137207, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.6931949250288351, |
| "grad_norm": 0.8072929978370667, |
| "learning_rate": 0.0001538254517493272, |
| "loss": 0.9792311787605286, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.6937716262975778, |
| "grad_norm": 0.6742820739746094, |
| "learning_rate": 0.00015378700499807767, |
| "loss": 0.8704882860183716, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.6943483275663207, |
| "grad_norm": 0.6590847969055176, |
| "learning_rate": 0.00015374855824682815, |
| "loss": 0.7836930155754089, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.6949250288350635, |
| "grad_norm": 0.6364882588386536, |
| "learning_rate": 0.00015371011149557863, |
| "loss": 0.6790116429328918, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.6955017301038062, |
| "grad_norm": 0.8620322346687317, |
| "learning_rate": 0.0001536716647443291, |
| "loss": 1.1667858362197876, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.696078431372549, |
| "grad_norm": 0.9262224435806274, |
| "learning_rate": 0.00015363321799307959, |
| "loss": 1.2684681415557861, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.6966551326412919, |
| "grad_norm": 0.7098090052604675, |
| "learning_rate": 0.0001535947712418301, |
| "loss": 1.108170986175537, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.6972318339100346, |
| "grad_norm": 0.8219681978225708, |
| "learning_rate": 0.00015355632449058054, |
| "loss": 1.1987258195877075, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.6978085351787774, |
| "grad_norm": 0.7267138957977295, |
| "learning_rate": 0.00015351787773933105, |
| "loss": 0.8790909051895142, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.6983852364475202, |
| "grad_norm": 0.9880861043930054, |
| "learning_rate": 0.0001534794309880815, |
| "loss": 0.7550561428070068, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.698961937716263, |
| "grad_norm": 1.0179109573364258, |
| "learning_rate": 0.000153440984236832, |
| "loss": 1.2887327671051025, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.6995386389850058, |
| "grad_norm": 1.0065605640411377, |
| "learning_rate": 0.00015340253748558246, |
| "loss": 1.3018262386322021, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.7001153402537486, |
| "grad_norm": 0.7868698835372925, |
| "learning_rate": 0.00015336409073433296, |
| "loss": 1.0050418376922607, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.7006920415224913, |
| "grad_norm": 1.2052333354949951, |
| "learning_rate": 0.00015332564398308344, |
| "loss": 1.4229861497879028, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.7012687427912342, |
| "grad_norm": 0.7077322006225586, |
| "learning_rate": 0.00015328719723183392, |
| "loss": 0.6043359041213989, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.701845444059977, |
| "grad_norm": 0.587632417678833, |
| "learning_rate": 0.0001532487504805844, |
| "loss": 0.6483091115951538, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.7024221453287197, |
| "grad_norm": 0.5759986042976379, |
| "learning_rate": 0.00015321030372933487, |
| "loss": 0.8392894864082336, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.7029988465974625, |
| "grad_norm": 0.6800678372383118, |
| "learning_rate": 0.00015317185697808535, |
| "loss": 0.8921798467636108, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.7035755478662054, |
| "grad_norm": 0.7683438658714294, |
| "learning_rate": 0.00015313341022683586, |
| "loss": 0.9112846851348877, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.7041522491349481, |
| "grad_norm": 1.0117342472076416, |
| "learning_rate": 0.0001530949634755863, |
| "loss": 1.4151829481124878, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.7047289504036909, |
| "grad_norm": 0.889950156211853, |
| "learning_rate": 0.00015305651672433681, |
| "loss": 1.190742015838623, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.7053056516724336, |
| "grad_norm": 0.7858697772026062, |
| "learning_rate": 0.00015301806997308727, |
| "loss": 1.0679411888122559, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.7058823529411765, |
| "grad_norm": 0.894363522529602, |
| "learning_rate": 0.00015297962322183777, |
| "loss": 1.1472891569137573, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.7064590542099193, |
| "grad_norm": 0.7669128775596619, |
| "learning_rate": 0.00015294117647058822, |
| "loss": 1.1536177396774292, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.707035755478662, |
| "grad_norm": 0.6551662683486938, |
| "learning_rate": 0.00015290272971933873, |
| "loss": 1.1004867553710938, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.7076124567474048, |
| "grad_norm": 1.0020555257797241, |
| "learning_rate": 0.0001528642829680892, |
| "loss": 1.2485133409500122, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.7081891580161477, |
| "grad_norm": 0.725662887096405, |
| "learning_rate": 0.00015282583621683968, |
| "loss": 0.8090496063232422, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.7087658592848904, |
| "grad_norm": 0.8500173091888428, |
| "learning_rate": 0.00015278738946559016, |
| "loss": 1.1222527027130127, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.7093425605536332, |
| "grad_norm": 0.7580368518829346, |
| "learning_rate": 0.00015274894271434064, |
| "loss": 0.8194168210029602, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.709919261822376, |
| "grad_norm": 0.936622679233551, |
| "learning_rate": 0.00015271049596309112, |
| "loss": 0.9981272220611572, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.7104959630911188, |
| "grad_norm": 0.8283603191375732, |
| "learning_rate": 0.00015267204921184162, |
| "loss": 0.9328891634941101, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.7110726643598616, |
| "grad_norm": 1.0028311014175415, |
| "learning_rate": 0.00015263360246059208, |
| "loss": 0.9482144117355347, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.7116493656286044, |
| "grad_norm": 1.1841291189193726, |
| "learning_rate": 0.00015259515570934258, |
| "loss": 1.4021642208099365, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.7122260668973471, |
| "grad_norm": 1.0274176597595215, |
| "learning_rate": 0.00015255670895809303, |
| "loss": 1.1408722400665283, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.71280276816609, |
| "grad_norm": 0.8339233994483948, |
| "learning_rate": 0.00015251826220684354, |
| "loss": 1.2026294469833374, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.7133794694348328, |
| "grad_norm": 0.8232172727584839, |
| "learning_rate": 0.000152479815455594, |
| "loss": 1.0658057928085327, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.7139561707035755, |
| "grad_norm": 0.6768394708633423, |
| "learning_rate": 0.0001524413687043445, |
| "loss": 0.7539021968841553, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.7145328719723183, |
| "grad_norm": 1.0153294801712036, |
| "learning_rate": 0.00015240292195309497, |
| "loss": 1.1792476177215576, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.7151095732410612, |
| "grad_norm": 1.2099579572677612, |
| "learning_rate": 0.00015236447520184545, |
| "loss": 1.482499599456787, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.7156862745098039, |
| "grad_norm": 0.5826729536056519, |
| "learning_rate": 0.00015232602845059593, |
| "loss": 0.7845430374145508, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.7162629757785467, |
| "grad_norm": 0.7632762789726257, |
| "learning_rate": 0.0001522875816993464, |
| "loss": 0.8908877968788147, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.7168396770472895, |
| "grad_norm": 0.835464358329773, |
| "learning_rate": 0.00015224913494809689, |
| "loss": 1.0795903205871582, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.7174163783160323, |
| "grad_norm": 0.998972475528717, |
| "learning_rate": 0.0001522106881968474, |
| "loss": 0.9715967178344727, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.7179930795847751, |
| "grad_norm": 0.5176213383674622, |
| "learning_rate": 0.00015217224144559784, |
| "loss": 0.7307795286178589, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.7185697808535179, |
| "grad_norm": 1.0009640455245972, |
| "learning_rate": 0.00015213379469434835, |
| "loss": 1.253312587738037, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.7191464821222606, |
| "grad_norm": 1.1499648094177246, |
| "learning_rate": 0.0001520953479430988, |
| "loss": 1.2523915767669678, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.7197231833910035, |
| "grad_norm": 0.9233465790748596, |
| "learning_rate": 0.0001520569011918493, |
| "loss": 1.025418996810913, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.7202998846597463, |
| "grad_norm": 0.5469316840171814, |
| "learning_rate": 0.00015201845444059975, |
| "loss": 0.6671372652053833, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.720876585928489, |
| "grad_norm": 0.7743379473686218, |
| "learning_rate": 0.00015198000768935026, |
| "loss": 1.2212378978729248, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.7214532871972318, |
| "grad_norm": 0.971682608127594, |
| "learning_rate": 0.00015194156093810074, |
| "loss": 1.2435131072998047, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.7220299884659747, |
| "grad_norm": 0.9899376630783081, |
| "learning_rate": 0.00015190311418685122, |
| "loss": 1.2595231533050537, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.7226066897347174, |
| "grad_norm": 0.8441123962402344, |
| "learning_rate": 0.0001518646674356017, |
| "loss": 0.9278808832168579, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.7231833910034602, |
| "grad_norm": 0.5254001021385193, |
| "learning_rate": 0.00015182622068435217, |
| "loss": 0.786496102809906, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.723760092272203, |
| "grad_norm": 0.9715943932533264, |
| "learning_rate": 0.00015178777393310265, |
| "loss": 0.9957152605056763, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.7243367935409458, |
| "grad_norm": 0.9919838905334473, |
| "learning_rate": 0.00015174932718185316, |
| "loss": 1.3595893383026123, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.7249134948096886, |
| "grad_norm": 0.7739357352256775, |
| "learning_rate": 0.0001517108804306036, |
| "loss": 0.7901654839515686, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.7254901960784313, |
| "grad_norm": 0.996926486492157, |
| "learning_rate": 0.00015167243367935411, |
| "loss": 1.0908658504486084, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.7260668973471741, |
| "grad_norm": 0.6757825016975403, |
| "learning_rate": 0.00015163398692810456, |
| "loss": 0.7795881032943726, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.726643598615917, |
| "grad_norm": 0.9458150863647461, |
| "learning_rate": 0.00015159554017685507, |
| "loss": 1.0505211353302002, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.7272202998846597, |
| "grad_norm": 0.8086127638816833, |
| "learning_rate": 0.00015155709342560552, |
| "loss": 0.9041070938110352, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.7277970011534025, |
| "grad_norm": 0.6491602659225464, |
| "learning_rate": 0.00015151864667435603, |
| "loss": 0.9067816734313965, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.7283737024221453, |
| "grad_norm": 0.5835777521133423, |
| "learning_rate": 0.0001514801999231065, |
| "loss": 0.7853602170944214, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.7289504036908881, |
| "grad_norm": 0.8881536722183228, |
| "learning_rate": 0.00015144175317185698, |
| "loss": 1.2767361402511597, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.7295271049596309, |
| "grad_norm": 0.6160046458244324, |
| "learning_rate": 0.00015140330642060746, |
| "loss": 0.7595696449279785, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.7301038062283737, |
| "grad_norm": 0.7877328991889954, |
| "learning_rate": 0.00015136485966935794, |
| "loss": 0.9727606773376465, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.7306805074971164, |
| "grad_norm": 0.6233464479446411, |
| "learning_rate": 0.00015132641291810842, |
| "loss": 0.6097822785377502, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.7312572087658593, |
| "grad_norm": 0.8846599459648132, |
| "learning_rate": 0.00015128796616685892, |
| "loss": 1.314606785774231, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.7318339100346021, |
| "grad_norm": 0.6752328872680664, |
| "learning_rate": 0.00015124951941560937, |
| "loss": 0.9257625341415405, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.7324106113033448, |
| "grad_norm": 0.6147440075874329, |
| "learning_rate": 0.00015121107266435988, |
| "loss": 0.7304266691207886, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.7329873125720877, |
| "grad_norm": 0.8625065088272095, |
| "learning_rate": 0.00015117262591311033, |
| "loss": 1.2385823726654053, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.7335640138408305, |
| "grad_norm": 0.6224170923233032, |
| "learning_rate": 0.00015113417916186084, |
| "loss": 0.7687395215034485, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.7341407151095732, |
| "grad_norm": 0.839799165725708, |
| "learning_rate": 0.0001510957324106113, |
| "loss": 1.0231621265411377, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.734717416378316, |
| "grad_norm": 0.8609519600868225, |
| "learning_rate": 0.0001510572856593618, |
| "loss": 1.1030302047729492, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.7352941176470589, |
| "grad_norm": 0.8059080243110657, |
| "learning_rate": 0.00015101883890811227, |
| "loss": 1.307667851448059, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.7358708189158016, |
| "grad_norm": 0.7881230115890503, |
| "learning_rate": 0.00015098039215686275, |
| "loss": 0.8685023784637451, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.7364475201845444, |
| "grad_norm": 0.6535466909408569, |
| "learning_rate": 0.00015094194540561323, |
| "loss": 0.8849316835403442, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.7370242214532872, |
| "grad_norm": 0.664448082447052, |
| "learning_rate": 0.0001509034986543637, |
| "loss": 0.809040904045105, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.73760092272203, |
| "grad_norm": 0.9526609182357788, |
| "learning_rate": 0.00015086505190311418, |
| "loss": 1.2887682914733887, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.7381776239907728, |
| "grad_norm": 0.8947210907936096, |
| "learning_rate": 0.00015082660515186466, |
| "loss": 1.0613007545471191, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.7387543252595156, |
| "grad_norm": 0.9127343893051147, |
| "learning_rate": 0.00015078815840061514, |
| "loss": 0.9401702284812927, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.7393310265282583, |
| "grad_norm": 1.0288292169570923, |
| "learning_rate": 0.00015074971164936565, |
| "loss": 1.2102299928665161, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.7399077277970012, |
| "grad_norm": 0.6608892679214478, |
| "learning_rate": 0.0001507112648981161, |
| "loss": 0.7817317247390747, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.740484429065744, |
| "grad_norm": 0.5857222080230713, |
| "learning_rate": 0.0001506728181468666, |
| "loss": 0.7468012571334839, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.7410611303344867, |
| "grad_norm": 0.6499783992767334, |
| "learning_rate": 0.00015063437139561708, |
| "loss": 0.7113574147224426, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.7416378316032295, |
| "grad_norm": 0.718450129032135, |
| "learning_rate": 0.00015059592464436756, |
| "loss": 0.9823046326637268, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.7422145328719724, |
| "grad_norm": 0.7987701296806335, |
| "learning_rate": 0.00015055747789311804, |
| "loss": 0.9410796761512756, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.7427912341407151, |
| "grad_norm": 0.7227610349655151, |
| "learning_rate": 0.00015051903114186852, |
| "loss": 0.7366760969161987, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.7433679354094579, |
| "grad_norm": 0.9411056637763977, |
| "learning_rate": 0.000150480584390619, |
| "loss": 0.9475510120391846, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.7439446366782007, |
| "grad_norm": 0.5987991690635681, |
| "learning_rate": 0.00015044213763936947, |
| "loss": 0.8084846138954163, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.7445213379469435, |
| "grad_norm": 0.6214851140975952, |
| "learning_rate": 0.00015040369088811995, |
| "loss": 0.6952444911003113, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.7450980392156863, |
| "grad_norm": 0.7398913502693176, |
| "learning_rate": 0.00015036524413687043, |
| "loss": 0.8432753086090088, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.745674740484429, |
| "grad_norm": 0.8513553142547607, |
| "learning_rate": 0.0001503267973856209, |
| "loss": 0.8751744627952576, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.7462514417531718, |
| "grad_norm": 0.7704481482505798, |
| "learning_rate": 0.0001502883506343714, |
| "loss": 0.9727562665939331, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.7468281430219147, |
| "grad_norm": 0.6925477385520935, |
| "learning_rate": 0.0001502499038831219, |
| "loss": 1.044316291809082, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.7474048442906575, |
| "grad_norm": 0.8089653253555298, |
| "learning_rate": 0.00015021145713187237, |
| "loss": 0.9385859966278076, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.7479815455594002, |
| "grad_norm": 0.8045443296432495, |
| "learning_rate": 0.00015017301038062285, |
| "loss": 1.093725562095642, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.748558246828143, |
| "grad_norm": 0.8403393626213074, |
| "learning_rate": 0.00015013456362937333, |
| "loss": 0.7081382870674133, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.7491349480968859, |
| "grad_norm": 0.8455471992492676, |
| "learning_rate": 0.0001500961168781238, |
| "loss": 1.2357611656188965, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.7497116493656286, |
| "grad_norm": 0.8819023966789246, |
| "learning_rate": 0.00015005767012687428, |
| "loss": 1.2907012701034546, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.7502883506343714, |
| "grad_norm": 0.6467103362083435, |
| "learning_rate": 0.00015001922337562476, |
| "loss": 0.7991781830787659, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.7508650519031141, |
| "grad_norm": 1.0841728448867798, |
| "learning_rate": 0.00014998077662437524, |
| "loss": 1.156419038772583, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.751441753171857, |
| "grad_norm": 0.4863538146018982, |
| "learning_rate": 0.00014994232987312572, |
| "loss": 0.5481974482536316, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.7520184544405998, |
| "grad_norm": 0.631119966506958, |
| "learning_rate": 0.0001499038831218762, |
| "loss": 0.7421573996543884, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.7525951557093425, |
| "grad_norm": 0.6919093728065491, |
| "learning_rate": 0.0001498654363706267, |
| "loss": 0.6554936170578003, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.7531718569780853, |
| "grad_norm": 0.7746281027793884, |
| "learning_rate": 0.00014982698961937718, |
| "loss": 0.9226951599121094, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.7537485582468282, |
| "grad_norm": 0.821020245552063, |
| "learning_rate": 0.00014978854286812766, |
| "loss": 1.2231357097625732, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.754325259515571, |
| "grad_norm": 0.6167652606964111, |
| "learning_rate": 0.00014975009611687814, |
| "loss": 0.9597879648208618, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.7549019607843137, |
| "grad_norm": 0.6786548495292664, |
| "learning_rate": 0.00014971164936562861, |
| "loss": 0.8253003358840942, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.7554786620530565, |
| "grad_norm": 0.9683876037597656, |
| "learning_rate": 0.0001496732026143791, |
| "loss": 1.1294584274291992, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.7560553633217993, |
| "grad_norm": 0.8556981086730957, |
| "learning_rate": 0.00014963475586312957, |
| "loss": 1.009643316268921, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.7566320645905421, |
| "grad_norm": 0.7639108896255493, |
| "learning_rate": 0.00014959630911188005, |
| "loss": 0.8871880769729614, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.7572087658592849, |
| "grad_norm": 0.9662507176399231, |
| "learning_rate": 0.00014955786236063053, |
| "loss": 1.2890512943267822, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.7577854671280276, |
| "grad_norm": 0.7260032892227173, |
| "learning_rate": 0.000149519415609381, |
| "loss": 1.2696185111999512, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.7583621683967705, |
| "grad_norm": 1.0413408279418945, |
| "learning_rate": 0.0001494809688581315, |
| "loss": 1.2239567041397095, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.7589388696655133, |
| "grad_norm": 0.9003005623817444, |
| "learning_rate": 0.00014944252210688196, |
| "loss": 1.248561143875122, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.759515570934256, |
| "grad_norm": 0.9604087471961975, |
| "learning_rate": 0.00014940407535563247, |
| "loss": 1.2369884252548218, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.7600922722029988, |
| "grad_norm": 0.7198401093482971, |
| "learning_rate": 0.00014936562860438295, |
| "loss": 0.743487536907196, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.7606689734717417, |
| "grad_norm": 0.7526591420173645, |
| "learning_rate": 0.00014932718185313342, |
| "loss": 0.7714953422546387, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.7612456747404844, |
| "grad_norm": 1.1336771249771118, |
| "learning_rate": 0.0001492887351018839, |
| "loss": 1.1577683687210083, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.7618223760092272, |
| "grad_norm": 0.7607272267341614, |
| "learning_rate": 0.00014925028835063438, |
| "loss": 0.903020977973938, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.76239907727797, |
| "grad_norm": 0.7855517268180847, |
| "learning_rate": 0.00014921184159938486, |
| "loss": 0.9421197772026062, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.7629757785467128, |
| "grad_norm": 0.9380967020988464, |
| "learning_rate": 0.00014917339484813534, |
| "loss": 1.0594120025634766, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.7635524798154556, |
| "grad_norm": 0.9255303740501404, |
| "learning_rate": 0.00014913494809688582, |
| "loss": 1.1912791728973389, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.7641291810841984, |
| "grad_norm": 0.7085497379302979, |
| "learning_rate": 0.00014909650134563632, |
| "loss": 0.7702199816703796, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.7647058823529411, |
| "grad_norm": 0.8080468773841858, |
| "learning_rate": 0.00014905805459438677, |
| "loss": 0.9640858769416809, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.765282583621684, |
| "grad_norm": 0.8854598999023438, |
| "learning_rate": 0.00014901960784313728, |
| "loss": 1.0912519693374634, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.7658592848904268, |
| "grad_norm": 1.158070683479309, |
| "learning_rate": 0.00014898116109188773, |
| "loss": 1.259207010269165, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.7664359861591695, |
| "grad_norm": 0.7163742780685425, |
| "learning_rate": 0.00014894271434063823, |
| "loss": 0.9091912508010864, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.7670126874279123, |
| "grad_norm": 0.6578546762466431, |
| "learning_rate": 0.0001489042675893887, |
| "loss": 1.13603937625885, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.7675893886966552, |
| "grad_norm": 0.641118586063385, |
| "learning_rate": 0.0001488658208381392, |
| "loss": 0.6926564574241638, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.7681660899653979, |
| "grad_norm": 1.3342225551605225, |
| "learning_rate": 0.00014882737408688967, |
| "loss": 1.1259536743164062, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.7687427912341407, |
| "grad_norm": 0.6777533292770386, |
| "learning_rate": 0.00014878892733564015, |
| "loss": 0.8380722403526306, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.7693194925028836, |
| "grad_norm": 0.5475529432296753, |
| "learning_rate": 0.00014875048058439063, |
| "loss": 0.7194100618362427, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.7698961937716263, |
| "grad_norm": 0.7109413743019104, |
| "learning_rate": 0.0001487120338331411, |
| "loss": 0.7877069711685181, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.7704728950403691, |
| "grad_norm": 0.5451337099075317, |
| "learning_rate": 0.00014867358708189158, |
| "loss": 0.7354110479354858, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.7710495963091119, |
| "grad_norm": 0.7789444327354431, |
| "learning_rate": 0.0001486351403306421, |
| "loss": 0.9675291776657104, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.7716262975778547, |
| "grad_norm": 0.7246870398521423, |
| "learning_rate": 0.00014859669357939254, |
| "loss": 0.9592723846435547, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.7722029988465975, |
| "grad_norm": 0.7461789846420288, |
| "learning_rate": 0.00014855824682814304, |
| "loss": 1.062403678894043, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.7727797001153403, |
| "grad_norm": 0.6598569750785828, |
| "learning_rate": 0.0001485198000768935, |
| "loss": 0.959195077419281, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.773356401384083, |
| "grad_norm": 0.8688694834709167, |
| "learning_rate": 0.000148481353325644, |
| "loss": 1.3393487930297852, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.7739331026528259, |
| "grad_norm": 0.7083797454833984, |
| "learning_rate": 0.00014844290657439448, |
| "loss": 0.9515122175216675, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.7745098039215687, |
| "grad_norm": 0.7261124849319458, |
| "learning_rate": 0.00014840445982314496, |
| "loss": 1.048977017402649, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.7750865051903114, |
| "grad_norm": 0.9450129270553589, |
| "learning_rate": 0.00014836601307189544, |
| "loss": 1.1335430145263672, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.7756632064590542, |
| "grad_norm": 0.47535234689712524, |
| "learning_rate": 0.00014832756632064591, |
| "loss": 0.6887091398239136, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.776239907727797, |
| "grad_norm": 0.714235782623291, |
| "learning_rate": 0.0001482891195693964, |
| "loss": 0.9414650201797485, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.7768166089965398, |
| "grad_norm": 0.6094812750816345, |
| "learning_rate": 0.00014825067281814687, |
| "loss": 0.8214763402938843, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.7773933102652826, |
| "grad_norm": 0.7122801542282104, |
| "learning_rate": 0.00014821222606689735, |
| "loss": 0.9144871830940247, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.7779700115340253, |
| "grad_norm": 0.8147172927856445, |
| "learning_rate": 0.00014817377931564785, |
| "loss": 1.1212399005889893, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.7785467128027682, |
| "grad_norm": 0.5866456627845764, |
| "learning_rate": 0.0001481353325643983, |
| "loss": 0.6841553449630737, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.779123414071511, |
| "grad_norm": 1.2120155096054077, |
| "learning_rate": 0.0001480968858131488, |
| "loss": 1.1782194375991821, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.7797001153402537, |
| "grad_norm": 0.8661918640136719, |
| "learning_rate": 0.00014805843906189926, |
| "loss": 1.1883846521377563, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.7802768166089965, |
| "grad_norm": 1.2335827350616455, |
| "learning_rate": 0.00014801999231064977, |
| "loss": 1.199598789215088, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.7808535178777394, |
| "grad_norm": 0.8413060307502747, |
| "learning_rate": 0.00014798154555940025, |
| "loss": 1.0878143310546875, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.7814302191464821, |
| "grad_norm": 1.042397379875183, |
| "learning_rate": 0.00014794309880815072, |
| "loss": 1.5179508924484253, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.7820069204152249, |
| "grad_norm": 1.2029002904891968, |
| "learning_rate": 0.0001479046520569012, |
| "loss": 1.361120343208313, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.7825836216839677, |
| "grad_norm": 0.9056934714317322, |
| "learning_rate": 0.00014786620530565168, |
| "loss": 1.0812435150146484, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.7831603229527105, |
| "grad_norm": 0.7730829119682312, |
| "learning_rate": 0.00014782775855440216, |
| "loss": 1.0833256244659424, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.7837370242214533, |
| "grad_norm": 0.8789440393447876, |
| "learning_rate": 0.00014778931180315264, |
| "loss": 1.0179883241653442, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.7843137254901961, |
| "grad_norm": 0.775190532207489, |
| "learning_rate": 0.00014775086505190312, |
| "loss": 1.0584783554077148, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.7848904267589388, |
| "grad_norm": 0.7954389452934265, |
| "learning_rate": 0.00014771241830065362, |
| "loss": 1.1697866916656494, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.7854671280276817, |
| "grad_norm": 0.8194144368171692, |
| "learning_rate": 0.00014767397154940407, |
| "loss": 0.9788481593132019, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.7860438292964245, |
| "grad_norm": 0.7247309684753418, |
| "learning_rate": 0.00014763552479815458, |
| "loss": 0.9953986406326294, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.7866205305651672, |
| "grad_norm": 0.8735687136650085, |
| "learning_rate": 0.00014759707804690503, |
| "loss": 1.108184576034546, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.78719723183391, |
| "grad_norm": 0.8578454256057739, |
| "learning_rate": 0.00014755863129565553, |
| "loss": 1.0608623027801514, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.7877739331026529, |
| "grad_norm": 1.038670301437378, |
| "learning_rate": 0.000147520184544406, |
| "loss": 1.2398217916488647, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.7883506343713956, |
| "grad_norm": 0.832326352596283, |
| "learning_rate": 0.0001474817377931565, |
| "loss": 1.5559954643249512, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.7889273356401384, |
| "grad_norm": 0.5325842499732971, |
| "learning_rate": 0.00014744329104190697, |
| "loss": 0.6711868047714233, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.7895040369088812, |
| "grad_norm": 0.6845494508743286, |
| "learning_rate": 0.00014740484429065745, |
| "loss": 0.9054516553878784, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.790080738177624, |
| "grad_norm": 0.8053160309791565, |
| "learning_rate": 0.00014736639753940793, |
| "loss": 1.1551737785339355, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.7906574394463668, |
| "grad_norm": 0.9268645644187927, |
| "learning_rate": 0.0001473279507881584, |
| "loss": 0.9230217933654785, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.7912341407151096, |
| "grad_norm": 1.0553678274154663, |
| "learning_rate": 0.00014728950403690888, |
| "loss": 1.2223023176193237, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.7918108419838523, |
| "grad_norm": 0.6177469491958618, |
| "learning_rate": 0.0001472510572856594, |
| "loss": 0.8992686867713928, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.7923875432525952, |
| "grad_norm": 1.138965368270874, |
| "learning_rate": 0.00014721261053440984, |
| "loss": 0.8630029559135437, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.792964244521338, |
| "grad_norm": 0.5512900948524475, |
| "learning_rate": 0.00014717416378316034, |
| "loss": 0.8302984237670898, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.7935409457900807, |
| "grad_norm": 0.6091440916061401, |
| "learning_rate": 0.0001471357170319108, |
| "loss": 0.7380212545394897, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.7941176470588235, |
| "grad_norm": 0.909902811050415, |
| "learning_rate": 0.0001470972702806613, |
| "loss": 1.0644478797912598, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.7946943483275664, |
| "grad_norm": 0.9841009378433228, |
| "learning_rate": 0.00014705882352941178, |
| "loss": 1.5122861862182617, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.7952710495963091, |
| "grad_norm": 0.7682785391807556, |
| "learning_rate": 0.00014702037677816226, |
| "loss": 0.8122522830963135, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.7958477508650519, |
| "grad_norm": 0.8022129535675049, |
| "learning_rate": 0.00014698193002691274, |
| "loss": 0.7516300678253174, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.7964244521337946, |
| "grad_norm": 0.8423136472702026, |
| "learning_rate": 0.00014694348327566321, |
| "loss": 0.9571545124053955, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.7970011534025375, |
| "grad_norm": 0.61954665184021, |
| "learning_rate": 0.0001469050365244137, |
| "loss": 0.8543866872787476, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.7975778546712803, |
| "grad_norm": 0.5888648629188538, |
| "learning_rate": 0.00014686658977316417, |
| "loss": 0.6958523988723755, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.798154555940023, |
| "grad_norm": 0.9419842958450317, |
| "learning_rate": 0.00014682814302191465, |
| "loss": 1.3051813840866089, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.7987312572087658, |
| "grad_norm": 1.1472746133804321, |
| "learning_rate": 0.00014678969627066515, |
| "loss": 1.284635305404663, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.7993079584775087, |
| "grad_norm": 0.5858578681945801, |
| "learning_rate": 0.0001467512495194156, |
| "loss": 0.7809937596321106, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.7998846597462514, |
| "grad_norm": 0.7086213231086731, |
| "learning_rate": 0.0001467128027681661, |
| "loss": 0.6571354269981384, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.8004613610149942, |
| "grad_norm": 0.8438594341278076, |
| "learning_rate": 0.00014667435601691656, |
| "loss": 0.9461796283721924, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.801038062283737, |
| "grad_norm": 0.6701700687408447, |
| "learning_rate": 0.00014663590926566707, |
| "loss": 0.7518469095230103, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.8016147635524798, |
| "grad_norm": 0.7239779233932495, |
| "learning_rate": 0.00014659746251441755, |
| "loss": 0.98681640625, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.8021914648212226, |
| "grad_norm": 0.9055145382881165, |
| "learning_rate": 0.00014655901576316802, |
| "loss": 1.038681983947754, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.8027681660899654, |
| "grad_norm": 0.674439013004303, |
| "learning_rate": 0.0001465205690119185, |
| "loss": 0.7289140820503235, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.8033448673587081, |
| "grad_norm": 0.6101412773132324, |
| "learning_rate": 0.00014648212226066898, |
| "loss": 0.8470169901847839, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.803921568627451, |
| "grad_norm": 1.0043631792068481, |
| "learning_rate": 0.00014644367550941946, |
| "loss": 0.9277285933494568, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.8044982698961938, |
| "grad_norm": 0.8795577883720398, |
| "learning_rate": 0.00014640522875816994, |
| "loss": 1.2433722019195557, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.8050749711649365, |
| "grad_norm": 0.469595730304718, |
| "learning_rate": 0.00014636678200692042, |
| "loss": 0.5572987794876099, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.8056516724336793, |
| "grad_norm": 0.8809022903442383, |
| "learning_rate": 0.00014632833525567092, |
| "loss": 1.1597031354904175, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.8062283737024222, |
| "grad_norm": 0.9675459861755371, |
| "learning_rate": 0.00014628988850442137, |
| "loss": 1.0070991516113281, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.8068050749711649, |
| "grad_norm": 0.8547102212905884, |
| "learning_rate": 0.00014625144175317188, |
| "loss": 0.9210143089294434, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.8073817762399077, |
| "grad_norm": 0.5635284185409546, |
| "learning_rate": 0.00014621299500192233, |
| "loss": 0.5849195122718811, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.8079584775086506, |
| "grad_norm": 0.8755897283554077, |
| "learning_rate": 0.00014617454825067283, |
| "loss": 1.014789342880249, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.8085351787773933, |
| "grad_norm": 0.6002927422523499, |
| "learning_rate": 0.00014613610149942328, |
| "loss": 0.8705483675003052, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.8091118800461361, |
| "grad_norm": 0.9547945857048035, |
| "learning_rate": 0.0001460976547481738, |
| "loss": 1.0433237552642822, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.8096885813148789, |
| "grad_norm": 0.8594508767127991, |
| "learning_rate": 0.00014605920799692427, |
| "loss": 0.857754111289978, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.8102652825836217, |
| "grad_norm": 0.632087230682373, |
| "learning_rate": 0.00014602076124567475, |
| "loss": 1.0932989120483398, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.8108419838523645, |
| "grad_norm": 0.6727497577667236, |
| "learning_rate": 0.00014598231449442523, |
| "loss": 1.1335169076919556, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.8114186851211073, |
| "grad_norm": 1.050377368927002, |
| "learning_rate": 0.0001459438677431757, |
| "loss": 1.1787501573562622, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.81199538638985, |
| "grad_norm": 0.624580442905426, |
| "learning_rate": 0.00014590542099192618, |
| "loss": 0.8040243983268738, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.8125720876585929, |
| "grad_norm": 0.644497275352478, |
| "learning_rate": 0.0001458669742406767, |
| "loss": 0.9769735336303711, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.8131487889273357, |
| "grad_norm": 0.8106479048728943, |
| "learning_rate": 0.00014582852748942714, |
| "loss": 1.2847563028335571, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.8137254901960784, |
| "grad_norm": 0.6234838962554932, |
| "learning_rate": 0.00014579008073817764, |
| "loss": 0.7418760061264038, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.8143021914648212, |
| "grad_norm": 0.7591360807418823, |
| "learning_rate": 0.0001457516339869281, |
| "loss": 1.0062642097473145, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.8148788927335641, |
| "grad_norm": 0.7684062123298645, |
| "learning_rate": 0.0001457131872356786, |
| "loss": 0.9963294267654419, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.8154555940023068, |
| "grad_norm": 0.8234810829162598, |
| "learning_rate": 0.00014567474048442905, |
| "loss": 0.9132286310195923, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.8160322952710496, |
| "grad_norm": 1.3752492666244507, |
| "learning_rate": 0.00014563629373317956, |
| "loss": 1.3458770513534546, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.8166089965397924, |
| "grad_norm": 0.8771060109138489, |
| "learning_rate": 0.00014559784698193004, |
| "loss": 0.9146612882614136, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.8171856978085352, |
| "grad_norm": 0.5799472332000732, |
| "learning_rate": 0.0001455594002306805, |
| "loss": 0.8132292032241821, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.817762399077278, |
| "grad_norm": 1.0692527294158936, |
| "learning_rate": 0.000145520953479431, |
| "loss": 1.0524235963821411, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.8183391003460208, |
| "grad_norm": 0.6880149245262146, |
| "learning_rate": 0.00014548250672818147, |
| "loss": 0.8549849987030029, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.8189158016147635, |
| "grad_norm": 0.9311429858207703, |
| "learning_rate": 0.00014544405997693195, |
| "loss": 1.2363505363464355, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.8194925028835064, |
| "grad_norm": 0.6105409860610962, |
| "learning_rate": 0.00014540561322568245, |
| "loss": 0.8256676197052002, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.8200692041522492, |
| "grad_norm": 0.9718572497367859, |
| "learning_rate": 0.0001453671664744329, |
| "loss": 1.349236249923706, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.8206459054209919, |
| "grad_norm": 0.9589305520057678, |
| "learning_rate": 0.0001453287197231834, |
| "loss": 0.8896529674530029, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.8212226066897347, |
| "grad_norm": 1.1475483179092407, |
| "learning_rate": 0.00014529027297193386, |
| "loss": 1.392863154411316, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.8217993079584776, |
| "grad_norm": 0.9420047402381897, |
| "learning_rate": 0.00014525182622068437, |
| "loss": 1.1920685768127441, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.8223760092272203, |
| "grad_norm": 0.584073007106781, |
| "learning_rate": 0.00014521337946943482, |
| "loss": 0.5488528609275818, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.8229527104959631, |
| "grad_norm": 0.6110360622406006, |
| "learning_rate": 0.00014517493271818532, |
| "loss": 0.7226777672767639, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.8235294117647058, |
| "grad_norm": 0.5320557355880737, |
| "learning_rate": 0.0001451364859669358, |
| "loss": 0.5602037906646729, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.8241061130334487, |
| "grad_norm": 0.5847785472869873, |
| "learning_rate": 0.00014509803921568628, |
| "loss": 0.632820725440979, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.8246828143021915, |
| "grad_norm": 1.1915888786315918, |
| "learning_rate": 0.00014505959246443676, |
| "loss": 1.2395484447479248, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.8252595155709342, |
| "grad_norm": 0.7745262980461121, |
| "learning_rate": 0.00014502114571318724, |
| "loss": 0.9293632507324219, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.825836216839677, |
| "grad_norm": 0.9716136455535889, |
| "learning_rate": 0.00014498269896193771, |
| "loss": 1.2587440013885498, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.8264129181084199, |
| "grad_norm": 0.6674740314483643, |
| "learning_rate": 0.00014494425221068822, |
| "loss": 0.9000645875930786, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.8269896193771626, |
| "grad_norm": 0.9345766305923462, |
| "learning_rate": 0.00014490580545943867, |
| "loss": 0.9881076812744141, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.8275663206459054, |
| "grad_norm": 0.8641346096992493, |
| "learning_rate": 0.00014486735870818918, |
| "loss": 1.0706219673156738, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.8281430219146482, |
| "grad_norm": 0.8997068405151367, |
| "learning_rate": 0.00014482891195693963, |
| "loss": 0.932431697845459, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.828719723183391, |
| "grad_norm": 0.7539141774177551, |
| "learning_rate": 0.00014479046520569013, |
| "loss": 0.8891205191612244, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.8292964244521338, |
| "grad_norm": 0.8675488233566284, |
| "learning_rate": 0.00014475201845444058, |
| "loss": 0.9973325729370117, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.8298731257208766, |
| "grad_norm": 0.7566542029380798, |
| "learning_rate": 0.0001447135717031911, |
| "loss": 1.1265358924865723, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.8304498269896193, |
| "grad_norm": 0.902654230594635, |
| "learning_rate": 0.00014467512495194157, |
| "loss": 1.0915746688842773, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.8310265282583622, |
| "grad_norm": 0.618813693523407, |
| "learning_rate": 0.00014463667820069205, |
| "loss": 0.6798044443130493, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.831603229527105, |
| "grad_norm": 0.6372320055961609, |
| "learning_rate": 0.00014459823144944252, |
| "loss": 0.8383584022521973, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.8321799307958477, |
| "grad_norm": 0.742468535900116, |
| "learning_rate": 0.000144559784698193, |
| "loss": 1.0003979206085205, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.8327566320645905, |
| "grad_norm": 0.9815142750740051, |
| "learning_rate": 0.00014452133794694348, |
| "loss": 1.2571461200714111, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.8333333333333334, |
| "grad_norm": 0.7362657785415649, |
| "learning_rate": 0.000144482891195694, |
| "loss": 0.9890142679214478, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.8339100346020761, |
| "grad_norm": 1.047896385192871, |
| "learning_rate": 0.00014444444444444444, |
| "loss": 0.7491689920425415, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.8344867358708189, |
| "grad_norm": 1.0869019031524658, |
| "learning_rate": 0.00014440599769319494, |
| "loss": 1.0598435401916504, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.8350634371395617, |
| "grad_norm": 0.8003841042518616, |
| "learning_rate": 0.0001443675509419454, |
| "loss": 0.7503578662872314, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.8356401384083045, |
| "grad_norm": 1.3352385759353638, |
| "learning_rate": 0.0001443291041906959, |
| "loss": 1.7147669792175293, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.8362168396770473, |
| "grad_norm": 0.7203720808029175, |
| "learning_rate": 0.00014429065743944635, |
| "loss": 0.7103738188743591, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.8367935409457901, |
| "grad_norm": 0.7292425036430359, |
| "learning_rate": 0.00014425221068819686, |
| "loss": 0.9089938402175903, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.8373702422145328, |
| "grad_norm": 1.5864981412887573, |
| "learning_rate": 0.00014421376393694733, |
| "loss": 1.2735176086425781, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.8379469434832757, |
| "grad_norm": 0.5966582298278809, |
| "learning_rate": 0.0001441753171856978, |
| "loss": 0.8211960196495056, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.8385236447520185, |
| "grad_norm": 0.6568999886512756, |
| "learning_rate": 0.0001441368704344483, |
| "loss": 0.9273509979248047, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.8391003460207612, |
| "grad_norm": 0.6672592163085938, |
| "learning_rate": 0.00014409842368319877, |
| "loss": 0.7854159474372864, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.839677047289504, |
| "grad_norm": 1.1119751930236816, |
| "learning_rate": 0.00014405997693194925, |
| "loss": 1.2850849628448486, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.8402537485582469, |
| "grad_norm": 0.8437113165855408, |
| "learning_rate": 0.00014402153018069975, |
| "loss": 0.9052360653877258, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.8408304498269896, |
| "grad_norm": 1.1120409965515137, |
| "learning_rate": 0.0001439830834294502, |
| "loss": 1.4261767864227295, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.8414071510957324, |
| "grad_norm": 0.6494320631027222, |
| "learning_rate": 0.0001439446366782007, |
| "loss": 0.8434788584709167, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.8419838523644751, |
| "grad_norm": 0.5622795820236206, |
| "learning_rate": 0.00014390618992695116, |
| "loss": 0.646868109703064, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.842560553633218, |
| "grad_norm": 0.8375677466392517, |
| "learning_rate": 0.00014386774317570167, |
| "loss": 1.0123827457427979, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.8431372549019608, |
| "grad_norm": 0.6013731956481934, |
| "learning_rate": 0.00014382929642445214, |
| "loss": 0.7129334211349487, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.8437139561707035, |
| "grad_norm": 0.7148757576942444, |
| "learning_rate": 0.00014379084967320262, |
| "loss": 0.7350738048553467, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.8442906574394463, |
| "grad_norm": 0.7380696535110474, |
| "learning_rate": 0.0001437524029219531, |
| "loss": 0.7962418794631958, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.8448673587081892, |
| "grad_norm": 0.6836022734642029, |
| "learning_rate": 0.00014371395617070358, |
| "loss": 1.0249385833740234, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.845444059976932, |
| "grad_norm": 0.8065418004989624, |
| "learning_rate": 0.00014367550941945406, |
| "loss": 1.0036308765411377, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.8460207612456747, |
| "grad_norm": 0.8336586356163025, |
| "learning_rate": 0.00014363706266820454, |
| "loss": 0.9442139863967896, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.8465974625144176, |
| "grad_norm": 0.9105651378631592, |
| "learning_rate": 0.00014359861591695501, |
| "loss": 1.198281168937683, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.8471741637831603, |
| "grad_norm": 0.6932002902030945, |
| "learning_rate": 0.0001435601691657055, |
| "loss": 0.76617431640625, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.8477508650519031, |
| "grad_norm": 0.6474612951278687, |
| "learning_rate": 0.00014352172241445597, |
| "loss": 0.9350631237030029, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.8483275663206459, |
| "grad_norm": 1.0232489109039307, |
| "learning_rate": 0.00014348327566320648, |
| "loss": 1.2790873050689697, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.8489042675893888, |
| "grad_norm": 0.5638800263404846, |
| "learning_rate": 0.00014344482891195695, |
| "loss": 0.6640872359275818, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.8494809688581315, |
| "grad_norm": 0.7060153484344482, |
| "learning_rate": 0.00014340638216070743, |
| "loss": 0.549694299697876, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.8500576701268743, |
| "grad_norm": 0.7553113698959351, |
| "learning_rate": 0.0001433679354094579, |
| "loss": 0.6748926639556885, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.850634371395617, |
| "grad_norm": 1.0750683546066284, |
| "learning_rate": 0.0001433294886582084, |
| "loss": 1.2567592859268188, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.8512110726643599, |
| "grad_norm": 0.8767377138137817, |
| "learning_rate": 0.00014329104190695887, |
| "loss": 0.8606712818145752, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.8517877739331027, |
| "grad_norm": 0.8583175539970398, |
| "learning_rate": 0.00014325259515570935, |
| "loss": 1.0961095094680786, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.8523644752018454, |
| "grad_norm": 0.8185640573501587, |
| "learning_rate": 0.00014321414840445982, |
| "loss": 0.9456279277801514, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.8529411764705882, |
| "grad_norm": 0.7922638058662415, |
| "learning_rate": 0.0001431757016532103, |
| "loss": 0.8527402281761169, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.8535178777393311, |
| "grad_norm": 0.8317216634750366, |
| "learning_rate": 0.00014313725490196078, |
| "loss": 1.0812233686447144, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.8540945790080738, |
| "grad_norm": 0.5592607855796814, |
| "learning_rate": 0.00014309880815071126, |
| "loss": 0.6856215000152588, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.8546712802768166, |
| "grad_norm": 0.6144684553146362, |
| "learning_rate": 0.00014306036139946174, |
| "loss": 0.8217105269432068, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.8552479815455594, |
| "grad_norm": 0.8721742630004883, |
| "learning_rate": 0.00014302191464821224, |
| "loss": 1.1268048286437988, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.8558246828143022, |
| "grad_norm": 0.7512510418891907, |
| "learning_rate": 0.00014298346789696272, |
| "loss": 0.7509297132492065, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.856401384083045, |
| "grad_norm": 0.7145662307739258, |
| "learning_rate": 0.0001429450211457132, |
| "loss": 0.787600040435791, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.8569780853517878, |
| "grad_norm": 0.5714643597602844, |
| "learning_rate": 0.00014290657439446368, |
| "loss": 0.5843244791030884, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.8575547866205305, |
| "grad_norm": 0.567432701587677, |
| "learning_rate": 0.00014286812764321416, |
| "loss": 0.5819793939590454, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.8581314878892734, |
| "grad_norm": 0.7957308888435364, |
| "learning_rate": 0.00014282968089196463, |
| "loss": 1.127239465713501, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.8587081891580162, |
| "grad_norm": 0.6828871369361877, |
| "learning_rate": 0.0001427912341407151, |
| "loss": 0.8339288234710693, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.8592848904267589, |
| "grad_norm": 0.6947774887084961, |
| "learning_rate": 0.0001427527873894656, |
| "loss": 0.8848856687545776, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.8598615916955017, |
| "grad_norm": 0.7703558802604675, |
| "learning_rate": 0.00014271434063821607, |
| "loss": 1.1964079141616821, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.8604382929642446, |
| "grad_norm": 0.9820204973220825, |
| "learning_rate": 0.00014267589388696655, |
| "loss": 1.3156203031539917, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.8610149942329873, |
| "grad_norm": 0.663357138633728, |
| "learning_rate": 0.00014263744713571703, |
| "loss": 1.1208245754241943, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.8615916955017301, |
| "grad_norm": 0.6204859018325806, |
| "learning_rate": 0.00014259900038446753, |
| "loss": 0.8412761688232422, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.8621683967704729, |
| "grad_norm": 0.8673816323280334, |
| "learning_rate": 0.000142560553633218, |
| "loss": 0.9236775040626526, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.8627450980392157, |
| "grad_norm": 0.6511439681053162, |
| "learning_rate": 0.0001425221068819685, |
| "loss": 0.8711351156234741, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.8633217993079585, |
| "grad_norm": 0.5167029500007629, |
| "learning_rate": 0.00014248366013071897, |
| "loss": 0.6116561889648438, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.8638985005767013, |
| "grad_norm": 0.6007522940635681, |
| "learning_rate": 0.00014244521337946944, |
| "loss": 0.7663001418113708, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.864475201845444, |
| "grad_norm": 0.5924880504608154, |
| "learning_rate": 0.00014240676662821992, |
| "loss": 0.6707437038421631, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.8650519031141869, |
| "grad_norm": 0.859641969203949, |
| "learning_rate": 0.0001423683198769704, |
| "loss": 1.0436668395996094, |
| "step": 1500 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 5202, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9.361771665599693e+16, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|