| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 1645, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0006079027355623101, |
| "grad_norm": 44.818572998046875, |
| "learning_rate": 0.0, |
| "loss": 7.186539173126221, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0012158054711246201, |
| "grad_norm": 47.259071350097656, |
| "learning_rate": 1e-05, |
| "loss": 7.313593864440918, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.00182370820668693, |
| "grad_norm": 23.298837661743164, |
| "learning_rate": 2e-05, |
| "loss": 7.087122917175293, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0024316109422492403, |
| "grad_norm": 13.535771369934082, |
| "learning_rate": 3e-05, |
| "loss": 6.942234992980957, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.00303951367781155, |
| "grad_norm": 11.997403144836426, |
| "learning_rate": 4e-05, |
| "loss": 6.6411614418029785, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.00364741641337386, |
| "grad_norm": 13.242263793945312, |
| "learning_rate": 5e-05, |
| "loss": 6.319230079650879, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.00425531914893617, |
| "grad_norm": 10.080074310302734, |
| "learning_rate": 6e-05, |
| "loss": 6.251328468322754, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.004863221884498481, |
| "grad_norm": 14.386478424072266, |
| "learning_rate": 7.000000000000001e-05, |
| "loss": 6.372805595397949, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.00547112462006079, |
| "grad_norm": 6.731114387512207, |
| "learning_rate": 8e-05, |
| "loss": 6.32672119140625, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0060790273556231, |
| "grad_norm": 7.430361747741699, |
| "learning_rate": 8.999999999999999e-05, |
| "loss": 5.981637954711914, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.006686930091185411, |
| "grad_norm": 6.817004680633545, |
| "learning_rate": 0.0001, |
| "loss": 6.182029724121094, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.00729483282674772, |
| "grad_norm": 6.540442943572998, |
| "learning_rate": 0.00011, |
| "loss": 6.224725723266602, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.007902735562310031, |
| "grad_norm": 6.224416255950928, |
| "learning_rate": 0.00012, |
| "loss": 6.106351852416992, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.00851063829787234, |
| "grad_norm": 5.954357624053955, |
| "learning_rate": 0.00013000000000000002, |
| "loss": 6.050826072692871, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.00911854103343465, |
| "grad_norm": 5.7734551429748535, |
| "learning_rate": 0.00014000000000000001, |
| "loss": 6.147342681884766, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.009726443768996961, |
| "grad_norm": 6.399932861328125, |
| "learning_rate": 0.00015, |
| "loss": 6.284224510192871, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.01033434650455927, |
| "grad_norm": 4.2578558921813965, |
| "learning_rate": 0.00016, |
| "loss": 5.968033790588379, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.01094224924012158, |
| "grad_norm": 3.9558868408203125, |
| "learning_rate": 0.00017, |
| "loss": 5.909118175506592, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.011550151975683891, |
| "grad_norm": 3.4882659912109375, |
| "learning_rate": 0.00017999999999999998, |
| "loss": 6.045907974243164, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.0121580547112462, |
| "grad_norm": 6.301029682159424, |
| "learning_rate": 0.00019, |
| "loss": 5.905165672302246, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.01276595744680851, |
| "grad_norm": 3.891385078430176, |
| "learning_rate": 0.0002, |
| "loss": 5.9485931396484375, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.013373860182370821, |
| "grad_norm": 4.277671813964844, |
| "learning_rate": 0.00021, |
| "loss": 5.995012283325195, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.01398176291793313, |
| "grad_norm": 3.7930500507354736, |
| "learning_rate": 0.00022, |
| "loss": 6.081092834472656, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.01458966565349544, |
| "grad_norm": 5.02017879486084, |
| "learning_rate": 0.00023, |
| "loss": 6.232627868652344, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.015197568389057751, |
| "grad_norm": 3.485990285873413, |
| "learning_rate": 0.00024, |
| "loss": 6.189592361450195, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.015805471124620062, |
| "grad_norm": 4.133285999298096, |
| "learning_rate": 0.00025, |
| "loss": 5.953710079193115, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.01641337386018237, |
| "grad_norm": 4.140801429748535, |
| "learning_rate": 0.00026000000000000003, |
| "loss": 5.926338195800781, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.01702127659574468, |
| "grad_norm": 3.4010164737701416, |
| "learning_rate": 0.00027, |
| "loss": 5.7254462242126465, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.01762917933130699, |
| "grad_norm": 10.262829780578613, |
| "learning_rate": 0.00028000000000000003, |
| "loss": 6.183866500854492, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.0182370820668693, |
| "grad_norm": 4.732674598693848, |
| "learning_rate": 0.00029, |
| "loss": 5.899426460266113, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.01884498480243161, |
| "grad_norm": 4.868585109710693, |
| "learning_rate": 0.0003, |
| "loss": 5.8833699226379395, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.019452887537993922, |
| "grad_norm": 4.654231071472168, |
| "learning_rate": 0.00031, |
| "loss": 5.967190265655518, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.02006079027355623, |
| "grad_norm": 4.583294868469238, |
| "learning_rate": 0.00032, |
| "loss": 6.027661323547363, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.02066869300911854, |
| "grad_norm": 4.038606643676758, |
| "learning_rate": 0.00033, |
| "loss": 6.06468391418457, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.02127659574468085, |
| "grad_norm": 3.1677229404449463, |
| "learning_rate": 0.00034, |
| "loss": 5.97524881362915, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.02188449848024316, |
| "grad_norm": 4.171515941619873, |
| "learning_rate": 0.00035, |
| "loss": 5.981804370880127, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.022492401215805473, |
| "grad_norm": 5.382990837097168, |
| "learning_rate": 0.00035999999999999997, |
| "loss": 6.05380916595459, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.023100303951367782, |
| "grad_norm": 4.436893463134766, |
| "learning_rate": 0.00037, |
| "loss": 6.156210899353027, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.02370820668693009, |
| "grad_norm": 4.104293346405029, |
| "learning_rate": 0.00038, |
| "loss": 5.963473320007324, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.0243161094224924, |
| "grad_norm": 7.8225202560424805, |
| "learning_rate": 0.00039000000000000005, |
| "loss": 5.945594310760498, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.02492401215805471, |
| "grad_norm": 3.7115426063537598, |
| "learning_rate": 0.0004, |
| "loss": 5.866631984710693, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.02553191489361702, |
| "grad_norm": 3.377136468887329, |
| "learning_rate": 0.00041, |
| "loss": 5.87300968170166, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.026139817629179333, |
| "grad_norm": 3.0676238536834717, |
| "learning_rate": 0.00042, |
| "loss": 5.819428443908691, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.026747720364741642, |
| "grad_norm": 3.4088737964630127, |
| "learning_rate": 0.00043, |
| "loss": 5.686548709869385, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.02735562310030395, |
| "grad_norm": 4.599688529968262, |
| "learning_rate": 0.00044, |
| "loss": 6.143298149108887, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.02796352583586626, |
| "grad_norm": 3.1253559589385986, |
| "learning_rate": 0.00045000000000000004, |
| "loss": 5.965961933135986, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.02857142857142857, |
| "grad_norm": 3.3107733726501465, |
| "learning_rate": 0.00046, |
| "loss": 5.744629859924316, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.02917933130699088, |
| "grad_norm": 3.4835944175720215, |
| "learning_rate": 0.00047, |
| "loss": 5.963787078857422, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.029787234042553193, |
| "grad_norm": 4.766516208648682, |
| "learning_rate": 0.00048, |
| "loss": 5.903127670288086, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.030395136778115502, |
| "grad_norm": 3.4444823265075684, |
| "learning_rate": 0.00049, |
| "loss": 5.898875713348389, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.03100303951367781, |
| "grad_norm": 3.4199633598327637, |
| "learning_rate": 0.0005, |
| "loss": 5.995363235473633, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.031610942249240125, |
| "grad_norm": 4.609949111938477, |
| "learning_rate": 0.0005, |
| "loss": 5.867133140563965, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.03221884498480243, |
| "grad_norm": 2.445003032684326, |
| "learning_rate": 0.0005, |
| "loss": 5.596291542053223, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.03282674772036474, |
| "grad_norm": 7.065042972564697, |
| "learning_rate": 0.0005, |
| "loss": 5.764184951782227, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.03343465045592705, |
| "grad_norm": 3.3624749183654785, |
| "learning_rate": 0.0005, |
| "loss": 5.835771560668945, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.03404255319148936, |
| "grad_norm": 2.667015790939331, |
| "learning_rate": 0.0005, |
| "loss": 5.9446611404418945, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.034650455927051675, |
| "grad_norm": 3.2562549114227295, |
| "learning_rate": 0.0005, |
| "loss": 6.190652370452881, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.03525835866261398, |
| "grad_norm": 3.5651185512542725, |
| "learning_rate": 0.0005, |
| "loss": 5.877089500427246, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.035866261398176294, |
| "grad_norm": 2.6607139110565186, |
| "learning_rate": 0.0005, |
| "loss": 5.947436332702637, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.0364741641337386, |
| "grad_norm": 2.5586416721343994, |
| "learning_rate": 0.0005, |
| "loss": 6.041194915771484, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.03708206686930091, |
| "grad_norm": 3.5156543254852295, |
| "learning_rate": 0.0005, |
| "loss": 5.8784284591674805, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.03768996960486322, |
| "grad_norm": 2.013105630874634, |
| "learning_rate": 0.0005, |
| "loss": 5.705929756164551, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.03829787234042553, |
| "grad_norm": 2.2044196128845215, |
| "learning_rate": 0.0005, |
| "loss": 5.775040626525879, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.038905775075987845, |
| "grad_norm": 3.8432488441467285, |
| "learning_rate": 0.0005, |
| "loss": 5.757482528686523, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.03951367781155015, |
| "grad_norm": 2.794318437576294, |
| "learning_rate": 0.0005, |
| "loss": 5.4956865310668945, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.04012158054711246, |
| "grad_norm": 5.635376930236816, |
| "learning_rate": 0.0005, |
| "loss": 5.950571060180664, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.04072948328267477, |
| "grad_norm": 2.8366096019744873, |
| "learning_rate": 0.0005, |
| "loss": 5.937989711761475, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.04133738601823708, |
| "grad_norm": 4.0585455894470215, |
| "learning_rate": 0.0005, |
| "loss": 6.175616264343262, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.041945288753799395, |
| "grad_norm": 2.4633665084838867, |
| "learning_rate": 0.0005, |
| "loss": 5.856078147888184, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.0425531914893617, |
| "grad_norm": 2.900541305541992, |
| "learning_rate": 0.0005, |
| "loss": 5.562302112579346, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.043161094224924014, |
| "grad_norm": 2.1582231521606445, |
| "learning_rate": 0.0005, |
| "loss": 5.853466033935547, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.04376899696048632, |
| "grad_norm": 2.823076009750366, |
| "learning_rate": 0.0005, |
| "loss": 5.676411151885986, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.04437689969604863, |
| "grad_norm": 3.4227182865142822, |
| "learning_rate": 0.0005, |
| "loss": 5.687357425689697, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.044984802431610946, |
| "grad_norm": 2.4039175510406494, |
| "learning_rate": 0.0005, |
| "loss": 5.892976760864258, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.04559270516717325, |
| "grad_norm": 2.6830098628997803, |
| "learning_rate": 0.0005, |
| "loss": 5.66058349609375, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.046200607902735565, |
| "grad_norm": 2.413268566131592, |
| "learning_rate": 0.0005, |
| "loss": 5.7166547775268555, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.04680851063829787, |
| "grad_norm": 2.110560894012451, |
| "learning_rate": 0.0005, |
| "loss": 5.578657150268555, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.04741641337386018, |
| "grad_norm": 2.293944835662842, |
| "learning_rate": 0.0005, |
| "loss": 5.830209732055664, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.04802431610942249, |
| "grad_norm": 2.3141164779663086, |
| "learning_rate": 0.0005, |
| "loss": 5.730184555053711, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.0486322188449848, |
| "grad_norm": 2.4202141761779785, |
| "learning_rate": 0.0005, |
| "loss": 5.657958030700684, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.049240121580547115, |
| "grad_norm": 2.1450300216674805, |
| "learning_rate": 0.0005, |
| "loss": 5.734421253204346, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.04984802431610942, |
| "grad_norm": 2.340426206588745, |
| "learning_rate": 0.0005, |
| "loss": 5.912275314331055, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.050455927051671734, |
| "grad_norm": 2.2572286128997803, |
| "learning_rate": 0.0005, |
| "loss": 6.227065086364746, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.05106382978723404, |
| "grad_norm": 1.9745402336120605, |
| "learning_rate": 0.0005, |
| "loss": 5.538962364196777, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.05167173252279635, |
| "grad_norm": 1.8350422382354736, |
| "learning_rate": 0.0005, |
| "loss": 5.68572998046875, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.052279635258358666, |
| "grad_norm": 1.4099390506744385, |
| "learning_rate": 0.0005, |
| "loss": 5.548061370849609, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.05288753799392097, |
| "grad_norm": 1.7324459552764893, |
| "learning_rate": 0.0005, |
| "loss": 5.791088104248047, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.053495440729483285, |
| "grad_norm": 2.2765917778015137, |
| "learning_rate": 0.0005, |
| "loss": 5.66319465637207, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.05410334346504559, |
| "grad_norm": 1.8931759595870972, |
| "learning_rate": 0.0005, |
| "loss": 5.931559085845947, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.0547112462006079, |
| "grad_norm": 3.1260805130004883, |
| "learning_rate": 0.0005, |
| "loss": 5.887214183807373, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.05531914893617021, |
| "grad_norm": 2.076260805130005, |
| "learning_rate": 0.0005, |
| "loss": 5.837953567504883, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.05592705167173252, |
| "grad_norm": 2.6507105827331543, |
| "learning_rate": 0.0005, |
| "loss": 5.720830917358398, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.056534954407294835, |
| "grad_norm": 1.761267900466919, |
| "learning_rate": 0.0005, |
| "loss": 5.8046417236328125, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.05714285714285714, |
| "grad_norm": 2.158432722091675, |
| "learning_rate": 0.0005, |
| "loss": 5.530825614929199, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.057750759878419454, |
| "grad_norm": 1.8743107318878174, |
| "learning_rate": 0.0005, |
| "loss": 5.851261138916016, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.05835866261398176, |
| "grad_norm": 2.2951159477233887, |
| "learning_rate": 0.0005, |
| "loss": 5.754410743713379, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.05896656534954407, |
| "grad_norm": 1.6710808277130127, |
| "learning_rate": 0.0005, |
| "loss": 5.511685371398926, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.059574468085106386, |
| "grad_norm": 2.4671308994293213, |
| "learning_rate": 0.0005, |
| "loss": 5.762502193450928, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.06018237082066869, |
| "grad_norm": 1.7344735860824585, |
| "learning_rate": 0.0005, |
| "loss": 5.726058006286621, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.060790273556231005, |
| "grad_norm": 1.9786497354507446, |
| "learning_rate": 0.0005, |
| "loss": 5.570637226104736, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.06139817629179331, |
| "grad_norm": 1.672898769378662, |
| "learning_rate": 0.0005, |
| "loss": 5.4022722244262695, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.06200607902735562, |
| "grad_norm": 1.975422978401184, |
| "learning_rate": 0.0005, |
| "loss": 5.58085823059082, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.06261398176291794, |
| "grad_norm": 1.6185539960861206, |
| "learning_rate": 0.0005, |
| "loss": 5.551645755767822, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.06322188449848025, |
| "grad_norm": 1.6963152885437012, |
| "learning_rate": 0.0005, |
| "loss": 5.634788990020752, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.06382978723404255, |
| "grad_norm": 1.6010147333145142, |
| "learning_rate": 0.0005, |
| "loss": 5.439291954040527, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.06443768996960486, |
| "grad_norm": 1.4918285608291626, |
| "learning_rate": 0.0005, |
| "loss": 5.595495700836182, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.06504559270516717, |
| "grad_norm": 1.7921746969223022, |
| "learning_rate": 0.0005, |
| "loss": 5.7882080078125, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.06565349544072949, |
| "grad_norm": 1.6905741691589355, |
| "learning_rate": 0.0005, |
| "loss": 5.6724653244018555, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.0662613981762918, |
| "grad_norm": 1.5293573141098022, |
| "learning_rate": 0.0005, |
| "loss": 5.407555103302002, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.0668693009118541, |
| "grad_norm": 1.3903565406799316, |
| "learning_rate": 0.0005, |
| "loss": 5.763338565826416, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.06747720364741641, |
| "grad_norm": 1.6731656789779663, |
| "learning_rate": 0.0005, |
| "loss": 5.656299591064453, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.06808510638297872, |
| "grad_norm": 1.6174890995025635, |
| "learning_rate": 0.0005, |
| "loss": 5.728058815002441, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.06869300911854104, |
| "grad_norm": 1.9111192226409912, |
| "learning_rate": 0.0005, |
| "loss": 5.569175720214844, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.06930091185410335, |
| "grad_norm": 1.397756576538086, |
| "learning_rate": 0.0005, |
| "loss": 5.692349433898926, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.06990881458966565, |
| "grad_norm": 1.4280520677566528, |
| "learning_rate": 0.0005, |
| "loss": 5.366017818450928, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.07051671732522796, |
| "grad_norm": 2.1756176948547363, |
| "learning_rate": 0.0005, |
| "loss": 5.529537677764893, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.07112462006079028, |
| "grad_norm": 1.6855345964431763, |
| "learning_rate": 0.0005, |
| "loss": 5.3663010597229, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.07173252279635259, |
| "grad_norm": 1.3849018812179565, |
| "learning_rate": 0.0005, |
| "loss": 5.661293983459473, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.07234042553191489, |
| "grad_norm": 1.5399678945541382, |
| "learning_rate": 0.0005, |
| "loss": 5.681015968322754, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.0729483282674772, |
| "grad_norm": 1.3474847078323364, |
| "learning_rate": 0.0005, |
| "loss": 5.404428482055664, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.07355623100303951, |
| "grad_norm": 1.4353671073913574, |
| "learning_rate": 0.0005, |
| "loss": 5.621041297912598, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.07416413373860183, |
| "grad_norm": 1.385099172592163, |
| "learning_rate": 0.0005, |
| "loss": 5.410789489746094, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.07477203647416414, |
| "grad_norm": 1.5382664203643799, |
| "learning_rate": 0.0005, |
| "loss": 5.401933670043945, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.07537993920972644, |
| "grad_norm": 1.48553466796875, |
| "learning_rate": 0.0005, |
| "loss": 5.547571182250977, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.07598784194528875, |
| "grad_norm": 1.3798505067825317, |
| "learning_rate": 0.0005, |
| "loss": 5.5776872634887695, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.07659574468085106, |
| "grad_norm": 1.863465428352356, |
| "learning_rate": 0.0005, |
| "loss": 5.570428371429443, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.07720364741641338, |
| "grad_norm": 1.7337578535079956, |
| "learning_rate": 0.0005, |
| "loss": 5.60271692276001, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.07781155015197569, |
| "grad_norm": 1.7129346132278442, |
| "learning_rate": 0.0005, |
| "loss": 5.655090808868408, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.07841945288753799, |
| "grad_norm": 1.8253934383392334, |
| "learning_rate": 0.0005, |
| "loss": 5.726884841918945, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.0790273556231003, |
| "grad_norm": 1.493262529373169, |
| "learning_rate": 0.0005, |
| "loss": 5.307271957397461, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.07963525835866261, |
| "grad_norm": 1.9851430654525757, |
| "learning_rate": 0.0005, |
| "loss": 5.40402889251709, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.08024316109422493, |
| "grad_norm": 1.4382926225662231, |
| "learning_rate": 0.0005, |
| "loss": 5.55129337310791, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.08085106382978724, |
| "grad_norm": 2.1384055614471436, |
| "learning_rate": 0.0005, |
| "loss": 5.42939567565918, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.08145896656534954, |
| "grad_norm": 1.5483143329620361, |
| "learning_rate": 0.0005, |
| "loss": 5.495145797729492, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.08206686930091185, |
| "grad_norm": 1.6180500984191895, |
| "learning_rate": 0.0005, |
| "loss": 5.596287727355957, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.08267477203647416, |
| "grad_norm": 1.6833781003952026, |
| "learning_rate": 0.0005, |
| "loss": 5.704960346221924, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.08328267477203648, |
| "grad_norm": 1.731799602508545, |
| "learning_rate": 0.0005, |
| "loss": 5.343502998352051, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.08389057750759879, |
| "grad_norm": 1.7854918241500854, |
| "learning_rate": 0.0005, |
| "loss": 5.647939205169678, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.08449848024316109, |
| "grad_norm": 1.2474077939987183, |
| "learning_rate": 0.0005, |
| "loss": 5.360551834106445, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.0851063829787234, |
| "grad_norm": 5.299109935760498, |
| "learning_rate": 0.0005, |
| "loss": 5.383178234100342, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.08571428571428572, |
| "grad_norm": 2.591733694076538, |
| "learning_rate": 0.0005, |
| "loss": 5.623793601989746, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.08632218844984803, |
| "grad_norm": 1.5868524312973022, |
| "learning_rate": 0.0005, |
| "loss": 5.522441864013672, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.08693009118541034, |
| "grad_norm": 1.752677083015442, |
| "learning_rate": 0.0005, |
| "loss": 5.5086774826049805, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.08753799392097264, |
| "grad_norm": 1.5863618850708008, |
| "learning_rate": 0.0005, |
| "loss": 5.492759704589844, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.08814589665653495, |
| "grad_norm": 1.4941948652267456, |
| "learning_rate": 0.0005, |
| "loss": 5.475063323974609, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.08875379939209727, |
| "grad_norm": 1.5351965427398682, |
| "learning_rate": 0.0005, |
| "loss": 5.511392593383789, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.08936170212765958, |
| "grad_norm": 1.5566837787628174, |
| "learning_rate": 0.0005, |
| "loss": 5.4525909423828125, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.08996960486322189, |
| "grad_norm": 1.5408483743667603, |
| "learning_rate": 0.0005, |
| "loss": 5.592557430267334, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.09057750759878419, |
| "grad_norm": 1.3915044069290161, |
| "learning_rate": 0.0005, |
| "loss": 5.68109130859375, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.0911854103343465, |
| "grad_norm": 1.4081814289093018, |
| "learning_rate": 0.0005, |
| "loss": 5.310542106628418, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.09179331306990882, |
| "grad_norm": 1.368977427482605, |
| "learning_rate": 0.0005, |
| "loss": 5.590452194213867, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.09240121580547113, |
| "grad_norm": 1.7604471445083618, |
| "learning_rate": 0.0005, |
| "loss": 5.2881550788879395, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.09300911854103343, |
| "grad_norm": 1.2718323469161987, |
| "learning_rate": 0.0005, |
| "loss": 5.228243827819824, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.09361702127659574, |
| "grad_norm": 1.853657841682434, |
| "learning_rate": 0.0005, |
| "loss": 5.344303131103516, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.09422492401215805, |
| "grad_norm": 1.2742729187011719, |
| "learning_rate": 0.0005, |
| "loss": 5.602327346801758, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.09483282674772037, |
| "grad_norm": 1.3428983688354492, |
| "learning_rate": 0.0005, |
| "loss": 5.564847469329834, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.09544072948328268, |
| "grad_norm": 1.307673454284668, |
| "learning_rate": 0.0005, |
| "loss": 5.5293378829956055, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.09604863221884498, |
| "grad_norm": 1.2413536310195923, |
| "learning_rate": 0.0005, |
| "loss": 5.751148223876953, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.09665653495440729, |
| "grad_norm": 1.5207955837249756, |
| "learning_rate": 0.0005, |
| "loss": 5.464879989624023, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.0972644376899696, |
| "grad_norm": 1.2123122215270996, |
| "learning_rate": 0.0005, |
| "loss": 5.438077926635742, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.09787234042553192, |
| "grad_norm": 1.420456051826477, |
| "learning_rate": 0.0005, |
| "loss": 5.586366176605225, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.09848024316109423, |
| "grad_norm": 1.2411231994628906, |
| "learning_rate": 0.0005, |
| "loss": 5.465837478637695, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.09908814589665653, |
| "grad_norm": 1.4124112129211426, |
| "learning_rate": 0.0005, |
| "loss": 5.58890438079834, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.09969604863221884, |
| "grad_norm": 1.421832799911499, |
| "learning_rate": 0.0005, |
| "loss": 5.211925029754639, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.10030395136778116, |
| "grad_norm": 1.4735937118530273, |
| "learning_rate": 0.0005, |
| "loss": 5.542084693908691, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.10091185410334347, |
| "grad_norm": 1.2726881504058838, |
| "learning_rate": 0.0005, |
| "loss": 5.566733360290527, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.10151975683890578, |
| "grad_norm": 1.3275830745697021, |
| "learning_rate": 0.0005, |
| "loss": 5.730228424072266, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.10212765957446808, |
| "grad_norm": 1.6597068309783936, |
| "learning_rate": 0.0005, |
| "loss": 5.339101791381836, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.10273556231003039, |
| "grad_norm": 1.46490478515625, |
| "learning_rate": 0.0005, |
| "loss": 5.410638809204102, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.1033434650455927, |
| "grad_norm": 1.3094699382781982, |
| "learning_rate": 0.0005, |
| "loss": 5.219968318939209, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.10395136778115502, |
| "grad_norm": 1.4983205795288086, |
| "learning_rate": 0.0005, |
| "loss": 5.392378330230713, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.10455927051671733, |
| "grad_norm": 1.517512559890747, |
| "learning_rate": 0.0005, |
| "loss": 5.38358736038208, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.10516717325227963, |
| "grad_norm": 1.5345962047576904, |
| "learning_rate": 0.0005, |
| "loss": 5.368213653564453, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.10577507598784194, |
| "grad_norm": 1.1318706274032593, |
| "learning_rate": 0.0005, |
| "loss": 5.639193534851074, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.10638297872340426, |
| "grad_norm": 1.3089977502822876, |
| "learning_rate": 0.0005, |
| "loss": 5.508517265319824, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.10699088145896657, |
| "grad_norm": 1.16405189037323, |
| "learning_rate": 0.0005, |
| "loss": 5.238767623901367, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.10759878419452888, |
| "grad_norm": 1.318361759185791, |
| "learning_rate": 0.0005, |
| "loss": 5.591005325317383, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.10820668693009118, |
| "grad_norm": 1.7068839073181152, |
| "learning_rate": 0.0005, |
| "loss": 5.138769149780273, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.1088145896656535, |
| "grad_norm": 1.4426335096359253, |
| "learning_rate": 0.0005, |
| "loss": 5.406965255737305, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.1094224924012158, |
| "grad_norm": 1.3298251628875732, |
| "learning_rate": 0.0005, |
| "loss": 5.486334323883057, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.11003039513677812, |
| "grad_norm": 1.2703888416290283, |
| "learning_rate": 0.0005, |
| "loss": 5.543169021606445, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.11063829787234042, |
| "grad_norm": 1.0853707790374756, |
| "learning_rate": 0.0005, |
| "loss": 5.2396135330200195, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.11124620060790273, |
| "grad_norm": 1.283922553062439, |
| "learning_rate": 0.0005, |
| "loss": 5.168734550476074, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.11185410334346504, |
| "grad_norm": 1.4008558988571167, |
| "learning_rate": 0.0005, |
| "loss": 5.464504241943359, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.11246200607902736, |
| "grad_norm": 1.6104100942611694, |
| "learning_rate": 0.0005, |
| "loss": 5.350894927978516, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.11306990881458967, |
| "grad_norm": 1.1095637083053589, |
| "learning_rate": 0.0005, |
| "loss": 5.330683708190918, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.11367781155015197, |
| "grad_norm": 1.3298522233963013, |
| "learning_rate": 0.0005, |
| "loss": 5.376528739929199, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.11428571428571428, |
| "grad_norm": 1.4511582851409912, |
| "learning_rate": 0.0005, |
| "loss": 5.49576473236084, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.1148936170212766, |
| "grad_norm": 1.4968204498291016, |
| "learning_rate": 0.0005, |
| "loss": 5.232635021209717, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.11550151975683891, |
| "grad_norm": 1.2423769235610962, |
| "learning_rate": 0.0005, |
| "loss": 5.456453323364258, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.11610942249240122, |
| "grad_norm": 1.2642461061477661, |
| "learning_rate": 0.0005, |
| "loss": 5.673423767089844, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.11671732522796352, |
| "grad_norm": 1.6604862213134766, |
| "learning_rate": 0.0005, |
| "loss": 5.230939865112305, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.11732522796352583, |
| "grad_norm": 1.4601672887802124, |
| "learning_rate": 0.0005, |
| "loss": 5.308025360107422, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.11793313069908815, |
| "grad_norm": 1.66468346118927, |
| "learning_rate": 0.0005, |
| "loss": 5.50089168548584, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.11854103343465046, |
| "grad_norm": 1.4034700393676758, |
| "learning_rate": 0.0005, |
| "loss": 5.4229583740234375, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.11914893617021277, |
| "grad_norm": 1.3911566734313965, |
| "learning_rate": 0.0005, |
| "loss": 5.266064643859863, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.11975683890577507, |
| "grad_norm": 1.5582391023635864, |
| "learning_rate": 0.0005, |
| "loss": 5.215412616729736, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.12036474164133738, |
| "grad_norm": 1.4908430576324463, |
| "learning_rate": 0.0005, |
| "loss": 5.305833339691162, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.1209726443768997, |
| "grad_norm": 1.4207631349563599, |
| "learning_rate": 0.0005, |
| "loss": 5.2746734619140625, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.12158054711246201, |
| "grad_norm": 1.5322375297546387, |
| "learning_rate": 0.0005, |
| "loss": 5.160092353820801, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.12218844984802432, |
| "grad_norm": 1.538822889328003, |
| "learning_rate": 0.0005, |
| "loss": 5.2349467277526855, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.12279635258358662, |
| "grad_norm": 1.487720251083374, |
| "learning_rate": 0.0005, |
| "loss": 5.305604934692383, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.12340425531914893, |
| "grad_norm": 1.402201771736145, |
| "learning_rate": 0.0005, |
| "loss": 5.271785736083984, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.12401215805471125, |
| "grad_norm": 1.4523091316223145, |
| "learning_rate": 0.0005, |
| "loss": 5.260416030883789, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.12462006079027356, |
| "grad_norm": 1.3056803941726685, |
| "learning_rate": 0.0005, |
| "loss": 5.221076488494873, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.12522796352583587, |
| "grad_norm": 1.4249091148376465, |
| "learning_rate": 0.0005, |
| "loss": 5.13364839553833, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.12583586626139817, |
| "grad_norm": 1.417321801185608, |
| "learning_rate": 0.0005, |
| "loss": 5.294346332550049, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.1264437689969605, |
| "grad_norm": 1.3512288331985474, |
| "learning_rate": 0.0005, |
| "loss": 5.273685455322266, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.1270516717325228, |
| "grad_norm": 1.53708016872406, |
| "learning_rate": 0.0005, |
| "loss": 5.160931587219238, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.1276595744680851, |
| "grad_norm": 1.3125845193862915, |
| "learning_rate": 0.0005, |
| "loss": 5.472460746765137, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.12826747720364742, |
| "grad_norm": 1.6518676280975342, |
| "learning_rate": 0.0005, |
| "loss": 5.4825568199157715, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.12887537993920972, |
| "grad_norm": 1.203003168106079, |
| "learning_rate": 0.0005, |
| "loss": 5.11652946472168, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.12948328267477205, |
| "grad_norm": 1.3805352449417114, |
| "learning_rate": 0.0005, |
| "loss": 5.366741180419922, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.13009118541033435, |
| "grad_norm": 1.8709197044372559, |
| "learning_rate": 0.0005, |
| "loss": 5.435246467590332, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.13069908814589665, |
| "grad_norm": 1.7283586263656616, |
| "learning_rate": 0.0005, |
| "loss": 5.202251434326172, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.13130699088145897, |
| "grad_norm": 1.2809170484542847, |
| "learning_rate": 0.0005, |
| "loss": 5.283895492553711, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.13191489361702127, |
| "grad_norm": 1.249645709991455, |
| "learning_rate": 0.0005, |
| "loss": 5.123793601989746, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.1325227963525836, |
| "grad_norm": 1.3356451988220215, |
| "learning_rate": 0.0005, |
| "loss": 5.174809455871582, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.1331306990881459, |
| "grad_norm": 1.139381766319275, |
| "learning_rate": 0.0005, |
| "loss": 5.0811967849731445, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.1337386018237082, |
| "grad_norm": 1.2006030082702637, |
| "learning_rate": 0.0005, |
| "loss": 5.268994331359863, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.13434650455927052, |
| "grad_norm": 1.2994015216827393, |
| "learning_rate": 0.0005, |
| "loss": 5.426079750061035, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.13495440729483282, |
| "grad_norm": 1.0793324708938599, |
| "learning_rate": 0.0005, |
| "loss": 5.424633979797363, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.13556231003039515, |
| "grad_norm": 1.1271226406097412, |
| "learning_rate": 0.0005, |
| "loss": 5.310846328735352, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.13617021276595745, |
| "grad_norm": 1.1775165796279907, |
| "learning_rate": 0.0005, |
| "loss": 5.071159839630127, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.13677811550151975, |
| "grad_norm": 1.1077218055725098, |
| "learning_rate": 0.0005, |
| "loss": 5.208876609802246, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.13738601823708207, |
| "grad_norm": 1.3281017541885376, |
| "learning_rate": 0.0005, |
| "loss": 5.371927261352539, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.13799392097264437, |
| "grad_norm": 1.4999650716781616, |
| "learning_rate": 0.0005, |
| "loss": 5.17914342880249, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.1386018237082067, |
| "grad_norm": 1.2213531732559204, |
| "learning_rate": 0.0005, |
| "loss": 5.079235076904297, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.139209726443769, |
| "grad_norm": 1.409624695777893, |
| "learning_rate": 0.0005, |
| "loss": 5.218929767608643, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.1398176291793313, |
| "grad_norm": 1.2914072275161743, |
| "learning_rate": 0.0005, |
| "loss": 5.254355430603027, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.14042553191489363, |
| "grad_norm": 1.27825927734375, |
| "learning_rate": 0.0005, |
| "loss": 5.02869987487793, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.14103343465045592, |
| "grad_norm": 1.367679238319397, |
| "learning_rate": 0.0005, |
| "loss": 5.032447814941406, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.14164133738601822, |
| "grad_norm": 1.1813191175460815, |
| "learning_rate": 0.0005, |
| "loss": 5.181385040283203, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.14224924012158055, |
| "grad_norm": 1.385109305381775, |
| "learning_rate": 0.0005, |
| "loss": 5.294610977172852, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.14285714285714285, |
| "grad_norm": 1.2544500827789307, |
| "learning_rate": 0.0005, |
| "loss": 5.046303749084473, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.14346504559270518, |
| "grad_norm": 1.487121820449829, |
| "learning_rate": 0.0005, |
| "loss": 5.523983001708984, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.14407294832826747, |
| "grad_norm": 1.263445258140564, |
| "learning_rate": 0.0005, |
| "loss": 5.192383289337158, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.14468085106382977, |
| "grad_norm": 1.0454970598220825, |
| "learning_rate": 0.0005, |
| "loss": 5.0029120445251465, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.1452887537993921, |
| "grad_norm": 1.131041407585144, |
| "learning_rate": 0.0005, |
| "loss": 5.140591144561768, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.1458966565349544, |
| "grad_norm": 1.3271952867507935, |
| "learning_rate": 0.0005, |
| "loss": 5.232538223266602, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.14650455927051673, |
| "grad_norm": 1.2867931127548218, |
| "learning_rate": 0.0005, |
| "loss": 5.288295745849609, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.14711246200607903, |
| "grad_norm": 1.2857162952423096, |
| "learning_rate": 0.0005, |
| "loss": 4.999725341796875, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.14772036474164132, |
| "grad_norm": 1.308387279510498, |
| "learning_rate": 0.0005, |
| "loss": 5.332901477813721, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.14832826747720365, |
| "grad_norm": 1.431774377822876, |
| "learning_rate": 0.0005, |
| "loss": 5.33701753616333, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.14893617021276595, |
| "grad_norm": 1.2257990837097168, |
| "learning_rate": 0.0005, |
| "loss": 5.286837100982666, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.14954407294832828, |
| "grad_norm": 1.2497832775115967, |
| "learning_rate": 0.0005, |
| "loss": 5.060267448425293, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.15015197568389058, |
| "grad_norm": 1.3174192905426025, |
| "learning_rate": 0.0005, |
| "loss": 5.460453987121582, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.15075987841945288, |
| "grad_norm": 1.2937954664230347, |
| "learning_rate": 0.0005, |
| "loss": 5.300616264343262, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.1513677811550152, |
| "grad_norm": 1.1722848415374756, |
| "learning_rate": 0.0005, |
| "loss": 5.289948463439941, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.1519756838905775, |
| "grad_norm": 1.365752100944519, |
| "learning_rate": 0.0005, |
| "loss": 5.077818870544434, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.15258358662613983, |
| "grad_norm": 1.2099617719650269, |
| "learning_rate": 0.0005, |
| "loss": 5.033614635467529, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.15319148936170213, |
| "grad_norm": 1.3854937553405762, |
| "learning_rate": 0.0005, |
| "loss": 5.019617080688477, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.15379939209726443, |
| "grad_norm": 1.3792158365249634, |
| "learning_rate": 0.0005, |
| "loss": 5.079125881195068, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.15440729483282675, |
| "grad_norm": 1.1149134635925293, |
| "learning_rate": 0.0005, |
| "loss": 5.06775426864624, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.15501519756838905, |
| "grad_norm": 1.4162288904190063, |
| "learning_rate": 0.0005, |
| "loss": 5.29591178894043, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.15562310030395138, |
| "grad_norm": 1.298060417175293, |
| "learning_rate": 0.0005, |
| "loss": 5.090610504150391, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.15623100303951368, |
| "grad_norm": 1.1845481395721436, |
| "learning_rate": 0.0005, |
| "loss": 5.00084114074707, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.15683890577507598, |
| "grad_norm": 1.1649361848831177, |
| "learning_rate": 0.0005, |
| "loss": 5.0191168785095215, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.1574468085106383, |
| "grad_norm": 1.1649863719940186, |
| "learning_rate": 0.0005, |
| "loss": 4.924384117126465, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.1580547112462006, |
| "grad_norm": 1.305981159210205, |
| "learning_rate": 0.0005, |
| "loss": 5.208071708679199, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.15866261398176293, |
| "grad_norm": 1.1375975608825684, |
| "learning_rate": 0.0005, |
| "loss": 5.07304048538208, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.15927051671732523, |
| "grad_norm": 1.570008635520935, |
| "learning_rate": 0.0005, |
| "loss": 5.2816667556762695, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.15987841945288753, |
| "grad_norm": 1.168481469154358, |
| "learning_rate": 0.0005, |
| "loss": 5.156436920166016, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.16048632218844985, |
| "grad_norm": 1.17093026638031, |
| "learning_rate": 0.0005, |
| "loss": 5.264464378356934, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.16109422492401215, |
| "grad_norm": 1.1767195463180542, |
| "learning_rate": 0.0005, |
| "loss": 5.278616905212402, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.16170212765957448, |
| "grad_norm": 1.2456096410751343, |
| "learning_rate": 0.0005, |
| "loss": 5.296989440917969, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.16231003039513678, |
| "grad_norm": 1.260128140449524, |
| "learning_rate": 0.0005, |
| "loss": 5.161136150360107, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.16291793313069908, |
| "grad_norm": 1.3702967166900635, |
| "learning_rate": 0.0005, |
| "loss": 5.2522077560424805, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.1635258358662614, |
| "grad_norm": 1.1898664236068726, |
| "learning_rate": 0.0005, |
| "loss": 5.138284683227539, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.1641337386018237, |
| "grad_norm": 1.586888074874878, |
| "learning_rate": 0.0005, |
| "loss": 4.960643291473389, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.16474164133738603, |
| "grad_norm": 1.2508625984191895, |
| "learning_rate": 0.0005, |
| "loss": 5.2589569091796875, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.16534954407294833, |
| "grad_norm": 1.1662089824676514, |
| "learning_rate": 0.0005, |
| "loss": 5.264585494995117, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.16595744680851063, |
| "grad_norm": 1.2917591333389282, |
| "learning_rate": 0.0005, |
| "loss": 4.975507736206055, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.16656534954407295, |
| "grad_norm": 1.0556538105010986, |
| "learning_rate": 0.0005, |
| "loss": 5.047136306762695, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.16717325227963525, |
| "grad_norm": 1.0959351062774658, |
| "learning_rate": 0.0005, |
| "loss": 5.063904762268066, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.16778115501519758, |
| "grad_norm": 1.0194965600967407, |
| "learning_rate": 0.0005, |
| "loss": 5.230169296264648, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.16838905775075988, |
| "grad_norm": 1.326802372932434, |
| "learning_rate": 0.0005, |
| "loss": 5.127433776855469, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.16899696048632218, |
| "grad_norm": 1.17707097530365, |
| "learning_rate": 0.0005, |
| "loss": 5.209277153015137, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.1696048632218845, |
| "grad_norm": 0.9115813970565796, |
| "learning_rate": 0.0005, |
| "loss": 5.025136470794678, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.1702127659574468, |
| "grad_norm": 1.1245434284210205, |
| "learning_rate": 0.0005, |
| "loss": 5.057619094848633, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.17082066869300913, |
| "grad_norm": 1.3757452964782715, |
| "learning_rate": 0.0005, |
| "loss": 4.920927047729492, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.17142857142857143, |
| "grad_norm": 1.4696053266525269, |
| "learning_rate": 0.0005, |
| "loss": 5.1536760330200195, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.17203647416413373, |
| "grad_norm": 1.2874000072479248, |
| "learning_rate": 0.0005, |
| "loss": 5.050880432128906, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.17264437689969606, |
| "grad_norm": 1.2090721130371094, |
| "learning_rate": 0.0005, |
| "loss": 5.024714469909668, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.17325227963525835, |
| "grad_norm": 1.3489820957183838, |
| "learning_rate": 0.0005, |
| "loss": 5.124329090118408, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.17386018237082068, |
| "grad_norm": 1.055483102798462, |
| "learning_rate": 0.0005, |
| "loss": 4.890225887298584, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.17446808510638298, |
| "grad_norm": 1.2479093074798584, |
| "learning_rate": 0.0005, |
| "loss": 4.835631370544434, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.17507598784194528, |
| "grad_norm": 1.1899778842926025, |
| "learning_rate": 0.0005, |
| "loss": 5.027457237243652, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.1756838905775076, |
| "grad_norm": 1.1618897914886475, |
| "learning_rate": 0.0005, |
| "loss": 5.145232677459717, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.1762917933130699, |
| "grad_norm": 1.2332507371902466, |
| "learning_rate": 0.0005, |
| "loss": 5.138116359710693, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.17689969604863223, |
| "grad_norm": 1.1276404857635498, |
| "learning_rate": 0.0005, |
| "loss": 5.094466209411621, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.17750759878419453, |
| "grad_norm": 1.4890656471252441, |
| "learning_rate": 0.0005, |
| "loss": 4.797001838684082, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.17811550151975683, |
| "grad_norm": 1.0490905046463013, |
| "learning_rate": 0.0005, |
| "loss": 5.235766410827637, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.17872340425531916, |
| "grad_norm": 1.1675019264221191, |
| "learning_rate": 0.0005, |
| "loss": 4.964472770690918, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.17933130699088146, |
| "grad_norm": 0.9588620662689209, |
| "learning_rate": 0.0005, |
| "loss": 5.124715805053711, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.17993920972644378, |
| "grad_norm": 1.3892091512680054, |
| "learning_rate": 0.0005, |
| "loss": 4.847377300262451, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.18054711246200608, |
| "grad_norm": 1.1051721572875977, |
| "learning_rate": 0.0005, |
| "loss": 5.199601173400879, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.18115501519756838, |
| "grad_norm": 1.0869505405426025, |
| "learning_rate": 0.0005, |
| "loss": 5.3870697021484375, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.1817629179331307, |
| "grad_norm": 1.111187219619751, |
| "learning_rate": 0.0005, |
| "loss": 5.190181732177734, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.182370820668693, |
| "grad_norm": 1.2440016269683838, |
| "learning_rate": 0.0005, |
| "loss": 5.041322231292725, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.1829787234042553, |
| "grad_norm": 1.2418692111968994, |
| "learning_rate": 0.0005, |
| "loss": 5.212306022644043, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.18358662613981763, |
| "grad_norm": 1.2612659931182861, |
| "learning_rate": 0.0005, |
| "loss": 4.961835861206055, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.18419452887537993, |
| "grad_norm": 1.1162973642349243, |
| "learning_rate": 0.0005, |
| "loss": 4.950830936431885, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.18480243161094226, |
| "grad_norm": 1.144067406654358, |
| "learning_rate": 0.0005, |
| "loss": 4.8998637199401855, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.18541033434650456, |
| "grad_norm": 1.2814747095108032, |
| "learning_rate": 0.0005, |
| "loss": 5.224381446838379, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.18601823708206686, |
| "grad_norm": 1.3770310878753662, |
| "learning_rate": 0.0005, |
| "loss": 5.05579137802124, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.18662613981762918, |
| "grad_norm": 1.5116229057312012, |
| "learning_rate": 0.0005, |
| "loss": 5.082482814788818, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.18723404255319148, |
| "grad_norm": 1.0909713506698608, |
| "learning_rate": 0.0005, |
| "loss": 4.967124938964844, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.1878419452887538, |
| "grad_norm": 1.1027607917785645, |
| "learning_rate": 0.0005, |
| "loss": 5.00374698638916, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.1884498480243161, |
| "grad_norm": 1.238652229309082, |
| "learning_rate": 0.0005, |
| "loss": 4.993183135986328, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.1890577507598784, |
| "grad_norm": 1.0609782934188843, |
| "learning_rate": 0.0005, |
| "loss": 5.019218444824219, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.18966565349544073, |
| "grad_norm": 1.1945058107376099, |
| "learning_rate": 0.0005, |
| "loss": 5.068751335144043, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.19027355623100303, |
| "grad_norm": 1.2640782594680786, |
| "learning_rate": 0.0005, |
| "loss": 5.185402870178223, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.19088145896656536, |
| "grad_norm": 1.0532907247543335, |
| "learning_rate": 0.0005, |
| "loss": 5.222114562988281, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.19148936170212766, |
| "grad_norm": 1.0423952341079712, |
| "learning_rate": 0.0005, |
| "loss": 5.1693806648254395, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.19209726443768996, |
| "grad_norm": 1.0700887441635132, |
| "learning_rate": 0.0005, |
| "loss": 5.0217485427856445, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.19270516717325228, |
| "grad_norm": 1.2595866918563843, |
| "learning_rate": 0.0005, |
| "loss": 5.231429576873779, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.19331306990881458, |
| "grad_norm": 1.1495158672332764, |
| "learning_rate": 0.0005, |
| "loss": 5.015372276306152, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.1939209726443769, |
| "grad_norm": 1.3977763652801514, |
| "learning_rate": 0.0005, |
| "loss": 5.323009490966797, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.1945288753799392, |
| "grad_norm": 1.4009697437286377, |
| "learning_rate": 0.0005, |
| "loss": 5.2833638191223145, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.1951367781155015, |
| "grad_norm": 1.1618447303771973, |
| "learning_rate": 0.0005, |
| "loss": 5.064535140991211, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.19574468085106383, |
| "grad_norm": 1.1447522640228271, |
| "learning_rate": 0.0005, |
| "loss": 4.99235725402832, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.19635258358662613, |
| "grad_norm": 1.2342157363891602, |
| "learning_rate": 0.0005, |
| "loss": 5.036558151245117, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.19696048632218846, |
| "grad_norm": 1.2487186193466187, |
| "learning_rate": 0.0005, |
| "loss": 5.207220077514648, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.19756838905775076, |
| "grad_norm": 1.4693067073822021, |
| "learning_rate": 0.0005, |
| "loss": 5.096504211425781, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.19817629179331306, |
| "grad_norm": 1.1707696914672852, |
| "learning_rate": 0.0005, |
| "loss": 5.003598213195801, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.19878419452887539, |
| "grad_norm": 0.9728778600692749, |
| "learning_rate": 0.0005, |
| "loss": 4.8744659423828125, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.19939209726443768, |
| "grad_norm": 1.383410096168518, |
| "learning_rate": 0.0005, |
| "loss": 5.1511383056640625, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.0482876300811768, |
| "learning_rate": 0.0005, |
| "loss": 5.014847755432129, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.2006079027355623, |
| "grad_norm": 1.2320209741592407, |
| "learning_rate": 0.0005, |
| "loss": 4.923969745635986, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.2012158054711246, |
| "grad_norm": 2.013617753982544, |
| "learning_rate": 0.0005, |
| "loss": 4.876163482666016, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.20182370820668694, |
| "grad_norm": 1.4123047590255737, |
| "learning_rate": 0.0005, |
| "loss": 4.870320796966553, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.20243161094224923, |
| "grad_norm": 0.9998598694801331, |
| "learning_rate": 0.0005, |
| "loss": 4.8142805099487305, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.20303951367781156, |
| "grad_norm": 1.255579948425293, |
| "learning_rate": 0.0005, |
| "loss": 5.134385108947754, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.20364741641337386, |
| "grad_norm": 1.1863816976547241, |
| "learning_rate": 0.0005, |
| "loss": 4.943517208099365, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.20425531914893616, |
| "grad_norm": 1.3125497102737427, |
| "learning_rate": 0.0005, |
| "loss": 4.835733413696289, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.2048632218844985, |
| "grad_norm": 1.330944538116455, |
| "learning_rate": 0.0005, |
| "loss": 4.996496200561523, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.20547112462006079, |
| "grad_norm": 1.4103339910507202, |
| "learning_rate": 0.0005, |
| "loss": 5.215001106262207, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.2060790273556231, |
| "grad_norm": 1.1276763677597046, |
| "learning_rate": 0.0005, |
| "loss": 5.080985069274902, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.2066869300911854, |
| "grad_norm": 1.2522611618041992, |
| "learning_rate": 0.0005, |
| "loss": 5.1337480545043945, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.2072948328267477, |
| "grad_norm": 1.0622775554656982, |
| "learning_rate": 0.0005, |
| "loss": 5.139281272888184, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.20790273556231004, |
| "grad_norm": 1.2667897939682007, |
| "learning_rate": 0.0005, |
| "loss": 4.985269546508789, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.20851063829787234, |
| "grad_norm": 1.2665342092514038, |
| "learning_rate": 0.0005, |
| "loss": 4.907642841339111, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.20911854103343466, |
| "grad_norm": 1.2670104503631592, |
| "learning_rate": 0.0005, |
| "loss": 4.9238739013671875, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.20972644376899696, |
| "grad_norm": 1.3876585960388184, |
| "learning_rate": 0.0005, |
| "loss": 5.280843734741211, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.21033434650455926, |
| "grad_norm": 1.172425389289856, |
| "learning_rate": 0.0005, |
| "loss": 5.018771171569824, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.2109422492401216, |
| "grad_norm": 1.057332158088684, |
| "learning_rate": 0.0005, |
| "loss": 4.957630157470703, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.2115501519756839, |
| "grad_norm": 1.2106921672821045, |
| "learning_rate": 0.0005, |
| "loss": 5.079224109649658, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.2121580547112462, |
| "grad_norm": 1.2184040546417236, |
| "learning_rate": 0.0005, |
| "loss": 4.923876762390137, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.2127659574468085, |
| "grad_norm": 1.3889566659927368, |
| "learning_rate": 0.0005, |
| "loss": 5.0445098876953125, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.2133738601823708, |
| "grad_norm": 1.1836071014404297, |
| "learning_rate": 0.0005, |
| "loss": 4.762534141540527, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.21398176291793314, |
| "grad_norm": 1.2222967147827148, |
| "learning_rate": 0.0005, |
| "loss": 5.045120716094971, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.21458966565349544, |
| "grad_norm": 1.203317403793335, |
| "learning_rate": 0.0005, |
| "loss": 5.027883052825928, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.21519756838905776, |
| "grad_norm": 1.118275761604309, |
| "learning_rate": 0.0005, |
| "loss": 5.153387069702148, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.21580547112462006, |
| "grad_norm": 1.1502918004989624, |
| "learning_rate": 0.0005, |
| "loss": 4.907447814941406, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.21641337386018236, |
| "grad_norm": 0.916477620601654, |
| "learning_rate": 0.0005, |
| "loss": 4.913633346557617, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.2170212765957447, |
| "grad_norm": 0.9976673722267151, |
| "learning_rate": 0.0005, |
| "loss": 4.855230331420898, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.217629179331307, |
| "grad_norm": 1.2301874160766602, |
| "learning_rate": 0.0005, |
| "loss": 5.274983882904053, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.21823708206686931, |
| "grad_norm": 1.268349051475525, |
| "learning_rate": 0.0005, |
| "loss": 4.990891933441162, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.2188449848024316, |
| "grad_norm": 1.7098944187164307, |
| "learning_rate": 0.0005, |
| "loss": 5.0019989013671875, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.2194528875379939, |
| "grad_norm": 1.3171290159225464, |
| "learning_rate": 0.0005, |
| "loss": 5.091225624084473, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.22006079027355624, |
| "grad_norm": 1.1964459419250488, |
| "learning_rate": 0.0005, |
| "loss": 4.942023754119873, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.22066869300911854, |
| "grad_norm": 1.212193250656128, |
| "learning_rate": 0.0005, |
| "loss": 4.842243194580078, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.22127659574468084, |
| "grad_norm": 1.2447597980499268, |
| "learning_rate": 0.0005, |
| "loss": 4.891105651855469, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.22188449848024316, |
| "grad_norm": 1.0322506427764893, |
| "learning_rate": 0.0005, |
| "loss": 5.083103179931641, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.22249240121580546, |
| "grad_norm": 1.1431292295455933, |
| "learning_rate": 0.0005, |
| "loss": 5.104142189025879, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.2231003039513678, |
| "grad_norm": 1.1028327941894531, |
| "learning_rate": 0.0005, |
| "loss": 4.933050632476807, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.2237082066869301, |
| "grad_norm": 0.9712069630622864, |
| "learning_rate": 0.0005, |
| "loss": 4.821019172668457, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.2243161094224924, |
| "grad_norm": 1.063249111175537, |
| "learning_rate": 0.0005, |
| "loss": 4.972682476043701, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.22492401215805471, |
| "grad_norm": 1.1715357303619385, |
| "learning_rate": 0.0005, |
| "loss": 5.0836591720581055, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.225531914893617, |
| "grad_norm": 1.128483772277832, |
| "learning_rate": 0.0005, |
| "loss": 5.094054698944092, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.22613981762917934, |
| "grad_norm": 1.2616199254989624, |
| "learning_rate": 0.0005, |
| "loss": 4.991359710693359, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.22674772036474164, |
| "grad_norm": 1.2140382528305054, |
| "learning_rate": 0.0005, |
| "loss": 4.7401838302612305, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.22735562310030394, |
| "grad_norm": 1.1435750722885132, |
| "learning_rate": 0.0005, |
| "loss": 5.093307971954346, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.22796352583586627, |
| "grad_norm": 1.0213854312896729, |
| "learning_rate": 0.0005, |
| "loss": 4.898110389709473, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.22857142857142856, |
| "grad_norm": 1.6159358024597168, |
| "learning_rate": 0.0005, |
| "loss": 4.884780406951904, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.2291793313069909, |
| "grad_norm": 1.0451385974884033, |
| "learning_rate": 0.0005, |
| "loss": 5.046623229980469, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.2297872340425532, |
| "grad_norm": 1.0726312398910522, |
| "learning_rate": 0.0005, |
| "loss": 5.3511962890625, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.2303951367781155, |
| "grad_norm": 1.1179200410842896, |
| "learning_rate": 0.0005, |
| "loss": 4.847324371337891, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.23100303951367782, |
| "grad_norm": 1.1474509239196777, |
| "learning_rate": 0.0005, |
| "loss": 4.830921173095703, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.23161094224924011, |
| "grad_norm": 1.0454329252243042, |
| "learning_rate": 0.0005, |
| "loss": 4.962401390075684, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.23221884498480244, |
| "grad_norm": 1.214348316192627, |
| "learning_rate": 0.0005, |
| "loss": 4.800313472747803, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.23282674772036474, |
| "grad_norm": 1.18563973903656, |
| "learning_rate": 0.0005, |
| "loss": 4.8629655838012695, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.23343465045592704, |
| "grad_norm": 1.0595086812973022, |
| "learning_rate": 0.0005, |
| "loss": 4.9949750900268555, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.23404255319148937, |
| "grad_norm": 1.0595086812973022, |
| "learning_rate": 0.0005, |
| "loss": 4.926072597503662, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.23465045592705167, |
| "grad_norm": 1.1770035028457642, |
| "learning_rate": 0.0005, |
| "loss": 4.766304969787598, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.235258358662614, |
| "grad_norm": 1.1117204427719116, |
| "learning_rate": 0.0005, |
| "loss": 4.896605968475342, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.2358662613981763, |
| "grad_norm": 1.2087441682815552, |
| "learning_rate": 0.0005, |
| "loss": 4.892548084259033, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.2364741641337386, |
| "grad_norm": 0.9041852355003357, |
| "learning_rate": 0.0005, |
| "loss": 4.948829650878906, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.23708206686930092, |
| "grad_norm": 0.94862300157547, |
| "learning_rate": 0.0005, |
| "loss": 4.8753533363342285, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.23768996960486322, |
| "grad_norm": 1.055679202079773, |
| "learning_rate": 0.0005, |
| "loss": 4.816287994384766, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.23829787234042554, |
| "grad_norm": 1.413857340812683, |
| "learning_rate": 0.0005, |
| "loss": 4.809457778930664, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.23890577507598784, |
| "grad_norm": 1.326051950454712, |
| "learning_rate": 0.0005, |
| "loss": 5.0313568115234375, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.23951367781155014, |
| "grad_norm": 1.2621649503707886, |
| "learning_rate": 0.0005, |
| "loss": 4.906643867492676, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.24012158054711247, |
| "grad_norm": 1.2217754125595093, |
| "learning_rate": 0.0005, |
| "loss": 4.929527759552002, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.24072948328267477, |
| "grad_norm": 1.1450992822647095, |
| "learning_rate": 0.0005, |
| "loss": 4.908195495605469, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.2413373860182371, |
| "grad_norm": 1.4507970809936523, |
| "learning_rate": 0.0005, |
| "loss": 5.079260349273682, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.2419452887537994, |
| "grad_norm": 1.086036205291748, |
| "learning_rate": 0.0005, |
| "loss": 4.996855735778809, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.2425531914893617, |
| "grad_norm": 1.0666170120239258, |
| "learning_rate": 0.0005, |
| "loss": 5.002256393432617, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.24316109422492402, |
| "grad_norm": 1.199183702468872, |
| "learning_rate": 0.0005, |
| "loss": 5.217647552490234, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.24376899696048632, |
| "grad_norm": 1.156293511390686, |
| "learning_rate": 0.0005, |
| "loss": 4.900952339172363, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.24437689969604864, |
| "grad_norm": 1.3151594400405884, |
| "learning_rate": 0.0005, |
| "loss": 4.980197906494141, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.24498480243161094, |
| "grad_norm": 1.0817885398864746, |
| "learning_rate": 0.0005, |
| "loss": 4.745031356811523, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.24559270516717324, |
| "grad_norm": 1.0003957748413086, |
| "learning_rate": 0.0005, |
| "loss": 4.599782466888428, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.24620060790273557, |
| "grad_norm": 0.95441734790802, |
| "learning_rate": 0.0005, |
| "loss": 4.928730010986328, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.24680851063829787, |
| "grad_norm": 1.1539515256881714, |
| "learning_rate": 0.0005, |
| "loss": 5.01755428314209, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.2474164133738602, |
| "grad_norm": 1.1274021863937378, |
| "learning_rate": 0.0005, |
| "loss": 4.92464542388916, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.2480243161094225, |
| "grad_norm": 1.075126051902771, |
| "learning_rate": 0.0005, |
| "loss": 4.842813014984131, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.2486322188449848, |
| "grad_norm": 1.1200828552246094, |
| "learning_rate": 0.0005, |
| "loss": 4.701647758483887, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.24924012158054712, |
| "grad_norm": 1.349135398864746, |
| "learning_rate": 0.0005, |
| "loss": 5.124917030334473, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.24984802431610942, |
| "grad_norm": 1.403590440750122, |
| "learning_rate": 0.0005, |
| "loss": 5.070537567138672, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.25045592705167175, |
| "grad_norm": 0.9664301872253418, |
| "learning_rate": 0.0005, |
| "loss": 4.846314430236816, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.251063829787234, |
| "grad_norm": 1.1642309427261353, |
| "learning_rate": 0.0005, |
| "loss": 4.933165550231934, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.25167173252279634, |
| "grad_norm": 1.1649516820907593, |
| "learning_rate": 0.0005, |
| "loss": 4.789491653442383, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.25227963525835867, |
| "grad_norm": 1.1041150093078613, |
| "learning_rate": 0.0005, |
| "loss": 4.580702781677246, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.252887537993921, |
| "grad_norm": 1.0078331232070923, |
| "learning_rate": 0.0005, |
| "loss": 4.77386999130249, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.25349544072948327, |
| "grad_norm": 1.0907591581344604, |
| "learning_rate": 0.0005, |
| "loss": 4.774503707885742, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.2541033434650456, |
| "grad_norm": 1.3880425691604614, |
| "learning_rate": 0.0005, |
| "loss": 4.793880462646484, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.2547112462006079, |
| "grad_norm": 1.2313039302825928, |
| "learning_rate": 0.0005, |
| "loss": 4.7932891845703125, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.2553191489361702, |
| "grad_norm": 0.9940412044525146, |
| "learning_rate": 0.0005, |
| "loss": 5.119372367858887, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.2559270516717325, |
| "grad_norm": 1.0474408864974976, |
| "learning_rate": 0.0005, |
| "loss": 4.940298080444336, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.25653495440729485, |
| "grad_norm": 1.091572642326355, |
| "learning_rate": 0.0005, |
| "loss": 4.824063777923584, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.2571428571428571, |
| "grad_norm": 0.9919223189353943, |
| "learning_rate": 0.0005, |
| "loss": 4.823666572570801, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.25775075987841944, |
| "grad_norm": 0.9640527963638306, |
| "learning_rate": 0.0005, |
| "loss": 4.798361778259277, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.25835866261398177, |
| "grad_norm": 1.0292719602584839, |
| "learning_rate": 0.0005, |
| "loss": 4.69101619720459, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.2589665653495441, |
| "grad_norm": 1.2390789985656738, |
| "learning_rate": 0.0005, |
| "loss": 4.671029090881348, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.25957446808510637, |
| "grad_norm": 1.2008142471313477, |
| "learning_rate": 0.0005, |
| "loss": 4.796487331390381, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.2601823708206687, |
| "grad_norm": 1.0405327081680298, |
| "learning_rate": 0.0005, |
| "loss": 4.8557820320129395, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.260790273556231, |
| "grad_norm": 1.042792558670044, |
| "learning_rate": 0.0005, |
| "loss": 4.805086135864258, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.2613981762917933, |
| "grad_norm": 1.6039878129959106, |
| "learning_rate": 0.0005, |
| "loss": 4.892642974853516, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.2620060790273556, |
| "grad_norm": 1.0221588611602783, |
| "learning_rate": 0.0005, |
| "loss": 4.868304252624512, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.26261398176291795, |
| "grad_norm": 1.0673880577087402, |
| "learning_rate": 0.0005, |
| "loss": 4.52126932144165, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.2632218844984802, |
| "grad_norm": 1.1782925128936768, |
| "learning_rate": 0.0005, |
| "loss": 4.9915618896484375, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.26382978723404255, |
| "grad_norm": 0.9004169702529907, |
| "learning_rate": 0.0005, |
| "loss": 5.040285110473633, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.26443768996960487, |
| "grad_norm": 1.1495839357376099, |
| "learning_rate": 0.0005, |
| "loss": 4.991700172424316, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.2650455927051672, |
| "grad_norm": 1.4188427925109863, |
| "learning_rate": 0.0005, |
| "loss": 4.851819038391113, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.26565349544072947, |
| "grad_norm": 1.1886249780654907, |
| "learning_rate": 0.0005, |
| "loss": 4.819738388061523, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.2662613981762918, |
| "grad_norm": 1.0886558294296265, |
| "learning_rate": 0.0005, |
| "loss": 4.889862537384033, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.2668693009118541, |
| "grad_norm": 1.215423822402954, |
| "learning_rate": 0.0005, |
| "loss": 4.66435432434082, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.2674772036474164, |
| "grad_norm": 1.2564237117767334, |
| "learning_rate": 0.0005, |
| "loss": 4.840651512145996, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.2680851063829787, |
| "grad_norm": 0.9406836628913879, |
| "learning_rate": 0.0005, |
| "loss": 4.836145401000977, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.26869300911854105, |
| "grad_norm": 0.9963774085044861, |
| "learning_rate": 0.0005, |
| "loss": 4.879360675811768, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.2693009118541033, |
| "grad_norm": 1.349959135055542, |
| "learning_rate": 0.0005, |
| "loss": 5.149614334106445, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.26990881458966565, |
| "grad_norm": 1.0401732921600342, |
| "learning_rate": 0.0005, |
| "loss": 4.831120491027832, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.270516717325228, |
| "grad_norm": 1.0176857709884644, |
| "learning_rate": 0.0005, |
| "loss": 4.795515060424805, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.2711246200607903, |
| "grad_norm": 1.025748610496521, |
| "learning_rate": 0.0005, |
| "loss": 4.850000381469727, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.27173252279635257, |
| "grad_norm": 1.179107904434204, |
| "learning_rate": 0.0005, |
| "loss": 4.714792728424072, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.2723404255319149, |
| "grad_norm": 1.0913288593292236, |
| "learning_rate": 0.0005, |
| "loss": 4.713229656219482, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.2729483282674772, |
| "grad_norm": 1.2143056392669678, |
| "learning_rate": 0.0005, |
| "loss": 4.776023864746094, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.2735562310030395, |
| "grad_norm": 1.0799494981765747, |
| "learning_rate": 0.0005, |
| "loss": 4.930194854736328, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2741641337386018, |
| "grad_norm": 1.108874797821045, |
| "learning_rate": 0.0005, |
| "loss": 4.798364162445068, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.27477203647416415, |
| "grad_norm": 1.023545742034912, |
| "learning_rate": 0.0005, |
| "loss": 4.951462745666504, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.2753799392097264, |
| "grad_norm": 1.109633207321167, |
| "learning_rate": 0.0005, |
| "loss": 4.775464057922363, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.27598784194528875, |
| "grad_norm": 1.3409186601638794, |
| "learning_rate": 0.0005, |
| "loss": 4.637991905212402, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.2765957446808511, |
| "grad_norm": 1.3562052249908447, |
| "learning_rate": 0.0005, |
| "loss": 4.67308235168457, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.2772036474164134, |
| "grad_norm": 1.0121145248413086, |
| "learning_rate": 0.0005, |
| "loss": 4.8010430335998535, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.2778115501519757, |
| "grad_norm": 1.1394174098968506, |
| "learning_rate": 0.0005, |
| "loss": 4.878546237945557, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.278419452887538, |
| "grad_norm": 1.2403444051742554, |
| "learning_rate": 0.0005, |
| "loss": 4.8740434646606445, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.2790273556231003, |
| "grad_norm": 1.242672085762024, |
| "learning_rate": 0.0005, |
| "loss": 4.854490280151367, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.2796352583586626, |
| "grad_norm": 1.1986356973648071, |
| "learning_rate": 0.0005, |
| "loss": 4.629700660705566, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.2802431610942249, |
| "grad_norm": 1.0786645412445068, |
| "learning_rate": 0.0005, |
| "loss": 4.87874698638916, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.28085106382978725, |
| "grad_norm": 1.1056885719299316, |
| "learning_rate": 0.0005, |
| "loss": 4.816555023193359, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.2814589665653495, |
| "grad_norm": 1.2329976558685303, |
| "learning_rate": 0.0005, |
| "loss": 4.837638854980469, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.28206686930091185, |
| "grad_norm": 1.0028218030929565, |
| "learning_rate": 0.0005, |
| "loss": 4.760637283325195, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.2826747720364742, |
| "grad_norm": 2.1149895191192627, |
| "learning_rate": 0.0005, |
| "loss": 4.90034818649292, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.28328267477203645, |
| "grad_norm": 1.1582082509994507, |
| "learning_rate": 0.0005, |
| "loss": 4.943870544433594, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.2838905775075988, |
| "grad_norm": 1.069417119026184, |
| "learning_rate": 0.0005, |
| "loss": 4.872045993804932, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.2844984802431611, |
| "grad_norm": 1.0112608671188354, |
| "learning_rate": 0.0005, |
| "loss": 4.7598490715026855, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.2851063829787234, |
| "grad_norm": 1.2075181007385254, |
| "learning_rate": 0.0005, |
| "loss": 4.731328010559082, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.2857142857142857, |
| "grad_norm": 1.2083991765975952, |
| "learning_rate": 0.0005, |
| "loss": 4.927289962768555, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.286322188449848, |
| "grad_norm": 1.1168643236160278, |
| "learning_rate": 0.0005, |
| "loss": 4.864751815795898, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.28693009118541035, |
| "grad_norm": 1.078041434288025, |
| "learning_rate": 0.0005, |
| "loss": 4.8492431640625, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.2875379939209726, |
| "grad_norm": 1.1274940967559814, |
| "learning_rate": 0.0005, |
| "loss": 4.937112808227539, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.28814589665653495, |
| "grad_norm": 1.0653259754180908, |
| "learning_rate": 0.0005, |
| "loss": 4.594569683074951, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.2887537993920973, |
| "grad_norm": 1.1258432865142822, |
| "learning_rate": 0.0005, |
| "loss": 4.773998260498047, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.28936170212765955, |
| "grad_norm": 1.0394357442855835, |
| "learning_rate": 0.0005, |
| "loss": 4.6821393966674805, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.2899696048632219, |
| "grad_norm": 0.9899529218673706, |
| "learning_rate": 0.0005, |
| "loss": 4.887704849243164, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.2905775075987842, |
| "grad_norm": 1.1077382564544678, |
| "learning_rate": 0.0005, |
| "loss": 4.747071266174316, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.29118541033434653, |
| "grad_norm": 1.1913772821426392, |
| "learning_rate": 0.0005, |
| "loss": 4.718881607055664, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.2917933130699088, |
| "grad_norm": 1.0459861755371094, |
| "learning_rate": 0.0005, |
| "loss": 4.841939926147461, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.2924012158054711, |
| "grad_norm": 1.0120186805725098, |
| "learning_rate": 0.0005, |
| "loss": 4.599112510681152, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.29300911854103345, |
| "grad_norm": 1.195823073387146, |
| "learning_rate": 0.0005, |
| "loss": 4.728496551513672, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.2936170212765957, |
| "grad_norm": 1.3696142435073853, |
| "learning_rate": 0.0005, |
| "loss": 4.8885321617126465, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.29422492401215805, |
| "grad_norm": 1.0792248249053955, |
| "learning_rate": 0.0005, |
| "loss": 4.971987724304199, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.2948328267477204, |
| "grad_norm": 1.1619709730148315, |
| "learning_rate": 0.0005, |
| "loss": 4.571520805358887, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.29544072948328265, |
| "grad_norm": 1.0330854654312134, |
| "learning_rate": 0.0005, |
| "loss": 4.9688520431518555, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.296048632218845, |
| "grad_norm": 1.0170172452926636, |
| "learning_rate": 0.0005, |
| "loss": 4.837705135345459, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.2966565349544073, |
| "grad_norm": 0.9504514932632446, |
| "learning_rate": 0.0005, |
| "loss": 4.930578231811523, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.29726443768996963, |
| "grad_norm": 1.0397839546203613, |
| "learning_rate": 0.0005, |
| "loss": 4.835279941558838, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.2978723404255319, |
| "grad_norm": 1.1507797241210938, |
| "learning_rate": 0.0005, |
| "loss": 4.659822463989258, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.2984802431610942, |
| "grad_norm": 1.0850329399108887, |
| "learning_rate": 0.0005, |
| "loss": 4.845378875732422, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.29908814589665655, |
| "grad_norm": 0.9977235794067383, |
| "learning_rate": 0.0005, |
| "loss": 4.6792449951171875, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.2996960486322188, |
| "grad_norm": 1.1023447513580322, |
| "learning_rate": 0.0005, |
| "loss": 4.397878646850586, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.30030395136778115, |
| "grad_norm": 1.151859998703003, |
| "learning_rate": 0.0005, |
| "loss": 4.909426689147949, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.3009118541033435, |
| "grad_norm": 0.9461018443107605, |
| "learning_rate": 0.0005, |
| "loss": 4.778614044189453, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.30151975683890575, |
| "grad_norm": 1.0753334760665894, |
| "learning_rate": 0.0005, |
| "loss": 4.747906684875488, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.3021276595744681, |
| "grad_norm": 1.1790133714675903, |
| "learning_rate": 0.0005, |
| "loss": 4.932548522949219, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.3027355623100304, |
| "grad_norm": 0.9537319540977478, |
| "learning_rate": 0.0005, |
| "loss": 4.962670803070068, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.30334346504559273, |
| "grad_norm": 1.0915073156356812, |
| "learning_rate": 0.0005, |
| "loss": 4.60493278503418, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.303951367781155, |
| "grad_norm": 1.1177006959915161, |
| "learning_rate": 0.0005, |
| "loss": 4.69853401184082, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.30455927051671733, |
| "grad_norm": 1.297899842262268, |
| "learning_rate": 0.0005, |
| "loss": 4.779489517211914, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.30516717325227966, |
| "grad_norm": 1.0834105014801025, |
| "learning_rate": 0.0005, |
| "loss": 4.795891761779785, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.3057750759878419, |
| "grad_norm": 1.345795750617981, |
| "learning_rate": 0.0005, |
| "loss": 4.725937843322754, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.30638297872340425, |
| "grad_norm": 1.0314546823501587, |
| "learning_rate": 0.0005, |
| "loss": 4.679283142089844, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.3069908814589666, |
| "grad_norm": 1.0348689556121826, |
| "learning_rate": 0.0005, |
| "loss": 4.620650291442871, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.30759878419452885, |
| "grad_norm": 1.266882061958313, |
| "learning_rate": 0.0005, |
| "loss": 4.773314476013184, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.3082066869300912, |
| "grad_norm": 1.1243505477905273, |
| "learning_rate": 0.0005, |
| "loss": 4.748200416564941, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.3088145896656535, |
| "grad_norm": 1.1018924713134766, |
| "learning_rate": 0.0005, |
| "loss": 4.68126106262207, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.30942249240121583, |
| "grad_norm": 0.9563927054405212, |
| "learning_rate": 0.0005, |
| "loss": 4.857057094573975, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.3100303951367781, |
| "grad_norm": 0.9670454263687134, |
| "learning_rate": 0.0005, |
| "loss": 4.659792900085449, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.31063829787234043, |
| "grad_norm": 1.3360145092010498, |
| "learning_rate": 0.0005, |
| "loss": 4.829246520996094, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.31124620060790276, |
| "grad_norm": 1.2123932838439941, |
| "learning_rate": 0.0005, |
| "loss": 4.866283416748047, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.31185410334346503, |
| "grad_norm": 1.1718541383743286, |
| "learning_rate": 0.0005, |
| "loss": 4.582745552062988, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.31246200607902735, |
| "grad_norm": 1.0925103425979614, |
| "learning_rate": 0.0005, |
| "loss": 4.792252540588379, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.3130699088145897, |
| "grad_norm": 1.1929430961608887, |
| "learning_rate": 0.0005, |
| "loss": 5.072274208068848, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.31367781155015195, |
| "grad_norm": 1.1033862829208374, |
| "learning_rate": 0.0005, |
| "loss": 5.100406646728516, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.3142857142857143, |
| "grad_norm": 1.0984266996383667, |
| "learning_rate": 0.0005, |
| "loss": 4.652458190917969, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.3148936170212766, |
| "grad_norm": 1.1322665214538574, |
| "learning_rate": 0.0005, |
| "loss": 4.757636070251465, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.31550151975683893, |
| "grad_norm": 1.062367558479309, |
| "learning_rate": 0.0005, |
| "loss": 4.769024848937988, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.3161094224924012, |
| "grad_norm": 1.2141786813735962, |
| "learning_rate": 0.0005, |
| "loss": 4.795253753662109, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.31671732522796353, |
| "grad_norm": 1.0612986087799072, |
| "learning_rate": 0.0005, |
| "loss": 4.869831562042236, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.31732522796352586, |
| "grad_norm": 1.0063875913619995, |
| "learning_rate": 0.0005, |
| "loss": 4.789008617401123, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.31793313069908813, |
| "grad_norm": 1.1345361471176147, |
| "learning_rate": 0.0005, |
| "loss": 4.858623504638672, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.31854103343465046, |
| "grad_norm": 1.0883427858352661, |
| "learning_rate": 0.0005, |
| "loss": 4.6939568519592285, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.3191489361702128, |
| "grad_norm": 1.210877776145935, |
| "learning_rate": 0.0005, |
| "loss": 4.860000133514404, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.31975683890577505, |
| "grad_norm": 0.9779753088951111, |
| "learning_rate": 0.0005, |
| "loss": 4.710822582244873, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.3203647416413374, |
| "grad_norm": 1.130603313446045, |
| "learning_rate": 0.0005, |
| "loss": 4.8572678565979, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.3209726443768997, |
| "grad_norm": 1.0674115419387817, |
| "learning_rate": 0.0005, |
| "loss": 4.597178936004639, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.321580547112462, |
| "grad_norm": 1.2021600008010864, |
| "learning_rate": 0.0005, |
| "loss": 4.564465045928955, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.3221884498480243, |
| "grad_norm": 1.018747329711914, |
| "learning_rate": 0.0005, |
| "loss": 4.791827201843262, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.32279635258358663, |
| "grad_norm": 0.847745418548584, |
| "learning_rate": 0.0005, |
| "loss": 4.538583278656006, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.32340425531914896, |
| "grad_norm": 1.0722301006317139, |
| "learning_rate": 0.0005, |
| "loss": 4.728479385375977, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.32401215805471123, |
| "grad_norm": 1.0908275842666626, |
| "learning_rate": 0.0005, |
| "loss": 4.7406721115112305, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.32462006079027356, |
| "grad_norm": 1.0944693088531494, |
| "learning_rate": 0.0005, |
| "loss": 4.56569242477417, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.3252279635258359, |
| "grad_norm": 1.2364919185638428, |
| "learning_rate": 0.0005, |
| "loss": 4.977725028991699, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.32583586626139815, |
| "grad_norm": 0.9999113082885742, |
| "learning_rate": 0.0005, |
| "loss": 4.493361473083496, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.3264437689969605, |
| "grad_norm": 1.3366332054138184, |
| "learning_rate": 0.0005, |
| "loss": 4.634256362915039, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.3270516717325228, |
| "grad_norm": 1.1342191696166992, |
| "learning_rate": 0.0005, |
| "loss": 4.737150192260742, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.3276595744680851, |
| "grad_norm": 1.582653284072876, |
| "learning_rate": 0.0005, |
| "loss": 4.870404243469238, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.3282674772036474, |
| "grad_norm": 1.1713464260101318, |
| "learning_rate": 0.0005, |
| "loss": 4.6230669021606445, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.32887537993920973, |
| "grad_norm": 1.4178698062896729, |
| "learning_rate": 0.0005, |
| "loss": 4.764198303222656, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.32948328267477206, |
| "grad_norm": 1.2060075998306274, |
| "learning_rate": 0.0005, |
| "loss": 4.675044059753418, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.33009118541033433, |
| "grad_norm": 1.1698312759399414, |
| "learning_rate": 0.0005, |
| "loss": 4.706038475036621, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.33069908814589666, |
| "grad_norm": 1.23035728931427, |
| "learning_rate": 0.0005, |
| "loss": 4.638150215148926, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.331306990881459, |
| "grad_norm": 1.2109099626541138, |
| "learning_rate": 0.0005, |
| "loss": 4.521143436431885, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.33191489361702126, |
| "grad_norm": 1.0906360149383545, |
| "learning_rate": 0.0005, |
| "loss": 4.71769380569458, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.3325227963525836, |
| "grad_norm": 0.9782645106315613, |
| "learning_rate": 0.0005, |
| "loss": 4.610015869140625, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.3331306990881459, |
| "grad_norm": 0.9349035620689392, |
| "learning_rate": 0.0005, |
| "loss": 4.59166955947876, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.3337386018237082, |
| "grad_norm": 0.987219512462616, |
| "learning_rate": 0.0005, |
| "loss": 4.769125938415527, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.3343465045592705, |
| "grad_norm": 1.1204229593276978, |
| "learning_rate": 0.0005, |
| "loss": 4.561359405517578, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.33495440729483283, |
| "grad_norm": 0.9658718109130859, |
| "learning_rate": 0.0005, |
| "loss": 4.64151668548584, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.33556231003039516, |
| "grad_norm": 0.9612642526626587, |
| "learning_rate": 0.0005, |
| "loss": 4.750694274902344, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.33617021276595743, |
| "grad_norm": 1.215868592262268, |
| "learning_rate": 0.0005, |
| "loss": 4.788500785827637, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.33677811550151976, |
| "grad_norm": 1.1488007307052612, |
| "learning_rate": 0.0005, |
| "loss": 4.708594799041748, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.3373860182370821, |
| "grad_norm": 1.7407371997833252, |
| "learning_rate": 0.0005, |
| "loss": 4.751000881195068, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.33799392097264436, |
| "grad_norm": 1.0364381074905396, |
| "learning_rate": 0.0005, |
| "loss": 4.5454301834106445, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.3386018237082067, |
| "grad_norm": 1.0255850553512573, |
| "learning_rate": 0.0005, |
| "loss": 4.67049503326416, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.339209726443769, |
| "grad_norm": 1.1722489595413208, |
| "learning_rate": 0.0005, |
| "loss": 4.762301445007324, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.3398176291793313, |
| "grad_norm": 0.9487795829772949, |
| "learning_rate": 0.0005, |
| "loss": 4.537074089050293, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.3404255319148936, |
| "grad_norm": 1.0322198867797852, |
| "learning_rate": 0.0005, |
| "loss": 4.325550079345703, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.34103343465045594, |
| "grad_norm": 1.1969901323318481, |
| "learning_rate": 0.0005, |
| "loss": 4.897404670715332, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.34164133738601826, |
| "grad_norm": 0.9366703629493713, |
| "learning_rate": 0.0005, |
| "loss": 4.552170753479004, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.34224924012158053, |
| "grad_norm": 0.9916586875915527, |
| "learning_rate": 0.0005, |
| "loss": 4.596172332763672, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.34285714285714286, |
| "grad_norm": 1.1367878913879395, |
| "learning_rate": 0.0005, |
| "loss": 4.745723724365234, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.3434650455927052, |
| "grad_norm": 1.0490455627441406, |
| "learning_rate": 0.0005, |
| "loss": 4.605084419250488, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.34407294832826746, |
| "grad_norm": 1.2300151586532593, |
| "learning_rate": 0.0005, |
| "loss": 4.680173397064209, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.3446808510638298, |
| "grad_norm": 0.9747954607009888, |
| "learning_rate": 0.0005, |
| "loss": 4.755300521850586, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.3452887537993921, |
| "grad_norm": 1.2195698022842407, |
| "learning_rate": 0.0005, |
| "loss": 4.678683280944824, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.3458966565349544, |
| "grad_norm": 1.1122758388519287, |
| "learning_rate": 0.0005, |
| "loss": 4.55827522277832, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.3465045592705167, |
| "grad_norm": 1.1671665906906128, |
| "learning_rate": 0.0005, |
| "loss": 4.6204071044921875, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.34711246200607904, |
| "grad_norm": 0.912133514881134, |
| "learning_rate": 0.0005, |
| "loss": 4.619932174682617, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.34772036474164136, |
| "grad_norm": 1.0673686265945435, |
| "learning_rate": 0.0005, |
| "loss": 4.7417120933532715, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.34832826747720363, |
| "grad_norm": 1.0796691179275513, |
| "learning_rate": 0.0005, |
| "loss": 4.666133880615234, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.34893617021276596, |
| "grad_norm": 1.177518367767334, |
| "learning_rate": 0.0005, |
| "loss": 4.443113803863525, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.3495440729483283, |
| "grad_norm": 0.9157246351242065, |
| "learning_rate": 0.0005, |
| "loss": 4.578097343444824, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.35015197568389056, |
| "grad_norm": 1.034294843673706, |
| "learning_rate": 0.0005, |
| "loss": 4.393146514892578, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.3507598784194529, |
| "grad_norm": 0.9026995301246643, |
| "learning_rate": 0.0005, |
| "loss": 4.868537425994873, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.3513677811550152, |
| "grad_norm": 1.1576241254806519, |
| "learning_rate": 0.0005, |
| "loss": 4.755158424377441, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.3519756838905775, |
| "grad_norm": 1.061812400817871, |
| "learning_rate": 0.0005, |
| "loss": 4.48585319519043, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.3525835866261398, |
| "grad_norm": 0.9842910170555115, |
| "learning_rate": 0.0005, |
| "loss": 4.865891456604004, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.35319148936170214, |
| "grad_norm": 1.0243335962295532, |
| "learning_rate": 0.0005, |
| "loss": 4.523388862609863, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.35379939209726446, |
| "grad_norm": 1.2581957578659058, |
| "learning_rate": 0.0005, |
| "loss": 4.821706771850586, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.35440729483282674, |
| "grad_norm": 1.1777689456939697, |
| "learning_rate": 0.0005, |
| "loss": 4.600160121917725, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.35501519756838906, |
| "grad_norm": 0.9623486995697021, |
| "learning_rate": 0.0005, |
| "loss": 4.775470733642578, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.3556231003039514, |
| "grad_norm": 1.302804708480835, |
| "learning_rate": 0.0005, |
| "loss": 4.704485893249512, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.35623100303951366, |
| "grad_norm": 1.15083646774292, |
| "learning_rate": 0.0005, |
| "loss": 4.685108184814453, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.356838905775076, |
| "grad_norm": 1.0529240369796753, |
| "learning_rate": 0.0005, |
| "loss": 4.762598991394043, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.3574468085106383, |
| "grad_norm": 1.008600115776062, |
| "learning_rate": 0.0005, |
| "loss": 4.711298942565918, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.3580547112462006, |
| "grad_norm": 1.1591368913650513, |
| "learning_rate": 0.0005, |
| "loss": 4.836706638336182, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.3586626139817629, |
| "grad_norm": 1.0372366905212402, |
| "learning_rate": 0.0005, |
| "loss": 4.753532409667969, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.35927051671732524, |
| "grad_norm": 0.9533773064613342, |
| "learning_rate": 0.0005, |
| "loss": 4.787997245788574, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.35987841945288757, |
| "grad_norm": 1.3395041227340698, |
| "learning_rate": 0.0005, |
| "loss": 4.700077533721924, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.36048632218844984, |
| "grad_norm": 1.0645594596862793, |
| "learning_rate": 0.0005, |
| "loss": 4.607672691345215, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.36109422492401216, |
| "grad_norm": 1.2142505645751953, |
| "learning_rate": 0.0005, |
| "loss": 4.6179375648498535, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.3617021276595745, |
| "grad_norm": 1.2730581760406494, |
| "learning_rate": 0.0005, |
| "loss": 4.555119514465332, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.36231003039513676, |
| "grad_norm": 1.0680732727050781, |
| "learning_rate": 0.0005, |
| "loss": 4.700529098510742, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.3629179331306991, |
| "grad_norm": 1.055757761001587, |
| "learning_rate": 0.0005, |
| "loss": 4.544746398925781, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.3635258358662614, |
| "grad_norm": 1.2012107372283936, |
| "learning_rate": 0.0005, |
| "loss": 4.614580154418945, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.3641337386018237, |
| "grad_norm": 1.0662033557891846, |
| "learning_rate": 0.0005, |
| "loss": 4.880558967590332, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.364741641337386, |
| "grad_norm": 1.0305242538452148, |
| "learning_rate": 0.0005, |
| "loss": 4.462358474731445, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.36534954407294834, |
| "grad_norm": 1.0423706769943237, |
| "learning_rate": 0.0005, |
| "loss": 4.591382026672363, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.3659574468085106, |
| "grad_norm": 1.2076576948165894, |
| "learning_rate": 0.0005, |
| "loss": 4.7383599281311035, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.36656534954407294, |
| "grad_norm": 1.0415648221969604, |
| "learning_rate": 0.0005, |
| "loss": 4.586676597595215, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.36717325227963526, |
| "grad_norm": 0.9548492431640625, |
| "learning_rate": 0.0005, |
| "loss": 4.836339950561523, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.3677811550151976, |
| "grad_norm": 1.1116399765014648, |
| "learning_rate": 0.0005, |
| "loss": 4.634486198425293, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.36838905775075986, |
| "grad_norm": 0.9329056739807129, |
| "learning_rate": 0.0005, |
| "loss": 4.806420803070068, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.3689969604863222, |
| "grad_norm": 1.167823314666748, |
| "learning_rate": 0.0005, |
| "loss": 4.594254493713379, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.3696048632218845, |
| "grad_norm": 1.0034370422363281, |
| "learning_rate": 0.0005, |
| "loss": 4.6151347160339355, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.3702127659574468, |
| "grad_norm": 1.0906440019607544, |
| "learning_rate": 0.0005, |
| "loss": 4.540549278259277, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.3708206686930091, |
| "grad_norm": 1.0491790771484375, |
| "learning_rate": 0.0005, |
| "loss": 4.600298881530762, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.37142857142857144, |
| "grad_norm": 1.2935380935668945, |
| "learning_rate": 0.0005, |
| "loss": 4.646307945251465, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.3720364741641337, |
| "grad_norm": 1.1572242975234985, |
| "learning_rate": 0.0005, |
| "loss": 4.820685863494873, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.37264437689969604, |
| "grad_norm": 1.0526167154312134, |
| "learning_rate": 0.0005, |
| "loss": 4.463221549987793, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.37325227963525837, |
| "grad_norm": 1.0142046213150024, |
| "learning_rate": 0.0005, |
| "loss": 4.979160308837891, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.3738601823708207, |
| "grad_norm": 1.0886595249176025, |
| "learning_rate": 0.0005, |
| "loss": 4.659153461456299, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.37446808510638296, |
| "grad_norm": 1.0294383764266968, |
| "learning_rate": 0.0005, |
| "loss": 4.511576175689697, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.3750759878419453, |
| "grad_norm": 1.220738172531128, |
| "learning_rate": 0.0005, |
| "loss": 4.640242576599121, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.3756838905775076, |
| "grad_norm": 0.976274311542511, |
| "learning_rate": 0.0005, |
| "loss": 4.557078838348389, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.3762917933130699, |
| "grad_norm": 1.1121824979782104, |
| "learning_rate": 0.0005, |
| "loss": 4.412234306335449, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.3768996960486322, |
| "grad_norm": 1.0940440893173218, |
| "learning_rate": 0.0005, |
| "loss": 4.597440242767334, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.37750759878419454, |
| "grad_norm": 1.1758757829666138, |
| "learning_rate": 0.0005, |
| "loss": 4.729987144470215, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.3781155015197568, |
| "grad_norm": 0.979016900062561, |
| "learning_rate": 0.0005, |
| "loss": 4.656641960144043, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.37872340425531914, |
| "grad_norm": 1.1017565727233887, |
| "learning_rate": 0.0005, |
| "loss": 4.587738037109375, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.37933130699088147, |
| "grad_norm": 1.0581464767456055, |
| "learning_rate": 0.0005, |
| "loss": 4.452451705932617, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.3799392097264438, |
| "grad_norm": 1.0750993490219116, |
| "learning_rate": 0.0005, |
| "loss": 4.531889915466309, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.38054711246200607, |
| "grad_norm": 0.9821625351905823, |
| "learning_rate": 0.0005, |
| "loss": 4.488890171051025, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.3811550151975684, |
| "grad_norm": 1.0691367387771606, |
| "learning_rate": 0.0005, |
| "loss": 4.62428617477417, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.3817629179331307, |
| "grad_norm": 1.0314120054244995, |
| "learning_rate": 0.0005, |
| "loss": 4.533023834228516, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.382370820668693, |
| "grad_norm": 0.9268558025360107, |
| "learning_rate": 0.0005, |
| "loss": 4.565212249755859, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.3829787234042553, |
| "grad_norm": 1.0632472038269043, |
| "learning_rate": 0.0005, |
| "loss": 4.5511980056762695, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.38358662613981764, |
| "grad_norm": 0.9516937732696533, |
| "learning_rate": 0.0005, |
| "loss": 4.546860694885254, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.3841945288753799, |
| "grad_norm": 0.8885926008224487, |
| "learning_rate": 0.0005, |
| "loss": 4.540233612060547, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.38480243161094224, |
| "grad_norm": 0.9631567001342773, |
| "learning_rate": 0.0005, |
| "loss": 4.552545070648193, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.38541033434650457, |
| "grad_norm": 1.0189249515533447, |
| "learning_rate": 0.0005, |
| "loss": 4.413745880126953, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.3860182370820669, |
| "grad_norm": 1.0094175338745117, |
| "learning_rate": 0.0005, |
| "loss": 4.266282081604004, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.38662613981762917, |
| "grad_norm": 1.1108192205429077, |
| "learning_rate": 0.0005, |
| "loss": 4.169710159301758, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.3872340425531915, |
| "grad_norm": 1.1999133825302124, |
| "learning_rate": 0.0005, |
| "loss": 4.5471391677856445, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.3878419452887538, |
| "grad_norm": 1.047059178352356, |
| "learning_rate": 0.0005, |
| "loss": 4.793215751647949, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.3884498480243161, |
| "grad_norm": 1.1927613019943237, |
| "learning_rate": 0.0005, |
| "loss": 4.474370002746582, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.3890577507598784, |
| "grad_norm": 1.0722092390060425, |
| "learning_rate": 0.0005, |
| "loss": 4.685356140136719, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.38966565349544074, |
| "grad_norm": 1.0422673225402832, |
| "learning_rate": 0.0005, |
| "loss": 4.5289201736450195, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.390273556231003, |
| "grad_norm": 0.9556507468223572, |
| "learning_rate": 0.0005, |
| "loss": 4.421667098999023, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.39088145896656534, |
| "grad_norm": 1.0354868173599243, |
| "learning_rate": 0.0005, |
| "loss": 4.573639869689941, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.39148936170212767, |
| "grad_norm": 1.0089163780212402, |
| "learning_rate": 0.0005, |
| "loss": 4.505742073059082, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.39209726443769, |
| "grad_norm": 1.098516821861267, |
| "learning_rate": 0.0005, |
| "loss": 4.61726713180542, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.39270516717325227, |
| "grad_norm": 1.0022438764572144, |
| "learning_rate": 0.0005, |
| "loss": 4.8146162033081055, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.3933130699088146, |
| "grad_norm": 1.219514012336731, |
| "learning_rate": 0.0005, |
| "loss": 4.5992279052734375, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.3939209726443769, |
| "grad_norm": 1.0511285066604614, |
| "learning_rate": 0.0005, |
| "loss": 4.65933895111084, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.3945288753799392, |
| "grad_norm": 1.0481231212615967, |
| "learning_rate": 0.0005, |
| "loss": 4.405591011047363, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.3951367781155015, |
| "grad_norm": 1.1169630289077759, |
| "learning_rate": 0.0005, |
| "loss": 4.621652603149414, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.39574468085106385, |
| "grad_norm": 1.031966209411621, |
| "learning_rate": 0.0005, |
| "loss": 4.5710320472717285, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.3963525835866261, |
| "grad_norm": 1.1107763051986694, |
| "learning_rate": 0.0005, |
| "loss": 4.537693023681641, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.39696048632218844, |
| "grad_norm": 0.9889346957206726, |
| "learning_rate": 0.0005, |
| "loss": 4.518610000610352, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.39756838905775077, |
| "grad_norm": 1.1640068292617798, |
| "learning_rate": 0.0005, |
| "loss": 4.595146179199219, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.3981762917933131, |
| "grad_norm": 1.2929025888442993, |
| "learning_rate": 0.0005, |
| "loss": 4.559798240661621, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.39878419452887537, |
| "grad_norm": 1.098781943321228, |
| "learning_rate": 0.0005, |
| "loss": 4.602121353149414, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.3993920972644377, |
| "grad_norm": 1.0199748277664185, |
| "learning_rate": 0.0005, |
| "loss": 4.460375785827637, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.4516689777374268, |
| "learning_rate": 0.0005, |
| "loss": 4.583429336547852, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.4006079027355623, |
| "grad_norm": 1.0523816347122192, |
| "learning_rate": 0.0005, |
| "loss": 4.602944374084473, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.4012158054711246, |
| "grad_norm": 1.052711844444275, |
| "learning_rate": 0.0005, |
| "loss": 4.508934020996094, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.40182370820668695, |
| "grad_norm": 1.0846177339553833, |
| "learning_rate": 0.0005, |
| "loss": 4.532805442810059, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.4024316109422492, |
| "grad_norm": 0.9877490401268005, |
| "learning_rate": 0.0005, |
| "loss": 4.644316673278809, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.40303951367781155, |
| "grad_norm": 1.04659104347229, |
| "learning_rate": 0.0005, |
| "loss": 4.376730918884277, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.40364741641337387, |
| "grad_norm": 1.250658392906189, |
| "learning_rate": 0.0005, |
| "loss": 4.553335666656494, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.40425531914893614, |
| "grad_norm": 1.1647439002990723, |
| "learning_rate": 0.0005, |
| "loss": 4.282361030578613, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.40486322188449847, |
| "grad_norm": 1.086575984954834, |
| "learning_rate": 0.0005, |
| "loss": 4.545602798461914, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.4054711246200608, |
| "grad_norm": 1.0094430446624756, |
| "learning_rate": 0.0005, |
| "loss": 4.514423370361328, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.4060790273556231, |
| "grad_norm": 1.1341593265533447, |
| "learning_rate": 0.0005, |
| "loss": 4.359306812286377, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.4066869300911854, |
| "grad_norm": 1.0556292533874512, |
| "learning_rate": 0.0005, |
| "loss": 4.663166046142578, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.4072948328267477, |
| "grad_norm": 0.9918414950370789, |
| "learning_rate": 0.0005, |
| "loss": 4.348359107971191, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.40790273556231005, |
| "grad_norm": 1.2771086692810059, |
| "learning_rate": 0.0005, |
| "loss": 4.380928993225098, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.4085106382978723, |
| "grad_norm": 1.2792952060699463, |
| "learning_rate": 0.0005, |
| "loss": 4.493129253387451, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.40911854103343465, |
| "grad_norm": 1.115451693534851, |
| "learning_rate": 0.0005, |
| "loss": 4.5493903160095215, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.409726443768997, |
| "grad_norm": 1.02188241481781, |
| "learning_rate": 0.0005, |
| "loss": 4.540634632110596, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.41033434650455924, |
| "grad_norm": 1.1881492137908936, |
| "learning_rate": 0.0005, |
| "loss": 4.6216325759887695, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.41094224924012157, |
| "grad_norm": 1.1510716676712036, |
| "learning_rate": 0.0005, |
| "loss": 4.753006935119629, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.4115501519756839, |
| "grad_norm": 0.9409204125404358, |
| "learning_rate": 0.0005, |
| "loss": 4.558671951293945, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.4121580547112462, |
| "grad_norm": 0.9652894735336304, |
| "learning_rate": 0.0005, |
| "loss": 4.586430549621582, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.4127659574468085, |
| "grad_norm": 1.0625907182693481, |
| "learning_rate": 0.0005, |
| "loss": 4.467252254486084, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.4133738601823708, |
| "grad_norm": 1.078682780265808, |
| "learning_rate": 0.0005, |
| "loss": 4.66164493560791, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.41398176291793315, |
| "grad_norm": 1.0304362773895264, |
| "learning_rate": 0.0005, |
| "loss": 4.765620231628418, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.4145896656534954, |
| "grad_norm": 0.9225407242774963, |
| "learning_rate": 0.0005, |
| "loss": 4.550148010253906, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.41519756838905775, |
| "grad_norm": 1.0196508169174194, |
| "learning_rate": 0.0005, |
| "loss": 4.9098100662231445, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.4158054711246201, |
| "grad_norm": 0.9961191415786743, |
| "learning_rate": 0.0005, |
| "loss": 4.4087114334106445, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.41641337386018235, |
| "grad_norm": 1.0987764596939087, |
| "learning_rate": 0.0005, |
| "loss": 4.60486364364624, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.41702127659574467, |
| "grad_norm": 1.3485429286956787, |
| "learning_rate": 0.0005, |
| "loss": 4.509698390960693, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.417629179331307, |
| "grad_norm": 1.0834795236587524, |
| "learning_rate": 0.0005, |
| "loss": 4.131223678588867, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.4182370820668693, |
| "grad_norm": 1.2778581380844116, |
| "learning_rate": 0.0005, |
| "loss": 4.530914306640625, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.4188449848024316, |
| "grad_norm": 0.9555144309997559, |
| "learning_rate": 0.0005, |
| "loss": 4.773101806640625, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.4194528875379939, |
| "grad_norm": 1.0608127117156982, |
| "learning_rate": 0.0005, |
| "loss": 4.457843780517578, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.42006079027355625, |
| "grad_norm": 1.2380342483520508, |
| "learning_rate": 0.0005, |
| "loss": 4.438450813293457, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.4206686930091185, |
| "grad_norm": 1.0234472751617432, |
| "learning_rate": 0.0005, |
| "loss": 4.412363052368164, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.42127659574468085, |
| "grad_norm": 1.0774229764938354, |
| "learning_rate": 0.0005, |
| "loss": 4.687466144561768, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.4218844984802432, |
| "grad_norm": 0.9822944402694702, |
| "learning_rate": 0.0005, |
| "loss": 4.798013687133789, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.42249240121580545, |
| "grad_norm": 1.1232951879501343, |
| "learning_rate": 0.0005, |
| "loss": 4.548072814941406, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.4231003039513678, |
| "grad_norm": 1.5027856826782227, |
| "learning_rate": 0.0005, |
| "loss": 4.7048797607421875, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.4237082066869301, |
| "grad_norm": 1.036541223526001, |
| "learning_rate": 0.0005, |
| "loss": 4.6969709396362305, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.4243161094224924, |
| "grad_norm": 1.1823787689208984, |
| "learning_rate": 0.0005, |
| "loss": 4.457941055297852, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.4249240121580547, |
| "grad_norm": 0.9230678081512451, |
| "learning_rate": 0.0005, |
| "loss": 4.421998500823975, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.425531914893617, |
| "grad_norm": 1.7750741243362427, |
| "learning_rate": 0.0005, |
| "loss": 4.76076602935791, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.42613981762917935, |
| "grad_norm": 1.0719808340072632, |
| "learning_rate": 0.0005, |
| "loss": 4.580799102783203, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.4267477203647416, |
| "grad_norm": 1.0799646377563477, |
| "learning_rate": 0.0005, |
| "loss": 4.311610221862793, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.42735562310030395, |
| "grad_norm": 0.8947767019271851, |
| "learning_rate": 0.0005, |
| "loss": 4.4494123458862305, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.4279635258358663, |
| "grad_norm": 1.0298351049423218, |
| "learning_rate": 0.0005, |
| "loss": 4.393129348754883, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.42857142857142855, |
| "grad_norm": 1.098189115524292, |
| "learning_rate": 0.0005, |
| "loss": 4.199446678161621, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.4291793313069909, |
| "grad_norm": 1.112589955329895, |
| "learning_rate": 0.0005, |
| "loss": 4.471273422241211, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.4297872340425532, |
| "grad_norm": 1.2152529954910278, |
| "learning_rate": 0.0005, |
| "loss": 4.727916240692139, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.43039513677811553, |
| "grad_norm": 1.1162065267562866, |
| "learning_rate": 0.0005, |
| "loss": 4.282822132110596, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.4310030395136778, |
| "grad_norm": 1.2259479761123657, |
| "learning_rate": 0.0005, |
| "loss": 4.1524977684021, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.4316109422492401, |
| "grad_norm": 1.0089929103851318, |
| "learning_rate": 0.0005, |
| "loss": 4.150537490844727, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.43221884498480245, |
| "grad_norm": 0.9101129770278931, |
| "learning_rate": 0.0005, |
| "loss": 4.379437446594238, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.4328267477203647, |
| "grad_norm": 0.9849691390991211, |
| "learning_rate": 0.0005, |
| "loss": 4.299429893493652, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.43343465045592705, |
| "grad_norm": 0.9956537485122681, |
| "learning_rate": 0.0005, |
| "loss": 4.439446926116943, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.4340425531914894, |
| "grad_norm": 1.0646576881408691, |
| "learning_rate": 0.0005, |
| "loss": 4.680734634399414, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.43465045592705165, |
| "grad_norm": 1.1268900632858276, |
| "learning_rate": 0.0005, |
| "loss": 4.390021324157715, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.435258358662614, |
| "grad_norm": 1.1238709688186646, |
| "learning_rate": 0.0005, |
| "loss": 4.414492607116699, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.4358662613981763, |
| "grad_norm": 1.0272475481033325, |
| "learning_rate": 0.0005, |
| "loss": 4.48759651184082, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.43647416413373863, |
| "grad_norm": 0.9443128108978271, |
| "learning_rate": 0.0005, |
| "loss": 4.241964340209961, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.4370820668693009, |
| "grad_norm": 0.8795979022979736, |
| "learning_rate": 0.0005, |
| "loss": 4.438322067260742, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.4376899696048632, |
| "grad_norm": 1.0388433933258057, |
| "learning_rate": 0.0005, |
| "loss": 4.499500274658203, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.43829787234042555, |
| "grad_norm": 1.0285965204238892, |
| "learning_rate": 0.0005, |
| "loss": 4.458085060119629, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.4389057750759878, |
| "grad_norm": 1.0486245155334473, |
| "learning_rate": 0.0005, |
| "loss": 4.3121843338012695, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.43951367781155015, |
| "grad_norm": 0.974229633808136, |
| "learning_rate": 0.0005, |
| "loss": 4.484938621520996, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.4401215805471125, |
| "grad_norm": 1.028061032295227, |
| "learning_rate": 0.0005, |
| "loss": 4.343748092651367, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.44072948328267475, |
| "grad_norm": 1.247310757637024, |
| "learning_rate": 0.0005, |
| "loss": 4.43183708190918, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.4413373860182371, |
| "grad_norm": 1.07508385181427, |
| "learning_rate": 0.0005, |
| "loss": 4.473773956298828, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.4419452887537994, |
| "grad_norm": 1.0861989259719849, |
| "learning_rate": 0.0005, |
| "loss": 4.50743293762207, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.4425531914893617, |
| "grad_norm": 1.043446660041809, |
| "learning_rate": 0.0005, |
| "loss": 4.65224027633667, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.443161094224924, |
| "grad_norm": 1.1153486967086792, |
| "learning_rate": 0.0005, |
| "loss": 4.275899887084961, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.44376899696048633, |
| "grad_norm": 1.0387423038482666, |
| "learning_rate": 0.0005, |
| "loss": 4.571664333343506, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.44437689969604866, |
| "grad_norm": 1.1121833324432373, |
| "learning_rate": 0.0005, |
| "loss": 4.472873687744141, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.4449848024316109, |
| "grad_norm": 1.110357642173767, |
| "learning_rate": 0.0005, |
| "loss": 4.507586479187012, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.44559270516717325, |
| "grad_norm": 1.0192921161651611, |
| "learning_rate": 0.0005, |
| "loss": 4.614180564880371, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.4462006079027356, |
| "grad_norm": 1.2011562585830688, |
| "learning_rate": 0.0005, |
| "loss": 4.410806655883789, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.44680851063829785, |
| "grad_norm": 1.045922040939331, |
| "learning_rate": 0.0005, |
| "loss": 4.522254943847656, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.4474164133738602, |
| "grad_norm": 1.1084001064300537, |
| "learning_rate": 0.0005, |
| "loss": 4.473600387573242, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.4480243161094225, |
| "grad_norm": 1.0580531358718872, |
| "learning_rate": 0.0005, |
| "loss": 4.495148658752441, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.4486322188449848, |
| "grad_norm": 1.0791500806808472, |
| "learning_rate": 0.0005, |
| "loss": 4.559470176696777, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.4492401215805471, |
| "grad_norm": 0.9919356107711792, |
| "learning_rate": 0.0005, |
| "loss": 4.445730209350586, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.44984802431610943, |
| "grad_norm": 0.9215476512908936, |
| "learning_rate": 0.0005, |
| "loss": 4.360682487487793, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.45045592705167176, |
| "grad_norm": 1.1767232418060303, |
| "learning_rate": 0.0005, |
| "loss": 4.51902437210083, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.451063829787234, |
| "grad_norm": 1.1746350526809692, |
| "learning_rate": 0.0005, |
| "loss": 4.362285614013672, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.45167173252279635, |
| "grad_norm": 1.0243946313858032, |
| "learning_rate": 0.0005, |
| "loss": 4.443662166595459, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.4522796352583587, |
| "grad_norm": 1.034515619277954, |
| "learning_rate": 0.0005, |
| "loss": 4.329188346862793, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.45288753799392095, |
| "grad_norm": 1.1209111213684082, |
| "learning_rate": 0.0005, |
| "loss": 4.6534223556518555, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.4534954407294833, |
| "grad_norm": 1.0455032587051392, |
| "learning_rate": 0.0005, |
| "loss": 4.511608600616455, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.4541033434650456, |
| "grad_norm": 1.002439022064209, |
| "learning_rate": 0.0005, |
| "loss": 4.4008378982543945, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.4547112462006079, |
| "grad_norm": 0.9780976176261902, |
| "learning_rate": 0.0005, |
| "loss": 4.478031158447266, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.4553191489361702, |
| "grad_norm": 1.0394052267074585, |
| "learning_rate": 0.0005, |
| "loss": 4.431166648864746, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.45592705167173253, |
| "grad_norm": 1.0838037729263306, |
| "learning_rate": 0.0005, |
| "loss": 4.38276481628418, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.45653495440729486, |
| "grad_norm": 1.2306514978408813, |
| "learning_rate": 0.0005, |
| "loss": 4.427013874053955, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.45714285714285713, |
| "grad_norm": 0.8942012190818787, |
| "learning_rate": 0.0005, |
| "loss": 4.463613986968994, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.45775075987841946, |
| "grad_norm": 1.0273581743240356, |
| "learning_rate": 0.0005, |
| "loss": 4.331677436828613, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.4583586626139818, |
| "grad_norm": 1.061225414276123, |
| "learning_rate": 0.0005, |
| "loss": 4.360611438751221, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.45896656534954405, |
| "grad_norm": 0.9954508543014526, |
| "learning_rate": 0.0005, |
| "loss": 4.37364387512207, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.4595744680851064, |
| "grad_norm": 0.9806733131408691, |
| "learning_rate": 0.0005, |
| "loss": 4.469931602478027, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.4601823708206687, |
| "grad_norm": 1.131806492805481, |
| "learning_rate": 0.0005, |
| "loss": 4.487429618835449, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.460790273556231, |
| "grad_norm": 0.9451801776885986, |
| "learning_rate": 0.0005, |
| "loss": 4.476114749908447, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.4613981762917933, |
| "grad_norm": 1.064634084701538, |
| "learning_rate": 0.0005, |
| "loss": 4.607744216918945, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.46200607902735563, |
| "grad_norm": 1.0846835374832153, |
| "learning_rate": 0.0005, |
| "loss": 4.312438011169434, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.46261398176291796, |
| "grad_norm": 0.9688083529472351, |
| "learning_rate": 0.0005, |
| "loss": 4.376931667327881, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.46322188449848023, |
| "grad_norm": 1.1652911901474, |
| "learning_rate": 0.0005, |
| "loss": 4.416962623596191, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.46382978723404256, |
| "grad_norm": 1.147851586341858, |
| "learning_rate": 0.0005, |
| "loss": 4.349986553192139, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.4644376899696049, |
| "grad_norm": 0.9702697992324829, |
| "learning_rate": 0.0005, |
| "loss": 4.497674942016602, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.46504559270516715, |
| "grad_norm": 1.0843515396118164, |
| "learning_rate": 0.0005, |
| "loss": 4.49877405166626, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.4656534954407295, |
| "grad_norm": 0.9171056747436523, |
| "learning_rate": 0.0005, |
| "loss": 4.1539201736450195, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.4662613981762918, |
| "grad_norm": 1.164944052696228, |
| "learning_rate": 0.0005, |
| "loss": 4.509303092956543, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.4668693009118541, |
| "grad_norm": 1.0968433618545532, |
| "learning_rate": 0.0005, |
| "loss": 4.4588727951049805, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.4674772036474164, |
| "grad_norm": 1.0154880285263062, |
| "learning_rate": 0.0005, |
| "loss": 4.611554145812988, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.46808510638297873, |
| "grad_norm": 0.9653189778327942, |
| "learning_rate": 0.0005, |
| "loss": 4.324926376342773, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.46869300911854106, |
| "grad_norm": 1.1051913499832153, |
| "learning_rate": 0.0005, |
| "loss": 4.4647111892700195, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.46930091185410333, |
| "grad_norm": 0.9223854541778564, |
| "learning_rate": 0.0005, |
| "loss": 4.7103400230407715, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.46990881458966566, |
| "grad_norm": 1.124935507774353, |
| "learning_rate": 0.0005, |
| "loss": 4.453402519226074, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.470516717325228, |
| "grad_norm": 1.3314533233642578, |
| "learning_rate": 0.0005, |
| "loss": 4.297192573547363, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.47112462006079026, |
| "grad_norm": 1.0218007564544678, |
| "learning_rate": 0.0005, |
| "loss": 4.496466159820557, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.4717325227963526, |
| "grad_norm": 1.0308325290679932, |
| "learning_rate": 0.0005, |
| "loss": 4.3223772048950195, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.4723404255319149, |
| "grad_norm": 1.1283831596374512, |
| "learning_rate": 0.0005, |
| "loss": 4.398843288421631, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.4729483282674772, |
| "grad_norm": 1.1089282035827637, |
| "learning_rate": 0.0005, |
| "loss": 4.226986408233643, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.4735562310030395, |
| "grad_norm": 0.9950074553489685, |
| "learning_rate": 0.0005, |
| "loss": 4.401683807373047, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.47416413373860183, |
| "grad_norm": 1.1220934391021729, |
| "learning_rate": 0.0005, |
| "loss": 4.23845100402832, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.47477203647416416, |
| "grad_norm": 1.1314822435379028, |
| "learning_rate": 0.0005, |
| "loss": 4.648829936981201, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.47537993920972643, |
| "grad_norm": 1.0067565441131592, |
| "learning_rate": 0.0005, |
| "loss": 4.342182159423828, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.47598784194528876, |
| "grad_norm": 1.4291990995407104, |
| "learning_rate": 0.0005, |
| "loss": 4.222455978393555, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.4765957446808511, |
| "grad_norm": 1.0664339065551758, |
| "learning_rate": 0.0005, |
| "loss": 4.533761978149414, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.47720364741641336, |
| "grad_norm": 0.837992787361145, |
| "learning_rate": 0.0005, |
| "loss": 4.583135604858398, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.4778115501519757, |
| "grad_norm": 1.0775222778320312, |
| "learning_rate": 0.0005, |
| "loss": 4.407233238220215, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.478419452887538, |
| "grad_norm": 1.1260716915130615, |
| "learning_rate": 0.0005, |
| "loss": 4.408687114715576, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.4790273556231003, |
| "grad_norm": 1.1476800441741943, |
| "learning_rate": 0.0005, |
| "loss": 4.5264692306518555, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.4796352583586626, |
| "grad_norm": 1.0624704360961914, |
| "learning_rate": 0.0005, |
| "loss": 4.47670316696167, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.48024316109422494, |
| "grad_norm": 1.4008615016937256, |
| "learning_rate": 0.0005, |
| "loss": 4.542054653167725, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.4808510638297872, |
| "grad_norm": 1.6348981857299805, |
| "learning_rate": 0.0005, |
| "loss": 4.272322654724121, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.48145896656534953, |
| "grad_norm": 1.110823154449463, |
| "learning_rate": 0.0005, |
| "loss": 4.32360315322876, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.48206686930091186, |
| "grad_norm": 0.9771617650985718, |
| "learning_rate": 0.0005, |
| "loss": 4.4510321617126465, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.4826747720364742, |
| "grad_norm": 1.0948632955551147, |
| "learning_rate": 0.0005, |
| "loss": 4.335118293762207, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.48328267477203646, |
| "grad_norm": 1.2692338228225708, |
| "learning_rate": 0.0005, |
| "loss": 4.3776655197143555, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.4838905775075988, |
| "grad_norm": 0.8474439978599548, |
| "learning_rate": 0.0005, |
| "loss": 4.24397087097168, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.4844984802431611, |
| "grad_norm": 0.9258842468261719, |
| "learning_rate": 0.0005, |
| "loss": 4.602321624755859, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.4851063829787234, |
| "grad_norm": 1.1678420305252075, |
| "learning_rate": 0.0005, |
| "loss": 4.5578203201293945, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.4857142857142857, |
| "grad_norm": 1.0839719772338867, |
| "learning_rate": 0.0005, |
| "loss": 4.4719109535217285, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.48632218844984804, |
| "grad_norm": 1.0721313953399658, |
| "learning_rate": 0.0005, |
| "loss": 4.1971516609191895, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.4869300911854103, |
| "grad_norm": 1.077587366104126, |
| "learning_rate": 0.0005, |
| "loss": 4.452859401702881, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.48753799392097263, |
| "grad_norm": 0.9456436038017273, |
| "learning_rate": 0.0005, |
| "loss": 4.417455673217773, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.48814589665653496, |
| "grad_norm": 0.9326696991920471, |
| "learning_rate": 0.0005, |
| "loss": 4.389290809631348, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.4887537993920973, |
| "grad_norm": 1.0423635244369507, |
| "learning_rate": 0.0005, |
| "loss": 4.4252448081970215, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.48936170212765956, |
| "grad_norm": 1.0106087923049927, |
| "learning_rate": 0.0005, |
| "loss": 4.29632043838501, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.4899696048632219, |
| "grad_norm": 0.8635157942771912, |
| "learning_rate": 0.0005, |
| "loss": 4.45654296875, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.4905775075987842, |
| "grad_norm": 0.9637815952301025, |
| "learning_rate": 0.0005, |
| "loss": 4.305363655090332, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.4911854103343465, |
| "grad_norm": 0.9523938298225403, |
| "learning_rate": 0.0005, |
| "loss": 4.561666965484619, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.4917933130699088, |
| "grad_norm": 1.1045883893966675, |
| "learning_rate": 0.0005, |
| "loss": 4.385721206665039, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.49240121580547114, |
| "grad_norm": 0.951117992401123, |
| "learning_rate": 0.0005, |
| "loss": 4.302276611328125, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.4930091185410334, |
| "grad_norm": 1.091933250427246, |
| "learning_rate": 0.0005, |
| "loss": 4.64669132232666, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.49361702127659574, |
| "grad_norm": 1.0813966989517212, |
| "learning_rate": 0.0005, |
| "loss": 4.266849517822266, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.49422492401215806, |
| "grad_norm": 0.9683962464332581, |
| "learning_rate": 0.0005, |
| "loss": 4.304372787475586, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.4948328267477204, |
| "grad_norm": 0.960382342338562, |
| "learning_rate": 0.0005, |
| "loss": 4.221304416656494, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.49544072948328266, |
| "grad_norm": 0.9746182560920715, |
| "learning_rate": 0.0005, |
| "loss": 4.392333030700684, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.496048632218845, |
| "grad_norm": 0.9449917078018188, |
| "learning_rate": 0.0005, |
| "loss": 4.274685859680176, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.4966565349544073, |
| "grad_norm": 0.8899694085121155, |
| "learning_rate": 0.0005, |
| "loss": 4.206332206726074, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.4972644376899696, |
| "grad_norm": 0.9504559636116028, |
| "learning_rate": 0.0005, |
| "loss": 4.2690534591674805, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.4978723404255319, |
| "grad_norm": 0.9823598265647888, |
| "learning_rate": 0.0005, |
| "loss": 4.379746437072754, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.49848024316109424, |
| "grad_norm": 1.0227431058883667, |
| "learning_rate": 0.0005, |
| "loss": 4.233619213104248, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.4990881458966565, |
| "grad_norm": 0.9714612364768982, |
| "learning_rate": 0.0005, |
| "loss": 4.607011795043945, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.49969604863221884, |
| "grad_norm": 0.9920446276664734, |
| "learning_rate": 0.0005, |
| "loss": 4.5199127197265625, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.5003039513677812, |
| "grad_norm": 1.0052610635757446, |
| "learning_rate": 0.0005, |
| "loss": 4.538883209228516, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.5009118541033435, |
| "grad_norm": 0.961460292339325, |
| "learning_rate": 0.0005, |
| "loss": 4.37430477142334, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.5015197568389058, |
| "grad_norm": 0.9705450534820557, |
| "learning_rate": 0.0005, |
| "loss": 4.36405611038208, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.502127659574468, |
| "grad_norm": 1.0589666366577148, |
| "learning_rate": 0.0005, |
| "loss": 4.532018661499023, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.5027355623100304, |
| "grad_norm": 1.0190895795822144, |
| "learning_rate": 0.0005, |
| "loss": 4.366916656494141, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.5033434650455927, |
| "grad_norm": 1.2047783136367798, |
| "learning_rate": 0.0005, |
| "loss": 4.332704544067383, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.503951367781155, |
| "grad_norm": 0.9100733995437622, |
| "learning_rate": 0.0005, |
| "loss": 4.1280975341796875, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.5045592705167173, |
| "grad_norm": 1.0953924655914307, |
| "learning_rate": 0.0005, |
| "loss": 4.338841438293457, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.5051671732522797, |
| "grad_norm": 1.2325948476791382, |
| "learning_rate": 0.0005, |
| "loss": 4.425684928894043, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.505775075987842, |
| "grad_norm": 1.0776824951171875, |
| "learning_rate": 0.0005, |
| "loss": 4.238302230834961, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.5063829787234042, |
| "grad_norm": 1.002465009689331, |
| "learning_rate": 0.0005, |
| "loss": 4.1673173904418945, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.5069908814589665, |
| "grad_norm": 1.0070068836212158, |
| "learning_rate": 0.0005, |
| "loss": 4.502063751220703, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.5075987841945289, |
| "grad_norm": 0.9460301995277405, |
| "learning_rate": 0.0005, |
| "loss": 4.266294479370117, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.5082066869300912, |
| "grad_norm": 0.9609605669975281, |
| "learning_rate": 0.0005, |
| "loss": 4.49836540222168, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.5088145896656535, |
| "grad_norm": 1.0298100709915161, |
| "learning_rate": 0.0005, |
| "loss": 4.342093467712402, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.5094224924012158, |
| "grad_norm": 1.102327585220337, |
| "learning_rate": 0.0005, |
| "loss": 4.25087833404541, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.5100303951367782, |
| "grad_norm": 1.2569550275802612, |
| "learning_rate": 0.0005, |
| "loss": 4.285090446472168, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.5106382978723404, |
| "grad_norm": 1.0138150453567505, |
| "learning_rate": 0.0005, |
| "loss": 4.334506034851074, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.5112462006079027, |
| "grad_norm": 1.0152983665466309, |
| "learning_rate": 0.0005, |
| "loss": 4.283235549926758, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.511854103343465, |
| "grad_norm": 1.1372138261795044, |
| "learning_rate": 0.0005, |
| "loss": 4.07025146484375, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.5124620060790274, |
| "grad_norm": 1.1843246221542358, |
| "learning_rate": 0.0005, |
| "loss": 4.353334426879883, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.5130699088145897, |
| "grad_norm": 1.1458396911621094, |
| "learning_rate": 0.0005, |
| "loss": 4.34335994720459, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.513677811550152, |
| "grad_norm": 1.0594899654388428, |
| "learning_rate": 0.0005, |
| "loss": 4.31781005859375, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.5142857142857142, |
| "grad_norm": 0.844513475894928, |
| "learning_rate": 0.0005, |
| "loss": 4.4846577644348145, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.5148936170212766, |
| "grad_norm": 2.6839306354522705, |
| "learning_rate": 0.0005, |
| "loss": 4.262670993804932, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.5155015197568389, |
| "grad_norm": 1.0088754892349243, |
| "learning_rate": 0.0005, |
| "loss": 4.266050338745117, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.5161094224924012, |
| "grad_norm": 1.0849522352218628, |
| "learning_rate": 0.0005, |
| "loss": 4.108889102935791, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.5167173252279635, |
| "grad_norm": 1.0903068780899048, |
| "learning_rate": 0.0005, |
| "loss": 4.313821315765381, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.5173252279635259, |
| "grad_norm": 1.1618335247039795, |
| "learning_rate": 0.0005, |
| "loss": 4.295135498046875, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.5179331306990882, |
| "grad_norm": 0.9828124046325684, |
| "learning_rate": 0.0005, |
| "loss": 4.440587043762207, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.5185410334346504, |
| "grad_norm": 1.131939172744751, |
| "learning_rate": 0.0005, |
| "loss": 4.306354522705078, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.5191489361702127, |
| "grad_norm": 1.3951880931854248, |
| "learning_rate": 0.0005, |
| "loss": 4.395257949829102, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.5197568389057751, |
| "grad_norm": 1.28059983253479, |
| "learning_rate": 0.0005, |
| "loss": 4.033473968505859, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.5203647416413374, |
| "grad_norm": 0.9717862606048584, |
| "learning_rate": 0.0005, |
| "loss": 4.356355667114258, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.5209726443768997, |
| "grad_norm": 1.043353796005249, |
| "learning_rate": 0.0005, |
| "loss": 4.250835418701172, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.521580547112462, |
| "grad_norm": 1.016579508781433, |
| "learning_rate": 0.0005, |
| "loss": 4.286150932312012, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.5221884498480243, |
| "grad_norm": 1.112782597541809, |
| "learning_rate": 0.0005, |
| "loss": 4.598012924194336, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.5227963525835866, |
| "grad_norm": 1.1940479278564453, |
| "learning_rate": 0.0005, |
| "loss": 4.4383955001831055, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.5234042553191489, |
| "grad_norm": 1.254970669746399, |
| "learning_rate": 0.0005, |
| "loss": 4.322863578796387, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.5240121580547112, |
| "grad_norm": 1.0700422525405884, |
| "learning_rate": 0.0005, |
| "loss": 4.244253158569336, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.5246200607902736, |
| "grad_norm": 1.0553544759750366, |
| "learning_rate": 0.0005, |
| "loss": 4.310792446136475, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.5252279635258359, |
| "grad_norm": 1.0288846492767334, |
| "learning_rate": 0.0005, |
| "loss": 4.3274383544921875, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.5258358662613982, |
| "grad_norm": 1.0445955991744995, |
| "learning_rate": 0.0005, |
| "loss": 4.45347261428833, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.5264437689969604, |
| "grad_norm": 1.1357736587524414, |
| "learning_rate": 0.0005, |
| "loss": 4.4809064865112305, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.5270516717325228, |
| "grad_norm": 1.109326720237732, |
| "learning_rate": 0.0005, |
| "loss": 4.253253936767578, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.5276595744680851, |
| "grad_norm": 1.1890736818313599, |
| "learning_rate": 0.0005, |
| "loss": 4.426365852355957, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.5282674772036474, |
| "grad_norm": 1.0840505361557007, |
| "learning_rate": 0.0005, |
| "loss": 4.321274280548096, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.5288753799392097, |
| "grad_norm": 1.2200610637664795, |
| "learning_rate": 0.0005, |
| "loss": 4.557803153991699, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.5294832826747721, |
| "grad_norm": 0.9972710609436035, |
| "learning_rate": 0.0005, |
| "loss": 4.23234748840332, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.5300911854103344, |
| "grad_norm": 1.0316972732543945, |
| "learning_rate": 0.0005, |
| "loss": 4.139028549194336, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.5306990881458966, |
| "grad_norm": 1.0380617380142212, |
| "learning_rate": 0.0005, |
| "loss": 4.348488807678223, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.5313069908814589, |
| "grad_norm": 0.9867698550224304, |
| "learning_rate": 0.0005, |
| "loss": 4.302568435668945, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.5319148936170213, |
| "grad_norm": 1.0779541730880737, |
| "learning_rate": 0.0005, |
| "loss": 4.425013542175293, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.5325227963525836, |
| "grad_norm": 1.2543246746063232, |
| "learning_rate": 0.0005, |
| "loss": 4.724435806274414, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.5331306990881459, |
| "grad_norm": 1.2280689477920532, |
| "learning_rate": 0.0005, |
| "loss": 4.2406415939331055, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.5337386018237082, |
| "grad_norm": 1.3842073678970337, |
| "learning_rate": 0.0005, |
| "loss": 4.396044731140137, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.5343465045592705, |
| "grad_norm": 1.0350067615509033, |
| "learning_rate": 0.0005, |
| "loss": 4.17176628112793, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.5349544072948328, |
| "grad_norm": 0.9484389424324036, |
| "learning_rate": 0.0005, |
| "loss": 4.430863380432129, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.5355623100303951, |
| "grad_norm": 1.1557071208953857, |
| "learning_rate": 0.0005, |
| "loss": 4.12956428527832, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.5361702127659574, |
| "grad_norm": 0.9079960584640503, |
| "learning_rate": 0.0005, |
| "loss": 4.4100542068481445, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.5367781155015198, |
| "grad_norm": 0.9755933880805969, |
| "learning_rate": 0.0005, |
| "loss": 4.136897563934326, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.5373860182370821, |
| "grad_norm": 1.0319873094558716, |
| "learning_rate": 0.0005, |
| "loss": 4.440415859222412, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.5379939209726444, |
| "grad_norm": 0.8542789220809937, |
| "learning_rate": 0.0005, |
| "loss": 4.413039207458496, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.5386018237082066, |
| "grad_norm": 1.0158871412277222, |
| "learning_rate": 0.0005, |
| "loss": 4.379025459289551, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.539209726443769, |
| "grad_norm": 0.8926265835762024, |
| "learning_rate": 0.0005, |
| "loss": 4.344198226928711, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.5398176291793313, |
| "grad_norm": 0.8857081532478333, |
| "learning_rate": 0.0005, |
| "loss": 4.38722562789917, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.5404255319148936, |
| "grad_norm": 0.9595281481742859, |
| "learning_rate": 0.0005, |
| "loss": 4.3452959060668945, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.541033434650456, |
| "grad_norm": 0.9428173303604126, |
| "learning_rate": 0.0005, |
| "loss": 4.258479118347168, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.5416413373860183, |
| "grad_norm": 1.5479097366333008, |
| "learning_rate": 0.0005, |
| "loss": 4.245420455932617, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.5422492401215806, |
| "grad_norm": 1.1619681119918823, |
| "learning_rate": 0.0005, |
| "loss": 4.385200500488281, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.5428571428571428, |
| "grad_norm": 0.9958190321922302, |
| "learning_rate": 0.0005, |
| "loss": 4.102227687835693, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.5434650455927051, |
| "grad_norm": 1.0156055688858032, |
| "learning_rate": 0.0005, |
| "loss": 4.067695617675781, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.5440729483282675, |
| "grad_norm": 1.1579831838607788, |
| "learning_rate": 0.0005, |
| "loss": 4.48448371887207, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.5446808510638298, |
| "grad_norm": 1.23504638671875, |
| "learning_rate": 0.0005, |
| "loss": 4.5317583084106445, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.5452887537993921, |
| "grad_norm": 1.167401909828186, |
| "learning_rate": 0.0005, |
| "loss": 4.3551435470581055, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.5458966565349544, |
| "grad_norm": 1.4126181602478027, |
| "learning_rate": 0.0005, |
| "loss": 4.373387813568115, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.5465045592705167, |
| "grad_norm": 1.152944564819336, |
| "learning_rate": 0.0005, |
| "loss": 3.9645819664001465, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.547112462006079, |
| "grad_norm": 1.5390210151672363, |
| "learning_rate": 0.0005, |
| "loss": 4.454073429107666, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.5477203647416413, |
| "grad_norm": 1.0349818468093872, |
| "learning_rate": 0.0005, |
| "loss": 4.398721694946289, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.5483282674772036, |
| "grad_norm": 1.0963656902313232, |
| "learning_rate": 0.0005, |
| "loss": 4.0993242263793945, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.548936170212766, |
| "grad_norm": 1.1737645864486694, |
| "learning_rate": 0.0005, |
| "loss": 4.228819370269775, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.5495440729483283, |
| "grad_norm": 1.1499532461166382, |
| "learning_rate": 0.0005, |
| "loss": 4.329497337341309, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.5501519756838906, |
| "grad_norm": 1.1188825368881226, |
| "learning_rate": 0.0005, |
| "loss": 4.631438255310059, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.5507598784194528, |
| "grad_norm": 1.0337425470352173, |
| "learning_rate": 0.0005, |
| "loss": 4.373821258544922, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.5513677811550152, |
| "grad_norm": 1.098497986793518, |
| "learning_rate": 0.0005, |
| "loss": 4.344779014587402, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.5519756838905775, |
| "grad_norm": 1.0316400527954102, |
| "learning_rate": 0.0005, |
| "loss": 4.097405910491943, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.5525835866261398, |
| "grad_norm": 1.0182708501815796, |
| "learning_rate": 0.0005, |
| "loss": 4.353028297424316, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.5531914893617021, |
| "grad_norm": 1.2190346717834473, |
| "learning_rate": 0.0005, |
| "loss": 4.165225028991699, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.5537993920972645, |
| "grad_norm": 1.017309546470642, |
| "learning_rate": 0.0005, |
| "loss": 4.318220138549805, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.5544072948328268, |
| "grad_norm": 1.1314797401428223, |
| "learning_rate": 0.0005, |
| "loss": 4.121149063110352, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.555015197568389, |
| "grad_norm": 1.0844316482543945, |
| "learning_rate": 0.0005, |
| "loss": 4.213968276977539, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.5556231003039513, |
| "grad_norm": 0.9382945895195007, |
| "learning_rate": 0.0005, |
| "loss": 4.214629650115967, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.5562310030395137, |
| "grad_norm": 1.245742678642273, |
| "learning_rate": 0.0005, |
| "loss": 4.122774124145508, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.556838905775076, |
| "grad_norm": 1.095625877380371, |
| "learning_rate": 0.0005, |
| "loss": 4.296173095703125, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.5574468085106383, |
| "grad_norm": 1.0720239877700806, |
| "learning_rate": 0.0005, |
| "loss": 4.165585994720459, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.5580547112462007, |
| "grad_norm": 1.1082829236984253, |
| "learning_rate": 0.0005, |
| "loss": 4.3951921463012695, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.5586626139817629, |
| "grad_norm": 1.1302635669708252, |
| "learning_rate": 0.0005, |
| "loss": 4.336912155151367, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.5592705167173252, |
| "grad_norm": 0.9658374786376953, |
| "learning_rate": 0.0005, |
| "loss": 4.145001411437988, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.5598784194528875, |
| "grad_norm": 1.2869893312454224, |
| "learning_rate": 0.0005, |
| "loss": 4.438281536102295, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.5604863221884498, |
| "grad_norm": 0.9351769089698792, |
| "learning_rate": 0.0005, |
| "loss": 4.342588424682617, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.5610942249240122, |
| "grad_norm": 1.075165867805481, |
| "learning_rate": 0.0005, |
| "loss": 4.3024187088012695, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.5617021276595745, |
| "grad_norm": 1.0462286472320557, |
| "learning_rate": 0.0005, |
| "loss": 4.308043479919434, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.5623100303951368, |
| "grad_norm": 1.1331902742385864, |
| "learning_rate": 0.0005, |
| "loss": 4.122361183166504, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.562917933130699, |
| "grad_norm": 1.0483379364013672, |
| "learning_rate": 0.0005, |
| "loss": 4.140399932861328, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.5635258358662614, |
| "grad_norm": 1.0775599479675293, |
| "learning_rate": 0.0005, |
| "loss": 4.258686065673828, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.5641337386018237, |
| "grad_norm": 1.1621100902557373, |
| "learning_rate": 0.0005, |
| "loss": 4.177057266235352, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.564741641337386, |
| "grad_norm": 1.144015908241272, |
| "learning_rate": 0.0005, |
| "loss": 3.854235887527466, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.5653495440729484, |
| "grad_norm": 1.0188685655593872, |
| "learning_rate": 0.0005, |
| "loss": 4.226658821105957, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.5659574468085107, |
| "grad_norm": 1.214069128036499, |
| "learning_rate": 0.0005, |
| "loss": 4.558093070983887, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.5665653495440729, |
| "grad_norm": 1.0221775770187378, |
| "learning_rate": 0.0005, |
| "loss": 4.362860202789307, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.5671732522796352, |
| "grad_norm": 1.1003692150115967, |
| "learning_rate": 0.0005, |
| "loss": 4.412820339202881, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.5677811550151975, |
| "grad_norm": 1.0189692974090576, |
| "learning_rate": 0.0005, |
| "loss": 4.141862392425537, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.5683890577507599, |
| "grad_norm": 1.1275514364242554, |
| "learning_rate": 0.0005, |
| "loss": 4.0759077072143555, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.5689969604863222, |
| "grad_norm": 1.0595769882202148, |
| "learning_rate": 0.0005, |
| "loss": 4.234007835388184, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.5696048632218845, |
| "grad_norm": 1.0620779991149902, |
| "learning_rate": 0.0005, |
| "loss": 4.242690086364746, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.5702127659574469, |
| "grad_norm": 1.0344425439834595, |
| "learning_rate": 0.0005, |
| "loss": 4.393516540527344, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.5708206686930091, |
| "grad_norm": 1.1058911085128784, |
| "learning_rate": 0.0005, |
| "loss": 4.163288116455078, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 1.38120698928833, |
| "learning_rate": 0.0005, |
| "loss": 4.052524566650391, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.5720364741641337, |
| "grad_norm": 1.0876317024230957, |
| "learning_rate": 0.0005, |
| "loss": 4.03524112701416, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.572644376899696, |
| "grad_norm": 1.0367745161056519, |
| "learning_rate": 0.0005, |
| "loss": 4.183863639831543, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.5732522796352584, |
| "grad_norm": 1.008543848991394, |
| "learning_rate": 0.0005, |
| "loss": 4.219581127166748, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.5738601823708207, |
| "grad_norm": 1.0349946022033691, |
| "learning_rate": 0.0005, |
| "loss": 4.3019561767578125, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.574468085106383, |
| "grad_norm": 1.5670639276504517, |
| "learning_rate": 0.0005, |
| "loss": 4.330730438232422, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.5750759878419452, |
| "grad_norm": 1.0402114391326904, |
| "learning_rate": 0.0005, |
| "loss": 4.131731033325195, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.5756838905775076, |
| "grad_norm": 1.092549204826355, |
| "learning_rate": 0.0005, |
| "loss": 4.311880111694336, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.5762917933130699, |
| "grad_norm": 0.968338668346405, |
| "learning_rate": 0.0005, |
| "loss": 4.174734115600586, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.5768996960486322, |
| "grad_norm": 1.0552120208740234, |
| "learning_rate": 0.0005, |
| "loss": 3.9222404956817627, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.5775075987841946, |
| "grad_norm": 1.0390139818191528, |
| "learning_rate": 0.0005, |
| "loss": 4.248430252075195, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.5781155015197569, |
| "grad_norm": 1.1151931285858154, |
| "learning_rate": 0.0005, |
| "loss": 4.177859306335449, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.5787234042553191, |
| "grad_norm": 1.0557676553726196, |
| "learning_rate": 0.0005, |
| "loss": 4.173686981201172, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.5793313069908814, |
| "grad_norm": 1.133589506149292, |
| "learning_rate": 0.0005, |
| "loss": 4.227142333984375, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.5799392097264437, |
| "grad_norm": 1.12785804271698, |
| "learning_rate": 0.0005, |
| "loss": 4.077308654785156, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.5805471124620061, |
| "grad_norm": 1.0380632877349854, |
| "learning_rate": 0.0005, |
| "loss": 4.485074996948242, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.5811550151975684, |
| "grad_norm": 1.0573036670684814, |
| "learning_rate": 0.0005, |
| "loss": 4.045351028442383, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.5817629179331307, |
| "grad_norm": 1.0433647632598877, |
| "learning_rate": 0.0005, |
| "loss": 3.9277734756469727, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.5823708206686931, |
| "grad_norm": 1.077911376953125, |
| "learning_rate": 0.0005, |
| "loss": 4.329649448394775, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.5829787234042553, |
| "grad_norm": 0.9521039128303528, |
| "learning_rate": 0.0005, |
| "loss": 4.175987720489502, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.5835866261398176, |
| "grad_norm": 1.0778512954711914, |
| "learning_rate": 0.0005, |
| "loss": 4.32703971862793, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.5841945288753799, |
| "grad_norm": 1.048074722290039, |
| "learning_rate": 0.0005, |
| "loss": 4.146064758300781, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.5848024316109423, |
| "grad_norm": 1.0995032787322998, |
| "learning_rate": 0.0005, |
| "loss": 4.317961692810059, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.5854103343465046, |
| "grad_norm": 1.1812586784362793, |
| "learning_rate": 0.0005, |
| "loss": 4.266629219055176, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.5860182370820669, |
| "grad_norm": 1.2058099508285522, |
| "learning_rate": 0.0005, |
| "loss": 4.350966930389404, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.5866261398176292, |
| "grad_norm": 1.1499630212783813, |
| "learning_rate": 0.0005, |
| "loss": 4.47420072555542, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.5872340425531914, |
| "grad_norm": 1.212178111076355, |
| "learning_rate": 0.0005, |
| "loss": 4.201877593994141, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.5878419452887538, |
| "grad_norm": 1.0750401020050049, |
| "learning_rate": 0.0005, |
| "loss": 4.032867431640625, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.5884498480243161, |
| "grad_norm": 1.0766054391860962, |
| "learning_rate": 0.0005, |
| "loss": 4.070111274719238, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.5890577507598784, |
| "grad_norm": 1.0466876029968262, |
| "learning_rate": 0.0005, |
| "loss": 4.164140224456787, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.5896656534954408, |
| "grad_norm": 0.9755964875221252, |
| "learning_rate": 0.0005, |
| "loss": 4.061018943786621, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.5902735562310031, |
| "grad_norm": 1.0774449110031128, |
| "learning_rate": 0.0005, |
| "loss": 4.103540420532227, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.5908814589665653, |
| "grad_norm": 1.016599178314209, |
| "learning_rate": 0.0005, |
| "loss": 4.367238998413086, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.5914893617021276, |
| "grad_norm": 1.273015022277832, |
| "learning_rate": 0.0005, |
| "loss": 4.130205154418945, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.59209726443769, |
| "grad_norm": 1.121202826499939, |
| "learning_rate": 0.0005, |
| "loss": 4.058278560638428, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.5927051671732523, |
| "grad_norm": 1.782248854637146, |
| "learning_rate": 0.0005, |
| "loss": 4.2193732261657715, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.5933130699088146, |
| "grad_norm": 1.2525842189788818, |
| "learning_rate": 0.0005, |
| "loss": 4.324434757232666, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.5939209726443769, |
| "grad_norm": 0.9859209656715393, |
| "learning_rate": 0.0005, |
| "loss": 4.235608100891113, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.5945288753799393, |
| "grad_norm": 1.0421037673950195, |
| "learning_rate": 0.0005, |
| "loss": 4.312819480895996, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.5951367781155015, |
| "grad_norm": 1.2486640214920044, |
| "learning_rate": 0.0005, |
| "loss": 4.172330856323242, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.5957446808510638, |
| "grad_norm": 1.049641489982605, |
| "learning_rate": 0.0005, |
| "loss": 4.053893089294434, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.5963525835866261, |
| "grad_norm": 1.0123006105422974, |
| "learning_rate": 0.0005, |
| "loss": 4.453596115112305, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.5969604863221885, |
| "grad_norm": 0.9871963858604431, |
| "learning_rate": 0.0005, |
| "loss": 4.007091999053955, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.5975683890577508, |
| "grad_norm": 0.9984953999519348, |
| "learning_rate": 0.0005, |
| "loss": 4.22979736328125, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.5981762917933131, |
| "grad_norm": 1.281544804573059, |
| "learning_rate": 0.0005, |
| "loss": 4.074709892272949, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.5987841945288754, |
| "grad_norm": 1.1482913494110107, |
| "learning_rate": 0.0005, |
| "loss": 4.320782661437988, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.5993920972644377, |
| "grad_norm": 1.2105413675308228, |
| "learning_rate": 0.0005, |
| "loss": 4.15565299987793, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.0716112852096558, |
| "learning_rate": 0.0005, |
| "loss": 4.429147720336914, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.6006079027355623, |
| "grad_norm": 1.1487056016921997, |
| "learning_rate": 0.0005, |
| "loss": 4.206772804260254, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.6012158054711246, |
| "grad_norm": 0.9919009208679199, |
| "learning_rate": 0.0005, |
| "loss": 4.105408191680908, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.601823708206687, |
| "grad_norm": 1.1244338750839233, |
| "learning_rate": 0.0005, |
| "loss": 4.034040451049805, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.6024316109422493, |
| "grad_norm": 0.9693543910980225, |
| "learning_rate": 0.0005, |
| "loss": 3.8358006477355957, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.6030395136778115, |
| "grad_norm": 1.147226333618164, |
| "learning_rate": 0.0005, |
| "loss": 4.114927291870117, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.6036474164133738, |
| "grad_norm": 1.1658263206481934, |
| "learning_rate": 0.0005, |
| "loss": 4.3732099533081055, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.6042553191489362, |
| "grad_norm": 1.1261506080627441, |
| "learning_rate": 0.0005, |
| "loss": 4.212404251098633, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.6048632218844985, |
| "grad_norm": 1.214408040046692, |
| "learning_rate": 0.0005, |
| "loss": 4.113962173461914, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.6054711246200608, |
| "grad_norm": 1.1703499555587769, |
| "learning_rate": 0.0005, |
| "loss": 3.9795780181884766, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.6060790273556231, |
| "grad_norm": 1.2819421291351318, |
| "learning_rate": 0.0005, |
| "loss": 3.820543050765991, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.6066869300911855, |
| "grad_norm": 1.1751822233200073, |
| "learning_rate": 0.0005, |
| "loss": 4.115008354187012, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.6072948328267477, |
| "grad_norm": 1.133631944656372, |
| "learning_rate": 0.0005, |
| "loss": 4.320215225219727, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.60790273556231, |
| "grad_norm": 1.2056914567947388, |
| "learning_rate": 0.0005, |
| "loss": 4.139728546142578, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.6085106382978723, |
| "grad_norm": 1.1610949039459229, |
| "learning_rate": 0.0005, |
| "loss": 4.215843200683594, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.6091185410334347, |
| "grad_norm": 1.2171114683151245, |
| "learning_rate": 0.0005, |
| "loss": 4.104484558105469, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.609726443768997, |
| "grad_norm": 1.0760419368743896, |
| "learning_rate": 0.0005, |
| "loss": 4.106335639953613, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.6103343465045593, |
| "grad_norm": 1.0737935304641724, |
| "learning_rate": 0.0005, |
| "loss": 4.18284797668457, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.6109422492401215, |
| "grad_norm": 1.0054482221603394, |
| "learning_rate": 0.0005, |
| "loss": 4.185699462890625, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.6115501519756839, |
| "grad_norm": 1.0817815065383911, |
| "learning_rate": 0.0005, |
| "loss": 3.9077231884002686, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.6121580547112462, |
| "grad_norm": 1.1520154476165771, |
| "learning_rate": 0.0005, |
| "loss": 4.094099044799805, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.6127659574468085, |
| "grad_norm": 0.9844207763671875, |
| "learning_rate": 0.0005, |
| "loss": 4.341885566711426, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.6133738601823708, |
| "grad_norm": 1.2627776861190796, |
| "learning_rate": 0.0005, |
| "loss": 4.28475284576416, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.6139817629179332, |
| "grad_norm": 0.9542902112007141, |
| "learning_rate": 0.0005, |
| "loss": 4.2372026443481445, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.6145896656534955, |
| "grad_norm": 3.645486831665039, |
| "learning_rate": 0.0005, |
| "loss": 4.06125545501709, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.6151975683890577, |
| "grad_norm": 1.4817546606063843, |
| "learning_rate": 0.0005, |
| "loss": 3.9809517860412598, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.61580547112462, |
| "grad_norm": 1.1932374238967896, |
| "learning_rate": 0.0005, |
| "loss": 4.242306232452393, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.6164133738601824, |
| "grad_norm": 0.9499757289886475, |
| "learning_rate": 0.0005, |
| "loss": 3.9819726943969727, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.6170212765957447, |
| "grad_norm": 1.1981247663497925, |
| "learning_rate": 0.0005, |
| "loss": 4.266401290893555, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.617629179331307, |
| "grad_norm": 1.2060346603393555, |
| "learning_rate": 0.0005, |
| "loss": 4.270205497741699, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.6182370820668693, |
| "grad_norm": 1.002508282661438, |
| "learning_rate": 0.0005, |
| "loss": 4.509585380554199, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.6188449848024317, |
| "grad_norm": 1.0094107389450073, |
| "learning_rate": 0.0005, |
| "loss": 4.162940979003906, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.6194528875379939, |
| "grad_norm": 1.180220365524292, |
| "learning_rate": 0.0005, |
| "loss": 4.317109107971191, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.6200607902735562, |
| "grad_norm": 0.980454683303833, |
| "learning_rate": 0.0005, |
| "loss": 4.042284965515137, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.6206686930091185, |
| "grad_norm": 1.0461052656173706, |
| "learning_rate": 0.0005, |
| "loss": 4.0409698486328125, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.6212765957446809, |
| "grad_norm": 1.0268027782440186, |
| "learning_rate": 0.0005, |
| "loss": 4.10588264465332, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.6218844984802432, |
| "grad_norm": 0.9659956693649292, |
| "learning_rate": 0.0005, |
| "loss": 4.511264801025391, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.6224924012158055, |
| "grad_norm": 1.0161465406417847, |
| "learning_rate": 0.0005, |
| "loss": 4.369597911834717, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.6231003039513677, |
| "grad_norm": 1.145430326461792, |
| "learning_rate": 0.0005, |
| "loss": 4.104186058044434, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.6237082066869301, |
| "grad_norm": 0.968573808670044, |
| "learning_rate": 0.0005, |
| "loss": 4.03414249420166, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.6243161094224924, |
| "grad_norm": 1.2972266674041748, |
| "learning_rate": 0.0005, |
| "loss": 4.18367862701416, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.6249240121580547, |
| "grad_norm": 0.9075741171836853, |
| "learning_rate": 0.0005, |
| "loss": 4.101839065551758, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.625531914893617, |
| "grad_norm": 1.2480190992355347, |
| "learning_rate": 0.0005, |
| "loss": 4.170825004577637, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.6261398176291794, |
| "grad_norm": 1.1662267446517944, |
| "learning_rate": 0.0005, |
| "loss": 4.132046222686768, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.6267477203647417, |
| "grad_norm": 0.9081514477729797, |
| "learning_rate": 0.0005, |
| "loss": 4.023431777954102, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.6273556231003039, |
| "grad_norm": 1.1570264101028442, |
| "learning_rate": 0.0005, |
| "loss": 4.246901512145996, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.6279635258358662, |
| "grad_norm": 1.0261447429656982, |
| "learning_rate": 0.0005, |
| "loss": 4.251025199890137, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.6285714285714286, |
| "grad_norm": 0.9957416653633118, |
| "learning_rate": 0.0005, |
| "loss": 4.112504482269287, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.6291793313069909, |
| "grad_norm": 1.2634888887405396, |
| "learning_rate": 0.0005, |
| "loss": 4.397002220153809, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.6297872340425532, |
| "grad_norm": 1.0848995447158813, |
| "learning_rate": 0.0005, |
| "loss": 4.163301467895508, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.6303951367781155, |
| "grad_norm": 1.0806390047073364, |
| "learning_rate": 0.0005, |
| "loss": 3.78402042388916, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.6310030395136779, |
| "grad_norm": 1.0640003681182861, |
| "learning_rate": 0.0005, |
| "loss": 4.0556440353393555, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.6316109422492401, |
| "grad_norm": 0.9620634317398071, |
| "learning_rate": 0.0005, |
| "loss": 4.19709587097168, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.6322188449848024, |
| "grad_norm": 1.4484918117523193, |
| "learning_rate": 0.0005, |
| "loss": 4.058507442474365, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.6328267477203647, |
| "grad_norm": 1.219489574432373, |
| "learning_rate": 0.0005, |
| "loss": 4.1419267654418945, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.6334346504559271, |
| "grad_norm": 1.127636194229126, |
| "learning_rate": 0.0005, |
| "loss": 3.892014980316162, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.6340425531914894, |
| "grad_norm": 1.326476812362671, |
| "learning_rate": 0.0005, |
| "loss": 4.128079414367676, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.6346504559270517, |
| "grad_norm": 1.1010375022888184, |
| "learning_rate": 0.0005, |
| "loss": 3.898940086364746, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.6352583586626139, |
| "grad_norm": 1.1064268350601196, |
| "learning_rate": 0.0005, |
| "loss": 4.141763687133789, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.6358662613981763, |
| "grad_norm": 1.24687659740448, |
| "learning_rate": 0.0005, |
| "loss": 4.210533618927002, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.6364741641337386, |
| "grad_norm": 1.0071916580200195, |
| "learning_rate": 0.0005, |
| "loss": 4.255558013916016, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.6370820668693009, |
| "grad_norm": 1.0620638132095337, |
| "learning_rate": 0.0005, |
| "loss": 4.008969306945801, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.6376899696048632, |
| "grad_norm": 1.0604190826416016, |
| "learning_rate": 0.0005, |
| "loss": 4.224608421325684, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.6382978723404256, |
| "grad_norm": 1.032774567604065, |
| "learning_rate": 0.0005, |
| "loss": 4.131565093994141, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.6389057750759879, |
| "grad_norm": 0.9236063361167908, |
| "learning_rate": 0.0005, |
| "loss": 4.309024333953857, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.6395136778115501, |
| "grad_norm": 1.059757947921753, |
| "learning_rate": 0.0005, |
| "loss": 4.041001319885254, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.6401215805471124, |
| "grad_norm": 1.1099759340286255, |
| "learning_rate": 0.0005, |
| "loss": 3.9661004543304443, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.6407294832826748, |
| "grad_norm": 1.0091055631637573, |
| "learning_rate": 0.0005, |
| "loss": 3.987016439437866, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.6413373860182371, |
| "grad_norm": 1.1090649366378784, |
| "learning_rate": 0.0005, |
| "loss": 4.068497657775879, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.6419452887537994, |
| "grad_norm": 1.0738252401351929, |
| "learning_rate": 0.0005, |
| "loss": 3.9846339225769043, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.6425531914893617, |
| "grad_norm": 1.1196277141571045, |
| "learning_rate": 0.0005, |
| "loss": 4.063312530517578, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.643161094224924, |
| "grad_norm": 1.2615549564361572, |
| "learning_rate": 0.0005, |
| "loss": 3.9986069202423096, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.6437689969604863, |
| "grad_norm": 1.49628746509552, |
| "learning_rate": 0.0005, |
| "loss": 4.1674394607543945, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.6443768996960486, |
| "grad_norm": 1.279189109802246, |
| "learning_rate": 0.0005, |
| "loss": 3.8027124404907227, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.6449848024316109, |
| "grad_norm": 1.1228110790252686, |
| "learning_rate": 0.0005, |
| "loss": 3.7935433387756348, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.6455927051671733, |
| "grad_norm": 1.082332730293274, |
| "learning_rate": 0.0005, |
| "loss": 4.039803981781006, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.6462006079027356, |
| "grad_norm": 0.9758466482162476, |
| "learning_rate": 0.0005, |
| "loss": 4.102064609527588, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.6468085106382979, |
| "grad_norm": 1.0097397565841675, |
| "learning_rate": 0.0005, |
| "loss": 4.058742523193359, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.6474164133738601, |
| "grad_norm": 1.0726414918899536, |
| "learning_rate": 0.0005, |
| "loss": 4.0242133140563965, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.6480243161094225, |
| "grad_norm": 1.107040524482727, |
| "learning_rate": 0.0005, |
| "loss": 3.9720733165740967, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.6486322188449848, |
| "grad_norm": 1.258399248123169, |
| "learning_rate": 0.0005, |
| "loss": 3.85103178024292, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.6492401215805471, |
| "grad_norm": 1.215524435043335, |
| "learning_rate": 0.0005, |
| "loss": 4.162143707275391, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.6498480243161094, |
| "grad_norm": 1.0505629777908325, |
| "learning_rate": 0.0005, |
| "loss": 4.23874568939209, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.6504559270516718, |
| "grad_norm": 1.2580337524414062, |
| "learning_rate": 0.0005, |
| "loss": 4.126619338989258, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.6510638297872341, |
| "grad_norm": 1.1980527639389038, |
| "learning_rate": 0.0005, |
| "loss": 4.011953353881836, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.6516717325227963, |
| "grad_norm": 1.020224690437317, |
| "learning_rate": 0.0005, |
| "loss": 4.2201948165893555, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.6522796352583586, |
| "grad_norm": 1.0695855617523193, |
| "learning_rate": 0.0005, |
| "loss": 4.277288436889648, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.652887537993921, |
| "grad_norm": 1.1862881183624268, |
| "learning_rate": 0.0005, |
| "loss": 4.1104841232299805, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.6534954407294833, |
| "grad_norm": 1.7002424001693726, |
| "learning_rate": 0.0005, |
| "loss": 4.274345874786377, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.6541033434650456, |
| "grad_norm": 1.3632254600524902, |
| "learning_rate": 0.0005, |
| "loss": 4.318878173828125, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.6547112462006079, |
| "grad_norm": 1.1510448455810547, |
| "learning_rate": 0.0005, |
| "loss": 4.182323455810547, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.6553191489361702, |
| "grad_norm": 1.143638014793396, |
| "learning_rate": 0.0005, |
| "loss": 4.174741744995117, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.6559270516717325, |
| "grad_norm": 1.1500475406646729, |
| "learning_rate": 0.0005, |
| "loss": 3.9260623455047607, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.6565349544072948, |
| "grad_norm": 1.293712854385376, |
| "learning_rate": 0.0005, |
| "loss": 4.087700843811035, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.6571428571428571, |
| "grad_norm": 1.3932772874832153, |
| "learning_rate": 0.0005, |
| "loss": 4.118124961853027, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.6577507598784195, |
| "grad_norm": 1.094328761100769, |
| "learning_rate": 0.0005, |
| "loss": 4.175318241119385, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.6583586626139818, |
| "grad_norm": 1.467499017715454, |
| "learning_rate": 0.0005, |
| "loss": 4.272140979766846, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.6589665653495441, |
| "grad_norm": 1.1503561735153198, |
| "learning_rate": 0.0005, |
| "loss": 4.183167934417725, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.6595744680851063, |
| "grad_norm": 1.1912407875061035, |
| "learning_rate": 0.0005, |
| "loss": 4.055290222167969, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.6601823708206687, |
| "grad_norm": 1.1428508758544922, |
| "learning_rate": 0.0005, |
| "loss": 4.183894157409668, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.660790273556231, |
| "grad_norm": 1.136474609375, |
| "learning_rate": 0.0005, |
| "loss": 3.86468768119812, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.6613981762917933, |
| "grad_norm": 1.0048547983169556, |
| "learning_rate": 0.0005, |
| "loss": 4.054813385009766, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.6620060790273556, |
| "grad_norm": 1.021672010421753, |
| "learning_rate": 0.0005, |
| "loss": 3.9937808513641357, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.662613981762918, |
| "grad_norm": 1.184766173362732, |
| "learning_rate": 0.0005, |
| "loss": 4.007226943969727, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.6632218844984803, |
| "grad_norm": 1.1701700687408447, |
| "learning_rate": 0.0005, |
| "loss": 3.880901336669922, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.6638297872340425, |
| "grad_norm": 1.0928300619125366, |
| "learning_rate": 0.0005, |
| "loss": 4.0920090675354, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.6644376899696048, |
| "grad_norm": 1.0498013496398926, |
| "learning_rate": 0.0005, |
| "loss": 4.117219924926758, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.6650455927051672, |
| "grad_norm": 1.034084439277649, |
| "learning_rate": 0.0005, |
| "loss": 4.0926313400268555, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.6656534954407295, |
| "grad_norm": 0.939494788646698, |
| "learning_rate": 0.0005, |
| "loss": 4.018050670623779, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.6662613981762918, |
| "grad_norm": 1.2339518070220947, |
| "learning_rate": 0.0005, |
| "loss": 3.9285688400268555, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.6668693009118541, |
| "grad_norm": 1.1236822605133057, |
| "learning_rate": 0.0005, |
| "loss": 3.9050168991088867, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.6674772036474164, |
| "grad_norm": 0.9875328540802002, |
| "learning_rate": 0.0005, |
| "loss": 4.033220291137695, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.6680851063829787, |
| "grad_norm": 0.9468657374382019, |
| "learning_rate": 0.0005, |
| "loss": 4.108023643493652, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.668693009118541, |
| "grad_norm": 1.0056613683700562, |
| "learning_rate": 0.0005, |
| "loss": 4.217726707458496, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.6693009118541033, |
| "grad_norm": 1.1911637783050537, |
| "learning_rate": 0.0005, |
| "loss": 3.9922735691070557, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.6699088145896657, |
| "grad_norm": 0.9524610638618469, |
| "learning_rate": 0.0005, |
| "loss": 3.843928337097168, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.670516717325228, |
| "grad_norm": 1.1759804487228394, |
| "learning_rate": 0.0005, |
| "loss": 3.8452773094177246, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.6711246200607903, |
| "grad_norm": 1.1534795761108398, |
| "learning_rate": 0.0005, |
| "loss": 4.147238731384277, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.6717325227963525, |
| "grad_norm": 1.0438340902328491, |
| "learning_rate": 0.0005, |
| "loss": 3.814009666442871, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.6723404255319149, |
| "grad_norm": 1.4943510293960571, |
| "learning_rate": 0.0005, |
| "loss": 4.062148571014404, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.6729483282674772, |
| "grad_norm": 0.9739040732383728, |
| "learning_rate": 0.0005, |
| "loss": 4.066575050354004, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.6735562310030395, |
| "grad_norm": 1.0727957487106323, |
| "learning_rate": 0.0005, |
| "loss": 3.935608386993408, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.6741641337386018, |
| "grad_norm": 1.480692744255066, |
| "learning_rate": 0.0005, |
| "loss": 4.12183952331543, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.6747720364741642, |
| "grad_norm": 1.1042070388793945, |
| "learning_rate": 0.0005, |
| "loss": 3.8309693336486816, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.6753799392097265, |
| "grad_norm": 1.5949453115463257, |
| "learning_rate": 0.0005, |
| "loss": 4.225711822509766, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.6759878419452887, |
| "grad_norm": 1.1404409408569336, |
| "learning_rate": 0.0005, |
| "loss": 3.870262384414673, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.676595744680851, |
| "grad_norm": 1.1272308826446533, |
| "learning_rate": 0.0005, |
| "loss": 4.375516891479492, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.6772036474164134, |
| "grad_norm": 1.3391433954238892, |
| "learning_rate": 0.0005, |
| "loss": 3.9125869274139404, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.6778115501519757, |
| "grad_norm": 0.9406550526618958, |
| "learning_rate": 0.0005, |
| "loss": 4.000041961669922, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.678419452887538, |
| "grad_norm": 1.211789846420288, |
| "learning_rate": 0.0005, |
| "loss": 4.146924018859863, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.6790273556231003, |
| "grad_norm": 1.0479586124420166, |
| "learning_rate": 0.0005, |
| "loss": 3.77646803855896, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.6796352583586626, |
| "grad_norm": 1.0069152116775513, |
| "learning_rate": 0.0005, |
| "loss": 4.110267162322998, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.6802431610942249, |
| "grad_norm": 1.2088702917099, |
| "learning_rate": 0.0005, |
| "loss": 4.083201885223389, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.6808510638297872, |
| "grad_norm": 1.3016067743301392, |
| "learning_rate": 0.0005, |
| "loss": 4.1130218505859375, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.6814589665653495, |
| "grad_norm": 1.0395400524139404, |
| "learning_rate": 0.0005, |
| "loss": 4.012112617492676, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.6820668693009119, |
| "grad_norm": 1.1534603834152222, |
| "learning_rate": 0.0005, |
| "loss": 3.8767285346984863, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.6826747720364742, |
| "grad_norm": 1.1331707239151, |
| "learning_rate": 0.0005, |
| "loss": 3.8466670513153076, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.6832826747720365, |
| "grad_norm": 1.0023419857025146, |
| "learning_rate": 0.0005, |
| "loss": 3.978550910949707, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.6838905775075987, |
| "grad_norm": 1.198326826095581, |
| "learning_rate": 0.0005, |
| "loss": 4.160974502563477, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.6844984802431611, |
| "grad_norm": 1.0249745845794678, |
| "learning_rate": 0.0005, |
| "loss": 3.961395740509033, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.6851063829787234, |
| "grad_norm": 1.2853235006332397, |
| "learning_rate": 0.0005, |
| "loss": 4.322844505310059, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.6857142857142857, |
| "grad_norm": 0.9774798154830933, |
| "learning_rate": 0.0005, |
| "loss": 4.034377098083496, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.686322188449848, |
| "grad_norm": 1.1903027296066284, |
| "learning_rate": 0.0005, |
| "loss": 3.896298408508301, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.6869300911854104, |
| "grad_norm": 0.9409128427505493, |
| "learning_rate": 0.0005, |
| "loss": 3.967303514480591, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.6875379939209726, |
| "grad_norm": 1.0214065313339233, |
| "learning_rate": 0.0005, |
| "loss": 3.916367769241333, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.6881458966565349, |
| "grad_norm": 1.3258956670761108, |
| "learning_rate": 0.0005, |
| "loss": 3.859543800354004, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.6887537993920972, |
| "grad_norm": 1.0668888092041016, |
| "learning_rate": 0.0005, |
| "loss": 3.929979085922241, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.6893617021276596, |
| "grad_norm": 1.0921815633773804, |
| "learning_rate": 0.0005, |
| "loss": 3.942767381668091, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.6899696048632219, |
| "grad_norm": 1.1683087348937988, |
| "learning_rate": 0.0005, |
| "loss": 4.096218109130859, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.6905775075987842, |
| "grad_norm": 1.150611162185669, |
| "learning_rate": 0.0005, |
| "loss": 4.065778732299805, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.6911854103343466, |
| "grad_norm": 0.9955292344093323, |
| "learning_rate": 0.0005, |
| "loss": 3.83855938911438, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.6917933130699088, |
| "grad_norm": 1.1191688776016235, |
| "learning_rate": 0.0005, |
| "loss": 4.189568519592285, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.6924012158054711, |
| "grad_norm": 1.1021112203598022, |
| "learning_rate": 0.0005, |
| "loss": 4.004612445831299, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.6930091185410334, |
| "grad_norm": 1.2468072175979614, |
| "learning_rate": 0.0005, |
| "loss": 3.867835283279419, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.6936170212765957, |
| "grad_norm": 0.9965139627456665, |
| "learning_rate": 0.0005, |
| "loss": 3.8393120765686035, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.6942249240121581, |
| "grad_norm": 1.2608331441879272, |
| "learning_rate": 0.0005, |
| "loss": 4.122796535491943, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.6948328267477204, |
| "grad_norm": 0.9645028710365295, |
| "learning_rate": 0.0005, |
| "loss": 4.193379878997803, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.6954407294832827, |
| "grad_norm": 1.103003978729248, |
| "learning_rate": 0.0005, |
| "loss": 3.80690860748291, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.6960486322188449, |
| "grad_norm": 0.9812702536582947, |
| "learning_rate": 0.0005, |
| "loss": 3.910191059112549, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.6966565349544073, |
| "grad_norm": 1.1629973649978638, |
| "learning_rate": 0.0005, |
| "loss": 3.912919282913208, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.6972644376899696, |
| "grad_norm": 0.9559318423271179, |
| "learning_rate": 0.0005, |
| "loss": 4.1403703689575195, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.6978723404255319, |
| "grad_norm": 1.187225103378296, |
| "learning_rate": 0.0005, |
| "loss": 4.227229595184326, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.6984802431610942, |
| "grad_norm": 1.0893582105636597, |
| "learning_rate": 0.0005, |
| "loss": 4.085037708282471, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.6990881458966566, |
| "grad_norm": 1.207614541053772, |
| "learning_rate": 0.0005, |
| "loss": 4.065666198730469, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.6996960486322188, |
| "grad_norm": 1.1726024150848389, |
| "learning_rate": 0.0005, |
| "loss": 4.011224269866943, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.7003039513677811, |
| "grad_norm": 1.0657603740692139, |
| "learning_rate": 0.0005, |
| "loss": 3.7515785694122314, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.7009118541033434, |
| "grad_norm": 1.069787859916687, |
| "learning_rate": 0.0005, |
| "loss": 4.024471759796143, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.7015197568389058, |
| "grad_norm": 1.0333293676376343, |
| "learning_rate": 0.0005, |
| "loss": 3.9765753746032715, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.7021276595744681, |
| "grad_norm": 1.3091932535171509, |
| "learning_rate": 0.0005, |
| "loss": 4.086296081542969, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.7027355623100304, |
| "grad_norm": 0.96831214427948, |
| "learning_rate": 0.0005, |
| "loss": 4.159407615661621, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.7033434650455928, |
| "grad_norm": 1.0307363271713257, |
| "learning_rate": 0.0005, |
| "loss": 4.085778713226318, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.703951367781155, |
| "grad_norm": 1.2046213150024414, |
| "learning_rate": 0.0005, |
| "loss": 4.149312973022461, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.7045592705167173, |
| "grad_norm": 1.027969241142273, |
| "learning_rate": 0.0005, |
| "loss": 4.021113872528076, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.7051671732522796, |
| "grad_norm": 0.886216938495636, |
| "learning_rate": 0.0005, |
| "loss": 4.1072492599487305, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.705775075987842, |
| "grad_norm": 1.2814362049102783, |
| "learning_rate": 0.0005, |
| "loss": 4.351136207580566, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.7063829787234043, |
| "grad_norm": 1.195614218711853, |
| "learning_rate": 0.0005, |
| "loss": 3.859333038330078, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.7069908814589666, |
| "grad_norm": 1.02545964717865, |
| "learning_rate": 0.0005, |
| "loss": 3.993720531463623, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.7075987841945289, |
| "grad_norm": 1.1973057985305786, |
| "learning_rate": 0.0005, |
| "loss": 4.022034168243408, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.7082066869300911, |
| "grad_norm": 1.047211766242981, |
| "learning_rate": 0.0005, |
| "loss": 4.110629081726074, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.7088145896656535, |
| "grad_norm": 0.9065303206443787, |
| "learning_rate": 0.0005, |
| "loss": 4.266797065734863, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.7094224924012158, |
| "grad_norm": 1.0121465921401978, |
| "learning_rate": 0.0005, |
| "loss": 3.7510571479797363, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.7100303951367781, |
| "grad_norm": 1.2128599882125854, |
| "learning_rate": 0.0005, |
| "loss": 4.190847396850586, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.7106382978723405, |
| "grad_norm": 1.4867533445358276, |
| "learning_rate": 0.0005, |
| "loss": 4.125823497772217, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.7112462006079028, |
| "grad_norm": 0.9088724851608276, |
| "learning_rate": 0.0005, |
| "loss": 4.092848777770996, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.711854103343465, |
| "grad_norm": 0.980387270450592, |
| "learning_rate": 0.0005, |
| "loss": 3.963526725769043, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.7124620060790273, |
| "grad_norm": 0.9671593308448792, |
| "learning_rate": 0.0005, |
| "loss": 3.886415958404541, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.7130699088145896, |
| "grad_norm": 0.8448948860168457, |
| "learning_rate": 0.0005, |
| "loss": 3.960893154144287, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.713677811550152, |
| "grad_norm": 1.0654000043869019, |
| "learning_rate": 0.0005, |
| "loss": 3.9057178497314453, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.7142857142857143, |
| "grad_norm": 1.255560040473938, |
| "learning_rate": 0.0005, |
| "loss": 4.085208892822266, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.7148936170212766, |
| "grad_norm": 1.172607183456421, |
| "learning_rate": 0.0005, |
| "loss": 3.8918302059173584, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.715501519756839, |
| "grad_norm": 1.1429939270019531, |
| "learning_rate": 0.0005, |
| "loss": 3.8840150833129883, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.7161094224924012, |
| "grad_norm": 1.0610404014587402, |
| "learning_rate": 0.0005, |
| "loss": 4.0701003074646, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.7167173252279635, |
| "grad_norm": 1.0055387020111084, |
| "learning_rate": 0.0005, |
| "loss": 3.773474931716919, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.7173252279635258, |
| "grad_norm": 1.0536381006240845, |
| "learning_rate": 0.0005, |
| "loss": 3.885234832763672, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.7179331306990882, |
| "grad_norm": 1.2304924726486206, |
| "learning_rate": 0.0005, |
| "loss": 4.134721755981445, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.7185410334346505, |
| "grad_norm": 1.1367759704589844, |
| "learning_rate": 0.0005, |
| "loss": 4.07640266418457, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.7191489361702128, |
| "grad_norm": 0.9987047910690308, |
| "learning_rate": 0.0005, |
| "loss": 3.941793918609619, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.7197568389057751, |
| "grad_norm": 0.9390357136726379, |
| "learning_rate": 0.0005, |
| "loss": 4.057319641113281, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.7203647416413373, |
| "grad_norm": 1.3009685277938843, |
| "learning_rate": 0.0005, |
| "loss": 3.7487921714782715, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.7209726443768997, |
| "grad_norm": 1.0107924938201904, |
| "learning_rate": 0.0005, |
| "loss": 3.9321677684783936, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.721580547112462, |
| "grad_norm": 1.003091812133789, |
| "learning_rate": 0.0005, |
| "loss": 3.855192184448242, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.7221884498480243, |
| "grad_norm": 1.1665643453598022, |
| "learning_rate": 0.0005, |
| "loss": 4.062481880187988, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.7227963525835867, |
| "grad_norm": 1.0481219291687012, |
| "learning_rate": 0.0005, |
| "loss": 3.7130908966064453, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.723404255319149, |
| "grad_norm": 1.4968420267105103, |
| "learning_rate": 0.0005, |
| "loss": 4.054961681365967, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.7240121580547112, |
| "grad_norm": 1.0543270111083984, |
| "learning_rate": 0.0005, |
| "loss": 4.080737113952637, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.7246200607902735, |
| "grad_norm": 1.3208811283111572, |
| "learning_rate": 0.0005, |
| "loss": 3.828869581222534, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.7252279635258359, |
| "grad_norm": 1.1503605842590332, |
| "learning_rate": 0.0005, |
| "loss": 3.897340774536133, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.7258358662613982, |
| "grad_norm": 0.9485260844230652, |
| "learning_rate": 0.0005, |
| "loss": 4.022025108337402, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.7264437689969605, |
| "grad_norm": 1.0768346786499023, |
| "learning_rate": 0.0005, |
| "loss": 4.132440567016602, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.7270516717325228, |
| "grad_norm": 1.0768530368804932, |
| "learning_rate": 0.0005, |
| "loss": 4.167514801025391, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.7276595744680852, |
| "grad_norm": 1.1659386157989502, |
| "learning_rate": 0.0005, |
| "loss": 3.9605331420898438, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.7282674772036474, |
| "grad_norm": 0.9825963377952576, |
| "learning_rate": 0.0005, |
| "loss": 3.9677042961120605, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.7288753799392097, |
| "grad_norm": 1.200975775718689, |
| "learning_rate": 0.0005, |
| "loss": 4.059627056121826, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.729483282674772, |
| "grad_norm": 1.0287483930587769, |
| "learning_rate": 0.0005, |
| "loss": 3.9571118354797363, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.7300911854103344, |
| "grad_norm": 1.171775221824646, |
| "learning_rate": 0.0005, |
| "loss": 4.2555084228515625, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.7306990881458967, |
| "grad_norm": 1.2075831890106201, |
| "learning_rate": 0.0005, |
| "loss": 3.9444262981414795, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.731306990881459, |
| "grad_norm": 1.1258975267410278, |
| "learning_rate": 0.0005, |
| "loss": 4.037412643432617, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.7319148936170212, |
| "grad_norm": 1.107055902481079, |
| "learning_rate": 0.0005, |
| "loss": 4.046412467956543, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.7325227963525835, |
| "grad_norm": 1.1721580028533936, |
| "learning_rate": 0.0005, |
| "loss": 4.039186477661133, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.7331306990881459, |
| "grad_norm": 1.8083940744400024, |
| "learning_rate": 0.0005, |
| "loss": 4.255344390869141, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.7337386018237082, |
| "grad_norm": 1.1505194902420044, |
| "learning_rate": 0.0005, |
| "loss": 3.849947452545166, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.7343465045592705, |
| "grad_norm": 1.0368176698684692, |
| "learning_rate": 0.0005, |
| "loss": 4.037250518798828, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.7349544072948329, |
| "grad_norm": 1.076282262802124, |
| "learning_rate": 0.0005, |
| "loss": 3.8192124366760254, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.7355623100303952, |
| "grad_norm": 0.9457529187202454, |
| "learning_rate": 0.0005, |
| "loss": 3.988126516342163, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.7361702127659574, |
| "grad_norm": 1.396436333656311, |
| "learning_rate": 0.0005, |
| "loss": 3.8735647201538086, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.7367781155015197, |
| "grad_norm": 1.1978737115859985, |
| "learning_rate": 0.0005, |
| "loss": 3.7308835983276367, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.737386018237082, |
| "grad_norm": 1.2270631790161133, |
| "learning_rate": 0.0005, |
| "loss": 3.7548632621765137, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.7379939209726444, |
| "grad_norm": 1.0319976806640625, |
| "learning_rate": 0.0005, |
| "loss": 4.095251083374023, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.7386018237082067, |
| "grad_norm": 1.2742465734481812, |
| "learning_rate": 0.0005, |
| "loss": 3.7844438552856445, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.739209726443769, |
| "grad_norm": 0.9936171174049377, |
| "learning_rate": 0.0005, |
| "loss": 3.6828408241271973, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.7398176291793314, |
| "grad_norm": 1.0827305316925049, |
| "learning_rate": 0.0005, |
| "loss": 4.2918291091918945, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.7404255319148936, |
| "grad_norm": 1.0626490116119385, |
| "learning_rate": 0.0005, |
| "loss": 3.8438615798950195, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.7410334346504559, |
| "grad_norm": 1.0187205076217651, |
| "learning_rate": 0.0005, |
| "loss": 4.007755279541016, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.7416413373860182, |
| "grad_norm": 0.9945427775382996, |
| "learning_rate": 0.0005, |
| "loss": 3.8854458332061768, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.7422492401215806, |
| "grad_norm": 0.9728744029998779, |
| "learning_rate": 0.0005, |
| "loss": 3.7727737426757812, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.7428571428571429, |
| "grad_norm": 1.0771368741989136, |
| "learning_rate": 0.0005, |
| "loss": 3.984614133834839, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.7434650455927052, |
| "grad_norm": 1.0673145055770874, |
| "learning_rate": 0.0005, |
| "loss": 4.186018943786621, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.7440729483282674, |
| "grad_norm": 1.0385884046554565, |
| "learning_rate": 0.0005, |
| "loss": 3.9700469970703125, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.7446808510638298, |
| "grad_norm": 0.9378101229667664, |
| "learning_rate": 0.0005, |
| "loss": 4.093457221984863, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.7452887537993921, |
| "grad_norm": 1.1992157697677612, |
| "learning_rate": 0.0005, |
| "loss": 3.8426108360290527, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.7458966565349544, |
| "grad_norm": 0.9516767263412476, |
| "learning_rate": 0.0005, |
| "loss": 3.540165901184082, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.7465045592705167, |
| "grad_norm": 0.9911203980445862, |
| "learning_rate": 0.0005, |
| "loss": 3.791531562805176, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.7471124620060791, |
| "grad_norm": 1.1304718255996704, |
| "learning_rate": 0.0005, |
| "loss": 3.8638508319854736, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.7477203647416414, |
| "grad_norm": 3.538874626159668, |
| "learning_rate": 0.0005, |
| "loss": 4.131631851196289, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.7483282674772036, |
| "grad_norm": 1.096618413925171, |
| "learning_rate": 0.0005, |
| "loss": 3.782884120941162, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.7489361702127659, |
| "grad_norm": 1.2701330184936523, |
| "learning_rate": 0.0005, |
| "loss": 3.992222785949707, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.7495440729483283, |
| "grad_norm": 1.0706497430801392, |
| "learning_rate": 0.0005, |
| "loss": 3.908442735671997, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.7501519756838906, |
| "grad_norm": 1.030834436416626, |
| "learning_rate": 0.0005, |
| "loss": 4.000621318817139, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.7507598784194529, |
| "grad_norm": 1.3895245790481567, |
| "learning_rate": 0.0005, |
| "loss": 3.80660343170166, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.7513677811550152, |
| "grad_norm": 0.9692356586456299, |
| "learning_rate": 0.0005, |
| "loss": 4.078845977783203, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.7519756838905776, |
| "grad_norm": 1.1271778345108032, |
| "learning_rate": 0.0005, |
| "loss": 3.9555394649505615, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.7525835866261398, |
| "grad_norm": 1.5441569089889526, |
| "learning_rate": 0.0005, |
| "loss": 3.963904857635498, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.7531914893617021, |
| "grad_norm": 1.7030054330825806, |
| "learning_rate": 0.0005, |
| "loss": 4.024696350097656, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.7537993920972644, |
| "grad_norm": 1.12552011013031, |
| "learning_rate": 0.0005, |
| "loss": 3.919168472290039, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.7544072948328268, |
| "grad_norm": 1.0487366914749146, |
| "learning_rate": 0.0005, |
| "loss": 4.095437526702881, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.7550151975683891, |
| "grad_norm": 1.0279390811920166, |
| "learning_rate": 0.0005, |
| "loss": 3.941718816757202, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.7556231003039514, |
| "grad_norm": 1.080350399017334, |
| "learning_rate": 0.0005, |
| "loss": 3.7040228843688965, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.7562310030395136, |
| "grad_norm": 1.0182151794433594, |
| "learning_rate": 0.0005, |
| "loss": 4.062251091003418, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.756838905775076, |
| "grad_norm": 1.078009843826294, |
| "learning_rate": 0.0005, |
| "loss": 3.8745062351226807, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.7574468085106383, |
| "grad_norm": 1.0222269296646118, |
| "learning_rate": 0.0005, |
| "loss": 3.7564854621887207, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.7580547112462006, |
| "grad_norm": 1.329654335975647, |
| "learning_rate": 0.0005, |
| "loss": 3.8875160217285156, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.7586626139817629, |
| "grad_norm": 1.0129868984222412, |
| "learning_rate": 0.0005, |
| "loss": 3.8748350143432617, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.7592705167173253, |
| "grad_norm": 1.030468225479126, |
| "learning_rate": 0.0005, |
| "loss": 3.8655738830566406, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.7598784194528876, |
| "grad_norm": 1.111459732055664, |
| "learning_rate": 0.0005, |
| "loss": 3.9891488552093506, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.7604863221884498, |
| "grad_norm": 1.4396013021469116, |
| "learning_rate": 0.0005, |
| "loss": 3.9904720783233643, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.7610942249240121, |
| "grad_norm": 1.2336925268173218, |
| "learning_rate": 0.0005, |
| "loss": 3.7369742393493652, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.7617021276595745, |
| "grad_norm": 0.8990273475646973, |
| "learning_rate": 0.0005, |
| "loss": 3.9168124198913574, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.7623100303951368, |
| "grad_norm": 1.2932227849960327, |
| "learning_rate": 0.0005, |
| "loss": 4.008082389831543, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.7629179331306991, |
| "grad_norm": 0.9154768586158752, |
| "learning_rate": 0.0005, |
| "loss": 4.019550323486328, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.7635258358662614, |
| "grad_norm": 0.9175946712493896, |
| "learning_rate": 0.0005, |
| "loss": 3.97037935256958, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.7641337386018237, |
| "grad_norm": 1.067017912864685, |
| "learning_rate": 0.0005, |
| "loss": 4.1387038230896, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.764741641337386, |
| "grad_norm": 1.1540616750717163, |
| "learning_rate": 0.0005, |
| "loss": 3.979078769683838, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.7653495440729483, |
| "grad_norm": 0.9942051768302917, |
| "learning_rate": 0.0005, |
| "loss": 4.157475471496582, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.7659574468085106, |
| "grad_norm": 1.0882611274719238, |
| "learning_rate": 0.0005, |
| "loss": 3.784665584564209, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.766565349544073, |
| "grad_norm": 1.0358823537826538, |
| "learning_rate": 0.0005, |
| "loss": 3.8665788173675537, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.7671732522796353, |
| "grad_norm": 0.9150176048278809, |
| "learning_rate": 0.0005, |
| "loss": 3.9708409309387207, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.7677811550151976, |
| "grad_norm": 1.2305281162261963, |
| "learning_rate": 0.0005, |
| "loss": 3.791486978530884, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.7683890577507598, |
| "grad_norm": 1.0246379375457764, |
| "learning_rate": 0.0005, |
| "loss": 3.931403875350952, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.7689969604863222, |
| "grad_norm": 1.342997431755066, |
| "learning_rate": 0.0005, |
| "loss": 3.800549030303955, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.7696048632218845, |
| "grad_norm": 1.0477383136749268, |
| "learning_rate": 0.0005, |
| "loss": 3.9642491340637207, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.7702127659574468, |
| "grad_norm": 1.5231037139892578, |
| "learning_rate": 0.0005, |
| "loss": 3.883274555206299, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.7708206686930091, |
| "grad_norm": 1.21817147731781, |
| "learning_rate": 0.0005, |
| "loss": 3.8319201469421387, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.7714285714285715, |
| "grad_norm": 1.3139930963516235, |
| "learning_rate": 0.0005, |
| "loss": 3.902513027191162, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.7720364741641338, |
| "grad_norm": 1.1108347177505493, |
| "learning_rate": 0.0005, |
| "loss": 4.040473937988281, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.772644376899696, |
| "grad_norm": 0.9352411031723022, |
| "learning_rate": 0.0005, |
| "loss": 3.8833415508270264, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.7732522796352583, |
| "grad_norm": 0.9234441518783569, |
| "learning_rate": 0.0005, |
| "loss": 4.101876258850098, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.7738601823708207, |
| "grad_norm": 1.0629017353057861, |
| "learning_rate": 0.0005, |
| "loss": 3.869561195373535, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.774468085106383, |
| "grad_norm": 1.0356484651565552, |
| "learning_rate": 0.0005, |
| "loss": 3.9723856449127197, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.7750759878419453, |
| "grad_norm": 0.9600344896316528, |
| "learning_rate": 0.0005, |
| "loss": 3.824707508087158, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.7756838905775076, |
| "grad_norm": 1.0315158367156982, |
| "learning_rate": 0.0005, |
| "loss": 4.001948356628418, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.7762917933130699, |
| "grad_norm": 1.1866099834442139, |
| "learning_rate": 0.0005, |
| "loss": 3.763075828552246, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.7768996960486322, |
| "grad_norm": 1.1227611303329468, |
| "learning_rate": 0.0005, |
| "loss": 3.846872329711914, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.7775075987841945, |
| "grad_norm": 1.1628526449203491, |
| "learning_rate": 0.0005, |
| "loss": 3.929243564605713, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.7781155015197568, |
| "grad_norm": 0.9936217069625854, |
| "learning_rate": 0.0005, |
| "loss": 3.736764907836914, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.7787234042553192, |
| "grad_norm": 1.0325050354003906, |
| "learning_rate": 0.0005, |
| "loss": 3.935077667236328, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.7793313069908815, |
| "grad_norm": 1.0567058324813843, |
| "learning_rate": 0.0005, |
| "loss": 3.801319122314453, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.7799392097264438, |
| "grad_norm": 1.313740611076355, |
| "learning_rate": 0.0005, |
| "loss": 4.132801055908203, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.780547112462006, |
| "grad_norm": 1.4536793231964111, |
| "learning_rate": 0.0005, |
| "loss": 3.88094425201416, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.7811550151975684, |
| "grad_norm": 1.1501535177230835, |
| "learning_rate": 0.0005, |
| "loss": 3.8404948711395264, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.7817629179331307, |
| "grad_norm": 1.3253229856491089, |
| "learning_rate": 0.0005, |
| "loss": 4.016423225402832, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.782370820668693, |
| "grad_norm": 1.2896214723587036, |
| "learning_rate": 0.0005, |
| "loss": 3.8204050064086914, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.7829787234042553, |
| "grad_norm": 1.347516655921936, |
| "learning_rate": 0.0005, |
| "loss": 3.849546432495117, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.7835866261398177, |
| "grad_norm": 1.5418754816055298, |
| "learning_rate": 0.0005, |
| "loss": 4.135068893432617, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.78419452887538, |
| "grad_norm": 1.0823962688446045, |
| "learning_rate": 0.0005, |
| "loss": 3.752423048019409, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.7848024316109422, |
| "grad_norm": 1.1146916151046753, |
| "learning_rate": 0.0005, |
| "loss": 3.810540199279785, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.7854103343465045, |
| "grad_norm": 1.0943037271499634, |
| "learning_rate": 0.0005, |
| "loss": 3.761442184448242, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.7860182370820669, |
| "grad_norm": 1.0425827503204346, |
| "learning_rate": 0.0005, |
| "loss": 3.7996015548706055, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.7866261398176292, |
| "grad_norm": 1.5982511043548584, |
| "learning_rate": 0.0005, |
| "loss": 3.8388147354125977, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.7872340425531915, |
| "grad_norm": 1.4619585275650024, |
| "learning_rate": 0.0005, |
| "loss": 4.016120910644531, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.7878419452887538, |
| "grad_norm": 1.3633700609207153, |
| "learning_rate": 0.0005, |
| "loss": 4.069618225097656, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.7884498480243161, |
| "grad_norm": 1.009056568145752, |
| "learning_rate": 0.0005, |
| "loss": 4.0978264808654785, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.7890577507598784, |
| "grad_norm": 1.1812894344329834, |
| "learning_rate": 0.0005, |
| "loss": 3.6178488731384277, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.7896656534954407, |
| "grad_norm": 1.0647777318954468, |
| "learning_rate": 0.0005, |
| "loss": 3.910210371017456, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.790273556231003, |
| "grad_norm": 1.4413726329803467, |
| "learning_rate": 0.0005, |
| "loss": 3.9200563430786133, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.7908814589665654, |
| "grad_norm": 1.1021374464035034, |
| "learning_rate": 0.0005, |
| "loss": 3.680574655532837, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.7914893617021277, |
| "grad_norm": 1.0827854871749878, |
| "learning_rate": 0.0005, |
| "loss": 3.842402458190918, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.79209726443769, |
| "grad_norm": 1.2615513801574707, |
| "learning_rate": 0.0005, |
| "loss": 4.0547590255737305, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.7927051671732522, |
| "grad_norm": 1.0599168539047241, |
| "learning_rate": 0.0005, |
| "loss": 3.8400776386260986, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.7933130699088146, |
| "grad_norm": 1.4258071184158325, |
| "learning_rate": 0.0005, |
| "loss": 3.945885181427002, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.7939209726443769, |
| "grad_norm": 1.107612133026123, |
| "learning_rate": 0.0005, |
| "loss": 3.6351089477539062, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.7945288753799392, |
| "grad_norm": 0.9725725650787354, |
| "learning_rate": 0.0005, |
| "loss": 3.5905802249908447, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.7951367781155015, |
| "grad_norm": 1.3178088665008545, |
| "learning_rate": 0.0005, |
| "loss": 4.063264846801758, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.7957446808510639, |
| "grad_norm": 1.111405611038208, |
| "learning_rate": 0.0005, |
| "loss": 3.70896053314209, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.7963525835866262, |
| "grad_norm": 1.0547385215759277, |
| "learning_rate": 0.0005, |
| "loss": 4.020359516143799, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.7969604863221884, |
| "grad_norm": 1.1632133722305298, |
| "learning_rate": 0.0005, |
| "loss": 3.8566200733184814, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.7975683890577507, |
| "grad_norm": 1.0662367343902588, |
| "learning_rate": 0.0005, |
| "loss": 3.7626304626464844, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.7981762917933131, |
| "grad_norm": 1.0058962106704712, |
| "learning_rate": 0.0005, |
| "loss": 3.667207956314087, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.7987841945288754, |
| "grad_norm": 1.21786367893219, |
| "learning_rate": 0.0005, |
| "loss": 3.7486650943756104, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.7993920972644377, |
| "grad_norm": 1.576144814491272, |
| "learning_rate": 0.0005, |
| "loss": 3.836618185043335, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.0205941200256348, |
| "learning_rate": 0.0005, |
| "loss": 3.921718120574951, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.8006079027355623, |
| "grad_norm": 1.1202620267868042, |
| "learning_rate": 0.0005, |
| "loss": 3.979546308517456, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.8012158054711246, |
| "grad_norm": 1.266727089881897, |
| "learning_rate": 0.0004999886023671629, |
| "loss": 3.7467775344848633, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.8018237082066869, |
| "grad_norm": 1.1622782945632935, |
| "learning_rate": 0.0004999544105079001, |
| "loss": 4.046473503112793, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.8024316109422492, |
| "grad_norm": 1.1754651069641113, |
| "learning_rate": 0.0004998974275398614, |
| "loss": 3.6320791244506836, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.8030395136778116, |
| "grad_norm": 0.9786376953125, |
| "learning_rate": 0.0004998176586588145, |
| "loss": 3.6877191066741943, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.8036474164133739, |
| "grad_norm": 0.969366729259491, |
| "learning_rate": 0.0004997151111381707, |
| "loss": 3.766533374786377, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.8042553191489362, |
| "grad_norm": 0.9558953046798706, |
| "learning_rate": 0.0004995897943283221, |
| "loss": 4.06315803527832, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.8048632218844984, |
| "grad_norm": 0.8645924925804138, |
| "learning_rate": 0.0004994417196557883, |
| "loss": 3.838135004043579, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.8054711246200608, |
| "grad_norm": 0.8671835064888, |
| "learning_rate": 0.0004992709006221755, |
| "loss": 3.883330821990967, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.8060790273556231, |
| "grad_norm": 1.1144053936004639, |
| "learning_rate": 0.0004990773528029446, |
| "loss": 3.7989044189453125, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.8066869300911854, |
| "grad_norm": 1.0151537656784058, |
| "learning_rate": 0.0004988610938459917, |
| "loss": 4.007248878479004, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.8072948328267477, |
| "grad_norm": 1.2170069217681885, |
| "learning_rate": 0.0004986221434700379, |
| "loss": 3.8616843223571777, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.8079027355623101, |
| "grad_norm": 0.8724591135978699, |
| "learning_rate": 0.0004983605234628328, |
| "loss": 4.205953598022461, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.8085106382978723, |
| "grad_norm": 1.1466760635375977, |
| "learning_rate": 0.0004980762576791664, |
| "loss": 3.9655470848083496, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.8091185410334346, |
| "grad_norm": 1.1359692811965942, |
| "learning_rate": 0.000497769372038695, |
| "loss": 4.22013521194458, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.8097264437689969, |
| "grad_norm": 1.0394648313522339, |
| "learning_rate": 0.0004974398945235776, |
| "loss": 3.911543130874634, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.8103343465045593, |
| "grad_norm": 1.0383487939834595, |
| "learning_rate": 0.0004970878551759239, |
| "loss": 3.8219704627990723, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.8109422492401216, |
| "grad_norm": 1.0844473838806152, |
| "learning_rate": 0.000496713286095056, |
| "loss": 3.876410484313965, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.8115501519756839, |
| "grad_norm": 1.2770010232925415, |
| "learning_rate": 0.0004963162214345805, |
| "loss": 3.8071320056915283, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.8121580547112462, |
| "grad_norm": 1.0182770490646362, |
| "learning_rate": 0.0004958966973992754, |
| "loss": 3.6059393882751465, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.8127659574468085, |
| "grad_norm": 1.02802574634552, |
| "learning_rate": 0.0004954547522417877, |
| "loss": 3.669658660888672, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.8133738601823708, |
| "grad_norm": 1.1248687505722046, |
| "learning_rate": 0.0004949904262591467, |
| "loss": 3.9866435527801514, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.8139817629179331, |
| "grad_norm": 1.0492587089538574, |
| "learning_rate": 0.0004945037617890889, |
| "loss": 3.949676036834717, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.8145896656534954, |
| "grad_norm": 0.9690307974815369, |
| "learning_rate": 0.000493994803206198, |
| "loss": 3.748741626739502, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.8151975683890578, |
| "grad_norm": 1.465824842453003, |
| "learning_rate": 0.0004934635969178583, |
| "loss": 3.977262020111084, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.8158054711246201, |
| "grad_norm": 1.0349231958389282, |
| "learning_rate": 0.0004929101913600238, |
| "loss": 3.619255542755127, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.8164133738601824, |
| "grad_norm": 1.0467352867126465, |
| "learning_rate": 0.0004923346369928012, |
| "loss": 3.9860079288482666, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.8170212765957446, |
| "grad_norm": 1.0222679376602173, |
| "learning_rate": 0.0004917369862958494, |
| "loss": 3.830394744873047, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.817629179331307, |
| "grad_norm": 1.117563247680664, |
| "learning_rate": 0.0004911172937635942, |
| "loss": 3.7490053176879883, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.8182370820668693, |
| "grad_norm": 1.4361516237258911, |
| "learning_rate": 0.000490475615900259, |
| "loss": 3.8583335876464844, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.8188449848024316, |
| "grad_norm": 1.2900465726852417, |
| "learning_rate": 0.0004898120112147136, |
| "loss": 3.906479835510254, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.819452887537994, |
| "grad_norm": 1.1463675498962402, |
| "learning_rate": 0.0004891265402151381, |
| "loss": 3.9391555786132812, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.8200607902735563, |
| "grad_norm": 1.5694284439086914, |
| "learning_rate": 0.0004884192654035069, |
| "loss": 3.974485397338867, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.8206686930091185, |
| "grad_norm": 1.0200462341308594, |
| "learning_rate": 0.000487690251269889, |
| "loss": 3.6864073276519775, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.8212765957446808, |
| "grad_norm": 1.089603304862976, |
| "learning_rate": 0.0004869395642865676, |
| "loss": 3.7212605476379395, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.8218844984802431, |
| "grad_norm": 1.2351415157318115, |
| "learning_rate": 0.0004861672729019797, |
| "loss": 3.700591802597046, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.8224924012158055, |
| "grad_norm": 0.9957062602043152, |
| "learning_rate": 0.00048537344753447453, |
| "loss": 3.7319130897521973, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.8231003039513678, |
| "grad_norm": 1.056557059288025, |
| "learning_rate": 0.0004845581605658926, |
| "loss": 3.657074213027954, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.8237082066869301, |
| "grad_norm": 1.0980826616287231, |
| "learning_rate": 0.00048372148633496617, |
| "loss": 3.770319938659668, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.8243161094224924, |
| "grad_norm": 0.8664339780807495, |
| "learning_rate": 0.0004828635011305407, |
| "loss": 3.894157886505127, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.8249240121580547, |
| "grad_norm": 1.2869031429290771, |
| "learning_rate": 0.00048198428318461896, |
| "loss": 3.6396484375, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.825531914893617, |
| "grad_norm": 1.459326148033142, |
| "learning_rate": 0.0004810839126652275, |
| "loss": 4.004338264465332, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.8261398176291793, |
| "grad_norm": 1.1490086317062378, |
| "learning_rate": 0.0004801624716691072, |
| "loss": 4.074912071228027, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.8267477203647416, |
| "grad_norm": 1.0660607814788818, |
| "learning_rate": 0.00047922004421422726, |
| "loss": 3.8288257122039795, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.827355623100304, |
| "grad_norm": 1.1202282905578613, |
| "learning_rate": 0.00047825671623212454, |
| "loss": 3.728804111480713, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.8279635258358663, |
| "grad_norm": 1.07158625125885, |
| "learning_rate": 0.0004772725755600682, |
| "loss": 3.5751538276672363, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.8285714285714286, |
| "grad_norm": 1.008811354637146, |
| "learning_rate": 0.0004762677119330505, |
| "loss": 3.8057093620300293, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.8291793313069908, |
| "grad_norm": 1.4745137691497803, |
| "learning_rate": 0.00047524221697560476, |
| "loss": 3.8376100063323975, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.8297872340425532, |
| "grad_norm": 1.2719781398773193, |
| "learning_rate": 0.00047419618419345115, |
| "loss": 3.747580051422119, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.8303951367781155, |
| "grad_norm": 1.1576839685440063, |
| "learning_rate": 0.0004731297089649703, |
| "loss": 3.7823610305786133, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.8310030395136778, |
| "grad_norm": 1.0849125385284424, |
| "learning_rate": 0.0004720428885325069, |
| "loss": 3.9312424659729004, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.8316109422492401, |
| "grad_norm": 1.0173479318618774, |
| "learning_rate": 0.00047093582199350285, |
| "loss": 3.641855239868164, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.8322188449848025, |
| "grad_norm": 0.9390632510185242, |
| "learning_rate": 0.00046980861029146173, |
| "loss": 4.027669906616211, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.8328267477203647, |
| "grad_norm": 1.0367680788040161, |
| "learning_rate": 0.0004686613562067444, |
| "loss": 3.9053921699523926, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.833434650455927, |
| "grad_norm": 0.9983039498329163, |
| "learning_rate": 0.00046749416434719747, |
| "loss": 3.6601035594940186, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.8340425531914893, |
| "grad_norm": 1.2556368112564087, |
| "learning_rate": 0.00046630714113861507, |
| "loss": 3.643587350845337, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.8346504559270517, |
| "grad_norm": 1.0456199645996094, |
| "learning_rate": 0.00046510039481503486, |
| "loss": 3.689802646636963, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.835258358662614, |
| "grad_norm": 0.9730342626571655, |
| "learning_rate": 0.00046387403540886895, |
| "loss": 3.6004483699798584, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.8358662613981763, |
| "grad_norm": 1.2402491569519043, |
| "learning_rate": 0.00046262817474087127, |
| "loss": 3.6834664344787598, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.8364741641337387, |
| "grad_norm": 1.1597247123718262, |
| "learning_rate": 0.00046136292640994154, |
| "loss": 3.7525768280029297, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.8370820668693009, |
| "grad_norm": 1.2773432731628418, |
| "learning_rate": 0.0004600784057827671, |
| "loss": 3.862699508666992, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.8376899696048632, |
| "grad_norm": 1.2818998098373413, |
| "learning_rate": 0.00045877472998330385, |
| "loss": 4.099722385406494, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.8382978723404255, |
| "grad_norm": 1.0724464654922485, |
| "learning_rate": 0.0004574520178820965, |
| "loss": 3.8608179092407227, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.8389057750759878, |
| "grad_norm": 1.293906807899475, |
| "learning_rate": 0.0004561103900854401, |
| "loss": 3.723815441131592, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.8395136778115502, |
| "grad_norm": 1.0045194625854492, |
| "learning_rate": 0.0004547499689243829, |
| "loss": 3.7255592346191406, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.8401215805471125, |
| "grad_norm": 1.0186697244644165, |
| "learning_rate": 0.0004533708784435722, |
| "loss": 3.6717958450317383, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.8407294832826747, |
| "grad_norm": 1.0383477210998535, |
| "learning_rate": 0.0004519732443899435, |
| "loss": 3.681596279144287, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.841337386018237, |
| "grad_norm": 1.1144697666168213, |
| "learning_rate": 0.00045055719420125504, |
| "loss": 3.9934191703796387, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.8419452887537994, |
| "grad_norm": 1.056483268737793, |
| "learning_rate": 0.0004491228569944679, |
| "loss": 4.028287887573242, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.8425531914893617, |
| "grad_norm": 1.1046830415725708, |
| "learning_rate": 0.0004476703635539728, |
| "loss": 3.823612689971924, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.843161094224924, |
| "grad_norm": 1.1697293519973755, |
| "learning_rate": 0.00044619984631966527, |
| "loss": 3.7220816612243652, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.8437689969604864, |
| "grad_norm": 1.0626883506774902, |
| "learning_rate": 0.0004447114393748694, |
| "loss": 3.5306200981140137, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.8443768996960487, |
| "grad_norm": 1.153074026107788, |
| "learning_rate": 0.0004432052784341122, |
| "loss": 3.672762393951416, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.8449848024316109, |
| "grad_norm": 0.9894313812255859, |
| "learning_rate": 0.0004416815008307488, |
| "loss": 3.661726474761963, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.8455927051671732, |
| "grad_norm": 0.9667363166809082, |
| "learning_rate": 0.00044014024550444045, |
| "loss": 3.788522720336914, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.8462006079027355, |
| "grad_norm": 1.2645761966705322, |
| "learning_rate": 0.00043858165298848556, |
| "loss": 3.721158981323242, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.8468085106382979, |
| "grad_norm": 1.0775492191314697, |
| "learning_rate": 0.00043700586539700614, |
| "loss": 3.5772523880004883, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.8474164133738602, |
| "grad_norm": 1.0271198749542236, |
| "learning_rate": 0.00043541302641198946, |
| "loss": 3.820373058319092, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.8480243161094225, |
| "grad_norm": 0.8722153902053833, |
| "learning_rate": 0.00043380328127018663, |
| "loss": 3.610518455505371, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.8486322188449849, |
| "grad_norm": 1.0228782892227173, |
| "learning_rate": 0.00043217677674987047, |
| "loss": 3.7967772483825684, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.8492401215805471, |
| "grad_norm": 1.0525845289230347, |
| "learning_rate": 0.00043053366115745174, |
| "loss": 3.623091697692871, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.8498480243161094, |
| "grad_norm": 0.998408317565918, |
| "learning_rate": 0.00042887408431395614, |
| "loss": 3.685908317565918, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.8504559270516717, |
| "grad_norm": 1.026895523071289, |
| "learning_rate": 0.0004271981975413639, |
| "loss": 3.5633139610290527, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.851063829787234, |
| "grad_norm": 1.0553339719772339, |
| "learning_rate": 0.00042550615364881196, |
| "loss": 3.833423137664795, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.8516717325227964, |
| "grad_norm": 1.1019606590270996, |
| "learning_rate": 0.00042379810691866064, |
| "loss": 3.8337411880493164, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.8522796352583587, |
| "grad_norm": 1.7001227140426636, |
| "learning_rate": 0.0004220742130924257, |
| "loss": 3.495081663131714, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.8528875379939209, |
| "grad_norm": 1.0422172546386719, |
| "learning_rate": 0.0004203346293565784, |
| "loss": 3.7549071311950684, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.8534954407294832, |
| "grad_norm": 1.2587510347366333, |
| "learning_rate": 0.0004185795143282123, |
| "loss": 3.770139217376709, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.8541033434650456, |
| "grad_norm": 1.1424074172973633, |
| "learning_rate": 0.00041680902804058095, |
| "loss": 3.779757499694824, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.8547112462006079, |
| "grad_norm": 1.0849041938781738, |
| "learning_rate": 0.0004150233319285055, |
| "loss": 3.8310835361480713, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.8553191489361702, |
| "grad_norm": 1.1193660497665405, |
| "learning_rate": 0.00041322258881365515, |
| "loss": 3.7291765213012695, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.8559270516717326, |
| "grad_norm": 1.1108680963516235, |
| "learning_rate": 0.0004114069628897006, |
| "loss": 4.129992485046387, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.8565349544072949, |
| "grad_norm": 1.1723637580871582, |
| "learning_rate": 0.0004095766197073432, |
| "loss": 3.6475980281829834, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 1.0332688093185425, |
| "learning_rate": 0.0004077317261592194, |
| "loss": 3.8548192977905273, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.8577507598784194, |
| "grad_norm": 1.0339442491531372, |
| "learning_rate": 0.0004058724504646834, |
| "loss": 3.8385329246520996, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.8583586626139817, |
| "grad_norm": 1.0235612392425537, |
| "learning_rate": 0.000403998962154469, |
| "loss": 3.8993711471557617, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.8589665653495441, |
| "grad_norm": 0.8945487141609192, |
| "learning_rate": 0.0004021114320552311, |
| "loss": 3.681536912918091, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.8595744680851064, |
| "grad_norm": 0.907351016998291, |
| "learning_rate": 0.00040021003227397014, |
| "loss": 3.751767635345459, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.8601823708206687, |
| "grad_norm": 0.8751946091651917, |
| "learning_rate": 0.0003982949361823388, |
| "loss": 3.6982154846191406, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.8607902735562311, |
| "grad_norm": 0.9630452990531921, |
| "learning_rate": 0.0003963663184008338, |
| "loss": 3.7995591163635254, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.8613981762917933, |
| "grad_norm": 1.0385856628417969, |
| "learning_rate": 0.0003944243547828742, |
| "loss": 3.583292007446289, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.8620060790273556, |
| "grad_norm": 1.1438446044921875, |
| "learning_rate": 0.000392469222398766, |
| "loss": 3.97701096534729, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.8626139817629179, |
| "grad_norm": 1.0620638132095337, |
| "learning_rate": 0.00039050109951955814, |
| "loss": 3.5980987548828125, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.8632218844984803, |
| "grad_norm": 0.978015661239624, |
| "learning_rate": 0.000388520165600786, |
| "loss": 3.7316596508026123, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.8638297872340426, |
| "grad_norm": 1.0127967596054077, |
| "learning_rate": 0.0003865266012661095, |
| "loss": 3.9404823780059814, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.8644376899696049, |
| "grad_norm": 1.2003899812698364, |
| "learning_rate": 0.0003845205882908432, |
| "loss": 3.918931245803833, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.8650455927051671, |
| "grad_norm": 1.1857889890670776, |
| "learning_rate": 0.000382502309585382, |
| "loss": 3.6090548038482666, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.8656534954407294, |
| "grad_norm": 1.2434966564178467, |
| "learning_rate": 0.000380471949178523, |
| "loss": 3.467123031616211, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.8662613981762918, |
| "grad_norm": 1.2050342559814453, |
| "learning_rate": 0.0003784296922006859, |
| "loss": 3.696073055267334, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.8668693009118541, |
| "grad_norm": 1.1129356622695923, |
| "learning_rate": 0.0003763757248670321, |
| "loss": 3.715449810028076, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.8674772036474164, |
| "grad_norm": 1.1708143949508667, |
| "learning_rate": 0.00037431023446048595, |
| "loss": 3.860975980758667, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.8680851063829788, |
| "grad_norm": 1.1058366298675537, |
| "learning_rate": 0.0003722334093146576, |
| "loss": 3.7916457653045654, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.8686930091185411, |
| "grad_norm": 1.274646520614624, |
| "learning_rate": 0.00037014543879667093, |
| "loss": 3.8200652599334717, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.8693009118541033, |
| "grad_norm": 1.2253806591033936, |
| "learning_rate": 0.00036804651328989666, |
| "loss": 3.522810459136963, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.8699088145896656, |
| "grad_norm": 1.1936273574829102, |
| "learning_rate": 0.000365936824176593, |
| "loss": 3.9892830848693848, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.870516717325228, |
| "grad_norm": 1.1107733249664307, |
| "learning_rate": 0.00036381656382045526, |
| "loss": 3.6833291053771973, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.8711246200607903, |
| "grad_norm": 1.4528982639312744, |
| "learning_rate": 0.00036168592554907596, |
| "loss": 3.4317424297332764, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.8717325227963526, |
| "grad_norm": 1.539918303489685, |
| "learning_rate": 0.0003595451036363168, |
| "loss": 3.9146463871002197, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.8723404255319149, |
| "grad_norm": 1.0589654445648193, |
| "learning_rate": 0.00035739429328459493, |
| "loss": 3.64989972114563, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.8729483282674773, |
| "grad_norm": 0.9970619082450867, |
| "learning_rate": 0.0003552336906070838, |
| "loss": 3.7197318077087402, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.8735562310030395, |
| "grad_norm": 1.1559967994689941, |
| "learning_rate": 0.0003530634926098316, |
| "loss": 3.835594892501831, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.8741641337386018, |
| "grad_norm": 1.0069043636322021, |
| "learning_rate": 0.0003508838971737981, |
| "loss": 3.8029980659484863, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.8747720364741641, |
| "grad_norm": 1.3581100702285767, |
| "learning_rate": 0.0003486951030368113, |
| "loss": 3.6824827194213867, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.8753799392097265, |
| "grad_norm": 1.7533200979232788, |
| "learning_rate": 0.00034649730977544664, |
| "loss": 3.7235536575317383, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.8759878419452888, |
| "grad_norm": 1.0940066576004028, |
| "learning_rate": 0.0003442907177868293, |
| "loss": 3.6482458114624023, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.8765957446808511, |
| "grad_norm": 1.0252796411514282, |
| "learning_rate": 0.00034207552827036176, |
| "loss": 3.634884834289551, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.8772036474164133, |
| "grad_norm": 1.3038619756698608, |
| "learning_rate": 0.0003398519432093782, |
| "loss": 3.886862277984619, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.8778115501519757, |
| "grad_norm": 1.5358000993728638, |
| "learning_rate": 0.00033762016535272745, |
| "loss": 3.916736125946045, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.878419452887538, |
| "grad_norm": 1.0540707111358643, |
| "learning_rate": 0.00033538039819628625, |
| "loss": 3.914485454559326, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.8790273556231003, |
| "grad_norm": 1.0498977899551392, |
| "learning_rate": 0.000333132845964404, |
| "loss": 3.6423823833465576, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.8796352583586626, |
| "grad_norm": 2.2342031002044678, |
| "learning_rate": 0.00033087771359128175, |
| "loss": 3.7215816974639893, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.880243161094225, |
| "grad_norm": 1.4365023374557495, |
| "learning_rate": 0.00032861520670228586, |
| "loss": 3.7631328105926514, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.8808510638297873, |
| "grad_norm": 2.098018169403076, |
| "learning_rate": 0.00032634553159519865, |
| "loss": 3.4372754096984863, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.8814589665653495, |
| "grad_norm": 0.9924235939979553, |
| "learning_rate": 0.0003240688952214085, |
| "loss": 4.062948226928711, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.8820668693009118, |
| "grad_norm": 1.2176319360733032, |
| "learning_rate": 0.0003217855051670393, |
| "loss": 3.6439735889434814, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.8826747720364742, |
| "grad_norm": 1.2388694286346436, |
| "learning_rate": 0.00031949556963402283, |
| "loss": 3.8236451148986816, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.8832826747720365, |
| "grad_norm": 0.8196237683296204, |
| "learning_rate": 0.00031719929742111437, |
| "loss": 3.686429977416992, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.8838905775075988, |
| "grad_norm": 0.9667937755584717, |
| "learning_rate": 0.00031489689790485464, |
| "loss": 4.012905120849609, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.8844984802431611, |
| "grad_norm": 0.9525713920593262, |
| "learning_rate": 0.00031258858102047813, |
| "loss": 3.484525680541992, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.8851063829787233, |
| "grad_norm": 1.0953255891799927, |
| "learning_rate": 0.0003102745572427716, |
| "loss": 3.6367969512939453, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.8857142857142857, |
| "grad_norm": 0.8041018843650818, |
| "learning_rate": 0.0003079550375668821, |
| "loss": 3.627480983734131, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.886322188449848, |
| "grad_norm": 0.9474192261695862, |
| "learning_rate": 0.0003056302334890786, |
| "loss": 3.771761894226074, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.8869300911854103, |
| "grad_norm": 0.9393053650856018, |
| "learning_rate": 0.00030330035698746753, |
| "loss": 3.4475784301757812, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.8875379939209727, |
| "grad_norm": 1.0495154857635498, |
| "learning_rate": 0.00030096562050266427, |
| "loss": 3.7747950553894043, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.888145896656535, |
| "grad_norm": 1.0793986320495605, |
| "learning_rate": 0.0002986262369184226, |
| "loss": 3.5836963653564453, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.8887537993920973, |
| "grad_norm": 1.0350525379180908, |
| "learning_rate": 0.0002962824195422238, |
| "loss": 3.8103108406066895, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.8893617021276595, |
| "grad_norm": 0.923565149307251, |
| "learning_rate": 0.0002939343820858269, |
| "loss": 3.5080511569976807, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.8899696048632219, |
| "grad_norm": 0.9419893026351929, |
| "learning_rate": 0.00029158233864578256, |
| "loss": 3.5664780139923096, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.8905775075987842, |
| "grad_norm": 1.120071530342102, |
| "learning_rate": 0.000289226503683911, |
| "loss": 3.5272912979125977, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.8911854103343465, |
| "grad_norm": 0.9679391980171204, |
| "learning_rate": 0.0002868670920077478, |
| "loss": 3.7422268390655518, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.8917933130699088, |
| "grad_norm": 0.8348677754402161, |
| "learning_rate": 0.0002845043187509567, |
| "loss": 3.677544355392456, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.8924012158054712, |
| "grad_norm": 0.8530043959617615, |
| "learning_rate": 0.0002821383993537144, |
| "loss": 3.6701407432556152, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.8930091185410335, |
| "grad_norm": 0.9390717148780823, |
| "learning_rate": 0.00027976954954306554, |
| "loss": 3.7104759216308594, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.8936170212765957, |
| "grad_norm": 1.0507652759552002, |
| "learning_rate": 0.0002773979853132534, |
| "loss": 3.5879673957824707, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.894224924012158, |
| "grad_norm": 0.9291044473648071, |
| "learning_rate": 0.0002750239229060246, |
| "loss": 3.655197858810425, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.8948328267477204, |
| "grad_norm": 0.9448993802070618, |
| "learning_rate": 0.0002726475787909125, |
| "loss": 3.6126198768615723, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.8954407294832827, |
| "grad_norm": 0.9143878221511841, |
| "learning_rate": 0.0002702691696454986, |
| "loss": 3.7886955738067627, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.896048632218845, |
| "grad_norm": 0.8731086850166321, |
| "learning_rate": 0.00026788891233565655, |
| "loss": 3.4998018741607666, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.8966565349544073, |
| "grad_norm": 1.0264720916748047, |
| "learning_rate": 0.0002655070238957772, |
| "loss": 3.7816460132598877, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.8972644376899696, |
| "grad_norm": 0.9198083877563477, |
| "learning_rate": 0.0002631237215089798, |
| "loss": 3.2887322902679443, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.8978723404255319, |
| "grad_norm": 1.2048431634902954, |
| "learning_rate": 0.0002607392224873087, |
| "loss": 3.534536838531494, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.8984802431610942, |
| "grad_norm": 0.9676938652992249, |
| "learning_rate": 0.0002583537442519187, |
| "loss": 3.5746755599975586, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.8990881458966565, |
| "grad_norm": 1.2043583393096924, |
| "learning_rate": 0.00025596750431325024, |
| "loss": 3.4282994270324707, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.8996960486322189, |
| "grad_norm": 1.0472460985183716, |
| "learning_rate": 0.0002535807202511969, |
| "loss": 3.4300684928894043, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.9003039513677812, |
| "grad_norm": 0.990128755569458, |
| "learning_rate": 0.00025119360969526577, |
| "loss": 3.663133144378662, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.9009118541033435, |
| "grad_norm": 1.091579556465149, |
| "learning_rate": 0.0002488063903047342, |
| "loss": 3.5936460494995117, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.9015197568389057, |
| "grad_norm": 0.976894736289978, |
| "learning_rate": 0.00024641927974880317, |
| "loss": 3.4573283195495605, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.902127659574468, |
| "grad_norm": 1.1969588994979858, |
| "learning_rate": 0.00024403249568674982, |
| "loss": 3.7035064697265625, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.9027355623100304, |
| "grad_norm": 0.927419126033783, |
| "learning_rate": 0.00024164625574808144, |
| "loss": 3.5091121196746826, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.9033434650455927, |
| "grad_norm": 1.0960835218429565, |
| "learning_rate": 0.00023926077751269128, |
| "loss": 3.453946590423584, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.903951367781155, |
| "grad_norm": 0.8569730520248413, |
| "learning_rate": 0.00023687627849102022, |
| "loss": 3.5202674865722656, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.9045592705167174, |
| "grad_norm": 0.9676769971847534, |
| "learning_rate": 0.00023449297610422286, |
| "loss": 3.6863176822662354, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.9051671732522797, |
| "grad_norm": 1.2797845602035522, |
| "learning_rate": 0.0002321110876643435, |
| "loss": 3.7138781547546387, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.9057750759878419, |
| "grad_norm": 1.0561813116073608, |
| "learning_rate": 0.0002297308303545014, |
| "loss": 3.5111289024353027, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.9063829787234042, |
| "grad_norm": 0.9892807602882385, |
| "learning_rate": 0.00022735242120908755, |
| "loss": 3.7911901473999023, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.9069908814589666, |
| "grad_norm": 1.0635170936584473, |
| "learning_rate": 0.0002249760770939754, |
| "loss": 3.6134800910949707, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.9075987841945289, |
| "grad_norm": 0.944739043712616, |
| "learning_rate": 0.0002226020146867467, |
| "loss": 3.7158658504486084, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.9082066869300912, |
| "grad_norm": 0.9955059885978699, |
| "learning_rate": 0.00022023045045693447, |
| "loss": 3.514943838119507, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.9088145896656535, |
| "grad_norm": 1.113900899887085, |
| "learning_rate": 0.00021786160064628569, |
| "loss": 3.594541311264038, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.9094224924012158, |
| "grad_norm": 1.1223993301391602, |
| "learning_rate": 0.00021549568124904332, |
| "loss": 3.850419044494629, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.9100303951367781, |
| "grad_norm": 1.191264033317566, |
| "learning_rate": 0.0002131329079922523, |
| "loss": 3.686944007873535, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.9106382978723404, |
| "grad_norm": 1.1577918529510498, |
| "learning_rate": 0.00021077349631608893, |
| "loss": 3.57655668258667, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.9112462006079027, |
| "grad_norm": 1.1782022714614868, |
| "learning_rate": 0.0002084176613542175, |
| "loss": 3.459223747253418, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.9118541033434651, |
| "grad_norm": 0.9181815385818481, |
| "learning_rate": 0.00020606561791417316, |
| "loss": 3.357797145843506, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.9124620060790274, |
| "grad_norm": 0.9326145052909851, |
| "learning_rate": 0.00020371758045777634, |
| "loss": 3.795490264892578, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.9130699088145897, |
| "grad_norm": 1.052650809288025, |
| "learning_rate": 0.0002013737630815774, |
| "loss": 3.694157123565674, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.9136778115501519, |
| "grad_norm": 1.2775269746780396, |
| "learning_rate": 0.00019903437949733574, |
| "loss": 3.7787959575653076, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.9142857142857143, |
| "grad_norm": 1.1516906023025513, |
| "learning_rate": 0.0001966996430125325, |
| "loss": 3.7343735694885254, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.9148936170212766, |
| "grad_norm": 1.0966649055480957, |
| "learning_rate": 0.00019436976651092142, |
| "loss": 3.6604979038238525, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.9155015197568389, |
| "grad_norm": 1.0062313079833984, |
| "learning_rate": 0.00019204496243311792, |
| "loss": 3.5538320541381836, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.9161094224924012, |
| "grad_norm": 1.0233067274093628, |
| "learning_rate": 0.00018972544275722847, |
| "loss": 3.5795540809631348, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.9167173252279636, |
| "grad_norm": 0.8073869943618774, |
| "learning_rate": 0.00018741141897952188, |
| "loss": 3.6914095878601074, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.9173252279635259, |
| "grad_norm": 1.207029938697815, |
| "learning_rate": 0.00018510310209514548, |
| "loss": 3.5582213401794434, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.9179331306990881, |
| "grad_norm": 1.0669058561325073, |
| "learning_rate": 0.00018280070257888564, |
| "loss": 3.6697239875793457, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.9185410334346504, |
| "grad_norm": 0.8246331810951233, |
| "learning_rate": 0.00018050443036597718, |
| "loss": 3.2567076683044434, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.9191489361702128, |
| "grad_norm": 0.9702603220939636, |
| "learning_rate": 0.0001782144948329607, |
| "loss": 3.505850315093994, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.9197568389057751, |
| "grad_norm": 0.9250158667564392, |
| "learning_rate": 0.00017593110477859153, |
| "loss": 3.495941400527954, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.9203647416413374, |
| "grad_norm": 0.9441630840301514, |
| "learning_rate": 0.0001736544684048013, |
| "loss": 3.563925266265869, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.9209726443768997, |
| "grad_norm": 0.9085375666618347, |
| "learning_rate": 0.00017138479329771418, |
| "loss": 3.657780408859253, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.921580547112462, |
| "grad_norm": 1.232784390449524, |
| "learning_rate": 0.00016912228640871823, |
| "loss": 3.67918062210083, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.9221884498480243, |
| "grad_norm": 1.0922844409942627, |
| "learning_rate": 0.00016686715403559606, |
| "loss": 3.6496686935424805, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.9227963525835866, |
| "grad_norm": 1.0033621788024902, |
| "learning_rate": 0.00016461960180371384, |
| "loss": 3.601614236831665, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.9234042553191489, |
| "grad_norm": 0.8341890573501587, |
| "learning_rate": 0.0001623798346472725, |
| "loss": 3.4543566703796387, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.9240121580547113, |
| "grad_norm": 0.9354051351547241, |
| "learning_rate": 0.00016014805679062183, |
| "loss": 3.702263355255127, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.9246200607902736, |
| "grad_norm": 1.1756550073623657, |
| "learning_rate": 0.0001579244717296383, |
| "loss": 3.349442958831787, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.9252279635258359, |
| "grad_norm": 1.068192481994629, |
| "learning_rate": 0.00015570928221317074, |
| "loss": 3.4384844303131104, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.9258358662613981, |
| "grad_norm": 1.013655662536621, |
| "learning_rate": 0.0001535026902245534, |
| "loss": 3.6004717350006104, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.9264437689969605, |
| "grad_norm": 0.8313837647438049, |
| "learning_rate": 0.00015130489696318876, |
| "loss": 3.6479220390319824, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.9270516717325228, |
| "grad_norm": 1.0792638063430786, |
| "learning_rate": 0.00014911610282620198, |
| "loss": 3.4788827896118164, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.9276595744680851, |
| "grad_norm": 1.0076600313186646, |
| "learning_rate": 0.00014693650739016845, |
| "loss": 3.693624496459961, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.9282674772036474, |
| "grad_norm": 0.966367781162262, |
| "learning_rate": 0.0001447663093929163, |
| "loss": 3.3769173622131348, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.9288753799392098, |
| "grad_norm": 0.8928171992301941, |
| "learning_rate": 0.00014260570671540518, |
| "loss": 3.5232667922973633, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.929483282674772, |
| "grad_norm": 1.2635657787322998, |
| "learning_rate": 0.0001404548963636833, |
| "loss": 3.521796941757202, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.9300911854103343, |
| "grad_norm": 0.9003745913505554, |
| "learning_rate": 0.00013831407445092408, |
| "loss": 3.535503625869751, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.9306990881458966, |
| "grad_norm": 0.8693346381187439, |
| "learning_rate": 0.00013618343617954477, |
| "loss": 3.401242256164551, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.931306990881459, |
| "grad_norm": 1.002539873123169, |
| "learning_rate": 0.00013406317582340693, |
| "loss": 3.4170312881469727, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.9319148936170213, |
| "grad_norm": 1.2657451629638672, |
| "learning_rate": 0.00013195348671010332, |
| "loss": 3.7101693153381348, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.9325227963525836, |
| "grad_norm": 1.0045870542526245, |
| "learning_rate": 0.00012985456120332905, |
| "loss": 3.4842121601104736, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.9331306990881459, |
| "grad_norm": 1.0290786027908325, |
| "learning_rate": 0.00012776659068534235, |
| "loss": 3.3916993141174316, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.9337386018237082, |
| "grad_norm": 0.8334917426109314, |
| "learning_rate": 0.00012568976553951406, |
| "loss": 3.6256322860717773, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.9343465045592705, |
| "grad_norm": 1.2672406435012817, |
| "learning_rate": 0.0001236242751329679, |
| "loss": 3.4259722232818604, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.9349544072948328, |
| "grad_norm": 1.0822962522506714, |
| "learning_rate": 0.00012157030779931416, |
| "loss": 3.692249298095703, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.9355623100303951, |
| "grad_norm": 1.1837276220321655, |
| "learning_rate": 0.00011952805082147698, |
| "loss": 3.5715885162353516, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.9361702127659575, |
| "grad_norm": 1.699076771736145, |
| "learning_rate": 0.00011749769041461803, |
| "loss": 3.6052627563476562, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.9367781155015198, |
| "grad_norm": 0.893450140953064, |
| "learning_rate": 0.00011547941170915685, |
| "loss": 3.8327198028564453, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.9373860182370821, |
| "grad_norm": 0.9706213474273682, |
| "learning_rate": 0.00011347339873389057, |
| "loss": 3.9060354232788086, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.9379939209726443, |
| "grad_norm": 0.9164102673530579, |
| "learning_rate": 0.000111479834399214, |
| "loss": 3.4450764656066895, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.9386018237082067, |
| "grad_norm": 1.0395925045013428, |
| "learning_rate": 0.00010949890048044187, |
| "loss": 3.5956122875213623, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.939209726443769, |
| "grad_norm": 0.9463509917259216, |
| "learning_rate": 0.00010753077760123398, |
| "loss": 3.449873447418213, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.9398176291793313, |
| "grad_norm": 1.0291234254837036, |
| "learning_rate": 0.00010557564521712592, |
| "loss": 3.613814353942871, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.9404255319148936, |
| "grad_norm": 1.0308637619018555, |
| "learning_rate": 0.00010363368159916613, |
| "loss": 3.2967031002044678, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.941033434650456, |
| "grad_norm": 1.1693090200424194, |
| "learning_rate": 0.0001017050638176612, |
| "loss": 3.63893723487854, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.9416413373860182, |
| "grad_norm": 1.174479603767395, |
| "learning_rate": 9.978996772602989e-05, |
| "loss": 3.589515447616577, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.9422492401215805, |
| "grad_norm": 1.0119879245758057, |
| "learning_rate": 9.788856794476897e-05, |
| "loss": 3.7604751586914062, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.9428571428571428, |
| "grad_norm": 0.893473744392395, |
| "learning_rate": 9.600103784553108e-05, |
| "loss": 3.652989387512207, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.9434650455927052, |
| "grad_norm": 0.9835174679756165, |
| "learning_rate": 9.412754953531663e-05, |
| "loss": 3.698601484298706, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.9440729483282675, |
| "grad_norm": 1.1016976833343506, |
| "learning_rate": 9.226827384078068e-05, |
| "loss": 3.640305519104004, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.9446808510638298, |
| "grad_norm": 1.1644668579101562, |
| "learning_rate": 9.042338029265687e-05, |
| "loss": 3.5507678985595703, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.9452887537993921, |
| "grad_norm": 0.9082716107368469, |
| "learning_rate": 8.85930371102994e-05, |
| "loss": 3.6422548294067383, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.9458966565349544, |
| "grad_norm": 1.2923622131347656, |
| "learning_rate": 8.677741118634491e-05, |
| "loss": 3.508974075317383, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.9465045592705167, |
| "grad_norm": 0.9412240982055664, |
| "learning_rate": 8.497666807149454e-05, |
| "loss": 3.53934383392334, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.947112462006079, |
| "grad_norm": 1.0613443851470947, |
| "learning_rate": 8.31909719594191e-05, |
| "loss": 3.712977886199951, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.9477203647416413, |
| "grad_norm": 1.029699444770813, |
| "learning_rate": 8.142048567178767e-05, |
| "loss": 3.444307804107666, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.9483282674772037, |
| "grad_norm": 1.0572737455368042, |
| "learning_rate": 7.966537064342167e-05, |
| "loss": 3.4443862438201904, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.948936170212766, |
| "grad_norm": 0.9910191893577576, |
| "learning_rate": 7.792578690757438e-05, |
| "loss": 3.5343542098999023, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.9495440729483283, |
| "grad_norm": 0.9332476258277893, |
| "learning_rate": 7.620189308133943e-05, |
| "loss": 3.6240739822387695, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.9501519756838905, |
| "grad_norm": 0.9007086753845215, |
| "learning_rate": 7.449384635118806e-05, |
| "loss": 3.680243492126465, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.9507598784194529, |
| "grad_norm": 0.9486724734306335, |
| "learning_rate": 7.280180245863616e-05, |
| "loss": 3.371225595474243, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.9513677811550152, |
| "grad_norm": 0.9479886889457703, |
| "learning_rate": 7.112591568604387e-05, |
| "loss": 3.7687153816223145, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.9519756838905775, |
| "grad_norm": 1.043379783630371, |
| "learning_rate": 6.94663388425483e-05, |
| "loss": 3.6657915115356445, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.9525835866261398, |
| "grad_norm": 0.9583994150161743, |
| "learning_rate": 6.782322325012946e-05, |
| "loss": 3.3854408264160156, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.9531914893617022, |
| "grad_norm": 0.9662649631500244, |
| "learning_rate": 6.619671872981339e-05, |
| "loss": 3.5261952877044678, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.9537993920972644, |
| "grad_norm": 0.9786058664321899, |
| "learning_rate": 6.458697358801061e-05, |
| "loss": 3.2436532974243164, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.9544072948328267, |
| "grad_norm": 0.9485008716583252, |
| "learning_rate": 6.299413460299386e-05, |
| "loss": 3.882096290588379, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.955015197568389, |
| "grad_norm": 0.9125201106071472, |
| "learning_rate": 6.141834701151439e-05, |
| "loss": 3.537277936935425, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.9556231003039514, |
| "grad_norm": 1.358864426612854, |
| "learning_rate": 5.985975449555955e-05, |
| "loss": 3.5532758235931396, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.9562310030395137, |
| "grad_norm": 0.9642499685287476, |
| "learning_rate": 5.8318499169251194e-05, |
| "loss": 3.5841989517211914, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.956838905775076, |
| "grad_norm": 0.9569288492202759, |
| "learning_rate": 5.679472156588783e-05, |
| "loss": 3.5072245597839355, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.9574468085106383, |
| "grad_norm": 0.8829518556594849, |
| "learning_rate": 5.528856062513057e-05, |
| "loss": 3.4685416221618652, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.9580547112462006, |
| "grad_norm": 0.9083385467529297, |
| "learning_rate": 5.3800153680334754e-05, |
| "loss": 3.499147653579712, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.9586626139817629, |
| "grad_norm": 1.156195878982544, |
| "learning_rate": 5.2329636446027255e-05, |
| "loss": 3.7224578857421875, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.9592705167173252, |
| "grad_norm": 1.125429630279541, |
| "learning_rate": 5.08771430055322e-05, |
| "loss": 3.730634927749634, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.9598784194528875, |
| "grad_norm": 0.9047555327415466, |
| "learning_rate": 4.944280579874497e-05, |
| "loss": 3.502720832824707, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.9604863221884499, |
| "grad_norm": 0.9419044256210327, |
| "learning_rate": 4.8026755610056536e-05, |
| "loss": 3.615196704864502, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.9610942249240122, |
| "grad_norm": 0.9898232817649841, |
| "learning_rate": 4.6629121556427816e-05, |
| "loss": 3.3702540397644043, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.9617021276595744, |
| "grad_norm": 0.8856302499771118, |
| "learning_rate": 4.525003107561709e-05, |
| "loss": 3.349830389022827, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.9623100303951367, |
| "grad_norm": 0.8713822364807129, |
| "learning_rate": 4.388960991455998e-05, |
| "loss": 3.3434977531433105, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.9629179331306991, |
| "grad_norm": 0.9926859736442566, |
| "learning_rate": 4.254798211790356e-05, |
| "loss": 3.6762614250183105, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.9635258358662614, |
| "grad_norm": 0.8845980763435364, |
| "learning_rate": 4.122527001669624e-05, |
| "loss": 3.692495822906494, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.9641337386018237, |
| "grad_norm": 10.871472358703613, |
| "learning_rate": 3.992159421723296e-05, |
| "loss": 3.785922050476074, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.964741641337386, |
| "grad_norm": 0.9124519228935242, |
| "learning_rate": 3.863707359005852e-05, |
| "loss": 3.477011203765869, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.9653495440729484, |
| "grad_norm": 0.849189043045044, |
| "learning_rate": 3.7371825259128746e-05, |
| "loss": 3.426685094833374, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.9659574468085106, |
| "grad_norm": 1.1374505758285522, |
| "learning_rate": 3.6125964591131074e-05, |
| "loss": 3.5459938049316406, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.9665653495440729, |
| "grad_norm": 0.8259597420692444, |
| "learning_rate": 3.489960518496521e-05, |
| "loss": 3.582315444946289, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.9671732522796352, |
| "grad_norm": 1.303789734840393, |
| "learning_rate": 3.369285886138498e-05, |
| "loss": 3.407935619354248, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.9677811550151976, |
| "grad_norm": 0.9721823930740356, |
| "learning_rate": 3.250583565280257e-05, |
| "loss": 3.3998048305511475, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.9683890577507599, |
| "grad_norm": 0.8930243849754333, |
| "learning_rate": 3.1338643793255655e-05, |
| "loss": 3.394867420196533, |
| "step": 1593 |
| }, |
| { |
| "epoch": 0.9689969604863222, |
| "grad_norm": 0.8214044570922852, |
| "learning_rate": 3.019138970853835e-05, |
| "loss": 3.3554325103759766, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.9696048632218845, |
| "grad_norm": 0.889566957950592, |
| "learning_rate": 2.906417800649719e-05, |
| "loss": 3.4998207092285156, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.9702127659574468, |
| "grad_norm": 0.9829185605049133, |
| "learning_rate": 2.795711146749316e-05, |
| "loss": 3.4992172718048096, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.9708206686930091, |
| "grad_norm": 1.16253662109375, |
| "learning_rate": 2.687029103502972e-05, |
| "loss": 3.6104440689086914, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.9714285714285714, |
| "grad_norm": 0.9969096183776855, |
| "learning_rate": 2.5803815806548838e-05, |
| "loss": 3.557669162750244, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.9720364741641337, |
| "grad_norm": 0.8945180773735046, |
| "learning_rate": 2.4757783024395242e-05, |
| "loss": 3.4996654987335205, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.9726443768996961, |
| "grad_norm": 1.0491377115249634, |
| "learning_rate": 2.3732288066949522e-05, |
| "loss": 3.5065901279449463, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.9732522796352584, |
| "grad_norm": 1.009756088256836, |
| "learning_rate": 2.2727424439931838e-05, |
| "loss": 3.4393069744110107, |
| "step": 1601 |
| }, |
| { |
| "epoch": 0.9738601823708206, |
| "grad_norm": 0.8027586936950684, |
| "learning_rate": 2.174328376787546e-05, |
| "loss": 3.944133758544922, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.9744680851063829, |
| "grad_norm": 0.9416375756263733, |
| "learning_rate": 2.077995578577277e-05, |
| "loss": 3.447531223297119, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.9750759878419453, |
| "grad_norm": 2.177234649658203, |
| "learning_rate": 1.9837528330892778e-05, |
| "loss": 3.523033380508423, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.9756838905775076, |
| "grad_norm": 1.0675021409988403, |
| "learning_rate": 1.8916087334772514e-05, |
| "loss": 3.67791748046875, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.9762917933130699, |
| "grad_norm": 0.960773229598999, |
| "learning_rate": 1.8015716815381083e-05, |
| "loss": 3.436983585357666, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.9768996960486322, |
| "grad_norm": 0.7909497022628784, |
| "learning_rate": 1.713649886945934e-05, |
| "loss": 3.523789882659912, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.9775075987841946, |
| "grad_norm": 0.9098278284072876, |
| "learning_rate": 1.627851366503383e-05, |
| "loss": 3.588881015777588, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.9781155015197568, |
| "grad_norm": 0.9856809973716736, |
| "learning_rate": 1.5441839434107403e-05, |
| "loss": 3.3807265758514404, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.9787234042553191, |
| "grad_norm": 0.8300499320030212, |
| "learning_rate": 1.4626552465525495e-05, |
| "loss": 3.3151605129241943, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.9793313069908814, |
| "grad_norm": 1.071272373199463, |
| "learning_rate": 1.3832727098020331e-05, |
| "loss": 3.476017951965332, |
| "step": 1611 |
| }, |
| { |
| "epoch": 0.9799392097264438, |
| "grad_norm": 0.9534451961517334, |
| "learning_rate": 1.3060435713432433e-05, |
| "loss": 3.5627832412719727, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.9805471124620061, |
| "grad_norm": 0.8548181056976318, |
| "learning_rate": 1.2309748730111092e-05, |
| "loss": 3.5384154319763184, |
| "step": 1613 |
| }, |
| { |
| "epoch": 0.9811550151975684, |
| "grad_norm": 0.874317467212677, |
| "learning_rate": 1.1580734596493114e-05, |
| "loss": 3.7433485984802246, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.9817629179331308, |
| "grad_norm": 0.7703690528869629, |
| "learning_rate": 1.0873459784861928e-05, |
| "loss": 3.5222666263580322, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.982370820668693, |
| "grad_norm": 0.9040242433547974, |
| "learning_rate": 1.0187988785286483e-05, |
| "loss": 3.4464478492736816, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.9829787234042553, |
| "grad_norm": 0.8175009489059448, |
| "learning_rate": 9.52438409974099e-06, |
| "loss": 3.663665294647217, |
| "step": 1617 |
| }, |
| { |
| "epoch": 0.9835866261398176, |
| "grad_norm": 0.7374025583267212, |
| "learning_rate": 8.882706236405884e-06, |
| "loss": 3.575946807861328, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.98419452887538, |
| "grad_norm": 0.8426094055175781, |
| "learning_rate": 8.263013704150613e-06, |
| "loss": 3.47537899017334, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.9848024316109423, |
| "grad_norm": 0.9192229509353638, |
| "learning_rate": 7.665363007198833e-06, |
| "loss": 3.783284902572632, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.9854103343465046, |
| "grad_norm": 1.0638452768325806, |
| "learning_rate": 7.08980863997627e-06, |
| "loss": 3.6294684410095215, |
| "step": 1621 |
| }, |
| { |
| "epoch": 0.9860182370820668, |
| "grad_norm": 0.8488450646400452, |
| "learning_rate": 6.536403082141679e-06, |
| "loss": 3.2655715942382812, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.9866261398176291, |
| "grad_norm": 0.9023237824440002, |
| "learning_rate": 6.005196793801987e-06, |
| "loss": 3.526134967803955, |
| "step": 1623 |
| }, |
| { |
| "epoch": 0.9872340425531915, |
| "grad_norm": 1.0265580415725708, |
| "learning_rate": 5.496238210911026e-06, |
| "loss": 3.8332390785217285, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.9878419452887538, |
| "grad_norm": 0.9923389554023743, |
| "learning_rate": 5.009573740853312e-06, |
| "loss": 3.502138614654541, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.9884498480243161, |
| "grad_norm": 0.9870060682296753, |
| "learning_rate": 4.545247758212356e-06, |
| "loss": 3.559659242630005, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.9890577507598785, |
| "grad_norm": 0.933310866355896, |
| "learning_rate": 4.103302600724723e-06, |
| "loss": 3.2468318939208984, |
| "step": 1627 |
| }, |
| { |
| "epoch": 0.9896656534954408, |
| "grad_norm": 0.9653894305229187, |
| "learning_rate": 3.68377856541946e-06, |
| "loss": 3.5143423080444336, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.990273556231003, |
| "grad_norm": 0.8419545888900757, |
| "learning_rate": 3.2867139049440333e-06, |
| "loss": 3.6678714752197266, |
| "step": 1629 |
| }, |
| { |
| "epoch": 0.9908814589665653, |
| "grad_norm": 0.8608642220497131, |
| "learning_rate": 2.9121448240760627e-06, |
| "loss": 3.598604202270508, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.9914893617021276, |
| "grad_norm": 0.8635938763618469, |
| "learning_rate": 2.5601054764224363e-06, |
| "loss": 3.147170066833496, |
| "step": 1631 |
| }, |
| { |
| "epoch": 0.99209726443769, |
| "grad_norm": 0.8879997134208679, |
| "learning_rate": 2.230627961304993e-06, |
| "loss": 3.362480640411377, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.9927051671732523, |
| "grad_norm": 0.9193839430809021, |
| "learning_rate": 1.9237423208336436e-06, |
| "loss": 3.479422092437744, |
| "step": 1633 |
| }, |
| { |
| "epoch": 0.9933130699088146, |
| "grad_norm": 0.9332389235496521, |
| "learning_rate": 1.639476537167256e-06, |
| "loss": 3.5799665451049805, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.993920972644377, |
| "grad_norm": 0.9634335041046143, |
| "learning_rate": 1.377856529962085e-06, |
| "loss": 3.508492946624756, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.9945288753799392, |
| "grad_norm": 0.9871721863746643, |
| "learning_rate": 1.1389061540083567e-06, |
| "loss": 3.727715015411377, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.9951367781155015, |
| "grad_norm": 1.3295271396636963, |
| "learning_rate": 9.22647197055343e-07, |
| "loss": 3.456338405609131, |
| "step": 1637 |
| }, |
| { |
| "epoch": 0.9957446808510638, |
| "grad_norm": 0.9310692548751831, |
| "learning_rate": 7.290993778245047e-07, |
| "loss": 3.6882834434509277, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.9963525835866262, |
| "grad_norm": 1.064975380897522, |
| "learning_rate": 5.582803442117091e-07, |
| "loss": 3.6411876678466797, |
| "step": 1639 |
| }, |
| { |
| "epoch": 0.9969604863221885, |
| "grad_norm": 0.8638765811920166, |
| "learning_rate": 4.102056716779601e-07, |
| "loss": 3.4605424404144287, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.9975683890577508, |
| "grad_norm": 1.017181158065796, |
| "learning_rate": 2.8488886182928555e-07, |
| "loss": 3.6346280574798584, |
| "step": 1641 |
| }, |
| { |
| "epoch": 0.998176291793313, |
| "grad_norm": 1.012779951095581, |
| "learning_rate": 1.8234134118552682e-07, |
| "loss": 3.864459753036499, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.9987841945288753, |
| "grad_norm": 0.7985902428627014, |
| "learning_rate": 1.0257246013864463e-07, |
| "loss": 3.5452003479003906, |
| "step": 1643 |
| }, |
| { |
| "epoch": 0.9993920972644377, |
| "grad_norm": 0.956741988658905, |
| "learning_rate": 4.558949209995667e-08, |
| "loss": 3.4816622734069824, |
| "step": 1644 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.9047068357467651, |
| "learning_rate": 1.1397632837056859e-08, |
| "loss": 3.2004756927490234, |
| "step": 1645 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1645, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 164, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.127402894966391e+18, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|