| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.7975683890577507, |
| "eval_steps": 500, |
| "global_step": 1312, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0006079027355623101, |
| "grad_norm": 44.818572998046875, |
| "learning_rate": 0.0, |
| "loss": 7.186539173126221, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0012158054711246201, |
| "grad_norm": 47.259071350097656, |
| "learning_rate": 1e-05, |
| "loss": 7.313593864440918, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.00182370820668693, |
| "grad_norm": 23.298837661743164, |
| "learning_rate": 2e-05, |
| "loss": 7.087122917175293, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0024316109422492403, |
| "grad_norm": 13.535771369934082, |
| "learning_rate": 3e-05, |
| "loss": 6.942234992980957, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.00303951367781155, |
| "grad_norm": 11.997403144836426, |
| "learning_rate": 4e-05, |
| "loss": 6.6411614418029785, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.00364741641337386, |
| "grad_norm": 13.242263793945312, |
| "learning_rate": 5e-05, |
| "loss": 6.319230079650879, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.00425531914893617, |
| "grad_norm": 10.080074310302734, |
| "learning_rate": 6e-05, |
| "loss": 6.251328468322754, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.004863221884498481, |
| "grad_norm": 14.386478424072266, |
| "learning_rate": 7.000000000000001e-05, |
| "loss": 6.372805595397949, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.00547112462006079, |
| "grad_norm": 6.731114387512207, |
| "learning_rate": 8e-05, |
| "loss": 6.32672119140625, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0060790273556231, |
| "grad_norm": 7.430361747741699, |
| "learning_rate": 8.999999999999999e-05, |
| "loss": 5.981637954711914, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.006686930091185411, |
| "grad_norm": 6.817004680633545, |
| "learning_rate": 0.0001, |
| "loss": 6.182029724121094, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.00729483282674772, |
| "grad_norm": 6.540442943572998, |
| "learning_rate": 0.00011, |
| "loss": 6.224725723266602, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.007902735562310031, |
| "grad_norm": 6.224416255950928, |
| "learning_rate": 0.00012, |
| "loss": 6.106351852416992, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.00851063829787234, |
| "grad_norm": 5.954357624053955, |
| "learning_rate": 0.00013000000000000002, |
| "loss": 6.050826072692871, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.00911854103343465, |
| "grad_norm": 5.7734551429748535, |
| "learning_rate": 0.00014000000000000001, |
| "loss": 6.147342681884766, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.009726443768996961, |
| "grad_norm": 6.399932861328125, |
| "learning_rate": 0.00015, |
| "loss": 6.284224510192871, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.01033434650455927, |
| "grad_norm": 4.2578558921813965, |
| "learning_rate": 0.00016, |
| "loss": 5.968033790588379, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.01094224924012158, |
| "grad_norm": 3.9558868408203125, |
| "learning_rate": 0.00017, |
| "loss": 5.909118175506592, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.011550151975683891, |
| "grad_norm": 3.4882659912109375, |
| "learning_rate": 0.00017999999999999998, |
| "loss": 6.045907974243164, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.0121580547112462, |
| "grad_norm": 6.301029682159424, |
| "learning_rate": 0.00019, |
| "loss": 5.905165672302246, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.01276595744680851, |
| "grad_norm": 3.891385078430176, |
| "learning_rate": 0.0002, |
| "loss": 5.9485931396484375, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.013373860182370821, |
| "grad_norm": 4.277671813964844, |
| "learning_rate": 0.00021, |
| "loss": 5.995012283325195, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.01398176291793313, |
| "grad_norm": 3.7930500507354736, |
| "learning_rate": 0.00022, |
| "loss": 6.081092834472656, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.01458966565349544, |
| "grad_norm": 5.02017879486084, |
| "learning_rate": 0.00023, |
| "loss": 6.232627868652344, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.015197568389057751, |
| "grad_norm": 3.485990285873413, |
| "learning_rate": 0.00024, |
| "loss": 6.189592361450195, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.015805471124620062, |
| "grad_norm": 4.133285999298096, |
| "learning_rate": 0.00025, |
| "loss": 5.953710079193115, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.01641337386018237, |
| "grad_norm": 4.140801429748535, |
| "learning_rate": 0.00026000000000000003, |
| "loss": 5.926338195800781, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.01702127659574468, |
| "grad_norm": 3.4010164737701416, |
| "learning_rate": 0.00027, |
| "loss": 5.7254462242126465, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.01762917933130699, |
| "grad_norm": 10.262829780578613, |
| "learning_rate": 0.00028000000000000003, |
| "loss": 6.183866500854492, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.0182370820668693, |
| "grad_norm": 4.732674598693848, |
| "learning_rate": 0.00029, |
| "loss": 5.899426460266113, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.01884498480243161, |
| "grad_norm": 4.868585109710693, |
| "learning_rate": 0.0003, |
| "loss": 5.8833699226379395, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.019452887537993922, |
| "grad_norm": 4.654231071472168, |
| "learning_rate": 0.00031, |
| "loss": 5.967190265655518, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.02006079027355623, |
| "grad_norm": 4.583294868469238, |
| "learning_rate": 0.00032, |
| "loss": 6.027661323547363, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.02066869300911854, |
| "grad_norm": 4.038606643676758, |
| "learning_rate": 0.00033, |
| "loss": 6.06468391418457, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.02127659574468085, |
| "grad_norm": 3.1677229404449463, |
| "learning_rate": 0.00034, |
| "loss": 5.97524881362915, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.02188449848024316, |
| "grad_norm": 4.171515941619873, |
| "learning_rate": 0.00035, |
| "loss": 5.981804370880127, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.022492401215805473, |
| "grad_norm": 5.382990837097168, |
| "learning_rate": 0.00035999999999999997, |
| "loss": 6.05380916595459, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.023100303951367782, |
| "grad_norm": 4.436893463134766, |
| "learning_rate": 0.00037, |
| "loss": 6.156210899353027, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.02370820668693009, |
| "grad_norm": 4.104293346405029, |
| "learning_rate": 0.00038, |
| "loss": 5.963473320007324, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.0243161094224924, |
| "grad_norm": 7.8225202560424805, |
| "learning_rate": 0.00039000000000000005, |
| "loss": 5.945594310760498, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.02492401215805471, |
| "grad_norm": 3.7115426063537598, |
| "learning_rate": 0.0004, |
| "loss": 5.866631984710693, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.02553191489361702, |
| "grad_norm": 3.377136468887329, |
| "learning_rate": 0.00041, |
| "loss": 5.87300968170166, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.026139817629179333, |
| "grad_norm": 3.0676238536834717, |
| "learning_rate": 0.00042, |
| "loss": 5.819428443908691, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.026747720364741642, |
| "grad_norm": 3.4088737964630127, |
| "learning_rate": 0.00043, |
| "loss": 5.686548709869385, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.02735562310030395, |
| "grad_norm": 4.599688529968262, |
| "learning_rate": 0.00044, |
| "loss": 6.143298149108887, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.02796352583586626, |
| "grad_norm": 3.1253559589385986, |
| "learning_rate": 0.00045000000000000004, |
| "loss": 5.965961933135986, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.02857142857142857, |
| "grad_norm": 3.3107733726501465, |
| "learning_rate": 0.00046, |
| "loss": 5.744629859924316, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.02917933130699088, |
| "grad_norm": 3.4835944175720215, |
| "learning_rate": 0.00047, |
| "loss": 5.963787078857422, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.029787234042553193, |
| "grad_norm": 4.766516208648682, |
| "learning_rate": 0.00048, |
| "loss": 5.903127670288086, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.030395136778115502, |
| "grad_norm": 3.4444823265075684, |
| "learning_rate": 0.00049, |
| "loss": 5.898875713348389, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.03100303951367781, |
| "grad_norm": 3.4199633598327637, |
| "learning_rate": 0.0005, |
| "loss": 5.995363235473633, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.031610942249240125, |
| "grad_norm": 4.609949111938477, |
| "learning_rate": 0.0005, |
| "loss": 5.867133140563965, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.03221884498480243, |
| "grad_norm": 2.445003032684326, |
| "learning_rate": 0.0005, |
| "loss": 5.596291542053223, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.03282674772036474, |
| "grad_norm": 7.065042972564697, |
| "learning_rate": 0.0005, |
| "loss": 5.764184951782227, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.03343465045592705, |
| "grad_norm": 3.3624749183654785, |
| "learning_rate": 0.0005, |
| "loss": 5.835771560668945, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.03404255319148936, |
| "grad_norm": 2.667015790939331, |
| "learning_rate": 0.0005, |
| "loss": 5.9446611404418945, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.034650455927051675, |
| "grad_norm": 3.2562549114227295, |
| "learning_rate": 0.0005, |
| "loss": 6.190652370452881, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.03525835866261398, |
| "grad_norm": 3.5651185512542725, |
| "learning_rate": 0.0005, |
| "loss": 5.877089500427246, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.035866261398176294, |
| "grad_norm": 2.6607139110565186, |
| "learning_rate": 0.0005, |
| "loss": 5.947436332702637, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.0364741641337386, |
| "grad_norm": 2.5586416721343994, |
| "learning_rate": 0.0005, |
| "loss": 6.041194915771484, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.03708206686930091, |
| "grad_norm": 3.5156543254852295, |
| "learning_rate": 0.0005, |
| "loss": 5.8784284591674805, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.03768996960486322, |
| "grad_norm": 2.013105630874634, |
| "learning_rate": 0.0005, |
| "loss": 5.705929756164551, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.03829787234042553, |
| "grad_norm": 2.2044196128845215, |
| "learning_rate": 0.0005, |
| "loss": 5.775040626525879, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.038905775075987845, |
| "grad_norm": 3.8432488441467285, |
| "learning_rate": 0.0005, |
| "loss": 5.757482528686523, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.03951367781155015, |
| "grad_norm": 2.794318437576294, |
| "learning_rate": 0.0005, |
| "loss": 5.4956865310668945, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.04012158054711246, |
| "grad_norm": 5.635376930236816, |
| "learning_rate": 0.0005, |
| "loss": 5.950571060180664, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.04072948328267477, |
| "grad_norm": 2.8366096019744873, |
| "learning_rate": 0.0005, |
| "loss": 5.937989711761475, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.04133738601823708, |
| "grad_norm": 4.0585455894470215, |
| "learning_rate": 0.0005, |
| "loss": 6.175616264343262, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.041945288753799395, |
| "grad_norm": 2.4633665084838867, |
| "learning_rate": 0.0005, |
| "loss": 5.856078147888184, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.0425531914893617, |
| "grad_norm": 2.900541305541992, |
| "learning_rate": 0.0005, |
| "loss": 5.562302112579346, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.043161094224924014, |
| "grad_norm": 2.1582231521606445, |
| "learning_rate": 0.0005, |
| "loss": 5.853466033935547, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.04376899696048632, |
| "grad_norm": 2.823076009750366, |
| "learning_rate": 0.0005, |
| "loss": 5.676411151885986, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.04437689969604863, |
| "grad_norm": 3.4227182865142822, |
| "learning_rate": 0.0005, |
| "loss": 5.687357425689697, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.044984802431610946, |
| "grad_norm": 2.4039175510406494, |
| "learning_rate": 0.0005, |
| "loss": 5.892976760864258, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.04559270516717325, |
| "grad_norm": 2.6830098628997803, |
| "learning_rate": 0.0005, |
| "loss": 5.66058349609375, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.046200607902735565, |
| "grad_norm": 2.413268566131592, |
| "learning_rate": 0.0005, |
| "loss": 5.7166547775268555, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.04680851063829787, |
| "grad_norm": 2.110560894012451, |
| "learning_rate": 0.0005, |
| "loss": 5.578657150268555, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.04741641337386018, |
| "grad_norm": 2.293944835662842, |
| "learning_rate": 0.0005, |
| "loss": 5.830209732055664, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.04802431610942249, |
| "grad_norm": 2.3141164779663086, |
| "learning_rate": 0.0005, |
| "loss": 5.730184555053711, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.0486322188449848, |
| "grad_norm": 2.4202141761779785, |
| "learning_rate": 0.0005, |
| "loss": 5.657958030700684, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.049240121580547115, |
| "grad_norm": 2.1450300216674805, |
| "learning_rate": 0.0005, |
| "loss": 5.734421253204346, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.04984802431610942, |
| "grad_norm": 2.340426206588745, |
| "learning_rate": 0.0005, |
| "loss": 5.912275314331055, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.050455927051671734, |
| "grad_norm": 2.2572286128997803, |
| "learning_rate": 0.0005, |
| "loss": 6.227065086364746, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.05106382978723404, |
| "grad_norm": 1.9745402336120605, |
| "learning_rate": 0.0005, |
| "loss": 5.538962364196777, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.05167173252279635, |
| "grad_norm": 1.8350422382354736, |
| "learning_rate": 0.0005, |
| "loss": 5.68572998046875, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.052279635258358666, |
| "grad_norm": 1.4099390506744385, |
| "learning_rate": 0.0005, |
| "loss": 5.548061370849609, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.05288753799392097, |
| "grad_norm": 1.7324459552764893, |
| "learning_rate": 0.0005, |
| "loss": 5.791088104248047, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.053495440729483285, |
| "grad_norm": 2.2765917778015137, |
| "learning_rate": 0.0005, |
| "loss": 5.66319465637207, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.05410334346504559, |
| "grad_norm": 1.8931759595870972, |
| "learning_rate": 0.0005, |
| "loss": 5.931559085845947, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.0547112462006079, |
| "grad_norm": 3.1260805130004883, |
| "learning_rate": 0.0005, |
| "loss": 5.887214183807373, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.05531914893617021, |
| "grad_norm": 2.076260805130005, |
| "learning_rate": 0.0005, |
| "loss": 5.837953567504883, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.05592705167173252, |
| "grad_norm": 2.6507105827331543, |
| "learning_rate": 0.0005, |
| "loss": 5.720830917358398, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.056534954407294835, |
| "grad_norm": 1.761267900466919, |
| "learning_rate": 0.0005, |
| "loss": 5.8046417236328125, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.05714285714285714, |
| "grad_norm": 2.158432722091675, |
| "learning_rate": 0.0005, |
| "loss": 5.530825614929199, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.057750759878419454, |
| "grad_norm": 1.8743107318878174, |
| "learning_rate": 0.0005, |
| "loss": 5.851261138916016, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.05835866261398176, |
| "grad_norm": 2.2951159477233887, |
| "learning_rate": 0.0005, |
| "loss": 5.754410743713379, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.05896656534954407, |
| "grad_norm": 1.6710808277130127, |
| "learning_rate": 0.0005, |
| "loss": 5.511685371398926, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.059574468085106386, |
| "grad_norm": 2.4671308994293213, |
| "learning_rate": 0.0005, |
| "loss": 5.762502193450928, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.06018237082066869, |
| "grad_norm": 1.7344735860824585, |
| "learning_rate": 0.0005, |
| "loss": 5.726058006286621, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.060790273556231005, |
| "grad_norm": 1.9786497354507446, |
| "learning_rate": 0.0005, |
| "loss": 5.570637226104736, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.06139817629179331, |
| "grad_norm": 1.672898769378662, |
| "learning_rate": 0.0005, |
| "loss": 5.4022722244262695, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.06200607902735562, |
| "grad_norm": 1.975422978401184, |
| "learning_rate": 0.0005, |
| "loss": 5.58085823059082, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.06261398176291794, |
| "grad_norm": 1.6185539960861206, |
| "learning_rate": 0.0005, |
| "loss": 5.551645755767822, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.06322188449848025, |
| "grad_norm": 1.6963152885437012, |
| "learning_rate": 0.0005, |
| "loss": 5.634788990020752, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.06382978723404255, |
| "grad_norm": 1.6010147333145142, |
| "learning_rate": 0.0005, |
| "loss": 5.439291954040527, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.06443768996960486, |
| "grad_norm": 1.4918285608291626, |
| "learning_rate": 0.0005, |
| "loss": 5.595495700836182, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.06504559270516717, |
| "grad_norm": 1.7921746969223022, |
| "learning_rate": 0.0005, |
| "loss": 5.7882080078125, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.06565349544072949, |
| "grad_norm": 1.6905741691589355, |
| "learning_rate": 0.0005, |
| "loss": 5.6724653244018555, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.0662613981762918, |
| "grad_norm": 1.5293573141098022, |
| "learning_rate": 0.0005, |
| "loss": 5.407555103302002, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.0668693009118541, |
| "grad_norm": 1.3903565406799316, |
| "learning_rate": 0.0005, |
| "loss": 5.763338565826416, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.06747720364741641, |
| "grad_norm": 1.6731656789779663, |
| "learning_rate": 0.0005, |
| "loss": 5.656299591064453, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.06808510638297872, |
| "grad_norm": 1.6174890995025635, |
| "learning_rate": 0.0005, |
| "loss": 5.728058815002441, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.06869300911854104, |
| "grad_norm": 1.9111192226409912, |
| "learning_rate": 0.0005, |
| "loss": 5.569175720214844, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.06930091185410335, |
| "grad_norm": 1.397756576538086, |
| "learning_rate": 0.0005, |
| "loss": 5.692349433898926, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.06990881458966565, |
| "grad_norm": 1.4280520677566528, |
| "learning_rate": 0.0005, |
| "loss": 5.366017818450928, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.07051671732522796, |
| "grad_norm": 2.1756176948547363, |
| "learning_rate": 0.0005, |
| "loss": 5.529537677764893, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.07112462006079028, |
| "grad_norm": 1.6855345964431763, |
| "learning_rate": 0.0005, |
| "loss": 5.3663010597229, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.07173252279635259, |
| "grad_norm": 1.3849018812179565, |
| "learning_rate": 0.0005, |
| "loss": 5.661293983459473, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.07234042553191489, |
| "grad_norm": 1.5399678945541382, |
| "learning_rate": 0.0005, |
| "loss": 5.681015968322754, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.0729483282674772, |
| "grad_norm": 1.3474847078323364, |
| "learning_rate": 0.0005, |
| "loss": 5.404428482055664, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.07355623100303951, |
| "grad_norm": 1.4353671073913574, |
| "learning_rate": 0.0005, |
| "loss": 5.621041297912598, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.07416413373860183, |
| "grad_norm": 1.385099172592163, |
| "learning_rate": 0.0005, |
| "loss": 5.410789489746094, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.07477203647416414, |
| "grad_norm": 1.5382664203643799, |
| "learning_rate": 0.0005, |
| "loss": 5.401933670043945, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.07537993920972644, |
| "grad_norm": 1.48553466796875, |
| "learning_rate": 0.0005, |
| "loss": 5.547571182250977, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.07598784194528875, |
| "grad_norm": 1.3798505067825317, |
| "learning_rate": 0.0005, |
| "loss": 5.5776872634887695, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.07659574468085106, |
| "grad_norm": 1.863465428352356, |
| "learning_rate": 0.0005, |
| "loss": 5.570428371429443, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.07720364741641338, |
| "grad_norm": 1.7337578535079956, |
| "learning_rate": 0.0005, |
| "loss": 5.60271692276001, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.07781155015197569, |
| "grad_norm": 1.7129346132278442, |
| "learning_rate": 0.0005, |
| "loss": 5.655090808868408, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.07841945288753799, |
| "grad_norm": 1.8253934383392334, |
| "learning_rate": 0.0005, |
| "loss": 5.726884841918945, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.0790273556231003, |
| "grad_norm": 1.493262529373169, |
| "learning_rate": 0.0005, |
| "loss": 5.307271957397461, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.07963525835866261, |
| "grad_norm": 1.9851430654525757, |
| "learning_rate": 0.0005, |
| "loss": 5.40402889251709, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.08024316109422493, |
| "grad_norm": 1.4382926225662231, |
| "learning_rate": 0.0005, |
| "loss": 5.55129337310791, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.08085106382978724, |
| "grad_norm": 2.1384055614471436, |
| "learning_rate": 0.0005, |
| "loss": 5.42939567565918, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.08145896656534954, |
| "grad_norm": 1.5483143329620361, |
| "learning_rate": 0.0005, |
| "loss": 5.495145797729492, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.08206686930091185, |
| "grad_norm": 1.6180500984191895, |
| "learning_rate": 0.0005, |
| "loss": 5.596287727355957, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.08267477203647416, |
| "grad_norm": 1.6833781003952026, |
| "learning_rate": 0.0005, |
| "loss": 5.704960346221924, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.08328267477203648, |
| "grad_norm": 1.731799602508545, |
| "learning_rate": 0.0005, |
| "loss": 5.343502998352051, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.08389057750759879, |
| "grad_norm": 1.7854918241500854, |
| "learning_rate": 0.0005, |
| "loss": 5.647939205169678, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.08449848024316109, |
| "grad_norm": 1.2474077939987183, |
| "learning_rate": 0.0005, |
| "loss": 5.360551834106445, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.0851063829787234, |
| "grad_norm": 5.299109935760498, |
| "learning_rate": 0.0005, |
| "loss": 5.383178234100342, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.08571428571428572, |
| "grad_norm": 2.591733694076538, |
| "learning_rate": 0.0005, |
| "loss": 5.623793601989746, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.08632218844984803, |
| "grad_norm": 1.5868524312973022, |
| "learning_rate": 0.0005, |
| "loss": 5.522441864013672, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.08693009118541034, |
| "grad_norm": 1.752677083015442, |
| "learning_rate": 0.0005, |
| "loss": 5.5086774826049805, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.08753799392097264, |
| "grad_norm": 1.5863618850708008, |
| "learning_rate": 0.0005, |
| "loss": 5.492759704589844, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.08814589665653495, |
| "grad_norm": 1.4941948652267456, |
| "learning_rate": 0.0005, |
| "loss": 5.475063323974609, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.08875379939209727, |
| "grad_norm": 1.5351965427398682, |
| "learning_rate": 0.0005, |
| "loss": 5.511392593383789, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.08936170212765958, |
| "grad_norm": 1.5566837787628174, |
| "learning_rate": 0.0005, |
| "loss": 5.4525909423828125, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.08996960486322189, |
| "grad_norm": 1.5408483743667603, |
| "learning_rate": 0.0005, |
| "loss": 5.592557430267334, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.09057750759878419, |
| "grad_norm": 1.3915044069290161, |
| "learning_rate": 0.0005, |
| "loss": 5.68109130859375, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.0911854103343465, |
| "grad_norm": 1.4081814289093018, |
| "learning_rate": 0.0005, |
| "loss": 5.310542106628418, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.09179331306990882, |
| "grad_norm": 1.368977427482605, |
| "learning_rate": 0.0005, |
| "loss": 5.590452194213867, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.09240121580547113, |
| "grad_norm": 1.7604471445083618, |
| "learning_rate": 0.0005, |
| "loss": 5.2881550788879395, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.09300911854103343, |
| "grad_norm": 1.2718323469161987, |
| "learning_rate": 0.0005, |
| "loss": 5.228243827819824, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.09361702127659574, |
| "grad_norm": 1.853657841682434, |
| "learning_rate": 0.0005, |
| "loss": 5.344303131103516, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.09422492401215805, |
| "grad_norm": 1.2742729187011719, |
| "learning_rate": 0.0005, |
| "loss": 5.602327346801758, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.09483282674772037, |
| "grad_norm": 1.3428983688354492, |
| "learning_rate": 0.0005, |
| "loss": 5.564847469329834, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.09544072948328268, |
| "grad_norm": 1.307673454284668, |
| "learning_rate": 0.0005, |
| "loss": 5.5293378829956055, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.09604863221884498, |
| "grad_norm": 1.2413536310195923, |
| "learning_rate": 0.0005, |
| "loss": 5.751148223876953, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.09665653495440729, |
| "grad_norm": 1.5207955837249756, |
| "learning_rate": 0.0005, |
| "loss": 5.464879989624023, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.0972644376899696, |
| "grad_norm": 1.2123122215270996, |
| "learning_rate": 0.0005, |
| "loss": 5.438077926635742, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.09787234042553192, |
| "grad_norm": 1.420456051826477, |
| "learning_rate": 0.0005, |
| "loss": 5.586366176605225, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.09848024316109423, |
| "grad_norm": 1.2411231994628906, |
| "learning_rate": 0.0005, |
| "loss": 5.465837478637695, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.09908814589665653, |
| "grad_norm": 1.4124112129211426, |
| "learning_rate": 0.0005, |
| "loss": 5.58890438079834, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.09969604863221884, |
| "grad_norm": 1.421832799911499, |
| "learning_rate": 0.0005, |
| "loss": 5.211925029754639, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.10030395136778116, |
| "grad_norm": 1.4735937118530273, |
| "learning_rate": 0.0005, |
| "loss": 5.542084693908691, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.10091185410334347, |
| "grad_norm": 1.2726881504058838, |
| "learning_rate": 0.0005, |
| "loss": 5.566733360290527, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.10151975683890578, |
| "grad_norm": 1.3275830745697021, |
| "learning_rate": 0.0005, |
| "loss": 5.730228424072266, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.10212765957446808, |
| "grad_norm": 1.6597068309783936, |
| "learning_rate": 0.0005, |
| "loss": 5.339101791381836, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.10273556231003039, |
| "grad_norm": 1.46490478515625, |
| "learning_rate": 0.0005, |
| "loss": 5.410638809204102, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.1033434650455927, |
| "grad_norm": 1.3094699382781982, |
| "learning_rate": 0.0005, |
| "loss": 5.219968318939209, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.10395136778115502, |
| "grad_norm": 1.4983205795288086, |
| "learning_rate": 0.0005, |
| "loss": 5.392378330230713, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.10455927051671733, |
| "grad_norm": 1.517512559890747, |
| "learning_rate": 0.0005, |
| "loss": 5.38358736038208, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.10516717325227963, |
| "grad_norm": 1.5345962047576904, |
| "learning_rate": 0.0005, |
| "loss": 5.368213653564453, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.10577507598784194, |
| "grad_norm": 1.1318706274032593, |
| "learning_rate": 0.0005, |
| "loss": 5.639193534851074, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.10638297872340426, |
| "grad_norm": 1.3089977502822876, |
| "learning_rate": 0.0005, |
| "loss": 5.508517265319824, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.10699088145896657, |
| "grad_norm": 1.16405189037323, |
| "learning_rate": 0.0005, |
| "loss": 5.238767623901367, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.10759878419452888, |
| "grad_norm": 1.318361759185791, |
| "learning_rate": 0.0005, |
| "loss": 5.591005325317383, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.10820668693009118, |
| "grad_norm": 1.7068839073181152, |
| "learning_rate": 0.0005, |
| "loss": 5.138769149780273, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.1088145896656535, |
| "grad_norm": 1.4426335096359253, |
| "learning_rate": 0.0005, |
| "loss": 5.406965255737305, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.1094224924012158, |
| "grad_norm": 1.3298251628875732, |
| "learning_rate": 0.0005, |
| "loss": 5.486334323883057, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.11003039513677812, |
| "grad_norm": 1.2703888416290283, |
| "learning_rate": 0.0005, |
| "loss": 5.543169021606445, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.11063829787234042, |
| "grad_norm": 1.0853707790374756, |
| "learning_rate": 0.0005, |
| "loss": 5.2396135330200195, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.11124620060790273, |
| "grad_norm": 1.283922553062439, |
| "learning_rate": 0.0005, |
| "loss": 5.168734550476074, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.11185410334346504, |
| "grad_norm": 1.4008558988571167, |
| "learning_rate": 0.0005, |
| "loss": 5.464504241943359, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.11246200607902736, |
| "grad_norm": 1.6104100942611694, |
| "learning_rate": 0.0005, |
| "loss": 5.350894927978516, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.11306990881458967, |
| "grad_norm": 1.1095637083053589, |
| "learning_rate": 0.0005, |
| "loss": 5.330683708190918, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.11367781155015197, |
| "grad_norm": 1.3298522233963013, |
| "learning_rate": 0.0005, |
| "loss": 5.376528739929199, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.11428571428571428, |
| "grad_norm": 1.4511582851409912, |
| "learning_rate": 0.0005, |
| "loss": 5.49576473236084, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.1148936170212766, |
| "grad_norm": 1.4968204498291016, |
| "learning_rate": 0.0005, |
| "loss": 5.232635021209717, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.11550151975683891, |
| "grad_norm": 1.2423769235610962, |
| "learning_rate": 0.0005, |
| "loss": 5.456453323364258, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.11610942249240122, |
| "grad_norm": 1.2642461061477661, |
| "learning_rate": 0.0005, |
| "loss": 5.673423767089844, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.11671732522796352, |
| "grad_norm": 1.6604862213134766, |
| "learning_rate": 0.0005, |
| "loss": 5.230939865112305, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.11732522796352583, |
| "grad_norm": 1.4601672887802124, |
| "learning_rate": 0.0005, |
| "loss": 5.308025360107422, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.11793313069908815, |
| "grad_norm": 1.66468346118927, |
| "learning_rate": 0.0005, |
| "loss": 5.50089168548584, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.11854103343465046, |
| "grad_norm": 1.4034700393676758, |
| "learning_rate": 0.0005, |
| "loss": 5.4229583740234375, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.11914893617021277, |
| "grad_norm": 1.3911566734313965, |
| "learning_rate": 0.0005, |
| "loss": 5.266064643859863, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.11975683890577507, |
| "grad_norm": 1.5582391023635864, |
| "learning_rate": 0.0005, |
| "loss": 5.215412616729736, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.12036474164133738, |
| "grad_norm": 1.4908430576324463, |
| "learning_rate": 0.0005, |
| "loss": 5.305833339691162, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.1209726443768997, |
| "grad_norm": 1.4207631349563599, |
| "learning_rate": 0.0005, |
| "loss": 5.2746734619140625, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.12158054711246201, |
| "grad_norm": 1.5322375297546387, |
| "learning_rate": 0.0005, |
| "loss": 5.160092353820801, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.12218844984802432, |
| "grad_norm": 1.538822889328003, |
| "learning_rate": 0.0005, |
| "loss": 5.2349467277526855, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.12279635258358662, |
| "grad_norm": 1.487720251083374, |
| "learning_rate": 0.0005, |
| "loss": 5.305604934692383, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.12340425531914893, |
| "grad_norm": 1.402201771736145, |
| "learning_rate": 0.0005, |
| "loss": 5.271785736083984, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.12401215805471125, |
| "grad_norm": 1.4523091316223145, |
| "learning_rate": 0.0005, |
| "loss": 5.260416030883789, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.12462006079027356, |
| "grad_norm": 1.3056803941726685, |
| "learning_rate": 0.0005, |
| "loss": 5.221076488494873, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.12522796352583587, |
| "grad_norm": 1.4249091148376465, |
| "learning_rate": 0.0005, |
| "loss": 5.13364839553833, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.12583586626139817, |
| "grad_norm": 1.417321801185608, |
| "learning_rate": 0.0005, |
| "loss": 5.294346332550049, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.1264437689969605, |
| "grad_norm": 1.3512288331985474, |
| "learning_rate": 0.0005, |
| "loss": 5.273685455322266, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.1270516717325228, |
| "grad_norm": 1.53708016872406, |
| "learning_rate": 0.0005, |
| "loss": 5.160931587219238, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.1276595744680851, |
| "grad_norm": 1.3125845193862915, |
| "learning_rate": 0.0005, |
| "loss": 5.472460746765137, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.12826747720364742, |
| "grad_norm": 1.6518676280975342, |
| "learning_rate": 0.0005, |
| "loss": 5.4825568199157715, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.12887537993920972, |
| "grad_norm": 1.203003168106079, |
| "learning_rate": 0.0005, |
| "loss": 5.11652946472168, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.12948328267477205, |
| "grad_norm": 1.3805352449417114, |
| "learning_rate": 0.0005, |
| "loss": 5.366741180419922, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.13009118541033435, |
| "grad_norm": 1.8709197044372559, |
| "learning_rate": 0.0005, |
| "loss": 5.435246467590332, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.13069908814589665, |
| "grad_norm": 1.7283586263656616, |
| "learning_rate": 0.0005, |
| "loss": 5.202251434326172, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.13130699088145897, |
| "grad_norm": 1.2809170484542847, |
| "learning_rate": 0.0005, |
| "loss": 5.283895492553711, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.13191489361702127, |
| "grad_norm": 1.249645709991455, |
| "learning_rate": 0.0005, |
| "loss": 5.123793601989746, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.1325227963525836, |
| "grad_norm": 1.3356451988220215, |
| "learning_rate": 0.0005, |
| "loss": 5.174809455871582, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.1331306990881459, |
| "grad_norm": 1.139381766319275, |
| "learning_rate": 0.0005, |
| "loss": 5.0811967849731445, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.1337386018237082, |
| "grad_norm": 1.2006030082702637, |
| "learning_rate": 0.0005, |
| "loss": 5.268994331359863, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.13434650455927052, |
| "grad_norm": 1.2994015216827393, |
| "learning_rate": 0.0005, |
| "loss": 5.426079750061035, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.13495440729483282, |
| "grad_norm": 1.0793324708938599, |
| "learning_rate": 0.0005, |
| "loss": 5.424633979797363, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.13556231003039515, |
| "grad_norm": 1.1271226406097412, |
| "learning_rate": 0.0005, |
| "loss": 5.310846328735352, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.13617021276595745, |
| "grad_norm": 1.1775165796279907, |
| "learning_rate": 0.0005, |
| "loss": 5.071159839630127, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.13677811550151975, |
| "grad_norm": 1.1077218055725098, |
| "learning_rate": 0.0005, |
| "loss": 5.208876609802246, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.13738601823708207, |
| "grad_norm": 1.3281017541885376, |
| "learning_rate": 0.0005, |
| "loss": 5.371927261352539, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.13799392097264437, |
| "grad_norm": 1.4999650716781616, |
| "learning_rate": 0.0005, |
| "loss": 5.17914342880249, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.1386018237082067, |
| "grad_norm": 1.2213531732559204, |
| "learning_rate": 0.0005, |
| "loss": 5.079235076904297, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.139209726443769, |
| "grad_norm": 1.409624695777893, |
| "learning_rate": 0.0005, |
| "loss": 5.218929767608643, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.1398176291793313, |
| "grad_norm": 1.2914072275161743, |
| "learning_rate": 0.0005, |
| "loss": 5.254355430603027, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.14042553191489363, |
| "grad_norm": 1.27825927734375, |
| "learning_rate": 0.0005, |
| "loss": 5.02869987487793, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.14103343465045592, |
| "grad_norm": 1.367679238319397, |
| "learning_rate": 0.0005, |
| "loss": 5.032447814941406, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.14164133738601822, |
| "grad_norm": 1.1813191175460815, |
| "learning_rate": 0.0005, |
| "loss": 5.181385040283203, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.14224924012158055, |
| "grad_norm": 1.385109305381775, |
| "learning_rate": 0.0005, |
| "loss": 5.294610977172852, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.14285714285714285, |
| "grad_norm": 1.2544500827789307, |
| "learning_rate": 0.0005, |
| "loss": 5.046303749084473, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.14346504559270518, |
| "grad_norm": 1.487121820449829, |
| "learning_rate": 0.0005, |
| "loss": 5.523983001708984, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.14407294832826747, |
| "grad_norm": 1.263445258140564, |
| "learning_rate": 0.0005, |
| "loss": 5.192383289337158, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.14468085106382977, |
| "grad_norm": 1.0454970598220825, |
| "learning_rate": 0.0005, |
| "loss": 5.0029120445251465, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.1452887537993921, |
| "grad_norm": 1.131041407585144, |
| "learning_rate": 0.0005, |
| "loss": 5.140591144561768, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.1458966565349544, |
| "grad_norm": 1.3271952867507935, |
| "learning_rate": 0.0005, |
| "loss": 5.232538223266602, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.14650455927051673, |
| "grad_norm": 1.2867931127548218, |
| "learning_rate": 0.0005, |
| "loss": 5.288295745849609, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.14711246200607903, |
| "grad_norm": 1.2857162952423096, |
| "learning_rate": 0.0005, |
| "loss": 4.999725341796875, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.14772036474164132, |
| "grad_norm": 1.308387279510498, |
| "learning_rate": 0.0005, |
| "loss": 5.332901477813721, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.14832826747720365, |
| "grad_norm": 1.431774377822876, |
| "learning_rate": 0.0005, |
| "loss": 5.33701753616333, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.14893617021276595, |
| "grad_norm": 1.2257990837097168, |
| "learning_rate": 0.0005, |
| "loss": 5.286837100982666, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.14954407294832828, |
| "grad_norm": 1.2497832775115967, |
| "learning_rate": 0.0005, |
| "loss": 5.060267448425293, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.15015197568389058, |
| "grad_norm": 1.3174192905426025, |
| "learning_rate": 0.0005, |
| "loss": 5.460453987121582, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.15075987841945288, |
| "grad_norm": 1.2937954664230347, |
| "learning_rate": 0.0005, |
| "loss": 5.300616264343262, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.1513677811550152, |
| "grad_norm": 1.1722848415374756, |
| "learning_rate": 0.0005, |
| "loss": 5.289948463439941, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.1519756838905775, |
| "grad_norm": 1.365752100944519, |
| "learning_rate": 0.0005, |
| "loss": 5.077818870544434, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.15258358662613983, |
| "grad_norm": 1.2099617719650269, |
| "learning_rate": 0.0005, |
| "loss": 5.033614635467529, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.15319148936170213, |
| "grad_norm": 1.3854937553405762, |
| "learning_rate": 0.0005, |
| "loss": 5.019617080688477, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.15379939209726443, |
| "grad_norm": 1.3792158365249634, |
| "learning_rate": 0.0005, |
| "loss": 5.079125881195068, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.15440729483282675, |
| "grad_norm": 1.1149134635925293, |
| "learning_rate": 0.0005, |
| "loss": 5.06775426864624, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.15501519756838905, |
| "grad_norm": 1.4162288904190063, |
| "learning_rate": 0.0005, |
| "loss": 5.29591178894043, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.15562310030395138, |
| "grad_norm": 1.298060417175293, |
| "learning_rate": 0.0005, |
| "loss": 5.090610504150391, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.15623100303951368, |
| "grad_norm": 1.1845481395721436, |
| "learning_rate": 0.0005, |
| "loss": 5.00084114074707, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.15683890577507598, |
| "grad_norm": 1.1649361848831177, |
| "learning_rate": 0.0005, |
| "loss": 5.0191168785095215, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.1574468085106383, |
| "grad_norm": 1.1649863719940186, |
| "learning_rate": 0.0005, |
| "loss": 4.924384117126465, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.1580547112462006, |
| "grad_norm": 1.305981159210205, |
| "learning_rate": 0.0005, |
| "loss": 5.208071708679199, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.15866261398176293, |
| "grad_norm": 1.1375975608825684, |
| "learning_rate": 0.0005, |
| "loss": 5.07304048538208, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.15927051671732523, |
| "grad_norm": 1.570008635520935, |
| "learning_rate": 0.0005, |
| "loss": 5.2816667556762695, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.15987841945288753, |
| "grad_norm": 1.168481469154358, |
| "learning_rate": 0.0005, |
| "loss": 5.156436920166016, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.16048632218844985, |
| "grad_norm": 1.17093026638031, |
| "learning_rate": 0.0005, |
| "loss": 5.264464378356934, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.16109422492401215, |
| "grad_norm": 1.1767195463180542, |
| "learning_rate": 0.0005, |
| "loss": 5.278616905212402, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.16170212765957448, |
| "grad_norm": 1.2456096410751343, |
| "learning_rate": 0.0005, |
| "loss": 5.296989440917969, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.16231003039513678, |
| "grad_norm": 1.260128140449524, |
| "learning_rate": 0.0005, |
| "loss": 5.161136150360107, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.16291793313069908, |
| "grad_norm": 1.3702967166900635, |
| "learning_rate": 0.0005, |
| "loss": 5.2522077560424805, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.1635258358662614, |
| "grad_norm": 1.1898664236068726, |
| "learning_rate": 0.0005, |
| "loss": 5.138284683227539, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.1641337386018237, |
| "grad_norm": 1.586888074874878, |
| "learning_rate": 0.0005, |
| "loss": 4.960643291473389, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.16474164133738603, |
| "grad_norm": 1.2508625984191895, |
| "learning_rate": 0.0005, |
| "loss": 5.2589569091796875, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.16534954407294833, |
| "grad_norm": 1.1662089824676514, |
| "learning_rate": 0.0005, |
| "loss": 5.264585494995117, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.16595744680851063, |
| "grad_norm": 1.2917591333389282, |
| "learning_rate": 0.0005, |
| "loss": 4.975507736206055, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.16656534954407295, |
| "grad_norm": 1.0556538105010986, |
| "learning_rate": 0.0005, |
| "loss": 5.047136306762695, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.16717325227963525, |
| "grad_norm": 1.0959351062774658, |
| "learning_rate": 0.0005, |
| "loss": 5.063904762268066, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.16778115501519758, |
| "grad_norm": 1.0194965600967407, |
| "learning_rate": 0.0005, |
| "loss": 5.230169296264648, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.16838905775075988, |
| "grad_norm": 1.326802372932434, |
| "learning_rate": 0.0005, |
| "loss": 5.127433776855469, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.16899696048632218, |
| "grad_norm": 1.17707097530365, |
| "learning_rate": 0.0005, |
| "loss": 5.209277153015137, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.1696048632218845, |
| "grad_norm": 0.9115813970565796, |
| "learning_rate": 0.0005, |
| "loss": 5.025136470794678, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.1702127659574468, |
| "grad_norm": 1.1245434284210205, |
| "learning_rate": 0.0005, |
| "loss": 5.057619094848633, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.17082066869300913, |
| "grad_norm": 1.3757452964782715, |
| "learning_rate": 0.0005, |
| "loss": 4.920927047729492, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.17142857142857143, |
| "grad_norm": 1.4696053266525269, |
| "learning_rate": 0.0005, |
| "loss": 5.1536760330200195, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.17203647416413373, |
| "grad_norm": 1.2874000072479248, |
| "learning_rate": 0.0005, |
| "loss": 5.050880432128906, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.17264437689969606, |
| "grad_norm": 1.2090721130371094, |
| "learning_rate": 0.0005, |
| "loss": 5.024714469909668, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.17325227963525835, |
| "grad_norm": 1.3489820957183838, |
| "learning_rate": 0.0005, |
| "loss": 5.124329090118408, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.17386018237082068, |
| "grad_norm": 1.055483102798462, |
| "learning_rate": 0.0005, |
| "loss": 4.890225887298584, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.17446808510638298, |
| "grad_norm": 1.2479093074798584, |
| "learning_rate": 0.0005, |
| "loss": 4.835631370544434, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.17507598784194528, |
| "grad_norm": 1.1899778842926025, |
| "learning_rate": 0.0005, |
| "loss": 5.027457237243652, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.1756838905775076, |
| "grad_norm": 1.1618897914886475, |
| "learning_rate": 0.0005, |
| "loss": 5.145232677459717, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.1762917933130699, |
| "grad_norm": 1.2332507371902466, |
| "learning_rate": 0.0005, |
| "loss": 5.138116359710693, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.17689969604863223, |
| "grad_norm": 1.1276404857635498, |
| "learning_rate": 0.0005, |
| "loss": 5.094466209411621, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.17750759878419453, |
| "grad_norm": 1.4890656471252441, |
| "learning_rate": 0.0005, |
| "loss": 4.797001838684082, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.17811550151975683, |
| "grad_norm": 1.0490905046463013, |
| "learning_rate": 0.0005, |
| "loss": 5.235766410827637, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.17872340425531916, |
| "grad_norm": 1.1675019264221191, |
| "learning_rate": 0.0005, |
| "loss": 4.964472770690918, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.17933130699088146, |
| "grad_norm": 0.9588620662689209, |
| "learning_rate": 0.0005, |
| "loss": 5.124715805053711, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.17993920972644378, |
| "grad_norm": 1.3892091512680054, |
| "learning_rate": 0.0005, |
| "loss": 4.847377300262451, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.18054711246200608, |
| "grad_norm": 1.1051721572875977, |
| "learning_rate": 0.0005, |
| "loss": 5.199601173400879, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.18115501519756838, |
| "grad_norm": 1.0869505405426025, |
| "learning_rate": 0.0005, |
| "loss": 5.3870697021484375, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.1817629179331307, |
| "grad_norm": 1.111187219619751, |
| "learning_rate": 0.0005, |
| "loss": 5.190181732177734, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.182370820668693, |
| "grad_norm": 1.2440016269683838, |
| "learning_rate": 0.0005, |
| "loss": 5.041322231292725, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.1829787234042553, |
| "grad_norm": 1.2418692111968994, |
| "learning_rate": 0.0005, |
| "loss": 5.212306022644043, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.18358662613981763, |
| "grad_norm": 1.2612659931182861, |
| "learning_rate": 0.0005, |
| "loss": 4.961835861206055, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.18419452887537993, |
| "grad_norm": 1.1162973642349243, |
| "learning_rate": 0.0005, |
| "loss": 4.950830936431885, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.18480243161094226, |
| "grad_norm": 1.144067406654358, |
| "learning_rate": 0.0005, |
| "loss": 4.8998637199401855, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.18541033434650456, |
| "grad_norm": 1.2814747095108032, |
| "learning_rate": 0.0005, |
| "loss": 5.224381446838379, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.18601823708206686, |
| "grad_norm": 1.3770310878753662, |
| "learning_rate": 0.0005, |
| "loss": 5.05579137802124, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.18662613981762918, |
| "grad_norm": 1.5116229057312012, |
| "learning_rate": 0.0005, |
| "loss": 5.082482814788818, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.18723404255319148, |
| "grad_norm": 1.0909713506698608, |
| "learning_rate": 0.0005, |
| "loss": 4.967124938964844, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.1878419452887538, |
| "grad_norm": 1.1027607917785645, |
| "learning_rate": 0.0005, |
| "loss": 5.00374698638916, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.1884498480243161, |
| "grad_norm": 1.238652229309082, |
| "learning_rate": 0.0005, |
| "loss": 4.993183135986328, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.1890577507598784, |
| "grad_norm": 1.0609782934188843, |
| "learning_rate": 0.0005, |
| "loss": 5.019218444824219, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.18966565349544073, |
| "grad_norm": 1.1945058107376099, |
| "learning_rate": 0.0005, |
| "loss": 5.068751335144043, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.19027355623100303, |
| "grad_norm": 1.2640782594680786, |
| "learning_rate": 0.0005, |
| "loss": 5.185402870178223, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.19088145896656536, |
| "grad_norm": 1.0532907247543335, |
| "learning_rate": 0.0005, |
| "loss": 5.222114562988281, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.19148936170212766, |
| "grad_norm": 1.0423952341079712, |
| "learning_rate": 0.0005, |
| "loss": 5.1693806648254395, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.19209726443768996, |
| "grad_norm": 1.0700887441635132, |
| "learning_rate": 0.0005, |
| "loss": 5.0217485427856445, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.19270516717325228, |
| "grad_norm": 1.2595866918563843, |
| "learning_rate": 0.0005, |
| "loss": 5.231429576873779, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.19331306990881458, |
| "grad_norm": 1.1495158672332764, |
| "learning_rate": 0.0005, |
| "loss": 5.015372276306152, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.1939209726443769, |
| "grad_norm": 1.3977763652801514, |
| "learning_rate": 0.0005, |
| "loss": 5.323009490966797, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.1945288753799392, |
| "grad_norm": 1.4009697437286377, |
| "learning_rate": 0.0005, |
| "loss": 5.2833638191223145, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.1951367781155015, |
| "grad_norm": 1.1618447303771973, |
| "learning_rate": 0.0005, |
| "loss": 5.064535140991211, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.19574468085106383, |
| "grad_norm": 1.1447522640228271, |
| "learning_rate": 0.0005, |
| "loss": 4.99235725402832, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.19635258358662613, |
| "grad_norm": 1.2342157363891602, |
| "learning_rate": 0.0005, |
| "loss": 5.036558151245117, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.19696048632218846, |
| "grad_norm": 1.2487186193466187, |
| "learning_rate": 0.0005, |
| "loss": 5.207220077514648, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.19756838905775076, |
| "grad_norm": 1.4693067073822021, |
| "learning_rate": 0.0005, |
| "loss": 5.096504211425781, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.19817629179331306, |
| "grad_norm": 1.1707696914672852, |
| "learning_rate": 0.0005, |
| "loss": 5.003598213195801, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.19878419452887539, |
| "grad_norm": 0.9728778600692749, |
| "learning_rate": 0.0005, |
| "loss": 4.8744659423828125, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.19939209726443768, |
| "grad_norm": 1.383410096168518, |
| "learning_rate": 0.0005, |
| "loss": 5.1511383056640625, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.0482876300811768, |
| "learning_rate": 0.0005, |
| "loss": 5.014847755432129, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.2006079027355623, |
| "grad_norm": 1.2320209741592407, |
| "learning_rate": 0.0005, |
| "loss": 4.923969745635986, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.2012158054711246, |
| "grad_norm": 2.013617753982544, |
| "learning_rate": 0.0005, |
| "loss": 4.876163482666016, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.20182370820668694, |
| "grad_norm": 1.4123047590255737, |
| "learning_rate": 0.0005, |
| "loss": 4.870320796966553, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.20243161094224923, |
| "grad_norm": 0.9998598694801331, |
| "learning_rate": 0.0005, |
| "loss": 4.8142805099487305, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.20303951367781156, |
| "grad_norm": 1.255579948425293, |
| "learning_rate": 0.0005, |
| "loss": 5.134385108947754, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.20364741641337386, |
| "grad_norm": 1.1863816976547241, |
| "learning_rate": 0.0005, |
| "loss": 4.943517208099365, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.20425531914893616, |
| "grad_norm": 1.3125497102737427, |
| "learning_rate": 0.0005, |
| "loss": 4.835733413696289, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.2048632218844985, |
| "grad_norm": 1.330944538116455, |
| "learning_rate": 0.0005, |
| "loss": 4.996496200561523, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.20547112462006079, |
| "grad_norm": 1.4103339910507202, |
| "learning_rate": 0.0005, |
| "loss": 5.215001106262207, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.2060790273556231, |
| "grad_norm": 1.1276763677597046, |
| "learning_rate": 0.0005, |
| "loss": 5.080985069274902, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.2066869300911854, |
| "grad_norm": 1.2522611618041992, |
| "learning_rate": 0.0005, |
| "loss": 5.1337480545043945, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.2072948328267477, |
| "grad_norm": 1.0622775554656982, |
| "learning_rate": 0.0005, |
| "loss": 5.139281272888184, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.20790273556231004, |
| "grad_norm": 1.2667897939682007, |
| "learning_rate": 0.0005, |
| "loss": 4.985269546508789, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.20851063829787234, |
| "grad_norm": 1.2665342092514038, |
| "learning_rate": 0.0005, |
| "loss": 4.907642841339111, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.20911854103343466, |
| "grad_norm": 1.2670104503631592, |
| "learning_rate": 0.0005, |
| "loss": 4.9238739013671875, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.20972644376899696, |
| "grad_norm": 1.3876585960388184, |
| "learning_rate": 0.0005, |
| "loss": 5.280843734741211, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.21033434650455926, |
| "grad_norm": 1.172425389289856, |
| "learning_rate": 0.0005, |
| "loss": 5.018771171569824, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.2109422492401216, |
| "grad_norm": 1.057332158088684, |
| "learning_rate": 0.0005, |
| "loss": 4.957630157470703, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.2115501519756839, |
| "grad_norm": 1.2106921672821045, |
| "learning_rate": 0.0005, |
| "loss": 5.079224109649658, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.2121580547112462, |
| "grad_norm": 1.2184040546417236, |
| "learning_rate": 0.0005, |
| "loss": 4.923876762390137, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.2127659574468085, |
| "grad_norm": 1.3889566659927368, |
| "learning_rate": 0.0005, |
| "loss": 5.0445098876953125, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.2133738601823708, |
| "grad_norm": 1.1836071014404297, |
| "learning_rate": 0.0005, |
| "loss": 4.762534141540527, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.21398176291793314, |
| "grad_norm": 1.2222967147827148, |
| "learning_rate": 0.0005, |
| "loss": 5.045120716094971, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.21458966565349544, |
| "grad_norm": 1.203317403793335, |
| "learning_rate": 0.0005, |
| "loss": 5.027883052825928, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.21519756838905776, |
| "grad_norm": 1.118275761604309, |
| "learning_rate": 0.0005, |
| "loss": 5.153387069702148, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.21580547112462006, |
| "grad_norm": 1.1502918004989624, |
| "learning_rate": 0.0005, |
| "loss": 4.907447814941406, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.21641337386018236, |
| "grad_norm": 0.916477620601654, |
| "learning_rate": 0.0005, |
| "loss": 4.913633346557617, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.2170212765957447, |
| "grad_norm": 0.9976673722267151, |
| "learning_rate": 0.0005, |
| "loss": 4.855230331420898, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.217629179331307, |
| "grad_norm": 1.2301874160766602, |
| "learning_rate": 0.0005, |
| "loss": 5.274983882904053, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.21823708206686931, |
| "grad_norm": 1.268349051475525, |
| "learning_rate": 0.0005, |
| "loss": 4.990891933441162, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.2188449848024316, |
| "grad_norm": 1.7098944187164307, |
| "learning_rate": 0.0005, |
| "loss": 5.0019989013671875, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.2194528875379939, |
| "grad_norm": 1.3171290159225464, |
| "learning_rate": 0.0005, |
| "loss": 5.091225624084473, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.22006079027355624, |
| "grad_norm": 1.1964459419250488, |
| "learning_rate": 0.0005, |
| "loss": 4.942023754119873, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.22066869300911854, |
| "grad_norm": 1.212193250656128, |
| "learning_rate": 0.0005, |
| "loss": 4.842243194580078, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.22127659574468084, |
| "grad_norm": 1.2447597980499268, |
| "learning_rate": 0.0005, |
| "loss": 4.891105651855469, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.22188449848024316, |
| "grad_norm": 1.0322506427764893, |
| "learning_rate": 0.0005, |
| "loss": 5.083103179931641, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.22249240121580546, |
| "grad_norm": 1.1431292295455933, |
| "learning_rate": 0.0005, |
| "loss": 5.104142189025879, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.2231003039513678, |
| "grad_norm": 1.1028327941894531, |
| "learning_rate": 0.0005, |
| "loss": 4.933050632476807, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.2237082066869301, |
| "grad_norm": 0.9712069630622864, |
| "learning_rate": 0.0005, |
| "loss": 4.821019172668457, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.2243161094224924, |
| "grad_norm": 1.063249111175537, |
| "learning_rate": 0.0005, |
| "loss": 4.972682476043701, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.22492401215805471, |
| "grad_norm": 1.1715357303619385, |
| "learning_rate": 0.0005, |
| "loss": 5.0836591720581055, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.225531914893617, |
| "grad_norm": 1.128483772277832, |
| "learning_rate": 0.0005, |
| "loss": 5.094054698944092, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.22613981762917934, |
| "grad_norm": 1.2616199254989624, |
| "learning_rate": 0.0005, |
| "loss": 4.991359710693359, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.22674772036474164, |
| "grad_norm": 1.2140382528305054, |
| "learning_rate": 0.0005, |
| "loss": 4.7401838302612305, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.22735562310030394, |
| "grad_norm": 1.1435750722885132, |
| "learning_rate": 0.0005, |
| "loss": 5.093307971954346, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.22796352583586627, |
| "grad_norm": 1.0213854312896729, |
| "learning_rate": 0.0005, |
| "loss": 4.898110389709473, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.22857142857142856, |
| "grad_norm": 1.6159358024597168, |
| "learning_rate": 0.0005, |
| "loss": 4.884780406951904, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.2291793313069909, |
| "grad_norm": 1.0451385974884033, |
| "learning_rate": 0.0005, |
| "loss": 5.046623229980469, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.2297872340425532, |
| "grad_norm": 1.0726312398910522, |
| "learning_rate": 0.0005, |
| "loss": 5.3511962890625, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.2303951367781155, |
| "grad_norm": 1.1179200410842896, |
| "learning_rate": 0.0005, |
| "loss": 4.847324371337891, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.23100303951367782, |
| "grad_norm": 1.1474509239196777, |
| "learning_rate": 0.0005, |
| "loss": 4.830921173095703, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.23161094224924011, |
| "grad_norm": 1.0454329252243042, |
| "learning_rate": 0.0005, |
| "loss": 4.962401390075684, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.23221884498480244, |
| "grad_norm": 1.214348316192627, |
| "learning_rate": 0.0005, |
| "loss": 4.800313472747803, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.23282674772036474, |
| "grad_norm": 1.18563973903656, |
| "learning_rate": 0.0005, |
| "loss": 4.8629655838012695, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.23343465045592704, |
| "grad_norm": 1.0595086812973022, |
| "learning_rate": 0.0005, |
| "loss": 4.9949750900268555, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.23404255319148937, |
| "grad_norm": 1.0595086812973022, |
| "learning_rate": 0.0005, |
| "loss": 4.926072597503662, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.23465045592705167, |
| "grad_norm": 1.1770035028457642, |
| "learning_rate": 0.0005, |
| "loss": 4.766304969787598, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.235258358662614, |
| "grad_norm": 1.1117204427719116, |
| "learning_rate": 0.0005, |
| "loss": 4.896605968475342, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.2358662613981763, |
| "grad_norm": 1.2087441682815552, |
| "learning_rate": 0.0005, |
| "loss": 4.892548084259033, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.2364741641337386, |
| "grad_norm": 0.9041852355003357, |
| "learning_rate": 0.0005, |
| "loss": 4.948829650878906, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.23708206686930092, |
| "grad_norm": 0.94862300157547, |
| "learning_rate": 0.0005, |
| "loss": 4.8753533363342285, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.23768996960486322, |
| "grad_norm": 1.055679202079773, |
| "learning_rate": 0.0005, |
| "loss": 4.816287994384766, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.23829787234042554, |
| "grad_norm": 1.413857340812683, |
| "learning_rate": 0.0005, |
| "loss": 4.809457778930664, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.23890577507598784, |
| "grad_norm": 1.326051950454712, |
| "learning_rate": 0.0005, |
| "loss": 5.0313568115234375, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.23951367781155014, |
| "grad_norm": 1.2621649503707886, |
| "learning_rate": 0.0005, |
| "loss": 4.906643867492676, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.24012158054711247, |
| "grad_norm": 1.2217754125595093, |
| "learning_rate": 0.0005, |
| "loss": 4.929527759552002, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.24072948328267477, |
| "grad_norm": 1.1450992822647095, |
| "learning_rate": 0.0005, |
| "loss": 4.908195495605469, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.2413373860182371, |
| "grad_norm": 1.4507970809936523, |
| "learning_rate": 0.0005, |
| "loss": 5.079260349273682, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.2419452887537994, |
| "grad_norm": 1.086036205291748, |
| "learning_rate": 0.0005, |
| "loss": 4.996855735778809, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.2425531914893617, |
| "grad_norm": 1.0666170120239258, |
| "learning_rate": 0.0005, |
| "loss": 5.002256393432617, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.24316109422492402, |
| "grad_norm": 1.199183702468872, |
| "learning_rate": 0.0005, |
| "loss": 5.217647552490234, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.24376899696048632, |
| "grad_norm": 1.156293511390686, |
| "learning_rate": 0.0005, |
| "loss": 4.900952339172363, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.24437689969604864, |
| "grad_norm": 1.3151594400405884, |
| "learning_rate": 0.0005, |
| "loss": 4.980197906494141, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.24498480243161094, |
| "grad_norm": 1.0817885398864746, |
| "learning_rate": 0.0005, |
| "loss": 4.745031356811523, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.24559270516717324, |
| "grad_norm": 1.0003957748413086, |
| "learning_rate": 0.0005, |
| "loss": 4.599782466888428, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.24620060790273557, |
| "grad_norm": 0.95441734790802, |
| "learning_rate": 0.0005, |
| "loss": 4.928730010986328, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.24680851063829787, |
| "grad_norm": 1.1539515256881714, |
| "learning_rate": 0.0005, |
| "loss": 5.01755428314209, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.2474164133738602, |
| "grad_norm": 1.1274021863937378, |
| "learning_rate": 0.0005, |
| "loss": 4.92464542388916, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.2480243161094225, |
| "grad_norm": 1.075126051902771, |
| "learning_rate": 0.0005, |
| "loss": 4.842813014984131, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.2486322188449848, |
| "grad_norm": 1.1200828552246094, |
| "learning_rate": 0.0005, |
| "loss": 4.701647758483887, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.24924012158054712, |
| "grad_norm": 1.349135398864746, |
| "learning_rate": 0.0005, |
| "loss": 5.124917030334473, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.24984802431610942, |
| "grad_norm": 1.403590440750122, |
| "learning_rate": 0.0005, |
| "loss": 5.070537567138672, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.25045592705167175, |
| "grad_norm": 0.9664301872253418, |
| "learning_rate": 0.0005, |
| "loss": 4.846314430236816, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.251063829787234, |
| "grad_norm": 1.1642309427261353, |
| "learning_rate": 0.0005, |
| "loss": 4.933165550231934, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.25167173252279634, |
| "grad_norm": 1.1649516820907593, |
| "learning_rate": 0.0005, |
| "loss": 4.789491653442383, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.25227963525835867, |
| "grad_norm": 1.1041150093078613, |
| "learning_rate": 0.0005, |
| "loss": 4.580702781677246, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.252887537993921, |
| "grad_norm": 1.0078331232070923, |
| "learning_rate": 0.0005, |
| "loss": 4.77386999130249, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.25349544072948327, |
| "grad_norm": 1.0907591581344604, |
| "learning_rate": 0.0005, |
| "loss": 4.774503707885742, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.2541033434650456, |
| "grad_norm": 1.3880425691604614, |
| "learning_rate": 0.0005, |
| "loss": 4.793880462646484, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.2547112462006079, |
| "grad_norm": 1.2313039302825928, |
| "learning_rate": 0.0005, |
| "loss": 4.7932891845703125, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.2553191489361702, |
| "grad_norm": 0.9940412044525146, |
| "learning_rate": 0.0005, |
| "loss": 5.119372367858887, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.2559270516717325, |
| "grad_norm": 1.0474408864974976, |
| "learning_rate": 0.0005, |
| "loss": 4.940298080444336, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.25653495440729485, |
| "grad_norm": 1.091572642326355, |
| "learning_rate": 0.0005, |
| "loss": 4.824063777923584, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.2571428571428571, |
| "grad_norm": 0.9919223189353943, |
| "learning_rate": 0.0005, |
| "loss": 4.823666572570801, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.25775075987841944, |
| "grad_norm": 0.9640527963638306, |
| "learning_rate": 0.0005, |
| "loss": 4.798361778259277, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.25835866261398177, |
| "grad_norm": 1.0292719602584839, |
| "learning_rate": 0.0005, |
| "loss": 4.69101619720459, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.2589665653495441, |
| "grad_norm": 1.2390789985656738, |
| "learning_rate": 0.0005, |
| "loss": 4.671029090881348, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.25957446808510637, |
| "grad_norm": 1.2008142471313477, |
| "learning_rate": 0.0005, |
| "loss": 4.796487331390381, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.2601823708206687, |
| "grad_norm": 1.0405327081680298, |
| "learning_rate": 0.0005, |
| "loss": 4.8557820320129395, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.260790273556231, |
| "grad_norm": 1.042792558670044, |
| "learning_rate": 0.0005, |
| "loss": 4.805086135864258, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.2613981762917933, |
| "grad_norm": 1.6039878129959106, |
| "learning_rate": 0.0005, |
| "loss": 4.892642974853516, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.2620060790273556, |
| "grad_norm": 1.0221588611602783, |
| "learning_rate": 0.0005, |
| "loss": 4.868304252624512, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.26261398176291795, |
| "grad_norm": 1.0673880577087402, |
| "learning_rate": 0.0005, |
| "loss": 4.52126932144165, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.2632218844984802, |
| "grad_norm": 1.1782925128936768, |
| "learning_rate": 0.0005, |
| "loss": 4.9915618896484375, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.26382978723404255, |
| "grad_norm": 0.9004169702529907, |
| "learning_rate": 0.0005, |
| "loss": 5.040285110473633, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.26443768996960487, |
| "grad_norm": 1.1495839357376099, |
| "learning_rate": 0.0005, |
| "loss": 4.991700172424316, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.2650455927051672, |
| "grad_norm": 1.4188427925109863, |
| "learning_rate": 0.0005, |
| "loss": 4.851819038391113, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.26565349544072947, |
| "grad_norm": 1.1886249780654907, |
| "learning_rate": 0.0005, |
| "loss": 4.819738388061523, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.2662613981762918, |
| "grad_norm": 1.0886558294296265, |
| "learning_rate": 0.0005, |
| "loss": 4.889862537384033, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.2668693009118541, |
| "grad_norm": 1.215423822402954, |
| "learning_rate": 0.0005, |
| "loss": 4.66435432434082, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.2674772036474164, |
| "grad_norm": 1.2564237117767334, |
| "learning_rate": 0.0005, |
| "loss": 4.840651512145996, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.2680851063829787, |
| "grad_norm": 0.9406836628913879, |
| "learning_rate": 0.0005, |
| "loss": 4.836145401000977, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.26869300911854105, |
| "grad_norm": 0.9963774085044861, |
| "learning_rate": 0.0005, |
| "loss": 4.879360675811768, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.2693009118541033, |
| "grad_norm": 1.349959135055542, |
| "learning_rate": 0.0005, |
| "loss": 5.149614334106445, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.26990881458966565, |
| "grad_norm": 1.0401732921600342, |
| "learning_rate": 0.0005, |
| "loss": 4.831120491027832, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.270516717325228, |
| "grad_norm": 1.0176857709884644, |
| "learning_rate": 0.0005, |
| "loss": 4.795515060424805, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.2711246200607903, |
| "grad_norm": 1.025748610496521, |
| "learning_rate": 0.0005, |
| "loss": 4.850000381469727, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.27173252279635257, |
| "grad_norm": 1.179107904434204, |
| "learning_rate": 0.0005, |
| "loss": 4.714792728424072, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.2723404255319149, |
| "grad_norm": 1.0913288593292236, |
| "learning_rate": 0.0005, |
| "loss": 4.713229656219482, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.2729483282674772, |
| "grad_norm": 1.2143056392669678, |
| "learning_rate": 0.0005, |
| "loss": 4.776023864746094, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.2735562310030395, |
| "grad_norm": 1.0799494981765747, |
| "learning_rate": 0.0005, |
| "loss": 4.930194854736328, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2741641337386018, |
| "grad_norm": 1.108874797821045, |
| "learning_rate": 0.0005, |
| "loss": 4.798364162445068, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.27477203647416415, |
| "grad_norm": 1.023545742034912, |
| "learning_rate": 0.0005, |
| "loss": 4.951462745666504, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.2753799392097264, |
| "grad_norm": 1.109633207321167, |
| "learning_rate": 0.0005, |
| "loss": 4.775464057922363, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.27598784194528875, |
| "grad_norm": 1.3409186601638794, |
| "learning_rate": 0.0005, |
| "loss": 4.637991905212402, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.2765957446808511, |
| "grad_norm": 1.3562052249908447, |
| "learning_rate": 0.0005, |
| "loss": 4.67308235168457, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.2772036474164134, |
| "grad_norm": 1.0121145248413086, |
| "learning_rate": 0.0005, |
| "loss": 4.8010430335998535, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.2778115501519757, |
| "grad_norm": 1.1394174098968506, |
| "learning_rate": 0.0005, |
| "loss": 4.878546237945557, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.278419452887538, |
| "grad_norm": 1.2403444051742554, |
| "learning_rate": 0.0005, |
| "loss": 4.8740434646606445, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.2790273556231003, |
| "grad_norm": 1.242672085762024, |
| "learning_rate": 0.0005, |
| "loss": 4.854490280151367, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.2796352583586626, |
| "grad_norm": 1.1986356973648071, |
| "learning_rate": 0.0005, |
| "loss": 4.629700660705566, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.2802431610942249, |
| "grad_norm": 1.0786645412445068, |
| "learning_rate": 0.0005, |
| "loss": 4.87874698638916, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.28085106382978725, |
| "grad_norm": 1.1056885719299316, |
| "learning_rate": 0.0005, |
| "loss": 4.816555023193359, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.2814589665653495, |
| "grad_norm": 1.2329976558685303, |
| "learning_rate": 0.0005, |
| "loss": 4.837638854980469, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.28206686930091185, |
| "grad_norm": 1.0028218030929565, |
| "learning_rate": 0.0005, |
| "loss": 4.760637283325195, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.2826747720364742, |
| "grad_norm": 2.1149895191192627, |
| "learning_rate": 0.0005, |
| "loss": 4.90034818649292, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.28328267477203645, |
| "grad_norm": 1.1582082509994507, |
| "learning_rate": 0.0005, |
| "loss": 4.943870544433594, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.2838905775075988, |
| "grad_norm": 1.069417119026184, |
| "learning_rate": 0.0005, |
| "loss": 4.872045993804932, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.2844984802431611, |
| "grad_norm": 1.0112608671188354, |
| "learning_rate": 0.0005, |
| "loss": 4.7598490715026855, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.2851063829787234, |
| "grad_norm": 1.2075181007385254, |
| "learning_rate": 0.0005, |
| "loss": 4.731328010559082, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.2857142857142857, |
| "grad_norm": 1.2083991765975952, |
| "learning_rate": 0.0005, |
| "loss": 4.927289962768555, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.286322188449848, |
| "grad_norm": 1.1168643236160278, |
| "learning_rate": 0.0005, |
| "loss": 4.864751815795898, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.28693009118541035, |
| "grad_norm": 1.078041434288025, |
| "learning_rate": 0.0005, |
| "loss": 4.8492431640625, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.2875379939209726, |
| "grad_norm": 1.1274940967559814, |
| "learning_rate": 0.0005, |
| "loss": 4.937112808227539, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.28814589665653495, |
| "grad_norm": 1.0653259754180908, |
| "learning_rate": 0.0005, |
| "loss": 4.594569683074951, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.2887537993920973, |
| "grad_norm": 1.1258432865142822, |
| "learning_rate": 0.0005, |
| "loss": 4.773998260498047, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.28936170212765955, |
| "grad_norm": 1.0394357442855835, |
| "learning_rate": 0.0005, |
| "loss": 4.6821393966674805, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.2899696048632219, |
| "grad_norm": 0.9899529218673706, |
| "learning_rate": 0.0005, |
| "loss": 4.887704849243164, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.2905775075987842, |
| "grad_norm": 1.1077382564544678, |
| "learning_rate": 0.0005, |
| "loss": 4.747071266174316, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.29118541033434653, |
| "grad_norm": 1.1913772821426392, |
| "learning_rate": 0.0005, |
| "loss": 4.718881607055664, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.2917933130699088, |
| "grad_norm": 1.0459861755371094, |
| "learning_rate": 0.0005, |
| "loss": 4.841939926147461, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.2924012158054711, |
| "grad_norm": 1.0120186805725098, |
| "learning_rate": 0.0005, |
| "loss": 4.599112510681152, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.29300911854103345, |
| "grad_norm": 1.195823073387146, |
| "learning_rate": 0.0005, |
| "loss": 4.728496551513672, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.2936170212765957, |
| "grad_norm": 1.3696142435073853, |
| "learning_rate": 0.0005, |
| "loss": 4.8885321617126465, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.29422492401215805, |
| "grad_norm": 1.0792248249053955, |
| "learning_rate": 0.0005, |
| "loss": 4.971987724304199, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.2948328267477204, |
| "grad_norm": 1.1619709730148315, |
| "learning_rate": 0.0005, |
| "loss": 4.571520805358887, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.29544072948328265, |
| "grad_norm": 1.0330854654312134, |
| "learning_rate": 0.0005, |
| "loss": 4.9688520431518555, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.296048632218845, |
| "grad_norm": 1.0170172452926636, |
| "learning_rate": 0.0005, |
| "loss": 4.837705135345459, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.2966565349544073, |
| "grad_norm": 0.9504514932632446, |
| "learning_rate": 0.0005, |
| "loss": 4.930578231811523, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.29726443768996963, |
| "grad_norm": 1.0397839546203613, |
| "learning_rate": 0.0005, |
| "loss": 4.835279941558838, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.2978723404255319, |
| "grad_norm": 1.1507797241210938, |
| "learning_rate": 0.0005, |
| "loss": 4.659822463989258, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.2984802431610942, |
| "grad_norm": 1.0850329399108887, |
| "learning_rate": 0.0005, |
| "loss": 4.845378875732422, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.29908814589665655, |
| "grad_norm": 0.9977235794067383, |
| "learning_rate": 0.0005, |
| "loss": 4.6792449951171875, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.2996960486322188, |
| "grad_norm": 1.1023447513580322, |
| "learning_rate": 0.0005, |
| "loss": 4.397878646850586, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.30030395136778115, |
| "grad_norm": 1.151859998703003, |
| "learning_rate": 0.0005, |
| "loss": 4.909426689147949, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.3009118541033435, |
| "grad_norm": 0.9461018443107605, |
| "learning_rate": 0.0005, |
| "loss": 4.778614044189453, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.30151975683890575, |
| "grad_norm": 1.0753334760665894, |
| "learning_rate": 0.0005, |
| "loss": 4.747906684875488, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.3021276595744681, |
| "grad_norm": 1.1790133714675903, |
| "learning_rate": 0.0005, |
| "loss": 4.932548522949219, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.3027355623100304, |
| "grad_norm": 0.9537319540977478, |
| "learning_rate": 0.0005, |
| "loss": 4.962670803070068, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.30334346504559273, |
| "grad_norm": 1.0915073156356812, |
| "learning_rate": 0.0005, |
| "loss": 4.60493278503418, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.303951367781155, |
| "grad_norm": 1.1177006959915161, |
| "learning_rate": 0.0005, |
| "loss": 4.69853401184082, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.30455927051671733, |
| "grad_norm": 1.297899842262268, |
| "learning_rate": 0.0005, |
| "loss": 4.779489517211914, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.30516717325227966, |
| "grad_norm": 1.0834105014801025, |
| "learning_rate": 0.0005, |
| "loss": 4.795891761779785, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.3057750759878419, |
| "grad_norm": 1.345795750617981, |
| "learning_rate": 0.0005, |
| "loss": 4.725937843322754, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.30638297872340425, |
| "grad_norm": 1.0314546823501587, |
| "learning_rate": 0.0005, |
| "loss": 4.679283142089844, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.3069908814589666, |
| "grad_norm": 1.0348689556121826, |
| "learning_rate": 0.0005, |
| "loss": 4.620650291442871, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.30759878419452885, |
| "grad_norm": 1.266882061958313, |
| "learning_rate": 0.0005, |
| "loss": 4.773314476013184, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.3082066869300912, |
| "grad_norm": 1.1243505477905273, |
| "learning_rate": 0.0005, |
| "loss": 4.748200416564941, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.3088145896656535, |
| "grad_norm": 1.1018924713134766, |
| "learning_rate": 0.0005, |
| "loss": 4.68126106262207, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.30942249240121583, |
| "grad_norm": 0.9563927054405212, |
| "learning_rate": 0.0005, |
| "loss": 4.857057094573975, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.3100303951367781, |
| "grad_norm": 0.9670454263687134, |
| "learning_rate": 0.0005, |
| "loss": 4.659792900085449, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.31063829787234043, |
| "grad_norm": 1.3360145092010498, |
| "learning_rate": 0.0005, |
| "loss": 4.829246520996094, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.31124620060790276, |
| "grad_norm": 1.2123932838439941, |
| "learning_rate": 0.0005, |
| "loss": 4.866283416748047, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.31185410334346503, |
| "grad_norm": 1.1718541383743286, |
| "learning_rate": 0.0005, |
| "loss": 4.582745552062988, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.31246200607902735, |
| "grad_norm": 1.0925103425979614, |
| "learning_rate": 0.0005, |
| "loss": 4.792252540588379, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.3130699088145897, |
| "grad_norm": 1.1929430961608887, |
| "learning_rate": 0.0005, |
| "loss": 5.072274208068848, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.31367781155015195, |
| "grad_norm": 1.1033862829208374, |
| "learning_rate": 0.0005, |
| "loss": 5.100406646728516, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.3142857142857143, |
| "grad_norm": 1.0984266996383667, |
| "learning_rate": 0.0005, |
| "loss": 4.652458190917969, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.3148936170212766, |
| "grad_norm": 1.1322665214538574, |
| "learning_rate": 0.0005, |
| "loss": 4.757636070251465, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.31550151975683893, |
| "grad_norm": 1.062367558479309, |
| "learning_rate": 0.0005, |
| "loss": 4.769024848937988, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.3161094224924012, |
| "grad_norm": 1.2141786813735962, |
| "learning_rate": 0.0005, |
| "loss": 4.795253753662109, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.31671732522796353, |
| "grad_norm": 1.0612986087799072, |
| "learning_rate": 0.0005, |
| "loss": 4.869831562042236, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.31732522796352586, |
| "grad_norm": 1.0063875913619995, |
| "learning_rate": 0.0005, |
| "loss": 4.789008617401123, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.31793313069908813, |
| "grad_norm": 1.1345361471176147, |
| "learning_rate": 0.0005, |
| "loss": 4.858623504638672, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.31854103343465046, |
| "grad_norm": 1.0883427858352661, |
| "learning_rate": 0.0005, |
| "loss": 4.6939568519592285, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.3191489361702128, |
| "grad_norm": 1.210877776145935, |
| "learning_rate": 0.0005, |
| "loss": 4.860000133514404, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.31975683890577505, |
| "grad_norm": 0.9779753088951111, |
| "learning_rate": 0.0005, |
| "loss": 4.710822582244873, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.3203647416413374, |
| "grad_norm": 1.130603313446045, |
| "learning_rate": 0.0005, |
| "loss": 4.8572678565979, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.3209726443768997, |
| "grad_norm": 1.0674115419387817, |
| "learning_rate": 0.0005, |
| "loss": 4.597178936004639, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.321580547112462, |
| "grad_norm": 1.2021600008010864, |
| "learning_rate": 0.0005, |
| "loss": 4.564465045928955, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.3221884498480243, |
| "grad_norm": 1.018747329711914, |
| "learning_rate": 0.0005, |
| "loss": 4.791827201843262, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.32279635258358663, |
| "grad_norm": 0.847745418548584, |
| "learning_rate": 0.0005, |
| "loss": 4.538583278656006, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.32340425531914896, |
| "grad_norm": 1.0722301006317139, |
| "learning_rate": 0.0005, |
| "loss": 4.728479385375977, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.32401215805471123, |
| "grad_norm": 1.0908275842666626, |
| "learning_rate": 0.0005, |
| "loss": 4.7406721115112305, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.32462006079027356, |
| "grad_norm": 1.0944693088531494, |
| "learning_rate": 0.0005, |
| "loss": 4.56569242477417, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.3252279635258359, |
| "grad_norm": 1.2364919185638428, |
| "learning_rate": 0.0005, |
| "loss": 4.977725028991699, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.32583586626139815, |
| "grad_norm": 0.9999113082885742, |
| "learning_rate": 0.0005, |
| "loss": 4.493361473083496, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.3264437689969605, |
| "grad_norm": 1.3366332054138184, |
| "learning_rate": 0.0005, |
| "loss": 4.634256362915039, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.3270516717325228, |
| "grad_norm": 1.1342191696166992, |
| "learning_rate": 0.0005, |
| "loss": 4.737150192260742, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.3276595744680851, |
| "grad_norm": 1.582653284072876, |
| "learning_rate": 0.0005, |
| "loss": 4.870404243469238, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.3282674772036474, |
| "grad_norm": 1.1713464260101318, |
| "learning_rate": 0.0005, |
| "loss": 4.6230669021606445, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.32887537993920973, |
| "grad_norm": 1.4178698062896729, |
| "learning_rate": 0.0005, |
| "loss": 4.764198303222656, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.32948328267477206, |
| "grad_norm": 1.2060075998306274, |
| "learning_rate": 0.0005, |
| "loss": 4.675044059753418, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.33009118541033433, |
| "grad_norm": 1.1698312759399414, |
| "learning_rate": 0.0005, |
| "loss": 4.706038475036621, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.33069908814589666, |
| "grad_norm": 1.23035728931427, |
| "learning_rate": 0.0005, |
| "loss": 4.638150215148926, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.331306990881459, |
| "grad_norm": 1.2109099626541138, |
| "learning_rate": 0.0005, |
| "loss": 4.521143436431885, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.33191489361702126, |
| "grad_norm": 1.0906360149383545, |
| "learning_rate": 0.0005, |
| "loss": 4.71769380569458, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.3325227963525836, |
| "grad_norm": 0.9782645106315613, |
| "learning_rate": 0.0005, |
| "loss": 4.610015869140625, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.3331306990881459, |
| "grad_norm": 0.9349035620689392, |
| "learning_rate": 0.0005, |
| "loss": 4.59166955947876, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.3337386018237082, |
| "grad_norm": 0.987219512462616, |
| "learning_rate": 0.0005, |
| "loss": 4.769125938415527, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.3343465045592705, |
| "grad_norm": 1.1204229593276978, |
| "learning_rate": 0.0005, |
| "loss": 4.561359405517578, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.33495440729483283, |
| "grad_norm": 0.9658718109130859, |
| "learning_rate": 0.0005, |
| "loss": 4.64151668548584, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.33556231003039516, |
| "grad_norm": 0.9612642526626587, |
| "learning_rate": 0.0005, |
| "loss": 4.750694274902344, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.33617021276595743, |
| "grad_norm": 1.215868592262268, |
| "learning_rate": 0.0005, |
| "loss": 4.788500785827637, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.33677811550151976, |
| "grad_norm": 1.1488007307052612, |
| "learning_rate": 0.0005, |
| "loss": 4.708594799041748, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.3373860182370821, |
| "grad_norm": 1.7407371997833252, |
| "learning_rate": 0.0005, |
| "loss": 4.751000881195068, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.33799392097264436, |
| "grad_norm": 1.0364381074905396, |
| "learning_rate": 0.0005, |
| "loss": 4.5454301834106445, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.3386018237082067, |
| "grad_norm": 1.0255850553512573, |
| "learning_rate": 0.0005, |
| "loss": 4.67049503326416, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.339209726443769, |
| "grad_norm": 1.1722489595413208, |
| "learning_rate": 0.0005, |
| "loss": 4.762301445007324, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.3398176291793313, |
| "grad_norm": 0.9487795829772949, |
| "learning_rate": 0.0005, |
| "loss": 4.537074089050293, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.3404255319148936, |
| "grad_norm": 1.0322198867797852, |
| "learning_rate": 0.0005, |
| "loss": 4.325550079345703, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.34103343465045594, |
| "grad_norm": 1.1969901323318481, |
| "learning_rate": 0.0005, |
| "loss": 4.897404670715332, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.34164133738601826, |
| "grad_norm": 0.9366703629493713, |
| "learning_rate": 0.0005, |
| "loss": 4.552170753479004, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.34224924012158053, |
| "grad_norm": 0.9916586875915527, |
| "learning_rate": 0.0005, |
| "loss": 4.596172332763672, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.34285714285714286, |
| "grad_norm": 1.1367878913879395, |
| "learning_rate": 0.0005, |
| "loss": 4.745723724365234, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.3434650455927052, |
| "grad_norm": 1.0490455627441406, |
| "learning_rate": 0.0005, |
| "loss": 4.605084419250488, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.34407294832826746, |
| "grad_norm": 1.2300151586532593, |
| "learning_rate": 0.0005, |
| "loss": 4.680173397064209, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.3446808510638298, |
| "grad_norm": 0.9747954607009888, |
| "learning_rate": 0.0005, |
| "loss": 4.755300521850586, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.3452887537993921, |
| "grad_norm": 1.2195698022842407, |
| "learning_rate": 0.0005, |
| "loss": 4.678683280944824, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.3458966565349544, |
| "grad_norm": 1.1122758388519287, |
| "learning_rate": 0.0005, |
| "loss": 4.55827522277832, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.3465045592705167, |
| "grad_norm": 1.1671665906906128, |
| "learning_rate": 0.0005, |
| "loss": 4.6204071044921875, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.34711246200607904, |
| "grad_norm": 0.912133514881134, |
| "learning_rate": 0.0005, |
| "loss": 4.619932174682617, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.34772036474164136, |
| "grad_norm": 1.0673686265945435, |
| "learning_rate": 0.0005, |
| "loss": 4.7417120933532715, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.34832826747720363, |
| "grad_norm": 1.0796691179275513, |
| "learning_rate": 0.0005, |
| "loss": 4.666133880615234, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.34893617021276596, |
| "grad_norm": 1.177518367767334, |
| "learning_rate": 0.0005, |
| "loss": 4.443113803863525, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.3495440729483283, |
| "grad_norm": 0.9157246351242065, |
| "learning_rate": 0.0005, |
| "loss": 4.578097343444824, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.35015197568389056, |
| "grad_norm": 1.034294843673706, |
| "learning_rate": 0.0005, |
| "loss": 4.393146514892578, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.3507598784194529, |
| "grad_norm": 0.9026995301246643, |
| "learning_rate": 0.0005, |
| "loss": 4.868537425994873, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.3513677811550152, |
| "grad_norm": 1.1576241254806519, |
| "learning_rate": 0.0005, |
| "loss": 4.755158424377441, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.3519756838905775, |
| "grad_norm": 1.061812400817871, |
| "learning_rate": 0.0005, |
| "loss": 4.48585319519043, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.3525835866261398, |
| "grad_norm": 0.9842910170555115, |
| "learning_rate": 0.0005, |
| "loss": 4.865891456604004, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.35319148936170214, |
| "grad_norm": 1.0243335962295532, |
| "learning_rate": 0.0005, |
| "loss": 4.523388862609863, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.35379939209726446, |
| "grad_norm": 1.2581957578659058, |
| "learning_rate": 0.0005, |
| "loss": 4.821706771850586, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.35440729483282674, |
| "grad_norm": 1.1777689456939697, |
| "learning_rate": 0.0005, |
| "loss": 4.600160121917725, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.35501519756838906, |
| "grad_norm": 0.9623486995697021, |
| "learning_rate": 0.0005, |
| "loss": 4.775470733642578, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.3556231003039514, |
| "grad_norm": 1.302804708480835, |
| "learning_rate": 0.0005, |
| "loss": 4.704485893249512, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.35623100303951366, |
| "grad_norm": 1.15083646774292, |
| "learning_rate": 0.0005, |
| "loss": 4.685108184814453, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.356838905775076, |
| "grad_norm": 1.0529240369796753, |
| "learning_rate": 0.0005, |
| "loss": 4.762598991394043, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.3574468085106383, |
| "grad_norm": 1.008600115776062, |
| "learning_rate": 0.0005, |
| "loss": 4.711298942565918, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.3580547112462006, |
| "grad_norm": 1.1591368913650513, |
| "learning_rate": 0.0005, |
| "loss": 4.836706638336182, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.3586626139817629, |
| "grad_norm": 1.0372366905212402, |
| "learning_rate": 0.0005, |
| "loss": 4.753532409667969, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.35927051671732524, |
| "grad_norm": 0.9533773064613342, |
| "learning_rate": 0.0005, |
| "loss": 4.787997245788574, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.35987841945288757, |
| "grad_norm": 1.3395041227340698, |
| "learning_rate": 0.0005, |
| "loss": 4.700077533721924, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.36048632218844984, |
| "grad_norm": 1.0645594596862793, |
| "learning_rate": 0.0005, |
| "loss": 4.607672691345215, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.36109422492401216, |
| "grad_norm": 1.2142505645751953, |
| "learning_rate": 0.0005, |
| "loss": 4.6179375648498535, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.3617021276595745, |
| "grad_norm": 1.2730581760406494, |
| "learning_rate": 0.0005, |
| "loss": 4.555119514465332, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.36231003039513676, |
| "grad_norm": 1.0680732727050781, |
| "learning_rate": 0.0005, |
| "loss": 4.700529098510742, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.3629179331306991, |
| "grad_norm": 1.055757761001587, |
| "learning_rate": 0.0005, |
| "loss": 4.544746398925781, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.3635258358662614, |
| "grad_norm": 1.2012107372283936, |
| "learning_rate": 0.0005, |
| "loss": 4.614580154418945, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.3641337386018237, |
| "grad_norm": 1.0662033557891846, |
| "learning_rate": 0.0005, |
| "loss": 4.880558967590332, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.364741641337386, |
| "grad_norm": 1.0305242538452148, |
| "learning_rate": 0.0005, |
| "loss": 4.462358474731445, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.36534954407294834, |
| "grad_norm": 1.0423706769943237, |
| "learning_rate": 0.0005, |
| "loss": 4.591382026672363, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.3659574468085106, |
| "grad_norm": 1.2076576948165894, |
| "learning_rate": 0.0005, |
| "loss": 4.7383599281311035, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.36656534954407294, |
| "grad_norm": 1.0415648221969604, |
| "learning_rate": 0.0005, |
| "loss": 4.586676597595215, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.36717325227963526, |
| "grad_norm": 0.9548492431640625, |
| "learning_rate": 0.0005, |
| "loss": 4.836339950561523, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.3677811550151976, |
| "grad_norm": 1.1116399765014648, |
| "learning_rate": 0.0005, |
| "loss": 4.634486198425293, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.36838905775075986, |
| "grad_norm": 0.9329056739807129, |
| "learning_rate": 0.0005, |
| "loss": 4.806420803070068, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.3689969604863222, |
| "grad_norm": 1.167823314666748, |
| "learning_rate": 0.0005, |
| "loss": 4.594254493713379, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.3696048632218845, |
| "grad_norm": 1.0034370422363281, |
| "learning_rate": 0.0005, |
| "loss": 4.6151347160339355, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.3702127659574468, |
| "grad_norm": 1.0906440019607544, |
| "learning_rate": 0.0005, |
| "loss": 4.540549278259277, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.3708206686930091, |
| "grad_norm": 1.0491790771484375, |
| "learning_rate": 0.0005, |
| "loss": 4.600298881530762, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.37142857142857144, |
| "grad_norm": 1.2935380935668945, |
| "learning_rate": 0.0005, |
| "loss": 4.646307945251465, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.3720364741641337, |
| "grad_norm": 1.1572242975234985, |
| "learning_rate": 0.0005, |
| "loss": 4.820685863494873, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.37264437689969604, |
| "grad_norm": 1.0526167154312134, |
| "learning_rate": 0.0005, |
| "loss": 4.463221549987793, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.37325227963525837, |
| "grad_norm": 1.0142046213150024, |
| "learning_rate": 0.0005, |
| "loss": 4.979160308837891, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.3738601823708207, |
| "grad_norm": 1.0886595249176025, |
| "learning_rate": 0.0005, |
| "loss": 4.659153461456299, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.37446808510638296, |
| "grad_norm": 1.0294383764266968, |
| "learning_rate": 0.0005, |
| "loss": 4.511576175689697, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.3750759878419453, |
| "grad_norm": 1.220738172531128, |
| "learning_rate": 0.0005, |
| "loss": 4.640242576599121, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.3756838905775076, |
| "grad_norm": 0.976274311542511, |
| "learning_rate": 0.0005, |
| "loss": 4.557078838348389, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.3762917933130699, |
| "grad_norm": 1.1121824979782104, |
| "learning_rate": 0.0005, |
| "loss": 4.412234306335449, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.3768996960486322, |
| "grad_norm": 1.0940440893173218, |
| "learning_rate": 0.0005, |
| "loss": 4.597440242767334, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.37750759878419454, |
| "grad_norm": 1.1758757829666138, |
| "learning_rate": 0.0005, |
| "loss": 4.729987144470215, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.3781155015197568, |
| "grad_norm": 0.979016900062561, |
| "learning_rate": 0.0005, |
| "loss": 4.656641960144043, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.37872340425531914, |
| "grad_norm": 1.1017565727233887, |
| "learning_rate": 0.0005, |
| "loss": 4.587738037109375, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.37933130699088147, |
| "grad_norm": 1.0581464767456055, |
| "learning_rate": 0.0005, |
| "loss": 4.452451705932617, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.3799392097264438, |
| "grad_norm": 1.0750993490219116, |
| "learning_rate": 0.0005, |
| "loss": 4.531889915466309, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.38054711246200607, |
| "grad_norm": 0.9821625351905823, |
| "learning_rate": 0.0005, |
| "loss": 4.488890171051025, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.3811550151975684, |
| "grad_norm": 1.0691367387771606, |
| "learning_rate": 0.0005, |
| "loss": 4.62428617477417, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.3817629179331307, |
| "grad_norm": 1.0314120054244995, |
| "learning_rate": 0.0005, |
| "loss": 4.533023834228516, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.382370820668693, |
| "grad_norm": 0.9268558025360107, |
| "learning_rate": 0.0005, |
| "loss": 4.565212249755859, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.3829787234042553, |
| "grad_norm": 1.0632472038269043, |
| "learning_rate": 0.0005, |
| "loss": 4.5511980056762695, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.38358662613981764, |
| "grad_norm": 0.9516937732696533, |
| "learning_rate": 0.0005, |
| "loss": 4.546860694885254, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.3841945288753799, |
| "grad_norm": 0.8885926008224487, |
| "learning_rate": 0.0005, |
| "loss": 4.540233612060547, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.38480243161094224, |
| "grad_norm": 0.9631567001342773, |
| "learning_rate": 0.0005, |
| "loss": 4.552545070648193, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.38541033434650457, |
| "grad_norm": 1.0189249515533447, |
| "learning_rate": 0.0005, |
| "loss": 4.413745880126953, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.3860182370820669, |
| "grad_norm": 1.0094175338745117, |
| "learning_rate": 0.0005, |
| "loss": 4.266282081604004, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.38662613981762917, |
| "grad_norm": 1.1108192205429077, |
| "learning_rate": 0.0005, |
| "loss": 4.169710159301758, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.3872340425531915, |
| "grad_norm": 1.1999133825302124, |
| "learning_rate": 0.0005, |
| "loss": 4.5471391677856445, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.3878419452887538, |
| "grad_norm": 1.047059178352356, |
| "learning_rate": 0.0005, |
| "loss": 4.793215751647949, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.3884498480243161, |
| "grad_norm": 1.1927613019943237, |
| "learning_rate": 0.0005, |
| "loss": 4.474370002746582, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.3890577507598784, |
| "grad_norm": 1.0722092390060425, |
| "learning_rate": 0.0005, |
| "loss": 4.685356140136719, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.38966565349544074, |
| "grad_norm": 1.0422673225402832, |
| "learning_rate": 0.0005, |
| "loss": 4.5289201736450195, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.390273556231003, |
| "grad_norm": 0.9556507468223572, |
| "learning_rate": 0.0005, |
| "loss": 4.421667098999023, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.39088145896656534, |
| "grad_norm": 1.0354868173599243, |
| "learning_rate": 0.0005, |
| "loss": 4.573639869689941, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.39148936170212767, |
| "grad_norm": 1.0089163780212402, |
| "learning_rate": 0.0005, |
| "loss": 4.505742073059082, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.39209726443769, |
| "grad_norm": 1.098516821861267, |
| "learning_rate": 0.0005, |
| "loss": 4.61726713180542, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.39270516717325227, |
| "grad_norm": 1.0022438764572144, |
| "learning_rate": 0.0005, |
| "loss": 4.8146162033081055, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.3933130699088146, |
| "grad_norm": 1.219514012336731, |
| "learning_rate": 0.0005, |
| "loss": 4.5992279052734375, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.3939209726443769, |
| "grad_norm": 1.0511285066604614, |
| "learning_rate": 0.0005, |
| "loss": 4.65933895111084, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.3945288753799392, |
| "grad_norm": 1.0481231212615967, |
| "learning_rate": 0.0005, |
| "loss": 4.405591011047363, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.3951367781155015, |
| "grad_norm": 1.1169630289077759, |
| "learning_rate": 0.0005, |
| "loss": 4.621652603149414, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.39574468085106385, |
| "grad_norm": 1.031966209411621, |
| "learning_rate": 0.0005, |
| "loss": 4.5710320472717285, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.3963525835866261, |
| "grad_norm": 1.1107763051986694, |
| "learning_rate": 0.0005, |
| "loss": 4.537693023681641, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.39696048632218844, |
| "grad_norm": 0.9889346957206726, |
| "learning_rate": 0.0005, |
| "loss": 4.518610000610352, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.39756838905775077, |
| "grad_norm": 1.1640068292617798, |
| "learning_rate": 0.0005, |
| "loss": 4.595146179199219, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.3981762917933131, |
| "grad_norm": 1.2929025888442993, |
| "learning_rate": 0.0005, |
| "loss": 4.559798240661621, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.39878419452887537, |
| "grad_norm": 1.098781943321228, |
| "learning_rate": 0.0005, |
| "loss": 4.602121353149414, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.3993920972644377, |
| "grad_norm": 1.0199748277664185, |
| "learning_rate": 0.0005, |
| "loss": 4.460375785827637, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.4516689777374268, |
| "learning_rate": 0.0005, |
| "loss": 4.583429336547852, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.4006079027355623, |
| "grad_norm": 1.0523816347122192, |
| "learning_rate": 0.0005, |
| "loss": 4.602944374084473, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.4012158054711246, |
| "grad_norm": 1.052711844444275, |
| "learning_rate": 0.0005, |
| "loss": 4.508934020996094, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.40182370820668695, |
| "grad_norm": 1.0846177339553833, |
| "learning_rate": 0.0005, |
| "loss": 4.532805442810059, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.4024316109422492, |
| "grad_norm": 0.9877490401268005, |
| "learning_rate": 0.0005, |
| "loss": 4.644316673278809, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.40303951367781155, |
| "grad_norm": 1.04659104347229, |
| "learning_rate": 0.0005, |
| "loss": 4.376730918884277, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.40364741641337387, |
| "grad_norm": 1.250658392906189, |
| "learning_rate": 0.0005, |
| "loss": 4.553335666656494, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.40425531914893614, |
| "grad_norm": 1.1647439002990723, |
| "learning_rate": 0.0005, |
| "loss": 4.282361030578613, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.40486322188449847, |
| "grad_norm": 1.086575984954834, |
| "learning_rate": 0.0005, |
| "loss": 4.545602798461914, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.4054711246200608, |
| "grad_norm": 1.0094430446624756, |
| "learning_rate": 0.0005, |
| "loss": 4.514423370361328, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.4060790273556231, |
| "grad_norm": 1.1341593265533447, |
| "learning_rate": 0.0005, |
| "loss": 4.359306812286377, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.4066869300911854, |
| "grad_norm": 1.0556292533874512, |
| "learning_rate": 0.0005, |
| "loss": 4.663166046142578, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.4072948328267477, |
| "grad_norm": 0.9918414950370789, |
| "learning_rate": 0.0005, |
| "loss": 4.348359107971191, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.40790273556231005, |
| "grad_norm": 1.2771086692810059, |
| "learning_rate": 0.0005, |
| "loss": 4.380928993225098, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.4085106382978723, |
| "grad_norm": 1.2792952060699463, |
| "learning_rate": 0.0005, |
| "loss": 4.493129253387451, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.40911854103343465, |
| "grad_norm": 1.115451693534851, |
| "learning_rate": 0.0005, |
| "loss": 4.5493903160095215, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.409726443768997, |
| "grad_norm": 1.02188241481781, |
| "learning_rate": 0.0005, |
| "loss": 4.540634632110596, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.41033434650455924, |
| "grad_norm": 1.1881492137908936, |
| "learning_rate": 0.0005, |
| "loss": 4.6216325759887695, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.41094224924012157, |
| "grad_norm": 1.1510716676712036, |
| "learning_rate": 0.0005, |
| "loss": 4.753006935119629, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.4115501519756839, |
| "grad_norm": 0.9409204125404358, |
| "learning_rate": 0.0005, |
| "loss": 4.558671951293945, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.4121580547112462, |
| "grad_norm": 0.9652894735336304, |
| "learning_rate": 0.0005, |
| "loss": 4.586430549621582, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.4127659574468085, |
| "grad_norm": 1.0625907182693481, |
| "learning_rate": 0.0005, |
| "loss": 4.467252254486084, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.4133738601823708, |
| "grad_norm": 1.078682780265808, |
| "learning_rate": 0.0005, |
| "loss": 4.66164493560791, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.41398176291793315, |
| "grad_norm": 1.0304362773895264, |
| "learning_rate": 0.0005, |
| "loss": 4.765620231628418, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.4145896656534954, |
| "grad_norm": 0.9225407242774963, |
| "learning_rate": 0.0005, |
| "loss": 4.550148010253906, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.41519756838905775, |
| "grad_norm": 1.0196508169174194, |
| "learning_rate": 0.0005, |
| "loss": 4.9098100662231445, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.4158054711246201, |
| "grad_norm": 0.9961191415786743, |
| "learning_rate": 0.0005, |
| "loss": 4.4087114334106445, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.41641337386018235, |
| "grad_norm": 1.0987764596939087, |
| "learning_rate": 0.0005, |
| "loss": 4.60486364364624, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.41702127659574467, |
| "grad_norm": 1.3485429286956787, |
| "learning_rate": 0.0005, |
| "loss": 4.509698390960693, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.417629179331307, |
| "grad_norm": 1.0834795236587524, |
| "learning_rate": 0.0005, |
| "loss": 4.131223678588867, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.4182370820668693, |
| "grad_norm": 1.2778581380844116, |
| "learning_rate": 0.0005, |
| "loss": 4.530914306640625, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.4188449848024316, |
| "grad_norm": 0.9555144309997559, |
| "learning_rate": 0.0005, |
| "loss": 4.773101806640625, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.4194528875379939, |
| "grad_norm": 1.0608127117156982, |
| "learning_rate": 0.0005, |
| "loss": 4.457843780517578, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.42006079027355625, |
| "grad_norm": 1.2380342483520508, |
| "learning_rate": 0.0005, |
| "loss": 4.438450813293457, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.4206686930091185, |
| "grad_norm": 1.0234472751617432, |
| "learning_rate": 0.0005, |
| "loss": 4.412363052368164, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.42127659574468085, |
| "grad_norm": 1.0774229764938354, |
| "learning_rate": 0.0005, |
| "loss": 4.687466144561768, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.4218844984802432, |
| "grad_norm": 0.9822944402694702, |
| "learning_rate": 0.0005, |
| "loss": 4.798013687133789, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.42249240121580545, |
| "grad_norm": 1.1232951879501343, |
| "learning_rate": 0.0005, |
| "loss": 4.548072814941406, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.4231003039513678, |
| "grad_norm": 1.5027856826782227, |
| "learning_rate": 0.0005, |
| "loss": 4.7048797607421875, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.4237082066869301, |
| "grad_norm": 1.036541223526001, |
| "learning_rate": 0.0005, |
| "loss": 4.6969709396362305, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.4243161094224924, |
| "grad_norm": 1.1823787689208984, |
| "learning_rate": 0.0005, |
| "loss": 4.457941055297852, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.4249240121580547, |
| "grad_norm": 0.9230678081512451, |
| "learning_rate": 0.0005, |
| "loss": 4.421998500823975, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.425531914893617, |
| "grad_norm": 1.7750741243362427, |
| "learning_rate": 0.0005, |
| "loss": 4.76076602935791, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.42613981762917935, |
| "grad_norm": 1.0719808340072632, |
| "learning_rate": 0.0005, |
| "loss": 4.580799102783203, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.4267477203647416, |
| "grad_norm": 1.0799646377563477, |
| "learning_rate": 0.0005, |
| "loss": 4.311610221862793, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.42735562310030395, |
| "grad_norm": 0.8947767019271851, |
| "learning_rate": 0.0005, |
| "loss": 4.4494123458862305, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.4279635258358663, |
| "grad_norm": 1.0298351049423218, |
| "learning_rate": 0.0005, |
| "loss": 4.393129348754883, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.42857142857142855, |
| "grad_norm": 1.098189115524292, |
| "learning_rate": 0.0005, |
| "loss": 4.199446678161621, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.4291793313069909, |
| "grad_norm": 1.112589955329895, |
| "learning_rate": 0.0005, |
| "loss": 4.471273422241211, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.4297872340425532, |
| "grad_norm": 1.2152529954910278, |
| "learning_rate": 0.0005, |
| "loss": 4.727916240692139, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.43039513677811553, |
| "grad_norm": 1.1162065267562866, |
| "learning_rate": 0.0005, |
| "loss": 4.282822132110596, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.4310030395136778, |
| "grad_norm": 1.2259479761123657, |
| "learning_rate": 0.0005, |
| "loss": 4.1524977684021, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.4316109422492401, |
| "grad_norm": 1.0089929103851318, |
| "learning_rate": 0.0005, |
| "loss": 4.150537490844727, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.43221884498480245, |
| "grad_norm": 0.9101129770278931, |
| "learning_rate": 0.0005, |
| "loss": 4.379437446594238, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.4328267477203647, |
| "grad_norm": 0.9849691390991211, |
| "learning_rate": 0.0005, |
| "loss": 4.299429893493652, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.43343465045592705, |
| "grad_norm": 0.9956537485122681, |
| "learning_rate": 0.0005, |
| "loss": 4.439446926116943, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.4340425531914894, |
| "grad_norm": 1.0646576881408691, |
| "learning_rate": 0.0005, |
| "loss": 4.680734634399414, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.43465045592705165, |
| "grad_norm": 1.1268900632858276, |
| "learning_rate": 0.0005, |
| "loss": 4.390021324157715, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.435258358662614, |
| "grad_norm": 1.1238709688186646, |
| "learning_rate": 0.0005, |
| "loss": 4.414492607116699, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.4358662613981763, |
| "grad_norm": 1.0272475481033325, |
| "learning_rate": 0.0005, |
| "loss": 4.48759651184082, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.43647416413373863, |
| "grad_norm": 0.9443128108978271, |
| "learning_rate": 0.0005, |
| "loss": 4.241964340209961, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.4370820668693009, |
| "grad_norm": 0.8795979022979736, |
| "learning_rate": 0.0005, |
| "loss": 4.438322067260742, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.4376899696048632, |
| "grad_norm": 1.0388433933258057, |
| "learning_rate": 0.0005, |
| "loss": 4.499500274658203, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.43829787234042555, |
| "grad_norm": 1.0285965204238892, |
| "learning_rate": 0.0005, |
| "loss": 4.458085060119629, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.4389057750759878, |
| "grad_norm": 1.0486245155334473, |
| "learning_rate": 0.0005, |
| "loss": 4.3121843338012695, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.43951367781155015, |
| "grad_norm": 0.974229633808136, |
| "learning_rate": 0.0005, |
| "loss": 4.484938621520996, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.4401215805471125, |
| "grad_norm": 1.028061032295227, |
| "learning_rate": 0.0005, |
| "loss": 4.343748092651367, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.44072948328267475, |
| "grad_norm": 1.247310757637024, |
| "learning_rate": 0.0005, |
| "loss": 4.43183708190918, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.4413373860182371, |
| "grad_norm": 1.07508385181427, |
| "learning_rate": 0.0005, |
| "loss": 4.473773956298828, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.4419452887537994, |
| "grad_norm": 1.0861989259719849, |
| "learning_rate": 0.0005, |
| "loss": 4.50743293762207, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.4425531914893617, |
| "grad_norm": 1.043446660041809, |
| "learning_rate": 0.0005, |
| "loss": 4.65224027633667, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.443161094224924, |
| "grad_norm": 1.1153486967086792, |
| "learning_rate": 0.0005, |
| "loss": 4.275899887084961, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.44376899696048633, |
| "grad_norm": 1.0387423038482666, |
| "learning_rate": 0.0005, |
| "loss": 4.571664333343506, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.44437689969604866, |
| "grad_norm": 1.1121833324432373, |
| "learning_rate": 0.0005, |
| "loss": 4.472873687744141, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.4449848024316109, |
| "grad_norm": 1.110357642173767, |
| "learning_rate": 0.0005, |
| "loss": 4.507586479187012, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.44559270516717325, |
| "grad_norm": 1.0192921161651611, |
| "learning_rate": 0.0005, |
| "loss": 4.614180564880371, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.4462006079027356, |
| "grad_norm": 1.2011562585830688, |
| "learning_rate": 0.0005, |
| "loss": 4.410806655883789, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.44680851063829785, |
| "grad_norm": 1.045922040939331, |
| "learning_rate": 0.0005, |
| "loss": 4.522254943847656, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.4474164133738602, |
| "grad_norm": 1.1084001064300537, |
| "learning_rate": 0.0005, |
| "loss": 4.473600387573242, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.4480243161094225, |
| "grad_norm": 1.0580531358718872, |
| "learning_rate": 0.0005, |
| "loss": 4.495148658752441, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.4486322188449848, |
| "grad_norm": 1.0791500806808472, |
| "learning_rate": 0.0005, |
| "loss": 4.559470176696777, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.4492401215805471, |
| "grad_norm": 0.9919356107711792, |
| "learning_rate": 0.0005, |
| "loss": 4.445730209350586, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.44984802431610943, |
| "grad_norm": 0.9215476512908936, |
| "learning_rate": 0.0005, |
| "loss": 4.360682487487793, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.45045592705167176, |
| "grad_norm": 1.1767232418060303, |
| "learning_rate": 0.0005, |
| "loss": 4.51902437210083, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.451063829787234, |
| "grad_norm": 1.1746350526809692, |
| "learning_rate": 0.0005, |
| "loss": 4.362285614013672, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.45167173252279635, |
| "grad_norm": 1.0243946313858032, |
| "learning_rate": 0.0005, |
| "loss": 4.443662166595459, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.4522796352583587, |
| "grad_norm": 1.034515619277954, |
| "learning_rate": 0.0005, |
| "loss": 4.329188346862793, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.45288753799392095, |
| "grad_norm": 1.1209111213684082, |
| "learning_rate": 0.0005, |
| "loss": 4.6534223556518555, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.4534954407294833, |
| "grad_norm": 1.0455032587051392, |
| "learning_rate": 0.0005, |
| "loss": 4.511608600616455, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.4541033434650456, |
| "grad_norm": 1.002439022064209, |
| "learning_rate": 0.0005, |
| "loss": 4.4008378982543945, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.4547112462006079, |
| "grad_norm": 0.9780976176261902, |
| "learning_rate": 0.0005, |
| "loss": 4.478031158447266, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.4553191489361702, |
| "grad_norm": 1.0394052267074585, |
| "learning_rate": 0.0005, |
| "loss": 4.431166648864746, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.45592705167173253, |
| "grad_norm": 1.0838037729263306, |
| "learning_rate": 0.0005, |
| "loss": 4.38276481628418, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.45653495440729486, |
| "grad_norm": 1.2306514978408813, |
| "learning_rate": 0.0005, |
| "loss": 4.427013874053955, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.45714285714285713, |
| "grad_norm": 0.8942012190818787, |
| "learning_rate": 0.0005, |
| "loss": 4.463613986968994, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.45775075987841946, |
| "grad_norm": 1.0273581743240356, |
| "learning_rate": 0.0005, |
| "loss": 4.331677436828613, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.4583586626139818, |
| "grad_norm": 1.061225414276123, |
| "learning_rate": 0.0005, |
| "loss": 4.360611438751221, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.45896656534954405, |
| "grad_norm": 0.9954508543014526, |
| "learning_rate": 0.0005, |
| "loss": 4.37364387512207, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.4595744680851064, |
| "grad_norm": 0.9806733131408691, |
| "learning_rate": 0.0005, |
| "loss": 4.469931602478027, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.4601823708206687, |
| "grad_norm": 1.131806492805481, |
| "learning_rate": 0.0005, |
| "loss": 4.487429618835449, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.460790273556231, |
| "grad_norm": 0.9451801776885986, |
| "learning_rate": 0.0005, |
| "loss": 4.476114749908447, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.4613981762917933, |
| "grad_norm": 1.064634084701538, |
| "learning_rate": 0.0005, |
| "loss": 4.607744216918945, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.46200607902735563, |
| "grad_norm": 1.0846835374832153, |
| "learning_rate": 0.0005, |
| "loss": 4.312438011169434, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.46261398176291796, |
| "grad_norm": 0.9688083529472351, |
| "learning_rate": 0.0005, |
| "loss": 4.376931667327881, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.46322188449848023, |
| "grad_norm": 1.1652911901474, |
| "learning_rate": 0.0005, |
| "loss": 4.416962623596191, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.46382978723404256, |
| "grad_norm": 1.147851586341858, |
| "learning_rate": 0.0005, |
| "loss": 4.349986553192139, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.4644376899696049, |
| "grad_norm": 0.9702697992324829, |
| "learning_rate": 0.0005, |
| "loss": 4.497674942016602, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.46504559270516715, |
| "grad_norm": 1.0843515396118164, |
| "learning_rate": 0.0005, |
| "loss": 4.49877405166626, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.4656534954407295, |
| "grad_norm": 0.9171056747436523, |
| "learning_rate": 0.0005, |
| "loss": 4.1539201736450195, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.4662613981762918, |
| "grad_norm": 1.164944052696228, |
| "learning_rate": 0.0005, |
| "loss": 4.509303092956543, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.4668693009118541, |
| "grad_norm": 1.0968433618545532, |
| "learning_rate": 0.0005, |
| "loss": 4.4588727951049805, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.4674772036474164, |
| "grad_norm": 1.0154880285263062, |
| "learning_rate": 0.0005, |
| "loss": 4.611554145812988, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.46808510638297873, |
| "grad_norm": 0.9653189778327942, |
| "learning_rate": 0.0005, |
| "loss": 4.324926376342773, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.46869300911854106, |
| "grad_norm": 1.1051913499832153, |
| "learning_rate": 0.0005, |
| "loss": 4.4647111892700195, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.46930091185410333, |
| "grad_norm": 0.9223854541778564, |
| "learning_rate": 0.0005, |
| "loss": 4.7103400230407715, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.46990881458966566, |
| "grad_norm": 1.124935507774353, |
| "learning_rate": 0.0005, |
| "loss": 4.453402519226074, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.470516717325228, |
| "grad_norm": 1.3314533233642578, |
| "learning_rate": 0.0005, |
| "loss": 4.297192573547363, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.47112462006079026, |
| "grad_norm": 1.0218007564544678, |
| "learning_rate": 0.0005, |
| "loss": 4.496466159820557, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.4717325227963526, |
| "grad_norm": 1.0308325290679932, |
| "learning_rate": 0.0005, |
| "loss": 4.3223772048950195, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.4723404255319149, |
| "grad_norm": 1.1283831596374512, |
| "learning_rate": 0.0005, |
| "loss": 4.398843288421631, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.4729483282674772, |
| "grad_norm": 1.1089282035827637, |
| "learning_rate": 0.0005, |
| "loss": 4.226986408233643, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.4735562310030395, |
| "grad_norm": 0.9950074553489685, |
| "learning_rate": 0.0005, |
| "loss": 4.401683807373047, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.47416413373860183, |
| "grad_norm": 1.1220934391021729, |
| "learning_rate": 0.0005, |
| "loss": 4.23845100402832, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.47477203647416416, |
| "grad_norm": 1.1314822435379028, |
| "learning_rate": 0.0005, |
| "loss": 4.648829936981201, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.47537993920972643, |
| "grad_norm": 1.0067565441131592, |
| "learning_rate": 0.0005, |
| "loss": 4.342182159423828, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.47598784194528876, |
| "grad_norm": 1.4291990995407104, |
| "learning_rate": 0.0005, |
| "loss": 4.222455978393555, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.4765957446808511, |
| "grad_norm": 1.0664339065551758, |
| "learning_rate": 0.0005, |
| "loss": 4.533761978149414, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.47720364741641336, |
| "grad_norm": 0.837992787361145, |
| "learning_rate": 0.0005, |
| "loss": 4.583135604858398, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.4778115501519757, |
| "grad_norm": 1.0775222778320312, |
| "learning_rate": 0.0005, |
| "loss": 4.407233238220215, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.478419452887538, |
| "grad_norm": 1.1260716915130615, |
| "learning_rate": 0.0005, |
| "loss": 4.408687114715576, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.4790273556231003, |
| "grad_norm": 1.1476800441741943, |
| "learning_rate": 0.0005, |
| "loss": 4.5264692306518555, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.4796352583586626, |
| "grad_norm": 1.0624704360961914, |
| "learning_rate": 0.0005, |
| "loss": 4.47670316696167, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.48024316109422494, |
| "grad_norm": 1.4008615016937256, |
| "learning_rate": 0.0005, |
| "loss": 4.542054653167725, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.4808510638297872, |
| "grad_norm": 1.6348981857299805, |
| "learning_rate": 0.0005, |
| "loss": 4.272322654724121, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.48145896656534953, |
| "grad_norm": 1.110823154449463, |
| "learning_rate": 0.0005, |
| "loss": 4.32360315322876, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.48206686930091186, |
| "grad_norm": 0.9771617650985718, |
| "learning_rate": 0.0005, |
| "loss": 4.4510321617126465, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.4826747720364742, |
| "grad_norm": 1.0948632955551147, |
| "learning_rate": 0.0005, |
| "loss": 4.335118293762207, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.48328267477203646, |
| "grad_norm": 1.2692338228225708, |
| "learning_rate": 0.0005, |
| "loss": 4.3776655197143555, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.4838905775075988, |
| "grad_norm": 0.8474439978599548, |
| "learning_rate": 0.0005, |
| "loss": 4.24397087097168, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.4844984802431611, |
| "grad_norm": 0.9258842468261719, |
| "learning_rate": 0.0005, |
| "loss": 4.602321624755859, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.4851063829787234, |
| "grad_norm": 1.1678420305252075, |
| "learning_rate": 0.0005, |
| "loss": 4.5578203201293945, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.4857142857142857, |
| "grad_norm": 1.0839719772338867, |
| "learning_rate": 0.0005, |
| "loss": 4.4719109535217285, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.48632218844984804, |
| "grad_norm": 1.0721313953399658, |
| "learning_rate": 0.0005, |
| "loss": 4.1971516609191895, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.4869300911854103, |
| "grad_norm": 1.077587366104126, |
| "learning_rate": 0.0005, |
| "loss": 4.452859401702881, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.48753799392097263, |
| "grad_norm": 0.9456436038017273, |
| "learning_rate": 0.0005, |
| "loss": 4.417455673217773, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.48814589665653496, |
| "grad_norm": 0.9326696991920471, |
| "learning_rate": 0.0005, |
| "loss": 4.389290809631348, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.4887537993920973, |
| "grad_norm": 1.0423635244369507, |
| "learning_rate": 0.0005, |
| "loss": 4.4252448081970215, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.48936170212765956, |
| "grad_norm": 1.0106087923049927, |
| "learning_rate": 0.0005, |
| "loss": 4.29632043838501, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.4899696048632219, |
| "grad_norm": 0.8635157942771912, |
| "learning_rate": 0.0005, |
| "loss": 4.45654296875, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.4905775075987842, |
| "grad_norm": 0.9637815952301025, |
| "learning_rate": 0.0005, |
| "loss": 4.305363655090332, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.4911854103343465, |
| "grad_norm": 0.9523938298225403, |
| "learning_rate": 0.0005, |
| "loss": 4.561666965484619, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.4917933130699088, |
| "grad_norm": 1.1045883893966675, |
| "learning_rate": 0.0005, |
| "loss": 4.385721206665039, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.49240121580547114, |
| "grad_norm": 0.951117992401123, |
| "learning_rate": 0.0005, |
| "loss": 4.302276611328125, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.4930091185410334, |
| "grad_norm": 1.091933250427246, |
| "learning_rate": 0.0005, |
| "loss": 4.64669132232666, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.49361702127659574, |
| "grad_norm": 1.0813966989517212, |
| "learning_rate": 0.0005, |
| "loss": 4.266849517822266, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.49422492401215806, |
| "grad_norm": 0.9683962464332581, |
| "learning_rate": 0.0005, |
| "loss": 4.304372787475586, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.4948328267477204, |
| "grad_norm": 0.960382342338562, |
| "learning_rate": 0.0005, |
| "loss": 4.221304416656494, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.49544072948328266, |
| "grad_norm": 0.9746182560920715, |
| "learning_rate": 0.0005, |
| "loss": 4.392333030700684, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.496048632218845, |
| "grad_norm": 0.9449917078018188, |
| "learning_rate": 0.0005, |
| "loss": 4.274685859680176, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.4966565349544073, |
| "grad_norm": 0.8899694085121155, |
| "learning_rate": 0.0005, |
| "loss": 4.206332206726074, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.4972644376899696, |
| "grad_norm": 0.9504559636116028, |
| "learning_rate": 0.0005, |
| "loss": 4.2690534591674805, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.4978723404255319, |
| "grad_norm": 0.9823598265647888, |
| "learning_rate": 0.0005, |
| "loss": 4.379746437072754, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.49848024316109424, |
| "grad_norm": 1.0227431058883667, |
| "learning_rate": 0.0005, |
| "loss": 4.233619213104248, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.4990881458966565, |
| "grad_norm": 0.9714612364768982, |
| "learning_rate": 0.0005, |
| "loss": 4.607011795043945, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.49969604863221884, |
| "grad_norm": 0.9920446276664734, |
| "learning_rate": 0.0005, |
| "loss": 4.5199127197265625, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.5003039513677812, |
| "grad_norm": 1.0052610635757446, |
| "learning_rate": 0.0005, |
| "loss": 4.538883209228516, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.5009118541033435, |
| "grad_norm": 0.961460292339325, |
| "learning_rate": 0.0005, |
| "loss": 4.37430477142334, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.5015197568389058, |
| "grad_norm": 0.9705450534820557, |
| "learning_rate": 0.0005, |
| "loss": 4.36405611038208, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.502127659574468, |
| "grad_norm": 1.0589666366577148, |
| "learning_rate": 0.0005, |
| "loss": 4.532018661499023, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.5027355623100304, |
| "grad_norm": 1.0190895795822144, |
| "learning_rate": 0.0005, |
| "loss": 4.366916656494141, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.5033434650455927, |
| "grad_norm": 1.2047783136367798, |
| "learning_rate": 0.0005, |
| "loss": 4.332704544067383, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.503951367781155, |
| "grad_norm": 0.9100733995437622, |
| "learning_rate": 0.0005, |
| "loss": 4.1280975341796875, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.5045592705167173, |
| "grad_norm": 1.0953924655914307, |
| "learning_rate": 0.0005, |
| "loss": 4.338841438293457, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.5051671732522797, |
| "grad_norm": 1.2325948476791382, |
| "learning_rate": 0.0005, |
| "loss": 4.425684928894043, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.505775075987842, |
| "grad_norm": 1.0776824951171875, |
| "learning_rate": 0.0005, |
| "loss": 4.238302230834961, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.5063829787234042, |
| "grad_norm": 1.002465009689331, |
| "learning_rate": 0.0005, |
| "loss": 4.1673173904418945, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.5069908814589665, |
| "grad_norm": 1.0070068836212158, |
| "learning_rate": 0.0005, |
| "loss": 4.502063751220703, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.5075987841945289, |
| "grad_norm": 0.9460301995277405, |
| "learning_rate": 0.0005, |
| "loss": 4.266294479370117, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.5082066869300912, |
| "grad_norm": 0.9609605669975281, |
| "learning_rate": 0.0005, |
| "loss": 4.49836540222168, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.5088145896656535, |
| "grad_norm": 1.0298100709915161, |
| "learning_rate": 0.0005, |
| "loss": 4.342093467712402, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.5094224924012158, |
| "grad_norm": 1.102327585220337, |
| "learning_rate": 0.0005, |
| "loss": 4.25087833404541, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.5100303951367782, |
| "grad_norm": 1.2569550275802612, |
| "learning_rate": 0.0005, |
| "loss": 4.285090446472168, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.5106382978723404, |
| "grad_norm": 1.0138150453567505, |
| "learning_rate": 0.0005, |
| "loss": 4.334506034851074, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.5112462006079027, |
| "grad_norm": 1.0152983665466309, |
| "learning_rate": 0.0005, |
| "loss": 4.283235549926758, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.511854103343465, |
| "grad_norm": 1.1372138261795044, |
| "learning_rate": 0.0005, |
| "loss": 4.07025146484375, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.5124620060790274, |
| "grad_norm": 1.1843246221542358, |
| "learning_rate": 0.0005, |
| "loss": 4.353334426879883, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.5130699088145897, |
| "grad_norm": 1.1458396911621094, |
| "learning_rate": 0.0005, |
| "loss": 4.34335994720459, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.513677811550152, |
| "grad_norm": 1.0594899654388428, |
| "learning_rate": 0.0005, |
| "loss": 4.31781005859375, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.5142857142857142, |
| "grad_norm": 0.844513475894928, |
| "learning_rate": 0.0005, |
| "loss": 4.4846577644348145, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.5148936170212766, |
| "grad_norm": 2.6839306354522705, |
| "learning_rate": 0.0005, |
| "loss": 4.262670993804932, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.5155015197568389, |
| "grad_norm": 1.0088754892349243, |
| "learning_rate": 0.0005, |
| "loss": 4.266050338745117, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.5161094224924012, |
| "grad_norm": 1.0849522352218628, |
| "learning_rate": 0.0005, |
| "loss": 4.108889102935791, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.5167173252279635, |
| "grad_norm": 1.0903068780899048, |
| "learning_rate": 0.0005, |
| "loss": 4.313821315765381, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.5173252279635259, |
| "grad_norm": 1.1618335247039795, |
| "learning_rate": 0.0005, |
| "loss": 4.295135498046875, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.5179331306990882, |
| "grad_norm": 0.9828124046325684, |
| "learning_rate": 0.0005, |
| "loss": 4.440587043762207, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.5185410334346504, |
| "grad_norm": 1.131939172744751, |
| "learning_rate": 0.0005, |
| "loss": 4.306354522705078, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.5191489361702127, |
| "grad_norm": 1.3951880931854248, |
| "learning_rate": 0.0005, |
| "loss": 4.395257949829102, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.5197568389057751, |
| "grad_norm": 1.28059983253479, |
| "learning_rate": 0.0005, |
| "loss": 4.033473968505859, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.5203647416413374, |
| "grad_norm": 0.9717862606048584, |
| "learning_rate": 0.0005, |
| "loss": 4.356355667114258, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.5209726443768997, |
| "grad_norm": 1.043353796005249, |
| "learning_rate": 0.0005, |
| "loss": 4.250835418701172, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.521580547112462, |
| "grad_norm": 1.016579508781433, |
| "learning_rate": 0.0005, |
| "loss": 4.286150932312012, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.5221884498480243, |
| "grad_norm": 1.112782597541809, |
| "learning_rate": 0.0005, |
| "loss": 4.598012924194336, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.5227963525835866, |
| "grad_norm": 1.1940479278564453, |
| "learning_rate": 0.0005, |
| "loss": 4.4383955001831055, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.5234042553191489, |
| "grad_norm": 1.254970669746399, |
| "learning_rate": 0.0005, |
| "loss": 4.322863578796387, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.5240121580547112, |
| "grad_norm": 1.0700422525405884, |
| "learning_rate": 0.0005, |
| "loss": 4.244253158569336, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.5246200607902736, |
| "grad_norm": 1.0553544759750366, |
| "learning_rate": 0.0005, |
| "loss": 4.310792446136475, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.5252279635258359, |
| "grad_norm": 1.0288846492767334, |
| "learning_rate": 0.0005, |
| "loss": 4.3274383544921875, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.5258358662613982, |
| "grad_norm": 1.0445955991744995, |
| "learning_rate": 0.0005, |
| "loss": 4.45347261428833, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.5264437689969604, |
| "grad_norm": 1.1357736587524414, |
| "learning_rate": 0.0005, |
| "loss": 4.4809064865112305, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.5270516717325228, |
| "grad_norm": 1.109326720237732, |
| "learning_rate": 0.0005, |
| "loss": 4.253253936767578, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.5276595744680851, |
| "grad_norm": 1.1890736818313599, |
| "learning_rate": 0.0005, |
| "loss": 4.426365852355957, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.5282674772036474, |
| "grad_norm": 1.0840505361557007, |
| "learning_rate": 0.0005, |
| "loss": 4.321274280548096, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.5288753799392097, |
| "grad_norm": 1.2200610637664795, |
| "learning_rate": 0.0005, |
| "loss": 4.557803153991699, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.5294832826747721, |
| "grad_norm": 0.9972710609436035, |
| "learning_rate": 0.0005, |
| "loss": 4.23234748840332, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.5300911854103344, |
| "grad_norm": 1.0316972732543945, |
| "learning_rate": 0.0005, |
| "loss": 4.139028549194336, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.5306990881458966, |
| "grad_norm": 1.0380617380142212, |
| "learning_rate": 0.0005, |
| "loss": 4.348488807678223, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.5313069908814589, |
| "grad_norm": 0.9867698550224304, |
| "learning_rate": 0.0005, |
| "loss": 4.302568435668945, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.5319148936170213, |
| "grad_norm": 1.0779541730880737, |
| "learning_rate": 0.0005, |
| "loss": 4.425013542175293, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.5325227963525836, |
| "grad_norm": 1.2543246746063232, |
| "learning_rate": 0.0005, |
| "loss": 4.724435806274414, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.5331306990881459, |
| "grad_norm": 1.2280689477920532, |
| "learning_rate": 0.0005, |
| "loss": 4.2406415939331055, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.5337386018237082, |
| "grad_norm": 1.3842073678970337, |
| "learning_rate": 0.0005, |
| "loss": 4.396044731140137, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.5343465045592705, |
| "grad_norm": 1.0350067615509033, |
| "learning_rate": 0.0005, |
| "loss": 4.17176628112793, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.5349544072948328, |
| "grad_norm": 0.9484389424324036, |
| "learning_rate": 0.0005, |
| "loss": 4.430863380432129, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.5355623100303951, |
| "grad_norm": 1.1557071208953857, |
| "learning_rate": 0.0005, |
| "loss": 4.12956428527832, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.5361702127659574, |
| "grad_norm": 0.9079960584640503, |
| "learning_rate": 0.0005, |
| "loss": 4.4100542068481445, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.5367781155015198, |
| "grad_norm": 0.9755933880805969, |
| "learning_rate": 0.0005, |
| "loss": 4.136897563934326, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.5373860182370821, |
| "grad_norm": 1.0319873094558716, |
| "learning_rate": 0.0005, |
| "loss": 4.440415859222412, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.5379939209726444, |
| "grad_norm": 0.8542789220809937, |
| "learning_rate": 0.0005, |
| "loss": 4.413039207458496, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.5386018237082066, |
| "grad_norm": 1.0158871412277222, |
| "learning_rate": 0.0005, |
| "loss": 4.379025459289551, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.539209726443769, |
| "grad_norm": 0.8926265835762024, |
| "learning_rate": 0.0005, |
| "loss": 4.344198226928711, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.5398176291793313, |
| "grad_norm": 0.8857081532478333, |
| "learning_rate": 0.0005, |
| "loss": 4.38722562789917, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.5404255319148936, |
| "grad_norm": 0.9595281481742859, |
| "learning_rate": 0.0005, |
| "loss": 4.3452959060668945, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.541033434650456, |
| "grad_norm": 0.9428173303604126, |
| "learning_rate": 0.0005, |
| "loss": 4.258479118347168, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.5416413373860183, |
| "grad_norm": 1.5479097366333008, |
| "learning_rate": 0.0005, |
| "loss": 4.245420455932617, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.5422492401215806, |
| "grad_norm": 1.1619681119918823, |
| "learning_rate": 0.0005, |
| "loss": 4.385200500488281, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.5428571428571428, |
| "grad_norm": 0.9958190321922302, |
| "learning_rate": 0.0005, |
| "loss": 4.102227687835693, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.5434650455927051, |
| "grad_norm": 1.0156055688858032, |
| "learning_rate": 0.0005, |
| "loss": 4.067695617675781, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.5440729483282675, |
| "grad_norm": 1.1579831838607788, |
| "learning_rate": 0.0005, |
| "loss": 4.48448371887207, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.5446808510638298, |
| "grad_norm": 1.23504638671875, |
| "learning_rate": 0.0005, |
| "loss": 4.5317583084106445, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.5452887537993921, |
| "grad_norm": 1.167401909828186, |
| "learning_rate": 0.0005, |
| "loss": 4.3551435470581055, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.5458966565349544, |
| "grad_norm": 1.4126181602478027, |
| "learning_rate": 0.0005, |
| "loss": 4.373387813568115, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.5465045592705167, |
| "grad_norm": 1.152944564819336, |
| "learning_rate": 0.0005, |
| "loss": 3.9645819664001465, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.547112462006079, |
| "grad_norm": 1.5390210151672363, |
| "learning_rate": 0.0005, |
| "loss": 4.454073429107666, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.5477203647416413, |
| "grad_norm": 1.0349818468093872, |
| "learning_rate": 0.0005, |
| "loss": 4.398721694946289, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.5483282674772036, |
| "grad_norm": 1.0963656902313232, |
| "learning_rate": 0.0005, |
| "loss": 4.0993242263793945, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.548936170212766, |
| "grad_norm": 1.1737645864486694, |
| "learning_rate": 0.0005, |
| "loss": 4.228819370269775, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.5495440729483283, |
| "grad_norm": 1.1499532461166382, |
| "learning_rate": 0.0005, |
| "loss": 4.329497337341309, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.5501519756838906, |
| "grad_norm": 1.1188825368881226, |
| "learning_rate": 0.0005, |
| "loss": 4.631438255310059, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.5507598784194528, |
| "grad_norm": 1.0337425470352173, |
| "learning_rate": 0.0005, |
| "loss": 4.373821258544922, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.5513677811550152, |
| "grad_norm": 1.098497986793518, |
| "learning_rate": 0.0005, |
| "loss": 4.344779014587402, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.5519756838905775, |
| "grad_norm": 1.0316400527954102, |
| "learning_rate": 0.0005, |
| "loss": 4.097405910491943, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.5525835866261398, |
| "grad_norm": 1.0182708501815796, |
| "learning_rate": 0.0005, |
| "loss": 4.353028297424316, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.5531914893617021, |
| "grad_norm": 1.2190346717834473, |
| "learning_rate": 0.0005, |
| "loss": 4.165225028991699, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.5537993920972645, |
| "grad_norm": 1.017309546470642, |
| "learning_rate": 0.0005, |
| "loss": 4.318220138549805, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.5544072948328268, |
| "grad_norm": 1.1314797401428223, |
| "learning_rate": 0.0005, |
| "loss": 4.121149063110352, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.555015197568389, |
| "grad_norm": 1.0844316482543945, |
| "learning_rate": 0.0005, |
| "loss": 4.213968276977539, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.5556231003039513, |
| "grad_norm": 0.9382945895195007, |
| "learning_rate": 0.0005, |
| "loss": 4.214629650115967, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.5562310030395137, |
| "grad_norm": 1.245742678642273, |
| "learning_rate": 0.0005, |
| "loss": 4.122774124145508, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.556838905775076, |
| "grad_norm": 1.095625877380371, |
| "learning_rate": 0.0005, |
| "loss": 4.296173095703125, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.5574468085106383, |
| "grad_norm": 1.0720239877700806, |
| "learning_rate": 0.0005, |
| "loss": 4.165585994720459, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.5580547112462007, |
| "grad_norm": 1.1082829236984253, |
| "learning_rate": 0.0005, |
| "loss": 4.3951921463012695, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.5586626139817629, |
| "grad_norm": 1.1302635669708252, |
| "learning_rate": 0.0005, |
| "loss": 4.336912155151367, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.5592705167173252, |
| "grad_norm": 0.9658374786376953, |
| "learning_rate": 0.0005, |
| "loss": 4.145001411437988, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.5598784194528875, |
| "grad_norm": 1.2869893312454224, |
| "learning_rate": 0.0005, |
| "loss": 4.438281536102295, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.5604863221884498, |
| "grad_norm": 0.9351769089698792, |
| "learning_rate": 0.0005, |
| "loss": 4.342588424682617, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.5610942249240122, |
| "grad_norm": 1.075165867805481, |
| "learning_rate": 0.0005, |
| "loss": 4.3024187088012695, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.5617021276595745, |
| "grad_norm": 1.0462286472320557, |
| "learning_rate": 0.0005, |
| "loss": 4.308043479919434, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.5623100303951368, |
| "grad_norm": 1.1331902742385864, |
| "learning_rate": 0.0005, |
| "loss": 4.122361183166504, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.562917933130699, |
| "grad_norm": 1.0483379364013672, |
| "learning_rate": 0.0005, |
| "loss": 4.140399932861328, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.5635258358662614, |
| "grad_norm": 1.0775599479675293, |
| "learning_rate": 0.0005, |
| "loss": 4.258686065673828, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.5641337386018237, |
| "grad_norm": 1.1621100902557373, |
| "learning_rate": 0.0005, |
| "loss": 4.177057266235352, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.564741641337386, |
| "grad_norm": 1.144015908241272, |
| "learning_rate": 0.0005, |
| "loss": 3.854235887527466, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.5653495440729484, |
| "grad_norm": 1.0188685655593872, |
| "learning_rate": 0.0005, |
| "loss": 4.226658821105957, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.5659574468085107, |
| "grad_norm": 1.214069128036499, |
| "learning_rate": 0.0005, |
| "loss": 4.558093070983887, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.5665653495440729, |
| "grad_norm": 1.0221775770187378, |
| "learning_rate": 0.0005, |
| "loss": 4.362860202789307, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.5671732522796352, |
| "grad_norm": 1.1003692150115967, |
| "learning_rate": 0.0005, |
| "loss": 4.412820339202881, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.5677811550151975, |
| "grad_norm": 1.0189692974090576, |
| "learning_rate": 0.0005, |
| "loss": 4.141862392425537, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.5683890577507599, |
| "grad_norm": 1.1275514364242554, |
| "learning_rate": 0.0005, |
| "loss": 4.0759077072143555, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.5689969604863222, |
| "grad_norm": 1.0595769882202148, |
| "learning_rate": 0.0005, |
| "loss": 4.234007835388184, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.5696048632218845, |
| "grad_norm": 1.0620779991149902, |
| "learning_rate": 0.0005, |
| "loss": 4.242690086364746, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.5702127659574469, |
| "grad_norm": 1.0344425439834595, |
| "learning_rate": 0.0005, |
| "loss": 4.393516540527344, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.5708206686930091, |
| "grad_norm": 1.1058911085128784, |
| "learning_rate": 0.0005, |
| "loss": 4.163288116455078, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 1.38120698928833, |
| "learning_rate": 0.0005, |
| "loss": 4.052524566650391, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.5720364741641337, |
| "grad_norm": 1.0876317024230957, |
| "learning_rate": 0.0005, |
| "loss": 4.03524112701416, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.572644376899696, |
| "grad_norm": 1.0367745161056519, |
| "learning_rate": 0.0005, |
| "loss": 4.183863639831543, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.5732522796352584, |
| "grad_norm": 1.008543848991394, |
| "learning_rate": 0.0005, |
| "loss": 4.219581127166748, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.5738601823708207, |
| "grad_norm": 1.0349946022033691, |
| "learning_rate": 0.0005, |
| "loss": 4.3019561767578125, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.574468085106383, |
| "grad_norm": 1.5670639276504517, |
| "learning_rate": 0.0005, |
| "loss": 4.330730438232422, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.5750759878419452, |
| "grad_norm": 1.0402114391326904, |
| "learning_rate": 0.0005, |
| "loss": 4.131731033325195, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.5756838905775076, |
| "grad_norm": 1.092549204826355, |
| "learning_rate": 0.0005, |
| "loss": 4.311880111694336, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.5762917933130699, |
| "grad_norm": 0.968338668346405, |
| "learning_rate": 0.0005, |
| "loss": 4.174734115600586, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.5768996960486322, |
| "grad_norm": 1.0552120208740234, |
| "learning_rate": 0.0005, |
| "loss": 3.9222404956817627, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.5775075987841946, |
| "grad_norm": 1.0390139818191528, |
| "learning_rate": 0.0005, |
| "loss": 4.248430252075195, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.5781155015197569, |
| "grad_norm": 1.1151931285858154, |
| "learning_rate": 0.0005, |
| "loss": 4.177859306335449, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.5787234042553191, |
| "grad_norm": 1.0557676553726196, |
| "learning_rate": 0.0005, |
| "loss": 4.173686981201172, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.5793313069908814, |
| "grad_norm": 1.133589506149292, |
| "learning_rate": 0.0005, |
| "loss": 4.227142333984375, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.5799392097264437, |
| "grad_norm": 1.12785804271698, |
| "learning_rate": 0.0005, |
| "loss": 4.077308654785156, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.5805471124620061, |
| "grad_norm": 1.0380632877349854, |
| "learning_rate": 0.0005, |
| "loss": 4.485074996948242, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.5811550151975684, |
| "grad_norm": 1.0573036670684814, |
| "learning_rate": 0.0005, |
| "loss": 4.045351028442383, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.5817629179331307, |
| "grad_norm": 1.0433647632598877, |
| "learning_rate": 0.0005, |
| "loss": 3.9277734756469727, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.5823708206686931, |
| "grad_norm": 1.077911376953125, |
| "learning_rate": 0.0005, |
| "loss": 4.329649448394775, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.5829787234042553, |
| "grad_norm": 0.9521039128303528, |
| "learning_rate": 0.0005, |
| "loss": 4.175987720489502, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.5835866261398176, |
| "grad_norm": 1.0778512954711914, |
| "learning_rate": 0.0005, |
| "loss": 4.32703971862793, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.5841945288753799, |
| "grad_norm": 1.048074722290039, |
| "learning_rate": 0.0005, |
| "loss": 4.146064758300781, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.5848024316109423, |
| "grad_norm": 1.0995032787322998, |
| "learning_rate": 0.0005, |
| "loss": 4.317961692810059, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.5854103343465046, |
| "grad_norm": 1.1812586784362793, |
| "learning_rate": 0.0005, |
| "loss": 4.266629219055176, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.5860182370820669, |
| "grad_norm": 1.2058099508285522, |
| "learning_rate": 0.0005, |
| "loss": 4.350966930389404, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.5866261398176292, |
| "grad_norm": 1.1499630212783813, |
| "learning_rate": 0.0005, |
| "loss": 4.47420072555542, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.5872340425531914, |
| "grad_norm": 1.212178111076355, |
| "learning_rate": 0.0005, |
| "loss": 4.201877593994141, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.5878419452887538, |
| "grad_norm": 1.0750401020050049, |
| "learning_rate": 0.0005, |
| "loss": 4.032867431640625, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.5884498480243161, |
| "grad_norm": 1.0766054391860962, |
| "learning_rate": 0.0005, |
| "loss": 4.070111274719238, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.5890577507598784, |
| "grad_norm": 1.0466876029968262, |
| "learning_rate": 0.0005, |
| "loss": 4.164140224456787, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.5896656534954408, |
| "grad_norm": 0.9755964875221252, |
| "learning_rate": 0.0005, |
| "loss": 4.061018943786621, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.5902735562310031, |
| "grad_norm": 1.0774449110031128, |
| "learning_rate": 0.0005, |
| "loss": 4.103540420532227, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.5908814589665653, |
| "grad_norm": 1.016599178314209, |
| "learning_rate": 0.0005, |
| "loss": 4.367238998413086, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.5914893617021276, |
| "grad_norm": 1.273015022277832, |
| "learning_rate": 0.0005, |
| "loss": 4.130205154418945, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.59209726443769, |
| "grad_norm": 1.121202826499939, |
| "learning_rate": 0.0005, |
| "loss": 4.058278560638428, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.5927051671732523, |
| "grad_norm": 1.782248854637146, |
| "learning_rate": 0.0005, |
| "loss": 4.2193732261657715, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.5933130699088146, |
| "grad_norm": 1.2525842189788818, |
| "learning_rate": 0.0005, |
| "loss": 4.324434757232666, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.5939209726443769, |
| "grad_norm": 0.9859209656715393, |
| "learning_rate": 0.0005, |
| "loss": 4.235608100891113, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.5945288753799393, |
| "grad_norm": 1.0421037673950195, |
| "learning_rate": 0.0005, |
| "loss": 4.312819480895996, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.5951367781155015, |
| "grad_norm": 1.2486640214920044, |
| "learning_rate": 0.0005, |
| "loss": 4.172330856323242, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.5957446808510638, |
| "grad_norm": 1.049641489982605, |
| "learning_rate": 0.0005, |
| "loss": 4.053893089294434, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.5963525835866261, |
| "grad_norm": 1.0123006105422974, |
| "learning_rate": 0.0005, |
| "loss": 4.453596115112305, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.5969604863221885, |
| "grad_norm": 0.9871963858604431, |
| "learning_rate": 0.0005, |
| "loss": 4.007091999053955, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.5975683890577508, |
| "grad_norm": 0.9984953999519348, |
| "learning_rate": 0.0005, |
| "loss": 4.22979736328125, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.5981762917933131, |
| "grad_norm": 1.281544804573059, |
| "learning_rate": 0.0005, |
| "loss": 4.074709892272949, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.5987841945288754, |
| "grad_norm": 1.1482913494110107, |
| "learning_rate": 0.0005, |
| "loss": 4.320782661437988, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.5993920972644377, |
| "grad_norm": 1.2105413675308228, |
| "learning_rate": 0.0005, |
| "loss": 4.15565299987793, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.0716112852096558, |
| "learning_rate": 0.0005, |
| "loss": 4.429147720336914, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.6006079027355623, |
| "grad_norm": 1.1487056016921997, |
| "learning_rate": 0.0005, |
| "loss": 4.206772804260254, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.6012158054711246, |
| "grad_norm": 0.9919009208679199, |
| "learning_rate": 0.0005, |
| "loss": 4.105408191680908, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.601823708206687, |
| "grad_norm": 1.1244338750839233, |
| "learning_rate": 0.0005, |
| "loss": 4.034040451049805, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.6024316109422493, |
| "grad_norm": 0.9693543910980225, |
| "learning_rate": 0.0005, |
| "loss": 3.8358006477355957, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.6030395136778115, |
| "grad_norm": 1.147226333618164, |
| "learning_rate": 0.0005, |
| "loss": 4.114927291870117, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.6036474164133738, |
| "grad_norm": 1.1658263206481934, |
| "learning_rate": 0.0005, |
| "loss": 4.3732099533081055, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.6042553191489362, |
| "grad_norm": 1.1261506080627441, |
| "learning_rate": 0.0005, |
| "loss": 4.212404251098633, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.6048632218844985, |
| "grad_norm": 1.214408040046692, |
| "learning_rate": 0.0005, |
| "loss": 4.113962173461914, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.6054711246200608, |
| "grad_norm": 1.1703499555587769, |
| "learning_rate": 0.0005, |
| "loss": 3.9795780181884766, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.6060790273556231, |
| "grad_norm": 1.2819421291351318, |
| "learning_rate": 0.0005, |
| "loss": 3.820543050765991, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.6066869300911855, |
| "grad_norm": 1.1751822233200073, |
| "learning_rate": 0.0005, |
| "loss": 4.115008354187012, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.6072948328267477, |
| "grad_norm": 1.133631944656372, |
| "learning_rate": 0.0005, |
| "loss": 4.320215225219727, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.60790273556231, |
| "grad_norm": 1.2056914567947388, |
| "learning_rate": 0.0005, |
| "loss": 4.139728546142578, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.6085106382978723, |
| "grad_norm": 1.1610949039459229, |
| "learning_rate": 0.0005, |
| "loss": 4.215843200683594, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.6091185410334347, |
| "grad_norm": 1.2171114683151245, |
| "learning_rate": 0.0005, |
| "loss": 4.104484558105469, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.609726443768997, |
| "grad_norm": 1.0760419368743896, |
| "learning_rate": 0.0005, |
| "loss": 4.106335639953613, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.6103343465045593, |
| "grad_norm": 1.0737935304641724, |
| "learning_rate": 0.0005, |
| "loss": 4.18284797668457, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.6109422492401215, |
| "grad_norm": 1.0054482221603394, |
| "learning_rate": 0.0005, |
| "loss": 4.185699462890625, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.6115501519756839, |
| "grad_norm": 1.0817815065383911, |
| "learning_rate": 0.0005, |
| "loss": 3.9077231884002686, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.6121580547112462, |
| "grad_norm": 1.1520154476165771, |
| "learning_rate": 0.0005, |
| "loss": 4.094099044799805, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.6127659574468085, |
| "grad_norm": 0.9844207763671875, |
| "learning_rate": 0.0005, |
| "loss": 4.341885566711426, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.6133738601823708, |
| "grad_norm": 1.2627776861190796, |
| "learning_rate": 0.0005, |
| "loss": 4.28475284576416, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.6139817629179332, |
| "grad_norm": 0.9542902112007141, |
| "learning_rate": 0.0005, |
| "loss": 4.2372026443481445, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.6145896656534955, |
| "grad_norm": 3.645486831665039, |
| "learning_rate": 0.0005, |
| "loss": 4.06125545501709, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.6151975683890577, |
| "grad_norm": 1.4817546606063843, |
| "learning_rate": 0.0005, |
| "loss": 3.9809517860412598, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.61580547112462, |
| "grad_norm": 1.1932374238967896, |
| "learning_rate": 0.0005, |
| "loss": 4.242306232452393, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.6164133738601824, |
| "grad_norm": 0.9499757289886475, |
| "learning_rate": 0.0005, |
| "loss": 3.9819726943969727, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.6170212765957447, |
| "grad_norm": 1.1981247663497925, |
| "learning_rate": 0.0005, |
| "loss": 4.266401290893555, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.617629179331307, |
| "grad_norm": 1.2060346603393555, |
| "learning_rate": 0.0005, |
| "loss": 4.270205497741699, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.6182370820668693, |
| "grad_norm": 1.002508282661438, |
| "learning_rate": 0.0005, |
| "loss": 4.509585380554199, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.6188449848024317, |
| "grad_norm": 1.0094107389450073, |
| "learning_rate": 0.0005, |
| "loss": 4.162940979003906, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.6194528875379939, |
| "grad_norm": 1.180220365524292, |
| "learning_rate": 0.0005, |
| "loss": 4.317109107971191, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.6200607902735562, |
| "grad_norm": 0.980454683303833, |
| "learning_rate": 0.0005, |
| "loss": 4.042284965515137, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.6206686930091185, |
| "grad_norm": 1.0461052656173706, |
| "learning_rate": 0.0005, |
| "loss": 4.0409698486328125, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.6212765957446809, |
| "grad_norm": 1.0268027782440186, |
| "learning_rate": 0.0005, |
| "loss": 4.10588264465332, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.6218844984802432, |
| "grad_norm": 0.9659956693649292, |
| "learning_rate": 0.0005, |
| "loss": 4.511264801025391, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.6224924012158055, |
| "grad_norm": 1.0161465406417847, |
| "learning_rate": 0.0005, |
| "loss": 4.369597911834717, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.6231003039513677, |
| "grad_norm": 1.145430326461792, |
| "learning_rate": 0.0005, |
| "loss": 4.104186058044434, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.6237082066869301, |
| "grad_norm": 0.968573808670044, |
| "learning_rate": 0.0005, |
| "loss": 4.03414249420166, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.6243161094224924, |
| "grad_norm": 1.2972266674041748, |
| "learning_rate": 0.0005, |
| "loss": 4.18367862701416, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.6249240121580547, |
| "grad_norm": 0.9075741171836853, |
| "learning_rate": 0.0005, |
| "loss": 4.101839065551758, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.625531914893617, |
| "grad_norm": 1.2480190992355347, |
| "learning_rate": 0.0005, |
| "loss": 4.170825004577637, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.6261398176291794, |
| "grad_norm": 1.1662267446517944, |
| "learning_rate": 0.0005, |
| "loss": 4.132046222686768, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.6267477203647417, |
| "grad_norm": 0.9081514477729797, |
| "learning_rate": 0.0005, |
| "loss": 4.023431777954102, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.6273556231003039, |
| "grad_norm": 1.1570264101028442, |
| "learning_rate": 0.0005, |
| "loss": 4.246901512145996, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.6279635258358662, |
| "grad_norm": 1.0261447429656982, |
| "learning_rate": 0.0005, |
| "loss": 4.251025199890137, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.6285714285714286, |
| "grad_norm": 0.9957416653633118, |
| "learning_rate": 0.0005, |
| "loss": 4.112504482269287, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.6291793313069909, |
| "grad_norm": 1.2634888887405396, |
| "learning_rate": 0.0005, |
| "loss": 4.397002220153809, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.6297872340425532, |
| "grad_norm": 1.0848995447158813, |
| "learning_rate": 0.0005, |
| "loss": 4.163301467895508, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.6303951367781155, |
| "grad_norm": 1.0806390047073364, |
| "learning_rate": 0.0005, |
| "loss": 3.78402042388916, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.6310030395136779, |
| "grad_norm": 1.0640003681182861, |
| "learning_rate": 0.0005, |
| "loss": 4.0556440353393555, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.6316109422492401, |
| "grad_norm": 0.9620634317398071, |
| "learning_rate": 0.0005, |
| "loss": 4.19709587097168, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.6322188449848024, |
| "grad_norm": 1.4484918117523193, |
| "learning_rate": 0.0005, |
| "loss": 4.058507442474365, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.6328267477203647, |
| "grad_norm": 1.219489574432373, |
| "learning_rate": 0.0005, |
| "loss": 4.1419267654418945, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.6334346504559271, |
| "grad_norm": 1.127636194229126, |
| "learning_rate": 0.0005, |
| "loss": 3.892014980316162, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.6340425531914894, |
| "grad_norm": 1.326476812362671, |
| "learning_rate": 0.0005, |
| "loss": 4.128079414367676, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.6346504559270517, |
| "grad_norm": 1.1010375022888184, |
| "learning_rate": 0.0005, |
| "loss": 3.898940086364746, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.6352583586626139, |
| "grad_norm": 1.1064268350601196, |
| "learning_rate": 0.0005, |
| "loss": 4.141763687133789, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.6358662613981763, |
| "grad_norm": 1.24687659740448, |
| "learning_rate": 0.0005, |
| "loss": 4.210533618927002, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.6364741641337386, |
| "grad_norm": 1.0071916580200195, |
| "learning_rate": 0.0005, |
| "loss": 4.255558013916016, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.6370820668693009, |
| "grad_norm": 1.0620638132095337, |
| "learning_rate": 0.0005, |
| "loss": 4.008969306945801, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.6376899696048632, |
| "grad_norm": 1.0604190826416016, |
| "learning_rate": 0.0005, |
| "loss": 4.224608421325684, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.6382978723404256, |
| "grad_norm": 1.032774567604065, |
| "learning_rate": 0.0005, |
| "loss": 4.131565093994141, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.6389057750759879, |
| "grad_norm": 0.9236063361167908, |
| "learning_rate": 0.0005, |
| "loss": 4.309024333953857, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.6395136778115501, |
| "grad_norm": 1.059757947921753, |
| "learning_rate": 0.0005, |
| "loss": 4.041001319885254, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.6401215805471124, |
| "grad_norm": 1.1099759340286255, |
| "learning_rate": 0.0005, |
| "loss": 3.9661004543304443, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.6407294832826748, |
| "grad_norm": 1.0091055631637573, |
| "learning_rate": 0.0005, |
| "loss": 3.987016439437866, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.6413373860182371, |
| "grad_norm": 1.1090649366378784, |
| "learning_rate": 0.0005, |
| "loss": 4.068497657775879, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.6419452887537994, |
| "grad_norm": 1.0738252401351929, |
| "learning_rate": 0.0005, |
| "loss": 3.9846339225769043, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.6425531914893617, |
| "grad_norm": 1.1196277141571045, |
| "learning_rate": 0.0005, |
| "loss": 4.063312530517578, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.643161094224924, |
| "grad_norm": 1.2615549564361572, |
| "learning_rate": 0.0005, |
| "loss": 3.9986069202423096, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.6437689969604863, |
| "grad_norm": 1.49628746509552, |
| "learning_rate": 0.0005, |
| "loss": 4.1674394607543945, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.6443768996960486, |
| "grad_norm": 1.279189109802246, |
| "learning_rate": 0.0005, |
| "loss": 3.8027124404907227, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.6449848024316109, |
| "grad_norm": 1.1228110790252686, |
| "learning_rate": 0.0005, |
| "loss": 3.7935433387756348, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.6455927051671733, |
| "grad_norm": 1.082332730293274, |
| "learning_rate": 0.0005, |
| "loss": 4.039803981781006, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.6462006079027356, |
| "grad_norm": 0.9758466482162476, |
| "learning_rate": 0.0005, |
| "loss": 4.102064609527588, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.6468085106382979, |
| "grad_norm": 1.0097397565841675, |
| "learning_rate": 0.0005, |
| "loss": 4.058742523193359, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.6474164133738601, |
| "grad_norm": 1.0726414918899536, |
| "learning_rate": 0.0005, |
| "loss": 4.0242133140563965, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.6480243161094225, |
| "grad_norm": 1.107040524482727, |
| "learning_rate": 0.0005, |
| "loss": 3.9720733165740967, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.6486322188449848, |
| "grad_norm": 1.258399248123169, |
| "learning_rate": 0.0005, |
| "loss": 3.85103178024292, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.6492401215805471, |
| "grad_norm": 1.215524435043335, |
| "learning_rate": 0.0005, |
| "loss": 4.162143707275391, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.6498480243161094, |
| "grad_norm": 1.0505629777908325, |
| "learning_rate": 0.0005, |
| "loss": 4.23874568939209, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.6504559270516718, |
| "grad_norm": 1.2580337524414062, |
| "learning_rate": 0.0005, |
| "loss": 4.126619338989258, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.6510638297872341, |
| "grad_norm": 1.1980527639389038, |
| "learning_rate": 0.0005, |
| "loss": 4.011953353881836, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.6516717325227963, |
| "grad_norm": 1.020224690437317, |
| "learning_rate": 0.0005, |
| "loss": 4.2201948165893555, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.6522796352583586, |
| "grad_norm": 1.0695855617523193, |
| "learning_rate": 0.0005, |
| "loss": 4.277288436889648, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.652887537993921, |
| "grad_norm": 1.1862881183624268, |
| "learning_rate": 0.0005, |
| "loss": 4.1104841232299805, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.6534954407294833, |
| "grad_norm": 1.7002424001693726, |
| "learning_rate": 0.0005, |
| "loss": 4.274345874786377, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.6541033434650456, |
| "grad_norm": 1.3632254600524902, |
| "learning_rate": 0.0005, |
| "loss": 4.318878173828125, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.6547112462006079, |
| "grad_norm": 1.1510448455810547, |
| "learning_rate": 0.0005, |
| "loss": 4.182323455810547, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.6553191489361702, |
| "grad_norm": 1.143638014793396, |
| "learning_rate": 0.0005, |
| "loss": 4.174741744995117, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.6559270516717325, |
| "grad_norm": 1.1500475406646729, |
| "learning_rate": 0.0005, |
| "loss": 3.9260623455047607, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.6565349544072948, |
| "grad_norm": 1.293712854385376, |
| "learning_rate": 0.0005, |
| "loss": 4.087700843811035, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.6571428571428571, |
| "grad_norm": 1.3932772874832153, |
| "learning_rate": 0.0005, |
| "loss": 4.118124961853027, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.6577507598784195, |
| "grad_norm": 1.094328761100769, |
| "learning_rate": 0.0005, |
| "loss": 4.175318241119385, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.6583586626139818, |
| "grad_norm": 1.467499017715454, |
| "learning_rate": 0.0005, |
| "loss": 4.272140979766846, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.6589665653495441, |
| "grad_norm": 1.1503561735153198, |
| "learning_rate": 0.0005, |
| "loss": 4.183167934417725, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.6595744680851063, |
| "grad_norm": 1.1912407875061035, |
| "learning_rate": 0.0005, |
| "loss": 4.055290222167969, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.6601823708206687, |
| "grad_norm": 1.1428508758544922, |
| "learning_rate": 0.0005, |
| "loss": 4.183894157409668, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.660790273556231, |
| "grad_norm": 1.136474609375, |
| "learning_rate": 0.0005, |
| "loss": 3.86468768119812, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.6613981762917933, |
| "grad_norm": 1.0048547983169556, |
| "learning_rate": 0.0005, |
| "loss": 4.054813385009766, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.6620060790273556, |
| "grad_norm": 1.021672010421753, |
| "learning_rate": 0.0005, |
| "loss": 3.9937808513641357, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.662613981762918, |
| "grad_norm": 1.184766173362732, |
| "learning_rate": 0.0005, |
| "loss": 4.007226943969727, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.6632218844984803, |
| "grad_norm": 1.1701700687408447, |
| "learning_rate": 0.0005, |
| "loss": 3.880901336669922, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.6638297872340425, |
| "grad_norm": 1.0928300619125366, |
| "learning_rate": 0.0005, |
| "loss": 4.0920090675354, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.6644376899696048, |
| "grad_norm": 1.0498013496398926, |
| "learning_rate": 0.0005, |
| "loss": 4.117219924926758, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.6650455927051672, |
| "grad_norm": 1.034084439277649, |
| "learning_rate": 0.0005, |
| "loss": 4.0926313400268555, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.6656534954407295, |
| "grad_norm": 0.939494788646698, |
| "learning_rate": 0.0005, |
| "loss": 4.018050670623779, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.6662613981762918, |
| "grad_norm": 1.2339518070220947, |
| "learning_rate": 0.0005, |
| "loss": 3.9285688400268555, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.6668693009118541, |
| "grad_norm": 1.1236822605133057, |
| "learning_rate": 0.0005, |
| "loss": 3.9050168991088867, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.6674772036474164, |
| "grad_norm": 0.9875328540802002, |
| "learning_rate": 0.0005, |
| "loss": 4.033220291137695, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.6680851063829787, |
| "grad_norm": 0.9468657374382019, |
| "learning_rate": 0.0005, |
| "loss": 4.108023643493652, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.668693009118541, |
| "grad_norm": 1.0056613683700562, |
| "learning_rate": 0.0005, |
| "loss": 4.217726707458496, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.6693009118541033, |
| "grad_norm": 1.1911637783050537, |
| "learning_rate": 0.0005, |
| "loss": 3.9922735691070557, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.6699088145896657, |
| "grad_norm": 0.9524610638618469, |
| "learning_rate": 0.0005, |
| "loss": 3.843928337097168, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.670516717325228, |
| "grad_norm": 1.1759804487228394, |
| "learning_rate": 0.0005, |
| "loss": 3.8452773094177246, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.6711246200607903, |
| "grad_norm": 1.1534795761108398, |
| "learning_rate": 0.0005, |
| "loss": 4.147238731384277, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.6717325227963525, |
| "grad_norm": 1.0438340902328491, |
| "learning_rate": 0.0005, |
| "loss": 3.814009666442871, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.6723404255319149, |
| "grad_norm": 1.4943510293960571, |
| "learning_rate": 0.0005, |
| "loss": 4.062148571014404, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.6729483282674772, |
| "grad_norm": 0.9739040732383728, |
| "learning_rate": 0.0005, |
| "loss": 4.066575050354004, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.6735562310030395, |
| "grad_norm": 1.0727957487106323, |
| "learning_rate": 0.0005, |
| "loss": 3.935608386993408, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.6741641337386018, |
| "grad_norm": 1.480692744255066, |
| "learning_rate": 0.0005, |
| "loss": 4.12183952331543, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.6747720364741642, |
| "grad_norm": 1.1042070388793945, |
| "learning_rate": 0.0005, |
| "loss": 3.8309693336486816, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.6753799392097265, |
| "grad_norm": 1.5949453115463257, |
| "learning_rate": 0.0005, |
| "loss": 4.225711822509766, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.6759878419452887, |
| "grad_norm": 1.1404409408569336, |
| "learning_rate": 0.0005, |
| "loss": 3.870262384414673, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.676595744680851, |
| "grad_norm": 1.1272308826446533, |
| "learning_rate": 0.0005, |
| "loss": 4.375516891479492, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.6772036474164134, |
| "grad_norm": 1.3391433954238892, |
| "learning_rate": 0.0005, |
| "loss": 3.9125869274139404, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.6778115501519757, |
| "grad_norm": 0.9406550526618958, |
| "learning_rate": 0.0005, |
| "loss": 4.000041961669922, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.678419452887538, |
| "grad_norm": 1.211789846420288, |
| "learning_rate": 0.0005, |
| "loss": 4.146924018859863, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.6790273556231003, |
| "grad_norm": 1.0479586124420166, |
| "learning_rate": 0.0005, |
| "loss": 3.77646803855896, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.6796352583586626, |
| "grad_norm": 1.0069152116775513, |
| "learning_rate": 0.0005, |
| "loss": 4.110267162322998, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.6802431610942249, |
| "grad_norm": 1.2088702917099, |
| "learning_rate": 0.0005, |
| "loss": 4.083201885223389, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.6808510638297872, |
| "grad_norm": 1.3016067743301392, |
| "learning_rate": 0.0005, |
| "loss": 4.1130218505859375, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.6814589665653495, |
| "grad_norm": 1.0395400524139404, |
| "learning_rate": 0.0005, |
| "loss": 4.012112617492676, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.6820668693009119, |
| "grad_norm": 1.1534603834152222, |
| "learning_rate": 0.0005, |
| "loss": 3.8767285346984863, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.6826747720364742, |
| "grad_norm": 1.1331707239151, |
| "learning_rate": 0.0005, |
| "loss": 3.8466670513153076, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.6832826747720365, |
| "grad_norm": 1.0023419857025146, |
| "learning_rate": 0.0005, |
| "loss": 3.978550910949707, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.6838905775075987, |
| "grad_norm": 1.198326826095581, |
| "learning_rate": 0.0005, |
| "loss": 4.160974502563477, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.6844984802431611, |
| "grad_norm": 1.0249745845794678, |
| "learning_rate": 0.0005, |
| "loss": 3.961395740509033, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.6851063829787234, |
| "grad_norm": 1.2853235006332397, |
| "learning_rate": 0.0005, |
| "loss": 4.322844505310059, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.6857142857142857, |
| "grad_norm": 0.9774798154830933, |
| "learning_rate": 0.0005, |
| "loss": 4.034377098083496, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.686322188449848, |
| "grad_norm": 1.1903027296066284, |
| "learning_rate": 0.0005, |
| "loss": 3.896298408508301, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.6869300911854104, |
| "grad_norm": 0.9409128427505493, |
| "learning_rate": 0.0005, |
| "loss": 3.967303514480591, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.6875379939209726, |
| "grad_norm": 1.0214065313339233, |
| "learning_rate": 0.0005, |
| "loss": 3.916367769241333, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.6881458966565349, |
| "grad_norm": 1.3258956670761108, |
| "learning_rate": 0.0005, |
| "loss": 3.859543800354004, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.6887537993920972, |
| "grad_norm": 1.0668888092041016, |
| "learning_rate": 0.0005, |
| "loss": 3.929979085922241, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.6893617021276596, |
| "grad_norm": 1.0921815633773804, |
| "learning_rate": 0.0005, |
| "loss": 3.942767381668091, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.6899696048632219, |
| "grad_norm": 1.1683087348937988, |
| "learning_rate": 0.0005, |
| "loss": 4.096218109130859, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.6905775075987842, |
| "grad_norm": 1.150611162185669, |
| "learning_rate": 0.0005, |
| "loss": 4.065778732299805, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.6911854103343466, |
| "grad_norm": 0.9955292344093323, |
| "learning_rate": 0.0005, |
| "loss": 3.83855938911438, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.6917933130699088, |
| "grad_norm": 1.1191688776016235, |
| "learning_rate": 0.0005, |
| "loss": 4.189568519592285, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.6924012158054711, |
| "grad_norm": 1.1021112203598022, |
| "learning_rate": 0.0005, |
| "loss": 4.004612445831299, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.6930091185410334, |
| "grad_norm": 1.2468072175979614, |
| "learning_rate": 0.0005, |
| "loss": 3.867835283279419, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.6936170212765957, |
| "grad_norm": 0.9965139627456665, |
| "learning_rate": 0.0005, |
| "loss": 3.8393120765686035, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.6942249240121581, |
| "grad_norm": 1.2608331441879272, |
| "learning_rate": 0.0005, |
| "loss": 4.122796535491943, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.6948328267477204, |
| "grad_norm": 0.9645028710365295, |
| "learning_rate": 0.0005, |
| "loss": 4.193379878997803, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.6954407294832827, |
| "grad_norm": 1.103003978729248, |
| "learning_rate": 0.0005, |
| "loss": 3.80690860748291, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.6960486322188449, |
| "grad_norm": 0.9812702536582947, |
| "learning_rate": 0.0005, |
| "loss": 3.910191059112549, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.6966565349544073, |
| "grad_norm": 1.1629973649978638, |
| "learning_rate": 0.0005, |
| "loss": 3.912919282913208, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.6972644376899696, |
| "grad_norm": 0.9559318423271179, |
| "learning_rate": 0.0005, |
| "loss": 4.1403703689575195, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.6978723404255319, |
| "grad_norm": 1.187225103378296, |
| "learning_rate": 0.0005, |
| "loss": 4.227229595184326, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.6984802431610942, |
| "grad_norm": 1.0893582105636597, |
| "learning_rate": 0.0005, |
| "loss": 4.085037708282471, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.6990881458966566, |
| "grad_norm": 1.207614541053772, |
| "learning_rate": 0.0005, |
| "loss": 4.065666198730469, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.6996960486322188, |
| "grad_norm": 1.1726024150848389, |
| "learning_rate": 0.0005, |
| "loss": 4.011224269866943, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.7003039513677811, |
| "grad_norm": 1.0657603740692139, |
| "learning_rate": 0.0005, |
| "loss": 3.7515785694122314, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.7009118541033434, |
| "grad_norm": 1.069787859916687, |
| "learning_rate": 0.0005, |
| "loss": 4.024471759796143, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.7015197568389058, |
| "grad_norm": 1.0333293676376343, |
| "learning_rate": 0.0005, |
| "loss": 3.9765753746032715, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.7021276595744681, |
| "grad_norm": 1.3091932535171509, |
| "learning_rate": 0.0005, |
| "loss": 4.086296081542969, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.7027355623100304, |
| "grad_norm": 0.96831214427948, |
| "learning_rate": 0.0005, |
| "loss": 4.159407615661621, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.7033434650455928, |
| "grad_norm": 1.0307363271713257, |
| "learning_rate": 0.0005, |
| "loss": 4.085778713226318, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.703951367781155, |
| "grad_norm": 1.2046213150024414, |
| "learning_rate": 0.0005, |
| "loss": 4.149312973022461, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.7045592705167173, |
| "grad_norm": 1.027969241142273, |
| "learning_rate": 0.0005, |
| "loss": 4.021113872528076, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.7051671732522796, |
| "grad_norm": 0.886216938495636, |
| "learning_rate": 0.0005, |
| "loss": 4.1072492599487305, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.705775075987842, |
| "grad_norm": 1.2814362049102783, |
| "learning_rate": 0.0005, |
| "loss": 4.351136207580566, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.7063829787234043, |
| "grad_norm": 1.195614218711853, |
| "learning_rate": 0.0005, |
| "loss": 3.859333038330078, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.7069908814589666, |
| "grad_norm": 1.02545964717865, |
| "learning_rate": 0.0005, |
| "loss": 3.993720531463623, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.7075987841945289, |
| "grad_norm": 1.1973057985305786, |
| "learning_rate": 0.0005, |
| "loss": 4.022034168243408, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.7082066869300911, |
| "grad_norm": 1.047211766242981, |
| "learning_rate": 0.0005, |
| "loss": 4.110629081726074, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.7088145896656535, |
| "grad_norm": 0.9065303206443787, |
| "learning_rate": 0.0005, |
| "loss": 4.266797065734863, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.7094224924012158, |
| "grad_norm": 1.0121465921401978, |
| "learning_rate": 0.0005, |
| "loss": 3.7510571479797363, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.7100303951367781, |
| "grad_norm": 1.2128599882125854, |
| "learning_rate": 0.0005, |
| "loss": 4.190847396850586, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.7106382978723405, |
| "grad_norm": 1.4867533445358276, |
| "learning_rate": 0.0005, |
| "loss": 4.125823497772217, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.7112462006079028, |
| "grad_norm": 0.9088724851608276, |
| "learning_rate": 0.0005, |
| "loss": 4.092848777770996, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.711854103343465, |
| "grad_norm": 0.980387270450592, |
| "learning_rate": 0.0005, |
| "loss": 3.963526725769043, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.7124620060790273, |
| "grad_norm": 0.9671593308448792, |
| "learning_rate": 0.0005, |
| "loss": 3.886415958404541, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.7130699088145896, |
| "grad_norm": 0.8448948860168457, |
| "learning_rate": 0.0005, |
| "loss": 3.960893154144287, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.713677811550152, |
| "grad_norm": 1.0654000043869019, |
| "learning_rate": 0.0005, |
| "loss": 3.9057178497314453, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.7142857142857143, |
| "grad_norm": 1.255560040473938, |
| "learning_rate": 0.0005, |
| "loss": 4.085208892822266, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.7148936170212766, |
| "grad_norm": 1.172607183456421, |
| "learning_rate": 0.0005, |
| "loss": 3.8918302059173584, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.715501519756839, |
| "grad_norm": 1.1429939270019531, |
| "learning_rate": 0.0005, |
| "loss": 3.8840150833129883, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.7161094224924012, |
| "grad_norm": 1.0610404014587402, |
| "learning_rate": 0.0005, |
| "loss": 4.0701003074646, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.7167173252279635, |
| "grad_norm": 1.0055387020111084, |
| "learning_rate": 0.0005, |
| "loss": 3.773474931716919, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.7173252279635258, |
| "grad_norm": 1.0536381006240845, |
| "learning_rate": 0.0005, |
| "loss": 3.885234832763672, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.7179331306990882, |
| "grad_norm": 1.2304924726486206, |
| "learning_rate": 0.0005, |
| "loss": 4.134721755981445, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.7185410334346505, |
| "grad_norm": 1.1367759704589844, |
| "learning_rate": 0.0005, |
| "loss": 4.07640266418457, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.7191489361702128, |
| "grad_norm": 0.9987047910690308, |
| "learning_rate": 0.0005, |
| "loss": 3.941793918609619, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.7197568389057751, |
| "grad_norm": 0.9390357136726379, |
| "learning_rate": 0.0005, |
| "loss": 4.057319641113281, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.7203647416413373, |
| "grad_norm": 1.3009685277938843, |
| "learning_rate": 0.0005, |
| "loss": 3.7487921714782715, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.7209726443768997, |
| "grad_norm": 1.0107924938201904, |
| "learning_rate": 0.0005, |
| "loss": 3.9321677684783936, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.721580547112462, |
| "grad_norm": 1.003091812133789, |
| "learning_rate": 0.0005, |
| "loss": 3.855192184448242, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.7221884498480243, |
| "grad_norm": 1.1665643453598022, |
| "learning_rate": 0.0005, |
| "loss": 4.062481880187988, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.7227963525835867, |
| "grad_norm": 1.0481219291687012, |
| "learning_rate": 0.0005, |
| "loss": 3.7130908966064453, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.723404255319149, |
| "grad_norm": 1.4968420267105103, |
| "learning_rate": 0.0005, |
| "loss": 4.054961681365967, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.7240121580547112, |
| "grad_norm": 1.0543270111083984, |
| "learning_rate": 0.0005, |
| "loss": 4.080737113952637, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.7246200607902735, |
| "grad_norm": 1.3208811283111572, |
| "learning_rate": 0.0005, |
| "loss": 3.828869581222534, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.7252279635258359, |
| "grad_norm": 1.1503605842590332, |
| "learning_rate": 0.0005, |
| "loss": 3.897340774536133, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.7258358662613982, |
| "grad_norm": 0.9485260844230652, |
| "learning_rate": 0.0005, |
| "loss": 4.022025108337402, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.7264437689969605, |
| "grad_norm": 1.0768346786499023, |
| "learning_rate": 0.0005, |
| "loss": 4.132440567016602, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.7270516717325228, |
| "grad_norm": 1.0768530368804932, |
| "learning_rate": 0.0005, |
| "loss": 4.167514801025391, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.7276595744680852, |
| "grad_norm": 1.1659386157989502, |
| "learning_rate": 0.0005, |
| "loss": 3.9605331420898438, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.7282674772036474, |
| "grad_norm": 0.9825963377952576, |
| "learning_rate": 0.0005, |
| "loss": 3.9677042961120605, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.7288753799392097, |
| "grad_norm": 1.200975775718689, |
| "learning_rate": 0.0005, |
| "loss": 4.059627056121826, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.729483282674772, |
| "grad_norm": 1.0287483930587769, |
| "learning_rate": 0.0005, |
| "loss": 3.9571118354797363, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.7300911854103344, |
| "grad_norm": 1.171775221824646, |
| "learning_rate": 0.0005, |
| "loss": 4.2555084228515625, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.7306990881458967, |
| "grad_norm": 1.2075831890106201, |
| "learning_rate": 0.0005, |
| "loss": 3.9444262981414795, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.731306990881459, |
| "grad_norm": 1.1258975267410278, |
| "learning_rate": 0.0005, |
| "loss": 4.037412643432617, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.7319148936170212, |
| "grad_norm": 1.107055902481079, |
| "learning_rate": 0.0005, |
| "loss": 4.046412467956543, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.7325227963525835, |
| "grad_norm": 1.1721580028533936, |
| "learning_rate": 0.0005, |
| "loss": 4.039186477661133, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.7331306990881459, |
| "grad_norm": 1.8083940744400024, |
| "learning_rate": 0.0005, |
| "loss": 4.255344390869141, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.7337386018237082, |
| "grad_norm": 1.1505194902420044, |
| "learning_rate": 0.0005, |
| "loss": 3.849947452545166, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.7343465045592705, |
| "grad_norm": 1.0368176698684692, |
| "learning_rate": 0.0005, |
| "loss": 4.037250518798828, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.7349544072948329, |
| "grad_norm": 1.076282262802124, |
| "learning_rate": 0.0005, |
| "loss": 3.8192124366760254, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.7355623100303952, |
| "grad_norm": 0.9457529187202454, |
| "learning_rate": 0.0005, |
| "loss": 3.988126516342163, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.7361702127659574, |
| "grad_norm": 1.396436333656311, |
| "learning_rate": 0.0005, |
| "loss": 3.8735647201538086, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.7367781155015197, |
| "grad_norm": 1.1978737115859985, |
| "learning_rate": 0.0005, |
| "loss": 3.7308835983276367, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.737386018237082, |
| "grad_norm": 1.2270631790161133, |
| "learning_rate": 0.0005, |
| "loss": 3.7548632621765137, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.7379939209726444, |
| "grad_norm": 1.0319976806640625, |
| "learning_rate": 0.0005, |
| "loss": 4.095251083374023, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.7386018237082067, |
| "grad_norm": 1.2742465734481812, |
| "learning_rate": 0.0005, |
| "loss": 3.7844438552856445, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.739209726443769, |
| "grad_norm": 0.9936171174049377, |
| "learning_rate": 0.0005, |
| "loss": 3.6828408241271973, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.7398176291793314, |
| "grad_norm": 1.0827305316925049, |
| "learning_rate": 0.0005, |
| "loss": 4.2918291091918945, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.7404255319148936, |
| "grad_norm": 1.0626490116119385, |
| "learning_rate": 0.0005, |
| "loss": 3.8438615798950195, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.7410334346504559, |
| "grad_norm": 1.0187205076217651, |
| "learning_rate": 0.0005, |
| "loss": 4.007755279541016, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.7416413373860182, |
| "grad_norm": 0.9945427775382996, |
| "learning_rate": 0.0005, |
| "loss": 3.8854458332061768, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.7422492401215806, |
| "grad_norm": 0.9728744029998779, |
| "learning_rate": 0.0005, |
| "loss": 3.7727737426757812, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.7428571428571429, |
| "grad_norm": 1.0771368741989136, |
| "learning_rate": 0.0005, |
| "loss": 3.984614133834839, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.7434650455927052, |
| "grad_norm": 1.0673145055770874, |
| "learning_rate": 0.0005, |
| "loss": 4.186018943786621, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.7440729483282674, |
| "grad_norm": 1.0385884046554565, |
| "learning_rate": 0.0005, |
| "loss": 3.9700469970703125, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.7446808510638298, |
| "grad_norm": 0.9378101229667664, |
| "learning_rate": 0.0005, |
| "loss": 4.093457221984863, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.7452887537993921, |
| "grad_norm": 1.1992157697677612, |
| "learning_rate": 0.0005, |
| "loss": 3.8426108360290527, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.7458966565349544, |
| "grad_norm": 0.9516767263412476, |
| "learning_rate": 0.0005, |
| "loss": 3.540165901184082, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.7465045592705167, |
| "grad_norm": 0.9911203980445862, |
| "learning_rate": 0.0005, |
| "loss": 3.791531562805176, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.7471124620060791, |
| "grad_norm": 1.1304718255996704, |
| "learning_rate": 0.0005, |
| "loss": 3.8638508319854736, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.7477203647416414, |
| "grad_norm": 3.538874626159668, |
| "learning_rate": 0.0005, |
| "loss": 4.131631851196289, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.7483282674772036, |
| "grad_norm": 1.096618413925171, |
| "learning_rate": 0.0005, |
| "loss": 3.782884120941162, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.7489361702127659, |
| "grad_norm": 1.2701330184936523, |
| "learning_rate": 0.0005, |
| "loss": 3.992222785949707, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.7495440729483283, |
| "grad_norm": 1.0706497430801392, |
| "learning_rate": 0.0005, |
| "loss": 3.908442735671997, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.7501519756838906, |
| "grad_norm": 1.030834436416626, |
| "learning_rate": 0.0005, |
| "loss": 4.000621318817139, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.7507598784194529, |
| "grad_norm": 1.3895245790481567, |
| "learning_rate": 0.0005, |
| "loss": 3.80660343170166, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.7513677811550152, |
| "grad_norm": 0.9692356586456299, |
| "learning_rate": 0.0005, |
| "loss": 4.078845977783203, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.7519756838905776, |
| "grad_norm": 1.1271778345108032, |
| "learning_rate": 0.0005, |
| "loss": 3.9555394649505615, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.7525835866261398, |
| "grad_norm": 1.5441569089889526, |
| "learning_rate": 0.0005, |
| "loss": 3.963904857635498, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.7531914893617021, |
| "grad_norm": 1.7030054330825806, |
| "learning_rate": 0.0005, |
| "loss": 4.024696350097656, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.7537993920972644, |
| "grad_norm": 1.12552011013031, |
| "learning_rate": 0.0005, |
| "loss": 3.919168472290039, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.7544072948328268, |
| "grad_norm": 1.0487366914749146, |
| "learning_rate": 0.0005, |
| "loss": 4.095437526702881, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.7550151975683891, |
| "grad_norm": 1.0279390811920166, |
| "learning_rate": 0.0005, |
| "loss": 3.941718816757202, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.7556231003039514, |
| "grad_norm": 1.080350399017334, |
| "learning_rate": 0.0005, |
| "loss": 3.7040228843688965, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.7562310030395136, |
| "grad_norm": 1.0182151794433594, |
| "learning_rate": 0.0005, |
| "loss": 4.062251091003418, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.756838905775076, |
| "grad_norm": 1.078009843826294, |
| "learning_rate": 0.0005, |
| "loss": 3.8745062351226807, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.7574468085106383, |
| "grad_norm": 1.0222269296646118, |
| "learning_rate": 0.0005, |
| "loss": 3.7564854621887207, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.7580547112462006, |
| "grad_norm": 1.329654335975647, |
| "learning_rate": 0.0005, |
| "loss": 3.8875160217285156, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.7586626139817629, |
| "grad_norm": 1.0129868984222412, |
| "learning_rate": 0.0005, |
| "loss": 3.8748350143432617, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.7592705167173253, |
| "grad_norm": 1.030468225479126, |
| "learning_rate": 0.0005, |
| "loss": 3.8655738830566406, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.7598784194528876, |
| "grad_norm": 1.111459732055664, |
| "learning_rate": 0.0005, |
| "loss": 3.9891488552093506, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.7604863221884498, |
| "grad_norm": 1.4396013021469116, |
| "learning_rate": 0.0005, |
| "loss": 3.9904720783233643, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.7610942249240121, |
| "grad_norm": 1.2336925268173218, |
| "learning_rate": 0.0005, |
| "loss": 3.7369742393493652, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.7617021276595745, |
| "grad_norm": 0.8990273475646973, |
| "learning_rate": 0.0005, |
| "loss": 3.9168124198913574, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.7623100303951368, |
| "grad_norm": 1.2932227849960327, |
| "learning_rate": 0.0005, |
| "loss": 4.008082389831543, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.7629179331306991, |
| "grad_norm": 0.9154768586158752, |
| "learning_rate": 0.0005, |
| "loss": 4.019550323486328, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.7635258358662614, |
| "grad_norm": 0.9175946712493896, |
| "learning_rate": 0.0005, |
| "loss": 3.97037935256958, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.7641337386018237, |
| "grad_norm": 1.067017912864685, |
| "learning_rate": 0.0005, |
| "loss": 4.1387038230896, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.764741641337386, |
| "grad_norm": 1.1540616750717163, |
| "learning_rate": 0.0005, |
| "loss": 3.979078769683838, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.7653495440729483, |
| "grad_norm": 0.9942051768302917, |
| "learning_rate": 0.0005, |
| "loss": 4.157475471496582, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.7659574468085106, |
| "grad_norm": 1.0882611274719238, |
| "learning_rate": 0.0005, |
| "loss": 3.784665584564209, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.766565349544073, |
| "grad_norm": 1.0358823537826538, |
| "learning_rate": 0.0005, |
| "loss": 3.8665788173675537, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.7671732522796353, |
| "grad_norm": 0.9150176048278809, |
| "learning_rate": 0.0005, |
| "loss": 3.9708409309387207, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.7677811550151976, |
| "grad_norm": 1.2305281162261963, |
| "learning_rate": 0.0005, |
| "loss": 3.791486978530884, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.7683890577507598, |
| "grad_norm": 1.0246379375457764, |
| "learning_rate": 0.0005, |
| "loss": 3.931403875350952, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.7689969604863222, |
| "grad_norm": 1.342997431755066, |
| "learning_rate": 0.0005, |
| "loss": 3.800549030303955, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.7696048632218845, |
| "grad_norm": 1.0477383136749268, |
| "learning_rate": 0.0005, |
| "loss": 3.9642491340637207, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.7702127659574468, |
| "grad_norm": 1.5231037139892578, |
| "learning_rate": 0.0005, |
| "loss": 3.883274555206299, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.7708206686930091, |
| "grad_norm": 1.21817147731781, |
| "learning_rate": 0.0005, |
| "loss": 3.8319201469421387, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.7714285714285715, |
| "grad_norm": 1.3139930963516235, |
| "learning_rate": 0.0005, |
| "loss": 3.902513027191162, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.7720364741641338, |
| "grad_norm": 1.1108347177505493, |
| "learning_rate": 0.0005, |
| "loss": 4.040473937988281, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.772644376899696, |
| "grad_norm": 0.9352411031723022, |
| "learning_rate": 0.0005, |
| "loss": 3.8833415508270264, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.7732522796352583, |
| "grad_norm": 0.9234441518783569, |
| "learning_rate": 0.0005, |
| "loss": 4.101876258850098, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.7738601823708207, |
| "grad_norm": 1.0629017353057861, |
| "learning_rate": 0.0005, |
| "loss": 3.869561195373535, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.774468085106383, |
| "grad_norm": 1.0356484651565552, |
| "learning_rate": 0.0005, |
| "loss": 3.9723856449127197, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.7750759878419453, |
| "grad_norm": 0.9600344896316528, |
| "learning_rate": 0.0005, |
| "loss": 3.824707508087158, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.7756838905775076, |
| "grad_norm": 1.0315158367156982, |
| "learning_rate": 0.0005, |
| "loss": 4.001948356628418, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.7762917933130699, |
| "grad_norm": 1.1866099834442139, |
| "learning_rate": 0.0005, |
| "loss": 3.763075828552246, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.7768996960486322, |
| "grad_norm": 1.1227611303329468, |
| "learning_rate": 0.0005, |
| "loss": 3.846872329711914, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.7775075987841945, |
| "grad_norm": 1.1628526449203491, |
| "learning_rate": 0.0005, |
| "loss": 3.929243564605713, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.7781155015197568, |
| "grad_norm": 0.9936217069625854, |
| "learning_rate": 0.0005, |
| "loss": 3.736764907836914, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.7787234042553192, |
| "grad_norm": 1.0325050354003906, |
| "learning_rate": 0.0005, |
| "loss": 3.935077667236328, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.7793313069908815, |
| "grad_norm": 1.0567058324813843, |
| "learning_rate": 0.0005, |
| "loss": 3.801319122314453, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.7799392097264438, |
| "grad_norm": 1.313740611076355, |
| "learning_rate": 0.0005, |
| "loss": 4.132801055908203, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.780547112462006, |
| "grad_norm": 1.4536793231964111, |
| "learning_rate": 0.0005, |
| "loss": 3.88094425201416, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.7811550151975684, |
| "grad_norm": 1.1501535177230835, |
| "learning_rate": 0.0005, |
| "loss": 3.8404948711395264, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.7817629179331307, |
| "grad_norm": 1.3253229856491089, |
| "learning_rate": 0.0005, |
| "loss": 4.016423225402832, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.782370820668693, |
| "grad_norm": 1.2896214723587036, |
| "learning_rate": 0.0005, |
| "loss": 3.8204050064086914, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.7829787234042553, |
| "grad_norm": 1.347516655921936, |
| "learning_rate": 0.0005, |
| "loss": 3.849546432495117, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.7835866261398177, |
| "grad_norm": 1.5418754816055298, |
| "learning_rate": 0.0005, |
| "loss": 4.135068893432617, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.78419452887538, |
| "grad_norm": 1.0823962688446045, |
| "learning_rate": 0.0005, |
| "loss": 3.752423048019409, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.7848024316109422, |
| "grad_norm": 1.1146916151046753, |
| "learning_rate": 0.0005, |
| "loss": 3.810540199279785, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.7854103343465045, |
| "grad_norm": 1.0943037271499634, |
| "learning_rate": 0.0005, |
| "loss": 3.761442184448242, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.7860182370820669, |
| "grad_norm": 1.0425827503204346, |
| "learning_rate": 0.0005, |
| "loss": 3.7996015548706055, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.7866261398176292, |
| "grad_norm": 1.5982511043548584, |
| "learning_rate": 0.0005, |
| "loss": 3.8388147354125977, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.7872340425531915, |
| "grad_norm": 1.4619585275650024, |
| "learning_rate": 0.0005, |
| "loss": 4.016120910644531, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.7878419452887538, |
| "grad_norm": 1.3633700609207153, |
| "learning_rate": 0.0005, |
| "loss": 4.069618225097656, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.7884498480243161, |
| "grad_norm": 1.009056568145752, |
| "learning_rate": 0.0005, |
| "loss": 4.0978264808654785, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.7890577507598784, |
| "grad_norm": 1.1812894344329834, |
| "learning_rate": 0.0005, |
| "loss": 3.6178488731384277, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.7896656534954407, |
| "grad_norm": 1.0647777318954468, |
| "learning_rate": 0.0005, |
| "loss": 3.910210371017456, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.790273556231003, |
| "grad_norm": 1.4413726329803467, |
| "learning_rate": 0.0005, |
| "loss": 3.9200563430786133, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.7908814589665654, |
| "grad_norm": 1.1021374464035034, |
| "learning_rate": 0.0005, |
| "loss": 3.680574655532837, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.7914893617021277, |
| "grad_norm": 1.0827854871749878, |
| "learning_rate": 0.0005, |
| "loss": 3.842402458190918, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.79209726443769, |
| "grad_norm": 1.2615513801574707, |
| "learning_rate": 0.0005, |
| "loss": 4.0547590255737305, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.7927051671732522, |
| "grad_norm": 1.0599168539047241, |
| "learning_rate": 0.0005, |
| "loss": 3.8400776386260986, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.7933130699088146, |
| "grad_norm": 1.4258071184158325, |
| "learning_rate": 0.0005, |
| "loss": 3.945885181427002, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.7939209726443769, |
| "grad_norm": 1.107612133026123, |
| "learning_rate": 0.0005, |
| "loss": 3.6351089477539062, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.7945288753799392, |
| "grad_norm": 0.9725725650787354, |
| "learning_rate": 0.0005, |
| "loss": 3.5905802249908447, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.7951367781155015, |
| "grad_norm": 1.3178088665008545, |
| "learning_rate": 0.0005, |
| "loss": 4.063264846801758, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.7957446808510639, |
| "grad_norm": 1.111405611038208, |
| "learning_rate": 0.0005, |
| "loss": 3.70896053314209, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.7963525835866262, |
| "grad_norm": 1.0547385215759277, |
| "learning_rate": 0.0005, |
| "loss": 4.020359516143799, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.7969604863221884, |
| "grad_norm": 1.1632133722305298, |
| "learning_rate": 0.0005, |
| "loss": 3.8566200733184814, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.7975683890577507, |
| "grad_norm": 1.0662367343902588, |
| "learning_rate": 0.0005, |
| "loss": 3.7626304626464844, |
| "step": 1312 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1645, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 164, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.493255831185785e+18, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|