| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.21018895639981305, |
| "eval_steps": 500, |
| "global_step": 787, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0002670761834813381, |
| "grad_norm": 17.75, |
| "learning_rate": 0.0, |
| "loss": 2.6790831089019775, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0005341523669626762, |
| "grad_norm": 19.375, |
| "learning_rate": 7.692307692307694e-07, |
| "loss": 2.857093095779419, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0008012285504440141, |
| "grad_norm": 17.75, |
| "learning_rate": 1.5384615384615387e-06, |
| "loss": 2.6315698623657227, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0010683047339253523, |
| "grad_norm": 18.75, |
| "learning_rate": 2.307692307692308e-06, |
| "loss": 2.714285135269165, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0013353809174066903, |
| "grad_norm": 16.25, |
| "learning_rate": 3.0769230769230774e-06, |
| "loss": 2.6139986515045166, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0016024571008880282, |
| "grad_norm": 22.75, |
| "learning_rate": 3.846153846153847e-06, |
| "loss": 2.76374888420105, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0018695332843693664, |
| "grad_norm": 18.0, |
| "learning_rate": 4.615384615384616e-06, |
| "loss": 2.660518169403076, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0021366094678507046, |
| "grad_norm": 24.25, |
| "learning_rate": 5.3846153846153855e-06, |
| "loss": 2.71116042137146, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0024036856513320426, |
| "grad_norm": 23.0, |
| "learning_rate": 6.153846153846155e-06, |
| "loss": 2.778017997741699, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0026707618348133805, |
| "grad_norm": 20.0, |
| "learning_rate": 6.923076923076923e-06, |
| "loss": 2.5963754653930664, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0029378380182947185, |
| "grad_norm": 20.875, |
| "learning_rate": 7.692307692307694e-06, |
| "loss": 2.766429901123047, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.0032049142017760565, |
| "grad_norm": 23.125, |
| "learning_rate": 8.461538461538462e-06, |
| "loss": 2.9626660346984863, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.003471990385257395, |
| "grad_norm": 26.0, |
| "learning_rate": 9.230769230769232e-06, |
| "loss": 2.910458564758301, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.003739066568738733, |
| "grad_norm": 22.125, |
| "learning_rate": 1e-05, |
| "loss": 2.8882126808166504, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.004006142752220071, |
| "grad_norm": 17.125, |
| "learning_rate": 1.0769230769230771e-05, |
| "loss": 2.5886685848236084, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.004273218935701409, |
| "grad_norm": 17.25, |
| "learning_rate": 1.153846153846154e-05, |
| "loss": 2.7212939262390137, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.004540295119182747, |
| "grad_norm": 22.125, |
| "learning_rate": 1.230769230769231e-05, |
| "loss": 2.689997911453247, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.004807371302664085, |
| "grad_norm": 24.25, |
| "learning_rate": 1.3076923076923078e-05, |
| "loss": 2.7217793464660645, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.005074447486145423, |
| "grad_norm": 17.0, |
| "learning_rate": 1.3846153846153847e-05, |
| "loss": 2.58693265914917, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.005341523669626761, |
| "grad_norm": 20.625, |
| "learning_rate": 1.4615384615384617e-05, |
| "loss": 2.793051242828369, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0056085998531080995, |
| "grad_norm": 21.375, |
| "learning_rate": 1.5384615384615387e-05, |
| "loss": 2.7657687664031982, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.005875676036589437, |
| "grad_norm": 18.125, |
| "learning_rate": 1.6153846153846154e-05, |
| "loss": 2.8189685344696045, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.006142752220070775, |
| "grad_norm": 14.875, |
| "learning_rate": 1.6923076923076924e-05, |
| "loss": 2.580648183822632, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.006409828403552113, |
| "grad_norm": 21.875, |
| "learning_rate": 1.7692307692307694e-05, |
| "loss": 2.74700927734375, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.006676904587033451, |
| "grad_norm": 17.25, |
| "learning_rate": 1.8461538461538465e-05, |
| "loss": 2.4545698165893555, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.00694398077051479, |
| "grad_norm": 20.5, |
| "learning_rate": 1.923076923076923e-05, |
| "loss": 2.6994550228118896, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.007211056953996127, |
| "grad_norm": 17.0, |
| "learning_rate": 2e-05, |
| "loss": 2.6980443000793457, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.007478133137477466, |
| "grad_norm": 17.375, |
| "learning_rate": 2.0769230769230772e-05, |
| "loss": 2.78818941116333, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.007745209320958803, |
| "grad_norm": 20.25, |
| "learning_rate": 2.1538461538461542e-05, |
| "loss": 2.742327928543091, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.008012285504440142, |
| "grad_norm": 15.9375, |
| "learning_rate": 2.230769230769231e-05, |
| "loss": 2.6761393547058105, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.00827936168792148, |
| "grad_norm": 17.625, |
| "learning_rate": 2.307692307692308e-05, |
| "loss": 2.639137029647827, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.008546437871402818, |
| "grad_norm": 16.25, |
| "learning_rate": 2.384615384615385e-05, |
| "loss": 2.640866279602051, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.008813514054884156, |
| "grad_norm": 20.5, |
| "learning_rate": 2.461538461538462e-05, |
| "loss": 2.843217134475708, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.009080590238365494, |
| "grad_norm": 17.625, |
| "learning_rate": 2.5384615384615383e-05, |
| "loss": 2.504178047180176, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.009347666421846831, |
| "grad_norm": 16.0, |
| "learning_rate": 2.6153846153846157e-05, |
| "loss": 2.5768792629241943, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.00961474260532817, |
| "grad_norm": 16.875, |
| "learning_rate": 2.6923076923076923e-05, |
| "loss": 2.5246806144714355, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.009881818788809508, |
| "grad_norm": 18.875, |
| "learning_rate": 2.7692307692307694e-05, |
| "loss": 2.6676347255706787, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.010148894972290845, |
| "grad_norm": 17.875, |
| "learning_rate": 2.846153846153846e-05, |
| "loss": 2.667938470840454, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.010415971155772185, |
| "grad_norm": 16.5, |
| "learning_rate": 2.9230769230769234e-05, |
| "loss": 2.4972121715545654, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.010683047339253522, |
| "grad_norm": 16.625, |
| "learning_rate": 3e-05, |
| "loss": 2.6042871475219727, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.01095012352273486, |
| "grad_norm": 16.5, |
| "learning_rate": 3.0769230769230774e-05, |
| "loss": 2.8169543743133545, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.011217199706216199, |
| "grad_norm": 16.0, |
| "learning_rate": 3.153846153846154e-05, |
| "loss": 2.542125701904297, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.011484275889697537, |
| "grad_norm": 13.9375, |
| "learning_rate": 3.230769230769231e-05, |
| "loss": 2.404881000518799, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.011751352073178874, |
| "grad_norm": 15.5625, |
| "learning_rate": 3.307692307692308e-05, |
| "loss": 2.658536672592163, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.012018428256660212, |
| "grad_norm": 13.6875, |
| "learning_rate": 3.384615384615385e-05, |
| "loss": 2.5229556560516357, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.01228550444014155, |
| "grad_norm": 13.9375, |
| "learning_rate": 3.461538461538462e-05, |
| "loss": 2.4668819904327393, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.012552580623622888, |
| "grad_norm": 13.6875, |
| "learning_rate": 3.538461538461539e-05, |
| "loss": 2.3786585330963135, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.012819656807104226, |
| "grad_norm": 14.625, |
| "learning_rate": 3.615384615384615e-05, |
| "loss": 2.486743688583374, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.013086732990585565, |
| "grad_norm": 14.0625, |
| "learning_rate": 3.692307692307693e-05, |
| "loss": 2.581742763519287, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.013353809174066903, |
| "grad_norm": 12.3125, |
| "learning_rate": 3.769230769230769e-05, |
| "loss": 2.4105544090270996, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.01362088535754824, |
| "grad_norm": 14.875, |
| "learning_rate": 3.846153846153846e-05, |
| "loss": 2.5906612873077393, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.01388796154102958, |
| "grad_norm": 14.75, |
| "learning_rate": 3.923076923076923e-05, |
| "loss": 2.617494821548462, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.014155037724510917, |
| "grad_norm": 13.8125, |
| "learning_rate": 4e-05, |
| "loss": 2.562443494796753, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.014422113907992255, |
| "grad_norm": 14.1875, |
| "learning_rate": 4.0769230769230773e-05, |
| "loss": 2.5650522708892822, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.014689190091473592, |
| "grad_norm": 15.375, |
| "learning_rate": 4.1538461538461544e-05, |
| "loss": 2.4969890117645264, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.014956266274954931, |
| "grad_norm": 13.0625, |
| "learning_rate": 4.230769230769231e-05, |
| "loss": 2.533348321914673, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.015223342458436269, |
| "grad_norm": 13.6875, |
| "learning_rate": 4.3076923076923084e-05, |
| "loss": 2.639575719833374, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.015490418641917606, |
| "grad_norm": 12.3125, |
| "learning_rate": 4.384615384615385e-05, |
| "loss": 2.369950771331787, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.015757494825398944, |
| "grad_norm": 11.8125, |
| "learning_rate": 4.461538461538462e-05, |
| "loss": 2.5953032970428467, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.016024571008880283, |
| "grad_norm": 11.75, |
| "learning_rate": 4.538461538461539e-05, |
| "loss": 2.4076132774353027, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.016291647192361623, |
| "grad_norm": 14.875, |
| "learning_rate": 4.615384615384616e-05, |
| "loss": 2.5904717445373535, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.01655872337584296, |
| "grad_norm": 11.8125, |
| "learning_rate": 4.692307692307693e-05, |
| "loss": 2.411831855773926, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.016825799559324298, |
| "grad_norm": 11.6875, |
| "learning_rate": 4.76923076923077e-05, |
| "loss": 2.5101966857910156, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.017092875742805637, |
| "grad_norm": 11.125, |
| "learning_rate": 4.846153846153846e-05, |
| "loss": 2.2876455783843994, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.017359951926286973, |
| "grad_norm": 10.25, |
| "learning_rate": 4.923076923076924e-05, |
| "loss": 2.278735399246216, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.017627028109768312, |
| "grad_norm": 10.5, |
| "learning_rate": 5e-05, |
| "loss": 2.300778865814209, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.017894104293249648, |
| "grad_norm": 14.375, |
| "learning_rate": 5.0769230769230766e-05, |
| "loss": 2.7023446559906006, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.018161180476730987, |
| "grad_norm": 10.5625, |
| "learning_rate": 5.1538461538461536e-05, |
| "loss": 2.2479007244110107, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.018428256660212326, |
| "grad_norm": 10.1875, |
| "learning_rate": 5.230769230769231e-05, |
| "loss": 2.2641005516052246, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.018695332843693662, |
| "grad_norm": 10.875, |
| "learning_rate": 5.3076923076923076e-05, |
| "loss": 2.417478561401367, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.018962409027175, |
| "grad_norm": 11.5625, |
| "learning_rate": 5.384615384615385e-05, |
| "loss": 2.4386911392211914, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.01922948521065634, |
| "grad_norm": 11.5625, |
| "learning_rate": 5.461538461538461e-05, |
| "loss": 2.562483787536621, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.019496561394137676, |
| "grad_norm": 10.8125, |
| "learning_rate": 5.538461538461539e-05, |
| "loss": 2.3742763996124268, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.019763637577619016, |
| "grad_norm": 9.0625, |
| "learning_rate": 5.615384615384616e-05, |
| "loss": 2.1751370429992676, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.020030713761100355, |
| "grad_norm": 10.3125, |
| "learning_rate": 5.692307692307692e-05, |
| "loss": 2.398951292037964, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.02029778994458169, |
| "grad_norm": 9.4375, |
| "learning_rate": 5.769230769230769e-05, |
| "loss": 2.19356632232666, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.02056486612806303, |
| "grad_norm": 9.0625, |
| "learning_rate": 5.846153846153847e-05, |
| "loss": 2.200453519821167, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.02083194231154437, |
| "grad_norm": 10.25, |
| "learning_rate": 5.923076923076923e-05, |
| "loss": 2.3186240196228027, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.021099018495025705, |
| "grad_norm": 9.625, |
| "learning_rate": 6e-05, |
| "loss": 2.2442519664764404, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.021366094678507044, |
| "grad_norm": 9.0625, |
| "learning_rate": 6.0769230769230765e-05, |
| "loss": 2.2705352306365967, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.021633170861988384, |
| "grad_norm": 9.125, |
| "learning_rate": 6.153846153846155e-05, |
| "loss": 2.2238214015960693, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.02190024704546972, |
| "grad_norm": 9.5625, |
| "learning_rate": 6.23076923076923e-05, |
| "loss": 2.2311673164367676, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.02216732322895106, |
| "grad_norm": 9.5625, |
| "learning_rate": 6.307692307692308e-05, |
| "loss": 2.2941524982452393, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.022434399412432398, |
| "grad_norm": 9.8125, |
| "learning_rate": 6.384615384615385e-05, |
| "loss": 2.2252962589263916, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.022701475595913734, |
| "grad_norm": 9.3125, |
| "learning_rate": 6.461538461538462e-05, |
| "loss": 2.204983949661255, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.022968551779395073, |
| "grad_norm": 8.6875, |
| "learning_rate": 6.538461538461539e-05, |
| "loss": 2.074981212615967, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.02323562796287641, |
| "grad_norm": 9.0625, |
| "learning_rate": 6.615384615384616e-05, |
| "loss": 2.0775859355926514, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.023502704146357748, |
| "grad_norm": 9.6875, |
| "learning_rate": 6.692307692307693e-05, |
| "loss": 2.144122362136841, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.023769780329839087, |
| "grad_norm": 8.4375, |
| "learning_rate": 6.76923076923077e-05, |
| "loss": 2.1126554012298584, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.024036856513320423, |
| "grad_norm": 7.6875, |
| "learning_rate": 6.846153846153847e-05, |
| "loss": 1.9261810779571533, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.024303932696801762, |
| "grad_norm": 8.0625, |
| "learning_rate": 6.923076923076924e-05, |
| "loss": 2.0227725505828857, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.0245710088802831, |
| "grad_norm": 6.59375, |
| "learning_rate": 7e-05, |
| "loss": 1.9648834466934204, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.024838085063764438, |
| "grad_norm": 6.59375, |
| "learning_rate": 7.076923076923078e-05, |
| "loss": 1.920790672302246, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.025105161247245777, |
| "grad_norm": 6.3125, |
| "learning_rate": 7.153846153846155e-05, |
| "loss": 1.898984432220459, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.025372237430727116, |
| "grad_norm": 5.5, |
| "learning_rate": 7.23076923076923e-05, |
| "loss": 1.9621495008468628, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.025639313614208452, |
| "grad_norm": 5.25, |
| "learning_rate": 7.307692307692307e-05, |
| "loss": 1.930998682975769, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.02590638979768979, |
| "grad_norm": 5.125, |
| "learning_rate": 7.384615384615386e-05, |
| "loss": 1.8902232646942139, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.02617346598117113, |
| "grad_norm": 5.15625, |
| "learning_rate": 7.461538461538462e-05, |
| "loss": 1.916045904159546, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.026440542164652466, |
| "grad_norm": 6.0, |
| "learning_rate": 7.538461538461539e-05, |
| "loss": 2.0126044750213623, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.026707618348133805, |
| "grad_norm": 4.75, |
| "learning_rate": 7.615384615384616e-05, |
| "loss": 1.8163182735443115, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.026974694531615145, |
| "grad_norm": 4.8125, |
| "learning_rate": 7.692307692307693e-05, |
| "loss": 1.8071495294570923, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.02724177071509648, |
| "grad_norm": 4.8125, |
| "learning_rate": 7.76923076923077e-05, |
| "loss": 2.030604362487793, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.02750884689857782, |
| "grad_norm": 4.78125, |
| "learning_rate": 7.846153846153847e-05, |
| "loss": 1.9025654792785645, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.02777592308205916, |
| "grad_norm": 4.5, |
| "learning_rate": 7.923076923076924e-05, |
| "loss": 1.9143315553665161, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.028042999265540495, |
| "grad_norm": 3.828125, |
| "learning_rate": 8e-05, |
| "loss": 1.839133858680725, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.028310075449021834, |
| "grad_norm": 4.0625, |
| "learning_rate": 8.076923076923078e-05, |
| "loss": 1.8787554502487183, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.02857715163250317, |
| "grad_norm": 3.375, |
| "learning_rate": 8.153846153846155e-05, |
| "loss": 1.7920942306518555, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.02884422781598451, |
| "grad_norm": 3.671875, |
| "learning_rate": 8.23076923076923e-05, |
| "loss": 1.8200159072875977, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.02911130399946585, |
| "grad_norm": 3.234375, |
| "learning_rate": 8.307692307692309e-05, |
| "loss": 1.7615149021148682, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.029378380182947184, |
| "grad_norm": 3.0, |
| "learning_rate": 8.384615384615386e-05, |
| "loss": 1.7836936712265015, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.029645456366428524, |
| "grad_norm": 3.0625, |
| "learning_rate": 8.461538461538461e-05, |
| "loss": 1.7257531881332397, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.029912532549909863, |
| "grad_norm": 2.96875, |
| "learning_rate": 8.538461538461538e-05, |
| "loss": 1.894051194190979, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.0301796087333912, |
| "grad_norm": 3.078125, |
| "learning_rate": 8.615384615384617e-05, |
| "loss": 1.7538135051727295, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.030446684916872538, |
| "grad_norm": 3.328125, |
| "learning_rate": 8.692307692307692e-05, |
| "loss": 1.7873612642288208, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.030713761100353877, |
| "grad_norm": 2.671875, |
| "learning_rate": 8.76923076923077e-05, |
| "loss": 1.692636489868164, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.030980837283835213, |
| "grad_norm": 2.765625, |
| "learning_rate": 8.846153846153847e-05, |
| "loss": 1.7532554864883423, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.031247913467316552, |
| "grad_norm": 2.15625, |
| "learning_rate": 8.923076923076924e-05, |
| "loss": 1.5472298860549927, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.03151498965079789, |
| "grad_norm": 2.296875, |
| "learning_rate": 9e-05, |
| "loss": 1.7773547172546387, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.03178206583427923, |
| "grad_norm": 2.625, |
| "learning_rate": 9.076923076923078e-05, |
| "loss": 1.756500482559204, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.03204914201776057, |
| "grad_norm": 2.359375, |
| "learning_rate": 9.153846153846155e-05, |
| "loss": 1.7489063739776611, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.032316218201241906, |
| "grad_norm": 2.203125, |
| "learning_rate": 9.230769230769232e-05, |
| "loss": 1.734527349472046, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.032583294384723245, |
| "grad_norm": 2.34375, |
| "learning_rate": 9.307692307692309e-05, |
| "loss": 1.7665072679519653, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.03285037056820458, |
| "grad_norm": 2.125, |
| "learning_rate": 9.384615384615386e-05, |
| "loss": 1.7649085521697998, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.03311744675168592, |
| "grad_norm": 2.03125, |
| "learning_rate": 9.461538461538461e-05, |
| "loss": 1.7251827716827393, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.033384522935167256, |
| "grad_norm": 2.0625, |
| "learning_rate": 9.53846153846154e-05, |
| "loss": 1.6280146837234497, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.033651599118648595, |
| "grad_norm": 1.9140625, |
| "learning_rate": 9.615384615384617e-05, |
| "loss": 1.700337290763855, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.033918675302129934, |
| "grad_norm": 1.9765625, |
| "learning_rate": 9.692307692307692e-05, |
| "loss": 1.711737871170044, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.034185751485611274, |
| "grad_norm": 1.875, |
| "learning_rate": 9.76923076923077e-05, |
| "loss": 1.6615790128707886, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.034452827669092606, |
| "grad_norm": 1.78125, |
| "learning_rate": 9.846153846153848e-05, |
| "loss": 1.6334642171859741, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.034719903852573945, |
| "grad_norm": 1.9140625, |
| "learning_rate": 9.923076923076923e-05, |
| "loss": 1.6756442785263062, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.034986980036055285, |
| "grad_norm": 1.703125, |
| "learning_rate": 0.0001, |
| "loss": 1.623317003250122, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.035254056219536624, |
| "grad_norm": 1.9296875, |
| "learning_rate": 0.0001, |
| "loss": 1.7026211023330688, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.03552113240301796, |
| "grad_norm": 1.7265625, |
| "learning_rate": 0.0001, |
| "loss": 1.6647707223892212, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.035788208586499295, |
| "grad_norm": 1.9375, |
| "learning_rate": 0.0001, |
| "loss": 1.75358247756958, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.036055284769980635, |
| "grad_norm": 1.5625, |
| "learning_rate": 0.0001, |
| "loss": 1.631664752960205, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.036322360953461974, |
| "grad_norm": 1.484375, |
| "learning_rate": 0.0001, |
| "loss": 1.6193870306015015, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.03658943713694331, |
| "grad_norm": 1.5234375, |
| "learning_rate": 0.0001, |
| "loss": 1.6243921518325806, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.03685651332042465, |
| "grad_norm": 1.4296875, |
| "learning_rate": 0.0001, |
| "loss": 1.6910765171051025, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.03712358950390599, |
| "grad_norm": 1.3984375, |
| "learning_rate": 0.0001, |
| "loss": 1.617536187171936, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.037390665687387324, |
| "grad_norm": 1.3125, |
| "learning_rate": 0.0001, |
| "loss": 1.4483500719070435, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.03765774187086866, |
| "grad_norm": 1.421875, |
| "learning_rate": 0.0001, |
| "loss": 1.6344833374023438, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.03792481805435, |
| "grad_norm": 1.328125, |
| "learning_rate": 0.0001, |
| "loss": 1.6913228034973145, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.03819189423783134, |
| "grad_norm": 1.34375, |
| "learning_rate": 0.0001, |
| "loss": 1.598616123199463, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.03845897042131268, |
| "grad_norm": 1.3359375, |
| "learning_rate": 0.0001, |
| "loss": 1.705214500427246, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.03872604660479402, |
| "grad_norm": 1.1953125, |
| "learning_rate": 0.0001, |
| "loss": 1.574364423751831, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.03899312278827535, |
| "grad_norm": 1.3671875, |
| "learning_rate": 0.0001, |
| "loss": 1.6656533479690552, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.03926019897175669, |
| "grad_norm": 1.265625, |
| "learning_rate": 0.0001, |
| "loss": 1.5525474548339844, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.03952727515523803, |
| "grad_norm": 1.234375, |
| "learning_rate": 0.0001, |
| "loss": 1.6093608140945435, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.03979435133871937, |
| "grad_norm": 1.21875, |
| "learning_rate": 0.0001, |
| "loss": 1.5538296699523926, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.04006142752220071, |
| "grad_norm": 1.1328125, |
| "learning_rate": 0.0001, |
| "loss": 1.566986083984375, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.04032850370568205, |
| "grad_norm": 1.078125, |
| "learning_rate": 0.0001, |
| "loss": 1.4673757553100586, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.04059557988916338, |
| "grad_norm": 1.125, |
| "learning_rate": 0.0001, |
| "loss": 1.5815367698669434, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.04086265607264472, |
| "grad_norm": 1.1796875, |
| "learning_rate": 0.0001, |
| "loss": 1.5937564373016357, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.04112973225612606, |
| "grad_norm": 1.25, |
| "learning_rate": 0.0001, |
| "loss": 1.5485172271728516, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.0413968084396074, |
| "grad_norm": 1.1796875, |
| "learning_rate": 0.0001, |
| "loss": 1.5556213855743408, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.04166388462308874, |
| "grad_norm": 1.1015625, |
| "learning_rate": 0.0001, |
| "loss": 1.5599007606506348, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.04193096080657007, |
| "grad_norm": 1.078125, |
| "learning_rate": 0.0001, |
| "loss": 1.5992523431777954, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.04219803699005141, |
| "grad_norm": 0.984375, |
| "learning_rate": 0.0001, |
| "loss": 1.5620003938674927, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.04246511317353275, |
| "grad_norm": 1.078125, |
| "learning_rate": 0.0001, |
| "loss": 1.5492624044418335, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.04273218935701409, |
| "grad_norm": 0.984375, |
| "learning_rate": 0.0001, |
| "loss": 1.6017942428588867, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.04299926554049543, |
| "grad_norm": 1.09375, |
| "learning_rate": 0.0001, |
| "loss": 1.5750805139541626, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.04326634172397677, |
| "grad_norm": 1.0546875, |
| "learning_rate": 0.0001, |
| "loss": 1.761667013168335, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.0435334179074581, |
| "grad_norm": 0.98046875, |
| "learning_rate": 0.0001, |
| "loss": 1.6303234100341797, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.04380049409093944, |
| "grad_norm": 0.95703125, |
| "learning_rate": 0.0001, |
| "loss": 1.5894020795822144, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.04406757027442078, |
| "grad_norm": 0.91015625, |
| "learning_rate": 0.0001, |
| "loss": 1.569838047027588, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.04433464645790212, |
| "grad_norm": 1.0234375, |
| "learning_rate": 0.0001, |
| "loss": 1.4858709573745728, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.04460172264138346, |
| "grad_norm": 0.95703125, |
| "learning_rate": 0.0001, |
| "loss": 1.6576966047286987, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.044868798824864796, |
| "grad_norm": 1.0390625, |
| "learning_rate": 0.0001, |
| "loss": 1.5174238681793213, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.04513587500834613, |
| "grad_norm": 0.89453125, |
| "learning_rate": 0.0001, |
| "loss": 1.5932589769363403, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.04540295119182747, |
| "grad_norm": 0.89453125, |
| "learning_rate": 0.0001, |
| "loss": 1.3785005807876587, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.04567002737530881, |
| "grad_norm": 0.93359375, |
| "learning_rate": 0.0001, |
| "loss": 1.491324782371521, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.045937103558790146, |
| "grad_norm": 0.921875, |
| "learning_rate": 0.0001, |
| "loss": 1.588956594467163, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.046204179742271485, |
| "grad_norm": 0.87890625, |
| "learning_rate": 0.0001, |
| "loss": 1.4634578227996826, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.04647125592575282, |
| "grad_norm": 1.0703125, |
| "learning_rate": 0.0001, |
| "loss": 1.520629644393921, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.04673833210923416, |
| "grad_norm": 0.84765625, |
| "learning_rate": 0.0001, |
| "loss": 1.5000630617141724, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.047005408292715496, |
| "grad_norm": 1.0390625, |
| "learning_rate": 0.0001, |
| "loss": 1.632713794708252, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.047272484476196835, |
| "grad_norm": 0.9609375, |
| "learning_rate": 0.0001, |
| "loss": 1.5651711225509644, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.047539560659678175, |
| "grad_norm": 0.87109375, |
| "learning_rate": 0.0001, |
| "loss": 1.5135159492492676, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.047806636843159514, |
| "grad_norm": 0.875, |
| "learning_rate": 0.0001, |
| "loss": 1.4664947986602783, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.048073713026640846, |
| "grad_norm": 0.8515625, |
| "learning_rate": 0.0001, |
| "loss": 1.5288572311401367, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.048340789210122186, |
| "grad_norm": 0.90234375, |
| "learning_rate": 0.0001, |
| "loss": 1.4387882947921753, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.048607865393603525, |
| "grad_norm": 0.8359375, |
| "learning_rate": 0.0001, |
| "loss": 1.5431747436523438, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.048874941577084864, |
| "grad_norm": 0.84765625, |
| "learning_rate": 0.0001, |
| "loss": 1.58133065700531, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.0491420177605662, |
| "grad_norm": 0.8203125, |
| "learning_rate": 0.0001, |
| "loss": 1.5671203136444092, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.04940909394404754, |
| "grad_norm": 0.796875, |
| "learning_rate": 0.0001, |
| "loss": 1.5029916763305664, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.049676170127528875, |
| "grad_norm": 0.8203125, |
| "learning_rate": 0.0001, |
| "loss": 1.5585753917694092, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.049943246311010214, |
| "grad_norm": 0.859375, |
| "learning_rate": 0.0001, |
| "loss": 1.4746276140213013, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.050210322494491554, |
| "grad_norm": 0.828125, |
| "learning_rate": 0.0001, |
| "loss": 1.4432883262634277, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.05047739867797289, |
| "grad_norm": 0.9296875, |
| "learning_rate": 0.0001, |
| "loss": 1.472724437713623, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.05074447486145423, |
| "grad_norm": 0.8203125, |
| "learning_rate": 0.0001, |
| "loss": 1.4479737281799316, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.051011551044935564, |
| "grad_norm": 0.80859375, |
| "learning_rate": 0.0001, |
| "loss": 1.4101297855377197, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.051278627228416904, |
| "grad_norm": 0.91796875, |
| "learning_rate": 0.0001, |
| "loss": 1.5154465436935425, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.05154570341189824, |
| "grad_norm": 0.82421875, |
| "learning_rate": 0.0001, |
| "loss": 1.5523278713226318, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.05181277959537958, |
| "grad_norm": 0.83203125, |
| "learning_rate": 0.0001, |
| "loss": 1.5290699005126953, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.05207985577886092, |
| "grad_norm": 0.8515625, |
| "learning_rate": 0.0001, |
| "loss": 1.527392029762268, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.05234693196234226, |
| "grad_norm": 0.7421875, |
| "learning_rate": 0.0001, |
| "loss": 1.5038968324661255, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.05261400814582359, |
| "grad_norm": 0.79296875, |
| "learning_rate": 0.0001, |
| "loss": 1.4845222234725952, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.05288108432930493, |
| "grad_norm": 0.77734375, |
| "learning_rate": 0.0001, |
| "loss": 1.513843059539795, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.05314816051278627, |
| "grad_norm": 0.82421875, |
| "learning_rate": 0.0001, |
| "loss": 1.4493604898452759, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.05341523669626761, |
| "grad_norm": 0.796875, |
| "learning_rate": 0.0001, |
| "loss": 1.3438012599945068, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05368231287974895, |
| "grad_norm": 0.71875, |
| "learning_rate": 0.0001, |
| "loss": 1.463066577911377, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.05394938906323029, |
| "grad_norm": 0.7578125, |
| "learning_rate": 0.0001, |
| "loss": 1.492901086807251, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.05421646524671162, |
| "grad_norm": 0.734375, |
| "learning_rate": 0.0001, |
| "loss": 1.4638193845748901, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.05448354143019296, |
| "grad_norm": 0.70703125, |
| "learning_rate": 0.0001, |
| "loss": 1.4464643001556396, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.0547506176136743, |
| "grad_norm": 0.75390625, |
| "learning_rate": 0.0001, |
| "loss": 1.5037822723388672, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.05501769379715564, |
| "grad_norm": 0.7421875, |
| "learning_rate": 0.0001, |
| "loss": 1.544986367225647, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.05528476998063698, |
| "grad_norm": 0.796875, |
| "learning_rate": 0.0001, |
| "loss": 1.500586986541748, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.05555184616411832, |
| "grad_norm": 0.68359375, |
| "learning_rate": 0.0001, |
| "loss": 1.463313341140747, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.05581892234759965, |
| "grad_norm": 0.71484375, |
| "learning_rate": 0.0001, |
| "loss": 1.5014681816101074, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.05608599853108099, |
| "grad_norm": 0.6875, |
| "learning_rate": 0.0001, |
| "loss": 1.4201849699020386, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.05635307471456233, |
| "grad_norm": 0.7109375, |
| "learning_rate": 0.0001, |
| "loss": 1.4925004243850708, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.05662015089804367, |
| "grad_norm": 0.73046875, |
| "learning_rate": 0.0001, |
| "loss": 1.509586215019226, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.05688722708152501, |
| "grad_norm": 0.65625, |
| "learning_rate": 0.0001, |
| "loss": 1.332578420639038, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.05715430326500634, |
| "grad_norm": 0.73046875, |
| "learning_rate": 0.0001, |
| "loss": 1.4592535495758057, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.05742137944848768, |
| "grad_norm": 0.7421875, |
| "learning_rate": 0.0001, |
| "loss": 1.4789878129959106, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.05768845563196902, |
| "grad_norm": 0.73828125, |
| "learning_rate": 0.0001, |
| "loss": 1.5233962535858154, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.05795553181545036, |
| "grad_norm": 0.703125, |
| "learning_rate": 0.0001, |
| "loss": 1.587152123451233, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.0582226079989317, |
| "grad_norm": 0.75, |
| "learning_rate": 0.0001, |
| "loss": 1.3452866077423096, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.058489684182413036, |
| "grad_norm": 0.703125, |
| "learning_rate": 0.0001, |
| "loss": 1.5141767263412476, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.05875676036589437, |
| "grad_norm": 0.671875, |
| "learning_rate": 0.0001, |
| "loss": 1.39532470703125, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.05902383654937571, |
| "grad_norm": 0.70703125, |
| "learning_rate": 0.0001, |
| "loss": 1.5537728071212769, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.05929091273285705, |
| "grad_norm": 0.61328125, |
| "learning_rate": 0.0001, |
| "loss": 1.3248519897460938, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.059557988916338386, |
| "grad_norm": 0.66796875, |
| "learning_rate": 0.0001, |
| "loss": 1.4280136823654175, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.059825065099819726, |
| "grad_norm": 0.67578125, |
| "learning_rate": 0.0001, |
| "loss": 1.590162754058838, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.060092141283301065, |
| "grad_norm": 0.68359375, |
| "learning_rate": 0.0001, |
| "loss": 1.5274425745010376, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.0603592174667824, |
| "grad_norm": 0.69140625, |
| "learning_rate": 0.0001, |
| "loss": 1.538464903831482, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.060626293650263736, |
| "grad_norm": 0.67578125, |
| "learning_rate": 0.0001, |
| "loss": 1.43494713306427, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.060893369833745076, |
| "grad_norm": 0.83984375, |
| "learning_rate": 0.0001, |
| "loss": 1.3913284540176392, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.061160446017226415, |
| "grad_norm": 0.6953125, |
| "learning_rate": 0.0001, |
| "loss": 1.4944510459899902, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.061427522200707754, |
| "grad_norm": 0.66796875, |
| "learning_rate": 0.0001, |
| "loss": 1.384867548942566, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.06169459838418909, |
| "grad_norm": 0.65625, |
| "learning_rate": 0.0001, |
| "loss": 1.421984314918518, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.061961674567670426, |
| "grad_norm": 0.7578125, |
| "learning_rate": 0.0001, |
| "loss": 1.52085280418396, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.062228750751151765, |
| "grad_norm": 0.7109375, |
| "learning_rate": 0.0001, |
| "loss": 1.5080655813217163, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.062495826934633104, |
| "grad_norm": 0.71875, |
| "learning_rate": 0.0001, |
| "loss": 1.5477678775787354, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.06276290311811444, |
| "grad_norm": 0.6640625, |
| "learning_rate": 0.0001, |
| "loss": 1.577529788017273, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.06302997930159578, |
| "grad_norm": 0.63671875, |
| "learning_rate": 0.0001, |
| "loss": 1.4905290603637695, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.06329705548507712, |
| "grad_norm": 0.671875, |
| "learning_rate": 0.0001, |
| "loss": 1.4339290857315063, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.06356413166855845, |
| "grad_norm": 0.65625, |
| "learning_rate": 0.0001, |
| "loss": 1.4818267822265625, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.0638312078520398, |
| "grad_norm": 0.69921875, |
| "learning_rate": 0.0001, |
| "loss": 1.484571099281311, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.06409828403552113, |
| "grad_norm": 0.7265625, |
| "learning_rate": 0.0001, |
| "loss": 1.4844155311584473, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.06436536021900247, |
| "grad_norm": 0.66796875, |
| "learning_rate": 0.0001, |
| "loss": 1.3982433080673218, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.06463243640248381, |
| "grad_norm": 0.66015625, |
| "learning_rate": 0.0001, |
| "loss": 1.3840627670288086, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.06489951258596514, |
| "grad_norm": 1.5, |
| "learning_rate": 0.0001, |
| "loss": 1.516506552696228, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.06516658876944649, |
| "grad_norm": 0.6796875, |
| "learning_rate": 0.0001, |
| "loss": 1.381393551826477, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.06543366495292782, |
| "grad_norm": 0.7578125, |
| "learning_rate": 0.0001, |
| "loss": 1.461839199066162, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.06570074113640915, |
| "grad_norm": 0.828125, |
| "learning_rate": 0.0001, |
| "loss": 1.5335147380828857, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.0659678173198905, |
| "grad_norm": 0.6796875, |
| "learning_rate": 0.0001, |
| "loss": 1.5478235483169556, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.06623489350337183, |
| "grad_norm": 0.65234375, |
| "learning_rate": 0.0001, |
| "loss": 1.376564383506775, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.06650196968685318, |
| "grad_norm": 0.6328125, |
| "learning_rate": 0.0001, |
| "loss": 1.3668392896652222, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.06676904587033451, |
| "grad_norm": 0.65625, |
| "learning_rate": 0.0001, |
| "loss": 1.4300906658172607, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.06703612205381584, |
| "grad_norm": 0.68359375, |
| "learning_rate": 0.0001, |
| "loss": 1.4604257345199585, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.06730319823729719, |
| "grad_norm": 0.64453125, |
| "learning_rate": 0.0001, |
| "loss": 1.4916373491287231, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.06757027442077852, |
| "grad_norm": 0.68359375, |
| "learning_rate": 0.0001, |
| "loss": 1.5384293794631958, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.06783735060425987, |
| "grad_norm": 0.64453125, |
| "learning_rate": 0.0001, |
| "loss": 1.4481867551803589, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.0681044267877412, |
| "grad_norm": 0.6328125, |
| "learning_rate": 0.0001, |
| "loss": 1.3729946613311768, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.06837150297122255, |
| "grad_norm": 0.7265625, |
| "learning_rate": 0.0001, |
| "loss": 1.4905198812484741, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.06863857915470388, |
| "grad_norm": 0.61328125, |
| "learning_rate": 0.0001, |
| "loss": 1.3511143922805786, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.06890565533818521, |
| "grad_norm": 0.6875, |
| "learning_rate": 0.0001, |
| "loss": 1.546562671661377, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.06917273152166656, |
| "grad_norm": 0.65234375, |
| "learning_rate": 0.0001, |
| "loss": 1.4856135845184326, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.06943980770514789, |
| "grad_norm": 0.6796875, |
| "learning_rate": 0.0001, |
| "loss": 1.5204493999481201, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.06970688388862924, |
| "grad_norm": 0.70703125, |
| "learning_rate": 0.0001, |
| "loss": 1.496220350265503, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.06997396007211057, |
| "grad_norm": 0.7109375, |
| "learning_rate": 0.0001, |
| "loss": 1.453678846359253, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.0702410362555919, |
| "grad_norm": 0.625, |
| "learning_rate": 0.0001, |
| "loss": 1.4709477424621582, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.07050811243907325, |
| "grad_norm": 0.65625, |
| "learning_rate": 0.0001, |
| "loss": 1.420995831489563, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.07077518862255458, |
| "grad_norm": 0.640625, |
| "learning_rate": 0.0001, |
| "loss": 1.4407060146331787, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.07104226480603593, |
| "grad_norm": 0.8203125, |
| "learning_rate": 0.0001, |
| "loss": 1.4003022909164429, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.07130934098951726, |
| "grad_norm": 0.6796875, |
| "learning_rate": 0.0001, |
| "loss": 1.4565346240997314, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.07157641717299859, |
| "grad_norm": 0.625, |
| "learning_rate": 0.0001, |
| "loss": 1.4498852491378784, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.07184349335647994, |
| "grad_norm": 0.6640625, |
| "learning_rate": 0.0001, |
| "loss": 1.437258243560791, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.07211056953996127, |
| "grad_norm": 0.625, |
| "learning_rate": 0.0001, |
| "loss": 1.4084056615829468, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.07237764572344262, |
| "grad_norm": 0.66015625, |
| "learning_rate": 0.0001, |
| "loss": 1.410775899887085, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.07264472190692395, |
| "grad_norm": 0.6328125, |
| "learning_rate": 0.0001, |
| "loss": 1.4345930814743042, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.0729117980904053, |
| "grad_norm": 0.63671875, |
| "learning_rate": 0.0001, |
| "loss": 1.431238055229187, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.07317887427388663, |
| "grad_norm": 0.68359375, |
| "learning_rate": 0.0001, |
| "loss": 1.491136074066162, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.07344595045736796, |
| "grad_norm": 0.6796875, |
| "learning_rate": 0.0001, |
| "loss": 1.3734588623046875, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.0737130266408493, |
| "grad_norm": 0.6328125, |
| "learning_rate": 0.0001, |
| "loss": 1.3641507625579834, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.07398010282433064, |
| "grad_norm": 0.65625, |
| "learning_rate": 0.0001, |
| "loss": 1.499869704246521, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.07424717900781198, |
| "grad_norm": 0.62890625, |
| "learning_rate": 0.0001, |
| "loss": 1.4527770280838013, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.07451425519129332, |
| "grad_norm": 0.6640625, |
| "learning_rate": 0.0001, |
| "loss": 1.450440526008606, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.07478133137477465, |
| "grad_norm": 0.6328125, |
| "learning_rate": 0.0001, |
| "loss": 1.3640730381011963, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.075048407558256, |
| "grad_norm": 0.7265625, |
| "learning_rate": 0.0001, |
| "loss": 1.3600690364837646, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.07531548374173733, |
| "grad_norm": 0.65625, |
| "learning_rate": 0.0001, |
| "loss": 1.3847788572311401, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.07558255992521867, |
| "grad_norm": 0.6328125, |
| "learning_rate": 0.0001, |
| "loss": 1.471301555633545, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.0758496361087, |
| "grad_norm": 0.609375, |
| "learning_rate": 0.0001, |
| "loss": 1.3216960430145264, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.07611671229218135, |
| "grad_norm": 0.62890625, |
| "learning_rate": 0.0001, |
| "loss": 1.4935619831085205, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.07638378847566268, |
| "grad_norm": 0.65234375, |
| "learning_rate": 0.0001, |
| "loss": 1.4458503723144531, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.07665086465914402, |
| "grad_norm": 0.6328125, |
| "learning_rate": 0.0001, |
| "loss": 1.3823765516281128, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.07691794084262536, |
| "grad_norm": 0.62890625, |
| "learning_rate": 0.0001, |
| "loss": 1.43092942237854, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.0771850170261067, |
| "grad_norm": 0.6171875, |
| "learning_rate": 0.0001, |
| "loss": 1.2901136875152588, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.07745209320958804, |
| "grad_norm": 0.625, |
| "learning_rate": 0.0001, |
| "loss": 1.4777882099151611, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.07771916939306937, |
| "grad_norm": 0.6328125, |
| "learning_rate": 0.0001, |
| "loss": 1.3023655414581299, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.0779862455765507, |
| "grad_norm": 0.65234375, |
| "learning_rate": 0.0001, |
| "loss": 1.4468395709991455, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.07825332176003205, |
| "grad_norm": 0.66796875, |
| "learning_rate": 0.0001, |
| "loss": 1.5395652055740356, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.07852039794351338, |
| "grad_norm": 0.6484375, |
| "learning_rate": 0.0001, |
| "loss": 1.5038352012634277, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.07878747412699473, |
| "grad_norm": 0.65234375, |
| "learning_rate": 0.0001, |
| "loss": 1.4570817947387695, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.07905455031047606, |
| "grad_norm": 0.66015625, |
| "learning_rate": 0.0001, |
| "loss": 1.4961787462234497, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.0793216264939574, |
| "grad_norm": 0.73828125, |
| "learning_rate": 0.0001, |
| "loss": 1.413039207458496, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.07958870267743874, |
| "grad_norm": 0.6171875, |
| "learning_rate": 0.0001, |
| "loss": 1.4544329643249512, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.07985577886092007, |
| "grad_norm": 0.6796875, |
| "learning_rate": 0.0001, |
| "loss": 1.4016033411026, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.08012285504440142, |
| "grad_norm": 0.63671875, |
| "learning_rate": 0.0001, |
| "loss": 1.4141845703125, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.08038993122788275, |
| "grad_norm": 0.62890625, |
| "learning_rate": 0.0001, |
| "loss": 1.300679326057434, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.0806570074113641, |
| "grad_norm": 0.66796875, |
| "learning_rate": 0.0001, |
| "loss": 1.4802281856536865, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.08092408359484543, |
| "grad_norm": 0.71875, |
| "learning_rate": 0.0001, |
| "loss": 1.4601387977600098, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.08119115977832676, |
| "grad_norm": 0.58984375, |
| "learning_rate": 0.0001, |
| "loss": 1.3691799640655518, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.08145823596180811, |
| "grad_norm": 0.66796875, |
| "learning_rate": 0.0001, |
| "loss": 1.4916398525238037, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.08172531214528944, |
| "grad_norm": 0.734375, |
| "learning_rate": 0.0001, |
| "loss": 1.3630765676498413, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.08199238832877079, |
| "grad_norm": 0.6875, |
| "learning_rate": 0.0001, |
| "loss": 1.3123859167099, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.08225946451225212, |
| "grad_norm": 0.640625, |
| "learning_rate": 0.0001, |
| "loss": 1.4827555418014526, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.08252654069573345, |
| "grad_norm": 0.640625, |
| "learning_rate": 0.0001, |
| "loss": 1.3617500066757202, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.0827936168792148, |
| "grad_norm": 0.69140625, |
| "learning_rate": 0.0001, |
| "loss": 1.5043036937713623, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.08306069306269613, |
| "grad_norm": 0.61328125, |
| "learning_rate": 0.0001, |
| "loss": 1.49618399143219, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.08332776924617748, |
| "grad_norm": 0.59765625, |
| "learning_rate": 0.0001, |
| "loss": 1.5405969619750977, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.08359484542965881, |
| "grad_norm": 0.65234375, |
| "learning_rate": 0.0001, |
| "loss": 1.458032488822937, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.08386192161314014, |
| "grad_norm": 0.609375, |
| "learning_rate": 0.0001, |
| "loss": 1.3159329891204834, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.08412899779662149, |
| "grad_norm": 0.6015625, |
| "learning_rate": 0.0001, |
| "loss": 1.497544765472412, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.08439607398010282, |
| "grad_norm": 0.6171875, |
| "learning_rate": 0.0001, |
| "loss": 1.4141039848327637, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.08466315016358417, |
| "grad_norm": 0.63671875, |
| "learning_rate": 0.0001, |
| "loss": 1.4044368267059326, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.0849302263470655, |
| "grad_norm": 0.6171875, |
| "learning_rate": 0.0001, |
| "loss": 1.4094866514205933, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.08519730253054685, |
| "grad_norm": 0.61328125, |
| "learning_rate": 0.0001, |
| "loss": 1.4708569049835205, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.08546437871402818, |
| "grad_norm": 0.62890625, |
| "learning_rate": 0.0001, |
| "loss": 1.490856409072876, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.08573145489750951, |
| "grad_norm": 0.671875, |
| "learning_rate": 0.0001, |
| "loss": 1.4941420555114746, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.08599853108099086, |
| "grad_norm": 0.66796875, |
| "learning_rate": 0.0001, |
| "loss": 1.4737300872802734, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.08626560726447219, |
| "grad_norm": 0.65625, |
| "learning_rate": 0.0001, |
| "loss": 1.4835773706436157, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.08653268344795353, |
| "grad_norm": 0.6484375, |
| "learning_rate": 0.0001, |
| "loss": 1.3013849258422852, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.08679975963143487, |
| "grad_norm": 0.59765625, |
| "learning_rate": 0.0001, |
| "loss": 1.456613540649414, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.0870668358149162, |
| "grad_norm": 0.62109375, |
| "learning_rate": 0.0001, |
| "loss": 1.4825116395950317, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.08733391199839755, |
| "grad_norm": 0.6171875, |
| "learning_rate": 0.0001, |
| "loss": 1.5193110704421997, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.08760098818187888, |
| "grad_norm": 0.6328125, |
| "learning_rate": 0.0001, |
| "loss": 1.4921228885650635, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.08786806436536022, |
| "grad_norm": 0.6328125, |
| "learning_rate": 0.0001, |
| "loss": 1.5096917152404785, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.08813514054884156, |
| "grad_norm": 0.6640625, |
| "learning_rate": 0.0001, |
| "loss": 1.40569269657135, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.08840221673232289, |
| "grad_norm": 0.62890625, |
| "learning_rate": 0.0001, |
| "loss": 1.3891390562057495, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.08866929291580423, |
| "grad_norm": 0.62109375, |
| "learning_rate": 0.0001, |
| "loss": 1.431661605834961, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.08893636909928557, |
| "grad_norm": 0.66015625, |
| "learning_rate": 0.0001, |
| "loss": 1.3885363340377808, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.08920344528276691, |
| "grad_norm": 0.640625, |
| "learning_rate": 0.0001, |
| "loss": 1.4512131214141846, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.08947052146624825, |
| "grad_norm": 0.59765625, |
| "learning_rate": 0.0001, |
| "loss": 1.404493808746338, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.08973759764972959, |
| "grad_norm": 0.61328125, |
| "learning_rate": 0.0001, |
| "loss": 1.3655177354812622, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.09000467383321092, |
| "grad_norm": 0.60546875, |
| "learning_rate": 0.0001, |
| "loss": 1.2801893949508667, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.09027175001669226, |
| "grad_norm": 0.671875, |
| "learning_rate": 0.0001, |
| "loss": 1.470276117324829, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.0905388262001736, |
| "grad_norm": 0.609375, |
| "learning_rate": 0.0001, |
| "loss": 1.4459853172302246, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.09080590238365494, |
| "grad_norm": 0.640625, |
| "learning_rate": 0.0001, |
| "loss": 1.4520962238311768, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.09107297856713628, |
| "grad_norm": 0.60546875, |
| "learning_rate": 0.0001, |
| "loss": 1.3865824937820435, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.09134005475061761, |
| "grad_norm": 0.59375, |
| "learning_rate": 0.0001, |
| "loss": 1.3370836973190308, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.09160713093409895, |
| "grad_norm": 0.58984375, |
| "learning_rate": 0.0001, |
| "loss": 1.4441577196121216, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.09187420711758029, |
| "grad_norm": 0.64453125, |
| "learning_rate": 0.0001, |
| "loss": 1.4073468446731567, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.09214128330106162, |
| "grad_norm": 0.640625, |
| "learning_rate": 0.0001, |
| "loss": 1.3208041191101074, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.09240835948454297, |
| "grad_norm": 0.66796875, |
| "learning_rate": 0.0001, |
| "loss": 1.5300800800323486, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.0926754356680243, |
| "grad_norm": 0.625, |
| "learning_rate": 0.0001, |
| "loss": 1.4317735433578491, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.09294251185150564, |
| "grad_norm": 0.58984375, |
| "learning_rate": 0.0001, |
| "loss": 1.1871222257614136, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.09320958803498698, |
| "grad_norm": 0.6015625, |
| "learning_rate": 0.0001, |
| "loss": 1.3505630493164062, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.09347666421846831, |
| "grad_norm": 0.64453125, |
| "learning_rate": 0.0001, |
| "loss": 1.4304125308990479, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.09374374040194966, |
| "grad_norm": 0.6015625, |
| "learning_rate": 0.0001, |
| "loss": 1.4102681875228882, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.09401081658543099, |
| "grad_norm": 0.625, |
| "learning_rate": 0.0001, |
| "loss": 1.49142324924469, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.09427789276891234, |
| "grad_norm": 0.6015625, |
| "learning_rate": 0.0001, |
| "loss": 1.3962408304214478, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.09454496895239367, |
| "grad_norm": 0.58984375, |
| "learning_rate": 0.0001, |
| "loss": 1.273288607597351, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.094812045135875, |
| "grad_norm": 0.6796875, |
| "learning_rate": 0.0001, |
| "loss": 1.4168131351470947, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.09507912131935635, |
| "grad_norm": 0.6171875, |
| "learning_rate": 0.0001, |
| "loss": 1.3748528957366943, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.09534619750283768, |
| "grad_norm": 0.59375, |
| "learning_rate": 0.0001, |
| "loss": 1.3892818689346313, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.09561327368631903, |
| "grad_norm": 0.65234375, |
| "learning_rate": 0.0001, |
| "loss": 1.453303575515747, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.09588034986980036, |
| "grad_norm": 0.609375, |
| "learning_rate": 0.0001, |
| "loss": 1.4077439308166504, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.09614742605328169, |
| "grad_norm": 0.59765625, |
| "learning_rate": 0.0001, |
| "loss": 1.4063971042633057, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.09641450223676304, |
| "grad_norm": 0.703125, |
| "learning_rate": 0.0001, |
| "loss": 1.4442360401153564, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.09668157842024437, |
| "grad_norm": 0.62109375, |
| "learning_rate": 0.0001, |
| "loss": 1.4110056161880493, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.09694865460372572, |
| "grad_norm": 0.609375, |
| "learning_rate": 0.0001, |
| "loss": 1.5454267263412476, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.09721573078720705, |
| "grad_norm": 0.6015625, |
| "learning_rate": 0.0001, |
| "loss": 1.4862934350967407, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.09748280697068838, |
| "grad_norm": 0.60546875, |
| "learning_rate": 0.0001, |
| "loss": 1.4219492673873901, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.09774988315416973, |
| "grad_norm": 0.58203125, |
| "learning_rate": 0.0001, |
| "loss": 1.4686615467071533, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.09801695933765106, |
| "grad_norm": 0.58203125, |
| "learning_rate": 0.0001, |
| "loss": 1.3849600553512573, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.0982840355211324, |
| "grad_norm": 0.6015625, |
| "learning_rate": 0.0001, |
| "loss": 1.442658543586731, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.09855111170461374, |
| "grad_norm": 0.6015625, |
| "learning_rate": 0.0001, |
| "loss": 1.3929274082183838, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.09881818788809509, |
| "grad_norm": 0.62109375, |
| "learning_rate": 0.0001, |
| "loss": 1.511911392211914, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.09908526407157642, |
| "grad_norm": 0.640625, |
| "learning_rate": 0.0001, |
| "loss": 1.3419064283370972, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.09935234025505775, |
| "grad_norm": 0.59765625, |
| "learning_rate": 0.0001, |
| "loss": 1.3798582553863525, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.0996194164385391, |
| "grad_norm": 0.6015625, |
| "learning_rate": 0.0001, |
| "loss": 1.4674209356307983, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.09988649262202043, |
| "grad_norm": 0.58984375, |
| "learning_rate": 0.0001, |
| "loss": 1.4218908548355103, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.10015356880550177, |
| "grad_norm": 0.59765625, |
| "learning_rate": 0.0001, |
| "loss": 1.341963529586792, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.10042064498898311, |
| "grad_norm": 0.5859375, |
| "learning_rate": 0.0001, |
| "loss": 1.4310853481292725, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.10068772117246444, |
| "grad_norm": 0.60546875, |
| "learning_rate": 0.0001, |
| "loss": 1.4752094745635986, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.10095479735594579, |
| "grad_norm": 0.60546875, |
| "learning_rate": 0.0001, |
| "loss": 1.3446142673492432, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.10122187353942712, |
| "grad_norm": 0.62109375, |
| "learning_rate": 0.0001, |
| "loss": 1.4467658996582031, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.10148894972290846, |
| "grad_norm": 0.6015625, |
| "learning_rate": 0.0001, |
| "loss": 1.4706642627716064, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.1017560259063898, |
| "grad_norm": 0.58984375, |
| "learning_rate": 0.0001, |
| "loss": 1.4760010242462158, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.10202310208987113, |
| "grad_norm": 0.6015625, |
| "learning_rate": 0.0001, |
| "loss": 1.4614394903182983, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.10229017827335248, |
| "grad_norm": 0.5859375, |
| "learning_rate": 0.0001, |
| "loss": 1.48732590675354, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.10255725445683381, |
| "grad_norm": 0.66015625, |
| "learning_rate": 0.0001, |
| "loss": 1.4869389533996582, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.10282433064031515, |
| "grad_norm": 0.60546875, |
| "learning_rate": 0.0001, |
| "loss": 1.455293893814087, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.10309140682379649, |
| "grad_norm": 0.6015625, |
| "learning_rate": 0.0001, |
| "loss": 1.412632703781128, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.10335848300727783, |
| "grad_norm": 0.640625, |
| "learning_rate": 0.0001, |
| "loss": 1.4052395820617676, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.10362555919075916, |
| "grad_norm": 0.58984375, |
| "learning_rate": 0.0001, |
| "loss": 1.3353341817855835, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.1038926353742405, |
| "grad_norm": 0.6015625, |
| "learning_rate": 0.0001, |
| "loss": 1.3652608394622803, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.10415971155772184, |
| "grad_norm": 0.578125, |
| "learning_rate": 0.0001, |
| "loss": 1.3833329677581787, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.10442678774120318, |
| "grad_norm": 0.6328125, |
| "learning_rate": 0.0001, |
| "loss": 1.3956871032714844, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.10469386392468452, |
| "grad_norm": 0.58984375, |
| "learning_rate": 0.0001, |
| "loss": 1.2527759075164795, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.10496094010816585, |
| "grad_norm": 0.609375, |
| "learning_rate": 0.0001, |
| "loss": 1.3367494344711304, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.10522801629164719, |
| "grad_norm": 0.62109375, |
| "learning_rate": 0.0001, |
| "loss": 1.4054964780807495, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.10549509247512853, |
| "grad_norm": 0.671875, |
| "learning_rate": 0.0001, |
| "loss": 1.4781296253204346, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.10576216865860986, |
| "grad_norm": 0.56640625, |
| "learning_rate": 0.0001, |
| "loss": 1.158512830734253, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.10602924484209121, |
| "grad_norm": 0.60546875, |
| "learning_rate": 0.0001, |
| "loss": 1.4767026901245117, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.10629632102557254, |
| "grad_norm": 0.58984375, |
| "learning_rate": 0.0001, |
| "loss": 1.3846932649612427, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.10656339720905389, |
| "grad_norm": 0.640625, |
| "learning_rate": 0.0001, |
| "loss": 1.434056282043457, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.10683047339253522, |
| "grad_norm": 0.609375, |
| "learning_rate": 0.0001, |
| "loss": 1.3218588829040527, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.10709754957601655, |
| "grad_norm": 0.69140625, |
| "learning_rate": 0.0001, |
| "loss": 1.4285893440246582, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.1073646257594979, |
| "grad_norm": 0.59375, |
| "learning_rate": 0.0001, |
| "loss": 1.4643278121948242, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.10763170194297923, |
| "grad_norm": 0.5859375, |
| "learning_rate": 0.0001, |
| "loss": 1.3895577192306519, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.10789877812646058, |
| "grad_norm": 0.5703125, |
| "learning_rate": 0.0001, |
| "loss": 1.4688113927841187, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.10816585430994191, |
| "grad_norm": 0.546875, |
| "learning_rate": 0.0001, |
| "loss": 1.3960481882095337, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.10843293049342324, |
| "grad_norm": 0.57421875, |
| "learning_rate": 0.0001, |
| "loss": 1.3620350360870361, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.10870000667690459, |
| "grad_norm": 0.6328125, |
| "learning_rate": 0.0001, |
| "loss": 1.308117389678955, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.10896708286038592, |
| "grad_norm": 0.62109375, |
| "learning_rate": 0.0001, |
| "loss": 1.3918156623840332, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.10923415904386727, |
| "grad_norm": 0.61328125, |
| "learning_rate": 0.0001, |
| "loss": 1.2705599069595337, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.1095012352273486, |
| "grad_norm": 0.61328125, |
| "learning_rate": 0.0001, |
| "loss": 1.3722550868988037, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.10976831141082993, |
| "grad_norm": 0.6015625, |
| "learning_rate": 0.0001, |
| "loss": 1.3725674152374268, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.11003538759431128, |
| "grad_norm": 0.5859375, |
| "learning_rate": 0.0001, |
| "loss": 1.4689661264419556, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.11030246377779261, |
| "grad_norm": 0.6640625, |
| "learning_rate": 0.0001, |
| "loss": 1.4475466012954712, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.11056953996127396, |
| "grad_norm": 0.58984375, |
| "learning_rate": 0.0001, |
| "loss": 1.310787558555603, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.11083661614475529, |
| "grad_norm": 0.63671875, |
| "learning_rate": 0.0001, |
| "loss": 1.3508751392364502, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.11110369232823664, |
| "grad_norm": 0.6015625, |
| "learning_rate": 0.0001, |
| "loss": 1.2748651504516602, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.11137076851171797, |
| "grad_norm": 0.5625, |
| "learning_rate": 0.0001, |
| "loss": 1.429402232170105, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.1116378446951993, |
| "grad_norm": 0.6015625, |
| "learning_rate": 0.0001, |
| "loss": 1.4464179277420044, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.11190492087868065, |
| "grad_norm": 0.6015625, |
| "learning_rate": 0.0001, |
| "loss": 1.5171209573745728, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.11217199706216198, |
| "grad_norm": 0.65234375, |
| "learning_rate": 0.0001, |
| "loss": 1.3573802709579468, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.11243907324564333, |
| "grad_norm": 0.5859375, |
| "learning_rate": 0.0001, |
| "loss": 1.268883228302002, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.11270614942912466, |
| "grad_norm": 0.57421875, |
| "learning_rate": 0.0001, |
| "loss": 1.4801915884017944, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.11297322561260599, |
| "grad_norm": 0.62109375, |
| "learning_rate": 0.0001, |
| "loss": 1.5770097970962524, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.11324030179608734, |
| "grad_norm": 0.6328125, |
| "learning_rate": 0.0001, |
| "loss": 1.3894881010055542, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.11350737797956867, |
| "grad_norm": 0.6796875, |
| "learning_rate": 0.0001, |
| "loss": 1.4948337078094482, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.11377445416305002, |
| "grad_norm": 0.60546875, |
| "learning_rate": 0.0001, |
| "loss": 1.4342015981674194, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.11404153034653135, |
| "grad_norm": 0.58984375, |
| "learning_rate": 0.0001, |
| "loss": 1.3664584159851074, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.11430860653001268, |
| "grad_norm": 0.67578125, |
| "learning_rate": 0.0001, |
| "loss": 1.3029652833938599, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.11457568271349403, |
| "grad_norm": 0.59375, |
| "learning_rate": 0.0001, |
| "loss": 1.3096115589141846, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.11484275889697536, |
| "grad_norm": 0.578125, |
| "learning_rate": 0.0001, |
| "loss": 1.4837114810943604, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.1151098350804567, |
| "grad_norm": 0.59375, |
| "learning_rate": 0.0001, |
| "loss": 1.4767651557922363, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.11537691126393804, |
| "grad_norm": 0.58203125, |
| "learning_rate": 0.0001, |
| "loss": 1.3935829401016235, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.11564398744741938, |
| "grad_norm": 0.625, |
| "learning_rate": 0.0001, |
| "loss": 1.4307037591934204, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.11591106363090072, |
| "grad_norm": 0.609375, |
| "learning_rate": 0.0001, |
| "loss": 1.4892133474349976, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.11617813981438205, |
| "grad_norm": 0.58203125, |
| "learning_rate": 0.0001, |
| "loss": 1.427190899848938, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.1164452159978634, |
| "grad_norm": 0.59375, |
| "learning_rate": 0.0001, |
| "loss": 1.2890833616256714, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.11671229218134473, |
| "grad_norm": 0.62109375, |
| "learning_rate": 0.0001, |
| "loss": 1.4582980871200562, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.11697936836482607, |
| "grad_norm": 0.59375, |
| "learning_rate": 0.0001, |
| "loss": 1.3579460382461548, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.1172464445483074, |
| "grad_norm": 0.57421875, |
| "learning_rate": 0.0001, |
| "loss": 1.4087233543395996, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.11751352073178874, |
| "grad_norm": 0.6015625, |
| "learning_rate": 0.0001, |
| "loss": 1.4538408517837524, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.11778059691527008, |
| "grad_norm": 0.58203125, |
| "learning_rate": 0.0001, |
| "loss": 1.4369275569915771, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.11804767309875142, |
| "grad_norm": 0.59765625, |
| "learning_rate": 0.0001, |
| "loss": 1.3921966552734375, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.11831474928223276, |
| "grad_norm": 0.58984375, |
| "learning_rate": 0.0001, |
| "loss": 1.3368277549743652, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.1185818254657141, |
| "grad_norm": 0.59765625, |
| "learning_rate": 0.0001, |
| "loss": 1.34857976436615, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.11884890164919543, |
| "grad_norm": 0.59375, |
| "learning_rate": 0.0001, |
| "loss": 1.4927914142608643, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.11911597783267677, |
| "grad_norm": 0.6328125, |
| "learning_rate": 0.0001, |
| "loss": 1.402164340019226, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.1193830540161581, |
| "grad_norm": 0.58203125, |
| "learning_rate": 0.0001, |
| "loss": 1.3122864961624146, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.11965013019963945, |
| "grad_norm": 0.62890625, |
| "learning_rate": 0.0001, |
| "loss": 1.1882364749908447, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.11991720638312078, |
| "grad_norm": 0.609375, |
| "learning_rate": 0.0001, |
| "loss": 1.371604323387146, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.12018428256660213, |
| "grad_norm": 0.5859375, |
| "learning_rate": 0.0001, |
| "loss": 1.3432501554489136, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.12045135875008346, |
| "grad_norm": 0.59765625, |
| "learning_rate": 0.0001, |
| "loss": 1.43648362159729, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.1207184349335648, |
| "grad_norm": 0.62890625, |
| "learning_rate": 0.0001, |
| "loss": 1.3512425422668457, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.12098551111704614, |
| "grad_norm": 0.61328125, |
| "learning_rate": 0.0001, |
| "loss": 1.4198323488235474, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.12125258730052747, |
| "grad_norm": 0.6015625, |
| "learning_rate": 0.0001, |
| "loss": 1.3973807096481323, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.12151966348400882, |
| "grad_norm": 0.6015625, |
| "learning_rate": 0.0001, |
| "loss": 1.4904663562774658, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.12178673966749015, |
| "grad_norm": 0.5625, |
| "learning_rate": 0.0001, |
| "loss": 1.3683151006698608, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.12205381585097148, |
| "grad_norm": 0.55859375, |
| "learning_rate": 0.0001, |
| "loss": 1.47139310836792, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.12232089203445283, |
| "grad_norm": 0.58203125, |
| "learning_rate": 0.0001, |
| "loss": 1.4727649688720703, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.12258796821793416, |
| "grad_norm": 0.68359375, |
| "learning_rate": 0.0001, |
| "loss": 1.4294495582580566, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.12285504440141551, |
| "grad_norm": 0.58984375, |
| "learning_rate": 0.0001, |
| "loss": 1.4658524990081787, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.12312212058489684, |
| "grad_norm": 0.5859375, |
| "learning_rate": 0.0001, |
| "loss": 1.457288146018982, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.12338919676837817, |
| "grad_norm": 0.55078125, |
| "learning_rate": 0.0001, |
| "loss": 1.3517018556594849, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.12365627295185952, |
| "grad_norm": 0.57421875, |
| "learning_rate": 0.0001, |
| "loss": 1.2725576162338257, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.12392334913534085, |
| "grad_norm": 0.5859375, |
| "learning_rate": 0.0001, |
| "loss": 1.3586177825927734, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.1241904253188222, |
| "grad_norm": 0.55859375, |
| "learning_rate": 0.0001, |
| "loss": 1.2883931398391724, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.12445750150230353, |
| "grad_norm": 0.62109375, |
| "learning_rate": 0.0001, |
| "loss": 1.4876344203948975, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.12472457768578488, |
| "grad_norm": 0.5859375, |
| "learning_rate": 0.0001, |
| "loss": 1.4705077409744263, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.12499165386926621, |
| "grad_norm": 0.57421875, |
| "learning_rate": 0.0001, |
| "loss": 1.412283182144165, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.12525873005274754, |
| "grad_norm": 0.57421875, |
| "learning_rate": 0.0001, |
| "loss": 1.335158348083496, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.1255258062362289, |
| "grad_norm": 0.6015625, |
| "learning_rate": 0.0001, |
| "loss": 1.3694274425506592, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.12579288241971023, |
| "grad_norm": 0.62890625, |
| "learning_rate": 0.0001, |
| "loss": 1.372456669807434, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.12605995860319155, |
| "grad_norm": 0.59375, |
| "learning_rate": 0.0001, |
| "loss": 1.4638047218322754, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.1263270347866729, |
| "grad_norm": 0.58984375, |
| "learning_rate": 0.0001, |
| "loss": 1.413342833518982, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.12659411097015424, |
| "grad_norm": 0.57421875, |
| "learning_rate": 0.0001, |
| "loss": 1.397768497467041, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.12686118715363556, |
| "grad_norm": 0.6015625, |
| "learning_rate": 0.0001, |
| "loss": 1.2481286525726318, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.1271282633371169, |
| "grad_norm": 0.59375, |
| "learning_rate": 0.0001, |
| "loss": 1.4423224925994873, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.12739533952059826, |
| "grad_norm": 0.6171875, |
| "learning_rate": 0.0001, |
| "loss": 1.3875746726989746, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.1276624157040796, |
| "grad_norm": 0.58984375, |
| "learning_rate": 0.0001, |
| "loss": 1.5474607944488525, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.12792949188756092, |
| "grad_norm": 0.56640625, |
| "learning_rate": 0.0001, |
| "loss": 1.352391242980957, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.12819656807104227, |
| "grad_norm": 0.609375, |
| "learning_rate": 0.0001, |
| "loss": 1.472024917602539, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.1284636442545236, |
| "grad_norm": 0.5703125, |
| "learning_rate": 0.0001, |
| "loss": 1.3799539804458618, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.12873072043800493, |
| "grad_norm": 0.56640625, |
| "learning_rate": 0.0001, |
| "loss": 1.3873319625854492, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.12899779662148628, |
| "grad_norm": 0.546875, |
| "learning_rate": 0.0001, |
| "loss": 1.342333197593689, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.12926487280496762, |
| "grad_norm": 0.60546875, |
| "learning_rate": 0.0001, |
| "loss": 1.3376230001449585, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.12953194898844894, |
| "grad_norm": 0.5625, |
| "learning_rate": 0.0001, |
| "loss": 1.3061014413833618, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.1297990251719303, |
| "grad_norm": 0.578125, |
| "learning_rate": 0.0001, |
| "loss": 1.4731895923614502, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.13006610135541163, |
| "grad_norm": 0.5625, |
| "learning_rate": 0.0001, |
| "loss": 1.3420498371124268, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.13033317753889298, |
| "grad_norm": 0.62109375, |
| "learning_rate": 0.0001, |
| "loss": 1.4132871627807617, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.1306002537223743, |
| "grad_norm": 0.57421875, |
| "learning_rate": 0.0001, |
| "loss": 1.4671186208724976, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.13086732990585564, |
| "grad_norm": 0.58984375, |
| "learning_rate": 0.0001, |
| "loss": 1.4150232076644897, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.131134406089337, |
| "grad_norm": 0.546875, |
| "learning_rate": 0.0001, |
| "loss": 1.3895783424377441, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.1314014822728183, |
| "grad_norm": 0.5859375, |
| "learning_rate": 0.0001, |
| "loss": 1.3676741123199463, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.13166855845629966, |
| "grad_norm": 0.625, |
| "learning_rate": 0.0001, |
| "loss": 1.384874701499939, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.131935634639781, |
| "grad_norm": 0.58203125, |
| "learning_rate": 0.0001, |
| "loss": 1.358314037322998, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.13220271082326235, |
| "grad_norm": 0.578125, |
| "learning_rate": 0.0001, |
| "loss": 1.5222220420837402, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.13246978700674367, |
| "grad_norm": 0.59765625, |
| "learning_rate": 0.0001, |
| "loss": 1.3388750553131104, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.132736863190225, |
| "grad_norm": 0.609375, |
| "learning_rate": 0.0001, |
| "loss": 1.4615256786346436, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.13300393937370636, |
| "grad_norm": 0.58984375, |
| "learning_rate": 0.0001, |
| "loss": 1.4470080137252808, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.13327101555718768, |
| "grad_norm": 0.578125, |
| "learning_rate": 0.0001, |
| "loss": 1.3600274324417114, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.13353809174066902, |
| "grad_norm": 0.5859375, |
| "learning_rate": 0.0001, |
| "loss": 1.32597017288208, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.13380516792415037, |
| "grad_norm": 0.58984375, |
| "learning_rate": 0.0001, |
| "loss": 1.4446059465408325, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.1340722441076317, |
| "grad_norm": 0.62109375, |
| "learning_rate": 0.0001, |
| "loss": 1.3945196866989136, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.13433932029111303, |
| "grad_norm": 0.6015625, |
| "learning_rate": 0.0001, |
| "loss": 1.460927128791809, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.13460639647459438, |
| "grad_norm": 0.578125, |
| "learning_rate": 0.0001, |
| "loss": 1.424298644065857, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.13487347265807573, |
| "grad_norm": 0.56640625, |
| "learning_rate": 0.0001, |
| "loss": 1.4830772876739502, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.13514054884155705, |
| "grad_norm": 0.55859375, |
| "learning_rate": 0.0001, |
| "loss": 1.3743841648101807, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.1354076250250384, |
| "grad_norm": 0.6015625, |
| "learning_rate": 0.0001, |
| "loss": 1.5224791765213013, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.13567470120851974, |
| "grad_norm": 0.58203125, |
| "learning_rate": 0.0001, |
| "loss": 1.341582179069519, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.13594177739200106, |
| "grad_norm": 0.58203125, |
| "learning_rate": 0.0001, |
| "loss": 1.3958293199539185, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.1362088535754824, |
| "grad_norm": 0.6328125, |
| "learning_rate": 0.0001, |
| "loss": 1.2593824863433838, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.13647592975896375, |
| "grad_norm": 0.60546875, |
| "learning_rate": 0.0001, |
| "loss": 1.5547127723693848, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.1367430059424451, |
| "grad_norm": 0.67578125, |
| "learning_rate": 0.0001, |
| "loss": 1.3834812641143799, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.1370100821259264, |
| "grad_norm": 0.6015625, |
| "learning_rate": 0.0001, |
| "loss": 1.4330908060073853, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.13727715830940776, |
| "grad_norm": 0.6171875, |
| "learning_rate": 0.0001, |
| "loss": 1.3858208656311035, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.1375442344928891, |
| "grad_norm": 0.5859375, |
| "learning_rate": 0.0001, |
| "loss": 1.2102665901184082, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.13781131067637042, |
| "grad_norm": 0.6328125, |
| "learning_rate": 0.0001, |
| "loss": 1.3554112911224365, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.13807838685985177, |
| "grad_norm": 0.56640625, |
| "learning_rate": 0.0001, |
| "loss": 1.2493407726287842, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.13834546304333312, |
| "grad_norm": 0.57421875, |
| "learning_rate": 0.0001, |
| "loss": 1.2981582880020142, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.13861253922681444, |
| "grad_norm": 0.5859375, |
| "learning_rate": 0.0001, |
| "loss": 1.252562165260315, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.13887961541029578, |
| "grad_norm": 0.6015625, |
| "learning_rate": 0.0001, |
| "loss": 1.4024418592453003, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.13914669159377713, |
| "grad_norm": 0.58203125, |
| "learning_rate": 0.0001, |
| "loss": 1.3612288236618042, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.13941376777725847, |
| "grad_norm": 0.59765625, |
| "learning_rate": 0.0001, |
| "loss": 1.512868881225586, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.1396808439607398, |
| "grad_norm": 0.5859375, |
| "learning_rate": 0.0001, |
| "loss": 1.375083565711975, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.13994792014422114, |
| "grad_norm": 0.640625, |
| "learning_rate": 0.0001, |
| "loss": 1.3480380773544312, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.14021499632770248, |
| "grad_norm": 0.56640625, |
| "learning_rate": 0.0001, |
| "loss": 1.3594597578048706, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.1404820725111838, |
| "grad_norm": 0.57421875, |
| "learning_rate": 0.0001, |
| "loss": 1.3115019798278809, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.14074914869466515, |
| "grad_norm": 0.54296875, |
| "learning_rate": 0.0001, |
| "loss": 1.2540839910507202, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.1410162248781465, |
| "grad_norm": 0.5546875, |
| "learning_rate": 0.0001, |
| "loss": 1.3259623050689697, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.14128330106162784, |
| "grad_norm": 0.56640625, |
| "learning_rate": 0.0001, |
| "loss": 1.2489577531814575, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.14155037724510916, |
| "grad_norm": 0.58984375, |
| "learning_rate": 0.0001, |
| "loss": 1.3643953800201416, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.1418174534285905, |
| "grad_norm": 0.6171875, |
| "learning_rate": 0.0001, |
| "loss": 1.4134252071380615, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.14208452961207185, |
| "grad_norm": 0.55078125, |
| "learning_rate": 0.0001, |
| "loss": 1.3410513401031494, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.14235160579555317, |
| "grad_norm": 0.62890625, |
| "learning_rate": 0.0001, |
| "loss": 1.5260295867919922, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.14261868197903452, |
| "grad_norm": 0.57421875, |
| "learning_rate": 0.0001, |
| "loss": 1.39544677734375, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.14288575816251586, |
| "grad_norm": 0.6171875, |
| "learning_rate": 0.0001, |
| "loss": 1.5388062000274658, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.14315283434599718, |
| "grad_norm": 0.6015625, |
| "learning_rate": 0.0001, |
| "loss": 1.3159434795379639, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.14341991052947853, |
| "grad_norm": 0.56640625, |
| "learning_rate": 0.0001, |
| "loss": 1.3709022998809814, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.14368698671295987, |
| "grad_norm": 0.59375, |
| "learning_rate": 0.0001, |
| "loss": 1.3411214351654053, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.14395406289644122, |
| "grad_norm": 0.5625, |
| "learning_rate": 0.0001, |
| "loss": 1.3669507503509521, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.14422113907992254, |
| "grad_norm": 0.5703125, |
| "learning_rate": 0.0001, |
| "loss": 1.4368212223052979, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.14448821526340389, |
| "grad_norm": 0.5703125, |
| "learning_rate": 0.0001, |
| "loss": 1.392094373703003, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.14475529144688523, |
| "grad_norm": 0.61328125, |
| "learning_rate": 0.0001, |
| "loss": 1.4218195676803589, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.14502236763036655, |
| "grad_norm": 0.58203125, |
| "learning_rate": 0.0001, |
| "loss": 1.3535877466201782, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.1452894438138479, |
| "grad_norm": 0.609375, |
| "learning_rate": 0.0001, |
| "loss": 1.3613425493240356, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.14555651999732924, |
| "grad_norm": 0.5703125, |
| "learning_rate": 0.0001, |
| "loss": 1.4432311058044434, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.1458235961808106, |
| "grad_norm": 0.58203125, |
| "learning_rate": 0.0001, |
| "loss": 1.4000786542892456, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.1460906723642919, |
| "grad_norm": 0.55859375, |
| "learning_rate": 0.0001, |
| "loss": 1.4382905960083008, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.14635774854777325, |
| "grad_norm": 0.56640625, |
| "learning_rate": 0.0001, |
| "loss": 1.3199044466018677, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.1466248247312546, |
| "grad_norm": 0.58984375, |
| "learning_rate": 0.0001, |
| "loss": 1.4392772912979126, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.14689190091473592, |
| "grad_norm": 0.5625, |
| "learning_rate": 0.0001, |
| "loss": 1.2513238191604614, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.14715897709821726, |
| "grad_norm": 0.5859375, |
| "learning_rate": 0.0001, |
| "loss": 1.3273593187332153, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.1474260532816986, |
| "grad_norm": 0.609375, |
| "learning_rate": 0.0001, |
| "loss": 1.4180893898010254, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.14769312946517996, |
| "grad_norm": 0.5703125, |
| "learning_rate": 0.0001, |
| "loss": 1.3058419227600098, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.14796020564866127, |
| "grad_norm": 0.58203125, |
| "learning_rate": 0.0001, |
| "loss": 1.3267779350280762, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.14822728183214262, |
| "grad_norm": 0.72265625, |
| "learning_rate": 0.0001, |
| "loss": 1.4524626731872559, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.14849435801562397, |
| "grad_norm": 0.6796875, |
| "learning_rate": 0.0001, |
| "loss": 1.3795995712280273, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.14876143419910529, |
| "grad_norm": 0.59375, |
| "learning_rate": 0.0001, |
| "loss": 1.3587958812713623, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.14902851038258663, |
| "grad_norm": 0.5859375, |
| "learning_rate": 0.0001, |
| "loss": 1.2744011878967285, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.14929558656606798, |
| "grad_norm": 0.5859375, |
| "learning_rate": 0.0001, |
| "loss": 1.3527799844741821, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.1495626627495493, |
| "grad_norm": 0.71875, |
| "learning_rate": 0.0001, |
| "loss": 1.5189881324768066, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.14982973893303064, |
| "grad_norm": 0.54296875, |
| "learning_rate": 0.0001, |
| "loss": 1.2625980377197266, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.150096815116512, |
| "grad_norm": 0.59765625, |
| "learning_rate": 0.0001, |
| "loss": 1.3527413606643677, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.15036389129999334, |
| "grad_norm": 0.5625, |
| "learning_rate": 0.0001, |
| "loss": 1.3367376327514648, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.15063096748347465, |
| "grad_norm": 0.62890625, |
| "learning_rate": 0.0001, |
| "loss": 1.5792129039764404, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.150898043666956, |
| "grad_norm": 0.61328125, |
| "learning_rate": 0.0001, |
| "loss": 1.3854796886444092, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.15116511985043735, |
| "grad_norm": 0.5859375, |
| "learning_rate": 0.0001, |
| "loss": 1.3474575281143188, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.15143219603391866, |
| "grad_norm": 0.5859375, |
| "learning_rate": 0.0001, |
| "loss": 1.347915530204773, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.1516992722174, |
| "grad_norm": 0.62109375, |
| "learning_rate": 0.0001, |
| "loss": 1.433038353919983, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.15196634840088136, |
| "grad_norm": 0.625, |
| "learning_rate": 0.0001, |
| "loss": 1.2815639972686768, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.1522334245843627, |
| "grad_norm": 0.609375, |
| "learning_rate": 0.0001, |
| "loss": 1.4771748781204224, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.15250050076784402, |
| "grad_norm": 0.62890625, |
| "learning_rate": 0.0001, |
| "loss": 1.4043114185333252, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.15276757695132537, |
| "grad_norm": 0.59765625, |
| "learning_rate": 0.0001, |
| "loss": 1.3523536920547485, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.15303465313480671, |
| "grad_norm": 0.60546875, |
| "learning_rate": 0.0001, |
| "loss": 1.347900629043579, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.15330172931828803, |
| "grad_norm": 0.5625, |
| "learning_rate": 0.0001, |
| "loss": 1.3196823596954346, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.15356880550176938, |
| "grad_norm": 0.58984375, |
| "learning_rate": 0.0001, |
| "loss": 1.472100853919983, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.15383588168525072, |
| "grad_norm": 0.59765625, |
| "learning_rate": 0.0001, |
| "loss": 1.3329824209213257, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.15410295786873204, |
| "grad_norm": 0.625, |
| "learning_rate": 0.0001, |
| "loss": 1.3529117107391357, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.1543700340522134, |
| "grad_norm": 0.62109375, |
| "learning_rate": 0.0001, |
| "loss": 1.507158637046814, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.15463711023569474, |
| "grad_norm": 0.55078125, |
| "learning_rate": 0.0001, |
| "loss": 1.2954974174499512, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.15490418641917608, |
| "grad_norm": 0.56640625, |
| "learning_rate": 0.0001, |
| "loss": 1.4197229146957397, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.1551712626026574, |
| "grad_norm": 0.63671875, |
| "learning_rate": 0.0001, |
| "loss": 1.3419297933578491, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.15543833878613875, |
| "grad_norm": 0.54296875, |
| "learning_rate": 0.0001, |
| "loss": 1.4200016260147095, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.1557054149696201, |
| "grad_norm": 0.54296875, |
| "learning_rate": 0.0001, |
| "loss": 1.434874176979065, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.1559724911531014, |
| "grad_norm": 0.578125, |
| "learning_rate": 0.0001, |
| "loss": 1.4581799507141113, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.15623956733658276, |
| "grad_norm": 0.5859375, |
| "learning_rate": 0.0001, |
| "loss": 1.3460516929626465, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.1565066435200641, |
| "grad_norm": 0.609375, |
| "learning_rate": 0.0001, |
| "loss": 1.2796165943145752, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.15677371970354545, |
| "grad_norm": 0.578125, |
| "learning_rate": 0.0001, |
| "loss": 1.339218258857727, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.15704079588702677, |
| "grad_norm": 0.62109375, |
| "learning_rate": 0.0001, |
| "loss": 1.4210413694381714, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.15730787207050811, |
| "grad_norm": 0.56640625, |
| "learning_rate": 0.0001, |
| "loss": 1.3302514553070068, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.15757494825398946, |
| "grad_norm": 0.5625, |
| "learning_rate": 0.0001, |
| "loss": 1.2733567953109741, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.15784202443747078, |
| "grad_norm": 0.60546875, |
| "learning_rate": 0.0001, |
| "loss": 1.335167407989502, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.15810910062095213, |
| "grad_norm": 0.5703125, |
| "learning_rate": 0.0001, |
| "loss": 1.3808437585830688, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.15837617680443347, |
| "grad_norm": 0.57421875, |
| "learning_rate": 0.0001, |
| "loss": 1.3993290662765503, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.1586432529879148, |
| "grad_norm": 0.64453125, |
| "learning_rate": 0.0001, |
| "loss": 1.4145331382751465, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.15891032917139614, |
| "grad_norm": 0.55078125, |
| "learning_rate": 0.0001, |
| "loss": 1.3426066637039185, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.15917740535487748, |
| "grad_norm": 0.5546875, |
| "learning_rate": 0.0001, |
| "loss": 1.311093807220459, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.15944448153835883, |
| "grad_norm": 0.56640625, |
| "learning_rate": 0.0001, |
| "loss": 1.4404199123382568, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.15971155772184015, |
| "grad_norm": 0.5546875, |
| "learning_rate": 0.0001, |
| "loss": 1.2987630367279053, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.1599786339053215, |
| "grad_norm": 0.578125, |
| "learning_rate": 0.0001, |
| "loss": 1.100266933441162, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.16024571008880284, |
| "grad_norm": 0.53125, |
| "learning_rate": 0.0001, |
| "loss": 1.4733037948608398, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.16051278627228416, |
| "grad_norm": 0.6015625, |
| "learning_rate": 0.0001, |
| "loss": 1.4752408266067505, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.1607798624557655, |
| "grad_norm": 0.5546875, |
| "learning_rate": 0.0001, |
| "loss": 1.365689992904663, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.16104693863924685, |
| "grad_norm": 0.578125, |
| "learning_rate": 0.0001, |
| "loss": 1.4368810653686523, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.1613140148227282, |
| "grad_norm": 0.59375, |
| "learning_rate": 0.0001, |
| "loss": 1.3821358680725098, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.16158109100620952, |
| "grad_norm": 0.5859375, |
| "learning_rate": 0.0001, |
| "loss": 1.3334194421768188, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.16184816718969086, |
| "grad_norm": 0.58203125, |
| "learning_rate": 0.0001, |
| "loss": 1.3358609676361084, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.1621152433731722, |
| "grad_norm": 0.56640625, |
| "learning_rate": 0.0001, |
| "loss": 1.3391547203063965, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.16238231955665353, |
| "grad_norm": 0.5546875, |
| "learning_rate": 0.0001, |
| "loss": 1.271005630493164, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.16264939574013487, |
| "grad_norm": 0.5546875, |
| "learning_rate": 0.0001, |
| "loss": 1.308809518814087, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.16291647192361622, |
| "grad_norm": 0.6171875, |
| "learning_rate": 0.0001, |
| "loss": 1.3306488990783691, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.16318354810709754, |
| "grad_norm": 0.59375, |
| "learning_rate": 0.0001, |
| "loss": 1.3931329250335693, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.16345062429057888, |
| "grad_norm": 0.546875, |
| "learning_rate": 0.0001, |
| "loss": 1.391507863998413, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.16371770047406023, |
| "grad_norm": 0.5390625, |
| "learning_rate": 0.0001, |
| "loss": 1.3093831539154053, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.16398477665754158, |
| "grad_norm": 0.5703125, |
| "learning_rate": 0.0001, |
| "loss": 1.3817846775054932, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.1642518528410229, |
| "grad_norm": 0.55859375, |
| "learning_rate": 0.0001, |
| "loss": 1.2933706045150757, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.16451892902450424, |
| "grad_norm": 0.578125, |
| "learning_rate": 0.0001, |
| "loss": 1.4987157583236694, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.1647860052079856, |
| "grad_norm": 0.57421875, |
| "learning_rate": 0.0001, |
| "loss": 1.4318113327026367, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.1650530813914669, |
| "grad_norm": 0.57421875, |
| "learning_rate": 0.0001, |
| "loss": 1.405381202697754, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.16532015757494825, |
| "grad_norm": 0.56640625, |
| "learning_rate": 0.0001, |
| "loss": 1.3609851598739624, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.1655872337584296, |
| "grad_norm": 0.6015625, |
| "learning_rate": 0.0001, |
| "loss": 1.549910306930542, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.16585430994191094, |
| "grad_norm": 0.58984375, |
| "learning_rate": 0.0001, |
| "loss": 1.4672483205795288, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.16612138612539226, |
| "grad_norm": 0.54296875, |
| "learning_rate": 0.0001, |
| "loss": 1.279062032699585, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.1663884623088736, |
| "grad_norm": 0.6171875, |
| "learning_rate": 0.0001, |
| "loss": 1.398356556892395, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.16665553849235495, |
| "grad_norm": 0.56640625, |
| "learning_rate": 0.0001, |
| "loss": 1.3836430311203003, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.16692261467583627, |
| "grad_norm": 0.578125, |
| "learning_rate": 0.0001, |
| "loss": 1.4430739879608154, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.16718969085931762, |
| "grad_norm": 0.54296875, |
| "learning_rate": 0.0001, |
| "loss": 1.314014196395874, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.16745676704279897, |
| "grad_norm": 0.546875, |
| "learning_rate": 0.0001, |
| "loss": 1.3610138893127441, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.16772384322628028, |
| "grad_norm": 0.58984375, |
| "learning_rate": 0.0001, |
| "loss": 1.4223636388778687, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.16799091940976163, |
| "grad_norm": 0.55078125, |
| "learning_rate": 0.0001, |
| "loss": 1.3471919298171997, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.16825799559324298, |
| "grad_norm": 0.5546875, |
| "learning_rate": 0.0001, |
| "loss": 1.321734070777893, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.16852507177672432, |
| "grad_norm": 0.5625, |
| "learning_rate": 0.0001, |
| "loss": 1.4103505611419678, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.16879214796020564, |
| "grad_norm": 0.5546875, |
| "learning_rate": 0.0001, |
| "loss": 1.366790771484375, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.169059224143687, |
| "grad_norm": 0.55078125, |
| "learning_rate": 0.0001, |
| "loss": 1.3268681764602661, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.16932630032716833, |
| "grad_norm": 0.578125, |
| "learning_rate": 0.0001, |
| "loss": 1.4994505643844604, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.16959337651064965, |
| "grad_norm": 0.5625, |
| "learning_rate": 0.0001, |
| "loss": 1.2760584354400635, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.169860452694131, |
| "grad_norm": 0.57421875, |
| "learning_rate": 0.0001, |
| "loss": 1.5046621561050415, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.17012752887761234, |
| "grad_norm": 0.578125, |
| "learning_rate": 0.0001, |
| "loss": 1.4411461353302002, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.1703946050610937, |
| "grad_norm": 0.53515625, |
| "learning_rate": 0.0001, |
| "loss": 1.2349016666412354, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.170661681244575, |
| "grad_norm": 0.5703125, |
| "learning_rate": 0.0001, |
| "loss": 1.327634334564209, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.17092875742805635, |
| "grad_norm": 0.578125, |
| "learning_rate": 0.0001, |
| "loss": 1.3230228424072266, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.1711958336115377, |
| "grad_norm": 0.5625, |
| "learning_rate": 0.0001, |
| "loss": 1.3155494928359985, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.17146290979501902, |
| "grad_norm": 0.59765625, |
| "learning_rate": 0.0001, |
| "loss": 1.2801501750946045, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.17172998597850037, |
| "grad_norm": 0.5703125, |
| "learning_rate": 0.0001, |
| "loss": 1.3254103660583496, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.1719970621619817, |
| "grad_norm": 0.55078125, |
| "learning_rate": 0.0001, |
| "loss": 1.4413820505142212, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.17226413834546303, |
| "grad_norm": 0.578125, |
| "learning_rate": 0.0001, |
| "loss": 1.3388326168060303, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.17253121452894438, |
| "grad_norm": 0.55859375, |
| "learning_rate": 0.0001, |
| "loss": 1.4060581922531128, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.17279829071242572, |
| "grad_norm": 0.625, |
| "learning_rate": 0.0001, |
| "loss": 1.4181727170944214, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.17306536689590707, |
| "grad_norm": 0.578125, |
| "learning_rate": 0.0001, |
| "loss": 1.3995436429977417, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.1733324430793884, |
| "grad_norm": 0.55078125, |
| "learning_rate": 0.0001, |
| "loss": 1.3278270959854126, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.17359951926286973, |
| "grad_norm": 0.578125, |
| "learning_rate": 0.0001, |
| "loss": 1.396315574645996, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.17386659544635108, |
| "grad_norm": 0.57421875, |
| "learning_rate": 0.0001, |
| "loss": 1.224435806274414, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.1741336716298324, |
| "grad_norm": 0.6328125, |
| "learning_rate": 0.0001, |
| "loss": 1.4477213621139526, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.17440074781331374, |
| "grad_norm": 0.58984375, |
| "learning_rate": 0.0001, |
| "loss": 1.3974782228469849, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.1746678239967951, |
| "grad_norm": 0.56640625, |
| "learning_rate": 0.0001, |
| "loss": 1.4719974994659424, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.17493490018027644, |
| "grad_norm": 0.546875, |
| "learning_rate": 0.0001, |
| "loss": 1.3227864503860474, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.17520197636375776, |
| "grad_norm": 0.56640625, |
| "learning_rate": 0.0001, |
| "loss": 1.4213438034057617, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.1754690525472391, |
| "grad_norm": 0.56640625, |
| "learning_rate": 0.0001, |
| "loss": 1.3693983554840088, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.17573612873072045, |
| "grad_norm": 0.57421875, |
| "learning_rate": 0.0001, |
| "loss": 1.4880212545394897, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.17600320491420177, |
| "grad_norm": 0.56640625, |
| "learning_rate": 0.0001, |
| "loss": 1.315510630607605, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.1762702810976831, |
| "grad_norm": 0.5390625, |
| "learning_rate": 0.0001, |
| "loss": 1.3102097511291504, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.17653735728116446, |
| "grad_norm": 0.54296875, |
| "learning_rate": 0.0001, |
| "loss": 1.2761492729187012, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.17680443346464578, |
| "grad_norm": 0.55078125, |
| "learning_rate": 0.0001, |
| "loss": 1.3073588609695435, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.17707150964812712, |
| "grad_norm": 0.54296875, |
| "learning_rate": 0.0001, |
| "loss": 1.2740814685821533, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.17733858583160847, |
| "grad_norm": 0.56640625, |
| "learning_rate": 0.0001, |
| "loss": 1.3133610486984253, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.17760566201508982, |
| "grad_norm": 0.59375, |
| "learning_rate": 0.0001, |
| "loss": 1.2949233055114746, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.17787273819857113, |
| "grad_norm": 0.6015625, |
| "learning_rate": 0.0001, |
| "loss": 1.2975229024887085, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.17813981438205248, |
| "grad_norm": 0.58203125, |
| "learning_rate": 0.0001, |
| "loss": 1.355084776878357, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.17840689056553383, |
| "grad_norm": 0.58203125, |
| "learning_rate": 0.0001, |
| "loss": 1.3896042108535767, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.17867396674901515, |
| "grad_norm": 0.53125, |
| "learning_rate": 0.0001, |
| "loss": 1.2886717319488525, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.1789410429324965, |
| "grad_norm": 0.6015625, |
| "learning_rate": 0.0001, |
| "loss": 1.3847600221633911, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.17920811911597784, |
| "grad_norm": 0.55078125, |
| "learning_rate": 0.0001, |
| "loss": 1.3929096460342407, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.17947519529945918, |
| "grad_norm": 0.6171875, |
| "learning_rate": 0.0001, |
| "loss": 1.3475611209869385, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.1797422714829405, |
| "grad_norm": 0.55078125, |
| "learning_rate": 0.0001, |
| "loss": 1.5096707344055176, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.18000934766642185, |
| "grad_norm": 0.625, |
| "learning_rate": 0.0001, |
| "loss": 1.4462485313415527, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.1802764238499032, |
| "grad_norm": 0.5703125, |
| "learning_rate": 0.0001, |
| "loss": 1.2290098667144775, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.1805435000333845, |
| "grad_norm": 0.57421875, |
| "learning_rate": 0.0001, |
| "loss": 1.3566489219665527, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.18081057621686586, |
| "grad_norm": 0.56640625, |
| "learning_rate": 0.0001, |
| "loss": 1.2263463735580444, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.1810776524003472, |
| "grad_norm": 0.58984375, |
| "learning_rate": 0.0001, |
| "loss": 1.3689738512039185, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.18134472858382852, |
| "grad_norm": 0.58203125, |
| "learning_rate": 0.0001, |
| "loss": 1.3112177848815918, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.18161180476730987, |
| "grad_norm": 0.5703125, |
| "learning_rate": 0.0001, |
| "loss": 1.4430503845214844, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.18187888095079122, |
| "grad_norm": 0.5546875, |
| "learning_rate": 0.0001, |
| "loss": 1.3968268632888794, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.18214595713427256, |
| "grad_norm": 0.5390625, |
| "learning_rate": 0.0001, |
| "loss": 1.2902494668960571, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.18241303331775388, |
| "grad_norm": 0.55859375, |
| "learning_rate": 0.0001, |
| "loss": 1.368390679359436, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.18268010950123523, |
| "grad_norm": 0.5546875, |
| "learning_rate": 0.0001, |
| "loss": 1.36076021194458, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.18294718568471657, |
| "grad_norm": 0.58984375, |
| "learning_rate": 0.0001, |
| "loss": 1.292517900466919, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.1832142618681979, |
| "grad_norm": 0.58203125, |
| "learning_rate": 0.0001, |
| "loss": 1.3457889556884766, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.18348133805167924, |
| "grad_norm": 0.57421875, |
| "learning_rate": 0.0001, |
| "loss": 1.44827139377594, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.18374841423516058, |
| "grad_norm": 0.546875, |
| "learning_rate": 0.0001, |
| "loss": 1.3091659545898438, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.18401549041864193, |
| "grad_norm": 0.56640625, |
| "learning_rate": 0.0001, |
| "loss": 1.3264273405075073, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.18428256660212325, |
| "grad_norm": 0.546875, |
| "learning_rate": 0.0001, |
| "loss": 1.329689621925354, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.1845496427856046, |
| "grad_norm": 0.59375, |
| "learning_rate": 0.0001, |
| "loss": 1.419305443763733, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.18481671896908594, |
| "grad_norm": 0.55859375, |
| "learning_rate": 0.0001, |
| "loss": 1.3605815172195435, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.18508379515256726, |
| "grad_norm": 0.5546875, |
| "learning_rate": 0.0001, |
| "loss": 1.2165064811706543, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.1853508713360486, |
| "grad_norm": 0.58203125, |
| "learning_rate": 0.0001, |
| "loss": 1.407888412475586, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.18561794751952995, |
| "grad_norm": 0.53515625, |
| "learning_rate": 0.0001, |
| "loss": 1.3541319370269775, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.18588502370301127, |
| "grad_norm": 0.54296875, |
| "learning_rate": 0.0001, |
| "loss": 1.364072561264038, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.18615209988649262, |
| "grad_norm": 0.58984375, |
| "learning_rate": 0.0001, |
| "loss": 1.3818382024765015, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.18641917606997396, |
| "grad_norm": 0.55078125, |
| "learning_rate": 0.0001, |
| "loss": 1.3012504577636719, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.1866862522534553, |
| "grad_norm": 0.55078125, |
| "learning_rate": 0.0001, |
| "loss": 1.2554136514663696, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.18695332843693663, |
| "grad_norm": 0.55859375, |
| "learning_rate": 0.0001, |
| "loss": 1.3880873918533325, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.18722040462041797, |
| "grad_norm": 0.54296875, |
| "learning_rate": 0.0001, |
| "loss": 1.3451950550079346, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.18748748080389932, |
| "grad_norm": 0.6015625, |
| "learning_rate": 0.0001, |
| "loss": 1.5112781524658203, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.18775455698738064, |
| "grad_norm": 0.578125, |
| "learning_rate": 0.0001, |
| "loss": 1.3787572383880615, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.18802163317086198, |
| "grad_norm": 0.5625, |
| "learning_rate": 0.0001, |
| "loss": 1.3001948595046997, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.18828870935434333, |
| "grad_norm": 0.57421875, |
| "learning_rate": 0.0001, |
| "loss": 1.376787543296814, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.18855578553782468, |
| "grad_norm": 0.5546875, |
| "learning_rate": 0.0001, |
| "loss": 1.4305808544158936, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.188822861721306, |
| "grad_norm": 0.5625, |
| "learning_rate": 0.0001, |
| "loss": 1.4173235893249512, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.18908993790478734, |
| "grad_norm": 0.6015625, |
| "learning_rate": 0.0001, |
| "loss": 1.3962050676345825, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.1893570140882687, |
| "grad_norm": 0.57421875, |
| "learning_rate": 0.0001, |
| "loss": 1.3929705619812012, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.18962409027175, |
| "grad_norm": 0.59765625, |
| "learning_rate": 0.0001, |
| "loss": 1.3011528253555298, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.18989116645523135, |
| "grad_norm": 0.5703125, |
| "learning_rate": 0.0001, |
| "loss": 1.4486719369888306, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.1901582426387127, |
| "grad_norm": 0.55078125, |
| "learning_rate": 0.0001, |
| "loss": 1.4150975942611694, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.19042531882219402, |
| "grad_norm": 0.56640625, |
| "learning_rate": 0.0001, |
| "loss": 1.3898661136627197, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.19069239500567536, |
| "grad_norm": 0.55859375, |
| "learning_rate": 0.0001, |
| "loss": 1.3499014377593994, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.1909594711891567, |
| "grad_norm": 0.58203125, |
| "learning_rate": 0.0001, |
| "loss": 1.4843147993087769, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.19122654737263806, |
| "grad_norm": 0.5703125, |
| "learning_rate": 0.0001, |
| "loss": 1.37148916721344, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.19149362355611937, |
| "grad_norm": 0.5546875, |
| "learning_rate": 0.0001, |
| "loss": 1.3226914405822754, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.19176069973960072, |
| "grad_norm": 0.546875, |
| "learning_rate": 0.0001, |
| "loss": 1.3623806238174438, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.19202777592308207, |
| "grad_norm": 0.55078125, |
| "learning_rate": 0.0001, |
| "loss": 1.3770349025726318, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.19229485210656339, |
| "grad_norm": 0.55859375, |
| "learning_rate": 0.0001, |
| "loss": 1.447762370109558, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.19256192829004473, |
| "grad_norm": 0.546875, |
| "learning_rate": 0.0001, |
| "loss": 1.3057664632797241, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.19282900447352608, |
| "grad_norm": 0.578125, |
| "learning_rate": 0.0001, |
| "loss": 1.3898338079452515, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.19309608065700742, |
| "grad_norm": 0.53125, |
| "learning_rate": 0.0001, |
| "loss": 1.429726243019104, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.19336315684048874, |
| "grad_norm": 0.55078125, |
| "learning_rate": 0.0001, |
| "loss": 1.3420591354370117, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.1936302330239701, |
| "grad_norm": 0.59375, |
| "learning_rate": 0.0001, |
| "loss": 1.445176362991333, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.19389730920745143, |
| "grad_norm": 0.55859375, |
| "learning_rate": 0.0001, |
| "loss": 1.3709323406219482, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.19416438539093275, |
| "grad_norm": 0.546875, |
| "learning_rate": 0.0001, |
| "loss": 1.3264213800430298, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.1944314615744141, |
| "grad_norm": 0.56640625, |
| "learning_rate": 0.0001, |
| "loss": 1.388136863708496, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.19469853775789545, |
| "grad_norm": 0.5625, |
| "learning_rate": 0.0001, |
| "loss": 1.3481216430664062, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.19496561394137676, |
| "grad_norm": 0.5390625, |
| "learning_rate": 0.0001, |
| "loss": 1.4106545448303223, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.1952326901248581, |
| "grad_norm": 0.56640625, |
| "learning_rate": 0.0001, |
| "loss": 1.3216980695724487, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.19549976630833946, |
| "grad_norm": 0.60546875, |
| "learning_rate": 0.0001, |
| "loss": 1.3771264553070068, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.1957668424918208, |
| "grad_norm": 0.55859375, |
| "learning_rate": 0.0001, |
| "loss": 1.428194522857666, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.19603391867530212, |
| "grad_norm": 0.60546875, |
| "learning_rate": 0.0001, |
| "loss": 1.3380928039550781, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.19630099485878347, |
| "grad_norm": 0.5390625, |
| "learning_rate": 0.0001, |
| "loss": 1.40969979763031, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.1965680710422648, |
| "grad_norm": 0.52734375, |
| "learning_rate": 0.0001, |
| "loss": 1.274341106414795, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.19683514722574613, |
| "grad_norm": 0.5546875, |
| "learning_rate": 0.0001, |
| "loss": 1.5402621030807495, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.19710222340922748, |
| "grad_norm": 0.546875, |
| "learning_rate": 0.0001, |
| "loss": 1.297295331954956, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.19736929959270882, |
| "grad_norm": 0.55078125, |
| "learning_rate": 0.0001, |
| "loss": 1.4769492149353027, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.19763637577619017, |
| "grad_norm": 0.546875, |
| "learning_rate": 0.0001, |
| "loss": 1.3517801761627197, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.1979034519596715, |
| "grad_norm": 0.5546875, |
| "learning_rate": 0.0001, |
| "loss": 1.415041446685791, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.19817052814315284, |
| "grad_norm": 0.578125, |
| "learning_rate": 0.0001, |
| "loss": 1.376120686531067, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.19843760432663418, |
| "grad_norm": 0.546875, |
| "learning_rate": 0.0001, |
| "loss": 1.2844655513763428, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.1987046805101155, |
| "grad_norm": 0.5703125, |
| "learning_rate": 0.0001, |
| "loss": 1.3740079402923584, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.19897175669359685, |
| "grad_norm": 0.578125, |
| "learning_rate": 0.0001, |
| "loss": 1.3367841243743896, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.1992388328770782, |
| "grad_norm": 0.57421875, |
| "learning_rate": 0.0001, |
| "loss": 1.3713178634643555, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.1995059090605595, |
| "grad_norm": 0.55859375, |
| "learning_rate": 0.0001, |
| "loss": 1.390384554862976, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.19977298524404086, |
| "grad_norm": 0.58984375, |
| "learning_rate": 0.0001, |
| "loss": 1.4415326118469238, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.2000400614275222, |
| "grad_norm": 0.57421875, |
| "learning_rate": 0.0001, |
| "loss": 1.3195914030075073, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.20030713761100355, |
| "grad_norm": 0.5703125, |
| "learning_rate": 0.0001, |
| "loss": 1.329708456993103, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.20057421379448487, |
| "grad_norm": 0.57421875, |
| "learning_rate": 0.0001, |
| "loss": 1.363006830215454, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.20084128997796621, |
| "grad_norm": 0.546875, |
| "learning_rate": 0.0001, |
| "loss": 1.2761677503585815, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.20110836616144756, |
| "grad_norm": 0.5546875, |
| "learning_rate": 0.0001, |
| "loss": 1.4017164707183838, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.20137544234492888, |
| "grad_norm": 0.5625, |
| "learning_rate": 0.0001, |
| "loss": 1.3224306106567383, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.20164251852841023, |
| "grad_norm": 0.55859375, |
| "learning_rate": 0.0001, |
| "loss": 1.337402105331421, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.20190959471189157, |
| "grad_norm": 0.5546875, |
| "learning_rate": 0.0001, |
| "loss": 1.4196721315383911, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.20217667089537292, |
| "grad_norm": 0.58203125, |
| "learning_rate": 0.0001, |
| "loss": 1.314110279083252, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.20244374707885424, |
| "grad_norm": 0.5625, |
| "learning_rate": 0.0001, |
| "loss": 1.4188446998596191, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.20271082326233558, |
| "grad_norm": 0.55078125, |
| "learning_rate": 0.0001, |
| "loss": 1.3670579195022583, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.20297789944581693, |
| "grad_norm": 0.58203125, |
| "learning_rate": 0.0001, |
| "loss": 1.358155608177185, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.20324497562929825, |
| "grad_norm": 0.5703125, |
| "learning_rate": 0.0001, |
| "loss": 1.2619855403900146, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.2035120518127796, |
| "grad_norm": 0.54296875, |
| "learning_rate": 0.0001, |
| "loss": 1.2777178287506104, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.20377912799626094, |
| "grad_norm": 0.5625, |
| "learning_rate": 0.0001, |
| "loss": 1.4464168548583984, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.20404620417974226, |
| "grad_norm": 0.5546875, |
| "learning_rate": 0.0001, |
| "loss": 1.2626973390579224, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.2043132803632236, |
| "grad_norm": 0.58984375, |
| "learning_rate": 0.0001, |
| "loss": 1.3790322542190552, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.20458035654670495, |
| "grad_norm": 0.5703125, |
| "learning_rate": 0.0001, |
| "loss": 1.368752360343933, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.2048474327301863, |
| "grad_norm": 0.5234375, |
| "learning_rate": 0.0001, |
| "loss": 1.2792086601257324, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.20511450891366761, |
| "grad_norm": 0.5859375, |
| "learning_rate": 0.0001, |
| "loss": 1.3987016677856445, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.20538158509714896, |
| "grad_norm": 0.5546875, |
| "learning_rate": 0.0001, |
| "loss": 1.3528289794921875, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.2056486612806303, |
| "grad_norm": 0.58984375, |
| "learning_rate": 0.0001, |
| "loss": 1.3498940467834473, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.20591573746411163, |
| "grad_norm": 0.54296875, |
| "learning_rate": 0.0001, |
| "loss": 1.277837872505188, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.20618281364759297, |
| "grad_norm": 0.6171875, |
| "learning_rate": 0.0001, |
| "loss": 1.3961533308029175, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.20644988983107432, |
| "grad_norm": 0.56640625, |
| "learning_rate": 0.0001, |
| "loss": 1.3514395952224731, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.20671696601455566, |
| "grad_norm": 0.54296875, |
| "learning_rate": 0.0001, |
| "loss": 1.3249895572662354, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.20698404219803698, |
| "grad_norm": 0.56640625, |
| "learning_rate": 0.0001, |
| "loss": 1.3612358570098877, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.20725111838151833, |
| "grad_norm": 0.5546875, |
| "learning_rate": 0.0001, |
| "loss": 1.4175899028778076, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.20751819456499968, |
| "grad_norm": 0.5390625, |
| "learning_rate": 0.0001, |
| "loss": 1.3095741271972656, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.207785270748481, |
| "grad_norm": 0.57421875, |
| "learning_rate": 0.0001, |
| "loss": 1.4355252981185913, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.20805234693196234, |
| "grad_norm": 0.55859375, |
| "learning_rate": 0.0001, |
| "loss": 1.3619897365570068, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.20831942311544369, |
| "grad_norm": 0.5546875, |
| "learning_rate": 0.0001, |
| "loss": 1.2804675102233887, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.20858649929892503, |
| "grad_norm": 0.54296875, |
| "learning_rate": 0.0001, |
| "loss": 1.3464568853378296, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.20885357548240635, |
| "grad_norm": 0.54296875, |
| "learning_rate": 0.0001, |
| "loss": 1.3062856197357178, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.2091206516658877, |
| "grad_norm": 0.55078125, |
| "learning_rate": 0.0001, |
| "loss": 1.3532344102859497, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.20938772784936904, |
| "grad_norm": 0.546875, |
| "learning_rate": 0.0001, |
| "loss": 1.3980780839920044, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.20965480403285036, |
| "grad_norm": 0.55859375, |
| "learning_rate": 0.0001, |
| "loss": 1.3094193935394287, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.2099218802163317, |
| "grad_norm": 0.53125, |
| "learning_rate": 0.0001, |
| "loss": 1.436868667602539, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.21018895639981305, |
| "grad_norm": 0.58984375, |
| "learning_rate": 0.0001, |
| "loss": 1.4248528480529785, |
| "step": 787 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 3933, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 787, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.5555284789987e+18, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |