| { |
| "best_global_step": 14798, |
| "best_metric": 0.13719096779823303, |
| "best_model_checkpoint": "/mnt/nfs/homes/penacour/my_project/./saved_models/Text/checkpoints/checkpoint-14798", |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 22197, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0067576699553993785, |
| "grad_norm": 16.06703758239746, |
| "learning_rate": 4.414414414414415e-07, |
| "loss": 1.35020751953125, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.013515339910798757, |
| "grad_norm": 15.278281211853027, |
| "learning_rate": 8.91891891891892e-07, |
| "loss": 1.359039306640625, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.020273009866198136, |
| "grad_norm": 9.957842826843262, |
| "learning_rate": 1.3423423423423422e-06, |
| "loss": 1.345301513671875, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.027030679821597514, |
| "grad_norm": 9.441655158996582, |
| "learning_rate": 1.7927927927927929e-06, |
| "loss": 1.291690673828125, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.03378834977699689, |
| "grad_norm": 11.737529754638672, |
| "learning_rate": 2.2432432432432435e-06, |
| "loss": 1.20750244140625, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.04054601973239627, |
| "grad_norm": 13.651801109313965, |
| "learning_rate": 2.693693693693694e-06, |
| "loss": 1.026903076171875, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.04730368968779565, |
| "grad_norm": 10.333759307861328, |
| "learning_rate": 3.1441441441441444e-06, |
| "loss": 0.8750057983398437, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.05406135964319503, |
| "grad_norm": 11.845670700073242, |
| "learning_rate": 3.5945945945945946e-06, |
| "loss": 0.74543701171875, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.06081902959859441, |
| "grad_norm": 20.486881256103516, |
| "learning_rate": 4.045045045045045e-06, |
| "loss": 0.627983627319336, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.06757669955399379, |
| "grad_norm": 5.46737003326416, |
| "learning_rate": 4.495495495495496e-06, |
| "loss": 0.6082597351074219, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.07433436950939316, |
| "grad_norm": 33.317134857177734, |
| "learning_rate": 4.9459459459459466e-06, |
| "loss": 0.5269870758056641, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.08109203946479254, |
| "grad_norm": 22.17811393737793, |
| "learning_rate": 5.396396396396398e-06, |
| "loss": 0.5109220123291016, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.08784970942019192, |
| "grad_norm": 65.60385131835938, |
| "learning_rate": 5.846846846846848e-06, |
| "loss": 0.47735191345214845, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.0946073793755913, |
| "grad_norm": 56.5187873840332, |
| "learning_rate": 6.297297297297298e-06, |
| "loss": 0.4838460159301758, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.10136504933099068, |
| "grad_norm": 9.734931945800781, |
| "learning_rate": 6.747747747747748e-06, |
| "loss": 0.5097962188720703, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.10812271928639006, |
| "grad_norm": 40.54719161987305, |
| "learning_rate": 7.1981981981981985e-06, |
| "loss": 0.439853515625, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.11488038924178944, |
| "grad_norm": 1.501684546470642, |
| "learning_rate": 7.648648648648649e-06, |
| "loss": 0.3697560501098633, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.12163805919718881, |
| "grad_norm": 1.2029157876968384, |
| "learning_rate": 8.0990990990991e-06, |
| "loss": 0.3332023620605469, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.12839572915258818, |
| "grad_norm": 1.7254059314727783, |
| "learning_rate": 8.549549549549551e-06, |
| "loss": 0.7150354766845703, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.13515339910798757, |
| "grad_norm": 0.1328679621219635, |
| "learning_rate": 9e-06, |
| "loss": 0.39909423828125, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.14191106906338694, |
| "grad_norm": 67.52281951904297, |
| "learning_rate": 9.450450450450451e-06, |
| "loss": 0.4741665267944336, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.14866873901878633, |
| "grad_norm": 0.11982670426368713, |
| "learning_rate": 9.900900900900902e-06, |
| "loss": 0.4088504028320312, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.1554264089741857, |
| "grad_norm": 73.58162689208984, |
| "learning_rate": 1.0351351351351353e-05, |
| "loss": 0.5978804397583007, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.16218407892958508, |
| "grad_norm": 16.60240936279297, |
| "learning_rate": 1.0801801801801803e-05, |
| "loss": 0.4865760040283203, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.16894174888498445, |
| "grad_norm": 0.07922861725091934, |
| "learning_rate": 1.1252252252252254e-05, |
| "loss": 0.47403549194335937, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.17569941884038384, |
| "grad_norm": 177.65086364746094, |
| "learning_rate": 1.1702702702702703e-05, |
| "loss": 0.43802192687988284, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.1824570887957832, |
| "grad_norm": 0.34974488615989685, |
| "learning_rate": 1.2153153153153154e-05, |
| "loss": 0.5690596008300781, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.1892147587511826, |
| "grad_norm": 0.37981438636779785, |
| "learning_rate": 1.2603603603603605e-05, |
| "loss": 0.5010957336425781, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.19597242870658196, |
| "grad_norm": 42.93001174926758, |
| "learning_rate": 1.3054054054054055e-05, |
| "loss": 0.4970842742919922, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.20273009866198136, |
| "grad_norm": 30.023513793945312, |
| "learning_rate": 1.3504504504504506e-05, |
| "loss": 0.5511152648925781, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.20948776861738072, |
| "grad_norm": 7.049088954925537, |
| "learning_rate": 1.3954954954954955e-05, |
| "loss": 0.4516510009765625, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.2162454385727801, |
| "grad_norm": 0.25647661089897156, |
| "learning_rate": 1.4405405405405406e-05, |
| "loss": 0.36651172637939455, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.22300310852817948, |
| "grad_norm": 16.453493118286133, |
| "learning_rate": 1.4855855855855856e-05, |
| "loss": 0.4177054214477539, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.22976077848357887, |
| "grad_norm": 105.5218505859375, |
| "learning_rate": 1.5306306306306307e-05, |
| "loss": 0.5324158477783203, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.23651844843897823, |
| "grad_norm": 96.71179962158203, |
| "learning_rate": 1.5756756756756756e-05, |
| "loss": 0.5433593368530274, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.24327611839437763, |
| "grad_norm": 0.2389248162508011, |
| "learning_rate": 1.620720720720721e-05, |
| "loss": 0.4678084945678711, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.250033788349777, |
| "grad_norm": 153.3388214111328, |
| "learning_rate": 1.6657657657657658e-05, |
| "loss": 0.5350112533569336, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.25679145830517636, |
| "grad_norm": 38.44563293457031, |
| "learning_rate": 1.710810810810811e-05, |
| "loss": 0.49324443817138675, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.26354912826057575, |
| "grad_norm": 21.985090255737305, |
| "learning_rate": 1.755855855855856e-05, |
| "loss": 0.4342898178100586, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.27030679821597514, |
| "grad_norm": 1.6282066106796265, |
| "learning_rate": 1.800900900900901e-05, |
| "loss": 0.3795098876953125, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.27706446817137453, |
| "grad_norm": 0.3189730644226074, |
| "learning_rate": 1.8459459459459462e-05, |
| "loss": 0.5205284881591797, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.28382213812677387, |
| "grad_norm": 119.69422912597656, |
| "learning_rate": 1.8909909909909912e-05, |
| "loss": 0.35589817047119143, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.29057980808217326, |
| "grad_norm": 0.12017817795276642, |
| "learning_rate": 1.936036036036036e-05, |
| "loss": 0.48412479400634767, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.29733747803757266, |
| "grad_norm": 0.13124582171440125, |
| "learning_rate": 1.981081081081081e-05, |
| "loss": 0.39202404022216797, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.30409514799297205, |
| "grad_norm": 0.04938481003046036, |
| "learning_rate": 1.9970966611603347e-05, |
| "loss": 0.4283562469482422, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.3108528179483714, |
| "grad_norm": 0.6524620056152344, |
| "learning_rate": 1.9920909045402215e-05, |
| "loss": 0.40030643463134763, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.3176104879037708, |
| "grad_norm": 18.973918914794922, |
| "learning_rate": 1.9870851479201083e-05, |
| "loss": 0.4120947265625, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.32436815785917017, |
| "grad_norm": 71.04777526855469, |
| "learning_rate": 1.982079391299995e-05, |
| "loss": 0.522302017211914, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.33112582781456956, |
| "grad_norm": 0.29514455795288086, |
| "learning_rate": 1.977073634679882e-05, |
| "loss": 0.4655706787109375, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.3378834977699689, |
| "grad_norm": 0.18427829444408417, |
| "learning_rate": 1.972067878059769e-05, |
| "loss": 0.28636587142944336, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.3446411677253683, |
| "grad_norm": 1.2528847455978394, |
| "learning_rate": 1.9670621214396557e-05, |
| "loss": 0.3846575164794922, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.3513988376807677, |
| "grad_norm": 2.0725510120391846, |
| "learning_rate": 1.9620563648195424e-05, |
| "loss": 0.5236722564697266, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.3581565076361671, |
| "grad_norm": 0.8594722151756287, |
| "learning_rate": 1.9570506081994295e-05, |
| "loss": 0.3829684066772461, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.3649141775915664, |
| "grad_norm": 0.1568164974451065, |
| "learning_rate": 1.9520448515793166e-05, |
| "loss": 0.4368381881713867, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.3716718475469658, |
| "grad_norm": 0.15734773874282837, |
| "learning_rate": 1.9470390949592034e-05, |
| "loss": 0.2745826721191406, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.3784295175023652, |
| "grad_norm": 0.15169839560985565, |
| "learning_rate": 1.94203333833909e-05, |
| "loss": 0.5820682525634766, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.3851871874577646, |
| "grad_norm": 0.3607238531112671, |
| "learning_rate": 1.937027581718977e-05, |
| "loss": 0.40924224853515623, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.3919448574131639, |
| "grad_norm": 26.703046798706055, |
| "learning_rate": 1.9320218250988637e-05, |
| "loss": 0.4911691284179687, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.3987025273685633, |
| "grad_norm": 4.848247528076172, |
| "learning_rate": 1.9270160684787508e-05, |
| "loss": 0.4514664840698242, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.4054601973239627, |
| "grad_norm": 0.109466053545475, |
| "learning_rate": 1.9220103118586375e-05, |
| "loss": 0.37604598999023436, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.41221786727936205, |
| "grad_norm": 16.59686851501465, |
| "learning_rate": 1.9170045552385243e-05, |
| "loss": 0.29687234878540036, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.41897553723476144, |
| "grad_norm": 21.556055068969727, |
| "learning_rate": 1.9119987986184114e-05, |
| "loss": 0.5553390121459961, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.42573320719016083, |
| "grad_norm": 0.31334683299064636, |
| "learning_rate": 1.906993041998298e-05, |
| "loss": 0.34477981567382815, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.4324908771455602, |
| "grad_norm": 0.06408923864364624, |
| "learning_rate": 1.9019872853781852e-05, |
| "loss": 0.4639363479614258, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.43924854710095956, |
| "grad_norm": 0.039293207228183746, |
| "learning_rate": 1.896981528758072e-05, |
| "loss": 0.23494468688964842, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.44600621705635896, |
| "grad_norm": 0.04438117891550064, |
| "learning_rate": 1.8919757721379588e-05, |
| "loss": 0.13365052223205567, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.45276388701175835, |
| "grad_norm": 0.36716172099113464, |
| "learning_rate": 1.8869700155178455e-05, |
| "loss": 0.28368005752563474, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.45952155696715774, |
| "grad_norm": 0.04489823430776596, |
| "learning_rate": 1.8819642588977326e-05, |
| "loss": 0.40611759185791013, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.4662792269225571, |
| "grad_norm": 16.726591110229492, |
| "learning_rate": 1.8769585022776194e-05, |
| "loss": 0.36710128784179685, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.47303689687795647, |
| "grad_norm": 14.693270683288574, |
| "learning_rate": 1.871952745657506e-05, |
| "loss": 0.37917068481445315, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.47979456683335586, |
| "grad_norm": 73.12869262695312, |
| "learning_rate": 1.8669469890373932e-05, |
| "loss": 0.2679195594787598, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.48655223678875525, |
| "grad_norm": 1.5196269750595093, |
| "learning_rate": 1.86194123241728e-05, |
| "loss": 0.4122719955444336, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.4933099067441546, |
| "grad_norm": 24.74604606628418, |
| "learning_rate": 1.8569354757971668e-05, |
| "loss": 0.25464179992675784, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.500067576699554, |
| "grad_norm": 49.48332977294922, |
| "learning_rate": 1.851929719177054e-05, |
| "loss": 0.3096816062927246, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.5068252466549533, |
| "grad_norm": 33.84089660644531, |
| "learning_rate": 1.8469239625569406e-05, |
| "loss": 0.34311264038085937, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.5135829166103527, |
| "grad_norm": 0.0433129221200943, |
| "learning_rate": 1.8419182059368274e-05, |
| "loss": 0.3423194885253906, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.5203405865657521, |
| "grad_norm": 0.5578937530517578, |
| "learning_rate": 1.836912449316714e-05, |
| "loss": 0.4252873229980469, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.5270982565211515, |
| "grad_norm": 233.93492126464844, |
| "learning_rate": 1.8319066926966012e-05, |
| "loss": 0.303822021484375, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.5338559264765509, |
| "grad_norm": 0.005285980179905891, |
| "learning_rate": 1.8269009360764883e-05, |
| "loss": 0.37970211029052736, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.5406135964319503, |
| "grad_norm": 0.14675083756446838, |
| "learning_rate": 1.821895179456375e-05, |
| "loss": 0.3994259262084961, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.5473712663873497, |
| "grad_norm": 18.797971725463867, |
| "learning_rate": 1.816889422836262e-05, |
| "loss": 0.4484566116333008, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.5541289363427491, |
| "grad_norm": 16.081249237060547, |
| "learning_rate": 1.8118836662161486e-05, |
| "loss": 0.398365364074707, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.5608866062981483, |
| "grad_norm": 0.4474808871746063, |
| "learning_rate": 1.8068779095960357e-05, |
| "loss": 0.16137269973754884, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.5676442762535477, |
| "grad_norm": 32.02878952026367, |
| "learning_rate": 1.8018721529759225e-05, |
| "loss": 0.48198543548583983, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.5744019462089471, |
| "grad_norm": 7.520394325256348, |
| "learning_rate": 1.7968663963558092e-05, |
| "loss": 0.38921875, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.5811596161643465, |
| "grad_norm": 0.21612419188022614, |
| "learning_rate": 1.791860639735696e-05, |
| "loss": 0.24774662017822266, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.5879172861197459, |
| "grad_norm": 52.803627014160156, |
| "learning_rate": 1.786854883115583e-05, |
| "loss": 0.4917910385131836, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.5946749560751453, |
| "grad_norm": 2.780160665512085, |
| "learning_rate": 1.78184912649547e-05, |
| "loss": 0.4956707763671875, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.6014326260305447, |
| "grad_norm": 60.74454116821289, |
| "learning_rate": 1.776843369875357e-05, |
| "loss": 0.38436222076416016, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.6081902959859441, |
| "grad_norm": 171.72776794433594, |
| "learning_rate": 1.7718376132552437e-05, |
| "loss": 0.339903678894043, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.6149479659413434, |
| "grad_norm": 0.21840180456638336, |
| "learning_rate": 1.7668318566351305e-05, |
| "loss": 0.35657142639160155, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.6217056358967428, |
| "grad_norm": 0.1235419437289238, |
| "learning_rate": 1.7618261000150173e-05, |
| "loss": 0.48563838958740235, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.6284633058521422, |
| "grad_norm": 45.70621109008789, |
| "learning_rate": 1.7568203433949044e-05, |
| "loss": 0.3387251281738281, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.6352209758075416, |
| "grad_norm": 16.330842971801758, |
| "learning_rate": 1.751814586774791e-05, |
| "loss": 0.26475383758544924, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.641978645762941, |
| "grad_norm": 0.13989397883415222, |
| "learning_rate": 1.746808830154678e-05, |
| "loss": 0.23626720428466796, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.6487363157183403, |
| "grad_norm": 0.2098826766014099, |
| "learning_rate": 1.7418030735345646e-05, |
| "loss": 0.3830945587158203, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.6554939856737397, |
| "grad_norm": 129.06463623046875, |
| "learning_rate": 1.7367973169144517e-05, |
| "loss": 0.3804433822631836, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.6622516556291391, |
| "grad_norm": 0.03363212198019028, |
| "learning_rate": 1.731791560294339e-05, |
| "loss": 0.360137939453125, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.6690093255845384, |
| "grad_norm": 0.03238023817539215, |
| "learning_rate": 1.7267858036742256e-05, |
| "loss": 0.2027187728881836, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.6757669955399378, |
| "grad_norm": 0.06728547066450119, |
| "learning_rate": 1.7217800470541124e-05, |
| "loss": 0.331846923828125, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.6825246654953372, |
| "grad_norm": 19.55385398864746, |
| "learning_rate": 1.716774290433999e-05, |
| "loss": 0.4320966339111328, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.6892823354507366, |
| "grad_norm": 30.989242553710938, |
| "learning_rate": 1.7117685338138862e-05, |
| "loss": 0.39179283142089844, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.696040005406136, |
| "grad_norm": 9.868060111999512, |
| "learning_rate": 1.706762777193773e-05, |
| "loss": 0.36711841583251953, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.7027976753615354, |
| "grad_norm": 0.5608569383621216, |
| "learning_rate": 1.7017570205736597e-05, |
| "loss": 0.24337669372558593, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.7095553453169348, |
| "grad_norm": 34.208763122558594, |
| "learning_rate": 1.6967512639535465e-05, |
| "loss": 0.5551647567749023, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.7163130152723342, |
| "grad_norm": 0.15128108859062195, |
| "learning_rate": 1.6917455073334336e-05, |
| "loss": 0.32145679473876954, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.7230706852277334, |
| "grad_norm": 35.46443557739258, |
| "learning_rate": 1.6867397507133204e-05, |
| "loss": 0.2589885330200195, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.7298283551831328, |
| "grad_norm": 26.979644775390625, |
| "learning_rate": 1.6817339940932075e-05, |
| "loss": 0.4622607421875, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.7365860251385322, |
| "grad_norm": 52.30873489379883, |
| "learning_rate": 1.6767282374730942e-05, |
| "loss": 0.24140811920166017, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.7433436950939316, |
| "grad_norm": 37.16353988647461, |
| "learning_rate": 1.671722480852981e-05, |
| "loss": 0.4084646224975586, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.750101365049331, |
| "grad_norm": 1.0389983654022217, |
| "learning_rate": 1.6667167242328677e-05, |
| "loss": 0.3605424118041992, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.7568590350047304, |
| "grad_norm": 27.835859298706055, |
| "learning_rate": 1.661710967612755e-05, |
| "loss": 0.3582349395751953, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.7636167049601298, |
| "grad_norm": 76.13422393798828, |
| "learning_rate": 1.6567052109926416e-05, |
| "loss": 0.407784423828125, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.7703743749155292, |
| "grad_norm": 0.39249739050865173, |
| "learning_rate": 1.6516994543725284e-05, |
| "loss": 0.39183979034423827, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.7771320448709285, |
| "grad_norm": 24.34403419494629, |
| "learning_rate": 1.6466936977524155e-05, |
| "loss": 0.2900525093078613, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.7838897148263279, |
| "grad_norm": 0.6838638186454773, |
| "learning_rate": 1.6416879411323022e-05, |
| "loss": 0.31336002349853515, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.7906473847817272, |
| "grad_norm": 0.06529181450605392, |
| "learning_rate": 1.6366821845121893e-05, |
| "loss": 0.2243809700012207, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.7974050547371266, |
| "grad_norm": 0.08979305624961853, |
| "learning_rate": 1.631676427892076e-05, |
| "loss": 0.37539905548095703, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.804162724692526, |
| "grad_norm": 0.36067715287208557, |
| "learning_rate": 1.626670671271963e-05, |
| "loss": 0.2607468795776367, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.8109203946479254, |
| "grad_norm": 0.07132972776889801, |
| "learning_rate": 1.6216649146518496e-05, |
| "loss": 0.4599153518676758, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.8176780646033248, |
| "grad_norm": 0.016791321337223053, |
| "learning_rate": 1.6166591580317367e-05, |
| "loss": 0.29411968231201174, |
| "step": 6050 |
| }, |
| { |
| "epoch": 0.8244357345587241, |
| "grad_norm": 31.486772537231445, |
| "learning_rate": 1.6116534014116235e-05, |
| "loss": 0.4410831069946289, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.8311934045141235, |
| "grad_norm": 0.12318305671215057, |
| "learning_rate": 1.6066476447915106e-05, |
| "loss": 0.3409576416015625, |
| "step": 6150 |
| }, |
| { |
| "epoch": 0.8379510744695229, |
| "grad_norm": 0.03524986654520035, |
| "learning_rate": 1.6016418881713973e-05, |
| "loss": 0.2761139106750488, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.8447087444249223, |
| "grad_norm": 0.07541833072900772, |
| "learning_rate": 1.596636131551284e-05, |
| "loss": 0.4007402420043945, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.8514664143803217, |
| "grad_norm": 0.1083996370434761, |
| "learning_rate": 1.591630374931171e-05, |
| "loss": 0.2156216812133789, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.8582240843357211, |
| "grad_norm": 17.29279899597168, |
| "learning_rate": 1.586624618311058e-05, |
| "loss": 0.2401065444946289, |
| "step": 6350 |
| }, |
| { |
| "epoch": 0.8649817542911205, |
| "grad_norm": 0.163585364818573, |
| "learning_rate": 1.5816188616909447e-05, |
| "loss": 0.27833726882934573, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.8717394242465198, |
| "grad_norm": 0.12471607327461243, |
| "learning_rate": 1.5766131050708315e-05, |
| "loss": 0.26960302352905274, |
| "step": 6450 |
| }, |
| { |
| "epoch": 0.8784970942019191, |
| "grad_norm": 0.01586587354540825, |
| "learning_rate": 1.5716073484507182e-05, |
| "loss": 0.24444808959960937, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.8852547641573185, |
| "grad_norm": 0.04108090698719025, |
| "learning_rate": 1.5666015918306053e-05, |
| "loss": 0.24059207916259764, |
| "step": 6550 |
| }, |
| { |
| "epoch": 0.8920124341127179, |
| "grad_norm": 0.6881380081176758, |
| "learning_rate": 1.5615958352104924e-05, |
| "loss": 0.2879082870483398, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.8987701040681173, |
| "grad_norm": 10.9269437789917, |
| "learning_rate": 1.5565900785903792e-05, |
| "loss": 0.3690375518798828, |
| "step": 6650 |
| }, |
| { |
| "epoch": 0.9055277740235167, |
| "grad_norm": 0.2799762785434723, |
| "learning_rate": 1.551584321970266e-05, |
| "loss": 0.37917003631591795, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.9122854439789161, |
| "grad_norm": 29.693958282470703, |
| "learning_rate": 1.5465785653501527e-05, |
| "loss": 0.2966591453552246, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.9190431139343155, |
| "grad_norm": 0.018007883802056313, |
| "learning_rate": 1.5415728087300398e-05, |
| "loss": 0.1859919548034668, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.9258007838897149, |
| "grad_norm": 61.17789077758789, |
| "learning_rate": 1.5365670521099266e-05, |
| "loss": 0.3845572662353516, |
| "step": 6850 |
| }, |
| { |
| "epoch": 0.9325584538451142, |
| "grad_norm": 20.372512817382812, |
| "learning_rate": 1.5315612954898133e-05, |
| "loss": 0.4127138900756836, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.9393161238005135, |
| "grad_norm": 10.219191551208496, |
| "learning_rate": 1.5265555388697e-05, |
| "loss": 0.31619930267333984, |
| "step": 6950 |
| }, |
| { |
| "epoch": 0.9460737937559129, |
| "grad_norm": 0.049500029534101486, |
| "learning_rate": 1.521549782249587e-05, |
| "loss": 0.3062467193603516, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.9528314637113123, |
| "grad_norm": 0.020763445645570755, |
| "learning_rate": 1.5165440256294741e-05, |
| "loss": 0.31766212463378907, |
| "step": 7050 |
| }, |
| { |
| "epoch": 0.9595891336667117, |
| "grad_norm": 0.03847825154662132, |
| "learning_rate": 1.5115382690093609e-05, |
| "loss": 0.17821809768676758, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.9663468036221111, |
| "grad_norm": 0.1949007511138916, |
| "learning_rate": 1.5065325123892478e-05, |
| "loss": 0.17827264785766603, |
| "step": 7150 |
| }, |
| { |
| "epoch": 0.9731044735775105, |
| "grad_norm": 0.04745342954993248, |
| "learning_rate": 1.5015267557691346e-05, |
| "loss": 0.25842235565185545, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.9798621435329099, |
| "grad_norm": 0.09277615696191788, |
| "learning_rate": 1.4965209991490215e-05, |
| "loss": 0.28848594665527344, |
| "step": 7250 |
| }, |
| { |
| "epoch": 0.9866198134883092, |
| "grad_norm": 107.77627563476562, |
| "learning_rate": 1.4915152425289083e-05, |
| "loss": 0.3165037727355957, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.9933774834437086, |
| "grad_norm": 0.037322066724300385, |
| "learning_rate": 1.4865094859087952e-05, |
| "loss": 0.441009521484375, |
| "step": 7350 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.9648333206194217, |
| "eval_f1": 0.9626145486984024, |
| "eval_loss": 0.17089255154132843, |
| "eval_precision": 0.945666029318037, |
| "eval_recall": 0.9801816680429397, |
| "eval_runtime": 68.5571, |
| "eval_samples_per_second": 191.213, |
| "eval_steps_per_second": 47.814, |
| "step": 7399 |
| }, |
| { |
| "epoch": 1.000135153399108, |
| "grad_norm": 0.07350551337003708, |
| "learning_rate": 1.481503729288682e-05, |
| "loss": 0.25518951416015623, |
| "step": 7400 |
| }, |
| { |
| "epoch": 1.0068928233545074, |
| "grad_norm": 0.13678599894046783, |
| "learning_rate": 1.4764979726685689e-05, |
| "loss": 0.18701498031616212, |
| "step": 7450 |
| }, |
| { |
| "epoch": 1.0136504933099066, |
| "grad_norm": 2.2193477153778076, |
| "learning_rate": 1.471492216048456e-05, |
| "loss": 0.2123020362854004, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.0204081632653061, |
| "grad_norm": 0.002282262546941638, |
| "learning_rate": 1.4664864594283427e-05, |
| "loss": 0.06725791931152343, |
| "step": 7550 |
| }, |
| { |
| "epoch": 1.0271658332207054, |
| "grad_norm": 0.018047912046313286, |
| "learning_rate": 1.4614807028082297e-05, |
| "loss": 0.05762751579284668, |
| "step": 7600 |
| }, |
| { |
| "epoch": 1.033923503176105, |
| "grad_norm": 0.018197333440184593, |
| "learning_rate": 1.4564749461881164e-05, |
| "loss": 0.2026105308532715, |
| "step": 7650 |
| }, |
| { |
| "epoch": 1.0406811731315042, |
| "grad_norm": 25.600358963012695, |
| "learning_rate": 1.4514691895680034e-05, |
| "loss": 0.3179864311218262, |
| "step": 7700 |
| }, |
| { |
| "epoch": 1.0474388430869037, |
| "grad_norm": 0.01227724552154541, |
| "learning_rate": 1.4464634329478901e-05, |
| "loss": 0.17692861557006836, |
| "step": 7750 |
| }, |
| { |
| "epoch": 1.054196513042303, |
| "grad_norm": 0.00431372644379735, |
| "learning_rate": 1.441457676327777e-05, |
| "loss": 0.16868959426879881, |
| "step": 7800 |
| }, |
| { |
| "epoch": 1.0609541829977025, |
| "grad_norm": 0.2933880686759949, |
| "learning_rate": 1.4364519197076638e-05, |
| "loss": 0.16839271545410156, |
| "step": 7850 |
| }, |
| { |
| "epoch": 1.0677118529531018, |
| "grad_norm": 0.08445457369089127, |
| "learning_rate": 1.431446163087551e-05, |
| "loss": 0.23334505081176757, |
| "step": 7900 |
| }, |
| { |
| "epoch": 1.074469522908501, |
| "grad_norm": 0.008567499928176403, |
| "learning_rate": 1.4264404064674377e-05, |
| "loss": 0.15990480422973632, |
| "step": 7950 |
| }, |
| { |
| "epoch": 1.0812271928639006, |
| "grad_norm": 0.04676676541566849, |
| "learning_rate": 1.4214346498473246e-05, |
| "loss": 0.16386571884155274, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.0879848628192998, |
| "grad_norm": 0.0389719195663929, |
| "learning_rate": 1.4164288932272114e-05, |
| "loss": 0.38469879150390623, |
| "step": 8050 |
| }, |
| { |
| "epoch": 1.0947425327746994, |
| "grad_norm": 13.620099067687988, |
| "learning_rate": 1.4114231366070983e-05, |
| "loss": 0.15156843185424804, |
| "step": 8100 |
| }, |
| { |
| "epoch": 1.1015002027300986, |
| "grad_norm": 14.422818183898926, |
| "learning_rate": 1.406417379986985e-05, |
| "loss": 0.23564334869384765, |
| "step": 8150 |
| }, |
| { |
| "epoch": 1.1082578726854981, |
| "grad_norm": 8.18301773071289, |
| "learning_rate": 1.401411623366872e-05, |
| "loss": 0.18750425338745116, |
| "step": 8200 |
| }, |
| { |
| "epoch": 1.1150155426408974, |
| "grad_norm": 15.054717063903809, |
| "learning_rate": 1.3964058667467588e-05, |
| "loss": 0.26675054550170896, |
| "step": 8250 |
| }, |
| { |
| "epoch": 1.1217732125962967, |
| "grad_norm": 0.11389277875423431, |
| "learning_rate": 1.3914001101266457e-05, |
| "loss": 0.12498929023742676, |
| "step": 8300 |
| }, |
| { |
| "epoch": 1.1285308825516962, |
| "grad_norm": 0.04972947761416435, |
| "learning_rate": 1.3863943535065328e-05, |
| "loss": 0.13664302825927735, |
| "step": 8350 |
| }, |
| { |
| "epoch": 1.1352885525070955, |
| "grad_norm": 17.056364059448242, |
| "learning_rate": 1.3813885968864195e-05, |
| "loss": 0.33427154541015625, |
| "step": 8400 |
| }, |
| { |
| "epoch": 1.142046222462495, |
| "grad_norm": 0.040144748985767365, |
| "learning_rate": 1.3763828402663065e-05, |
| "loss": 0.21294273376464845, |
| "step": 8450 |
| }, |
| { |
| "epoch": 1.1488038924178943, |
| "grad_norm": 0.18602368235588074, |
| "learning_rate": 1.3713770836461932e-05, |
| "loss": 0.18324586868286133, |
| "step": 8500 |
| }, |
| { |
| "epoch": 1.1555615623732938, |
| "grad_norm": 15.689005851745605, |
| "learning_rate": 1.3663713270260802e-05, |
| "loss": 0.2427196502685547, |
| "step": 8550 |
| }, |
| { |
| "epoch": 1.162319232328693, |
| "grad_norm": 23.67704200744629, |
| "learning_rate": 1.361365570405967e-05, |
| "loss": 0.1402696418762207, |
| "step": 8600 |
| }, |
| { |
| "epoch": 1.1690769022840923, |
| "grad_norm": 0.0033212420530617237, |
| "learning_rate": 1.3563598137858539e-05, |
| "loss": 0.12810823440551758, |
| "step": 8650 |
| }, |
| { |
| "epoch": 1.1758345722394918, |
| "grad_norm": 141.46917724609375, |
| "learning_rate": 1.3513540571657406e-05, |
| "loss": 0.30034215927124025, |
| "step": 8700 |
| }, |
| { |
| "epoch": 1.1825922421948911, |
| "grad_norm": 0.8686608672142029, |
| "learning_rate": 1.3463483005456275e-05, |
| "loss": 0.1589590072631836, |
| "step": 8750 |
| }, |
| { |
| "epoch": 1.1893499121502906, |
| "grad_norm": 0.063064344227314, |
| "learning_rate": 1.3413425439255145e-05, |
| "loss": 0.09607341766357422, |
| "step": 8800 |
| }, |
| { |
| "epoch": 1.19610758210569, |
| "grad_norm": 66.27935791015625, |
| "learning_rate": 1.3363367873054014e-05, |
| "loss": 0.2668747329711914, |
| "step": 8850 |
| }, |
| { |
| "epoch": 1.2028652520610894, |
| "grad_norm": 0.1408540904521942, |
| "learning_rate": 1.3313310306852882e-05, |
| "loss": 0.20421890258789063, |
| "step": 8900 |
| }, |
| { |
| "epoch": 1.2096229220164887, |
| "grad_norm": 0.23674733936786652, |
| "learning_rate": 1.3263252740651751e-05, |
| "loss": 0.0837314224243164, |
| "step": 8950 |
| }, |
| { |
| "epoch": 1.216380591971888, |
| "grad_norm": 0.0018713766476139426, |
| "learning_rate": 1.3213195174450619e-05, |
| "loss": 0.1790580177307129, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.2231382619272875, |
| "grad_norm": 21.07696533203125, |
| "learning_rate": 1.3163137608249488e-05, |
| "loss": 0.3062829780578613, |
| "step": 9050 |
| }, |
| { |
| "epoch": 1.2298959318826868, |
| "grad_norm": 0.0028690961189568043, |
| "learning_rate": 1.3113080042048356e-05, |
| "loss": 0.3135700798034668, |
| "step": 9100 |
| }, |
| { |
| "epoch": 1.2366536018380863, |
| "grad_norm": 0.03967598080635071, |
| "learning_rate": 1.3063022475847225e-05, |
| "loss": 0.07902798652648926, |
| "step": 9150 |
| }, |
| { |
| "epoch": 1.2434112717934855, |
| "grad_norm": 0.28184425830841064, |
| "learning_rate": 1.3012964909646092e-05, |
| "loss": 0.3031677055358887, |
| "step": 9200 |
| }, |
| { |
| "epoch": 1.250168941748885, |
| "grad_norm": 24.878887176513672, |
| "learning_rate": 1.2962907343444963e-05, |
| "loss": 0.1646289825439453, |
| "step": 9250 |
| }, |
| { |
| "epoch": 1.2569266117042843, |
| "grad_norm": 71.28195190429688, |
| "learning_rate": 1.2912849777243833e-05, |
| "loss": 0.42414035797119143, |
| "step": 9300 |
| }, |
| { |
| "epoch": 1.2636842816596836, |
| "grad_norm": 0.011118948459625244, |
| "learning_rate": 1.28627922110427e-05, |
| "loss": 0.14453999519348146, |
| "step": 9350 |
| }, |
| { |
| "epoch": 1.270441951615083, |
| "grad_norm": 1.3121373653411865, |
| "learning_rate": 1.281273464484157e-05, |
| "loss": 0.1707002067565918, |
| "step": 9400 |
| }, |
| { |
| "epoch": 1.2771996215704826, |
| "grad_norm": 0.003927276004105806, |
| "learning_rate": 1.2762677078640437e-05, |
| "loss": 0.17268417358398438, |
| "step": 9450 |
| }, |
| { |
| "epoch": 1.283957291525882, |
| "grad_norm": 0.1925577074289322, |
| "learning_rate": 1.2712619512439307e-05, |
| "loss": 0.09306806564331055, |
| "step": 9500 |
| }, |
| { |
| "epoch": 1.2907149614812812, |
| "grad_norm": 0.32519787549972534, |
| "learning_rate": 1.2662561946238174e-05, |
| "loss": 0.16181667327880858, |
| "step": 9550 |
| }, |
| { |
| "epoch": 1.2974726314366807, |
| "grad_norm": 0.09728897362947464, |
| "learning_rate": 1.2612504380037043e-05, |
| "loss": 0.15530454635620117, |
| "step": 9600 |
| }, |
| { |
| "epoch": 1.30423030139208, |
| "grad_norm": 0.5013231635093689, |
| "learning_rate": 1.2562446813835911e-05, |
| "loss": 0.2710952949523926, |
| "step": 9650 |
| }, |
| { |
| "epoch": 1.3109879713474795, |
| "grad_norm": 0.0033234574366360903, |
| "learning_rate": 1.2512389247634782e-05, |
| "loss": 0.1900665283203125, |
| "step": 9700 |
| }, |
| { |
| "epoch": 1.3177456413028787, |
| "grad_norm": 6.791006565093994, |
| "learning_rate": 1.246233168143365e-05, |
| "loss": 0.22926794052124022, |
| "step": 9750 |
| }, |
| { |
| "epoch": 1.3245033112582782, |
| "grad_norm": 0.014930491335690022, |
| "learning_rate": 1.2412274115232519e-05, |
| "loss": 0.19701622009277345, |
| "step": 9800 |
| }, |
| { |
| "epoch": 1.3312609812136775, |
| "grad_norm": 0.037327345460653305, |
| "learning_rate": 1.2362216549031387e-05, |
| "loss": 0.13671175003051758, |
| "step": 9850 |
| }, |
| { |
| "epoch": 1.3380186511690768, |
| "grad_norm": 0.11069410294294357, |
| "learning_rate": 1.2312158982830256e-05, |
| "loss": 0.2781560134887695, |
| "step": 9900 |
| }, |
| { |
| "epoch": 1.3447763211244763, |
| "grad_norm": 0.2557360827922821, |
| "learning_rate": 1.2262101416629123e-05, |
| "loss": 0.2896346664428711, |
| "step": 9950 |
| }, |
| { |
| "epoch": 1.3515339910798756, |
| "grad_norm": 0.004186369478702545, |
| "learning_rate": 1.2212043850427993e-05, |
| "loss": 0.14058467864990234, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.358291661035275, |
| "grad_norm": 0.5735684633255005, |
| "learning_rate": 1.216198628422686e-05, |
| "loss": 0.30132375717163085, |
| "step": 10050 |
| }, |
| { |
| "epoch": 1.3650493309906744, |
| "grad_norm": 0.061718910932540894, |
| "learning_rate": 1.2111928718025731e-05, |
| "loss": 0.21547920227050782, |
| "step": 10100 |
| }, |
| { |
| "epoch": 1.3718070009460739, |
| "grad_norm": 0.010497814044356346, |
| "learning_rate": 1.20618711518246e-05, |
| "loss": 0.1678770065307617, |
| "step": 10150 |
| }, |
| { |
| "epoch": 1.3785646709014732, |
| "grad_norm": 0.01137350220233202, |
| "learning_rate": 1.2011813585623468e-05, |
| "loss": 0.2268022346496582, |
| "step": 10200 |
| }, |
| { |
| "epoch": 1.3853223408568724, |
| "grad_norm": 73.45201110839844, |
| "learning_rate": 1.1961756019422338e-05, |
| "loss": 0.2648137092590332, |
| "step": 10250 |
| }, |
| { |
| "epoch": 1.392080010812272, |
| "grad_norm": 0.003874465124681592, |
| "learning_rate": 1.1911698453221205e-05, |
| "loss": 0.26919260025024416, |
| "step": 10300 |
| }, |
| { |
| "epoch": 1.3988376807676712, |
| "grad_norm": 0.04089919477701187, |
| "learning_rate": 1.1861640887020075e-05, |
| "loss": 0.29294668197631835, |
| "step": 10350 |
| }, |
| { |
| "epoch": 1.4055953507230707, |
| "grad_norm": 0.14030589163303375, |
| "learning_rate": 1.1811583320818942e-05, |
| "loss": 0.2179054832458496, |
| "step": 10400 |
| }, |
| { |
| "epoch": 1.41235302067847, |
| "grad_norm": 0.9217932820320129, |
| "learning_rate": 1.1761525754617811e-05, |
| "loss": 0.25105377197265627, |
| "step": 10450 |
| }, |
| { |
| "epoch": 1.4191106906338695, |
| "grad_norm": 185.11000061035156, |
| "learning_rate": 1.1711468188416679e-05, |
| "loss": 0.22989677429199218, |
| "step": 10500 |
| }, |
| { |
| "epoch": 1.4258683605892688, |
| "grad_norm": 29.782814025878906, |
| "learning_rate": 1.166141062221555e-05, |
| "loss": 0.20334575653076173, |
| "step": 10550 |
| }, |
| { |
| "epoch": 1.432626030544668, |
| "grad_norm": 11.781908988952637, |
| "learning_rate": 1.1611353056014418e-05, |
| "loss": 0.1873354721069336, |
| "step": 10600 |
| }, |
| { |
| "epoch": 1.4393837005000676, |
| "grad_norm": 0.017826354131102562, |
| "learning_rate": 1.1561295489813287e-05, |
| "loss": 0.11134927749633788, |
| "step": 10650 |
| }, |
| { |
| "epoch": 1.446141370455467, |
| "grad_norm": 0.03723045065999031, |
| "learning_rate": 1.1511237923612155e-05, |
| "loss": 0.19197362899780274, |
| "step": 10700 |
| }, |
| { |
| "epoch": 1.4528990404108664, |
| "grad_norm": 14.280227661132812, |
| "learning_rate": 1.1461180357411024e-05, |
| "loss": 0.23998620986938476, |
| "step": 10750 |
| }, |
| { |
| "epoch": 1.4596567103662657, |
| "grad_norm": 0.002761346288025379, |
| "learning_rate": 1.1411122791209891e-05, |
| "loss": 0.07881230354309082, |
| "step": 10800 |
| }, |
| { |
| "epoch": 1.4664143803216652, |
| "grad_norm": 0.14611680805683136, |
| "learning_rate": 1.136106522500876e-05, |
| "loss": 0.22946516036987305, |
| "step": 10850 |
| }, |
| { |
| "epoch": 1.4731720502770644, |
| "grad_norm": 2.264693021774292, |
| "learning_rate": 1.1311007658807628e-05, |
| "loss": 0.17156749725341797, |
| "step": 10900 |
| }, |
| { |
| "epoch": 1.4799297202324637, |
| "grad_norm": 0.5791627764701843, |
| "learning_rate": 1.1260950092606498e-05, |
| "loss": 0.17092864990234374, |
| "step": 10950 |
| }, |
| { |
| "epoch": 1.4866873901878632, |
| "grad_norm": 0.30093225836753845, |
| "learning_rate": 1.1210892526405369e-05, |
| "loss": 0.30757621765136717, |
| "step": 11000 |
| }, |
| { |
| "epoch": 1.4934450601432627, |
| "grad_norm": 0.008798571303486824, |
| "learning_rate": 1.1160834960204236e-05, |
| "loss": 0.17301080703735353, |
| "step": 11050 |
| }, |
| { |
| "epoch": 1.500202730098662, |
| "grad_norm": 0.026798205450177193, |
| "learning_rate": 1.1110777394003106e-05, |
| "loss": 0.15110500335693358, |
| "step": 11100 |
| }, |
| { |
| "epoch": 1.5069604000540613, |
| "grad_norm": 80.63719177246094, |
| "learning_rate": 1.1060719827801973e-05, |
| "loss": 0.2579657173156738, |
| "step": 11150 |
| }, |
| { |
| "epoch": 1.5137180700094608, |
| "grad_norm": 0.006884767208248377, |
| "learning_rate": 1.1010662261600842e-05, |
| "loss": 0.1731630325317383, |
| "step": 11200 |
| }, |
| { |
| "epoch": 1.52047573996486, |
| "grad_norm": 0.020346157252788544, |
| "learning_rate": 1.096060469539971e-05, |
| "loss": 0.08803058624267578, |
| "step": 11250 |
| }, |
| { |
| "epoch": 1.5272334099202594, |
| "grad_norm": 0.4605032801628113, |
| "learning_rate": 1.091054712919858e-05, |
| "loss": 0.3004783058166504, |
| "step": 11300 |
| }, |
| { |
| "epoch": 1.5339910798756589, |
| "grad_norm": 54.79648971557617, |
| "learning_rate": 1.0860489562997447e-05, |
| "loss": 0.2580095291137695, |
| "step": 11350 |
| }, |
| { |
| "epoch": 1.5407487498310584, |
| "grad_norm": 0.007759020198136568, |
| "learning_rate": 1.0810431996796316e-05, |
| "loss": 0.2316766357421875, |
| "step": 11400 |
| }, |
| { |
| "epoch": 1.5475064197864576, |
| "grad_norm": 1.0416474342346191, |
| "learning_rate": 1.0760374430595186e-05, |
| "loss": 0.10510608673095703, |
| "step": 11450 |
| }, |
| { |
| "epoch": 1.554264089741857, |
| "grad_norm": 0.029644185677170753, |
| "learning_rate": 1.0710316864394055e-05, |
| "loss": 0.2700320816040039, |
| "step": 11500 |
| }, |
| { |
| "epoch": 1.5610217596972564, |
| "grad_norm": 0.06986944377422333, |
| "learning_rate": 1.0660259298192923e-05, |
| "loss": 0.27375816345214843, |
| "step": 11550 |
| }, |
| { |
| "epoch": 1.5677794296526557, |
| "grad_norm": 0.0046129655092954636, |
| "learning_rate": 1.0610201731991792e-05, |
| "loss": 0.13367100715637206, |
| "step": 11600 |
| }, |
| { |
| "epoch": 1.574537099608055, |
| "grad_norm": 0.007804605178534985, |
| "learning_rate": 1.056014416579066e-05, |
| "loss": 0.2514232063293457, |
| "step": 11650 |
| }, |
| { |
| "epoch": 1.5812947695634545, |
| "grad_norm": 0.0645962581038475, |
| "learning_rate": 1.0510086599589529e-05, |
| "loss": 0.18371934890747071, |
| "step": 11700 |
| }, |
| { |
| "epoch": 1.588052439518854, |
| "grad_norm": 0.0033988540526479483, |
| "learning_rate": 1.0460029033388396e-05, |
| "loss": 0.21081953048706054, |
| "step": 11750 |
| }, |
| { |
| "epoch": 1.5948101094742533, |
| "grad_norm": 0.04411851987242699, |
| "learning_rate": 1.0409971467187266e-05, |
| "loss": 0.12850214004516602, |
| "step": 11800 |
| }, |
| { |
| "epoch": 1.6015677794296526, |
| "grad_norm": 0.018026337027549744, |
| "learning_rate": 1.0359913900986133e-05, |
| "loss": 0.1492741584777832, |
| "step": 11850 |
| }, |
| { |
| "epoch": 1.608325449385052, |
| "grad_norm": 0.09057886898517609, |
| "learning_rate": 1.0309856334785004e-05, |
| "loss": 0.1486029529571533, |
| "step": 11900 |
| }, |
| { |
| "epoch": 1.6150831193404516, |
| "grad_norm": 0.0102351950481534, |
| "learning_rate": 1.0259798768583874e-05, |
| "loss": 0.23811851501464842, |
| "step": 11950 |
| }, |
| { |
| "epoch": 1.6218407892958506, |
| "grad_norm": 0.05652283504605293, |
| "learning_rate": 1.0209741202382741e-05, |
| "loss": 0.13805081367492675, |
| "step": 12000 |
| }, |
| { |
| "epoch": 1.6285984592512501, |
| "grad_norm": 0.07080666720867157, |
| "learning_rate": 1.015968363618161e-05, |
| "loss": 0.12881503105163575, |
| "step": 12050 |
| }, |
| { |
| "epoch": 1.6353561292066496, |
| "grad_norm": 0.018375622108578682, |
| "learning_rate": 1.0109626069980478e-05, |
| "loss": 0.2675124931335449, |
| "step": 12100 |
| }, |
| { |
| "epoch": 1.642113799162049, |
| "grad_norm": 0.02257567085325718, |
| "learning_rate": 1.0059568503779347e-05, |
| "loss": 0.32150630950927733, |
| "step": 12150 |
| }, |
| { |
| "epoch": 1.6488714691174482, |
| "grad_norm": 0.1083192378282547, |
| "learning_rate": 1.0009510937578215e-05, |
| "loss": 0.23064302444458007, |
| "step": 12200 |
| }, |
| { |
| "epoch": 1.6556291390728477, |
| "grad_norm": 0.038683537393808365, |
| "learning_rate": 9.959453371377084e-06, |
| "loss": 0.30872579574584963, |
| "step": 12250 |
| }, |
| { |
| "epoch": 1.6623868090282472, |
| "grad_norm": 0.009854331612586975, |
| "learning_rate": 9.909395805175954e-06, |
| "loss": 0.2523124885559082, |
| "step": 12300 |
| }, |
| { |
| "epoch": 1.6691444789836465, |
| "grad_norm": 0.11421715468168259, |
| "learning_rate": 9.859338238974821e-06, |
| "loss": 0.22798009872436523, |
| "step": 12350 |
| }, |
| { |
| "epoch": 1.6759021489390458, |
| "grad_norm": 0.012904458679258823, |
| "learning_rate": 9.80928067277369e-06, |
| "loss": 0.11912490844726563, |
| "step": 12400 |
| }, |
| { |
| "epoch": 1.6826598188944453, |
| "grad_norm": 0.004866345319896936, |
| "learning_rate": 9.75922310657256e-06, |
| "loss": 0.17411937713623046, |
| "step": 12450 |
| }, |
| { |
| "epoch": 1.6894174888498446, |
| "grad_norm": 0.06953968107700348, |
| "learning_rate": 9.709165540371427e-06, |
| "loss": 0.15718982696533204, |
| "step": 12500 |
| }, |
| { |
| "epoch": 1.6961751588052438, |
| "grad_norm": 0.30232375860214233, |
| "learning_rate": 9.659107974170297e-06, |
| "loss": 0.3242319107055664, |
| "step": 12550 |
| }, |
| { |
| "epoch": 1.7029328287606433, |
| "grad_norm": 0.009811073541641235, |
| "learning_rate": 9.609050407969164e-06, |
| "loss": 0.16056331634521484, |
| "step": 12600 |
| }, |
| { |
| "epoch": 1.7096904987160428, |
| "grad_norm": 0.0033976894337683916, |
| "learning_rate": 9.558992841768034e-06, |
| "loss": 0.21641508102416993, |
| "step": 12650 |
| }, |
| { |
| "epoch": 1.7164481686714421, |
| "grad_norm": 0.019763845950365067, |
| "learning_rate": 9.508935275566903e-06, |
| "loss": 0.20622289657592774, |
| "step": 12700 |
| }, |
| { |
| "epoch": 1.7232058386268414, |
| "grad_norm": 0.03865963965654373, |
| "learning_rate": 9.45887770936577e-06, |
| "loss": 0.24815959930419923, |
| "step": 12750 |
| }, |
| { |
| "epoch": 1.729963508582241, |
| "grad_norm": 0.44768020510673523, |
| "learning_rate": 9.40882014316464e-06, |
| "loss": 0.12028050422668457, |
| "step": 12800 |
| }, |
| { |
| "epoch": 1.7367211785376402, |
| "grad_norm": 0.022567948326468468, |
| "learning_rate": 9.358762576963509e-06, |
| "loss": 0.22824619293212892, |
| "step": 12850 |
| }, |
| { |
| "epoch": 1.7434788484930395, |
| "grad_norm": 0.004544971976429224, |
| "learning_rate": 9.308705010762378e-06, |
| "loss": 0.12526805877685546, |
| "step": 12900 |
| }, |
| { |
| "epoch": 1.750236518448439, |
| "grad_norm": 0.21990123391151428, |
| "learning_rate": 9.258647444561246e-06, |
| "loss": 0.3063252067565918, |
| "step": 12950 |
| }, |
| { |
| "epoch": 1.7569941884038385, |
| "grad_norm": 0.0023501410614699125, |
| "learning_rate": 9.208589878360115e-06, |
| "loss": 0.25366107940673827, |
| "step": 13000 |
| }, |
| { |
| "epoch": 1.7637518583592378, |
| "grad_norm": 0.20313404500484467, |
| "learning_rate": 9.158532312158983e-06, |
| "loss": 0.1626577377319336, |
| "step": 13050 |
| }, |
| { |
| "epoch": 1.770509528314637, |
| "grad_norm": 0.14616358280181885, |
| "learning_rate": 9.108474745957852e-06, |
| "loss": 0.16987455368041993, |
| "step": 13100 |
| }, |
| { |
| "epoch": 1.7772671982700365, |
| "grad_norm": 0.010072534903883934, |
| "learning_rate": 9.058417179756722e-06, |
| "loss": 0.22326894760131835, |
| "step": 13150 |
| }, |
| { |
| "epoch": 1.7840248682254358, |
| "grad_norm": 0.01229020394384861, |
| "learning_rate": 9.00835961355559e-06, |
| "loss": 0.18657752990722656, |
| "step": 13200 |
| }, |
| { |
| "epoch": 1.790782538180835, |
| "grad_norm": 0.009742148220539093, |
| "learning_rate": 8.958302047354458e-06, |
| "loss": 0.14471253395080566, |
| "step": 13250 |
| }, |
| { |
| "epoch": 1.7975402081362346, |
| "grad_norm": 0.3649709224700928, |
| "learning_rate": 8.908244481153328e-06, |
| "loss": 0.2663186264038086, |
| "step": 13300 |
| }, |
| { |
| "epoch": 1.804297878091634, |
| "grad_norm": 0.7191339135169983, |
| "learning_rate": 8.858186914952195e-06, |
| "loss": 0.17694835662841796, |
| "step": 13350 |
| }, |
| { |
| "epoch": 1.8110555480470334, |
| "grad_norm": 0.13810834288597107, |
| "learning_rate": 8.808129348751065e-06, |
| "loss": 0.13321252822875976, |
| "step": 13400 |
| }, |
| { |
| "epoch": 1.8178132180024327, |
| "grad_norm": 0.013841865584254265, |
| "learning_rate": 8.758071782549932e-06, |
| "loss": 0.2504547882080078, |
| "step": 13450 |
| }, |
| { |
| "epoch": 1.8245708879578322, |
| "grad_norm": 0.02471834421157837, |
| "learning_rate": 8.708014216348802e-06, |
| "loss": 0.16575366973876954, |
| "step": 13500 |
| }, |
| { |
| "epoch": 1.8313285579132317, |
| "grad_norm": 0.2838708758354187, |
| "learning_rate": 8.657956650147671e-06, |
| "loss": 0.241872615814209, |
| "step": 13550 |
| }, |
| { |
| "epoch": 1.8380862278686307, |
| "grad_norm": 0.0604943223297596, |
| "learning_rate": 8.607899083946539e-06, |
| "loss": 0.17180231094360351, |
| "step": 13600 |
| }, |
| { |
| "epoch": 1.8448438978240302, |
| "grad_norm": 70.3255844116211, |
| "learning_rate": 8.557841517745408e-06, |
| "loss": 0.1580354404449463, |
| "step": 13650 |
| }, |
| { |
| "epoch": 1.8516015677794297, |
| "grad_norm": 3.8454079627990723, |
| "learning_rate": 8.507783951544275e-06, |
| "loss": 0.13566259384155274, |
| "step": 13700 |
| }, |
| { |
| "epoch": 1.858359237734829, |
| "grad_norm": 0.02096381038427353, |
| "learning_rate": 8.457726385343146e-06, |
| "loss": 0.23864656448364258, |
| "step": 13750 |
| }, |
| { |
| "epoch": 1.8651169076902283, |
| "grad_norm": 0.010230190120637417, |
| "learning_rate": 8.407668819142014e-06, |
| "loss": 0.18619832992553711, |
| "step": 13800 |
| }, |
| { |
| "epoch": 1.8718745776456278, |
| "grad_norm": 0.1908511072397232, |
| "learning_rate": 8.357611252940883e-06, |
| "loss": 0.2673153877258301, |
| "step": 13850 |
| }, |
| { |
| "epoch": 1.8786322476010273, |
| "grad_norm": 11.70110034942627, |
| "learning_rate": 8.307553686739751e-06, |
| "loss": 0.2062428665161133, |
| "step": 13900 |
| }, |
| { |
| "epoch": 1.8853899175564266, |
| "grad_norm": 24.96676254272461, |
| "learning_rate": 8.25749612053862e-06, |
| "loss": 0.20080638885498048, |
| "step": 13950 |
| }, |
| { |
| "epoch": 1.8921475875118259, |
| "grad_norm": 0.05246945098042488, |
| "learning_rate": 8.20743855433749e-06, |
| "loss": 0.11393006324768067, |
| "step": 14000 |
| }, |
| { |
| "epoch": 1.8989052574672254, |
| "grad_norm": 0.15934395790100098, |
| "learning_rate": 8.157380988136357e-06, |
| "loss": 0.11714041709899903, |
| "step": 14050 |
| }, |
| { |
| "epoch": 1.9056629274226247, |
| "grad_norm": 0.30261462926864624, |
| "learning_rate": 8.107323421935226e-06, |
| "loss": 0.21116409301757813, |
| "step": 14100 |
| }, |
| { |
| "epoch": 1.912420597378024, |
| "grad_norm": 0.02992912568151951, |
| "learning_rate": 8.057265855734094e-06, |
| "loss": 0.2957320213317871, |
| "step": 14150 |
| }, |
| { |
| "epoch": 1.9191782673334234, |
| "grad_norm": 556.4647216796875, |
| "learning_rate": 8.007208289532963e-06, |
| "loss": 0.14229880332946776, |
| "step": 14200 |
| }, |
| { |
| "epoch": 1.925935937288823, |
| "grad_norm": 0.35265979170799255, |
| "learning_rate": 7.957150723331833e-06, |
| "loss": 0.2273382568359375, |
| "step": 14250 |
| }, |
| { |
| "epoch": 1.9326936072442222, |
| "grad_norm": 0.0018637154716998339, |
| "learning_rate": 7.9070931571307e-06, |
| "loss": 0.1868155097961426, |
| "step": 14300 |
| }, |
| { |
| "epoch": 1.9394512771996215, |
| "grad_norm": 0.007218921557068825, |
| "learning_rate": 7.85703559092957e-06, |
| "loss": 0.18037618637084962, |
| "step": 14350 |
| }, |
| { |
| "epoch": 1.946208947155021, |
| "grad_norm": 0.6409568190574646, |
| "learning_rate": 7.806978024728439e-06, |
| "loss": 0.25310573577880857, |
| "step": 14400 |
| }, |
| { |
| "epoch": 1.9529666171104203, |
| "grad_norm": 0.010283703915774822, |
| "learning_rate": 7.756920458527306e-06, |
| "loss": 0.17320125579833984, |
| "step": 14450 |
| }, |
| { |
| "epoch": 1.9597242870658196, |
| "grad_norm": 0.012976414524018764, |
| "learning_rate": 7.706862892326176e-06, |
| "loss": 0.2135805892944336, |
| "step": 14500 |
| }, |
| { |
| "epoch": 1.966481957021219, |
| "grad_norm": 0.0458478182554245, |
| "learning_rate": 7.656805326125043e-06, |
| "loss": 0.1271858787536621, |
| "step": 14550 |
| }, |
| { |
| "epoch": 1.9732396269766186, |
| "grad_norm": 45.8770637512207, |
| "learning_rate": 7.606747759923913e-06, |
| "loss": 0.2798696327209473, |
| "step": 14600 |
| }, |
| { |
| "epoch": 1.9799972969320179, |
| "grad_norm": 0.010839959606528282, |
| "learning_rate": 7.556690193722782e-06, |
| "loss": 0.17837514877319335, |
| "step": 14650 |
| }, |
| { |
| "epoch": 1.9867549668874172, |
| "grad_norm": 0.3253706991672516, |
| "learning_rate": 7.5066326275216505e-06, |
| "loss": 0.16589849472045898, |
| "step": 14700 |
| }, |
| { |
| "epoch": 1.9935126368428167, |
| "grad_norm": 0.04959991201758385, |
| "learning_rate": 7.456575061320519e-06, |
| "loss": 0.13824424743652344, |
| "step": 14750 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.9710885651079411, |
| "eval_f1": 0.9687783178186012, |
| "eval_loss": 0.13719096779823303, |
| "eval_precision": 0.9664694280078896, |
| "eval_recall": 0.9710982658959537, |
| "eval_runtime": 72.1607, |
| "eval_samples_per_second": 181.664, |
| "eval_steps_per_second": 45.426, |
| "step": 14798 |
| }, |
| { |
| "epoch": 2.000270306798216, |
| "grad_norm": 0.25084859132766724, |
| "learning_rate": 7.406517495119387e-06, |
| "loss": 0.17634214401245119, |
| "step": 14800 |
| }, |
| { |
| "epoch": 2.007027976753615, |
| "grad_norm": 0.23022498190402985, |
| "learning_rate": 7.356459928918257e-06, |
| "loss": 0.06952855110168457, |
| "step": 14850 |
| }, |
| { |
| "epoch": 2.0137856467090147, |
| "grad_norm": 0.27114880084991455, |
| "learning_rate": 7.306402362717125e-06, |
| "loss": 0.044750089645385745, |
| "step": 14900 |
| }, |
| { |
| "epoch": 2.0205433166644142, |
| "grad_norm": 0.8851802945137024, |
| "learning_rate": 7.256344796515994e-06, |
| "loss": 0.21987371444702147, |
| "step": 14950 |
| }, |
| { |
| "epoch": 2.0273009866198133, |
| "grad_norm": 0.1222100630402565, |
| "learning_rate": 7.206287230314862e-06, |
| "loss": 0.026863176822662354, |
| "step": 15000 |
| }, |
| { |
| "epoch": 2.034058656575213, |
| "grad_norm": 0.06605394929647446, |
| "learning_rate": 7.156229664113732e-06, |
| "loss": 0.1442209243774414, |
| "step": 15050 |
| }, |
| { |
| "epoch": 2.0408163265306123, |
| "grad_norm": 0.007441596128046513, |
| "learning_rate": 7.106172097912601e-06, |
| "loss": 0.0016956537961959838, |
| "step": 15100 |
| }, |
| { |
| "epoch": 2.047573996486012, |
| "grad_norm": 0.00727056385949254, |
| "learning_rate": 7.056114531711469e-06, |
| "loss": 0.08436053276062011, |
| "step": 15150 |
| }, |
| { |
| "epoch": 2.054331666441411, |
| "grad_norm": 0.0015993643319234252, |
| "learning_rate": 7.0060569655103376e-06, |
| "loss": 0.08930938720703124, |
| "step": 15200 |
| }, |
| { |
| "epoch": 2.0610893363968104, |
| "grad_norm": 0.16863678395748138, |
| "learning_rate": 6.955999399309206e-06, |
| "loss": 0.13128344535827638, |
| "step": 15250 |
| }, |
| { |
| "epoch": 2.06784700635221, |
| "grad_norm": 0.9009956121444702, |
| "learning_rate": 6.905941833108075e-06, |
| "loss": 0.027562847137451173, |
| "step": 15300 |
| }, |
| { |
| "epoch": 2.074604676307609, |
| "grad_norm": 0.06319503486156464, |
| "learning_rate": 6.855884266906944e-06, |
| "loss": 0.059659385681152345, |
| "step": 15350 |
| }, |
| { |
| "epoch": 2.0813623462630084, |
| "grad_norm": 0.002058418933302164, |
| "learning_rate": 6.805826700705812e-06, |
| "loss": 0.0802430534362793, |
| "step": 15400 |
| }, |
| { |
| "epoch": 2.088120016218408, |
| "grad_norm": 0.001060748123563826, |
| "learning_rate": 6.755769134504681e-06, |
| "loss": 0.054065885543823244, |
| "step": 15450 |
| }, |
| { |
| "epoch": 2.0948776861738074, |
| "grad_norm": 0.06381445378065109, |
| "learning_rate": 6.70571156830355e-06, |
| "loss": 0.21190324783325196, |
| "step": 15500 |
| }, |
| { |
| "epoch": 2.1016353561292065, |
| "grad_norm": 16.87372589111328, |
| "learning_rate": 6.6556540021024184e-06, |
| "loss": 0.1262486171722412, |
| "step": 15550 |
| }, |
| { |
| "epoch": 2.108393026084606, |
| "grad_norm": 0.004437705967575312, |
| "learning_rate": 6.605596435901287e-06, |
| "loss": 0.06593876361846923, |
| "step": 15600 |
| }, |
| { |
| "epoch": 2.1151506960400055, |
| "grad_norm": 395.3876647949219, |
| "learning_rate": 6.555538869700155e-06, |
| "loss": 0.09583724975585937, |
| "step": 15650 |
| }, |
| { |
| "epoch": 2.121908365995405, |
| "grad_norm": 0.7663223147392273, |
| "learning_rate": 6.505481303499025e-06, |
| "loss": 0.1377907657623291, |
| "step": 15700 |
| }, |
| { |
| "epoch": 2.128666035950804, |
| "grad_norm": 0.12682226300239563, |
| "learning_rate": 6.455423737297893e-06, |
| "loss": 0.17662681579589845, |
| "step": 15750 |
| }, |
| { |
| "epoch": 2.1354237059062036, |
| "grad_norm": 0.03758738934993744, |
| "learning_rate": 6.4053661710967616e-06, |
| "loss": 0.06712995529174805, |
| "step": 15800 |
| }, |
| { |
| "epoch": 2.142181375861603, |
| "grad_norm": 0.00288871256634593, |
| "learning_rate": 6.35530860489563e-06, |
| "loss": 0.1376853370666504, |
| "step": 15850 |
| }, |
| { |
| "epoch": 2.148939045817002, |
| "grad_norm": 0.07170133292675018, |
| "learning_rate": 6.3052510386944985e-06, |
| "loss": 0.22661022186279298, |
| "step": 15900 |
| }, |
| { |
| "epoch": 2.1556967157724016, |
| "grad_norm": 2.242140054702759, |
| "learning_rate": 6.255193472493369e-06, |
| "loss": 0.19371969223022462, |
| "step": 15950 |
| }, |
| { |
| "epoch": 2.162454385727801, |
| "grad_norm": 0.007292329799383879, |
| "learning_rate": 6.205135906292237e-06, |
| "loss": 0.14007676124572754, |
| "step": 16000 |
| }, |
| { |
| "epoch": 2.1692120556832006, |
| "grad_norm": 0.010140486061573029, |
| "learning_rate": 6.1550783400911055e-06, |
| "loss": 0.11522891998291016, |
| "step": 16050 |
| }, |
| { |
| "epoch": 2.1759697256385997, |
| "grad_norm": 0.019118858501315117, |
| "learning_rate": 6.105020773889974e-06, |
| "loss": 0.10537444114685059, |
| "step": 16100 |
| }, |
| { |
| "epoch": 2.182727395593999, |
| "grad_norm": 0.002554641803726554, |
| "learning_rate": 6.054963207688843e-06, |
| "loss": 0.08890585899353028, |
| "step": 16150 |
| }, |
| { |
| "epoch": 2.1894850655493987, |
| "grad_norm": 0.005230333656072617, |
| "learning_rate": 6.004905641487712e-06, |
| "loss": 0.060731606483459474, |
| "step": 16200 |
| }, |
| { |
| "epoch": 2.1962427355047978, |
| "grad_norm": 0.46126317977905273, |
| "learning_rate": 5.95484807528658e-06, |
| "loss": 0.06578202724456787, |
| "step": 16250 |
| }, |
| { |
| "epoch": 2.2030004054601973, |
| "grad_norm": 0.0038304554764181376, |
| "learning_rate": 5.904790509085449e-06, |
| "loss": 0.09989256858825683, |
| "step": 16300 |
| }, |
| { |
| "epoch": 2.2097580754155968, |
| "grad_norm": 0.010909990407526493, |
| "learning_rate": 5.854732942884317e-06, |
| "loss": 0.06423632144927978, |
| "step": 16350 |
| }, |
| { |
| "epoch": 2.2165157453709963, |
| "grad_norm": 0.012528044171631336, |
| "learning_rate": 5.804675376683186e-06, |
| "loss": 0.17703054428100587, |
| "step": 16400 |
| }, |
| { |
| "epoch": 2.2232734153263953, |
| "grad_norm": 360.2218933105469, |
| "learning_rate": 5.754617810482055e-06, |
| "loss": 0.06444434165954589, |
| "step": 16450 |
| }, |
| { |
| "epoch": 2.230031085281795, |
| "grad_norm": 0.005636855959892273, |
| "learning_rate": 5.704560244280923e-06, |
| "loss": 0.01724057674407959, |
| "step": 16500 |
| }, |
| { |
| "epoch": 2.2367887552371943, |
| "grad_norm": 0.0011315088486298919, |
| "learning_rate": 5.654502678079792e-06, |
| "loss": 0.11380767822265625, |
| "step": 16550 |
| }, |
| { |
| "epoch": 2.2435464251925934, |
| "grad_norm": 0.001966334879398346, |
| "learning_rate": 5.604445111878661e-06, |
| "loss": 0.047862215042114256, |
| "step": 16600 |
| }, |
| { |
| "epoch": 2.250304095147993, |
| "grad_norm": 0.0012165512889623642, |
| "learning_rate": 5.5543875456775295e-06, |
| "loss": 0.07931708812713623, |
| "step": 16650 |
| }, |
| { |
| "epoch": 2.2570617651033924, |
| "grad_norm": 0.009946290403604507, |
| "learning_rate": 5.504329979476398e-06, |
| "loss": 0.1210904598236084, |
| "step": 16700 |
| }, |
| { |
| "epoch": 2.263819435058792, |
| "grad_norm": 0.0033872355706989765, |
| "learning_rate": 5.4542724132752664e-06, |
| "loss": 0.06890227317810059, |
| "step": 16750 |
| }, |
| { |
| "epoch": 2.270577105014191, |
| "grad_norm": 0.01570816896855831, |
| "learning_rate": 5.404214847074137e-06, |
| "loss": 0.050945615768432616, |
| "step": 16800 |
| }, |
| { |
| "epoch": 2.2773347749695905, |
| "grad_norm": 0.504207968711853, |
| "learning_rate": 5.354157280873005e-06, |
| "loss": 0.04987267017364502, |
| "step": 16850 |
| }, |
| { |
| "epoch": 2.28409244492499, |
| "grad_norm": 0.0023474390618503094, |
| "learning_rate": 5.3040997146718735e-06, |
| "loss": 0.07442074298858642, |
| "step": 16900 |
| }, |
| { |
| "epoch": 2.2908501148803895, |
| "grad_norm": 0.2789280414581299, |
| "learning_rate": 5.254042148470742e-06, |
| "loss": 0.04266136169433594, |
| "step": 16950 |
| }, |
| { |
| "epoch": 2.2976077848357885, |
| "grad_norm": 0.14824353158473969, |
| "learning_rate": 5.20398458226961e-06, |
| "loss": 0.16466136932373046, |
| "step": 17000 |
| }, |
| { |
| "epoch": 2.304365454791188, |
| "grad_norm": 0.010403298772871494, |
| "learning_rate": 5.15392701606848e-06, |
| "loss": 0.12755483627319336, |
| "step": 17050 |
| }, |
| { |
| "epoch": 2.3111231247465875, |
| "grad_norm": 70.99131774902344, |
| "learning_rate": 5.103869449867348e-06, |
| "loss": 0.17195240020751953, |
| "step": 17100 |
| }, |
| { |
| "epoch": 2.3178807947019866, |
| "grad_norm": 0.006223689764738083, |
| "learning_rate": 5.053811883666217e-06, |
| "loss": 0.020604298114776612, |
| "step": 17150 |
| }, |
| { |
| "epoch": 2.324638464657386, |
| "grad_norm": 0.011977083049714565, |
| "learning_rate": 5.003754317465085e-06, |
| "loss": 0.08143982887268067, |
| "step": 17200 |
| }, |
| { |
| "epoch": 2.3313961346127856, |
| "grad_norm": 0.014804004691541195, |
| "learning_rate": 4.9536967512639535e-06, |
| "loss": 0.1274884796142578, |
| "step": 17250 |
| }, |
| { |
| "epoch": 2.3381538045681847, |
| "grad_norm": 0.01787167228758335, |
| "learning_rate": 4.903639185062823e-06, |
| "loss": 0.05703251838684082, |
| "step": 17300 |
| }, |
| { |
| "epoch": 2.344911474523584, |
| "grad_norm": 0.020681528374552727, |
| "learning_rate": 4.853581618861691e-06, |
| "loss": 0.11474921226501465, |
| "step": 17350 |
| }, |
| { |
| "epoch": 2.3516691444789837, |
| "grad_norm": 0.013767705298960209, |
| "learning_rate": 4.80352405266056e-06, |
| "loss": 0.09019528388977051, |
| "step": 17400 |
| }, |
| { |
| "epoch": 2.358426814434383, |
| "grad_norm": 0.0029704535845667124, |
| "learning_rate": 4.753466486459429e-06, |
| "loss": 0.07326930046081542, |
| "step": 17450 |
| }, |
| { |
| "epoch": 2.3651844843897822, |
| "grad_norm": 0.0017279853345826268, |
| "learning_rate": 4.7034089202582975e-06, |
| "loss": 0.09494049072265626, |
| "step": 17500 |
| }, |
| { |
| "epoch": 2.3719421543451817, |
| "grad_norm": 0.01662347838282585, |
| "learning_rate": 4.653351354057166e-06, |
| "loss": 0.08398569107055665, |
| "step": 17550 |
| }, |
| { |
| "epoch": 2.3786998243005812, |
| "grad_norm": 0.029822053387761116, |
| "learning_rate": 4.603293787856034e-06, |
| "loss": 0.0965285587310791, |
| "step": 17600 |
| }, |
| { |
| "epoch": 2.3854574942559807, |
| "grad_norm": 0.23640286922454834, |
| "learning_rate": 4.553236221654904e-06, |
| "loss": 0.04336097717285156, |
| "step": 17650 |
| }, |
| { |
| "epoch": 2.39221516421138, |
| "grad_norm": 0.01468204241245985, |
| "learning_rate": 4.503178655453772e-06, |
| "loss": 0.1220925521850586, |
| "step": 17700 |
| }, |
| { |
| "epoch": 2.3989728341667793, |
| "grad_norm": 0.03710741922259331, |
| "learning_rate": 4.4531210892526415e-06, |
| "loss": 0.14217045783996582, |
| "step": 17750 |
| }, |
| { |
| "epoch": 2.405730504122179, |
| "grad_norm": 0.001643803552724421, |
| "learning_rate": 4.40306352305151e-06, |
| "loss": 0.0781629228591919, |
| "step": 17800 |
| }, |
| { |
| "epoch": 2.412488174077578, |
| "grad_norm": 0.005556935910135508, |
| "learning_rate": 4.353005956850378e-06, |
| "loss": 0.06303605079650879, |
| "step": 17850 |
| }, |
| { |
| "epoch": 2.4192458440329774, |
| "grad_norm": 0.0012224218808114529, |
| "learning_rate": 4.302948390649247e-06, |
| "loss": 0.06739924907684326, |
| "step": 17900 |
| }, |
| { |
| "epoch": 2.426003513988377, |
| "grad_norm": 0.007048673462122679, |
| "learning_rate": 4.252890824448115e-06, |
| "loss": 0.15105469703674315, |
| "step": 17950 |
| }, |
| { |
| "epoch": 2.432761183943776, |
| "grad_norm": 0.018857238814234734, |
| "learning_rate": 4.202833258246985e-06, |
| "loss": 0.05414244651794434, |
| "step": 18000 |
| }, |
| { |
| "epoch": 2.4395188538991754, |
| "grad_norm": 0.04056254029273987, |
| "learning_rate": 4.152775692045853e-06, |
| "loss": 0.08572115898132324, |
| "step": 18050 |
| }, |
| { |
| "epoch": 2.446276523854575, |
| "grad_norm": 0.004304830450564623, |
| "learning_rate": 4.1027181258447215e-06, |
| "loss": 0.10598690032958985, |
| "step": 18100 |
| }, |
| { |
| "epoch": 2.4530341938099745, |
| "grad_norm": 0.45981258153915405, |
| "learning_rate": 4.05266055964359e-06, |
| "loss": 0.1520198917388916, |
| "step": 18150 |
| }, |
| { |
| "epoch": 2.4597918637653735, |
| "grad_norm": 0.0010058052139356732, |
| "learning_rate": 4.002602993442459e-06, |
| "loss": 0.000949324369430542, |
| "step": 18200 |
| }, |
| { |
| "epoch": 2.466549533720773, |
| "grad_norm": 0.7622770071029663, |
| "learning_rate": 3.952545427241328e-06, |
| "loss": 0.08991068840026856, |
| "step": 18250 |
| }, |
| { |
| "epoch": 2.4733072036761725, |
| "grad_norm": 0.0006587824318557978, |
| "learning_rate": 3.902487861040197e-06, |
| "loss": 0.0005220246315002442, |
| "step": 18300 |
| }, |
| { |
| "epoch": 2.480064873631572, |
| "grad_norm": 0.008263742551207542, |
| "learning_rate": 3.8524302948390655e-06, |
| "loss": 0.10548673629760742, |
| "step": 18350 |
| }, |
| { |
| "epoch": 2.486822543586971, |
| "grad_norm": 0.002541335765272379, |
| "learning_rate": 3.8023727286379344e-06, |
| "loss": 0.08907471656799316, |
| "step": 18400 |
| }, |
| { |
| "epoch": 2.4935802135423706, |
| "grad_norm": 0.0045174965634942055, |
| "learning_rate": 3.7523151624368024e-06, |
| "loss": 0.12158055305480957, |
| "step": 18450 |
| }, |
| { |
| "epoch": 2.50033788349777, |
| "grad_norm": 0.11145602911710739, |
| "learning_rate": 3.702257596235671e-06, |
| "loss": 0.027370555400848387, |
| "step": 18500 |
| }, |
| { |
| "epoch": 2.507095553453169, |
| "grad_norm": 0.06724914163351059, |
| "learning_rate": 3.65220003003454e-06, |
| "loss": 0.10710090637207031, |
| "step": 18550 |
| }, |
| { |
| "epoch": 2.5138532234085686, |
| "grad_norm": 0.033600274473428726, |
| "learning_rate": 3.6021424638334086e-06, |
| "loss": 0.04707695484161377, |
| "step": 18600 |
| }, |
| { |
| "epoch": 2.520610893363968, |
| "grad_norm": 0.08391136676073074, |
| "learning_rate": 3.5520848976322775e-06, |
| "loss": 0.02492550849914551, |
| "step": 18650 |
| }, |
| { |
| "epoch": 2.527368563319367, |
| "grad_norm": 0.0006770718027837574, |
| "learning_rate": 3.502027331431146e-06, |
| "loss": 0.018025219440460205, |
| "step": 18700 |
| }, |
| { |
| "epoch": 2.5341262332747667, |
| "grad_norm": 0.054639119654893875, |
| "learning_rate": 3.451969765230015e-06, |
| "loss": 0.00034577369689941406, |
| "step": 18750 |
| }, |
| { |
| "epoch": 2.540883903230166, |
| "grad_norm": 0.06790229678153992, |
| "learning_rate": 3.4019121990288833e-06, |
| "loss": 0.03392164945602417, |
| "step": 18800 |
| }, |
| { |
| "epoch": 2.5476415731855657, |
| "grad_norm": 0.0023788262624293566, |
| "learning_rate": 3.351854632827752e-06, |
| "loss": 0.096776123046875, |
| "step": 18850 |
| }, |
| { |
| "epoch": 2.5543992431409652, |
| "grad_norm": 34.9875373840332, |
| "learning_rate": 3.3017970666266206e-06, |
| "loss": 0.09802338600158692, |
| "step": 18900 |
| }, |
| { |
| "epoch": 2.5611569130963643, |
| "grad_norm": 0.005968010518699884, |
| "learning_rate": 3.25173950042549e-06, |
| "loss": 0.154553861618042, |
| "step": 18950 |
| }, |
| { |
| "epoch": 2.567914583051764, |
| "grad_norm": 0.0010768665233626962, |
| "learning_rate": 3.2016819342243584e-06, |
| "loss": 0.037902953624725344, |
| "step": 19000 |
| }, |
| { |
| "epoch": 2.5746722530071633, |
| "grad_norm": 0.005550037138164043, |
| "learning_rate": 3.1516243680232272e-06, |
| "loss": 0.0898381519317627, |
| "step": 19050 |
| }, |
| { |
| "epoch": 2.5814299229625624, |
| "grad_norm": 0.003930082079023123, |
| "learning_rate": 3.1015668018220957e-06, |
| "loss": 0.0002598583698272705, |
| "step": 19100 |
| }, |
| { |
| "epoch": 2.588187592917962, |
| "grad_norm": 0.00719639053568244, |
| "learning_rate": 3.051509235620964e-06, |
| "loss": 0.0004229414463043213, |
| "step": 19150 |
| }, |
| { |
| "epoch": 2.5949452628733614, |
| "grad_norm": 0.00710981385782361, |
| "learning_rate": 3.001451669419833e-06, |
| "loss": 0.05192263126373291, |
| "step": 19200 |
| }, |
| { |
| "epoch": 2.6017029328287604, |
| "grad_norm": 101.4814224243164, |
| "learning_rate": 2.9513941032187015e-06, |
| "loss": 0.05233159065246582, |
| "step": 19250 |
| }, |
| { |
| "epoch": 2.60846060278416, |
| "grad_norm": 0.0006045199697837234, |
| "learning_rate": 2.9013365370175704e-06, |
| "loss": 0.0001742267608642578, |
| "step": 19300 |
| }, |
| { |
| "epoch": 2.6152182727395594, |
| "grad_norm": 6.727764129638672, |
| "learning_rate": 2.851278970816439e-06, |
| "loss": 0.0005090945959091187, |
| "step": 19350 |
| }, |
| { |
| "epoch": 2.621975942694959, |
| "grad_norm": 0.0011069847969338298, |
| "learning_rate": 2.801221404615308e-06, |
| "loss": 0.02414227247238159, |
| "step": 19400 |
| }, |
| { |
| "epoch": 2.6287336126503584, |
| "grad_norm": 0.0020342168863862753, |
| "learning_rate": 2.7511638384141766e-06, |
| "loss": 0.04676462173461914, |
| "step": 19450 |
| }, |
| { |
| "epoch": 2.6354912826057575, |
| "grad_norm": 0.0005501318373717368, |
| "learning_rate": 2.7011062722130455e-06, |
| "loss": 0.06581556320190429, |
| "step": 19500 |
| }, |
| { |
| "epoch": 2.642248952561157, |
| "grad_norm": 0.0007036877213977277, |
| "learning_rate": 2.651048706011914e-06, |
| "loss": 0.04698281764984131, |
| "step": 19550 |
| }, |
| { |
| "epoch": 2.6490066225165565, |
| "grad_norm": 0.002277799416333437, |
| "learning_rate": 2.600991139810783e-06, |
| "loss": 0.11243149757385254, |
| "step": 19600 |
| }, |
| { |
| "epoch": 2.6557642924719556, |
| "grad_norm": 0.004528827499598265, |
| "learning_rate": 2.5509335736096513e-06, |
| "loss": 0.11157805442810059, |
| "step": 19650 |
| }, |
| { |
| "epoch": 2.662521962427355, |
| "grad_norm": 1.4003304243087769, |
| "learning_rate": 2.5008760074085197e-06, |
| "loss": 0.08801917076110839, |
| "step": 19700 |
| }, |
| { |
| "epoch": 2.6692796323827546, |
| "grad_norm": 0.020114433020353317, |
| "learning_rate": 2.4508184412073886e-06, |
| "loss": 0.21220327377319337, |
| "step": 19750 |
| }, |
| { |
| "epoch": 2.6760373023381536, |
| "grad_norm": 0.018475929275155067, |
| "learning_rate": 2.4007608750062575e-06, |
| "loss": 0.07419031143188476, |
| "step": 19800 |
| }, |
| { |
| "epoch": 2.682794972293553, |
| "grad_norm": 0.003193729789927602, |
| "learning_rate": 2.3507033088051263e-06, |
| "loss": 0.08196438789367676, |
| "step": 19850 |
| }, |
| { |
| "epoch": 2.6895526422489526, |
| "grad_norm": 0.0011461104732006788, |
| "learning_rate": 2.300645742603995e-06, |
| "loss": 0.051229662895202636, |
| "step": 19900 |
| }, |
| { |
| "epoch": 2.6963103122043517, |
| "grad_norm": 0.028392083942890167, |
| "learning_rate": 2.2505881764028633e-06, |
| "loss": 0.22081806182861327, |
| "step": 19950 |
| }, |
| { |
| "epoch": 2.703067982159751, |
| "grad_norm": 0.00366395921446383, |
| "learning_rate": 2.200530610201732e-06, |
| "loss": 0.11483686447143554, |
| "step": 20000 |
| }, |
| { |
| "epoch": 2.7098256521151507, |
| "grad_norm": 0.016792714595794678, |
| "learning_rate": 2.150473044000601e-06, |
| "loss": 0.06181726932525635, |
| "step": 20050 |
| }, |
| { |
| "epoch": 2.71658332207055, |
| "grad_norm": 0.0016602030955255032, |
| "learning_rate": 2.1004154777994695e-06, |
| "loss": 0.13399076461791992, |
| "step": 20100 |
| }, |
| { |
| "epoch": 2.7233409920259497, |
| "grad_norm": 0.03401346504688263, |
| "learning_rate": 2.0503579115983383e-06, |
| "loss": 0.09610502243041992, |
| "step": 20150 |
| }, |
| { |
| "epoch": 2.7300986619813488, |
| "grad_norm": 0.0012083080364391208, |
| "learning_rate": 2.000300345397207e-06, |
| "loss": 0.10456615447998047, |
| "step": 20200 |
| }, |
| { |
| "epoch": 2.7368563319367483, |
| "grad_norm": 0.23686014115810394, |
| "learning_rate": 1.9502427791960757e-06, |
| "loss": 0.05664618968963623, |
| "step": 20250 |
| }, |
| { |
| "epoch": 2.7436140018921478, |
| "grad_norm": 0.04736332222819328, |
| "learning_rate": 1.9001852129949443e-06, |
| "loss": 0.10926689147949219, |
| "step": 20300 |
| }, |
| { |
| "epoch": 2.750371671847547, |
| "grad_norm": 0.0023457373026758432, |
| "learning_rate": 1.8501276467938132e-06, |
| "loss": 0.13384007453918456, |
| "step": 20350 |
| }, |
| { |
| "epoch": 2.7571293418029463, |
| "grad_norm": 0.005340518895536661, |
| "learning_rate": 1.8000700805926819e-06, |
| "loss": 0.05119992733001709, |
| "step": 20400 |
| }, |
| { |
| "epoch": 2.763887011758346, |
| "grad_norm": 0.0013073196168988943, |
| "learning_rate": 1.7500125143915503e-06, |
| "loss": 0.07081173419952393, |
| "step": 20450 |
| }, |
| { |
| "epoch": 2.770644681713745, |
| "grad_norm": 0.0015691856388002634, |
| "learning_rate": 1.699954948190419e-06, |
| "loss": 0.04183328628540039, |
| "step": 20500 |
| }, |
| { |
| "epoch": 2.7774023516691444, |
| "grad_norm": 57.649574279785156, |
| "learning_rate": 1.6498973819892877e-06, |
| "loss": 0.07409313201904297, |
| "step": 20550 |
| }, |
| { |
| "epoch": 2.784160021624544, |
| "grad_norm": 63.43701171875, |
| "learning_rate": 1.5998398157881564e-06, |
| "loss": 0.03429892063140869, |
| "step": 20600 |
| }, |
| { |
| "epoch": 2.790917691579943, |
| "grad_norm": 0.25699251890182495, |
| "learning_rate": 1.5497822495870252e-06, |
| "loss": 0.062169432640075684, |
| "step": 20650 |
| }, |
| { |
| "epoch": 2.7976753615353425, |
| "grad_norm": 0.0006383510190062225, |
| "learning_rate": 1.499724683385894e-06, |
| "loss": 0.09383662223815918, |
| "step": 20700 |
| }, |
| { |
| "epoch": 2.804433031490742, |
| "grad_norm": 0.6026961803436279, |
| "learning_rate": 1.4496671171847626e-06, |
| "loss": 0.029181952476501464, |
| "step": 20750 |
| }, |
| { |
| "epoch": 2.8111907014461415, |
| "grad_norm": 0.0031832880340516567, |
| "learning_rate": 1.3996095509836312e-06, |
| "loss": 0.10682568550109864, |
| "step": 20800 |
| }, |
| { |
| "epoch": 2.817948371401541, |
| "grad_norm": 0.06199163198471069, |
| "learning_rate": 1.3495519847825001e-06, |
| "loss": 0.17822813034057616, |
| "step": 20850 |
| }, |
| { |
| "epoch": 2.82470604135694, |
| "grad_norm": 0.0034721684642136097, |
| "learning_rate": 1.2994944185813688e-06, |
| "loss": 0.09863890647888184, |
| "step": 20900 |
| }, |
| { |
| "epoch": 2.8314637113123395, |
| "grad_norm": 0.023132864385843277, |
| "learning_rate": 1.2494368523802374e-06, |
| "loss": 0.162152099609375, |
| "step": 20950 |
| }, |
| { |
| "epoch": 2.838221381267739, |
| "grad_norm": 0.0008075262885540724, |
| "learning_rate": 1.1993792861791061e-06, |
| "loss": 0.09824638366699219, |
| "step": 21000 |
| }, |
| { |
| "epoch": 2.844979051223138, |
| "grad_norm": 0.31778621673583984, |
| "learning_rate": 1.1493217199779748e-06, |
| "loss": 0.047033162117004396, |
| "step": 21050 |
| }, |
| { |
| "epoch": 2.8517367211785376, |
| "grad_norm": 0.03576982393860817, |
| "learning_rate": 1.0992641537768434e-06, |
| "loss": 0.08890575408935547, |
| "step": 21100 |
| }, |
| { |
| "epoch": 2.858494391133937, |
| "grad_norm": 9.656866073608398, |
| "learning_rate": 1.0492065875757121e-06, |
| "loss": 0.1096105670928955, |
| "step": 21150 |
| }, |
| { |
| "epoch": 2.865252061089336, |
| "grad_norm": 0.0005786643596366048, |
| "learning_rate": 9.991490213745808e-07, |
| "loss": 0.0002528005838394165, |
| "step": 21200 |
| }, |
| { |
| "epoch": 2.8720097310447357, |
| "grad_norm": 0.0012748179724439979, |
| "learning_rate": 9.490914551734496e-07, |
| "loss": 0.136431884765625, |
| "step": 21250 |
| }, |
| { |
| "epoch": 2.878767401000135, |
| "grad_norm": 0.0009654840687289834, |
| "learning_rate": 8.990338889723182e-07, |
| "loss": 0.0849915599822998, |
| "step": 21300 |
| }, |
| { |
| "epoch": 2.8855250709555347, |
| "grad_norm": 0.08358582854270935, |
| "learning_rate": 8.48976322771187e-07, |
| "loss": 0.05486437797546387, |
| "step": 21350 |
| }, |
| { |
| "epoch": 2.892282740910934, |
| "grad_norm": 0.016318723559379578, |
| "learning_rate": 7.989187565700557e-07, |
| "loss": 0.08288352966308593, |
| "step": 21400 |
| }, |
| { |
| "epoch": 2.8990404108663332, |
| "grad_norm": 4.843513488769531, |
| "learning_rate": 7.488611903689242e-07, |
| "loss": 0.07068184852600097, |
| "step": 21450 |
| }, |
| { |
| "epoch": 2.9057980808217327, |
| "grad_norm": 0.0013034067815169692, |
| "learning_rate": 6.98803624167793e-07, |
| "loss": 0.044244151115417484, |
| "step": 21500 |
| }, |
| { |
| "epoch": 2.9125557507771322, |
| "grad_norm": 0.01674531400203705, |
| "learning_rate": 6.487460579666617e-07, |
| "loss": 0.0642578935623169, |
| "step": 21550 |
| }, |
| { |
| "epoch": 2.9193134207325313, |
| "grad_norm": 0.0049995374865829945, |
| "learning_rate": 5.986884917655304e-07, |
| "loss": 0.14431939125061036, |
| "step": 21600 |
| }, |
| { |
| "epoch": 2.926071090687931, |
| "grad_norm": 0.00926155038177967, |
| "learning_rate": 5.486309255643991e-07, |
| "loss": 0.03058389902114868, |
| "step": 21650 |
| }, |
| { |
| "epoch": 2.9328287606433303, |
| "grad_norm": 0.023561256006360054, |
| "learning_rate": 4.985733593632678e-07, |
| "loss": 0.09700474739074708, |
| "step": 21700 |
| }, |
| { |
| "epoch": 2.9395864305987294, |
| "grad_norm": 0.002966544823721051, |
| "learning_rate": 4.4851579316213654e-07, |
| "loss": 0.08508275985717774, |
| "step": 21750 |
| }, |
| { |
| "epoch": 2.946344100554129, |
| "grad_norm": 0.003825432388111949, |
| "learning_rate": 3.9845822696100515e-07, |
| "loss": 0.11144843101501464, |
| "step": 21800 |
| }, |
| { |
| "epoch": 2.9531017705095284, |
| "grad_norm": 0.005938540678471327, |
| "learning_rate": 3.484006607598739e-07, |
| "loss": 0.134426851272583, |
| "step": 21850 |
| }, |
| { |
| "epoch": 2.9598594404649274, |
| "grad_norm": 0.0038295928388834, |
| "learning_rate": 2.983430945587426e-07, |
| "loss": 0.11520405769348145, |
| "step": 21900 |
| }, |
| { |
| "epoch": 2.966617110420327, |
| "grad_norm": 0.022271599620580673, |
| "learning_rate": 2.4828552835761126e-07, |
| "loss": 0.036415774822235104, |
| "step": 21950 |
| }, |
| { |
| "epoch": 2.9733747803757264, |
| "grad_norm": 0.08405578881502151, |
| "learning_rate": 1.9822796215647998e-07, |
| "loss": 0.0003047233819961548, |
| "step": 22000 |
| }, |
| { |
| "epoch": 2.980132450331126, |
| "grad_norm": 0.3389264941215515, |
| "learning_rate": 1.4817039595534867e-07, |
| "loss": 0.06158688545227051, |
| "step": 22050 |
| }, |
| { |
| "epoch": 2.9868901202865255, |
| "grad_norm": 23.368932723999023, |
| "learning_rate": 9.811282975421737e-08, |
| "loss": 0.09837133407592774, |
| "step": 22100 |
| }, |
| { |
| "epoch": 2.9936477902419245, |
| "grad_norm": 0.001970636658370495, |
| "learning_rate": 4.805526355308605e-08, |
| "loss": 0.07353384971618653, |
| "step": 22150 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.9708597146998246, |
| "eval_f1": 0.9688061407806631, |
| "eval_loss": 0.18717099726200104, |
| "eval_precision": 0.9581650783395251, |
| "eval_recall": 0.9796862097440132, |
| "eval_runtime": 67.7468, |
| "eval_samples_per_second": 193.5, |
| "eval_steps_per_second": 48.386, |
| "step": 22197 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 22197, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.67206301002752e+16, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|