| { |
| "best_metric": 0.7780232429504395, |
| "best_model_checkpoint": "MarkKisker/RoBERTa-base-RottenTomatoes_v2\\checkpoint-1067", |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 1067, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.03423001989722252, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 0.0018, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.04875793680548668, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 0.0014, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 20.525543212890625, |
| "learning_rate": 3e-06, |
| "loss": 0.1788, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.02198684774339199, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.0014, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.09893256425857544, |
| "learning_rate": 5e-06, |
| "loss": 0.1576, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.04567793011665344, |
| "learning_rate": 6e-06, |
| "loss": 0.1448, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.7623605728149414, |
| "learning_rate": 7.000000000000001e-06, |
| "loss": 0.0016, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.023505745455622673, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.0017, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.022804420441389084, |
| "learning_rate": 9e-06, |
| "loss": 0.1304, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.03228422999382019, |
| "learning_rate": 1e-05, |
| "loss": 0.1446, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.23935572803020477, |
| "learning_rate": 1.1000000000000001e-05, |
| "loss": 0.0425, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 7.51793098449707, |
| "learning_rate": 1.2e-05, |
| "loss": 0.0024, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.023124126717448235, |
| "learning_rate": 1.3000000000000001e-05, |
| "loss": 0.0228, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 17.817167282104492, |
| "learning_rate": 1.4000000000000001e-05, |
| "loss": 0.0753, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.015741823241114616, |
| "learning_rate": 1.5e-05, |
| "loss": 0.1112, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.26574474573135376, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 0.0092, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 4.9772748947143555, |
| "learning_rate": 1.7000000000000003e-05, |
| "loss": 0.2384, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.061539579182863235, |
| "learning_rate": 1.8e-05, |
| "loss": 0.0012, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.08193587511777878, |
| "learning_rate": 1.9e-05, |
| "loss": 0.0812, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.023329803720116615, |
| "learning_rate": 2e-05, |
| "loss": 0.1662, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.031778186559677124, |
| "learning_rate": 2.1e-05, |
| "loss": 0.0013, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.023606792092323303, |
| "learning_rate": 2.2000000000000003e-05, |
| "loss": 0.0547, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.019992610439658165, |
| "learning_rate": 2.3000000000000003e-05, |
| "loss": 0.0011, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.09242820739746094, |
| "learning_rate": 2.4e-05, |
| "loss": 0.0012, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.01790749281644821, |
| "learning_rate": 2.5e-05, |
| "loss": 0.0803, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.013409961014986038, |
| "learning_rate": 2.6000000000000002e-05, |
| "loss": 0.0633, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.045994311571121216, |
| "learning_rate": 2.7000000000000002e-05, |
| "loss": 0.1399, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.030622974038124084, |
| "learning_rate": 2.8000000000000003e-05, |
| "loss": 0.0015, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.052308339625597, |
| "learning_rate": 2.9e-05, |
| "loss": 0.0852, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.09171419590711594, |
| "learning_rate": 3e-05, |
| "loss": 0.0022, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.024435508996248245, |
| "learning_rate": 3.1e-05, |
| "loss": 0.0859, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 330.25262451171875, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 0.0497, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.021127384155988693, |
| "learning_rate": 3.3e-05, |
| "loss": 0.0818, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.030799318104982376, |
| "learning_rate": 3.4000000000000007e-05, |
| "loss": 0.0009, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.0609976127743721, |
| "learning_rate": 3.5e-05, |
| "loss": 0.0016, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.013851546682417393, |
| "learning_rate": 3.6e-05, |
| "loss": 0.2128, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.012113348580896854, |
| "learning_rate": 3.7e-05, |
| "loss": 0.119, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.0172914806753397, |
| "learning_rate": 3.8e-05, |
| "loss": 0.2769, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.047122351825237274, |
| "learning_rate": 3.9000000000000006e-05, |
| "loss": 0.0015, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.029489964246749878, |
| "learning_rate": 4e-05, |
| "loss": 0.0019, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.02585042454302311, |
| "learning_rate": 4.1e-05, |
| "loss": 0.0759, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.044562604278326035, |
| "learning_rate": 4.2e-05, |
| "loss": 0.0025, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.14535053074359894, |
| "learning_rate": 4.3e-05, |
| "loss": 0.0856, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 14.468594551086426, |
| "learning_rate": 4.4000000000000006e-05, |
| "loss": 0.1576, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 3.1556191444396973, |
| "learning_rate": 4.5e-05, |
| "loss": 0.1671, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 3.5952117443084717, |
| "learning_rate": 4.600000000000001e-05, |
| "loss": 0.1422, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.10417389869689941, |
| "learning_rate": 4.7e-05, |
| "loss": 0.0705, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 24.245695114135742, |
| "learning_rate": 4.8e-05, |
| "loss": 0.0586, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.6337321400642395, |
| "learning_rate": 4.9e-05, |
| "loss": 0.1252, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 62.256656646728516, |
| "learning_rate": 5e-05, |
| "loss": 0.1297, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.2806699573993683, |
| "learning_rate": 4.9896587383660806e-05, |
| "loss": 0.1789, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.05291756987571716, |
| "learning_rate": 4.9793174767321616e-05, |
| "loss": 0.1434, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.12910176813602448, |
| "learning_rate": 4.968976215098242e-05, |
| "loss": 0.2909, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 9.672201156616211, |
| "learning_rate": 4.958634953464323e-05, |
| "loss": 0.178, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.1485089659690857, |
| "learning_rate": 4.948293691830403e-05, |
| "loss": 0.0731, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.11018037796020508, |
| "learning_rate": 4.937952430196484e-05, |
| "loss": 0.1337, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 0.047798193991184235, |
| "learning_rate": 4.9276111685625646e-05, |
| "loss": 0.0028, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 3.2003190517425537, |
| "learning_rate": 4.9172699069286456e-05, |
| "loss": 0.2401, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.033508703112602234, |
| "learning_rate": 4.906928645294726e-05, |
| "loss": 0.057, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.07519116997718811, |
| "learning_rate": 4.896587383660807e-05, |
| "loss": 0.1506, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 0.10883668810129166, |
| "learning_rate": 4.886246122026887e-05, |
| "loss": 0.0799, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 3.15360689163208, |
| "learning_rate": 4.8759048603929683e-05, |
| "loss": 0.0904, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 2.191103219985962, |
| "learning_rate": 4.865563598759049e-05, |
| "loss": 0.0639, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.05860808119177818, |
| "learning_rate": 4.855222337125129e-05, |
| "loss": 0.2293, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 180.98397827148438, |
| "learning_rate": 4.84488107549121e-05, |
| "loss": 0.1808, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 127.35352325439453, |
| "learning_rate": 4.8345398138572904e-05, |
| "loss": 0.1102, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 0.03393542766571045, |
| "learning_rate": 4.8241985522233714e-05, |
| "loss": 0.2701, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.3284960985183716, |
| "learning_rate": 4.813857290589452e-05, |
| "loss": 0.0668, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 0.05796672776341438, |
| "learning_rate": 4.803516028955533e-05, |
| "loss": 0.1625, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 0.607434093952179, |
| "learning_rate": 4.793174767321613e-05, |
| "loss": 0.3021, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 0.1597072184085846, |
| "learning_rate": 4.782833505687694e-05, |
| "loss": 0.1627, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 0.0897730141878128, |
| "learning_rate": 4.772492244053775e-05, |
| "loss": 0.1379, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 70.59358978271484, |
| "learning_rate": 4.7621509824198554e-05, |
| "loss": 0.571, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 7.284711837768555, |
| "learning_rate": 4.7518097207859365e-05, |
| "loss": 0.4225, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.0496598482131958, |
| "learning_rate": 4.741468459152017e-05, |
| "loss": 0.0981, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 0.06985878944396973, |
| "learning_rate": 4.731127197518098e-05, |
| "loss": 0.1593, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 3.0135066509246826, |
| "learning_rate": 4.720785935884178e-05, |
| "loss": 0.3379, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 0.7335708141326904, |
| "learning_rate": 4.710444674250259e-05, |
| "loss": 0.4362, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 47.25736618041992, |
| "learning_rate": 4.7001034126163395e-05, |
| "loss": 0.3113, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.23570404946804047, |
| "learning_rate": 4.6897621509824205e-05, |
| "loss": 0.2591, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 1.6712744235992432, |
| "learning_rate": 4.679420889348501e-05, |
| "loss": 0.3156, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 3.803595781326294, |
| "learning_rate": 4.669079627714581e-05, |
| "loss": 0.2374, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 0.053344208747148514, |
| "learning_rate": 4.658738366080662e-05, |
| "loss": 0.0993, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 0.0863012745976448, |
| "learning_rate": 4.6483971044467425e-05, |
| "loss": 0.2164, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 50.73778533935547, |
| "learning_rate": 4.6380558428128236e-05, |
| "loss": 0.7427, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.3867453336715698, |
| "learning_rate": 4.627714581178904e-05, |
| "loss": 0.4358, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 2.2237284183502197, |
| "learning_rate": 4.617373319544985e-05, |
| "loss": 0.1522, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 189.51943969726562, |
| "learning_rate": 4.607032057911065e-05, |
| "loss": 0.3018, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 76.69792938232422, |
| "learning_rate": 4.596690796277146e-05, |
| "loss": 0.4078, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 11.872729301452637, |
| "learning_rate": 4.5863495346432266e-05, |
| "loss": 0.3047, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 2.8548457622528076, |
| "learning_rate": 4.5760082730093076e-05, |
| "loss": 0.5353, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.8873672485351562, |
| "learning_rate": 4.565667011375388e-05, |
| "loss": 0.1907, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 0.31615641713142395, |
| "learning_rate": 4.555325749741469e-05, |
| "loss": 0.0634, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 44.98077392578125, |
| "learning_rate": 4.544984488107549e-05, |
| "loss": 0.2581, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 0.426419734954834, |
| "learning_rate": 4.5346432264736296e-05, |
| "loss": 0.4992, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.7562969923019409, |
| "learning_rate": 4.5243019648397106e-05, |
| "loss": 0.3383, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 0.20380929112434387, |
| "learning_rate": 4.513960703205791e-05, |
| "loss": 0.1892, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 2.884596586227417, |
| "learning_rate": 4.503619441571872e-05, |
| "loss": 0.3536, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 14.720122337341309, |
| "learning_rate": 4.493278179937952e-05, |
| "loss": 0.4856, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 0.2174040526151657, |
| "learning_rate": 4.4829369183040333e-05, |
| "loss": 0.1159, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 0.5567955374717712, |
| "learning_rate": 4.472595656670114e-05, |
| "loss": 0.5414, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.8856528997421265, |
| "learning_rate": 4.462254395036195e-05, |
| "loss": 0.2257, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 0.36583012342453003, |
| "learning_rate": 4.451913133402275e-05, |
| "loss": 0.0835, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 2.9916436672210693, |
| "learning_rate": 4.441571871768356e-05, |
| "loss": 0.2258, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 0.16345125436782837, |
| "learning_rate": 4.4312306101344364e-05, |
| "loss": 0.2466, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 0.1895245909690857, |
| "learning_rate": 4.420889348500517e-05, |
| "loss": 0.4954, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.8030018761726079, |
| "eval_f1": 0.8080438756855576, |
| "eval_loss": 0.7780232429504395, |
| "eval_precision": 0.7864768683274022, |
| "eval_recall": 0.8308270676691729, |
| "eval_runtime": 1.4434, |
| "eval_samples_per_second": 369.265, |
| "eval_steps_per_second": 46.418, |
| "step": 1067 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 5335, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "total_flos": 350677703472000.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|