| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9860291834833903, |
| "eval_steps": 30, |
| "global_step": 800, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.024837007140639553, |
| "grad_norm": 11.272954940795898, |
| "learning_rate": 1.1111111111111112e-05, |
| "loss": 2.7848, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.04967401428127911, |
| "grad_norm": 1.125125765800476, |
| "learning_rate": 2.345679012345679e-05, |
| "loss": 1.2296, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.07451102142191866, |
| "grad_norm": 0.8439419269561768, |
| "learning_rate": 3.580246913580247e-05, |
| "loss": 0.6858, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.07451102142191866, |
| "eval_loss": 0.6361502408981323, |
| "eval_runtime": 38.0472, |
| "eval_samples_per_second": 4.468, |
| "eval_steps_per_second": 4.468, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.09934802856255821, |
| "grad_norm": 0.639077365398407, |
| "learning_rate": 4.814814814814815e-05, |
| "loss": 0.5125, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.12418503570319776, |
| "grad_norm": 0.6465684771537781, |
| "learning_rate": 6.049382716049383e-05, |
| "loss": 0.3889, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.14902204284383733, |
| "grad_norm": 0.688926100730896, |
| "learning_rate": 7.283950617283951e-05, |
| "loss": 0.3039, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.14902204284383733, |
| "eval_loss": 0.31402018666267395, |
| "eval_runtime": 37.4247, |
| "eval_samples_per_second": 4.542, |
| "eval_steps_per_second": 4.542, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.17385904998447688, |
| "grad_norm": 0.6076411008834839, |
| "learning_rate": 8.518518518518518e-05, |
| "loss": 0.2683, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.19869605712511643, |
| "grad_norm": 1.1276863813400269, |
| "learning_rate": 9.753086419753087e-05, |
| "loss": 0.2392, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.22353306426575598, |
| "grad_norm": 0.5334923267364502, |
| "learning_rate": 9.996995997963675e-05, |
| "loss": 0.2136, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.22353306426575598, |
| "eval_loss": 0.2120552957057953, |
| "eval_runtime": 37.372, |
| "eval_samples_per_second": 4.549, |
| "eval_steps_per_second": 4.549, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.24837007140639553, |
| "grad_norm": 0.43537846207618713, |
| "learning_rate": 9.984798425822163e-05, |
| "loss": 0.1974, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.2732070785470351, |
| "grad_norm": 0.7943837642669678, |
| "learning_rate": 9.963242415487557e-05, |
| "loss": 0.1678, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.29804408568767465, |
| "grad_norm": 0.30124473571777344, |
| "learning_rate": 9.932368436116915e-05, |
| "loss": 0.1648, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.29804408568767465, |
| "eval_loss": 0.15894636511802673, |
| "eval_runtime": 37.5461, |
| "eval_samples_per_second": 4.528, |
| "eval_steps_per_second": 4.528, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.3228810928283142, |
| "grad_norm": 0.26191410422325134, |
| "learning_rate": 9.892234450380547e-05, |
| "loss": 0.1602, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.34771809996895375, |
| "grad_norm": 0.5726041197776794, |
| "learning_rate": 9.842915805643155e-05, |
| "loss": 0.1647, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.3725551071095933, |
| "grad_norm": 0.4395739436149597, |
| "learning_rate": 9.784505092507031e-05, |
| "loss": 0.1525, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.3725551071095933, |
| "eval_loss": 0.14860223233699799, |
| "eval_runtime": 37.4998, |
| "eval_samples_per_second": 4.533, |
| "eval_steps_per_second": 4.533, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.39739211425023285, |
| "grad_norm": 0.33055049180984497, |
| "learning_rate": 9.717111970982869e-05, |
| "loss": 0.1555, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.4222291213908724, |
| "grad_norm": 0.2741611897945404, |
| "learning_rate": 9.640862964614564e-05, |
| "loss": 0.1487, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.44706612853151195, |
| "grad_norm": 0.3228336572647095, |
| "learning_rate": 9.555901222944468e-05, |
| "loss": 0.1564, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.44706612853151195, |
| "eval_loss": 0.1397811621427536, |
| "eval_runtime": 37.4841, |
| "eval_samples_per_second": 4.535, |
| "eval_steps_per_second": 4.535, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.47190313567215153, |
| "grad_norm": 0.23800568282604218, |
| "learning_rate": 9.462386252765087e-05, |
| "loss": 0.1443, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.49674014281279105, |
| "grad_norm": 0.27476266026496887, |
| "learning_rate": 9.36049361866175e-05, |
| "loss": 0.1447, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5215771499534306, |
| "grad_norm": 0.1981421262025833, |
| "learning_rate": 9.250414613408427e-05, |
| "loss": 0.1448, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.5215771499534306, |
| "eval_loss": 0.1324106752872467, |
| "eval_runtime": 37.3913, |
| "eval_samples_per_second": 4.547, |
| "eval_steps_per_second": 4.547, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.5464141570940702, |
| "grad_norm": 0.21448366343975067, |
| "learning_rate": 9.132355898835556e-05, |
| "loss": 0.1401, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.5712511642347097, |
| "grad_norm": 0.2850506603717804, |
| "learning_rate": 9.00653911784403e-05, |
| "loss": 0.1455, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.5960881713753493, |
| "grad_norm": 0.22351579368114471, |
| "learning_rate": 8.873200478293826e-05, |
| "loss": 0.1478, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.5960881713753493, |
| "eval_loss": 0.1282844841480255, |
| "eval_runtime": 37.4497, |
| "eval_samples_per_second": 4.539, |
| "eval_steps_per_second": 4.539, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.6209251785159888, |
| "grad_norm": 0.2048240602016449, |
| "learning_rate": 8.732590309548416e-05, |
| "loss": 0.1357, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.6457621856566284, |
| "grad_norm": 0.24408458173274994, |
| "learning_rate": 8.584972592507553e-05, |
| "loss": 0.1371, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.670599192797268, |
| "grad_norm": 0.14031532406806946, |
| "learning_rate": 8.430624464010706e-05, |
| "loss": 0.1424, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.670599192797268, |
| "eval_loss": 0.12568299472332, |
| "eval_runtime": 37.8149, |
| "eval_samples_per_second": 4.496, |
| "eval_steps_per_second": 4.496, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.6954361999379075, |
| "grad_norm": 0.24387261271476746, |
| "learning_rate": 8.269835696541607e-05, |
| "loss": 0.1289, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.720273207078547, |
| "grad_norm": 0.1631806641817093, |
| "learning_rate": 8.102908154210693e-05, |
| "loss": 0.1358, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.7451102142191866, |
| "grad_norm": 0.1921030879020691, |
| "learning_rate": 7.93015522603677e-05, |
| "loss": 0.1466, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.7451102142191866, |
| "eval_loss": 0.12537378072738647, |
| "eval_runtime": 37.7611, |
| "eval_samples_per_second": 4.502, |
| "eval_steps_per_second": 4.502, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.7699472213598262, |
| "grad_norm": 0.17755988240242004, |
| "learning_rate": 7.751901237591887e-05, |
| "loss": 0.1423, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.7947842285004657, |
| "grad_norm": 0.22009995579719543, |
| "learning_rate": 7.568480842113952e-05, |
| "loss": 0.1399, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.8196212356411052, |
| "grad_norm": 0.19072045385837555, |
| "learning_rate": 7.380238392230257e-05, |
| "loss": 0.1429, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.8196212356411052, |
| "eval_loss": 0.12434083968400955, |
| "eval_runtime": 37.8732, |
| "eval_samples_per_second": 4.489, |
| "eval_steps_per_second": 4.489, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.8444582427817448, |
| "grad_norm": 0.13499405980110168, |
| "learning_rate": 7.187527293471385e-05, |
| "loss": 0.1388, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.8692952499223844, |
| "grad_norm": 0.1576332151889801, |
| "learning_rate": 6.990709340789273e-05, |
| "loss": 0.1308, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.8941322570630239, |
| "grad_norm": 0.13361729681491852, |
| "learning_rate": 6.790154039324975e-05, |
| "loss": 0.132, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.8941322570630239, |
| "eval_loss": 0.12276456505060196, |
| "eval_runtime": 37.8657, |
| "eval_samples_per_second": 4.49, |
| "eval_steps_per_second": 4.49, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.9189692642036634, |
| "grad_norm": 0.13873551785945892, |
| "learning_rate": 6.586237910701374e-05, |
| "loss": 0.1454, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.9438062713443031, |
| "grad_norm": 0.12903927266597748, |
| "learning_rate": 6.379343786143184e-05, |
| "loss": 0.1325, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.9686432784849426, |
| "grad_norm": 0.12467560917139053, |
| "learning_rate": 6.169860087751321e-05, |
| "loss": 0.1389, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.9686432784849426, |
| "eval_loss": 0.1223841980099678, |
| "eval_runtime": 37.8137, |
| "eval_samples_per_second": 4.496, |
| "eval_steps_per_second": 4.496, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.9934802856255821, |
| "grad_norm": 0.1413436084985733, |
| "learning_rate": 5.95818009928099e-05, |
| "loss": 0.1283, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.0173859049984477, |
| "grad_norm": 0.17072723805904388, |
| "learning_rate": 5.744701227792538e-05, |
| "loss": 0.1351, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.0422229121390871, |
| "grad_norm": 0.15790237486362457, |
| "learning_rate": 5.529824257561212e-05, |
| "loss": 0.1346, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.0422229121390871, |
| "eval_loss": 0.12222303450107574, |
| "eval_runtime": 37.8365, |
| "eval_samples_per_second": 4.493, |
| "eval_steps_per_second": 4.493, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.0670599192797268, |
| "grad_norm": 0.14720048010349274, |
| "learning_rate": 5.313952597646568e-05, |
| "loss": 0.1324, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.0918969264203664, |
| "grad_norm": 0.12014240026473999, |
| "learning_rate": 5.097491524534106e-05, |
| "loss": 0.1315, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.1167339335610058, |
| "grad_norm": 0.18464700877666473, |
| "learning_rate": 4.88084742127102e-05, |
| "loss": 0.1201, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.1167339335610058, |
| "eval_loss": 0.12194804102182388, |
| "eval_runtime": 37.6494, |
| "eval_samples_per_second": 4.515, |
| "eval_steps_per_second": 4.515, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.1415709407016454, |
| "grad_norm": 0.1334882378578186, |
| "learning_rate": 4.664427014524492e-05, |
| "loss": 0.1309, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.166407947842285, |
| "grad_norm": 0.14938846230506897, |
| "learning_rate": 4.448636610994857e-05, |
| "loss": 0.1313, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.1912449549829245, |
| "grad_norm": 0.11317029595375061, |
| "learning_rate": 4.2338813346172476e-05, |
| "loss": 0.1297, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.1912449549829245, |
| "eval_loss": 0.12136982381343842, |
| "eval_runtime": 38.0285, |
| "eval_samples_per_second": 4.47, |
| "eval_steps_per_second": 4.47, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.2160819621235641, |
| "grad_norm": 0.15464289486408234, |
| "learning_rate": 4.020564365983722e-05, |
| "loss": 0.1344, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.2409189692642038, |
| "grad_norm": 0.13622425496578217, |
| "learning_rate": 3.80908618541384e-05, |
| "loss": 0.1321, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.2657559764048432, |
| "grad_norm": 0.18376871943473816, |
| "learning_rate": 3.5998438210946937e-05, |
| "loss": 0.1252, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.2657559764048432, |
| "eval_loss": 0.12068537622690201, |
| "eval_runtime": 37.8008, |
| "eval_samples_per_second": 4.497, |
| "eval_steps_per_second": 4.497, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.2905929835454828, |
| "grad_norm": 0.1372678130865097, |
| "learning_rate": 3.393230103701989e-05, |
| "loss": 0.13, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.3154299906861224, |
| "grad_norm": 0.12849152088165283, |
| "learning_rate": 3.1896329289014846e-05, |
| "loss": 0.1317, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.3402669978267618, |
| "grad_norm": 0.11176390945911407, |
| "learning_rate": 2.9894345291154202e-05, |
| "loss": 0.13, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.3402669978267618, |
| "eval_loss": 0.12049100548028946, |
| "eval_runtime": 37.9715, |
| "eval_samples_per_second": 4.477, |
| "eval_steps_per_second": 4.477, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.3651040049674015, |
| "grad_norm": 0.16603630781173706, |
| "learning_rate": 2.793010755921068e-05, |
| "loss": 0.1272, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.389941012108041, |
| "grad_norm": 0.1615874320268631, |
| "learning_rate": 2.6007303744286844e-05, |
| "loss": 0.1309, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.4147780192486805, |
| "grad_norm": 0.13734523952007294, |
| "learning_rate": 2.4129543709635378e-05, |
| "loss": 0.1303, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.4147780192486805, |
| "eval_loss": 0.12090769410133362, |
| "eval_runtime": 38.0585, |
| "eval_samples_per_second": 4.467, |
| "eval_steps_per_second": 4.467, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.4396150263893202, |
| "grad_norm": 0.13437435030937195, |
| "learning_rate": 2.230035275351806e-05, |
| "loss": 0.1338, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.4644520335299596, |
| "grad_norm": 0.12444904446601868, |
| "learning_rate": 2.0523164990826543e-05, |
| "loss": 0.1281, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.4892890406705992, |
| "grad_norm": 0.14093923568725586, |
| "learning_rate": 1.8801316905890583e-05, |
| "loss": 0.1307, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.4892890406705992, |
| "eval_loss": 0.11982201784849167, |
| "eval_runtime": 37.9037, |
| "eval_samples_per_second": 4.485, |
| "eval_steps_per_second": 4.485, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.5141260478112386, |
| "grad_norm": 0.14758960902690887, |
| "learning_rate": 1.7138041088577267e-05, |
| "loss": 0.1299, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.5389630549518785, |
| "grad_norm": 0.13439303636550903, |
| "learning_rate": 1.5536460165441323e-05, |
| "loss": 0.1285, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.5638000620925179, |
| "grad_norm": 0.1315300315618515, |
| "learning_rate": 1.3999580937320011e-05, |
| "loss": 0.1294, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.5638000620925179, |
| "eval_loss": 0.11952362954616547, |
| "eval_runtime": 37.8235, |
| "eval_samples_per_second": 4.495, |
| "eval_steps_per_second": 4.495, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.5886370692331573, |
| "grad_norm": 0.13855591416358948, |
| "learning_rate": 1.2530288734378764e-05, |
| "loss": 0.1302, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.613474076373797, |
| "grad_norm": 0.14461226761341095, |
| "learning_rate": 1.1131341999205274e-05, |
| "loss": 0.1241, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.6383110835144366, |
| "grad_norm": 0.1513630747795105, |
| "learning_rate": 9.805367108121761e-06, |
| "loss": 0.1177, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.6383110835144366, |
| "eval_loss": 0.11914250999689102, |
| "eval_runtime": 37.8427, |
| "eval_samples_per_second": 4.492, |
| "eval_steps_per_second": 4.492, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.663148090655076, |
| "grad_norm": 0.3891820013523102, |
| "learning_rate": 8.554853440437805e-06, |
| "loss": 0.1297, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.6879850977957156, |
| "grad_norm": 0.1408233791589737, |
| "learning_rate": 7.382148704900882e-06, |
| "loss": 0.1302, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.7128221049363552, |
| "grad_norm": 0.15466411411762238, |
| "learning_rate": 6.289454532118444e-06, |
| "loss": 0.1288, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.7128221049363552, |
| "eval_loss": 0.11917895078659058, |
| "eval_runtime": 37.8925, |
| "eval_samples_per_second": 4.486, |
| "eval_steps_per_second": 4.486, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.7376591120769946, |
| "grad_norm": 0.09873683750629425, |
| "learning_rate": 5.278822341226519e-06, |
| "loss": 0.1296, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.7624961192176343, |
| "grad_norm": 0.13227960467338562, |
| "learning_rate": 4.352149488564605e-06, |
| "loss": 0.1241, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.787333126358274, |
| "grad_norm": 0.12939970195293427, |
| "learning_rate": 3.511175705587433e-06, |
| "loss": 0.1362, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.787333126358274, |
| "eval_loss": 0.11892342567443848, |
| "eval_runtime": 37.9012, |
| "eval_samples_per_second": 4.485, |
| "eval_steps_per_second": 4.485, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.8121701334989133, |
| "grad_norm": 0.15326713025569916, |
| "learning_rate": 2.75747983270091e-06, |
| "loss": 0.1287, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.837007140639553, |
| "grad_norm": 0.13985218107700348, |
| "learning_rate": 2.0924768551542463e-06, |
| "loss": 0.1298, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.8618441477801926, |
| "grad_norm": 0.14625447988510132, |
| "learning_rate": 1.517415246552978e-06, |
| "loss": 0.1295, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.8618441477801926, |
| "eval_loss": 0.11896809190511703, |
| "eval_runtime": 37.7631, |
| "eval_samples_per_second": 4.502, |
| "eval_steps_per_second": 4.502, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.886681154920832, |
| "grad_norm": 0.134150892496109, |
| "learning_rate": 1.033374624980249e-06, |
| "loss": 0.1252, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.9115181620614716, |
| "grad_norm": 0.1421349197626114, |
| "learning_rate": 6.412637261266396e-07, |
| "loss": 0.1224, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.9363551692021113, |
| "grad_norm": 0.1370449662208557, |
| "learning_rate": 3.418186972338977e-07, |
| "loss": 0.1305, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.9363551692021113, |
| "eval_loss": 0.11897934973239899, |
| "eval_runtime": 37.8217, |
| "eval_samples_per_second": 4.495, |
| "eval_steps_per_second": 4.495, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.9611921763427507, |
| "grad_norm": 0.14134632050991058, |
| "learning_rate": 1.356017150553557e-07, |
| "loss": 0.1298, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.9860291834833903, |
| "grad_norm": 0.13970708847045898, |
| "learning_rate": 2.299993042786941e-08, |
| "loss": 0.1286, |
| "step": 800 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 806, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.2428317974302515e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|