| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.061823802163833, | |
| "eval_steps": 100, | |
| "global_step": 1000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.020607934054611025, | |
| "grad_norm": 1.2759531736373901, | |
| "learning_rate": 6.1643835616438354e-06, | |
| "loss": 0.3919, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.04121586810922205, | |
| "grad_norm": 0.78020179271698, | |
| "learning_rate": 1.3013698630136986e-05, | |
| "loss": 0.3437, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.061823802163833076, | |
| "grad_norm": 0.9467633962631226, | |
| "learning_rate": 1.9863013698630137e-05, | |
| "loss": 0.3129, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.0824317362184441, | |
| "grad_norm": 0.8084121346473694, | |
| "learning_rate": 2.671232876712329e-05, | |
| "loss": 0.2943, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.10303967027305512, | |
| "grad_norm": 0.9517551064491272, | |
| "learning_rate": 3.356164383561644e-05, | |
| "loss": 0.2838, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.12364760432766615, | |
| "grad_norm": 0.7000167965888977, | |
| "learning_rate": 4.041095890410959e-05, | |
| "loss": 0.2776, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.14425553838227717, | |
| "grad_norm": 0.7666856646537781, | |
| "learning_rate": 4.726027397260274e-05, | |
| "loss": 0.2653, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.1648634724368882, | |
| "grad_norm": 1.0173320770263672, | |
| "learning_rate": 5.4109589041095895e-05, | |
| "loss": 0.2705, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.18547140649149924, | |
| "grad_norm": 1.0106574296951294, | |
| "learning_rate": 6.095890410958904e-05, | |
| "loss": 0.2418, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.20607934054611024, | |
| "grad_norm": 0.8626989722251892, | |
| "learning_rate": 6.78082191780822e-05, | |
| "loss": 0.2435, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.20607934054611024, | |
| "eval_loss": 0.23216360807418823, | |
| "eval_runtime": 153.64, | |
| "eval_samples_per_second": 0.391, | |
| "eval_steps_per_second": 0.391, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.22668727460072127, | |
| "grad_norm": 0.6557551622390747, | |
| "learning_rate": 7.465753424657535e-05, | |
| "loss": 0.2408, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.2472952086553323, | |
| "grad_norm": 0.7992894649505615, | |
| "learning_rate": 8.15068493150685e-05, | |
| "loss": 0.2336, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.2679031427099433, | |
| "grad_norm": 0.7196781039237976, | |
| "learning_rate": 8.835616438356165e-05, | |
| "loss": 0.2388, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.28851107676455434, | |
| "grad_norm": 0.7250545024871826, | |
| "learning_rate": 9.520547945205481e-05, | |
| "loss": 0.2218, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.3091190108191654, | |
| "grad_norm": 0.7629153728485107, | |
| "learning_rate": 9.999870401165987e-05, | |
| "loss": 0.2356, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.3297269448737764, | |
| "grad_norm": 0.7327470183372498, | |
| "learning_rate": 9.99756660878697e-05, | |
| "loss": 0.2323, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.35033487892838744, | |
| "grad_norm": 0.7014147043228149, | |
| "learning_rate": 9.992384369672854e-05, | |
| "loss": 0.2362, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.37094281298299847, | |
| "grad_norm": 0.5997125506401062, | |
| "learning_rate": 9.984326668636131e-05, | |
| "loss": 0.2277, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.3915507470376095, | |
| "grad_norm": 0.8773711919784546, | |
| "learning_rate": 9.973398146668118e-05, | |
| "loss": 0.2348, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.4121586810922205, | |
| "grad_norm": 0.6003942489624023, | |
| "learning_rate": 9.959605098265886e-05, | |
| "loss": 0.2341, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.4121586810922205, | |
| "eval_loss": 0.21873560547828674, | |
| "eval_runtime": 153.6661, | |
| "eval_samples_per_second": 0.39, | |
| "eval_steps_per_second": 0.39, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.4327666151468315, | |
| "grad_norm": 0.6303937435150146, | |
| "learning_rate": 9.94295546780682e-05, | |
| "loss": 0.2367, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.45337454920144254, | |
| "grad_norm": 0.48416924476623535, | |
| "learning_rate": 9.923458844972895e-05, | |
| "loss": 0.2222, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.4739824832560536, | |
| "grad_norm": 0.5229669809341431, | |
| "learning_rate": 9.901126459227316e-05, | |
| "loss": 0.2082, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.4945904173106646, | |
| "grad_norm": 0.594384491443634, | |
| "learning_rate": 9.875971173346683e-05, | |
| "loss": 0.2202, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.5151983513652756, | |
| "grad_norm": 0.8608866930007935, | |
| "learning_rate": 9.848007476012426e-05, | |
| "loss": 0.2277, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.5358062854198866, | |
| "grad_norm": 0.5913523435592651, | |
| "learning_rate": 9.817251473465759e-05, | |
| "loss": 0.2195, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.5564142194744977, | |
| "grad_norm": 0.6697617769241333, | |
| "learning_rate": 9.78372088023098e-05, | |
| "loss": 0.2236, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.5770221535291087, | |
| "grad_norm": 0.6057365536689758, | |
| "learning_rate": 9.747435008912438e-05, | |
| "loss": 0.2414, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.5976300875837197, | |
| "grad_norm": 0.6758664846420288, | |
| "learning_rate": 9.708414759071059e-05, | |
| "loss": 0.213, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.6182380216383307, | |
| "grad_norm": 0.6613226532936096, | |
| "learning_rate": 9.666682605186835e-05, | |
| "loss": 0.2136, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6182380216383307, | |
| "eval_loss": 0.20567402243614197, | |
| "eval_runtime": 153.6199, | |
| "eval_samples_per_second": 0.391, | |
| "eval_steps_per_second": 0.391, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6388459556929418, | |
| "grad_norm": 0.572005569934845, | |
| "learning_rate": 9.622262583714205e-05, | |
| "loss": 0.2293, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.6594538897475528, | |
| "grad_norm": 0.6621612310409546, | |
| "learning_rate": 9.575180279237787e-05, | |
| "loss": 0.2264, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.6800618238021638, | |
| "grad_norm": 0.6547878980636597, | |
| "learning_rate": 9.52546280973643e-05, | |
| "loss": 0.2235, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.7006697578567749, | |
| "grad_norm": 0.6255643963813782, | |
| "learning_rate": 9.473138810964087e-05, | |
| "loss": 0.2289, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.7212776919113859, | |
| "grad_norm": 0.653727650642395, | |
| "learning_rate": 9.418238419956484e-05, | |
| "loss": 0.1932, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.7418856259659969, | |
| "grad_norm": 0.5155867338180542, | |
| "learning_rate": 9.360793257673102e-05, | |
| "loss": 0.2299, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.762493560020608, | |
| "grad_norm": 0.5622400641441345, | |
| "learning_rate": 9.30083641078447e-05, | |
| "loss": 0.2067, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.783101494075219, | |
| "grad_norm": 0.5189801454544067, | |
| "learning_rate": 9.23840241261524e-05, | |
| "loss": 0.2077, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.80370942812983, | |
| "grad_norm": 0.6926999092102051, | |
| "learning_rate": 9.173527223254044e-05, | |
| "loss": 0.2195, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.824317362184441, | |
| "grad_norm": 0.3899558484554291, | |
| "learning_rate": 9.106248208841569e-05, | |
| "loss": 0.2021, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.824317362184441, | |
| "eval_loss": 0.19942662119865417, | |
| "eval_runtime": 153.276, | |
| "eval_samples_per_second": 0.391, | |
| "eval_steps_per_second": 0.391, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.844925296239052, | |
| "grad_norm": 0.4683637022972107, | |
| "learning_rate": 9.036604120048799e-05, | |
| "loss": 0.2139, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.865533230293663, | |
| "grad_norm": 0.643914520740509, | |
| "learning_rate": 8.964635069757802e-05, | |
| "loss": 0.1967, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.8861411643482741, | |
| "grad_norm": 0.6378000974655151, | |
| "learning_rate": 8.890382509957928e-05, | |
| "loss": 0.2141, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.9067490984028851, | |
| "grad_norm": 0.5705697536468506, | |
| "learning_rate": 8.813889207870718e-05, | |
| "loss": 0.1967, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.9273570324574961, | |
| "grad_norm": 0.5441785454750061, | |
| "learning_rate": 8.735199221317285e-05, | |
| "loss": 0.2231, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.9479649665121072, | |
| "grad_norm": 0.5542232990264893, | |
| "learning_rate": 8.654357873342345e-05, | |
| "loss": 0.1916, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.9685729005667182, | |
| "grad_norm": 0.5035462975502014, | |
| "learning_rate": 8.571411726109519e-05, | |
| "loss": 0.2261, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.9891808346213292, | |
| "grad_norm": 0.49977976083755493, | |
| "learning_rate": 8.486408554082935e-05, | |
| "loss": 0.1987, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.0103039670273055, | |
| "grad_norm": 0.44534987211227417, | |
| "learning_rate": 8.399397316510596e-05, | |
| "loss": 0.2067, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.0309119010819165, | |
| "grad_norm": 0.4200068414211273, | |
| "learning_rate": 8.310428129225325e-05, | |
| "loss": 0.1384, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.0309119010819165, | |
| "eval_loss": 0.19919553399085999, | |
| "eval_runtime": 147.0946, | |
| "eval_samples_per_second": 0.408, | |
| "eval_steps_per_second": 0.408, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.0515198351365276, | |
| "grad_norm": 0.4512649476528168, | |
| "learning_rate": 8.219552235779578e-05, | |
| "loss": 0.1319, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.0721277691911386, | |
| "grad_norm": 0.4668980538845062, | |
| "learning_rate": 8.126821977930711e-05, | |
| "loss": 0.126, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.0927357032457496, | |
| "grad_norm": 0.5090588331222534, | |
| "learning_rate": 8.032290765493704e-05, | |
| "loss": 0.1473, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.1133436373003607, | |
| "grad_norm": 0.6163284182548523, | |
| "learning_rate": 7.936013045578745e-05, | |
| "loss": 0.146, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.1339515713549717, | |
| "grad_norm": 0.5462138056755066, | |
| "learning_rate": 7.838044271231333e-05, | |
| "loss": 0.1349, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.1545595054095827, | |
| "grad_norm": 0.5338026881217957, | |
| "learning_rate": 7.738440869493018e-05, | |
| "loss": 0.14, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.1751674394641938, | |
| "grad_norm": 0.6935913562774658, | |
| "learning_rate": 7.63726020890112e-05, | |
| "loss": 0.1395, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.1957753735188048, | |
| "grad_norm": 0.4336049556732178, | |
| "learning_rate": 7.534560566446216e-05, | |
| "loss": 0.1381, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.2163833075734158, | |
| "grad_norm": 0.5455029606819153, | |
| "learning_rate": 7.430401094006339e-05, | |
| "loss": 0.1267, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.2369912416280269, | |
| "grad_norm": 0.6405333280563354, | |
| "learning_rate": 7.324841784277302e-05, | |
| "loss": 0.1487, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.2369912416280269, | |
| "eval_loss": 0.19719114899635315, | |
| "eval_runtime": 147.2125, | |
| "eval_samples_per_second": 0.408, | |
| "eval_steps_per_second": 0.408, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.2575991756826377, | |
| "grad_norm": 0.5280514359474182, | |
| "learning_rate": 7.217943436218728e-05, | |
| "loss": 0.1515, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.2782071097372487, | |
| "grad_norm": 0.5676441788673401, | |
| "learning_rate": 7.109767620035689e-05, | |
| "loss": 0.1345, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.2988150437918597, | |
| "grad_norm": 0.5746245980262756, | |
| "learning_rate": 7.000376641716133e-05, | |
| "loss": 0.1387, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.3194229778464708, | |
| "grad_norm": 0.6514201164245605, | |
| "learning_rate": 6.889833507144532e-05, | |
| "loss": 0.1185, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.3400309119010818, | |
| "grad_norm": 0.7305589318275452, | |
| "learning_rate": 6.778201885812404e-05, | |
| "loss": 0.1569, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.3606388459556928, | |
| "grad_norm": 0.41217121481895447, | |
| "learning_rate": 6.66554607414661e-05, | |
| "loss": 0.1469, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.3812467800103039, | |
| "grad_norm": 0.4568374454975128, | |
| "learning_rate": 6.551930958476571e-05, | |
| "loss": 0.1249, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.401854714064915, | |
| "grad_norm": 0.567737340927124, | |
| "learning_rate": 6.437421977661709e-05, | |
| "loss": 0.1348, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.422462648119526, | |
| "grad_norm": 0.5140897035598755, | |
| "learning_rate": 6.322085085400644e-05, | |
| "loss": 0.1274, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.443070582174137, | |
| "grad_norm": 0.6037400960922241, | |
| "learning_rate": 6.205986712243875e-05, | |
| "loss": 0.1437, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.443070582174137, | |
| "eval_loss": 0.1935451626777649, | |
| "eval_runtime": 146.9127, | |
| "eval_samples_per_second": 0.408, | |
| "eval_steps_per_second": 0.408, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.463678516228748, | |
| "grad_norm": 0.5562382340431213, | |
| "learning_rate": 6.0891937273317935e-05, | |
| "loss": 0.1407, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.484286450283359, | |
| "grad_norm": 0.4450112581253052, | |
| "learning_rate": 5.9717733998800803e-05, | |
| "loss": 0.1474, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.50489438433797, | |
| "grad_norm": 0.5950725674629211, | |
| "learning_rate": 5.853793360434687e-05, | |
| "loss": 0.1393, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.525502318392581, | |
| "grad_norm": 0.5703924298286438, | |
| "learning_rate": 5.735321561918697e-05, | |
| "loss": 0.1396, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.5461102524471921, | |
| "grad_norm": 0.5534270405769348, | |
| "learning_rate": 5.61642624049349e-05, | |
| "loss": 0.1296, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.5667181865018032, | |
| "grad_norm": 0.5007007122039795, | |
| "learning_rate": 5.497175876256796e-05, | |
| "loss": 0.1261, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.5873261205564142, | |
| "grad_norm": 0.6271963715553284, | |
| "learning_rate": 5.377639153800229e-05, | |
| "loss": 0.1513, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.6079340546110252, | |
| "grad_norm": 0.5237583518028259, | |
| "learning_rate": 5.2578849226490525e-05, | |
| "loss": 0.1465, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.6285419886656363, | |
| "grad_norm": 0.4081502854824066, | |
| "learning_rate": 5.137982157606937e-05, | |
| "loss": 0.1214, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.6491499227202473, | |
| "grad_norm": 0.670275866985321, | |
| "learning_rate": 5.017999919028566e-05, | |
| "loss": 0.1371, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.6491499227202473, | |
| "eval_loss": 0.19273868203163147, | |
| "eval_runtime": 146.7244, | |
| "eval_samples_per_second": 0.409, | |
| "eval_steps_per_second": 0.409, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.6697578567748583, | |
| "grad_norm": 0.4012995660305023, | |
| "learning_rate": 4.898007313042975e-05, | |
| "loss": 0.1359, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.6903657908294694, | |
| "grad_norm": 0.4577192962169647, | |
| "learning_rate": 4.7780734517504985e-05, | |
| "loss": 0.1306, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.7109737248840804, | |
| "grad_norm": 0.5553697943687439, | |
| "learning_rate": 4.658267413416326e-05, | |
| "loss": 0.1414, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.7315816589386914, | |
| "grad_norm": 0.5764682292938232, | |
| "learning_rate": 4.5386582026834906e-05, | |
| "loss": 0.1407, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.7521895929933025, | |
| "grad_norm": 0.4649393558502197, | |
| "learning_rate": 4.4193147108283016e-05, | |
| "loss": 0.1291, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.7727975270479135, | |
| "grad_norm": 0.6201866865158081, | |
| "learning_rate": 4.300305676081057e-05, | |
| "loss": 0.1434, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.7934054611025245, | |
| "grad_norm": 0.6112945079803467, | |
| "learning_rate": 4.1816996440349104e-05, | |
| "loss": 0.1295, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.8140133951571356, | |
| "grad_norm": 0.4921644330024719, | |
| "learning_rate": 4.063564928165682e-05, | |
| "loss": 0.1325, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.8346213292117466, | |
| "grad_norm": 0.665326714515686, | |
| "learning_rate": 3.9459695704853836e-05, | |
| "loss": 0.164, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.8552292632663576, | |
| "grad_norm": 0.5689858198165894, | |
| "learning_rate": 3.828981302352065e-05, | |
| "loss": 0.147, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.8552292632663576, | |
| "eval_loss": 0.1883440464735031, | |
| "eval_runtime": 149.5929, | |
| "eval_samples_per_second": 0.401, | |
| "eval_steps_per_second": 0.401, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.8758371973209687, | |
| "grad_norm": 0.5504728555679321, | |
| "learning_rate": 3.712667505458622e-05, | |
| "loss": 0.1228, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.8964451313755797, | |
| "grad_norm": 0.8000712990760803, | |
| "learning_rate": 3.5970951730229785e-05, | |
| "loss": 0.135, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.9170530654301907, | |
| "grad_norm": 0.4071889817714691, | |
| "learning_rate": 3.482330871202029e-05, | |
| "loss": 0.1197, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.9376609994848017, | |
| "grad_norm": 0.46653851866722107, | |
| "learning_rate": 3.3684407007515484e-05, | |
| "loss": 0.1395, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.9582689335394128, | |
| "grad_norm": 0.6100273132324219, | |
| "learning_rate": 3.255490258954167e-05, | |
| "loss": 0.1347, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.9788768675940238, | |
| "grad_norm": 0.5089631080627441, | |
| "learning_rate": 3.14354460183732e-05, | |
| "loss": 0.12, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.9994848016486348, | |
| "grad_norm": 0.4348441958427429, | |
| "learning_rate": 3.032668206702959e-05, | |
| "loss": 0.1289, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.020607934054611, | |
| "grad_norm": 0.3667771816253662, | |
| "learning_rate": 2.9229249349905684e-05, | |
| "loss": 0.0908, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.041215868109222, | |
| "grad_norm": 0.420012891292572, | |
| "learning_rate": 2.8143779954949267e-05, | |
| "loss": 0.0718, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.061823802163833, | |
| "grad_norm": 0.3521002233028412, | |
| "learning_rate": 2.70708990795975e-05, | |
| "loss": 0.0668, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.061823802163833, | |
| "eval_loss": 0.1961071491241455, | |
| "eval_runtime": 149.4232, | |
| "eval_samples_per_second": 0.402, | |
| "eval_steps_per_second": 0.402, | |
| "step": 1000 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 1455, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.394040116523008e+16, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |