{
  "best_metric": 0.09926149994134903,
  "best_model_checkpoint": "./xlam_lora_new_2560_1_delete_over_size_3epoch_multi/checkpoint-1384",
  "epoch": 2.9994592321095954,
  "eval_steps": 173,
  "global_step": 1560,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.019227302769933306,
      "grad_norm": 0.8050442337989807,
      "learning_rate": 3.205128205128205e-06,
      "loss": 0.6642,
      "step": 10
    },
    {
      "epoch": 0.03845460553986661,
      "grad_norm": 0.45196670293807983,
      "learning_rate": 6.41025641025641e-06,
      "loss": 0.6377,
      "step": 20
    },
    {
      "epoch": 0.05768190830979992,
      "grad_norm": 0.47538116574287415,
      "learning_rate": 9.615384615384616e-06,
      "loss": 0.6205,
      "step": 30
    },
    {
      "epoch": 0.07690921107973323,
      "grad_norm": 0.34218236804008484,
      "learning_rate": 1.282051282051282e-05,
      "loss": 0.5579,
      "step": 40
    },
    {
      "epoch": 0.09613651384966652,
      "grad_norm": 0.38529354333877563,
      "learning_rate": 1.602564102564103e-05,
      "loss": 0.4393,
      "step": 50
    },
    {
      "epoch": 0.11536381661959984,
      "grad_norm": 0.3676348924636841,
      "learning_rate": 1.923076923076923e-05,
      "loss": 0.3965,
      "step": 60
    },
    {
      "epoch": 0.13459111938953314,
      "grad_norm": 0.3427989184856415,
      "learning_rate": 2.2435897435897437e-05,
      "loss": 0.3488,
      "step": 70
    },
    {
      "epoch": 0.15381842215946645,
      "grad_norm": 0.30555886030197144,
      "learning_rate": 2.4999887657859027e-05,
      "loss": 0.2224,
      "step": 80
    },
    {
      "epoch": 0.17304572492939974,
      "grad_norm": 0.3501119315624237,
      "learning_rate": 2.4995955894949523e-05,
      "loss": 0.248,
      "step": 90
    },
    {
      "epoch": 0.19227302769933305,
      "grad_norm": 0.36164987087249756,
      "learning_rate": 2.4986409044149163e-05,
      "loss": 0.2322,
      "step": 100
    },
    {
      "epoch": 0.21150033046926636,
      "grad_norm": 0.3375028967857361,
      "learning_rate": 2.4971251395358342e-05,
      "loss": 0.2427,
      "step": 110
    },
    {
      "epoch": 0.23072763323919968,
      "grad_norm": 0.3321882486343384,
      "learning_rate": 2.495048975970308e-05,
      "loss": 0.1967,
      "step": 120
    },
    {
      "epoch": 0.24995493600913296,
      "grad_norm": 0.2828320562839508,
      "learning_rate": 2.492413346647437e-05,
      "loss": 0.1636,
      "step": 130
    },
    {
      "epoch": 0.2691822387790663,
      "grad_norm": 0.3430372476577759,
      "learning_rate": 2.4892194358936095e-05,
      "loss": 0.2041,
      "step": 140
    },
    {
      "epoch": 0.28840954154899956,
      "grad_norm": 0.3393559455871582,
      "learning_rate": 2.4854686789003173e-05,
      "loss": 0.1911,
      "step": 150
    },
    {
      "epoch": 0.3076368443189329,
      "grad_norm": 0.36908936500549316,
      "learning_rate": 2.4811627610792543e-05,
      "loss": 0.2025,
      "step": 160
    },
    {
      "epoch": 0.3268641470888662,
      "grad_norm": 0.38679710030555725,
      "learning_rate": 2.4763036173049677e-05,
      "loss": 0.1566,
      "step": 170
    },
    {
      "epoch": 0.3326323379198462,
      "eval_loss": 0.14381718635559082,
      "eval_runtime": 202.8561,
      "eval_samples_per_second": 5.285,
      "eval_steps_per_second": 5.285,
      "step": 173
    },
    {
      "epoch": 0.34609144985879947,
      "grad_norm": 0.39648687839508057,
      "learning_rate": 2.4708934310454207e-05,
      "loss": 0.1943,
      "step": 180
    },
    {
      "epoch": 0.3653187526287328,
      "grad_norm": 0.3979399800300598,
      "learning_rate": 2.4649346333808458e-05,
      "loss": 0.1594,
      "step": 190
    },
    {
      "epoch": 0.3845460553986661,
      "grad_norm": 0.34854283928871155,
      "learning_rate": 2.458429901911331e-05,
      "loss": 0.1683,
      "step": 200
    },
    {
      "epoch": 0.40377335816859944,
      "grad_norm": 0.26675811409950256,
      "learning_rate": 2.4513821595536356e-05,
      "loss": 0.1616,
      "step": 210
    },
    {
      "epoch": 0.4230006609385327,
      "grad_norm": 0.4399104118347168,
      "learning_rate": 2.44379457322777e-05,
      "loss": 0.1664,
      "step": 220
    },
    {
      "epoch": 0.442227963708466,
      "grad_norm": 0.5316939353942871,
      "learning_rate": 2.4356705524339317e-05,
      "loss": 0.1745,
      "step": 230
    },
    {
      "epoch": 0.46145526647839935,
      "grad_norm": 0.5996547341346741,
      "learning_rate": 2.4270137477204408e-05,
      "loss": 0.1753,
      "step": 240
    },
    {
      "epoch": 0.48068256924833264,
      "grad_norm": 0.4330001175403595,
      "learning_rate": 2.417828049043353e-05,
      "loss": 0.1997,
      "step": 250
    },
    {
      "epoch": 0.4999098720182659,
      "grad_norm": 0.4255751073360443,
      "learning_rate": 2.4081175840185022e-05,
      "loss": 0.1728,
      "step": 260
    },
    {
      "epoch": 0.5191371747881992,
      "grad_norm": 0.536382257938385,
      "learning_rate": 2.3978867160667457e-05,
      "loss": 0.147,
      "step": 270
    },
    {
      "epoch": 0.5383644775581325,
      "grad_norm": 0.5623698830604553,
      "learning_rate": 2.3871400424532493e-05,
      "loss": 0.1863,
      "step": 280
    },
    {
      "epoch": 0.5575917803280659,
      "grad_norm": 0.49679550528526306,
      "learning_rate": 2.375882392221695e-05,
      "loss": 0.1685,
      "step": 290
    },
    {
      "epoch": 0.5768190830979991,
      "grad_norm": 0.5784851908683777,
      "learning_rate": 2.36411882402434e-05,
      "loss": 0.1506,
      "step": 300
    },
    {
      "epoch": 0.5960463858679325,
      "grad_norm": 0.6098183393478394,
      "learning_rate": 2.3518546238489e-05,
      "loss": 0.1565,
      "step": 310
    },
    {
      "epoch": 0.6152736886378658,
      "grad_norm": 0.5198598504066467,
      "learning_rate": 2.339095302643273e-05,
      "loss": 0.1433,
      "step": 320
    },
    {
      "epoch": 0.634500991407799,
      "grad_norm": 0.5796005129814148,
      "learning_rate": 2.325846593839188e-05,
      "loss": 0.1668,
      "step": 330
    },
    {
      "epoch": 0.6537282941777324,
      "grad_norm": 0.6006646752357483,
      "learning_rate": 2.312114450775869e-05,
      "loss": 0.1505,
      "step": 340
    },
    {
      "epoch": 0.6652646758396924,
      "eval_loss": 0.1198095753788948,
      "eval_runtime": 202.8938,
      "eval_samples_per_second": 5.284,
      "eval_steps_per_second": 5.284,
      "step": 346
    },
    {
      "epoch": 0.6729555969476657,
      "grad_norm": 0.5787773728370667,
      "learning_rate": 2.2979050440248896e-05,
      "loss": 0.1442,
      "step": 350
    },
    {
      "epoch": 0.6921828997175989,
      "grad_norm": 0.5230283141136169,
      "learning_rate": 2.2832247586174118e-05,
      "loss": 0.1555,
      "step": 360
    },
    {
      "epoch": 0.7114102024875323,
      "grad_norm": 0.5551069378852844,
      "learning_rate": 2.2680801911750558e-05,
      "loss": 0.1422,
      "step": 370
    },
    {
      "epoch": 0.7306375052574656,
      "grad_norm": 0.5769614577293396,
      "learning_rate": 2.2524781469456928e-05,
      "loss": 0.165,
      "step": 380
    },
    {
      "epoch": 0.7498648080273989,
      "grad_norm": 0.6609200239181519,
      "learning_rate": 2.2364256367454922e-05,
      "loss": 0.161,
      "step": 390
    },
    {
      "epoch": 0.7690921107973322,
      "grad_norm": 0.5530131459236145,
      "learning_rate": 2.2199298738085907e-05,
      "loss": 0.1709,
      "step": 400
    },
    {
      "epoch": 0.7883194135672655,
      "grad_norm": 0.7019795775413513,
      "learning_rate": 2.2029982705458107e-05,
      "loss": 0.1471,
      "step": 410
    },
    {
      "epoch": 0.8075467163371989,
      "grad_norm": 0.5327528715133667,
      "learning_rate": 2.1856384352138765e-05,
      "loss": 0.1913,
      "step": 420
    },
    {
      "epoch": 0.8267740191071321,
      "grad_norm": 0.5548112988471985,
      "learning_rate": 2.1678581684966235e-05,
      "loss": 0.1509,
      "step": 430
    },
    {
      "epoch": 0.8460013218770654,
      "grad_norm": 0.51619553565979,
      "learning_rate": 2.149665459999743e-05,
      "loss": 0.1341,
      "step": 440
    },
    {
      "epoch": 0.8652286246469988,
      "grad_norm": 0.6642457842826843,
      "learning_rate": 2.1310684846606346e-05,
      "loss": 0.1458,
      "step": 450
    },
    {
      "epoch": 0.884455927416932,
      "grad_norm": 0.48370271921157837,
      "learning_rate": 2.1120755990749762e-05,
      "loss": 0.1584,
      "step": 460
    },
    {
      "epoch": 0.9036832301868654,
      "grad_norm": 0.8130201697349548,
      "learning_rate": 2.092695337741671e-05,
      "loss": 0.1389,
      "step": 470
    },
    {
      "epoch": 0.9229105329567987,
      "grad_norm": 0.4986889958381653,
      "learning_rate": 2.0729364092278456e-05,
      "loss": 0.1263,
      "step": 480
    },
    {
      "epoch": 0.9421378357267319,
      "grad_norm": 0.6791219711303711,
      "learning_rate": 2.052807692255638e-05,
      "loss": 0.1562,
      "step": 490
    },
    {
      "epoch": 0.9613651384966653,
      "grad_norm": 0.6069239974021912,
      "learning_rate": 2.0323182317125198e-05,
      "loss": 0.1296,
      "step": 500
    },
    {
      "epoch": 0.9805924412665986,
      "grad_norm": 0.6993957161903381,
      "learning_rate": 2.011477234586957e-05,
      "loss": 0.1695,
      "step": 510
    },
    {
      "epoch": 0.9978970137595385,
      "eval_loss": 0.11108512431383133,
      "eval_runtime": 202.9151,
      "eval_samples_per_second": 5.283,
      "eval_steps_per_second": 5.283,
      "step": 519
    },
    {
      "epoch": 0.9998197440365318,
      "grad_norm": 0.5495030283927917,
      "learning_rate": 1.9902940658312253e-05,
      "loss": 0.1512,
      "step": 520
    },
    {
      "epoch": 1.0190470468064652,
      "grad_norm": 0.5100754499435425,
      "learning_rate": 1.968778244153246e-05,
      "loss": 0.1088,
      "step": 530
    },
    {
      "epoch": 1.0382743495763984,
      "grad_norm": 0.6836853623390198,
      "learning_rate": 1.9469394377393335e-05,
      "loss": 0.1524,
      "step": 540
    },
    {
      "epoch": 1.0575016523463319,
      "grad_norm": 0.5304776430130005,
      "learning_rate": 1.9247874599097714e-05,
      "loss": 0.1239,
      "step": 550
    },
    {
      "epoch": 1.076728955116265,
      "grad_norm": 0.6995298862457275,
      "learning_rate": 1.9023322647091736e-05,
      "loss": 0.1203,
      "step": 560
    },
    {
      "epoch": 1.0959562578861983,
      "grad_norm": 0.579207181930542,
      "learning_rate": 1.8795839424336097e-05,
      "loss": 0.134,
      "step": 570
    },
    {
      "epoch": 1.1151835606561318,
      "grad_norm": 0.4746134877204895,
      "learning_rate": 1.8565527150965077e-05,
      "loss": 0.1344,
      "step": 580
    },
    {
      "epoch": 1.134410863426065,
      "grad_norm": 0.8127744793891907,
      "learning_rate": 1.8332489318353655e-05,
      "loss": 0.1157,
      "step": 590
    },
    {
      "epoch": 1.1536381661959982,
      "grad_norm": 0.6949151158332825,
      "learning_rate": 1.809683064261343e-05,
      "loss": 0.1197,
      "step": 600
    },
    {
      "epoch": 1.1728654689659317,
      "grad_norm": 0.6869731545448303,
      "learning_rate": 1.7858657017538178e-05,
      "loss": 0.1392,
      "step": 610
    },
    {
      "epoch": 1.192092771735865,
      "grad_norm": 0.7461158037185669,
      "learning_rate": 1.7618075467020213e-05,
      "loss": 0.1262,
      "step": 620
    },
    {
      "epoch": 1.2113200745057981,
      "grad_norm": 0.5442166924476624,
      "learning_rate": 1.7375194096958946e-05,
      "loss": 0.1258,
      "step": 630
    },
    {
      "epoch": 1.2305473772757316,
      "grad_norm": 0.7670741081237793,
      "learning_rate": 1.713012204668325e-05,
      "loss": 0.1204,
      "step": 640
    },
    {
      "epoch": 1.2497746800456648,
      "grad_norm": 0.3919640779495239,
      "learning_rate": 1.6882969439909434e-05,
      "loss": 0.1444,
      "step": 650
    },
    {
      "epoch": 1.269001982815598,
      "grad_norm": 0.6234434247016907,
      "learning_rate": 1.663384733525686e-05,
      "loss": 0.1245,
      "step": 660
    },
    {
      "epoch": 1.2882292855855315,
      "grad_norm": 0.7237009406089783,
      "learning_rate": 1.638286767634353e-05,
      "loss": 0.1258,
      "step": 670
    },
    {
      "epoch": 1.3074565883554647,
      "grad_norm": 0.6398624181747437,
      "learning_rate": 1.613014324148392e-05,
      "loss": 0.1519,
      "step": 680
    },
    {
      "epoch": 1.326683891125398,
      "grad_norm": 0.7676591873168945,
      "learning_rate": 1.5875787593011784e-05,
      "loss": 0.1545,
      "step": 690
    },
    {
      "epoch": 1.3305293516793848,
      "eval_loss": 0.10604555904865265,
      "eval_runtime": 203.0173,
      "eval_samples_per_second": 5.28,
      "eval_steps_per_second": 5.28,
      "step": 692
    },
    {
      "epoch": 1.3459111938953314,
      "grad_norm": 0.5583875775337219,
      "learning_rate": 1.5619915026250646e-05,
      "loss": 0.1141,
      "step": 700
    },
    {
      "epoch": 1.3651384966652647,
      "grad_norm": 0.5790243148803711,
      "learning_rate": 1.536264051815491e-05,
      "loss": 0.1326,
      "step": 710
    },
    {
      "epoch": 1.3843657994351979,
      "grad_norm": 0.7467628121376038,
      "learning_rate": 1.5104079675644706e-05,
      "loss": 0.1439,
      "step": 720
    },
    {
      "epoch": 1.4035931022051313,
      "grad_norm": 0.9867657423019409,
      "learning_rate": 1.4844348683657616e-05,
      "loss": 0.1385,
      "step": 730
    },
    {
      "epoch": 1.4228204049750646,
      "grad_norm": 0.7909297347068787,
      "learning_rate": 1.4583564252940735e-05,
      "loss": 0.1259,
      "step": 740
    },
    {
      "epoch": 1.4420477077449978,
      "grad_norm": 0.6159791350364685,
      "learning_rate": 1.432184356760637e-05,
      "loss": 0.1126,
      "step": 750
    },
    {
      "epoch": 1.4612750105149312,
      "grad_norm": 0.6234619617462158,
      "learning_rate": 1.4059304232475098e-05,
      "loss": 0.1144,
      "step": 760
    },
    {
      "epoch": 1.4805023132848645,
      "grad_norm": 0.7142959833145142,
      "learning_rate": 1.3796064220229765e-05,
      "loss": 0.1249,
      "step": 770
    },
    {
      "epoch": 1.4997296160547977,
      "grad_norm": 0.6258341073989868,
      "learning_rate": 1.3532241818404156e-05,
      "loss": 0.1321,
      "step": 780
    },
    {
      "epoch": 1.5189569188247312,
      "grad_norm": 0.5723307728767395,
      "learning_rate": 1.326795557623022e-05,
      "loss": 0.1193,
      "step": 790
    },
    {
      "epoch": 1.5381842215946644,
      "grad_norm": 0.7454131841659546,
      "learning_rate": 1.300332425136769e-05,
      "loss": 0.1281,
      "step": 800
    },
    {
      "epoch": 1.5574115243645976,
      "grad_norm": 0.5975070595741272,
      "learning_rate": 1.273846675654003e-05,
      "loss": 0.1321,
      "step": 810
    },
    {
      "epoch": 1.576638827134531,
      "grad_norm": 0.7056507468223572,
      "learning_rate": 1.2473502106100723e-05,
      "loss": 0.1444,
      "step": 820
    },
    {
      "epoch": 1.5958661299044643,
      "grad_norm": 0.7889280915260315,
      "learning_rate": 1.2208549362553885e-05,
      "loss": 0.1226,
      "step": 830
    },
    {
      "epoch": 1.6150934326743975,
      "grad_norm": 0.7041313648223877,
      "learning_rate": 1.194372758305325e-05,
      "loss": 0.1316,
      "step": 840
    },
    {
      "epoch": 1.634320735444331,
      "grad_norm": 0.7797935605049133,
      "learning_rate": 1.1679155765903524e-05,
      "loss": 0.132,
      "step": 850
    },
    {
      "epoch": 1.6535480382142642,
      "grad_norm": 0.6426231861114502,
      "learning_rate": 1.1414952797088248e-05,
      "loss": 0.1101,
      "step": 860
    },
    {
      "epoch": 1.663161689599231,
      "eval_loss": 0.10293085128068924,
      "eval_runtime": 203.1567,
      "eval_samples_per_second": 5.277,
      "eval_steps_per_second": 5.277,
      "step": 865
    },
    {
      "epoch": 1.6727753409841974,
      "grad_norm": 1.0461760759353638,
      "learning_rate": 1.1151237396848058e-05,
      "loss": 0.128,
      "step": 870
    },
    {
      "epoch": 1.692002643754131,
      "grad_norm": 0.8692240118980408,
      "learning_rate": 1.088812806633349e-05,
      "loss": 0.1114,
      "step": 880
    },
    {
      "epoch": 1.7112299465240641,
      "grad_norm": 0.5583866238594055,
      "learning_rate": 1.0625743034356183e-05,
      "loss": 0.1309,
      "step": 890
    },
    {
      "epoch": 1.7304572492939974,
      "grad_norm": 0.5476118922233582,
      "learning_rate": 1.0364200204262473e-05,
      "loss": 0.1156,
      "step": 900
    },
    {
      "epoch": 1.7496845520639308,
      "grad_norm": 0.8960713148117065,
      "learning_rate": 1.0103617100953274e-05,
      "loss": 0.1305,
      "step": 910
    },
    {
      "epoch": 1.768911854833864,
      "grad_norm": 0.6927953958511353,
      "learning_rate": 9.84411081807393e-06,
      "loss": 0.1245,
      "step": 920
    },
    {
      "epoch": 1.7881391576037973,
      "grad_norm": 0.5891989469528198,
      "learning_rate": 9.585797965397949e-06,
      "loss": 0.1125,
      "step": 930
    },
    {
      "epoch": 1.8073664603737307,
      "grad_norm": 0.8319947123527527,
      "learning_rate": 9.328794616428092e-06,
      "loss": 0.1462,
      "step": 940
    },
    {
      "epoch": 1.826593763143664,
      "grad_norm": 0.7439499497413635,
      "learning_rate": 9.073216256238485e-06,
      "loss": 0.1167,
      "step": 950
    },
    {
      "epoch": 1.8458210659135972,
      "grad_norm": 0.7593638896942139,
      "learning_rate": 8.8191777295811e-06,
      "loss": 0.1356,
      "step": 960
    },
    {
      "epoch": 1.8650483686835306,
      "grad_norm": 0.871376097202301,
      "learning_rate": 8.56679318928e-06,
      "loss": 0.1173,
      "step": 970
    },
    {
      "epoch": 1.884275671453464,
      "grad_norm": 0.8772872090339661,
      "learning_rate": 8.31617604493651e-06,
      "loss": 0.1347,
      "step": 980
    },
    {
      "epoch": 1.903502974223397,
      "grad_norm": 0.6309168934822083,
      "learning_rate": 8.067438911968305e-06,
      "loss": 0.1382,
      "step": 990
    },
    {
      "epoch": 1.9227302769933305,
      "grad_norm": 0.775113046169281,
      "learning_rate": 7.820693561005429e-06,
      "loss": 0.1368,
      "step": 1000
    },
    {
      "epoch": 1.941957579763264,
      "grad_norm": 0.9096739888191223,
      "learning_rate": 7.576050867665876e-06,
      "loss": 0.1263,
      "step": 1010
    },
    {
      "epoch": 1.961184882533197,
      "grad_norm": 0.7637848258018494,
      "learning_rate": 7.333620762733376e-06,
      "loss": 0.1148,
      "step": 1020
    },
    {
      "epoch": 1.9804121853031305,
      "grad_norm": 0.8084997534751892,
      "learning_rate": 7.0935121827597245e-06,
      "loss": 0.1457,
      "step": 1030
    },
    {
      "epoch": 1.995794027519077,
      "eval_loss": 0.10069960355758667,
      "eval_runtime": 203.0573,
      "eval_samples_per_second": 5.279,
      "eval_steps_per_second": 5.279,
      "step": 1038
    },
    {
      "epoch": 1.999639488073064,
      "grad_norm": 1.0884274244308472,
      "learning_rate": 6.855833021113886e-06,
      "loss": 0.1641,
      "step": 1040
    },
    {
      "epoch": 2.018866790842997,
      "grad_norm": 0.702237069606781,
      "learning_rate": 6.620690079499835e-06,
      "loss": 0.1159,
      "step": 1050
    },
    {
      "epoch": 2.0380940936129304,
      "grad_norm": 0.6377178430557251,
      "learning_rate": 6.388189019964976e-06,
      "loss": 0.1103,
      "step": 1060
    },
    {
      "epoch": 2.057321396382864,
      "grad_norm": 0.8843504786491394,
      "learning_rate": 6.158434317420636e-06,
      "loss": 0.1178,
      "step": 1070
    },
    {
      "epoch": 2.076548699152797,
      "grad_norm": 0.42746174335479736,
      "learning_rate": 5.931529212695996e-06,
      "loss": 0.1143,
      "step": 1080
    },
    {
      "epoch": 2.0957760019227303,
      "grad_norm": 0.7449749708175659,
      "learning_rate": 5.70757566614661e-06,
      "loss": 0.1262,
      "step": 1090
    },
    {
      "epoch": 2.1150033046926637,
      "grad_norm": 0.6538805961608887,
      "learning_rate": 5.48667431183824e-06,
      "loss": 0.1344,
      "step": 1100
    },
    {
      "epoch": 2.1342306074625967,
      "grad_norm": 0.8034993410110474,
      "learning_rate": 5.268924412326709e-06,
      "loss": 0.1447,
      "step": 1110
    },
    {
      "epoch": 2.15345791023253,
      "grad_norm": 0.7438477277755737,
      "learning_rate": 5.054423814054049e-06,
      "loss": 0.1082,
      "step": 1120
    },
    {
      "epoch": 2.1726852130024636,
      "grad_norm": 0.5646623373031616,
      "learning_rate": 4.843268903380932e-06,
      "loss": 0.1199,
      "step": 1130
    },
    {
      "epoch": 2.1919125157723967,
      "grad_norm": 0.9965047240257263,
      "learning_rate": 4.6355545632752575e-06,
      "loss": 0.1303,
      "step": 1140
    },
    {
      "epoch": 2.21113981854233,
      "grad_norm": 0.8709131479263306,
      "learning_rate": 4.4313741306762495e-06,
      "loss": 0.1107,
      "step": 1150
    },
    {
      "epoch": 2.2303671213122636,
      "grad_norm": 0.6653530597686768,
      "learning_rate": 4.230819354553279e-06,
      "loss": 0.1053,
      "step": 1160
    },
    {
      "epoch": 2.2495944240821966,
      "grad_norm": 0.766173243522644,
      "learning_rate": 4.033980354678239e-06,
      "loss": 0.1017,
      "step": 1170
    },
    {
      "epoch": 2.26882172685213,
      "grad_norm": 0.5112572312355042,
      "learning_rate": 3.840945581130008e-06,
      "loss": 0.109,
      "step": 1180
    },
    {
      "epoch": 2.2880490296220635,
      "grad_norm": 0.8744060397148132,
      "learning_rate": 3.651801774549213e-06,
      "loss": 0.1026,
      "step": 1190
    },
    {
      "epoch": 2.3072763323919965,
      "grad_norm": 0.8215727806091309,
      "learning_rate": 3.4666339271610836e-06,
      "loss": 0.1058,
      "step": 1200
    },
    {
      "epoch": 2.32650363516193,
      "grad_norm": 0.6597920656204224,
      "learning_rate": 3.285525244584017e-06,
      "loss": 0.1378,
      "step": 1210
    },
    {
      "epoch": 2.3284263654389235,
      "eval_loss": 0.10013294219970703,
      "eval_runtime": 203.5302,
      "eval_samples_per_second": 5.267,
      "eval_steps_per_second": 5.267,
      "step": 1211
    },
    {
      "epoch": 2.3457309379318634,
      "grad_norm": 0.7206103205680847,
      "learning_rate": 3.108557108440914e-06,
      "loss": 0.1028,
      "step": 1220
    },
    {
      "epoch": 2.3649582407017964,
      "grad_norm": 0.968497097492218,
      "learning_rate": 2.9358090397901634e-06,
      "loss": 0.1345,
      "step": 1230
    },
    {
      "epoch": 2.38418554347173,
      "grad_norm": 0.7522798180580139,
      "learning_rate": 2.767358663392658e-06,
      "loss": 0.1029,
      "step": 1240
    },
    {
      "epoch": 2.4034128462416633,
      "grad_norm": 0.8699542284011841,
      "learning_rate": 2.6032816728309166e-06,
      "loss": 0.1181,
      "step": 1250
    },
    {
      "epoch": 2.4226401490115963,
      "grad_norm": 0.8779841661453247,
      "learning_rate": 2.4436517964960005e-06,
      "loss": 0.1028,
      "step": 1260
    },
    {
      "epoch": 2.4418674517815298,
      "grad_norm": 0.6922764182090759,
      "learning_rate": 2.2885407644574696e-06,
      "loss": 0.1148,
      "step": 1270
    },
    {
      "epoch": 2.461094754551463,
      "grad_norm": 0.7528237700462341,
      "learning_rate": 2.1380182762313238e-06,
      "loss": 0.1128,
      "step": 1280
    },
    {
      "epoch": 2.480322057321396,
      "grad_norm": 0.8349286913871765,
      "learning_rate": 1.992151969460333e-06,
      "loss": 0.1027,
      "step": 1290
    },
    {
      "epoch": 2.4995493600913297,
      "grad_norm": 0.8040717244148254,
      "learning_rate": 1.8510073895209131e-06,
      "loss": 0.1001,
      "step": 1300
    },
    {
      "epoch": 2.518776662861263,
      "grad_norm": 0.8065551519393921,
      "learning_rate": 1.7146479600701565e-06,
      "loss": 0.1454,
      "step": 1310
    },
    {
      "epoch": 2.538003965631196,
      "grad_norm": 0.7855721712112427,
      "learning_rate": 1.5831349545462461e-06,
      "loss": 0.1063,
      "step": 1320
    },
    {
      "epoch": 2.5572312684011296,
      "grad_norm": 0.9087608456611633,
      "learning_rate": 1.4565274686351022e-06,
      "loss": 0.1155,
      "step": 1330
    },
    {
      "epoch": 2.576458571171063,
      "grad_norm": 0.49701324105262756,
      "learning_rate": 1.334882393715585e-06,
      "loss": 0.1001,
      "step": 1340
    },
    {
      "epoch": 2.5956858739409965,
      "grad_norm": 0.7943114638328552,
      "learning_rate": 1.2182543912952178e-06,
      "loss": 0.1107,
      "step": 1350
    },
    {
      "epoch": 2.6149131767109295,
      "grad_norm": 0.8685261607170105,
      "learning_rate": 1.1066958684479074e-06,
      "loss": 0.1209,
      "step": 1360
    },
    {
      "epoch": 2.634140479480863,
      "grad_norm": 1.0667730569839478,
      "learning_rate": 1.0002569542646973e-06,
      "loss": 0.1361,
      "step": 1370
    },
    {
      "epoch": 2.653367782250796,
      "grad_norm": 0.6879278421401978,
      "learning_rate": 8.989854773281486e-07,
      "loss": 0.0925,
      "step": 1380
    },
    {
      "epoch": 2.6610587033587696,
      "eval_loss": 0.09926149994134903,
      "eval_runtime": 203.0153,
      "eval_samples_per_second": 5.28,
      "eval_steps_per_second": 5.28,
      "step": 1384
    },
    {
      "epoch": 2.6725950850207294,
      "grad_norm": 0.7204756736755371,
      "learning_rate": 8.029269442204348e-07,
      "loss": 0.1148,
      "step": 1390
    },
    {
      "epoch": 2.691822387790663,
      "grad_norm": 0.834997832775116,
      "learning_rate": 7.121245190748708e-07,
      "loss": 0.0918,
      "step": 1400
    },
    {
      "epoch": 2.7110496905605963,
      "grad_norm": 0.8163384795188904,
      "learning_rate": 6.266190041799805e-07,
      "loss": 0.1345,
      "step": 1410
    },
    {
      "epoch": 2.7302769933305293,
      "grad_norm": 0.6108123660087585,
      "learning_rate": 5.464488216449154e-07,
      "loss": 0.1235,
      "step": 1420
    },
    {
      "epoch": 2.7495042961004628,
      "grad_norm": 0.8302232027053833,
      "learning_rate": 4.716499961343698e-07,
      "loss": 0.1163,
      "step": 1430
    },
    {
      "epoch": 2.7687315988703958,
      "grad_norm": 0.670668363571167,
      "learning_rate": 4.022561386808177e-07,
      "loss": 0.1103,
      "step": 1440
    },
    {
      "epoch": 2.7879589016403292,
      "grad_norm": 0.7220197319984436,
      "learning_rate": 3.3829843158131175e-07,
      "loss": 0.1228,
      "step": 1450
    },
    {
      "epoch": 2.8071862044102627,
      "grad_norm": 0.5018804669380188,
      "learning_rate": 2.798056143856462e-07,
      "loss": 0.1225,
      "step": 1460
    },
    {
      "epoch": 2.826413507180196,
      "grad_norm": 0.5343906283378601,
      "learning_rate": 2.268039709821687e-07,
      "loss": 0.0918,
      "step": 1470
    },
    {
      "epoch": 2.845640809950129,
      "grad_norm": 0.6775656938552856,
      "learning_rate": 1.7931731778705052e-07,
      "loss": 0.0903,
      "step": 1480
    },
    {
      "epoch": 2.8648681127200626,
      "grad_norm": 0.7841689586639404,
      "learning_rate": 1.373669930423288e-07,
      "loss": 0.1308,
      "step": 1490
    },
    {
      "epoch": 2.8840954154899956,
      "grad_norm": 0.8570185303688049,
      "learning_rate": 1.0097184722750592e-07,
      "loss": 0.1287,
      "step": 1500
    },
    {
      "epoch": 2.903322718259929,
      "grad_norm": 0.635200023651123,
      "learning_rate": 7.014823458905001e-08,
      "loss": 0.1011,
      "step": 1510
    },
    {
      "epoch": 2.9225500210298625,
      "grad_norm": 0.7127873301506042,
      "learning_rate": 4.4910005791570786e-08,
      "loss": 0.1345,
      "step": 1520
    },
    {
      "epoch": 2.941777323799796,
      "grad_norm": 0.9114808440208435,
      "learning_rate": 2.526850169399103e-08,
      "loss": 0.1132,
      "step": 1530
    },
    {
      "epoch": 2.961004626569729,
      "grad_norm": 0.7554405927658081,
      "learning_rate": 1.1232548253503616e-08,
      "loss": 0.1091,
      "step": 1540
    },
    {
      "epoch": 2.9802319293396624,
      "grad_norm": 0.7547165155410767,
      "learning_rate": 2.8084525596064337e-09,
      "loss": 0.0944,
      "step": 1550
    },
    {
      "epoch": 2.9936910412786157,
      "eval_loss": 0.09935057163238525,
      "eval_runtime": 203.0468,
      "eval_samples_per_second": 5.28,
      "eval_steps_per_second": 5.28,
      "step": 1557
    },
    {
      "epoch": 2.9994592321095954,
      "grad_norm": 0.7488301992416382,
      "learning_rate": 0.0,
      "loss": 0.1237,
      "step": 1560
    },
    {
      "epoch": 2.9994592321095954,
      "step": 1560,
      "total_flos": 1.3623219564340838e+18,
      "train_loss": 0.15373969880434182,
      "train_runtime": 33993.1903,
      "train_samples_per_second": 1.469,
      "train_steps_per_second": 0.046
    }
  ],
  "logging_steps": 10,
  "max_steps": 1560,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 173,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.3623219564340838e+18,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}