{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 449, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.011135857461024499, "grad_norm": 1.3413572311401367, "learning_rate": 1.0619469026548673e-06, "loss": 1.3355, "step": 5 }, { "epoch": 0.022271714922048998, "grad_norm": 1.0000957250595093, "learning_rate": 2.389380530973451e-06, "loss": 1.4079, "step": 10 }, { "epoch": 0.0334075723830735, "grad_norm": 0.7801820039749146, "learning_rate": 3.7168141592920353e-06, "loss": 1.3474, "step": 15 }, { "epoch": 0.044543429844097995, "grad_norm": 0.5836945176124573, "learning_rate": 5.04424778761062e-06, "loss": 1.3387, "step": 20 }, { "epoch": 0.0556792873051225, "grad_norm": 0.6259394288063049, "learning_rate": 6.371681415929204e-06, "loss": 1.3457, "step": 25 }, { "epoch": 0.066815144766147, "grad_norm": 0.6279721856117249, "learning_rate": 7.699115044247788e-06, "loss": 1.345, "step": 30 }, { "epoch": 0.0779510022271715, "grad_norm": 0.5959689021110535, "learning_rate": 9.026548672566373e-06, "loss": 1.3064, "step": 35 }, { "epoch": 0.08908685968819599, "grad_norm": 0.4760344624519348, "learning_rate": 1.0353982300884955e-05, "loss": 1.1918, "step": 40 }, { "epoch": 0.10022271714922049, "grad_norm": 0.5409045219421387, "learning_rate": 1.168141592920354e-05, "loss": 1.2243, "step": 45 }, { "epoch": 0.111358574610245, "grad_norm": 0.6069848537445068, "learning_rate": 1.3008849557522125e-05, "loss": 1.2276, "step": 50 }, { "epoch": 0.12249443207126949, "grad_norm": 0.5506272315979004, "learning_rate": 1.433628318584071e-05, "loss": 1.2671, "step": 55 }, { "epoch": 0.133630289532294, "grad_norm": 0.4604385793209076, "learning_rate": 1.5663716814159295e-05, "loss": 1.2268, "step": 60 }, { "epoch": 0.1447661469933185, "grad_norm": 0.5579900741577148, "learning_rate": 1.6991150442477876e-05, "loss": 1.2215, "step": 65 }, { "epoch": 0.155902004454343, "grad_norm": 0.48294851183891296, "learning_rate": 1.8318584070796458e-05, "loss": 1.2418, "step": 70 }, { "epoch": 0.16703786191536749, "grad_norm": 0.5626055598258972, "learning_rate": 1.9646017699115046e-05, "loss": 1.185, "step": 75 }, { "epoch": 0.17817371937639198, "grad_norm": 0.5725001096725464, "learning_rate": 2.0973451327433628e-05, "loss": 1.2337, "step": 80 }, { "epoch": 0.18930957683741648, "grad_norm": 0.5308484435081482, "learning_rate": 2.2300884955752213e-05, "loss": 1.164, "step": 85 }, { "epoch": 0.20044543429844097, "grad_norm": 0.5047495365142822, "learning_rate": 2.3628318584070798e-05, "loss": 1.1862, "step": 90 }, { "epoch": 0.21158129175946547, "grad_norm": 0.5524989366531372, "learning_rate": 2.495575221238938e-05, "loss": 1.1503, "step": 95 }, { "epoch": 0.22271714922049, "grad_norm": 0.5751224160194397, "learning_rate": 2.6283185840707968e-05, "loss": 1.1819, "step": 100 }, { "epoch": 0.23385300668151449, "grad_norm": 0.590943455696106, "learning_rate": 2.761061946902655e-05, "loss": 1.1177, "step": 105 }, { "epoch": 0.24498886414253898, "grad_norm": 0.624424159526825, "learning_rate": 2.893805309734513e-05, "loss": 1.1612, "step": 110 }, { "epoch": 0.2561247216035635, "grad_norm": 0.7900341153144836, "learning_rate": 2.9999983715046248e-05, "loss": 1.1556, "step": 115 }, { "epoch": 0.267260579064588, "grad_norm": 0.717691957950592, "learning_rate": 2.9999413745377787e-05, "loss": 1.1074, "step": 120 }, { "epoch": 0.27839643652561247, "grad_norm": 0.5590342283248901, "learning_rate": 2.9998029563381436e-05, "loss": 1.1106, "step": 125 }, { "epoch": 0.289532293986637, "grad_norm": 0.5695084929466248, "learning_rate": 2.999583124419468e-05, "loss": 1.0615, "step": 130 }, { "epoch": 0.30066815144766146, "grad_norm": 0.6339223384857178, "learning_rate": 2.999281890714874e-05, "loss": 1.1041, "step": 135 }, { "epoch": 0.311804008908686, "grad_norm": 0.6401590704917908, "learning_rate": 2.9988992715762148e-05, "loss": 1.0891, "step": 140 }, { "epoch": 0.32293986636971045, "grad_norm": 0.6139689087867737, "learning_rate": 2.9984352877731836e-05, "loss": 1.0935, "step": 145 }, { "epoch": 0.33407572383073497, "grad_norm": 0.5202445387840271, "learning_rate": 2.9978899644921866e-05, "loss": 1.0622, "step": 150 }, { "epoch": 0.34521158129175944, "grad_norm": 0.5589560866355896, "learning_rate": 2.9972633313349764e-05, "loss": 1.0604, "step": 155 }, { "epoch": 0.35634743875278396, "grad_norm": 0.6837779879570007, "learning_rate": 2.9965554223170455e-05, "loss": 1.008, "step": 160 }, { "epoch": 0.3674832962138085, "grad_norm": 0.7174135446548462, "learning_rate": 2.9957662758657787e-05, "loss": 1.0292, "step": 165 }, { "epoch": 0.37861915367483295, "grad_norm": 0.6325995922088623, "learning_rate": 2.9948959348183686e-05, "loss": 1.0019, "step": 170 }, { "epoch": 0.3897550111358575, "grad_norm": 0.6495500802993774, "learning_rate": 2.993944446419489e-05, "loss": 0.9838, "step": 175 }, { "epoch": 0.40089086859688194, "grad_norm": 0.641270101070404, "learning_rate": 2.9929118623187307e-05, "loss": 0.9553, "step": 180 }, { "epoch": 0.41202672605790647, "grad_norm": 0.6926126480102539, "learning_rate": 2.9917982385677982e-05, "loss": 1.0296, "step": 185 }, { "epoch": 0.42316258351893093, "grad_norm": 0.7776742577552795, "learning_rate": 2.990603635617466e-05, "loss": 1.0223, "step": 190 }, { "epoch": 0.43429844097995546, "grad_norm": 0.6762447357177734, "learning_rate": 2.9893281183142996e-05, "loss": 1.053, "step": 195 }, { "epoch": 0.44543429844098, "grad_norm": 0.70978844165802, "learning_rate": 2.9879717558971317e-05, "loss": 0.9561, "step": 200 }, { "epoch": 0.45657015590200445, "grad_norm": 0.7382838726043701, "learning_rate": 2.986534621993307e-05, "loss": 0.9923, "step": 205 }, { "epoch": 0.46770601336302897, "grad_norm": 0.7762662768363953, "learning_rate": 2.9850167946146838e-05, "loss": 1.0125, "step": 210 }, { "epoch": 0.47884187082405344, "grad_norm": 0.7446727752685547, "learning_rate": 2.9834183561533997e-05, "loss": 0.9445, "step": 215 }, { "epoch": 0.48997772828507796, "grad_norm": 0.78370201587677, "learning_rate": 2.9817393933773994e-05, "loss": 0.9717, "step": 220 }, { "epoch": 0.5011135857461024, "grad_norm": 0.6878297328948975, "learning_rate": 2.9799799974257248e-05, "loss": 0.9373, "step": 225 }, { "epoch": 0.512249443207127, "grad_norm": 0.690732479095459, "learning_rate": 2.9781402638035656e-05, "loss": 0.9109, "step": 230 }, { "epoch": 0.5233853006681515, "grad_norm": 0.8460608124732971, "learning_rate": 2.9762202923770795e-05, "loss": 0.9548, "step": 235 }, { "epoch": 0.534521158129176, "grad_norm": 0.8983225226402283, "learning_rate": 2.9742201873679656e-05, "loss": 0.9547, "step": 240 }, { "epoch": 0.5456570155902004, "grad_norm": 0.8119720816612244, "learning_rate": 2.9721400573478117e-05, "loss": 0.9748, "step": 245 }, { "epoch": 0.5567928730512249, "grad_norm": 0.8376508951187134, "learning_rate": 2.9699800152321975e-05, "loss": 0.8978, "step": 250 }, { "epoch": 0.5679287305122495, "grad_norm": 0.7981753349304199, "learning_rate": 2.967740178274567e-05, "loss": 0.8872, "step": 255 }, { "epoch": 0.579064587973274, "grad_norm": 0.7605271339416504, "learning_rate": 2.9654206680598638e-05, "loss": 0.8954, "step": 260 }, { "epoch": 0.5902004454342984, "grad_norm": 0.9272874593734741, "learning_rate": 2.9630216104979288e-05, "loss": 0.8797, "step": 265 }, { "epoch": 0.6013363028953229, "grad_norm": 0.8306118249893188, "learning_rate": 2.9605431358166687e-05, "loss": 0.9045, "step": 270 }, { "epoch": 0.6124721603563474, "grad_norm": 0.849515438079834, "learning_rate": 2.957985378554984e-05, "loss": 0.8929, "step": 275 }, { "epoch": 0.623608017817372, "grad_norm": 0.9004138708114624, "learning_rate": 2.955348477555467e-05, "loss": 0.9355, "step": 280 }, { "epoch": 0.6347438752783965, "grad_norm": 0.836115837097168, "learning_rate": 2.9526325759568665e-05, "loss": 0.8795, "step": 285 }, { "epoch": 0.6458797327394209, "grad_norm": 0.8236976861953735, "learning_rate": 2.9498378211863145e-05, "loss": 0.8241, "step": 290 }, { "epoch": 0.6570155902004454, "grad_norm": 0.817470908164978, "learning_rate": 2.9469643649513264e-05, "loss": 0.8541, "step": 295 }, { "epoch": 0.6681514476614699, "grad_norm": 0.9785251021385193, "learning_rate": 2.9440123632315647e-05, "loss": 0.8715, "step": 300 }, { "epoch": 0.6792873051224945, "grad_norm": 0.9281602501869202, "learning_rate": 2.9409819762703715e-05, "loss": 0.7914, "step": 305 }, { "epoch": 0.6904231625835189, "grad_norm": 0.8825387358665466, "learning_rate": 2.9378733685660707e-05, "loss": 0.8527, "step": 310 }, { "epoch": 0.7015590200445434, "grad_norm": 0.9314531683921814, "learning_rate": 2.934686708863039e-05, "loss": 0.8966, "step": 315 }, { "epoch": 0.7126948775055679, "grad_norm": 0.9172025322914124, "learning_rate": 2.9314221701425445e-05, "loss": 0.8472, "step": 320 }, { "epoch": 0.7238307349665924, "grad_norm": 0.9198710918426514, "learning_rate": 2.9280799296133577e-05, "loss": 0.878, "step": 325 }, { "epoch": 0.734966592427617, "grad_norm": 0.8762511610984802, "learning_rate": 2.9246601687021324e-05, "loss": 0.7845, "step": 330 }, { "epoch": 0.7461024498886414, "grad_norm": 0.8782876133918762, "learning_rate": 2.9211630730435564e-05, "loss": 0.7928, "step": 335 }, { "epoch": 0.7572383073496659, "grad_norm": 0.9156160950660706, "learning_rate": 2.9175888324702762e-05, "loss": 0.8467, "step": 340 }, { "epoch": 0.7683741648106904, "grad_norm": 1.0404008626937866, "learning_rate": 2.9139376410025897e-05, "loss": 0.7927, "step": 345 }, { "epoch": 0.779510022271715, "grad_norm": 1.0743643045425415, "learning_rate": 2.9102096968379178e-05, "loss": 0.8123, "step": 350 }, { "epoch": 0.7906458797327395, "grad_norm": 0.9985504150390625, "learning_rate": 2.9064052023400417e-05, "loss": 0.7735, "step": 355 }, { "epoch": 0.8017817371937639, "grad_norm": 0.9021556973457336, "learning_rate": 2.9025243640281226e-05, "loss": 0.8006, "step": 360 }, { "epoch": 0.8129175946547884, "grad_norm": 0.9394658803939819, "learning_rate": 2.8985673925654853e-05, "loss": 0.7879, "step": 365 }, { "epoch": 0.8240534521158129, "grad_norm": 0.88721764087677, "learning_rate": 2.8945345027481884e-05, "loss": 0.7225, "step": 370 }, { "epoch": 0.8351893095768375, "grad_norm": 0.9523298740386963, "learning_rate": 2.8904259134933627e-05, "loss": 0.8202, "step": 375 }, { "epoch": 0.8463251670378619, "grad_norm": 0.9752704501152039, "learning_rate": 2.8862418478273246e-05, "loss": 0.7871, "step": 380 }, { "epoch": 0.8574610244988864, "grad_norm": 0.9238730072975159, "learning_rate": 2.881982532873476e-05, "loss": 0.7216, "step": 385 }, { "epoch": 0.8685968819599109, "grad_norm": 0.8588601350784302, "learning_rate": 2.8776481998399676e-05, "loss": 0.7146, "step": 390 }, { "epoch": 0.8797327394209354, "grad_norm": 0.8526139259338379, "learning_rate": 2.873239084007156e-05, "loss": 0.8105, "step": 395 }, { "epoch": 0.89086859688196, "grad_norm": 1.0353056192398071, "learning_rate": 2.868755424714825e-05, "loss": 0.789, "step": 400 }, { "epoch": 0.9020044543429844, "grad_norm": 0.8390752077102661, "learning_rate": 2.8641974653491997e-05, "loss": 0.7338, "step": 405 }, { "epoch": 0.9131403118040089, "grad_norm": 0.9766253232955933, "learning_rate": 2.85956545332973e-05, "loss": 0.7782, "step": 410 }, { "epoch": 0.9242761692650334, "grad_norm": 0.9131956100463867, "learning_rate": 2.8548596400956614e-05, "loss": 0.734, "step": 415 }, { "epoch": 0.9354120267260579, "grad_norm": 0.979026734828949, "learning_rate": 2.850080281092389e-05, "loss": 0.7538, "step": 420 }, { "epoch": 0.9465478841870824, "grad_norm": 1.0852129459381104, "learning_rate": 2.845227635757587e-05, "loss": 0.7072, "step": 425 }, { "epoch": 0.9576837416481069, "grad_norm": 1.1135140657424927, "learning_rate": 2.8403019675071267e-05, "loss": 0.7657, "step": 430 }, { "epoch": 0.9688195991091314, "grad_norm": 0.8528003096580505, "learning_rate": 2.8353035437207796e-05, "loss": 0.7036, "step": 435 }, { "epoch": 0.9799554565701559, "grad_norm": 0.9384568929672241, "learning_rate": 2.8302326357277013e-05, "loss": 0.6948, "step": 440 }, { "epoch": 0.9910913140311804, "grad_norm": 1.1752519607543945, "learning_rate": 2.825089518791704e-05, "loss": 0.7052, "step": 445 } ], "logging_steps": 5, "max_steps": 2245, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.211605326316175e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }