{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 50.0, "global_step": 276, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.007279344858962694, "grad_norm": 21.0, "learning_rate": 9.999676095560278e-06, "loss": 2.286525249481201, "step": 1, "token_acc": 0.5219220549158547 }, { "epoch": 0.036396724294813464, "grad_norm": 4.5, "learning_rate": 9.991904487098376e-06, "loss": 1.657058835029602, "step": 5, "token_acc": 0.5739727872827698 }, { "epoch": 0.07279344858962693, "grad_norm": 4.1875, "learning_rate": 9.967644163325157e-06, "loss": 1.4256611824035645, "step": 10, "token_acc": 0.6180926622248609 }, { "epoch": 0.1091901728844404, "grad_norm": 4.28125, "learning_rate": 9.927297588585984e-06, "loss": 1.328568458557129, "step": 15, "token_acc": 0.6331865321778661 }, { "epoch": 0.14558689717925385, "grad_norm": 3.515625, "learning_rate": 9.870995413367397e-06, "loss": 1.291972541809082, "step": 20, "token_acc": 0.6392932067391915 }, { "epoch": 0.18198362147406733, "grad_norm": 3.609375, "learning_rate": 9.798919955663738e-06, "loss": 1.2454283714294434, "step": 25, "token_acc": 0.6475439632844096 }, { "epoch": 0.2183803457688808, "grad_norm": 3.5625, "learning_rate": 9.711304610594104e-06, "loss": 1.2329243659973144, "step": 30, "token_acc": 0.6507554050533993 }, { "epoch": 0.25477707006369427, "grad_norm": 3.46875, "learning_rate": 9.608433094621047e-06, "loss": 1.2195627212524414, "step": 35, "token_acc": 0.6537052566535728 }, { "epoch": 0.2911737943585077, "grad_norm": 3.5, "learning_rate": 9.490638526818482e-06, "loss": 1.180650234222412, "step": 40, "token_acc": 0.6611108733792631 }, { "epoch": 0.3275705186533212, "grad_norm": 3.578125, "learning_rate": 9.358302350163758e-06, "loss": 1.187385654449463, "step": 45, "token_acc": 0.6624428655473593 }, { "epoch": 0.36396724294813465, "grad_norm": 3.53125, "learning_rate": 9.211853096347059e-06, "loss": 1.1924107551574707, "step": 50, "token_acc": 0.6569717166178053 }, { "epoch": 0.40036396724294815, "grad_norm": 3.625, "learning_rate": 9.05176499809787e-06, "loss": 1.1853842735290527, "step": 55, "token_acc": 0.6592355971475491 }, { "epoch": 0.4367606915377616, "grad_norm": 3.59375, "learning_rate": 8.8785564535221e-06, "loss": 1.1527756690979003, "step": 60, "token_acc": 0.668296105189309 }, { "epoch": 0.4731574158325751, "grad_norm": 3.484375, "learning_rate": 8.692788347422662e-06, "loss": 1.1660991668701173, "step": 65, "token_acc": 0.6642243722143711 }, { "epoch": 0.5095541401273885, "grad_norm": 3.625, "learning_rate": 8.49506223503941e-06, "loss": 1.1586066246032716, "step": 70, "token_acc": 0.6664147588352968 }, { "epoch": 0.545950864422202, "grad_norm": 3.46875, "learning_rate": 8.286018394089864e-06, "loss": 1.1374778747558594, "step": 75, "token_acc": 0.6725256953163237 }, { "epoch": 0.5823475887170154, "grad_norm": 3.546875, "learning_rate": 8.066333751418582e-06, "loss": 1.1596267700195313, "step": 80, "token_acc": 0.6691714836223507 }, { "epoch": 0.618744313011829, "grad_norm": 3.46875, "learning_rate": 7.836719690969183e-06, "loss": 1.1762300491333009, "step": 85, "token_acc": 0.6596193554179011 }, { "epoch": 0.6551410373066424, "grad_norm": 3.40625, "learning_rate": 7.597919750177168e-06, "loss": 1.1539989471435548, "step": 90, "token_acc": 0.6666959366013215 }, { "epoch": 0.6915377616014559, "grad_norm": 3.4375, "learning_rate": 7.3507072122431765e-06, "loss": 1.1496323585510253, "step": 95, "token_acc": 0.6683675763161444 }, { "epoch": 0.7279344858962693, "grad_norm": 3.625, "learning_rate": 7.095882602083321e-06, "loss": 1.1614603996276855, "step": 100, "token_acc": 0.6663477370833518 }, { "epoch": 0.7643312101910829, "grad_norm": 3.40625, "learning_rate": 6.834271094065284e-06, "loss": 1.1426753997802734, "step": 105, "token_acc": 0.6706135391028626 }, { "epoch": 0.8007279344858963, "grad_norm": 3.5625, "learning_rate": 6.566719839924412e-06, "loss": 1.1329108238220216, "step": 110, "token_acc": 0.672819576495736 }, { "epoch": 0.8371246587807097, "grad_norm": 3.40625, "learning_rate": 6.294095225512604e-06, "loss": 1.1409114837646483, "step": 115, "token_acc": 0.6668990154221487 }, { "epoch": 0.8735213830755232, "grad_norm": 3.5625, "learning_rate": 6.0172800652631706e-06, "loss": 1.1301198959350587, "step": 120, "token_acc": 0.6706012797117183 }, { "epoch": 0.9099181073703366, "grad_norm": 3.546875, "learning_rate": 5.737170743456573e-06, "loss": 1.1427392959594727, "step": 125, "token_acc": 0.6682659203482819 }, { "epoch": 0.9463148316651502, "grad_norm": 3.640625, "learning_rate": 5.454674311544236e-06, "loss": 1.1528873443603516, "step": 130, "token_acc": 0.6670924828755809 }, { "epoch": 0.9827115559599636, "grad_norm": 3.46875, "learning_rate": 5.17070555092984e-06, "loss": 1.129862117767334, "step": 135, "token_acc": 0.6717101461681683 }, { "epoch": 1.0145586897179253, "grad_norm": 3.5, "learning_rate": 4.886184010719472e-06, "loss": 1.0890559196472167, "step": 140, "token_acc": 0.6832747413945724 }, { "epoch": 1.0509554140127388, "grad_norm": 3.234375, "learning_rate": 4.6020310300329575e-06, "loss": 1.0480965614318847, "step": 145, "token_acc": 0.6927895623622932 }, { "epoch": 1.0873521383075524, "grad_norm": 3.390625, "learning_rate": 4.319166754518768e-06, "loss": 1.0643912315368653, "step": 150, "token_acc": 0.6858169156860152 }, { "epoch": 1.1237488626023657, "grad_norm": 3.25, "learning_rate": 4.038507156733637e-06, "loss": 1.063976001739502, "step": 155, "token_acc": 0.685272536687631 }, { "epoch": 1.1601455868971793, "grad_norm": 3.421875, "learning_rate": 3.7609610700355014e-06, "loss": 1.0600343704223634, "step": 160, "token_acc": 0.686132363766144 }, { "epoch": 1.1965423111919926, "grad_norm": 3.40625, "learning_rate": 3.4874272455946217e-06, "loss": 1.0625946998596192, "step": 165, "token_acc": 0.685317986646584 }, { "epoch": 1.2329390354868062, "grad_norm": 3.453125, "learning_rate": 3.2187914420529176e-06, "loss": 1.0257128715515136, "step": 170, "token_acc": 0.6952559635486465 }, { "epoch": 1.2693357597816197, "grad_norm": 3.375, "learning_rate": 2.9559235572557486e-06, "loss": 1.0540034294128418, "step": 175, "token_acc": 0.6904259442697791 }, { "epoch": 1.305732484076433, "grad_norm": 3.484375, "learning_rate": 2.6996748113442397e-06, "loss": 1.0541762351989745, "step": 180, "token_acc": 0.6903725834036933 }, { "epoch": 1.3421292083712466, "grad_norm": 3.46875, "learning_rate": 2.4508749903298086e-06, "loss": 1.0581584930419923, "step": 185, "token_acc": 0.6884617939463721 }, { "epoch": 1.3785259326660602, "grad_norm": 3.703125, "learning_rate": 2.2103297590768334e-06, "loss": 1.0549434661865233, "step": 190, "token_acc": 0.6910765087845028 }, { "epoch": 1.4149226569608735, "grad_norm": 3.515625, "learning_rate": 1.978818052394528e-06, "loss": 1.067518138885498, "step": 195, "token_acc": 0.6857334545297148 }, { "epoch": 1.451319381255687, "grad_norm": 3.421875, "learning_rate": 1.7570895526862202e-06, "loss": 1.068192195892334, "step": 200, "token_acc": 0.6860211259840784 }, { "epoch": 1.4877161055505004, "grad_norm": 3.515625, "learning_rate": 1.5458622623239306e-06, "loss": 1.0573176383972167, "step": 205, "token_acc": 0.6888121546961326 }, { "epoch": 1.524112829845314, "grad_norm": 3.40625, "learning_rate": 1.3458201786093795e-06, "loss": 1.0453268051147462, "step": 210, "token_acc": 0.6906213800187966 }, { "epoch": 1.5605095541401273, "grad_norm": 3.53125, "learning_rate": 1.1576110788503985e-06, "loss": 1.0663105964660644, "step": 215, "token_acc": 0.6848927327867012 }, { "epoch": 1.5969062784349408, "grad_norm": 3.5, "learning_rate": 9.81844422725109e-07, "loss": 1.0401561737060547, "step": 220, "token_acc": 0.6924896377725717 }, { "epoch": 1.6333030027297544, "grad_norm": 3.3125, "learning_rate": 8.19089378726447e-07, "loss": 1.0593575477600097, "step": 225, "token_acc": 0.6890886546807191 }, { "epoch": 1.6696997270245677, "grad_norm": 3.515625, "learning_rate": 6.698729810778065e-07, "loss": 1.0698697090148925, "step": 230, "token_acc": 0.685164035382376 }, { "epoch": 1.7060964513193813, "grad_norm": 3.53125, "learning_rate": 5.346784230881119e-07, "loss": 1.0585227966308595, "step": 235, "token_acc": 0.6885862630351565 }, { "epoch": 1.7424931756141948, "grad_norm": 3.484375, "learning_rate": 4.139434924727359e-07, "loss": 1.080869483947754, "step": 240, "token_acc": 0.6822751895991332 }, { "epoch": 1.7788898999090081, "grad_norm": 3.390625, "learning_rate": 3.0805915370706596e-07, "loss": 1.061899757385254, "step": 245, "token_acc": 0.6871229827490262 }, { "epoch": 1.8152866242038217, "grad_norm": 3.46875, "learning_rate": 2.1736828200332628e-07, "loss": 1.0475135803222657, "step": 250, "token_acc": 0.6912013150084471 }, { "epoch": 1.8516833484986353, "grad_norm": 3.5625, "learning_rate": 1.4216455301029274e-07, "loss": 1.0574556350708009, "step": 255, "token_acc": 0.6871923876189434 }, { "epoch": 1.8880800727934486, "grad_norm": 3.34375, "learning_rate": 8.269149183128988e-08, "loss": 1.0561877250671388, "step": 260, "token_acc": 0.688983606557377 }, { "epoch": 1.924476797088262, "grad_norm": 3.515625, "learning_rate": 3.91416844399467e-08, "loss": 1.042548370361328, "step": 265, "token_acc": 0.6905022933214006 }, { "epoch": 1.9608735213830755, "grad_norm": 3.609375, "learning_rate": 1.1656154047303691e-08, "loss": 1.0355847358703614, "step": 270, "token_acc": 0.6921419462644562 }, { "epoch": 1.997270245677889, "grad_norm": 3.59375, "learning_rate": 3.2390443972241113e-10, "loss": 1.0477985382080077, "step": 275, "token_acc": 0.6890192428407541 } ], "logging_steps": 5, "max_steps": 276, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.459348090205655e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }