{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 50.0,
  "global_step": 276,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.007279344858962694,
      "grad_norm": 21.0,
      "learning_rate": 9.999676095560278e-06,
      "loss": 2.286525249481201,
      "step": 1,
      "token_acc": 0.5219220549158547
    },
    {
      "epoch": 0.036396724294813464,
      "grad_norm": 4.5,
      "learning_rate": 9.991904487098376e-06,
      "loss": 1.657058835029602,
      "step": 5,
      "token_acc": 0.5739727872827698
    },
    {
      "epoch": 0.07279344858962693,
      "grad_norm": 4.1875,
      "learning_rate": 9.967644163325157e-06,
      "loss": 1.4256611824035645,
      "step": 10,
      "token_acc": 0.6180926622248609
    },
    {
      "epoch": 0.1091901728844404,
      "grad_norm": 4.28125,
      "learning_rate": 9.927297588585984e-06,
      "loss": 1.328568458557129,
      "step": 15,
      "token_acc": 0.6331865321778661
    },
    {
      "epoch": 0.14558689717925385,
      "grad_norm": 3.515625,
      "learning_rate": 9.870995413367397e-06,
      "loss": 1.291972541809082,
      "step": 20,
      "token_acc": 0.6392932067391915
    },
    {
      "epoch": 0.18198362147406733,
      "grad_norm": 3.609375,
      "learning_rate": 9.798919955663738e-06,
      "loss": 1.2454283714294434,
      "step": 25,
      "token_acc": 0.6475439632844096
    },
    {
      "epoch": 0.2183803457688808,
      "grad_norm": 3.5625,
      "learning_rate": 9.711304610594104e-06,
      "loss": 1.2329243659973144,
      "step": 30,
      "token_acc": 0.6507554050533993
    },
    {
      "epoch": 0.25477707006369427,
      "grad_norm": 3.46875,
      "learning_rate": 9.608433094621047e-06,
      "loss": 1.2195627212524414,
      "step": 35,
      "token_acc": 0.6537052566535728
    },
    {
      "epoch": 0.2911737943585077,
      "grad_norm": 3.5,
      "learning_rate": 9.490638526818482e-06,
      "loss": 1.180650234222412,
      "step": 40,
      "token_acc": 0.6611108733792631
    },
    {
      "epoch": 0.3275705186533212,
      "grad_norm": 3.578125,
      "learning_rate": 9.358302350163758e-06,
      "loss": 1.187385654449463,
      "step": 45,
      "token_acc": 0.6624428655473593
    },
    {
      "epoch": 0.36396724294813465,
      "grad_norm": 3.53125,
      "learning_rate": 9.211853096347059e-06,
      "loss": 1.1924107551574707,
      "step": 50,
      "token_acc": 0.6569717166178053
    },
    {
      "epoch": 0.40036396724294815,
      "grad_norm": 3.625,
      "learning_rate": 9.05176499809787e-06,
      "loss": 1.1853842735290527,
      "step": 55,
      "token_acc": 0.6592355971475491
    },
    {
      "epoch": 0.4367606915377616,
      "grad_norm": 3.59375,
      "learning_rate": 8.8785564535221e-06,
      "loss": 1.1527756690979003,
      "step": 60,
      "token_acc": 0.668296105189309
    },
    {
      "epoch": 0.4731574158325751,
      "grad_norm": 3.484375,
      "learning_rate": 8.692788347422662e-06,
      "loss": 1.1660991668701173,
      "step": 65,
      "token_acc": 0.6642243722143711
    },
    {
      "epoch": 0.5095541401273885,
      "grad_norm": 3.625,
      "learning_rate": 8.49506223503941e-06,
      "loss": 1.1586066246032716,
      "step": 70,
      "token_acc": 0.6664147588352968
    },
    {
      "epoch": 0.545950864422202,
      "grad_norm": 3.46875,
      "learning_rate": 8.286018394089864e-06,
      "loss": 1.1374778747558594,
      "step": 75,
      "token_acc": 0.6725256953163237
    },
    {
      "epoch": 0.5823475887170154,
      "grad_norm": 3.546875,
      "learning_rate": 8.066333751418582e-06,
      "loss": 1.1596267700195313,
      "step": 80,
      "token_acc": 0.6691714836223507
    },
    {
      "epoch": 0.618744313011829,
      "grad_norm": 3.46875,
      "learning_rate": 7.836719690969183e-06,
      "loss": 1.1762300491333009,
      "step": 85,
      "token_acc": 0.6596193554179011
    },
    {
      "epoch": 0.6551410373066424,
      "grad_norm": 3.40625,
      "learning_rate": 7.597919750177168e-06,
      "loss": 1.1539989471435548,
      "step": 90,
      "token_acc": 0.6666959366013215
    },
    {
      "epoch": 0.6915377616014559,
      "grad_norm": 3.4375,
      "learning_rate": 7.3507072122431765e-06,
      "loss": 1.1496323585510253,
      "step": 95,
      "token_acc": 0.6683675763161444
    },
    {
      "epoch": 0.7279344858962693,
      "grad_norm": 3.625,
      "learning_rate": 7.095882602083321e-06,
      "loss": 1.1614603996276855,
      "step": 100,
      "token_acc": 0.6663477370833518
    },
    {
      "epoch": 0.7643312101910829,
      "grad_norm": 3.40625,
      "learning_rate": 6.834271094065284e-06,
      "loss": 1.1426753997802734,
      "step": 105,
      "token_acc": 0.6706135391028626
    },
    {
      "epoch": 0.8007279344858963,
      "grad_norm": 3.5625,
      "learning_rate": 6.566719839924412e-06,
      "loss": 1.1329108238220216,
      "step": 110,
      "token_acc": 0.672819576495736
    },
    {
      "epoch": 0.8371246587807097,
      "grad_norm": 3.40625,
      "learning_rate": 6.294095225512604e-06,
      "loss": 1.1409114837646483,
      "step": 115,
      "token_acc": 0.6668990154221487
    },
    {
      "epoch": 0.8735213830755232,
      "grad_norm": 3.5625,
      "learning_rate": 6.0172800652631706e-06,
      "loss": 1.1301198959350587,
      "step": 120,
      "token_acc": 0.6706012797117183
    },
    {
      "epoch": 0.9099181073703366,
      "grad_norm": 3.546875,
      "learning_rate": 5.737170743456573e-06,
      "loss": 1.1427392959594727,
      "step": 125,
      "token_acc": 0.6682659203482819
    },
    {
      "epoch": 0.9463148316651502,
      "grad_norm": 3.640625,
      "learning_rate": 5.454674311544236e-06,
      "loss": 1.1528873443603516,
      "step": 130,
      "token_acc": 0.6670924828755809
    },
    {
      "epoch": 0.9827115559599636,
      "grad_norm": 3.46875,
      "learning_rate": 5.17070555092984e-06,
      "loss": 1.129862117767334,
      "step": 135,
      "token_acc": 0.6717101461681683
    },
    {
      "epoch": 1.0145586897179253,
      "grad_norm": 3.5,
      "learning_rate": 4.886184010719472e-06,
      "loss": 1.0890559196472167,
      "step": 140,
      "token_acc": 0.6832747413945724
    },
    {
      "epoch": 1.0509554140127388,
      "grad_norm": 3.234375,
      "learning_rate": 4.6020310300329575e-06,
      "loss": 1.0480965614318847,
      "step": 145,
      "token_acc": 0.6927895623622932
    },
    {
      "epoch": 1.0873521383075524,
      "grad_norm": 3.390625,
      "learning_rate": 4.319166754518768e-06,
      "loss": 1.0643912315368653,
      "step": 150,
      "token_acc": 0.6858169156860152
    },
    {
      "epoch": 1.1237488626023657,
      "grad_norm": 3.25,
      "learning_rate": 4.038507156733637e-06,
      "loss": 1.063976001739502,
      "step": 155,
      "token_acc": 0.685272536687631
    },
    {
      "epoch": 1.1601455868971793,
      "grad_norm": 3.421875,
      "learning_rate": 3.7609610700355014e-06,
      "loss": 1.0600343704223634,
      "step": 160,
      "token_acc": 0.686132363766144
    },
    {
      "epoch": 1.1965423111919926,
      "grad_norm": 3.40625,
      "learning_rate": 3.4874272455946217e-06,
      "loss": 1.0625946998596192,
      "step": 165,
      "token_acc": 0.685317986646584
    },
    {
      "epoch": 1.2329390354868062,
      "grad_norm": 3.453125,
      "learning_rate": 3.2187914420529176e-06,
      "loss": 1.0257128715515136,
      "step": 170,
      "token_acc": 0.6952559635486465
    },
    {
      "epoch": 1.2693357597816197,
      "grad_norm": 3.375,
      "learning_rate": 2.9559235572557486e-06,
      "loss": 1.0540034294128418,
      "step": 175,
      "token_acc": 0.6904259442697791
    },
    {
      "epoch": 1.305732484076433,
      "grad_norm": 3.484375,
      "learning_rate": 2.6996748113442397e-06,
      "loss": 1.0541762351989745,
      "step": 180,
      "token_acc": 0.6903725834036933
    },
    {
      "epoch": 1.3421292083712466,
      "grad_norm": 3.46875,
      "learning_rate": 2.4508749903298086e-06,
      "loss": 1.0581584930419923,
      "step": 185,
      "token_acc": 0.6884617939463721
    },
    {
      "epoch": 1.3785259326660602,
      "grad_norm": 3.703125,
      "learning_rate": 2.2103297590768334e-06,
      "loss": 1.0549434661865233,
      "step": 190,
      "token_acc": 0.6910765087845028
    },
    {
      "epoch": 1.4149226569608735,
      "grad_norm": 3.515625,
      "learning_rate": 1.978818052394528e-06,
      "loss": 1.067518138885498,
      "step": 195,
      "token_acc": 0.6857334545297148
    },
    {
      "epoch": 1.451319381255687,
      "grad_norm": 3.421875,
      "learning_rate": 1.7570895526862202e-06,
      "loss": 1.068192195892334,
      "step": 200,
      "token_acc": 0.6860211259840784
    },
    {
      "epoch": 1.4877161055505004,
      "grad_norm": 3.515625,
      "learning_rate": 1.5458622623239306e-06,
      "loss": 1.0573176383972167,
      "step": 205,
      "token_acc": 0.6888121546961326
    },
    {
      "epoch": 1.524112829845314,
      "grad_norm": 3.40625,
      "learning_rate": 1.3458201786093795e-06,
      "loss": 1.0453268051147462,
      "step": 210,
      "token_acc": 0.6906213800187966
    },
    {
      "epoch": 1.5605095541401273,
      "grad_norm": 3.53125,
      "learning_rate": 1.1576110788503985e-06,
      "loss": 1.0663105964660644,
      "step": 215,
      "token_acc": 0.6848927327867012
    },
    {
      "epoch": 1.5969062784349408,
      "grad_norm": 3.5,
      "learning_rate": 9.81844422725109e-07,
      "loss": 1.0401561737060547,
      "step": 220,
      "token_acc": 0.6924896377725717
    },
    {
      "epoch": 1.6333030027297544,
      "grad_norm": 3.3125,
      "learning_rate": 8.19089378726447e-07,
      "loss": 1.0593575477600097,
      "step": 225,
      "token_acc": 0.6890886546807191
    },
    {
      "epoch": 1.6696997270245677,
      "grad_norm": 3.515625,
      "learning_rate": 6.698729810778065e-07,
      "loss": 1.0698697090148925,
      "step": 230,
      "token_acc": 0.685164035382376
    },
    {
      "epoch": 1.7060964513193813,
      "grad_norm": 3.53125,
      "learning_rate": 5.346784230881119e-07,
      "loss": 1.0585227966308595,
      "step": 235,
      "token_acc": 0.6885862630351565
    },
    {
      "epoch": 1.7424931756141948,
      "grad_norm": 3.484375,
      "learning_rate": 4.139434924727359e-07,
      "loss": 1.080869483947754,
      "step": 240,
      "token_acc": 0.6822751895991332
    },
    {
      "epoch": 1.7788898999090081,
      "grad_norm": 3.390625,
      "learning_rate": 3.0805915370706596e-07,
      "loss": 1.061899757385254,
      "step": 245,
      "token_acc": 0.6871229827490262
    },
    {
      "epoch": 1.8152866242038217,
      "grad_norm": 3.46875,
      "learning_rate": 2.1736828200332628e-07,
      "loss": 1.0475135803222657,
      "step": 250,
      "token_acc": 0.6912013150084471
    },
    {
      "epoch": 1.8516833484986353,
      "grad_norm": 3.5625,
      "learning_rate": 1.4216455301029274e-07,
      "loss": 1.0574556350708009,
      "step": 255,
      "token_acc": 0.6871923876189434
    },
    {
      "epoch": 1.8880800727934486,
      "grad_norm": 3.34375,
      "learning_rate": 8.269149183128988e-08,
      "loss": 1.0561877250671388,
      "step": 260,
      "token_acc": 0.688983606557377
    },
    {
      "epoch": 1.924476797088262,
      "grad_norm": 3.515625,
      "learning_rate": 3.91416844399467e-08,
      "loss": 1.042548370361328,
      "step": 265,
      "token_acc": 0.6905022933214006
    },
    {
      "epoch": 1.9608735213830755,
      "grad_norm": 3.609375,
      "learning_rate": 1.1656154047303691e-08,
      "loss": 1.0355847358703614,
      "step": 270,
      "token_acc": 0.6921419462644562
    },
    {
      "epoch": 1.997270245677889,
      "grad_norm": 3.59375,
      "learning_rate": 3.2390443972241113e-10,
      "loss": 1.0477985382080077,
      "step": 275,
      "token_acc": 0.6890192428407541
    }
  ],
  "logging_steps": 5,
  "max_steps": 276,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.459348090205655e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}