{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 48,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06299212598425197,
      "grad_norm": 1.6827600002288818,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.9807,
      "step": 1
    },
    {
      "epoch": 0.12598425196850394,
      "grad_norm": 1.6266508102416992,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.9458,
      "step": 2
    },
    {
      "epoch": 0.1889763779527559,
      "grad_norm": 1.6120834350585938,
      "learning_rate": 6e-06,
      "loss": 0.9554,
      "step": 3
    },
    {
      "epoch": 0.25196850393700787,
      "grad_norm": 1.5498627424240112,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.9262,
      "step": 4
    },
    {
      "epoch": 0.31496062992125984,
      "grad_norm": 2.2747952938079834,
      "learning_rate": 1e-05,
      "loss": 0.9567,
      "step": 5
    },
    {
      "epoch": 0.3779527559055118,
      "grad_norm": 2.724945068359375,
      "learning_rate": 9.986661418317759e-06,
      "loss": 0.94,
      "step": 6
    },
    {
      "epoch": 0.4409448818897638,
      "grad_norm": 2.948086977005005,
      "learning_rate": 9.946716840375552e-06,
      "loss": 1.0088,
      "step": 7
    },
    {
      "epoch": 0.5039370078740157,
      "grad_norm": 2.763153553009033,
      "learning_rate": 9.880379387779637e-06,
      "loss": 1.0134,
      "step": 8
    },
    {
      "epoch": 0.5669291338582677,
      "grad_norm": 2.4897592067718506,
      "learning_rate": 9.78800299954203e-06,
      "loss": 1.0267,
      "step": 9
    },
    {
      "epoch": 0.6299212598425197,
      "grad_norm": 2.5692765712738037,
      "learning_rate": 9.670080543662742e-06,
      "loss": 0.9758,
      "step": 10
    },
    {
      "epoch": 0.6929133858267716,
      "grad_norm": 2.352846622467041,
      "learning_rate": 9.527241187465735e-06,
      "loss": 0.9602,
      "step": 11
    },
    {
      "epoch": 0.7559055118110236,
      "grad_norm": 2.4274556636810303,
      "learning_rate": 9.36024704071904e-06,
      "loss": 1.0215,
      "step": 12
    },
    {
      "epoch": 0.8188976377952756,
      "grad_norm": 2.3770511150360107,
      "learning_rate": 9.16998908944939e-06,
      "loss": 0.9639,
      "step": 13
    },
    {
      "epoch": 0.8818897637795275,
      "grad_norm": 2.4261667728424072,
      "learning_rate": 8.957482442146271e-06,
      "loss": 0.9426,
      "step": 14
    },
    {
      "epoch": 0.9448818897637795,
      "grad_norm": 2.2574000358581543,
      "learning_rate": 8.72386091371891e-06,
      "loss": 0.98,
      "step": 15
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.383176565170288,
      "learning_rate": 8.470370976103171e-06,
      "loss": 0.9405,
      "step": 16
    },
    {
      "epoch": 1.0629921259842519,
      "grad_norm": 2.3710620403289795,
      "learning_rate": 8.198365107794457e-06,
      "loss": 0.8291,
      "step": 17
    },
    {
      "epoch": 1.125984251968504,
      "grad_norm": 2.4993762969970703,
      "learning_rate": 7.909294577789765e-06,
      "loss": 0.83,
      "step": 18
    },
    {
      "epoch": 1.188976377952756,
      "grad_norm": 2.5276553630828857,
      "learning_rate": 7.604701702439652e-06,
      "loss": 0.8006,
      "step": 19
    },
    {
      "epoch": 1.2519685039370079,
      "grad_norm": 2.4743146896362305,
      "learning_rate": 7.286211616523193e-06,
      "loss": 0.7527,
      "step": 20
    },
    {
      "epoch": 1.3149606299212597,
      "grad_norm": 2.530345916748047,
      "learning_rate": 6.95552360245078e-06,
      "loss": 0.785,
      "step": 21
    },
    {
      "epoch": 1.3779527559055118,
      "grad_norm": 2.6544010639190674,
      "learning_rate": 6.614402023857231e-06,
      "loss": 0.8194,
      "step": 22
    },
    {
      "epoch": 1.4409448818897639,
      "grad_norm": 2.5227644443511963,
      "learning_rate": 6.264666911958404e-06,
      "loss": 0.7987,
      "step": 23
    },
    {
      "epoch": 1.5039370078740157,
      "grad_norm": 2.5911266803741455,
      "learning_rate": 5.908184254897183e-06,
      "loss": 0.7847,
      "step": 24
    },
    {
      "epoch": 1.5669291338582676,
      "grad_norm": 2.3522489070892334,
      "learning_rate": 5.546856041889374e-06,
      "loss": 0.7723,
      "step": 25
    },
    {
      "epoch": 1.6299212598425197,
      "grad_norm": 2.3171679973602295,
      "learning_rate": 5.182610115288296e-06,
      "loss": 0.7435,
      "step": 26
    },
    {
      "epoch": 1.6929133858267718,
      "grad_norm": 2.3236382007598877,
      "learning_rate": 4.817389884711706e-06,
      "loss": 0.7955,
      "step": 27
    },
    {
      "epoch": 1.7559055118110236,
      "grad_norm": 2.3249104022979736,
      "learning_rate": 4.4531439581106295e-06,
      "loss": 0.756,
      "step": 28
    },
    {
      "epoch": 1.8188976377952755,
      "grad_norm": 2.308413028717041,
      "learning_rate": 4.091815745102818e-06,
      "loss": 0.7696,
      "step": 29
    },
    {
      "epoch": 1.8818897637795275,
      "grad_norm": 2.3917453289031982,
      "learning_rate": 3.7353330880415963e-06,
      "loss": 0.7891,
      "step": 30
    },
    {
      "epoch": 1.9448818897637796,
      "grad_norm": 2.2722179889678955,
      "learning_rate": 3.3855979761427705e-06,
      "loss": 0.7326,
      "step": 31
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3211171627044678,
      "learning_rate": 3.044476397549221e-06,
      "loss": 0.7675,
      "step": 32
    },
    {
      "epoch": 2.062992125984252,
      "grad_norm": 2.114405632019043,
      "learning_rate": 2.7137883834768076e-06,
      "loss": 0.6823,
      "step": 33
    },
    {
      "epoch": 2.1259842519685037,
      "grad_norm": 2.106955051422119,
      "learning_rate": 2.3952982975603494e-06,
      "loss": 0.6718,
      "step": 34
    },
    {
      "epoch": 2.188976377952756,
      "grad_norm": 2.1824538707733154,
      "learning_rate": 2.0907054222102367e-06,
      "loss": 0.6405,
      "step": 35
    },
    {
      "epoch": 2.251968503937008,
      "grad_norm": 2.105780839920044,
      "learning_rate": 1.8016348922055448e-06,
      "loss": 0.6769,
      "step": 36
    },
    {
      "epoch": 2.3149606299212597,
      "grad_norm": 2.0345335006713867,
      "learning_rate": 1.5296290238968303e-06,
      "loss": 0.6514,
      "step": 37
    },
    {
      "epoch": 2.377952755905512,
      "grad_norm": 1.9925103187561035,
      "learning_rate": 1.2761390862810907e-06,
      "loss": 0.6376,
      "step": 38
    },
    {
      "epoch": 2.440944881889764,
      "grad_norm": 2.051919937133789,
      "learning_rate": 1.04251755785373e-06,
      "loss": 0.666,
      "step": 39
    },
    {
      "epoch": 2.5039370078740157,
      "grad_norm": 1.9680358171463013,
      "learning_rate": 8.30010910550611e-07,
      "loss": 0.667,
      "step": 40
    },
    {
      "epoch": 2.5669291338582676,
      "grad_norm": 2.007450819015503,
      "learning_rate": 6.397529592809615e-07,
      "loss": 0.6699,
      "step": 41
    },
    {
      "epoch": 2.6299212598425195,
      "grad_norm": 1.990918517112732,
      "learning_rate": 4.727588125342669e-07,
      "loss": 0.6151,
      "step": 42
    },
    {
      "epoch": 2.6929133858267718,
      "grad_norm": 1.9402188062667847,
      "learning_rate": 3.299194563372604e-07,
      "loss": 0.6667,
      "step": 43
    },
    {
      "epoch": 2.7559055118110236,
      "grad_norm": 1.8792628049850464,
      "learning_rate": 2.1199700045797077e-07,
      "loss": 0.6395,
      "step": 44
    },
    {
      "epoch": 2.8188976377952755,
      "grad_norm": 1.8017092943191528,
      "learning_rate": 1.196206122203647e-07,
      "loss": 0.6417,
      "step": 45
    },
    {
      "epoch": 2.8818897637795278,
      "grad_norm": 1.8651630878448486,
      "learning_rate": 5.3283159624448745e-08,
      "loss": 0.6361,
      "step": 46
    },
    {
      "epoch": 2.9448818897637796,
      "grad_norm": 1.874280571937561,
      "learning_rate": 1.333858168224178e-08,
      "loss": 0.6426,
      "step": 47
    },
    {
      "epoch": 3.0,
      "grad_norm": 1.9368858337402344,
      "learning_rate": 0.0,
      "loss": 0.6177,
      "step": 48
    },
    {
      "epoch": 3.0,
      "step": 48,
      "total_flos": 1.2587951231441306e+17,
      "train_loss": 0.8018192363282045,
      "train_runtime": 3899.5488,
      "train_samples_per_second": 0.786,
      "train_steps_per_second": 0.012
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 48,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.2587951231441306e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}