{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 21.38888888888889,
  "eval_steps": 500,
  "global_step": 154,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.17,
      "learning_rate": 1.5e-05,
      "loss": 1.7268,
      "step": 3
    },
    {
      "epoch": 0.33,
      "learning_rate": 3e-05,
      "loss": 1.959,
      "step": 6
    },
    {
      "epoch": 1.11,
      "learning_rate": 4.5e-05,
      "loss": 1.8528,
      "step": 9
    },
    {
      "epoch": 1.28,
      "learning_rate": 6e-05,
      "loss": 1.5683,
      "step": 12
    },
    {
      "epoch": 2.06,
      "learning_rate": 7.500000000000001e-05,
      "loss": 1.7871,
      "step": 15
    },
    {
      "epoch": 2.22,
      "learning_rate": 9e-05,
      "loss": 1.3609,
      "step": 18
    },
    {
      "epoch": 2.39,
      "learning_rate": 0.000105,
      "loss": 1.5358,
      "step": 21
    },
    {
      "epoch": 3.17,
      "learning_rate": 0.00012,
      "loss": 1.4303,
      "step": 24
    },
    {
      "epoch": 3.33,
      "learning_rate": 0.00013500000000000003,
      "loss": 1.2275,
      "step": 27
    },
    {
      "epoch": 4.11,
      "learning_rate": 0.00015000000000000001,
      "loss": 1.3846,
      "step": 30
    },
    {
      "epoch": 4.28,
      "learning_rate": 0.000165,
      "loss": 1.1696,
      "step": 33
    },
    {
      "epoch": 5.06,
      "learning_rate": 0.00018,
      "loss": 0.8513,
      "step": 36
    },
    {
      "epoch": 5.22,
      "learning_rate": 0.000195,
      "loss": 0.9204,
      "step": 39
    },
    {
      "epoch": 5.39,
      "learning_rate": 0.00019887640449438204,
      "loss": 0.8819,
      "step": 42
    },
    {
      "epoch": 6.17,
      "learning_rate": 0.00019719101123595508,
      "loss": 0.7262,
      "step": 45
    },
    {
      "epoch": 6.33,
      "learning_rate": 0.0001955056179775281,
      "loss": 0.622,
      "step": 48
    },
    {
      "epoch": 7.11,
      "learning_rate": 0.00019382022471910114,
      "loss": 0.45,
      "step": 51
    },
    {
      "epoch": 7.28,
      "learning_rate": 0.00019213483146067416,
      "loss": 0.5254,
      "step": 54
    },
    {
      "epoch": 8.06,
      "learning_rate": 0.0001904494382022472,
      "loss": 0.4256,
      "step": 57
    },
    {
      "epoch": 8.22,
      "learning_rate": 0.00018876404494382024,
      "loss": 0.3008,
      "step": 60
    },
    {
      "epoch": 8.39,
      "learning_rate": 0.00018707865168539326,
      "loss": 0.3208,
      "step": 63
    },
    {
      "epoch": 9.17,
      "learning_rate": 0.0001853932584269663,
      "loss": 0.235,
      "step": 66
    },
    {
      "epoch": 9.33,
      "learning_rate": 0.00018370786516853932,
      "loss": 0.3065,
      "step": 69
    },
    {
      "epoch": 10.11,
      "learning_rate": 0.00018202247191011236,
      "loss": 0.2178,
      "step": 72
    },
    {
      "epoch": 10.28,
      "learning_rate": 0.0001803370786516854,
      "loss": 0.2144,
      "step": 75
    },
    {
      "epoch": 11.06,
      "learning_rate": 0.00017865168539325842,
      "loss": 0.157,
      "step": 78
    },
    {
      "epoch": 11.22,
      "learning_rate": 0.00017696629213483146,
      "loss": 0.1541,
      "step": 81
    },
    {
      "epoch": 11.39,
      "learning_rate": 0.0001752808988764045,
      "loss": 0.1394,
      "step": 84
    },
    {
      "epoch": 12.17,
      "learning_rate": 0.00017359550561797752,
      "loss": 0.1196,
      "step": 87
    },
    {
      "epoch": 12.33,
      "learning_rate": 0.00017191011235955056,
      "loss": 0.1,
      "step": 90
    },
    {
      "epoch": 13.11,
      "learning_rate": 0.0001702247191011236,
      "loss": 0.0882,
      "step": 93
    },
    {
      "epoch": 13.28,
      "learning_rate": 0.00016853932584269662,
      "loss": 0.0671,
      "step": 96
    },
    {
      "epoch": 14.06,
      "learning_rate": 0.00016685393258426967,
      "loss": 0.0509,
      "step": 99
    },
    {
      "epoch": 14.22,
      "learning_rate": 0.0001651685393258427,
      "loss": 0.0435,
      "step": 102
    },
    {
      "epoch": 14.39,
      "learning_rate": 0.00016348314606741575,
      "loss": 0.0529,
      "step": 105
    },
    {
      "epoch": 15.17,
      "learning_rate": 0.00016179775280898877,
      "loss": 0.0349,
      "step": 108
    },
    {
      "epoch": 15.33,
      "learning_rate": 0.0001601123595505618,
      "loss": 0.0298,
      "step": 111
    },
    {
      "epoch": 16.11,
      "learning_rate": 0.00015842696629213485,
      "loss": 0.0405,
      "step": 114
    },
    {
      "epoch": 16.28,
      "learning_rate": 0.00015674157303370787,
      "loss": 0.0279,
      "step": 117
    },
    {
      "epoch": 17.06,
      "learning_rate": 0.0001550561797752809,
      "loss": 0.033,
      "step": 120
    },
    {
      "epoch": 17.22,
      "learning_rate": 0.00015337078651685396,
      "loss": 0.0221,
      "step": 123
    },
    {
      "epoch": 17.39,
      "learning_rate": 0.00015168539325842697,
      "loss": 0.0253,
      "step": 126
    },
    {
      "epoch": 18.17,
      "learning_rate": 0.00015000000000000001,
      "loss": 0.0188,
      "step": 129
    },
    {
      "epoch": 18.33,
      "learning_rate": 0.00014831460674157306,
      "loss": 0.0229,
      "step": 132
    },
    {
      "epoch": 19.11,
      "learning_rate": 0.00014662921348314607,
      "loss": 0.0168,
      "step": 135
    },
    {
      "epoch": 19.28,
      "learning_rate": 0.00014494382022471912,
      "loss": 0.023,
      "step": 138
    },
    {
      "epoch": 20.06,
      "learning_rate": 0.00014325842696629216,
      "loss": 0.0177,
      "step": 141
    },
    {
      "epoch": 20.22,
      "learning_rate": 0.00014157303370786517,
      "loss": 0.016,
      "step": 144
    },
    {
      "epoch": 20.39,
      "learning_rate": 0.00013988764044943822,
      "loss": 0.0176,
      "step": 147
    },
    {
      "epoch": 21.17,
      "learning_rate": 0.00013820224719101123,
      "loss": 0.0109,
      "step": 150
    },
    {
      "epoch": 21.33,
      "learning_rate": 0.00013651685393258428,
      "loss": 0.0161,
      "step": 153
    }
  ],
  "logging_steps": 3,
  "max_steps": 396,
  "num_train_epochs": 22,
  "save_steps": 500,
  "total_flos": 1.251930185662464e+16,
  "trial_name": null,
  "trial_params": null
}