{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 276,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.018148820326678767,
      "grad_norm": 1.0784239768981934,
      "learning_rate": 1.7391304347826088e-06,
      "loss": 1.2027,
      "step": 5
    },
    {
      "epoch": 0.036297640653357534,
      "grad_norm": 0.8809816837310791,
      "learning_rate": 3.913043478260869e-06,
      "loss": 1.2648,
      "step": 10
    },
    {
      "epoch": 0.0544464609800363,
      "grad_norm": 0.6940921545028687,
      "learning_rate": 6.086956521739131e-06,
      "loss": 1.263,
      "step": 15
    },
    {
      "epoch": 0.07259528130671507,
      "grad_norm": 0.7154361009597778,
      "learning_rate": 8.260869565217392e-06,
      "loss": 1.2225,
      "step": 20
    },
    {
      "epoch": 0.09074410163339383,
      "grad_norm": 0.6354780197143555,
      "learning_rate": 1.0434782608695653e-05,
      "loss": 1.2134,
      "step": 25
    },
    {
      "epoch": 0.1088929219600726,
      "grad_norm": 0.6585204005241394,
      "learning_rate": 1.2608695652173912e-05,
      "loss": 1.14,
      "step": 30
    },
    {
      "epoch": 0.12704174228675136,
      "grad_norm": 0.5307602286338806,
      "learning_rate": 1.4782608695652174e-05,
      "loss": 1.128,
      "step": 35
    },
    {
      "epoch": 0.14519056261343014,
      "grad_norm": 0.5485066771507263,
      "learning_rate": 1.6956521739130433e-05,
      "loss": 1.1518,
      "step": 40
    },
    {
      "epoch": 0.16333938294010888,
      "grad_norm": 0.7185664772987366,
      "learning_rate": 1.9130434782608694e-05,
      "loss": 1.1149,
      "step": 45
    },
    {
      "epoch": 0.18148820326678766,
      "grad_norm": 0.51127028465271,
      "learning_rate": 2.1304347826086958e-05,
      "loss": 1.143,
      "step": 50
    },
    {
      "epoch": 0.1996370235934664,
      "grad_norm": 0.535865843296051,
      "learning_rate": 2.347826086956522e-05,
      "loss": 1.1259,
      "step": 55
    },
    {
      "epoch": 0.2177858439201452,
      "grad_norm": 0.5300245881080627,
      "learning_rate": 2.565217391304348e-05,
      "loss": 1.0789,
      "step": 60
    },
    {
      "epoch": 0.23593466424682397,
      "grad_norm": 0.5276510715484619,
      "learning_rate": 2.782608695652174e-05,
      "loss": 1.0811,
      "step": 65
    },
    {
      "epoch": 0.2540834845735027,
      "grad_norm": 0.5264666676521301,
      "learning_rate": 3e-05,
      "loss": 1.0645,
      "step": 70
    },
    {
      "epoch": 0.27223230490018147,
      "grad_norm": 0.5153283476829529,
      "learning_rate": 2.999892331059753e-05,
      "loss": 1.1043,
      "step": 75
    },
    {
      "epoch": 0.29038112522686027,
      "grad_norm": 0.533360481262207,
      "learning_rate": 2.999569339695812e-05,
      "loss": 1.0288,
      "step": 80
    },
    {
      "epoch": 0.308529945553539,
      "grad_norm": 0.6700855493545532,
      "learning_rate": 2.9990310722763616e-05,
      "loss": 1.0037,
      "step": 85
    },
    {
      "epoch": 0.32667876588021777,
      "grad_norm": 0.6054568290710449,
      "learning_rate": 2.9982776060743112e-05,
      "loss": 1.0362,
      "step": 90
    },
    {
      "epoch": 0.3448275862068966,
      "grad_norm": 0.9136693477630615,
      "learning_rate": 2.9973090492562048e-05,
      "loss": 1.0228,
      "step": 95
    },
    {
      "epoch": 0.3629764065335753,
      "grad_norm": 0.609896183013916,
      "learning_rate": 2.9961255408666903e-05,
      "loss": 0.9895,
      "step": 100
    },
    {
      "epoch": 0.3811252268602541,
      "grad_norm": 0.657203733921051,
      "learning_rate": 2.99472725080856e-05,
      "loss": 0.9758,
      "step": 105
    },
    {
      "epoch": 0.3992740471869328,
      "grad_norm": 0.7311093211174011,
      "learning_rate": 2.9931143798183588e-05,
      "loss": 0.9626,
      "step": 110
    },
    {
      "epoch": 0.41742286751361163,
      "grad_norm": 0.6201342940330505,
      "learning_rate": 2.9912871594375667e-05,
      "loss": 0.9704,
      "step": 115
    },
    {
      "epoch": 0.4355716878402904,
      "grad_norm": 0.6121538281440735,
      "learning_rate": 2.98924585197936e-05,
      "loss": 0.931,
      "step": 120
    },
    {
      "epoch": 0.4537205081669691,
      "grad_norm": 0.6566389799118042,
      "learning_rate": 2.9869907504909532e-05,
      "loss": 0.9246,
      "step": 125
    },
    {
      "epoch": 0.47186932849364793,
      "grad_norm": 0.6150594353675842,
      "learning_rate": 2.984522178711529e-05,
      "loss": 0.9232,
      "step": 130
    },
    {
      "epoch": 0.4900181488203267,
      "grad_norm": 0.68607497215271,
      "learning_rate": 2.9818404910257645e-05,
      "loss": 0.8966,
      "step": 135
    },
    {
      "epoch": 0.5081669691470054,
      "grad_norm": 0.6849779486656189,
      "learning_rate": 2.9789460724129545e-05,
      "loss": 0.9419,
      "step": 140
    },
    {
      "epoch": 0.5263157894736842,
      "grad_norm": 0.8690723180770874,
      "learning_rate": 2.9758393383917447e-05,
      "loss": 0.8704,
      "step": 145
    },
    {
      "epoch": 0.5444646098003629,
      "grad_norm": 0.962196409702301,
      "learning_rate": 2.9725207349604823e-05,
      "loss": 0.9073,
      "step": 150
    },
    {
      "epoch": 0.5626134301270418,
      "grad_norm": 0.7416574954986572,
      "learning_rate": 2.968990738533186e-05,
      "loss": 0.9249,
      "step": 155
    },
    {
      "epoch": 0.5807622504537205,
      "grad_norm": 0.7890663146972656,
      "learning_rate": 2.965249855871155e-05,
      "loss": 0.8781,
      "step": 160
    },
    {
      "epoch": 0.5989110707803993,
      "grad_norm": 0.814849317073822,
      "learning_rate": 2.961298624010219e-05,
      "loss": 0.8271,
      "step": 165
    },
    {
      "epoch": 0.617059891107078,
      "grad_norm": 0.8292708396911621,
      "learning_rate": 2.9571376101836397e-05,
      "loss": 0.8268,
      "step": 170
    },
    {
      "epoch": 0.6352087114337568,
      "grad_norm": 0.8682757019996643,
      "learning_rate": 2.9527674117406834e-05,
      "loss": 0.7837,
      "step": 175
    },
    {
      "epoch": 0.6533575317604355,
      "grad_norm": 0.7653095126152039,
      "learning_rate": 2.948188656060864e-05,
      "loss": 0.8026,
      "step": 180
    },
    {
      "epoch": 0.6715063520871143,
      "grad_norm": 1.0673071146011353,
      "learning_rate": 2.9434020004638757e-05,
      "loss": 0.7887,
      "step": 185
    },
    {
      "epoch": 0.6896551724137931,
      "grad_norm": 0.869884192943573,
      "learning_rate": 2.9384081321152335e-05,
      "loss": 0.7971,
      "step": 190
    },
    {
      "epoch": 0.7078039927404719,
      "grad_norm": 0.926493227481842,
      "learning_rate": 2.9332077679276206e-05,
      "loss": 0.7297,
      "step": 195
    },
    {
      "epoch": 0.7259528130671506,
      "grad_norm": 0.8653711080551147,
      "learning_rate": 2.927801654457972e-05,
      "loss": 0.7314,
      "step": 200
    },
    {
      "epoch": 0.7441016333938294,
      "grad_norm": 0.8328022956848145,
      "learning_rate": 2.9221905678002982e-05,
      "loss": 0.7543,
      "step": 205
    },
    {
      "epoch": 0.7622504537205081,
      "grad_norm": 0.8467786908149719,
      "learning_rate": 2.9163753134742716e-05,
      "loss": 0.7292,
      "step": 210
    },
    {
      "epoch": 0.7803992740471869,
      "grad_norm": 0.9099471569061279,
      "learning_rate": 2.910356726309586e-05,
      "loss": 0.7632,
      "step": 215
    },
    {
      "epoch": 0.7985480943738656,
      "grad_norm": 0.8697426915168762,
      "learning_rate": 2.9041356703261108e-05,
      "loss": 0.6898,
      "step": 220
    },
    {
      "epoch": 0.8166969147005445,
      "grad_norm": 0.8971152305603027,
      "learning_rate": 2.8977130386098525e-05,
      "loss": 0.6785,
      "step": 225
    },
    {
      "epoch": 0.8348457350272233,
      "grad_norm": 0.9118236303329468,
      "learning_rate": 2.8910897531847447e-05,
      "loss": 0.6571,
      "step": 230
    },
    {
      "epoch": 0.852994555353902,
      "grad_norm": 0.8698996901512146,
      "learning_rate": 2.8842667648802847e-05,
      "loss": 0.6911,
      "step": 235
    },
    {
      "epoch": 0.8711433756805808,
      "grad_norm": 0.9623836874961853,
      "learning_rate": 2.877245053195033e-05,
      "loss": 0.6313,
      "step": 240
    },
    {
      "epoch": 0.8892921960072595,
      "grad_norm": 0.9111562371253967,
      "learning_rate": 2.8700256261559962e-05,
      "loss": 0.6695,
      "step": 245
    },
    {
      "epoch": 0.9074410163339383,
      "grad_norm": 0.8837359547615051,
      "learning_rate": 2.8626095201739206e-05,
      "loss": 0.6461,
      "step": 250
    },
    {
      "epoch": 0.925589836660617,
      "grad_norm": 1.2072163820266724,
      "learning_rate": 2.8549977998945003e-05,
      "loss": 0.6622,
      "step": 255
    },
    {
      "epoch": 0.9437386569872959,
      "grad_norm": 0.987786591053009,
      "learning_rate": 2.847191558045544e-05,
      "loss": 0.6912,
      "step": 260
    },
    {
      "epoch": 0.9618874773139746,
      "grad_norm": 0.9701903462409973,
      "learning_rate": 2.839191915280102e-05,
      "loss": 0.6322,
      "step": 265
    },
    {
      "epoch": 0.9800362976406534,
      "grad_norm": 1.2309809923171997,
      "learning_rate": 2.831000020015585e-05,
      "loss": 0.5662,
      "step": 270
    },
    {
      "epoch": 0.9981851179673321,
      "grad_norm": 1.0858169794082642,
      "learning_rate": 2.8226170482689022e-05,
      "loss": 0.6321,
      "step": 275
    }
  ],
  "logging_steps": 5,
  "max_steps": 1380,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 2000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.8427147710234624e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}