{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 276,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.1092896174863388,
      "grad_norm": 5.1577437597516225,
      "learning_rate": 3.2142857142857147e-06,
      "loss": 1.1995,
      "step": 10
    },
    {
      "epoch": 0.2185792349726776,
      "grad_norm": 3.5481701787136384,
      "learning_rate": 6.785714285714287e-06,
      "loss": 1.0414,
      "step": 20
    },
    {
      "epoch": 0.32786885245901637,
      "grad_norm": 3.1144850116240357,
      "learning_rate": 9.999598828026644e-06,
      "loss": 0.9119,
      "step": 30
    },
    {
      "epoch": 0.4371584699453552,
      "grad_norm": 2.73270142860577,
      "learning_rate": 9.951536036943753e-06,
      "loss": 0.8715,
      "step": 40
    },
    {
      "epoch": 0.546448087431694,
      "grad_norm": 3.026946904388784,
      "learning_rate": 9.824121682679072e-06,
      "loss": 0.8884,
      "step": 50
    },
    {
      "epoch": 0.6557377049180327,
      "grad_norm": 2.7780305606367834,
      "learning_rate": 9.619397662556434e-06,
      "loss": 0.8572,
      "step": 60
    },
    {
      "epoch": 0.7650273224043715,
      "grad_norm": 2.9940795505131277,
      "learning_rate": 9.340644811229243e-06,
      "loss": 0.8223,
      "step": 70
    },
    {
      "epoch": 0.8743169398907104,
      "grad_norm": 2.966943396900539,
      "learning_rate": 8.992330323186069e-06,
      "loss": 0.8355,
      "step": 80
    },
    {
      "epoch": 0.9836065573770492,
      "grad_norm": 2.8647421289333863,
      "learning_rate": 8.580036163073615e-06,
      "loss": 0.825,
      "step": 90
    },
    {
      "epoch": 1.0874316939890711,
      "grad_norm": 2.8839975616255398,
      "learning_rate": 8.110369611107869e-06,
      "loss": 0.4997,
      "step": 100
    },
    {
      "epoch": 1.1967213114754098,
      "grad_norm": 2.6455455905909773,
      "learning_rate": 7.59085737714101e-06,
      "loss": 0.3941,
      "step": 110
    },
    {
      "epoch": 1.3060109289617485,
      "grad_norm": 2.672032421410989,
      "learning_rate": 7.029824980274536e-06,
      "loss": 0.3573,
      "step": 120
    },
    {
      "epoch": 1.4153005464480874,
      "grad_norm": 2.5862676408818532,
      "learning_rate": 6.436263327038225e-06,
      "loss": 0.3592,
      "step": 130
    },
    {
      "epoch": 1.5245901639344264,
      "grad_norm": 2.515383899328089,
      "learning_rate": 5.819684626305776e-06,
      "loss": 0.3639,
      "step": 140
    },
    {
      "epoch": 1.633879781420765,
      "grad_norm": 2.31624870273084,
      "learning_rate": 5.189969950003697e-06,
      "loss": 0.3668,
      "step": 150
    },
    {
      "epoch": 1.7431693989071038,
      "grad_norm": 3.014731439814705,
      "learning_rate": 4.5572108825515e-06,
      "loss": 0.3436,
      "step": 160
    },
    {
      "epoch": 1.8524590163934427,
      "grad_norm": 2.6503531682704313,
      "learning_rate": 3.931547796703245e-06,
      "loss": 0.3337,
      "step": 170
    },
    {
      "epoch": 1.9617486338797814,
      "grad_norm": 2.5340962318062865,
      "learning_rate": 3.323007347524515e-06,
      "loss": 0.3306,
      "step": 180
    },
    {
      "epoch": 2.0655737704918034,
      "grad_norm": 2.2253864218482913,
      "learning_rate": 2.7413417887687644e-06,
      "loss": 0.204,
      "step": 190
    },
    {
      "epoch": 2.1748633879781423,
      "grad_norm": 2.265109953776663,
      "learning_rate": 2.1958726867119785e-06,
      "loss": 0.1156,
      "step": 200
    },
    {
      "epoch": 2.2841530054644807,
      "grad_norm": 1.9501794833499622,
      "learning_rate": 1.6953415360322972e-06,
      "loss": 0.1097,
      "step": 210
    },
    {
      "epoch": 2.3934426229508197,
      "grad_norm": 2.21254078583728,
      "learning_rate": 1.2477696717116878e-06,
      "loss": 0.1106,
      "step": 220
    },
    {
      "epoch": 2.5027322404371586,
      "grad_norm": 2.44268406044375,
      "learning_rate": 8.603297219618933e-07,
      "loss": 0.1061,
      "step": 230
    },
    {
      "epoch": 2.612021857923497,
      "grad_norm": 2.009876541736874,
      "learning_rate": 5.392306622245408e-07,
      "loss": 0.1039,
      "step": 240
    },
    {
      "epoch": 2.721311475409836,
      "grad_norm": 2.069446369955909,
      "learning_rate": 2.896183123293256e-07,
      "loss": 0.117,
      "step": 250
    },
    {
      "epoch": 2.830601092896175,
      "grad_norm": 2.1655093723456593,
      "learning_rate": 1.1549287140756593e-07,
      "loss": 0.1154,
      "step": 260
    },
    {
      "epoch": 2.939890710382514,
      "grad_norm": 2.5124812267504013,
      "learning_rate": 1.964481211739533e-08,
      "loss": 0.1124,
      "step": 270
    },
    {
      "epoch": 3.0,
      "step": 276,
      "total_flos": 14707949568000.0,
      "train_loss": 0.4614721061526865,
      "train_runtime": 4173.8964,
      "train_samples_per_second": 2.095,
      "train_steps_per_second": 0.066
    }
  ],
  "logging_steps": 10,
  "max_steps": 276,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 14707949568000.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}