{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 3878,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.025786487880350695,
      "grad_norm": 0.1338733434677124,
      "learning_rate": 6.666666666666667e-06,
      "loss": 2.5125,
      "step": 100
    },
    {
      "epoch": 0.05157297576070139,
      "grad_norm": 0.17936278879642487,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 2.4886,
      "step": 200
    },
    {
      "epoch": 0.07735946364105209,
      "grad_norm": 0.2758166491985321,
      "learning_rate": 2e-05,
      "loss": 2.4178,
      "step": 300
    },
    {
      "epoch": 0.10314595152140278,
      "grad_norm": 0.38494062423706055,
      "learning_rate": 1.9961477891705203e-05,
      "loss": 2.4016,
      "step": 400
    },
    {
      "epoch": 0.12893243940175347,
      "grad_norm": 0.408550888299942,
      "learning_rate": 1.98462083573863e-05,
      "loss": 2.3598,
      "step": 500
    },
    {
      "epoch": 0.15471892728210418,
      "grad_norm": 0.43653976917266846,
      "learning_rate": 1.9655079482140115e-05,
      "loss": 2.3474,
      "step": 600
    },
    {
      "epoch": 0.18050541516245489,
      "grad_norm": 0.5025712251663208,
      "learning_rate": 1.9389563803412753e-05,
      "loss": 2.3225,
      "step": 700
    },
    {
      "epoch": 0.20629190304280556,
      "grad_norm": 0.565731406211853,
      "learning_rate": 1.9051706965950192e-05,
      "loss": 2.2821,
      "step": 800
    },
    {
      "epoch": 0.23207839092315627,
      "grad_norm": 0.5192058682441711,
      "learning_rate": 1.8644111961288605e-05,
      "loss": 2.3188,
      "step": 900
    },
    {
      "epoch": 0.25786487880350695,
      "grad_norm": 0.8693593740463257,
      "learning_rate": 1.816991907320999e-05,
      "loss": 2.2895,
      "step": 1000
    },
    {
      "epoch": 0.28365136668385765,
      "grad_norm": 0.6395590901374817,
      "learning_rate": 1.7632781683671787e-05,
      "loss": 2.2892,
      "step": 1100
    },
    {
      "epoch": 0.30943785456420836,
      "grad_norm": 0.7124245166778564,
      "learning_rate": 1.703683812561179e-05,
      "loss": 2.2695,
      "step": 1200
    },
    {
      "epoch": 0.33522434244455906,
      "grad_norm": 0.6226526498794556,
      "learning_rate": 1.6386679799486236e-05,
      "loss": 2.2573,
      "step": 1300
    },
    {
      "epoch": 0.36101083032490977,
      "grad_norm": 0.7176969051361084,
      "learning_rate": 1.568731579918468e-05,
      "loss": 2.2483,
      "step": 1400
    },
    {
      "epoch": 0.3867973182052604,
      "grad_norm": 0.6624855399131775,
      "learning_rate": 1.494413431985854e-05,
      "loss": 2.2084,
      "step": 1500
    },
    {
      "epoch": 0.4125838060856111,
      "grad_norm": 0.669903039932251,
      "learning_rate": 1.4162861144993671e-05,
      "loss": 2.2376,
      "step": 1600
    },
    {
      "epoch": 0.43837029396596183,
      "grad_norm": 0.7109538316726685,
      "learning_rate": 1.3349515532560074e-05,
      "loss": 2.2318,
      "step": 1700
    },
    {
      "epoch": 0.46415678184631254,
      "grad_norm": 0.7055323123931885,
      "learning_rate": 1.2510363840110396e-05,
      "loss": 2.2585,
      "step": 1800
    },
    {
      "epoch": 0.48994326972666324,
      "grad_norm": 0.8980267643928528,
      "learning_rate": 1.1651871246119102e-05,
      "loss": 2.2008,
      "step": 1900
    },
    {
      "epoch": 0.5157297576070139,
      "grad_norm": 0.774202287197113,
      "learning_rate": 1.0780651939521396e-05,
      "loss": 2.2063,
      "step": 2000
    },
    {
      "epoch": 0.5415162454873647,
      "grad_norm": 0.9350955486297607,
      "learning_rate": 9.903418161212732e-06,
      "loss": 2.2451,
      "step": 2100
    },
    {
      "epoch": 0.5673027333677153,
      "grad_norm": 0.9068596959114075,
      "learning_rate": 9.026928490114683e-06,
      "loss": 2.2293,
      "step": 2200
    },
    {
      "epoch": 0.5930892212480661,
      "grad_norm": 0.7404003739356995,
      "learning_rate": 8.15793577223311e-06,
      "loss": 2.2155,
      "step": 2300
    },
    {
      "epoch": 0.6188757091284167,
      "grad_norm": 0.8607114553451538,
      "learning_rate": 7.303135093885141e-06,
      "loss": 2.2158,
      "step": 2400
    },
    {
      "epoch": 0.6446621970087674,
      "grad_norm": 0.7401602864265442,
      "learning_rate": 6.469112199931131e-06,
      "loss": 2.1806,
      "step": 2500
    },
    {
      "epoch": 0.6704486848891181,
      "grad_norm": 0.818596363067627,
      "learning_rate": 5.662292754419332e-06,
      "loss": 2.1965,
      "step": 2600
    },
    {
      "epoch": 0.6962351727694688,
      "grad_norm": 0.9028757810592651,
      "learning_rate": 4.888892834560608e-06,
      "loss": 2.2402,
      "step": 2700
    },
    {
      "epoch": 0.7220216606498195,
      "grad_norm": 0.8473127484321594,
      "learning_rate": 4.154871039448561e-06,
      "loss": 2.2167,
      "step": 2800
    },
    {
      "epoch": 0.7478081485301702,
      "grad_norm": 0.7806875705718994,
      "learning_rate": 3.4658825824996036e-06,
      "loss": 2.2116,
      "step": 2900
    },
    {
      "epoch": 0.7735946364105208,
      "grad_norm": 0.7143692970275879,
      "learning_rate": 2.82723572130422e-06,
      "loss": 2.2068,
      "step": 3000
    },
    {
      "epoch": 0.7993811242908716,
      "grad_norm": 0.8515029549598694,
      "learning_rate": 2.243850860572239e-06,
      "loss": 2.1972,
      "step": 3100
    },
    {
      "epoch": 0.8251676121712223,
      "grad_norm": 0.770374596118927,
      "learning_rate": 1.7202226432601833e-06,
      "loss": 2.2316,
      "step": 3200
    },
    {
      "epoch": 0.850954100051573,
      "grad_norm": 0.7793726325035095,
      "learning_rate": 1.260385321946761e-06,
      "loss": 2.2046,
      "step": 3300
    },
    {
      "epoch": 0.8767405879319237,
      "grad_norm": 1.0234565734863281,
      "learning_rate": 8.678816772498988e-07,
      "loss": 2.1848,
      "step": 3400
    },
    {
      "epoch": 0.9025270758122743,
      "grad_norm": 0.8092569708824158,
      "learning_rate": 5.457357227510152e-07,
      "loss": 2.2268,
      "step": 3500
    },
    {
      "epoch": 0.9283135636926251,
      "grad_norm": 0.9072908759117126,
      "learning_rate": 2.964294067193008e-07,
      "loss": 2.2251,
      "step": 3600
    },
    {
      "epoch": 0.9541000515729757,
      "grad_norm": 0.6942015886306763,
      "learning_rate": 1.2188349013570356e-07,
      "loss": 2.2285,
      "step": 3700
    },
    {
      "epoch": 0.9798865394533265,
      "grad_norm": 0.8001830577850342,
      "learning_rate": 2.344274834043425e-08,
      "loss": 2.2278,
      "step": 3800
    },
    {
      "epoch": 1.0,
      "step": 3878,
      "total_flos": 7.04757648064512e+16,
      "train_loss": 2.263120666000264,
      "train_runtime": 1207.6216,
      "train_samples_per_second": 6.423,
      "train_steps_per_second": 3.211
    }
  ],
  "logging_steps": 100,
  "max_steps": 3878,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 7.04757648064512e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}