| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 408, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.049170251997541485, |
| "grad_norm": 8.936578750610352, |
| "learning_rate": 3.461538461538462e-05, |
| "loss": 3.0445, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.09834050399508297, |
| "grad_norm": 1.3836116790771484, |
| "learning_rate": 4.997153995054481e-05, |
| "loss": 1.2916, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.14751075599262448, |
| "grad_norm": 0.8220507502555847, |
| "learning_rate": 4.9797852029940475e-05, |
| "loss": 0.7916, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.19668100799016594, |
| "grad_norm": 0.8145950436592102, |
| "learning_rate": 4.946738410462334e-05, |
| "loss": 0.6026, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.24585125998770743, |
| "grad_norm": 0.8008847832679749, |
| "learning_rate": 4.8982225499218236e-05, |
| "loss": 0.563, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.29502151198524895, |
| "grad_norm": 0.9043287038803101, |
| "learning_rate": 4.834544354263534e-05, |
| "loss": 0.515, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.3441917639827904, |
| "grad_norm": 0.6810412406921387, |
| "learning_rate": 4.756106417542925e-05, |
| "loss": 0.4954, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.3933620159803319, |
| "grad_norm": 0.8481300473213196, |
| "learning_rate": 4.6634046496506536e-05, |
| "loss": 0.5008, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.4425322679778734, |
| "grad_norm": 0.9863914847373962, |
| "learning_rate": 4.55702514101058e-05, |
| "loss": 0.4653, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.49170251997541486, |
| "grad_norm": 0.739658772945404, |
| "learning_rate": 4.437640457127401e-05, |
| "loss": 0.4553, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.5408727719729564, |
| "grad_norm": 0.8426279425621033, |
| "learning_rate": 4.306005386410986e-05, |
| "loss": 0.4598, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.5900430239704979, |
| "grad_norm": 0.7732704877853394, |
| "learning_rate": 4.162952168161028e-05, |
| "loss": 0.4462, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.6392132759680393, |
| "grad_norm": 0.7927232980728149, |
| "learning_rate": 4.009385230882235e-05, |
| "loss": 0.4477, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.6883835279655808, |
| "grad_norm": 0.7976663708686829, |
| "learning_rate": 3.846275474196124e-05, |
| "loss": 0.4339, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.7375537799631223, |
| "grad_norm": 0.9223753809928894, |
| "learning_rate": 3.674654130501011e-05, |
| "loss": 0.4131, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.7867240319606638, |
| "grad_norm": 0.8355945944786072, |
| "learning_rate": 3.4956062451887664e-05, |
| "loss": 0.4455, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.8358942839582053, |
| "grad_norm": 0.8845610618591309, |
| "learning_rate": 3.31026381663849e-05, |
| "loss": 0.4217, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.8850645359557467, |
| "grad_norm": 0.8245550394058228, |
| "learning_rate": 3.119798639358291e-05, |
| "loss": 0.4357, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.9342347879532883, |
| "grad_norm": 0.7879987955093384, |
| "learning_rate": 2.925414895523123e-05, |
| "loss": 0.4287, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.9834050399508297, |
| "grad_norm": 0.9960600733757019, |
| "learning_rate": 2.7283415417473657e-05, |
| "loss": 0.4071, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.029502151198525, |
| "grad_norm": 1.0155885219573975, |
| "learning_rate": 2.5298245392254172e-05, |
| "loss": 0.3991, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.0786724031960664, |
| "grad_norm": 0.837479293346405, |
| "learning_rate": 2.3311189763638598e-05, |
| "loss": 0.3927, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.1278426551936078, |
| "grad_norm": 0.9499828815460205, |
| "learning_rate": 2.1334811337084555e-05, |
| "loss": 0.4027, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.1770129071911493, |
| "grad_norm": 0.8929746150970459, |
| "learning_rate": 1.938160541334049e-05, |
| "loss": 0.3817, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.226183159188691, |
| "grad_norm": 0.7907595038414001, |
| "learning_rate": 1.746392078913122e-05, |
| "loss": 0.3884, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.2753534111862324, |
| "grad_norm": 0.884660005569458, |
| "learning_rate": 1.559388168408906e-05, |
| "loss": 0.3922, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.3245236631837738, |
| "grad_norm": 0.9521052241325378, |
| "learning_rate": 1.3783311087533724e-05, |
| "loss": 0.3902, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.3736939151813152, |
| "grad_norm": 0.9781070351600647, |
| "learning_rate": 1.2043656009727161e-05, |
| "loss": 0.3771, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.4228641671788567, |
| "grad_norm": 0.8051853775978088, |
| "learning_rate": 1.0385915110189057e-05, |
| "loss": 0.3926, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.4720344191763983, |
| "grad_norm": 0.8826114535331726, |
| "learning_rate": 8.820569160630129e-06, |
| "loss": 0.3903, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.5212046711739398, |
| "grad_norm": 0.8723428249359131, |
| "learning_rate": 7.357514782138896e-06, |
| "loss": 0.3742, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.5703749231714812, |
| "grad_norm": 0.8239277005195618, |
| "learning_rate": 6.006001875556672e-06, |
| "loss": 0.3701, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.6195451751690229, |
| "grad_norm": 0.9797578454017639, |
| "learning_rate": 4.7745751406263165e-06, |
| "loss": 0.3915, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.668715427166564, |
| "grad_norm": 0.9234381914138794, |
| "learning_rate": 3.6710200536492655e-06, |
| "loss": 0.3808, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.7178856791641057, |
| "grad_norm": 0.9126710891723633, |
| "learning_rate": 2.702313645197685e-06, |
| "loss": 0.389, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.7670559311616472, |
| "grad_norm": 0.820958137512207, |
| "learning_rate": 1.87458038908081e-06, |
| "loss": 0.361, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.8162261831591886, |
| "grad_norm": 1.0256679058074951, |
| "learning_rate": 1.1930534814495824e-06, |
| "loss": 0.3957, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.8653964351567303, |
| "grad_norm": 0.8567346334457397, |
| "learning_rate": 6.620417548456853e-07, |
| "loss": 0.3894, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.9145666871542717, |
| "grad_norm": 0.9570908546447754, |
| "learning_rate": 2.8490243637487313e-07, |
| "loss": 0.3944, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.9637369391518131, |
| "grad_norm": 0.8246704339981079, |
| "learning_rate": 6.401992223629694e-08, |
| "loss": 0.3871, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.0, |
| "step": 408, |
| "total_flos": 8.07989399478697e+16, |
| "train_loss": 0.5172226218616262, |
| "train_runtime": 2703.1266, |
| "train_samples_per_second": 2.408, |
| "train_steps_per_second": 0.151 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 408, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8.07989399478697e+16, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|