| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.16644474034620507, |
| "eval_steps": 500, |
| "global_step": 500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.003328894806924101, |
| "grad_norm": 37.75, |
| "learning_rate": 1.9933422103861518e-05, |
| "loss": 1.4084, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.006657789613848202, |
| "grad_norm": 12.125, |
| "learning_rate": 1.9866844207723038e-05, |
| "loss": 1.0474, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.009986684420772303, |
| "grad_norm": 45.25, |
| "learning_rate": 1.9800266311584554e-05, |
| "loss": 1.2455, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.013315579227696404, |
| "grad_norm": 13.9375, |
| "learning_rate": 1.9733688415446073e-05, |
| "loss": 1.1969, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.016644474034620507, |
| "grad_norm": 32.0, |
| "learning_rate": 1.966711051930759e-05, |
| "loss": 1.1659, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.019973368841544607, |
| "grad_norm": 19.0, |
| "learning_rate": 1.960053262316911e-05, |
| "loss": 1.1159, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.02330226364846871, |
| "grad_norm": 27.5, |
| "learning_rate": 1.953395472703063e-05, |
| "loss": 1.1861, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.02663115845539281, |
| "grad_norm": 17.5, |
| "learning_rate": 1.9467376830892145e-05, |
| "loss": 1.1073, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.02996005326231691, |
| "grad_norm": 33.75, |
| "learning_rate": 1.9400798934753665e-05, |
| "loss": 1.1506, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.033288948069241014, |
| "grad_norm": 9.5, |
| "learning_rate": 1.933422103861518e-05, |
| "loss": 1.246, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.03661784287616511, |
| "grad_norm": 11.3125, |
| "learning_rate": 1.92676431424767e-05, |
| "loss": 1.1708, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.03994673768308921, |
| "grad_norm": 14.375, |
| "learning_rate": 1.9201065246338217e-05, |
| "loss": 1.1302, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.043275632490013316, |
| "grad_norm": 20.625, |
| "learning_rate": 1.9134487350199737e-05, |
| "loss": 1.0537, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.04660452729693742, |
| "grad_norm": 21.75, |
| "learning_rate": 1.9067909454061253e-05, |
| "loss": 1.0669, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.049933422103861515, |
| "grad_norm": 14.125, |
| "learning_rate": 1.900133155792277e-05, |
| "loss": 1.0482, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.05326231691078562, |
| "grad_norm": 27.625, |
| "learning_rate": 1.893475366178429e-05, |
| "loss": 1.1468, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.05659121171770972, |
| "grad_norm": 4.53125, |
| "learning_rate": 1.8868175765645805e-05, |
| "loss": 1.092, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.05992010652463382, |
| "grad_norm": 32.5, |
| "learning_rate": 1.8801597869507325e-05, |
| "loss": 1.2208, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.06324900133155792, |
| "grad_norm": 56.0, |
| "learning_rate": 1.873501997336884e-05, |
| "loss": 1.2359, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.06657789613848203, |
| "grad_norm": 8.375, |
| "learning_rate": 1.866844207723036e-05, |
| "loss": 1.0712, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.06990679094540612, |
| "grad_norm": 26.375, |
| "learning_rate": 1.860186418109188e-05, |
| "loss": 1.0156, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.07323568575233022, |
| "grad_norm": 12.0, |
| "learning_rate": 1.8535286284953397e-05, |
| "loss": 1.0407, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.07656458055925433, |
| "grad_norm": 13.75, |
| "learning_rate": 1.8468708388814916e-05, |
| "loss": 1.0161, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.07989347536617843, |
| "grad_norm": 27.125, |
| "learning_rate": 1.8402130492676432e-05, |
| "loss": 1.1466, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.08322237017310254, |
| "grad_norm": 26.125, |
| "learning_rate": 1.8335552596537952e-05, |
| "loss": 1.3192, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.08655126498002663, |
| "grad_norm": 25.75, |
| "learning_rate": 1.826897470039947e-05, |
| "loss": 1.0628, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.08988015978695073, |
| "grad_norm": 29.25, |
| "learning_rate": 1.8202396804260988e-05, |
| "loss": 0.967, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.09320905459387484, |
| "grad_norm": 29.75, |
| "learning_rate": 1.8135818908122504e-05, |
| "loss": 1.0356, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.09653794940079893, |
| "grad_norm": 11.6875, |
| "learning_rate": 1.806924101198402e-05, |
| "loss": 0.9853, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.09986684420772303, |
| "grad_norm": 27.5, |
| "learning_rate": 1.800266311584554e-05, |
| "loss": 1.0279, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.10319573901464714, |
| "grad_norm": 5.65625, |
| "learning_rate": 1.7936085219707056e-05, |
| "loss": 1.0115, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.10652463382157124, |
| "grad_norm": 8.1875, |
| "learning_rate": 1.7869507323568576e-05, |
| "loss": 0.942, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.10985352862849534, |
| "grad_norm": 65.0, |
| "learning_rate": 1.7802929427430096e-05, |
| "loss": 1.0208, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.11318242343541944, |
| "grad_norm": 9.625, |
| "learning_rate": 1.7736351531291612e-05, |
| "loss": 1.1729, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.11651131824234354, |
| "grad_norm": 17.5, |
| "learning_rate": 1.766977363515313e-05, |
| "loss": 0.9322, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.11984021304926765, |
| "grad_norm": 28.125, |
| "learning_rate": 1.7603195739014648e-05, |
| "loss": 1.0417, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.12316910785619174, |
| "grad_norm": 18.25, |
| "learning_rate": 1.7536617842876168e-05, |
| "loss": 1.2491, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.12649800266311584, |
| "grad_norm": 27.375, |
| "learning_rate": 1.7470039946737684e-05, |
| "loss": 0.9388, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.12982689747003995, |
| "grad_norm": 22.125, |
| "learning_rate": 1.7403462050599203e-05, |
| "loss": 1.0488, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.13315579227696406, |
| "grad_norm": 21.25, |
| "learning_rate": 1.733688415446072e-05, |
| "loss": 0.9951, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.13648468708388814, |
| "grad_norm": 11.75, |
| "learning_rate": 1.727030625832224e-05, |
| "loss": 0.9212, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.13981358189081225, |
| "grad_norm": 16.25, |
| "learning_rate": 1.7203728362183756e-05, |
| "loss": 1.0548, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.14314247669773636, |
| "grad_norm": 19.875, |
| "learning_rate": 1.7137150466045275e-05, |
| "loss": 1.0238, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.14647137150466044, |
| "grad_norm": 18.875, |
| "learning_rate": 1.707057256990679e-05, |
| "loss": 1.0327, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.14980026631158455, |
| "grad_norm": 7.84375, |
| "learning_rate": 1.7003994673768308e-05, |
| "loss": 1.1155, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.15312916111850866, |
| "grad_norm": 14.125, |
| "learning_rate": 1.693741677762983e-05, |
| "loss": 0.9627, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.15645805592543274, |
| "grad_norm": 7.4375, |
| "learning_rate": 1.6870838881491347e-05, |
| "loss": 1.0216, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.15978695073235685, |
| "grad_norm": 9.375, |
| "learning_rate": 1.6804260985352863e-05, |
| "loss": 1.0489, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.16311584553928096, |
| "grad_norm": 19.5, |
| "learning_rate": 1.6737683089214383e-05, |
| "loss": 1.1254, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.16644474034620507, |
| "grad_norm": 26.125, |
| "learning_rate": 1.66711051930759e-05, |
| "loss": 1.0134, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.16644474034620507, |
| "eval_accuracy": 0.5015762402521985, |
| "eval_loss": 0.9942083358764648, |
| "eval_runtime": 211.8123, |
| "eval_samples_per_second": 113.818, |
| "eval_steps_per_second": 28.454, |
| "step": 500 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 3004, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|