| { |
| "best_global_step": 40, |
| "best_metric": 2.4598617553710938, |
| "best_model_checkpoint": "branham_fast_model/checkpoint-40", |
| "epoch": 0.007578987257827673, |
| "eval_steps": 20, |
| "global_step": 40, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0001894746814456918, |
| "grad_norm": 0.346444696187973, |
| "learning_rate": 0.0, |
| "loss": 2.7226, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0003789493628913836, |
| "grad_norm": 0.3149067461490631, |
| "learning_rate": 2e-05, |
| "loss": 2.8379, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0005684240443370755, |
| "grad_norm": 0.34180235862731934, |
| "learning_rate": 4e-05, |
| "loss": 2.7675, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0007578987257827672, |
| "grad_norm": 0.30274030566215515, |
| "learning_rate": 6e-05, |
| "loss": 2.5742, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0009473734072284591, |
| "grad_norm": 0.2904745638370514, |
| "learning_rate": 8e-05, |
| "loss": 2.7251, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.001136848088674151, |
| "grad_norm": 0.416220486164093, |
| "learning_rate": 0.0001, |
| "loss": 2.7004, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0013263227701198427, |
| "grad_norm": 0.3409057557582855, |
| "learning_rate": 0.00012, |
| "loss": 2.8056, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0015157974515655345, |
| "grad_norm": 0.36513611674308777, |
| "learning_rate": 0.00014, |
| "loss": 2.5909, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0017052721330112265, |
| "grad_norm": 0.41099268198013306, |
| "learning_rate": 0.00016, |
| "loss": 2.422, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0018947468144569182, |
| "grad_norm": 0.44745072722435, |
| "learning_rate": 0.00018, |
| "loss": 2.5508, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.00208422149590261, |
| "grad_norm": 0.46904876828193665, |
| "learning_rate": 0.0002, |
| "loss": 2.6118, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.002273696177348302, |
| "grad_norm": 0.5026034712791443, |
| "learning_rate": 0.00019777777777777778, |
| "loss": 2.7444, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.0024631708587939936, |
| "grad_norm": 0.44979187846183777, |
| "learning_rate": 0.00019555555555555556, |
| "loss": 2.4904, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.0026526455402396854, |
| "grad_norm": 0.44741418957710266, |
| "learning_rate": 0.00019333333333333333, |
| "loss": 2.4011, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.002842120221685377, |
| "grad_norm": 0.42839404940605164, |
| "learning_rate": 0.00019111111111111114, |
| "loss": 2.3113, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.003031594903131069, |
| "grad_norm": 0.4257930517196655, |
| "learning_rate": 0.00018888888888888888, |
| "loss": 2.536, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.003221069584576761, |
| "grad_norm": 0.4507642090320587, |
| "learning_rate": 0.0001866666666666667, |
| "loss": 2.6034, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.003410544266022453, |
| "grad_norm": 0.37896326184272766, |
| "learning_rate": 0.00018444444444444446, |
| "loss": 2.5196, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.0036000189474681447, |
| "grad_norm": 0.39688196778297424, |
| "learning_rate": 0.00018222222222222224, |
| "loss": 2.5604, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.0037894936289138365, |
| "grad_norm": 0.46683597564697266, |
| "learning_rate": 0.00018, |
| "loss": 2.5619, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0037894936289138365, |
| "eval_loss": 2.506500244140625, |
| "eval_runtime": 30.9292, |
| "eval_samples_per_second": 1.94, |
| "eval_steps_per_second": 0.485, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.003978968310359528, |
| "grad_norm": 0.40029749274253845, |
| "learning_rate": 0.00017777777777777779, |
| "loss": 2.5232, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.00416844299180522, |
| "grad_norm": 0.4430139362812042, |
| "learning_rate": 0.00017555555555555556, |
| "loss": 2.486, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.004357917673250912, |
| "grad_norm": 0.3959173262119293, |
| "learning_rate": 0.00017333333333333334, |
| "loss": 2.4454, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.004547392354696604, |
| "grad_norm": 0.38484787940979004, |
| "learning_rate": 0.0001711111111111111, |
| "loss": 2.4271, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.004736867036142295, |
| "grad_norm": 0.43511584401130676, |
| "learning_rate": 0.00016888888888888889, |
| "loss": 2.5368, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.004926341717587987, |
| "grad_norm": 0.45704638957977295, |
| "learning_rate": 0.0001666666666666667, |
| "loss": 2.5921, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.005115816399033679, |
| "grad_norm": 0.399456262588501, |
| "learning_rate": 0.00016444444444444444, |
| "loss": 2.5925, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.005305291080479371, |
| "grad_norm": 0.39530467987060547, |
| "learning_rate": 0.00016222222222222224, |
| "loss": 2.4086, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.0054947657619250625, |
| "grad_norm": 0.4018021821975708, |
| "learning_rate": 0.00016, |
| "loss": 2.449, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.005684240443370754, |
| "grad_norm": 0.39381134510040283, |
| "learning_rate": 0.0001577777777777778, |
| "loss": 2.5262, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.005873715124816446, |
| "grad_norm": 0.3801777958869934, |
| "learning_rate": 0.00015555555555555556, |
| "loss": 2.442, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.006063189806262138, |
| "grad_norm": 0.4166563153266907, |
| "learning_rate": 0.00015333333333333334, |
| "loss": 2.5827, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.00625266448770783, |
| "grad_norm": 0.3769904375076294, |
| "learning_rate": 0.0001511111111111111, |
| "loss": 2.4996, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.006442139169153522, |
| "grad_norm": 0.39586856961250305, |
| "learning_rate": 0.0001488888888888889, |
| "loss": 2.4672, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.006631613850599214, |
| "grad_norm": 0.46447986364364624, |
| "learning_rate": 0.00014666666666666666, |
| "loss": 2.282, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.006821088532044906, |
| "grad_norm": 0.4014877676963806, |
| "learning_rate": 0.00014444444444444444, |
| "loss": 2.3437, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.007010563213490598, |
| "grad_norm": 0.4009767174720764, |
| "learning_rate": 0.00014222222222222224, |
| "loss": 2.5165, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.007200037894936289, |
| "grad_norm": 0.43349936604499817, |
| "learning_rate": 0.00014, |
| "loss": 2.4813, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.007389512576381981, |
| "grad_norm": 0.4228633642196655, |
| "learning_rate": 0.0001377777777777778, |
| "loss": 2.4006, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.007578987257827673, |
| "grad_norm": 0.42890605330467224, |
| "learning_rate": 0.00013555555555555556, |
| "loss": 2.4066, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.007578987257827673, |
| "eval_loss": 2.4598617553710938, |
| "eval_runtime": 30.9692, |
| "eval_samples_per_second": 1.937, |
| "eval_steps_per_second": 0.484, |
| "step": 40 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 100, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 20, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 3, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7355660271648768.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|