| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.14245690678569733, | |
| "eval_steps": 500, | |
| "global_step": 3000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0047485635595232445, | |
| "grad_norm": 0.3049672544002533, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 0.623, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.009497127119046489, | |
| "grad_norm": 0.09646450728178024, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 0.29, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.014245690678569733, | |
| "grad_norm": 0.16680897772312164, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.1391, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.018994254238092978, | |
| "grad_norm": 0.17046234011650085, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 0.1325, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.023742817797616222, | |
| "grad_norm": 0.08646216243505478, | |
| "learning_rate": 4.166666666666667e-05, | |
| "loss": 0.1238, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.028491381357139467, | |
| "grad_norm": 0.09403830766677856, | |
| "learning_rate": 5e-05, | |
| "loss": 0.1128, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.03323994491666271, | |
| "grad_norm": 0.08856412768363953, | |
| "learning_rate": 4.9786121534345265e-05, | |
| "loss": 0.1134, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.037988508476185956, | |
| "grad_norm": 0.1673099398612976, | |
| "learning_rate": 4.914814565722671e-05, | |
| "loss": 0.1108, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.0427370720357092, | |
| "grad_norm": 0.07016578316688538, | |
| "learning_rate": 4.8096988312782174e-05, | |
| "loss": 0.1088, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.047485635595232445, | |
| "grad_norm": 0.07002697139978409, | |
| "learning_rate": 4.665063509461097e-05, | |
| "loss": 0.1096, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.05223419915475569, | |
| "grad_norm": 0.16299889981746674, | |
| "learning_rate": 4.4833833507280884e-05, | |
| "loss": 0.1071, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.056982762714278934, | |
| "grad_norm": 0.06416748464107513, | |
| "learning_rate": 4.267766952966369e-05, | |
| "loss": 0.107, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.06173132627380217, | |
| "grad_norm": 0.06463466584682465, | |
| "learning_rate": 4.021903572521802e-05, | |
| "loss": 0.1062, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.06647988983332542, | |
| "grad_norm": 0.07041608542203903, | |
| "learning_rate": 3.7500000000000003e-05, | |
| "loss": 0.1065, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.07122845339284867, | |
| "grad_norm": 0.07932303100824356, | |
| "learning_rate": 3.456708580912725e-05, | |
| "loss": 0.1044, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.07597701695237191, | |
| "grad_norm": 0.061066512018442154, | |
| "learning_rate": 3.147047612756302e-05, | |
| "loss": 0.1034, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.08072558051189516, | |
| "grad_norm": 0.06134779006242752, | |
| "learning_rate": 2.8263154805501297e-05, | |
| "loss": 0.1034, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.0854741440714184, | |
| "grad_norm": 0.06713131070137024, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.1049, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.09022270763094165, | |
| "grad_norm": 0.09950446337461472, | |
| "learning_rate": 2.173684519449872e-05, | |
| "loss": 0.105, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.09497127119046489, | |
| "grad_norm": 0.06452978402376175, | |
| "learning_rate": 1.852952387243698e-05, | |
| "loss": 0.1054, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.09971983474998813, | |
| "grad_norm": 0.11814086139202118, | |
| "learning_rate": 1.5432914190872757e-05, | |
| "loss": 0.1031, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.10446839830951138, | |
| "grad_norm": 0.08128858357667923, | |
| "learning_rate": 1.2500000000000006e-05, | |
| "loss": 0.103, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.10921696186903462, | |
| "grad_norm": 0.07054319977760315, | |
| "learning_rate": 9.780964274781984e-06, | |
| "loss": 0.103, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.11396552542855787, | |
| "grad_norm": 0.06334717571735382, | |
| "learning_rate": 7.3223304703363135e-06, | |
| "loss": 0.1034, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.11871408898808111, | |
| "grad_norm": 0.07204329967498779, | |
| "learning_rate": 5.166166492719124e-06, | |
| "loss": 0.1059, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.12346265254760434, | |
| "grad_norm": 0.05405697599053383, | |
| "learning_rate": 3.3493649053890326e-06, | |
| "loss": 0.105, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.1282112161071276, | |
| "grad_norm": 0.06032924726605415, | |
| "learning_rate": 1.9030116872178316e-06, | |
| "loss": 0.1009, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.13295977966665085, | |
| "grad_norm": 0.07012154161930084, | |
| "learning_rate": 8.51854342773295e-07, | |
| "loss": 0.1042, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.1377083432261741, | |
| "grad_norm": 0.07376914471387863, | |
| "learning_rate": 2.1387846565474045e-07, | |
| "loss": 0.103, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.14245690678569733, | |
| "grad_norm": 0.09159684926271439, | |
| "learning_rate": 0.0, | |
| "loss": 0.1014, | |
| "step": 3000 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 3000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.658973782016e+17, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |