| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.3404255319148937, |
| "eval_steps": 500, |
| "global_step": 40, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0851063829787234, |
| "grad_norm": 4.807689189910889, |
| "learning_rate": 0.0, |
| "loss": 0.6635, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.1702127659574468, |
| "grad_norm": 4.9480061531066895, |
| "learning_rate": 1e-05, |
| "loss": 0.6577, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.2553191489361702, |
| "grad_norm": 3.104273557662964, |
| "learning_rate": 9.99291347838381e-06, |
| "loss": 0.6214, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.3404255319148936, |
| "grad_norm": 3.2951602935791016, |
| "learning_rate": 9.971674001050687e-06, |
| "loss": 0.5713, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.425531914893617, |
| "grad_norm": 2.968020439147949, |
| "learning_rate": 9.936341773606723e-06, |
| "loss": 0.5727, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.5106382978723404, |
| "grad_norm": 2.5378077030181885, |
| "learning_rate": 9.887016949089334e-06, |
| "loss": 0.5519, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.5957446808510638, |
| "grad_norm": 1.7747889757156372, |
| "learning_rate": 9.823839344072582e-06, |
| "loss": 0.5147, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.6808510638297872, |
| "grad_norm": 1.0918842554092407, |
| "learning_rate": 9.746988042341907e-06, |
| "loss": 0.5046, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.7659574468085106, |
| "grad_norm": 1.385604977607727, |
| "learning_rate": 9.656680887261693e-06, |
| "loss": 0.484, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.851063829787234, |
| "grad_norm": 1.240978479385376, |
| "learning_rate": 9.553173864274567e-06, |
| "loss": 0.4879, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.9361702127659575, |
| "grad_norm": 0.9019972085952759, |
| "learning_rate": 9.436760375282858e-06, |
| "loss": 0.4562, |
| "step": 11 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.8302024602890015, |
| "learning_rate": 9.307770406969032e-06, |
| "loss": 0.4453, |
| "step": 12 |
| }, |
| { |
| "epoch": 1.0851063829787233, |
| "grad_norm": 0.8462631702423096, |
| "learning_rate": 9.166569595412576e-06, |
| "loss": 0.4313, |
| "step": 13 |
| }, |
| { |
| "epoch": 1.1702127659574468, |
| "grad_norm": 0.9012259840965271, |
| "learning_rate": 9.013558189654819e-06, |
| "loss": 0.4485, |
| "step": 14 |
| }, |
| { |
| "epoch": 1.2553191489361701, |
| "grad_norm": 0.7793322801589966, |
| "learning_rate": 8.849169917149532e-06, |
| "loss": 0.4391, |
| "step": 15 |
| }, |
| { |
| "epoch": 1.3404255319148937, |
| "grad_norm": 0.6103827357292175, |
| "learning_rate": 8.673870754315336e-06, |
| "loss": 0.4224, |
| "step": 16 |
| }, |
| { |
| "epoch": 1.425531914893617, |
| "grad_norm": 0.5824944376945496, |
| "learning_rate": 8.488157605674924e-06, |
| "loss": 0.4208, |
| "step": 17 |
| }, |
| { |
| "epoch": 1.5106382978723403, |
| "grad_norm": 0.644345223903656, |
| "learning_rate": 8.292556895325195e-06, |
| "loss": 0.425, |
| "step": 18 |
| }, |
| { |
| "epoch": 1.5957446808510638, |
| "grad_norm": 0.600309431552887, |
| "learning_rate": 8.08762307473096e-06, |
| "loss": 0.4186, |
| "step": 19 |
| }, |
| { |
| "epoch": 1.6808510638297873, |
| "grad_norm": 0.508482813835144, |
| "learning_rate": 7.873937051072037e-06, |
| "loss": 0.4029, |
| "step": 20 |
| }, |
| { |
| "epoch": 1.7659574468085106, |
| "grad_norm": 0.44848644733428955, |
| "learning_rate": 7.652104540598712e-06, |
| "loss": 0.4091, |
| "step": 21 |
| }, |
| { |
| "epoch": 1.851063829787234, |
| "grad_norm": 0.47672998905181885, |
| "learning_rate": 7.422754351663252e-06, |
| "loss": 0.4125, |
| "step": 22 |
| }, |
| { |
| "epoch": 1.9361702127659575, |
| "grad_norm": 0.47977009415626526, |
| "learning_rate": 7.186536602294278e-06, |
| "loss": 0.4083, |
| "step": 23 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.4431890845298767, |
| "learning_rate": 6.944120877366605e-06, |
| "loss": 0.4013, |
| "step": 24 |
| }, |
| { |
| "epoch": 2.0851063829787235, |
| "grad_norm": 0.4314478039741516, |
| "learning_rate": 6.6961943305901515e-06, |
| "loss": 0.3872, |
| "step": 25 |
| }, |
| { |
| "epoch": 2.1702127659574466, |
| "grad_norm": 0.4408315420150757, |
| "learning_rate": 6.443459736698106e-06, |
| "loss": 0.3867, |
| "step": 26 |
| }, |
| { |
| "epoch": 2.25531914893617, |
| "grad_norm": 0.3981506824493408, |
| "learning_rate": 6.186633499355576e-06, |
| "loss": 0.391, |
| "step": 27 |
| }, |
| { |
| "epoch": 2.3404255319148937, |
| "grad_norm": 0.39310428500175476, |
| "learning_rate": 5.926443620435572e-06, |
| "loss": 0.3789, |
| "step": 28 |
| }, |
| { |
| "epoch": 2.425531914893617, |
| "grad_norm": 0.38572362065315247, |
| "learning_rate": 5.663627636418611e-06, |
| "loss": 0.3801, |
| "step": 29 |
| }, |
| { |
| "epoch": 2.5106382978723403, |
| "grad_norm": 0.399061381816864, |
| "learning_rate": 5.398930527765416e-06, |
| "loss": 0.3931, |
| "step": 30 |
| }, |
| { |
| "epoch": 2.595744680851064, |
| "grad_norm": 0.35792312026023865, |
| "learning_rate": 5.133102607188875e-06, |
| "loss": 0.3816, |
| "step": 31 |
| }, |
| { |
| "epoch": 2.6808510638297873, |
| "grad_norm": 0.34908685088157654, |
| "learning_rate": 4.866897392811127e-06, |
| "loss": 0.3641, |
| "step": 32 |
| }, |
| { |
| "epoch": 2.7659574468085104, |
| "grad_norm": 0.35231131315231323, |
| "learning_rate": 4.601069472234584e-06, |
| "loss": 0.3583, |
| "step": 33 |
| }, |
| { |
| "epoch": 2.851063829787234, |
| "grad_norm": 0.35334518551826477, |
| "learning_rate": 4.336372363581391e-06, |
| "loss": 0.3821, |
| "step": 34 |
| }, |
| { |
| "epoch": 2.9361702127659575, |
| "grad_norm": 0.31531235575675964, |
| "learning_rate": 4.073556379564429e-06, |
| "loss": 0.3634, |
| "step": 35 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.3611237108707428, |
| "learning_rate": 3.813366500644426e-06, |
| "loss": 0.361, |
| "step": 36 |
| }, |
| { |
| "epoch": 3.0851063829787235, |
| "grad_norm": 0.3339614272117615, |
| "learning_rate": 3.5565402633018963e-06, |
| "loss": 0.3686, |
| "step": 37 |
| }, |
| { |
| "epoch": 3.1702127659574466, |
| "grad_norm": 0.3014519214630127, |
| "learning_rate": 3.3038056694098485e-06, |
| "loss": 0.3567, |
| "step": 38 |
| }, |
| { |
| "epoch": 3.25531914893617, |
| "grad_norm": 0.2862333655357361, |
| "learning_rate": 3.0558791226333974e-06, |
| "loss": 0.3525, |
| "step": 39 |
| }, |
| { |
| "epoch": 3.3404255319148937, |
| "grad_norm": 0.3033890724182129, |
| "learning_rate": 2.8134633977057236e-06, |
| "loss": 0.3623, |
| "step": 40 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 60, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 20, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.911961183849021e+17, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|