| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9973394146712277, |
| "eval_steps": 500, |
| "global_step": 328, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.03040668947168377, |
| "grad_norm": 5.6152509966106985, |
| "learning_rate": 5e-06, |
| "loss": 0.9255, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.06081337894336754, |
| "grad_norm": 1.948199272854136, |
| "learning_rate": 5e-06, |
| "loss": 0.8117, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.09122006841505131, |
| "grad_norm": 1.6637843548352236, |
| "learning_rate": 5e-06, |
| "loss": 0.7758, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.12162675788673508, |
| "grad_norm": 0.907983749953646, |
| "learning_rate": 5e-06, |
| "loss": 0.7519, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.15203344735841884, |
| "grad_norm": 1.7280229551951782, |
| "learning_rate": 5e-06, |
| "loss": 0.7352, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.18244013683010263, |
| "grad_norm": 0.8504963430088522, |
| "learning_rate": 5e-06, |
| "loss": 0.7224, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.2128468263017864, |
| "grad_norm": 1.2357854791523568, |
| "learning_rate": 5e-06, |
| "loss": 0.7105, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.24325351577347015, |
| "grad_norm": 1.2476890602998016, |
| "learning_rate": 5e-06, |
| "loss": 0.7038, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.27366020524515394, |
| "grad_norm": 0.63122069402106, |
| "learning_rate": 5e-06, |
| "loss": 0.6999, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.3040668947168377, |
| "grad_norm": 0.6553804145899658, |
| "learning_rate": 5e-06, |
| "loss": 0.6863, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.33447358418852147, |
| "grad_norm": 0.5841011076306307, |
| "learning_rate": 5e-06, |
| "loss": 0.6907, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.36488027366020526, |
| "grad_norm": 0.7996499453541979, |
| "learning_rate": 5e-06, |
| "loss": 0.6819, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.395286963131889, |
| "grad_norm": 0.5950822241048144, |
| "learning_rate": 5e-06, |
| "loss": 0.6871, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.4256936526035728, |
| "grad_norm": 0.5683219252381152, |
| "learning_rate": 5e-06, |
| "loss": 0.6825, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.45610034207525657, |
| "grad_norm": 0.7530256115772663, |
| "learning_rate": 5e-06, |
| "loss": 0.6774, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.4865070315469403, |
| "grad_norm": 0.6459654005909365, |
| "learning_rate": 5e-06, |
| "loss": 0.6847, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.5169137210186241, |
| "grad_norm": 0.5836331839357853, |
| "learning_rate": 5e-06, |
| "loss": 0.6768, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.5473204104903079, |
| "grad_norm": 0.5565233021284824, |
| "learning_rate": 5e-06, |
| "loss": 0.6849, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.5777270999619917, |
| "grad_norm": 0.6013400542500057, |
| "learning_rate": 5e-06, |
| "loss": 0.6713, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.6081337894336754, |
| "grad_norm": 0.5233954675275515, |
| "learning_rate": 5e-06, |
| "loss": 0.671, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.6385404789053591, |
| "grad_norm": 0.5779435903050023, |
| "learning_rate": 5e-06, |
| "loss": 0.6724, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.6689471683770429, |
| "grad_norm": 0.6526020744171068, |
| "learning_rate": 5e-06, |
| "loss": 0.6719, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.6993538578487267, |
| "grad_norm": 0.5895434275536652, |
| "learning_rate": 5e-06, |
| "loss": 0.6689, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.7297605473204105, |
| "grad_norm": 0.6343504733705319, |
| "learning_rate": 5e-06, |
| "loss": 0.6687, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.7601672367920943, |
| "grad_norm": 0.7332518294295413, |
| "learning_rate": 5e-06, |
| "loss": 0.6639, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.790573926263778, |
| "grad_norm": 0.5332685530635167, |
| "learning_rate": 5e-06, |
| "loss": 0.6684, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.8209806157354618, |
| "grad_norm": 0.5890443926278276, |
| "learning_rate": 5e-06, |
| "loss": 0.6619, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.8513873052071456, |
| "grad_norm": 0.5702438242189162, |
| "learning_rate": 5e-06, |
| "loss": 0.6611, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.8817939946788294, |
| "grad_norm": 0.6228763934659501, |
| "learning_rate": 5e-06, |
| "loss": 0.6596, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.9122006841505131, |
| "grad_norm": 0.6318262944444193, |
| "learning_rate": 5e-06, |
| "loss": 0.6623, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.9426073736221969, |
| "grad_norm": 0.6675343568518476, |
| "learning_rate": 5e-06, |
| "loss": 0.6565, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.9730140630938806, |
| "grad_norm": 0.5399374272267031, |
| "learning_rate": 5e-06, |
| "loss": 0.6579, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.9973394146712277, |
| "eval_loss": 0.6590226292610168, |
| "eval_runtime": 348.3776, |
| "eval_samples_per_second": 25.435, |
| "eval_steps_per_second": 0.399, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.9973394146712277, |
| "step": 328, |
| "total_flos": 549202836848640.0, |
| "train_loss": 0.696046143043332, |
| "train_runtime": 19374.8951, |
| "train_samples_per_second": 8.689, |
| "train_steps_per_second": 0.017 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 328, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 549202836848640.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|