| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 10.0, |
  "eval_steps": 500,
| "global_step": 7660, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.6529546196539341, |
| "grad_norm": 0.2742629051208496, |
| "learning_rate": 0.0001996, |
| "loss": 8.915619140625, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.3055827619980411, |
| "grad_norm": 0.3224039077758789, |
| "learning_rate": 0.0001860614525139665, |
| "loss": 7.01044482421875, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.958537381651975, |
| "grad_norm": 0.33685654401779175, |
| "learning_rate": 0.0001720949720670391, |
| "loss": 6.59273095703125, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.6111655239960823, |
| "grad_norm": 0.3674958050251007, |
| "learning_rate": 0.00015812849162011173, |
| "loss": 6.29274072265625, |
| "step": 2000 |
| }, |
| { |
| "epoch": 3.2637936663401894, |
| "grad_norm": 0.381099671125412, |
| "learning_rate": 0.00014416201117318437, |
| "loss": 6.14118603515625, |
| "step": 2500 |
| }, |
| { |
| "epoch": 3.9167482859941236, |
| "grad_norm": 0.37459638714790344, |
| "learning_rate": 0.000130195530726257, |
| "loss": 6.01636474609375, |
| "step": 3000 |
| }, |
| { |
| "epoch": 4.569376428338231, |
| "grad_norm": 0.37941330671310425, |
| "learning_rate": 0.00011622905027932962, |
| "loss": 5.841765625, |
| "step": 3500 |
| }, |
| { |
| "epoch": 5.222004570682338, |
| "grad_norm": 0.37199151515960693, |
| "learning_rate": 0.00010226256983240223, |
| "loss": 5.79295166015625, |
| "step": 4000 |
| }, |
| { |
| "epoch": 5.874959190336272, |
| "grad_norm": 0.3803079128265381, |
| "learning_rate": 8.829608938547486e-05, |
| "loss": 5.71527734375, |
| "step": 4500 |
| }, |
| { |
| "epoch": 6.527587332680379, |
| "grad_norm": 0.37484532594680786, |
| "learning_rate": 7.43296089385475e-05, |
| "loss": 5.6381220703125, |
| "step": 5000 |
| }, |
| { |
| "epoch": 7.180215475024486, |
| "grad_norm": 0.395292192697525, |
| "learning_rate": 6.036312849162011e-05, |
| "loss": 5.58549072265625, |
| "step": 5500 |
| }, |
| { |
| "epoch": 7.83317009467842, |
| "grad_norm": 0.4512353241443634, |
| "learning_rate": 4.6396648044692745e-05, |
| "loss": 5.5589013671875, |
| "step": 6000 |
| }, |
| { |
| "epoch": 8.485798237022527, |
| "grad_norm": 0.4076979458332062, |
| "learning_rate": 3.2430167597765364e-05, |
| "loss": 5.5175244140625, |
| "step": 6500 |
| }, |
| { |
| "epoch": 9.138426379366633, |
| "grad_norm": 0.4094633162021637, |
| "learning_rate": 1.8463687150837993e-05, |
| "loss": 5.470318359375, |
| "step": 7000 |
| }, |
| { |
| "epoch": 9.791380999020568, |
| "grad_norm": 0.4419861435890198, |
| "learning_rate": 4.497206703910615e-06, |
| "loss": 5.4800283203125, |
| "step": 7500 |
| }, |
| { |
| "epoch": 10.0, |
| "step": 7660, |
| "total_flos": 1.7418239852347392e+17, |
| "train_loss": 6.09080462343699, |
| "train_runtime": 4841.9523, |
| "train_samples_per_second": 202.402, |
| "train_steps_per_second": 1.582 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 7660, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 2000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.7418239852347392e+17, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|