| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.0128, |
| "eval_steps": 1000, |
| "global_step": 200, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00064, |
| "grad_norm": 1.2594444751739502, |
| "learning_rate": 3.840409643695328e-08, |
| "loss": 0.7238, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.00128, |
| "grad_norm": 1.3807746171951294, |
| "learning_rate": 8.10753147002347e-08, |
| "loss": 0.7178, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.00192, |
| "grad_norm": 0.9590708613395691, |
| "learning_rate": 1.2374653296351612e-07, |
| "loss": 0.7106, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.00256, |
| "grad_norm": 1.2128362655639648, |
| "learning_rate": 1.6641775122679754e-07, |
| "loss": 0.72, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0032, |
| "grad_norm": 1.0866276025772095, |
| "learning_rate": 2.0908896949007894e-07, |
| "loss": 0.7108, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.00384, |
| "grad_norm": 1.6984202861785889, |
| "learning_rate": 2.517601877533604e-07, |
| "loss": 0.7149, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.00448, |
| "grad_norm": 1.249053716659546, |
| "learning_rate": 2.944314060166418e-07, |
| "loss": 0.7243, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.00512, |
| "grad_norm": 1.8781795501708984, |
| "learning_rate": 3.371026242799232e-07, |
| "loss": 0.7138, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.00576, |
| "grad_norm": 2.162505626678467, |
| "learning_rate": 3.7977384254320464e-07, |
| "loss": 0.7117, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.0064, |
| "grad_norm": 1.0761003494262695, |
| "learning_rate": 4.22445060806486e-07, |
| "loss": 0.7121, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.00704, |
| "grad_norm": 2.104625940322876, |
| "learning_rate": 4.651162790697675e-07, |
| "loss": 0.7157, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.00768, |
| "grad_norm": 1.6532983779907227, |
| "learning_rate": 5.077874973330489e-07, |
| "loss": 0.7175, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.00832, |
| "grad_norm": 1.094260334968567, |
| "learning_rate": 5.504587155963304e-07, |
| "loss": 0.7142, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.00896, |
| "grad_norm": 1.7268928289413452, |
| "learning_rate": 5.931299338596117e-07, |
| "loss": 0.717, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.0096, |
| "grad_norm": 2.225884199142456, |
| "learning_rate": 6.358011521228932e-07, |
| "loss": 0.7144, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.01024, |
| "grad_norm": 1.7743901014328003, |
| "learning_rate": 6.784723703861745e-07, |
| "loss": 0.7127, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.01088, |
| "grad_norm": 1.1327497959136963, |
| "learning_rate": 7.21143588649456e-07, |
| "loss": 0.7148, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.01152, |
| "grad_norm": 1.9613993167877197, |
| "learning_rate": 7.638148069127374e-07, |
| "loss": 0.711, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.01216, |
| "grad_norm": 1.4991788864135742, |
| "learning_rate": 8.064860251760189e-07, |
| "loss": 0.713, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.0128, |
| "grad_norm": 1.3405441045761108, |
| "learning_rate": 8.491572434393003e-07, |
| "loss": 0.7142, |
| "step": 200 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 46875, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 200, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 3, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1683335198870400.0, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|