{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.01223540927444023, "eval_steps": 1000, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006117704637220115, "grad_norm": 1.647070050239563, "learning_rate": 3.671221700999388e-08, "loss": 0.1895, "step": 10 }, { "epoch": 0.001223540927444023, "grad_norm": 1.2862460613250732, "learning_rate": 7.750356924332042e-08, "loss": 0.1824, "step": 20 }, { "epoch": 0.0018353113911660345, "grad_norm": 1.6323548555374146, "learning_rate": 1.1829492147664697e-07, "loss": 0.1927, "step": 30 }, { "epoch": 0.002447081854888046, "grad_norm": 1.4893640279769897, "learning_rate": 1.590862737099735e-07, "loss": 0.1911, "step": 40 }, { "epoch": 0.0030588523186100575, "grad_norm": 1.0960227251052856, "learning_rate": 1.9987762594330002e-07, "loss": 0.1801, "step": 50 }, { "epoch": 0.003670622782332069, "grad_norm": 0.6325238943099976, "learning_rate": 2.406689781766266e-07, "loss": 0.1818, "step": 60 }, { "epoch": 0.0042823932460540805, "grad_norm": 2.047482490539551, "learning_rate": 2.814603304099531e-07, "loss": 0.192, "step": 70 }, { "epoch": 0.004894163709776092, "grad_norm": 2.4107882976531982, "learning_rate": 3.222516826432796e-07, "loss": 0.1897, "step": 80 }, { "epoch": 0.0055059341734981036, "grad_norm": 0.7147008180618286, "learning_rate": 3.630430348766062e-07, "loss": 0.1833, "step": 90 }, { "epoch": 0.006117704637220115, "grad_norm": 0.7912544012069702, "learning_rate": 4.0383438710993277e-07, "loss": 0.1869, "step": 100 }, { "epoch": 0.006729475100942127, "grad_norm": 1.3714642524719238, "learning_rate": 4.4462573934325923e-07, "loss": 0.1861, "step": 110 }, { "epoch": 0.007341245564664138, "grad_norm": 1.2271082401275635, "learning_rate": 4.854170915765858e-07, "loss": 0.1843, "step": 120 }, { "epoch": 0.00795301602838615, "grad_norm": 0.6724980473518372, "learning_rate": 5.262084438099123e-07, "loss": 0.1819, "step": 130 }, { "epoch": 0.008564786492108161, "grad_norm": 1.4163967370986938, "learning_rate": 5.669997960432388e-07, "loss": 0.182, "step": 140 }, { "epoch": 0.009176556955830173, "grad_norm": 1.1623996496200562, "learning_rate": 6.077911482765654e-07, "loss": 0.1834, "step": 150 }, { "epoch": 0.009788327419552184, "grad_norm": 1.2043864727020264, "learning_rate": 6.48582500509892e-07, "loss": 0.1806, "step": 160 }, { "epoch": 0.010400097883274196, "grad_norm": 0.9487743973731995, "learning_rate": 6.893738527432185e-07, "loss": 0.1806, "step": 170 }, { "epoch": 0.011011868346996207, "grad_norm": 1.135107159614563, "learning_rate": 7.301652049765451e-07, "loss": 0.178, "step": 180 }, { "epoch": 0.011623638810718219, "grad_norm": 1.8040673732757568, "learning_rate": 7.709565572098715e-07, "loss": 0.1776, "step": 190 }, { "epoch": 0.01223540927444023, "grad_norm": 0.6054055690765381, "learning_rate": 8.117479094431981e-07, "loss": 0.1781, "step": 200 } ], "logging_steps": 10, "max_steps": 49038, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 200, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1683910754304000.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }