| { |
| "best_metric": 0.002846540417522192, |
| "best_model_checkpoint": "./xlam_3epoch_fix_tool/checkpoint-343", |
| "epoch": 2.982127659574468, |
| "eval_steps": 49, |
| "global_step": 438, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.06808510638297872, |
| "grad_norm": 0.11843034625053406, |
| "learning_rate": 9.090909090909091e-06, |
| "loss": 0.0267, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.13617021276595745, |
| "grad_norm": 0.10037467628717422, |
| "learning_rate": 1.8181818181818182e-05, |
| "loss": 0.0184, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.20425531914893616, |
| "grad_norm": 0.11075304448604584, |
| "learning_rate": 1.9981755542233175e-05, |
| "loss": 0.0166, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.2723404255319149, |
| "grad_norm": 0.056458745151758194, |
| "learning_rate": 1.99077515134553e-05, |
| "loss": 0.0109, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.33361702127659576, |
| "eval_loss": 0.011730118654668331, |
| "eval_runtime": 15.7106, |
| "eval_samples_per_second": 3.374, |
| "eval_steps_per_second": 3.374, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.3404255319148936, |
| "grad_norm": 0.09626475721597672, |
| "learning_rate": 1.977726916370847e-05, |
| "loss": 0.0087, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.4085106382978723, |
| "grad_norm": 0.06276140362024307, |
| "learning_rate": 1.959105229687389e-05, |
| "loss": 0.0084, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.4765957446808511, |
| "grad_norm": 0.08935730159282684, |
| "learning_rate": 1.9350162426854152e-05, |
| "loss": 0.0053, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.5446808510638298, |
| "grad_norm": 0.08406774699687958, |
| "learning_rate": 1.9055972726500696e-05, |
| "loss": 0.0079, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.6127659574468085, |
| "grad_norm": 0.06417492032051086, |
| "learning_rate": 1.8710160199955158e-05, |
| "loss": 0.0097, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.6672340425531915, |
| "eval_loss": 0.005933254957199097, |
| "eval_runtime": 15.6927, |
| "eval_samples_per_second": 3.377, |
| "eval_steps_per_second": 3.377, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.6808510638297872, |
| "grad_norm": 0.08632910996675491, |
| "learning_rate": 1.8314696123025456e-05, |
| "loss": 0.0039, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.7489361702127659, |
| "grad_norm": 0.05762840434908867, |
| "learning_rate": 1.7871834806090502e-05, |
| "loss": 0.0072, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.8170212765957446, |
| "grad_norm": 0.03136470168828964, |
| "learning_rate": 1.7384100743589698e-05, |
| "loss": 0.0048, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.8851063829787233, |
| "grad_norm": 0.0481775663793087, |
| "learning_rate": 1.68542742233504e-05, |
| "loss": 0.0026, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.9531914893617022, |
| "grad_norm": 0.048834796994924545, |
| "learning_rate": 1.6285375477786322e-05, |
| "loss": 0.0025, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.0008510638297872, |
| "eval_loss": 0.0041367849335074425, |
| "eval_runtime": 15.6638, |
| "eval_samples_per_second": 3.384, |
| "eval_steps_per_second": 3.384, |
| "step": 147 |
| }, |
| { |
| "epoch": 1.0212765957446808, |
| "grad_norm": 0.05243828892707825, |
| "learning_rate": 1.568064746731156e-05, |
| "loss": 0.0027, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.0893617021276596, |
| "grad_norm": 0.03650873154401779, |
| "learning_rate": 1.5043537394112008e-05, |
| "loss": 0.0017, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.1574468085106382, |
| "grad_norm": 0.051568031311035156, |
| "learning_rate": 1.4377677051653404e-05, |
| "loss": 0.0027, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.225531914893617, |
| "grad_norm": 0.03690316155552864, |
| "learning_rate": 1.368686212194199e-05, |
| "loss": 0.0013, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.2936170212765958, |
| "grad_norm": 0.09455057233572006, |
| "learning_rate": 1.297503053855203e-05, |
| "loss": 0.0028, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.334468085106383, |
| "eval_loss": 0.004107403103262186, |
| "eval_runtime": 15.6495, |
| "eval_samples_per_second": 3.387, |
| "eval_steps_per_second": 3.387, |
| "step": 196 |
| }, |
| { |
| "epoch": 1.3617021276595744, |
| "grad_norm": 0.09587451815605164, |
| "learning_rate": 1.2246240038760042e-05, |
| "loss": 0.0016, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.4297872340425533, |
| "grad_norm": 0.03558555245399475, |
| "learning_rate": 1.1504645032747832e-05, |
| "loss": 0.0011, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.4978723404255319, |
| "grad_norm": 0.059051159769296646, |
| "learning_rate": 1.0754472921729661e-05, |
| "loss": 0.0025, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.5659574468085107, |
| "grad_norm": 0.03486516326665878, |
| "learning_rate": 1e-05, |
| "loss": 0.0062, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.6340425531914895, |
| "grad_norm": 0.0532718189060688, |
| "learning_rate": 9.24552707827034e-06, |
| "loss": 0.0045, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.6680851063829787, |
| "eval_loss": 0.0032062295358628035, |
| "eval_runtime": 15.7019, |
| "eval_samples_per_second": 3.375, |
| "eval_steps_per_second": 3.375, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.702127659574468, |
| "grad_norm": 0.08899789303541183, |
| "learning_rate": 8.49535496725217e-06, |
| "loss": 0.0039, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.7702127659574467, |
| "grad_norm": 0.02918567880988121, |
| "learning_rate": 7.753759961239965e-06, |
| "loss": 0.0012, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.8382978723404255, |
| "grad_norm": 0.023614011704921722, |
| "learning_rate": 7.024969461447973e-06, |
| "loss": 0.0012, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.9063829787234043, |
| "grad_norm": 0.05735747143626213, |
| "learning_rate": 6.3131378780580134e-06, |
| "loss": 0.0021, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.974468085106383, |
| "grad_norm": 0.08490067720413208, |
| "learning_rate": 5.622322948346595e-06, |
| "loss": 0.0064, |
| "step": 290 |
| }, |
| { |
| "epoch": 2.0017021276595743, |
| "eval_loss": 0.0028720616828650236, |
| "eval_runtime": 15.7327, |
| "eval_samples_per_second": 3.369, |
| "eval_steps_per_second": 3.369, |
| "step": 294 |
| }, |
| { |
| "epoch": 2.0425531914893615, |
| "grad_norm": 0.031081199645996094, |
| "learning_rate": 4.956462605887994e-06, |
| "loss": 0.0014, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.1106382978723404, |
| "grad_norm": 0.02734055370092392, |
| "learning_rate": 4.319352532688444e-06, |
| "loss": 0.0006, |
| "step": 310 |
| }, |
| { |
| "epoch": 2.178723404255319, |
| "grad_norm": 0.0303335003554821, |
| "learning_rate": 3.714624522213681e-06, |
| "loss": 0.0012, |
| "step": 320 |
| }, |
| { |
| "epoch": 2.246808510638298, |
| "grad_norm": 0.04239821061491966, |
| "learning_rate": 3.145725776649602e-06, |
| "loss": 0.0014, |
| "step": 330 |
| }, |
| { |
| "epoch": 2.3148936170212764, |
| "grad_norm": 0.0520123615860939, |
| "learning_rate": 2.615899256410306e-06, |
| "loss": 0.0019, |
| "step": 340 |
| }, |
| { |
| "epoch": 2.33531914893617, |
| "eval_loss": 0.002846540417522192, |
| "eval_runtime": 15.6173, |
| "eval_samples_per_second": 3.394, |
| "eval_steps_per_second": 3.394, |
| "step": 343 |
| }, |
| { |
| "epoch": 2.382978723404255, |
| "grad_norm": 0.10489223152399063, |
| "learning_rate": 2.1281651939094996e-06, |
| "loss": 0.005, |
| "step": 350 |
| }, |
| { |
| "epoch": 2.451063829787234, |
| "grad_norm": 0.06684073060750961, |
| "learning_rate": 1.6853038769745466e-06, |
| "loss": 0.0013, |
| "step": 360 |
| }, |
| { |
| "epoch": 2.519148936170213, |
| "grad_norm": 0.04539426416158676, |
| "learning_rate": 1.2898398000448441e-06, |
| "loss": 0.0013, |
| "step": 370 |
| }, |
| { |
| "epoch": 2.5872340425531917, |
| "grad_norm": 0.014835229143500328, |
| "learning_rate": 9.440272734993072e-07, |
| "loss": 0.0009, |
| "step": 380 |
| }, |
| { |
| "epoch": 2.65531914893617, |
| "grad_norm": 0.04478687047958374, |
| "learning_rate": 6.498375731458529e-07, |
| "loss": 0.0007, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.668936170212766, |
| "eval_loss": 0.002925063017755747, |
| "eval_runtime": 15.5681, |
| "eval_samples_per_second": 3.404, |
| "eval_steps_per_second": 3.404, |
| "step": 392 |
| }, |
| { |
| "epoch": 2.723404255319149, |
| "grad_norm": 0.01802811771631241, |
| "learning_rate": 4.089477031261113e-07, |
| "loss": 0.0054, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.7914893617021277, |
| "grad_norm": 0.037977054715156555, |
| "learning_rate": 2.2273083629153148e-07, |
| "loss": 0.0007, |
| "step": 410 |
| }, |
| { |
| "epoch": 2.8595744680851065, |
| "grad_norm": 0.02204310894012451, |
| "learning_rate": 9.224848654469932e-08, |
| "loss": 0.0035, |
| "step": 420 |
| }, |
| { |
| "epoch": 2.927659574468085, |
| "grad_norm": 0.02121455781161785, |
| "learning_rate": 1.824445776682504e-08, |
| "loss": 0.0034, |
| "step": 430 |
| }, |
| { |
| "epoch": 2.982127659574468, |
| "step": 438, |
| "total_flos": 2.997240173615186e+18, |
| "train_loss": 0.004681217891363997, |
| "train_runtime": 15083.6638, |
| "train_samples_per_second": 0.935, |
| "train_steps_per_second": 0.029 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 438, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 49, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 2, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 1 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.997240173615186e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|