| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 5.0, | |
| "eval_steps": 500, | |
| "global_step": 545, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.5454545454545455e-06, | |
| "loss": 0.3364, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00011363636363636362, | |
| "loss": 0.1163, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.0001499042835718179, | |
| "loss": 0.0592, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.00014941639959850612, | |
| "loss": 0.0931, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00014851784029408765, | |
| "loss": 0.0872, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_dev.en.casimedicos.rag_loss": 0.060965459793806076, | |
| "eval_dev.en.casimedicos.rag_runtime": 86.964, | |
| "eval_dev.en.casimedicos.rag_samples_per_second": 0.724, | |
| "eval_dev.en.casimedicos.rag_steps_per_second": 0.092, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 0.00014721356445987312, | |
| "loss": 0.0679, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 0.00014551076988980255, | |
| "loss": 0.0418, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 0.00014341885364860423, | |
| "loss": 0.0404, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 0.0001409493602130284, | |
| "loss": 0.062, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_dev.en.casimedicos.rag_loss": 0.06188119202852249, | |
| "eval_dev.en.casimedicos.rag_runtime": 84.614, | |
| "eval_dev.en.casimedicos.rag_samples_per_second": 0.745, | |
| "eval_dev.en.casimedicos.rag_steps_per_second": 0.095, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 0.00013811591776234454, | |
| "loss": 0.0473, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 0.00013493416296969042, | |
| "loss": 0.0269, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 0.0001314216547093198, | |
| "loss": 0.0383, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 0.00012759777715596606, | |
| "loss": 0.036, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 0.0001234836328110779, | |
| "loss": 0.0292, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_dev.en.casimedicos.rag_loss": 0.07440634071826935, | |
| "eval_dev.en.casimedicos.rag_runtime": 84.4854, | |
| "eval_dev.en.casimedicos.rag_samples_per_second": 0.746, | |
| "eval_dev.en.casimedicos.rag_steps_per_second": 0.095, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "learning_rate": 0.00011910192604627463, | |
| "loss": 0.017, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "learning_rate": 0.00011447683780669952, | |
| "loss": 0.0163, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 3.67, | |
| "learning_rate": 0.00010963389216573467, | |
| "loss": 0.0083, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 3.9, | |
| "learning_rate": 0.00010459981546751072, | |
| "loss": 0.0056, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_dev.en.casimedicos.rag_loss": 0.09140212833881378, | |
| "eval_dev.en.casimedicos.rag_runtime": 83.9809, | |
| "eval_dev.en.casimedicos.rag_samples_per_second": 0.75, | |
| "eval_dev.en.casimedicos.rag_steps_per_second": 0.095, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "learning_rate": 9.940238883454927e-05, | |
| "loss": 0.0003, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "learning_rate": 9.407029485449108e-05, | |
| "loss": 0.003, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 4.59, | |
| "learning_rate": 8.863295929198699e-05, | |
| "loss": 0.0056, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "learning_rate": 8.312038869928251e-05, | |
| "loss": 0.0027, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_dev.en.casimedicos.rag_loss": 0.11252260953187943, | |
| "eval_dev.en.casimedicos.rag_runtime": 84.0376, | |
| "eval_dev.en.casimedicos.rag_samples_per_second": 0.75, | |
| "eval_dev.en.casimedicos.rag_steps_per_second": 0.095, | |
| "step": 545 | |
| } | |
| ], | |
| "logging_steps": 25, | |
| "max_steps": 1090, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "total_flos": 7.319956897348977e+17, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |