{ "best_metric": 0.8784255981445312, "best_model_checkpoint": "../Modelos/mt0_QG_SQAC_SQuAD 17-11-22_10:03:48/checkpoint-1200", "epoch": 2.9976617303195634, "global_step": 1200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12, "eval_loss": 1.0145061016082764, "eval_runtime": 65.3999, "eval_samples_per_second": 190.122, "eval_steps_per_second": 11.896, "step": 50 }, { "epoch": 0.25, "eval_loss": 0.9660477638244629, "eval_runtime": 65.4002, "eval_samples_per_second": 190.122, "eval_steps_per_second": 11.896, "step": 100 }, { "epoch": 0.37, "eval_loss": 0.9502729773521423, "eval_runtime": 65.4029, "eval_samples_per_second": 190.114, "eval_steps_per_second": 11.895, "step": 150 }, { "epoch": 0.5, "eval_loss": 0.9393932819366455, "eval_runtime": 65.4026, "eval_samples_per_second": 190.115, "eval_steps_per_second": 11.896, "step": 200 }, { "epoch": 0.62, "eval_loss": 0.9244149923324585, "eval_runtime": 65.4024, "eval_samples_per_second": 190.115, "eval_steps_per_second": 11.896, "step": 250 }, { "epoch": 0.75, "eval_loss": 0.912540078163147, "eval_runtime": 65.4032, "eval_samples_per_second": 190.113, "eval_steps_per_second": 11.895, "step": 300 }, { "epoch": 0.87, "eval_loss": 0.9208225607872009, "eval_runtime": 65.4136, "eval_samples_per_second": 190.083, "eval_steps_per_second": 11.894, "step": 350 }, { "epoch": 1.0, "eval_loss": 0.907939076423645, "eval_runtime": 65.3953, "eval_samples_per_second": 190.136, "eval_steps_per_second": 11.897, "step": 400 }, { "epoch": 1.12, "eval_loss": 0.903082013130188, "eval_runtime": 65.3937, "eval_samples_per_second": 190.141, "eval_steps_per_second": 11.897, "step": 450 }, { "epoch": 1.25, "learning_rate": 0.001, "loss": 1.1491, "step": 500 }, { "epoch": 1.25, "eval_loss": 0.8966090083122253, "eval_runtime": 65.4073, "eval_samples_per_second": 190.101, "eval_steps_per_second": 11.895, "step": 500 }, { "epoch": 1.37, "eval_loss": 0.8984885215759277, "eval_runtime": 65.3845, "eval_samples_per_second": 190.167, "eval_steps_per_second": 11.899, "step": 550 }, { "epoch": 1.5, "eval_loss": 0.8956364393234253, "eval_runtime": 65.3948, "eval_samples_per_second": 190.137, "eval_steps_per_second": 11.897, "step": 600 }, { "epoch": 1.62, "eval_loss": 0.8861657381057739, "eval_runtime": 65.3936, "eval_samples_per_second": 190.141, "eval_steps_per_second": 11.897, "step": 650 }, { "epoch": 1.75, "eval_loss": 0.8837835788726807, "eval_runtime": 65.4055, "eval_samples_per_second": 190.106, "eval_steps_per_second": 11.895, "step": 700 }, { "epoch": 1.87, "eval_loss": 0.875281035900116, "eval_runtime": 65.3939, "eval_samples_per_second": 190.14, "eval_steps_per_second": 11.897, "step": 750 }, { "epoch": 2.0, "eval_loss": 0.889029860496521, "eval_runtime": 65.4822, "eval_samples_per_second": 189.884, "eval_steps_per_second": 11.881, "step": 800 }, { "epoch": 2.12, "eval_loss": 0.9000933766365051, "eval_runtime": 65.3866, "eval_samples_per_second": 190.161, "eval_steps_per_second": 11.898, "step": 850 }, { "epoch": 2.25, "eval_loss": 0.897091805934906, "eval_runtime": 65.4252, "eval_samples_per_second": 190.049, "eval_steps_per_second": 11.891, "step": 900 }, { "epoch": 2.37, "eval_loss": 0.8837863206863403, "eval_runtime": 65.4877, "eval_samples_per_second": 189.868, "eval_steps_per_second": 11.88, "step": 950 }, { "epoch": 2.5, "learning_rate": 0.001, "loss": 0.7877, "step": 1000 }, { "epoch": 2.5, "eval_loss": 0.8839919567108154, "eval_runtime": 65.4142, "eval_samples_per_second": 190.081, "eval_steps_per_second": 11.893, "step": 1000 }, { "epoch": 2.62, "eval_loss": 0.8844324350357056, "eval_runtime": 65.4264, "eval_samples_per_second": 190.046, "eval_steps_per_second": 11.891, "step": 1050 }, { "epoch": 2.75, "eval_loss": 0.8891133666038513, "eval_runtime": 65.4136, "eval_samples_per_second": 190.083, "eval_steps_per_second": 11.894, "step": 1100 }, { "epoch": 2.87, "eval_loss": 0.8817307949066162, "eval_runtime": 65.4132, "eval_samples_per_second": 190.084, "eval_steps_per_second": 11.894, "step": 1150 }, { "epoch": 3.0, "eval_loss": 0.8784255981445312, "eval_runtime": 65.4323, "eval_samples_per_second": 190.029, "eval_steps_per_second": 11.89, "step": 1200 } ], "max_steps": 1200, "num_train_epochs": 3, "total_flos": 3.392579398501663e+17, "trial_name": null, "trial_params": null }