| { |
| "best_global_step": 1575, |
| "best_metric": 0.8444872862934459, |
| "best_model_checkpoint": "outputs/runs/xlm-r/checkpoint-1575", |
| "epoch": 5.0, |
| "eval_steps": 500, |
| "global_step": 1575, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.9968253968253968, |
| "grad_norm": 30.17567253112793, |
| "learning_rate": 4.999432333543028e-06, |
| "loss": 0.4538, |
| "step": 314 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.8139264251224511, |
| "eval_f1": 0.7854504397785999, |
| "eval_loss": 0.44861647486686707, |
| "eval_precision": 0.8467340416826592, |
| "eval_recall": 0.7710513443383047, |
| "eval_runtime": 1.5133, |
| "eval_samples_per_second": 859.712, |
| "eval_steps_per_second": 27.093, |
| "step": 315 |
| }, |
| { |
| "epoch": 1.9936507936507937, |
| "grad_norm": 3.3635013103485107, |
| "learning_rate": 4.996525669931999e-06, |
| "loss": 0.3243, |
| "step": 628 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.8448431583700976, |
| "eval_f1": 0.8327710497152178, |
| "eval_loss": 0.3449461758136749, |
| "eval_precision": 0.8440828341294326, |
| "eval_recall": 0.8258900626750871, |
| "eval_runtime": 1.5236, |
| "eval_samples_per_second": 853.92, |
| "eval_steps_per_second": 26.911, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.9904761904761905, |
| "grad_norm": 5.762388706207275, |
| "learning_rate": 4.991155236893945e-06, |
| "loss": 0.3034, |
| "step": 942 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.8510959808246779, |
| "eval_f1": 0.8412021181756717, |
| "eval_loss": 0.34437716007232666, |
| "eval_precision": 0.8470134885464016, |
| "eval_recall": 0.8369179080203759, |
| "eval_runtime": 1.5273, |
| "eval_samples_per_second": 851.818, |
| "eval_steps_per_second": 26.844, |
| "step": 945 |
| }, |
| { |
| "epoch": 3.9873015873015873, |
| "grad_norm": 3.3767998218536377, |
| "learning_rate": 4.983326334397891e-06, |
| "loss": 0.2899, |
| "step": 1256 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.8500885816514399, |
| "eval_f1": 0.8385606442359046, |
| "eval_loss": 0.34483084082603455, |
| "eval_precision": 0.8495561216187286, |
| "eval_recall": 0.8317476865987361, |
| "eval_runtime": 1.5267, |
| "eval_samples_per_second": 852.172, |
| "eval_steps_per_second": 26.856, |
| "step": 1260 |
| }, |
| { |
| "epoch": 4.984126984126984, |
| "grad_norm": 7.968695640563965, |
| "learning_rate": 4.97304668862541e-06, |
| "loss": 0.2865, |
| "step": 1570 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.8532670997325181, |
| "eval_f1": 0.8444872862934459, |
| "eval_loss": 0.3270244300365448, |
| "eval_precision": 0.8474564252758483, |
| "eval_recall": 0.8420027140388191, |
| "eval_runtime": 1.5286, |
| "eval_samples_per_second": 851.129, |
| "eval_steps_per_second": 26.823, |
| "step": 1575 |
| } |
| ], |
| "logging_steps": 314, |
| "max_steps": 31500, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 100, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 5, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1645189704599040.0, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|