| { |
| "best_metric": 0.3326460123062134, |
| "best_model_checkpoint": "out/deberta-v3-small-bkl-multi/checkpoint-4476", |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 4476, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.3351206434316354, |
| "grad_norm": 1.3047796487808228, |
| "learning_rate": 1.865951742627346e-05, |
| "loss": 0.4032, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.6702412868632708, |
| "grad_norm": 0.7828705310821533, |
| "learning_rate": 1.731903485254692e-05, |
| "loss": 0.3503, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.2743503772003353, |
| "eval_f1": 0.7807241322816955, |
| "eval_loss": 0.34355366230010986, |
| "eval_roc_auc": 0.8276824995087615, |
| "eval_runtime": 20.9327, |
| "eval_samples_per_second": 569.923, |
| "eval_steps_per_second": 8.933, |
| "step": 1492 |
| }, |
| { |
| "epoch": 1.0053619302949062, |
| "grad_norm": 1.0840567350387573, |
| "learning_rate": 1.5978552278820375e-05, |
| "loss": 0.3432, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.3404825737265416, |
| "grad_norm": 0.586756706237793, |
| "learning_rate": 1.4638069705093835e-05, |
| "loss": 0.3322, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.675603217158177, |
| "grad_norm": 0.6265957951545715, |
| "learning_rate": 1.3297587131367293e-05, |
| "loss": 0.3313, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.28491198658843253, |
| "eval_f1": 0.7920844448532793, |
| "eval_loss": 0.3326815962791443, |
| "eval_roc_auc": 0.8374262996694112, |
| "eval_runtime": 20.987, |
| "eval_samples_per_second": 568.447, |
| "eval_steps_per_second": 8.91, |
| "step": 2984 |
| }, |
| { |
| "epoch": 2.0107238605898123, |
| "grad_norm": 0.7209903001785278, |
| "learning_rate": 1.1957104557640751e-05, |
| "loss": 0.3266, |
| "step": 3000 |
| }, |
| { |
| "epoch": 2.3458445040214477, |
| "grad_norm": 0.6551246643066406, |
| "learning_rate": 1.061662198391421e-05, |
| "loss": 0.3191, |
| "step": 3500 |
| }, |
| { |
| "epoch": 2.680965147453083, |
| "grad_norm": 0.9030967354774475, |
| "learning_rate": 9.276139410187667e-06, |
| "loss": 0.3199, |
| "step": 4000 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.28499580888516346, |
| "eval_f1": 0.7939965223757665, |
| "eval_loss": 0.3326460123062134, |
| "eval_roc_auc": 0.8392196917667687, |
| "eval_runtime": 21.0081, |
| "eval_samples_per_second": 567.876, |
| "eval_steps_per_second": 8.901, |
| "step": 4476 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 7460, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7408602625928400.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|