| { |
| "best_global_step": 3906, |
| "best_metric": 0.0746106207370758, |
| "best_model_checkpoint": "./results_albert_punctuation_casing/checkpoint-3906", |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 5859, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.2560163850486431, |
| "grad_norm": 7.659163475036621, |
| "learning_rate": 1.8296637651476362e-05, |
| "loss": 0.17497076416015625, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.5120327700972862, |
| "grad_norm": 9.602242469787598, |
| "learning_rate": 1.6589861751152075e-05, |
| "loss": 0.09840100860595703, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.7680491551459293, |
| "grad_norm": 2.2302889823913574, |
| "learning_rate": 1.4883085850827787e-05, |
| "loss": 0.08661653137207032, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_casing_accuracy": 0.5982128829536528, |
| "eval_loss": 0.07590867578983307, |
| "eval_overall_accuracy": 0.597888845247447, |
| "eval_punctuation_accuracy": 0.5975648075412412, |
| "eval_runtime": 11.014, |
| "eval_samples_per_second": 709.096, |
| "eval_steps_per_second": 44.398, |
| "step": 1953 |
| }, |
| { |
| "epoch": 1.0240655401945724, |
| "grad_norm": 0.46477240324020386, |
| "learning_rate": 1.3176309950503501e-05, |
| "loss": 0.08205690765380859, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.2800819252432156, |
| "grad_norm": 6.8253865242004395, |
| "learning_rate": 1.1469534050179212e-05, |
| "loss": 0.0625931053161621, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.5360983102918588, |
| "grad_norm": 3.1819510459899902, |
| "learning_rate": 9.762758149854925e-06, |
| "loss": 0.06253623962402344, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.7921146953405018, |
| "grad_norm": 4.698487281799316, |
| "learning_rate": 8.055982249530637e-06, |
| "loss": 0.053002174377441404, |
| "step": 3500 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_casing_accuracy": 0.5975058915946583, |
| "eval_loss": 0.0746106207370758, |
| "eval_overall_accuracy": 0.5965730557737627, |
| "eval_punctuation_accuracy": 0.5956402199528672, |
| "eval_runtime": 11.2558, |
| "eval_samples_per_second": 693.865, |
| "eval_steps_per_second": 43.444, |
| "step": 3906 |
| }, |
| { |
| "epoch": 2.048131080389145, |
| "grad_norm": 0.29059869050979614, |
| "learning_rate": 6.349206349206349e-06, |
| "loss": 0.057237117767333985, |
| "step": 4000 |
| }, |
| { |
| "epoch": 2.3041474654377883, |
| "grad_norm": 1.9593513011932373, |
| "learning_rate": 4.642430448882062e-06, |
| "loss": 0.03903835678100586, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.5601638504864312, |
| "grad_norm": 4.75918436050415, |
| "learning_rate": 2.9356545485577746e-06, |
| "loss": 0.03857281112670898, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.8161802355350742, |
| "grad_norm": 1.5033795833587646, |
| "learning_rate": 1.228878648233487e-06, |
| "loss": 0.03791716384887695, |
| "step": 5500 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_casing_accuracy": 0.5970542026708563, |
| "eval_loss": 0.07589453458786011, |
| "eval_overall_accuracy": 0.5967596229379419, |
| "eval_punctuation_accuracy": 0.5964650432050275, |
| "eval_runtime": 11.3344, |
| "eval_samples_per_second": 689.056, |
| "eval_steps_per_second": 43.143, |
| "step": 5859 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 5859, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|