| { |
| "best_global_step": 11655, |
| "best_metric": 0.06289209425449371, |
| "best_model_checkpoint": "./results_albert_punctuation_casing/checkpoint-11655", |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 11655, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.1287001287001287, |
| "grad_norm": 4.2918195724487305, |
| "learning_rate": 1.9143715143715144e-05, |
| "loss": 0.2105482635498047, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.2574002574002574, |
| "grad_norm": 3.180814504623413, |
| "learning_rate": 1.8285714285714288e-05, |
| "loss": 0.09811102294921875, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.3861003861003861, |
| "grad_norm": 4.069736480712891, |
| "learning_rate": 1.742771342771343e-05, |
| "loss": 0.08830223083496094, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.5148005148005148, |
| "grad_norm": 2.3238089084625244, |
| "learning_rate": 1.656971256971257e-05, |
| "loss": 0.07262681579589844, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.6435006435006435, |
| "grad_norm": 0.725167989730835, |
| "learning_rate": 1.5711711711711713e-05, |
| "loss": 0.07961682891845703, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.7722007722007722, |
| "grad_norm": 13.671622276306152, |
| "learning_rate": 1.4853710853710854e-05, |
| "loss": 0.06993846893310547, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.9009009009009009, |
| "grad_norm": 2.6098945140838623, |
| "learning_rate": 1.3995709995709996e-05, |
| "loss": 0.07217549133300781, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_casing_accuracy": 0.6387912059001499, |
| "eval_loss": 0.07048454880714417, |
| "eval_overall_accuracy": 0.6404223412931571, |
| "eval_punctuation_accuracy": 0.6420534766861643, |
| "eval_runtime": 86.2448, |
| "eval_samples_per_second": 180.162, |
| "eval_steps_per_second": 11.27, |
| "step": 3885 |
| }, |
| { |
| "epoch": 1.0296010296010296, |
| "grad_norm": 1.678989052772522, |
| "learning_rate": 1.3137709137709139e-05, |
| "loss": 0.05899927520751953, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.1583011583011582, |
| "grad_norm": 5.855215549468994, |
| "learning_rate": 1.2279708279708281e-05, |
| "loss": 0.05248377227783203, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.287001287001287, |
| "grad_norm": 20.59808921813965, |
| "learning_rate": 1.1421707421707422e-05, |
| "loss": 0.05537939834594727, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.4157014157014158, |
| "grad_norm": 3.922346830368042, |
| "learning_rate": 1.0563706563706564e-05, |
| "loss": 0.05087580490112305, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.5444015444015444, |
| "grad_norm": 0.129458948969841, |
| "learning_rate": 9.705705705705706e-06, |
| "loss": 0.0524902229309082, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.673101673101673, |
| "grad_norm": 0.10066387057304382, |
| "learning_rate": 8.847704847704849e-06, |
| "loss": 0.04880419921875, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.8018018018018018, |
| "grad_norm": 1.1645872592926025, |
| "learning_rate": 7.989703989703991e-06, |
| "loss": 0.04735799407958984, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.9305019305019306, |
| "grad_norm": 1.6507762670516968, |
| "learning_rate": 7.1317031317031325e-06, |
| "loss": 0.05284581756591797, |
| "step": 7500 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_casing_accuracy": 0.6404749585638992, |
| "eval_loss": 0.06381073594093323, |
| "eval_overall_accuracy": 0.6414089151195728, |
| "eval_punctuation_accuracy": 0.6423428716752462, |
| "eval_runtime": 85.5043, |
| "eval_samples_per_second": 181.722, |
| "eval_steps_per_second": 11.368, |
| "step": 7770 |
| }, |
| { |
| "epoch": 2.0592020592020592, |
| "grad_norm": 0.3554779887199402, |
| "learning_rate": 6.273702273702275e-06, |
| "loss": 0.042236793518066404, |
| "step": 8000 |
| }, |
| { |
| "epoch": 2.187902187902188, |
| "grad_norm": 0.3045165240764618, |
| "learning_rate": 5.415701415701416e-06, |
| "loss": 0.03922730255126953, |
| "step": 8500 |
| }, |
| { |
| "epoch": 2.3166023166023164, |
| "grad_norm": 1.6675119400024414, |
| "learning_rate": 4.557700557700558e-06, |
| "loss": 0.034516990661621096, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.4453024453024454, |
| "grad_norm": 1.5336593389511108, |
| "learning_rate": 3.6996996996997e-06, |
| "loss": 0.03220732116699219, |
| "step": 9500 |
| }, |
| { |
| "epoch": 2.574002574002574, |
| "grad_norm": 2.9998581409454346, |
| "learning_rate": 2.8416988416988417e-06, |
| "loss": 0.033812404632568356, |
| "step": 10000 |
| }, |
| { |
| "epoch": 2.7027027027027026, |
| "grad_norm": 3.3144118785858154, |
| "learning_rate": 1.9836979836979837e-06, |
| "loss": 0.03330759048461914, |
| "step": 10500 |
| }, |
| { |
| "epoch": 2.8314028314028317, |
| "grad_norm": 2.0357117652893066, |
| "learning_rate": 1.1256971256971258e-06, |
| "loss": 0.03533472442626953, |
| "step": 11000 |
| }, |
| { |
| "epoch": 2.9601029601029603, |
| "grad_norm": 3.5177860260009766, |
| "learning_rate": 2.676962676962677e-07, |
| "loss": 0.0314073543548584, |
| "step": 11500 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_casing_accuracy": 0.6400978681235804, |
| "eval_loss": 0.06289209425449371, |
| "eval_overall_accuracy": 0.6402776437986162, |
| "eval_punctuation_accuracy": 0.6404574194736519, |
| "eval_runtime": 85.5898, |
| "eval_samples_per_second": 181.54, |
| "eval_steps_per_second": 11.356, |
| "step": 11655 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 11655, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|