{ "best_global_step": 2049, "best_metric": 0.6427525622254758, "best_model_checkpoint": "./saved_models/cbert/checkpoint-2049", "epoch": 5.0, "eval_steps": 500, "global_step": 3415, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14641288433382138, "grad_norm": 3.022106647491455, "learning_rate": 1.942020497803807e-05, "loss": 0.6936, "step": 100 }, { "epoch": 0.29282576866764276, "grad_norm": 1.8588309288024902, "learning_rate": 1.8834553440702785e-05, "loss": 0.6835, "step": 200 }, { "epoch": 0.43923865300146414, "grad_norm": 3.637296438217163, "learning_rate": 1.8248901903367496e-05, "loss": 0.6754, "step": 300 }, { "epoch": 0.5856515373352855, "grad_norm": 1.5142912864685059, "learning_rate": 1.766325036603221e-05, "loss": 0.6739, "step": 400 }, { "epoch": 0.7320644216691069, "grad_norm": 4.135972023010254, "learning_rate": 1.7077598828696925e-05, "loss": 0.6699, "step": 500 }, { "epoch": 0.8784773060029283, "grad_norm": 2.758687734603882, "learning_rate": 1.649194729136164e-05, "loss": 0.6775, "step": 600 }, { "epoch": 1.0, "eval_accuracy": 0.5995607613469985, "eval_loss": 0.6492753028869629, "eval_runtime": 1.5709, "eval_samples_per_second": 1739.163, "eval_steps_per_second": 54.747, "step": 683 }, { "epoch": 1.0248901903367496, "grad_norm": 4.188994407653809, "learning_rate": 1.5906295754026355e-05, "loss": 0.6622, "step": 700 }, { "epoch": 1.171303074670571, "grad_norm": 2.755507230758667, "learning_rate": 1.532064421669107e-05, "loss": 0.6485, "step": 800 }, { "epoch": 1.3177159590043923, "grad_norm": 4.434422016143799, "learning_rate": 1.4734992679355784e-05, "loss": 0.6422, "step": 900 }, { "epoch": 1.4641288433382138, "grad_norm": 3.1107141971588135, "learning_rate": 1.4149341142020499e-05, "loss": 0.6436, "step": 1000 }, { "epoch": 1.610541727672035, "grad_norm": 4.113313674926758, "learning_rate": 1.3563689604685213e-05, "loss": 0.6308, "step": 1100 }, { "epoch": 1.7569546120058566, "grad_norm": 5.160773754119873, "learning_rate": 1.2978038067349928e-05, "loss": 0.6223, "step": 1200 }, { "epoch": 1.903367496339678, "grad_norm": 2.9103806018829346, "learning_rate": 1.2392386530014641e-05, "loss": 0.6312, "step": 1300 }, { "epoch": 2.0, "eval_accuracy": 0.6317715959004392, "eval_loss": 0.6276857256889343, "eval_runtime": 1.5723, "eval_samples_per_second": 1737.632, "eval_steps_per_second": 54.699, "step": 1366 }, { "epoch": 2.049780380673499, "grad_norm": 4.584695339202881, "learning_rate": 1.1806734992679356e-05, "loss": 0.6105, "step": 1400 }, { "epoch": 2.1961932650073206, "grad_norm": 11.84752368927002, "learning_rate": 1.122108345534407e-05, "loss": 0.5812, "step": 1500 }, { "epoch": 2.342606149341142, "grad_norm": 4.095500946044922, "learning_rate": 1.0635431918008785e-05, "loss": 0.589, "step": 1600 }, { "epoch": 2.4890190336749636, "grad_norm": 4.87593936920166, "learning_rate": 1.00497803806735e-05, "loss": 0.5835, "step": 1700 }, { "epoch": 2.6354319180087846, "grad_norm": 4.864409446716309, "learning_rate": 9.464128843338215e-06, "loss": 0.5877, "step": 1800 }, { "epoch": 2.781844802342606, "grad_norm": 5.729854583740234, "learning_rate": 8.87847730600293e-06, "loss": 0.5911, "step": 1900 }, { "epoch": 2.9282576866764276, "grad_norm": 9.67723560333252, "learning_rate": 8.292825768667644e-06, "loss": 0.5689, "step": 2000 }, { "epoch": 3.0, "eval_accuracy": 0.6427525622254758, "eval_loss": 0.6189645528793335, "eval_runtime": 1.5647, "eval_samples_per_second": 1746.065, "eval_steps_per_second": 54.964, "step": 2049 }, { "epoch": 3.074670571010249, "grad_norm": 5.034663677215576, "learning_rate": 7.707174231332359e-06, "loss": 0.5712, "step": 2100 }, { "epoch": 3.22108345534407, "grad_norm": 4.707498550415039, "learning_rate": 7.1215226939970725e-06, "loss": 0.5292, "step": 2200 }, { "epoch": 3.3674963396778916, "grad_norm": 4.101770401000977, "learning_rate": 6.535871156661787e-06, "loss": 0.5367, "step": 2300 }, { "epoch": 3.513909224011713, "grad_norm": 6.7061614990234375, "learning_rate": 5.950219619326502e-06, "loss": 0.542, "step": 2400 }, { "epoch": 3.660322108345534, "grad_norm": 10.22519588470459, "learning_rate": 5.364568081991216e-06, "loss": 0.5486, "step": 2500 }, { "epoch": 3.8067349926793557, "grad_norm": 4.292933940887451, "learning_rate": 4.77891654465593e-06, "loss": 0.5331, "step": 2600 }, { "epoch": 3.953147877013177, "grad_norm": 4.438432216644287, "learning_rate": 4.193265007320644e-06, "loss": 0.5361, "step": 2700 }, { "epoch": 4.0, "eval_accuracy": 0.6295754026354319, "eval_loss": 0.633277177810669, "eval_runtime": 1.5589, "eval_samples_per_second": 1752.491, "eval_steps_per_second": 55.166, "step": 2732 }, { "epoch": 4.099560761346998, "grad_norm": 5.632227420806885, "learning_rate": 3.607613469985359e-06, "loss": 0.5327, "step": 2800 }, { "epoch": 4.24597364568082, "grad_norm": 8.069804191589355, "learning_rate": 3.0219619326500732e-06, "loss": 0.5111, "step": 2900 }, { "epoch": 4.392386530014641, "grad_norm": 7.2090840339660645, "learning_rate": 2.436310395314788e-06, "loss": 0.4965, "step": 3000 }, { "epoch": 4.538799414348462, "grad_norm": 10.964486122131348, "learning_rate": 1.8506588579795024e-06, "loss": 0.5082, "step": 3100 }, { "epoch": 4.685212298682284, "grad_norm": 4.838050365447998, "learning_rate": 1.2650073206442169e-06, "loss": 0.497, "step": 3200 }, { "epoch": 4.831625183016105, "grad_norm": 6.126997470855713, "learning_rate": 6.793557833089313e-07, "loss": 0.5137, "step": 3300 }, { "epoch": 4.978038067349927, "grad_norm": 13.103296279907227, "learning_rate": 9.370424597364569e-08, "loss": 0.5202, "step": 3400 }, { "epoch": 5.0, "eval_accuracy": 0.6372620790629575, "eval_loss": 0.6495257019996643, "eval_runtime": 1.5569, "eval_samples_per_second": 1754.803, "eval_steps_per_second": 55.239, "step": 3415 } ], "logging_steps": 100, "max_steps": 3415, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3218980489267200.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }