| { | |
| "best_global_step": 2550, | |
| "best_metric": 0.08080464601516724, | |
| "best_model_checkpoint": "./turkish-toxic-bert-full\\checkpoint-2550", | |
| "epoch": 0.749173705471906, | |
| "eval_steps": 850, | |
| "global_step": 2550, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.049944913698127064, | |
| "grad_norm": 10.951066970825195, | |
| "learning_rate": 9.941176470588236e-06, | |
| "loss": 0.6808, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.09988982739625413, | |
| "grad_norm": 2.395719051361084, | |
| "learning_rate": 1.9941176470588238e-05, | |
| "loss": 0.2552, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.1498347410943812, | |
| "grad_norm": 31.97089385986328, | |
| "learning_rate": 1.9657617504051864e-05, | |
| "loss": 0.1784, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.19977965479250825, | |
| "grad_norm": 3.0786185264587402, | |
| "learning_rate": 1.931320907617504e-05, | |
| "loss": 0.1626, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.24972456849063532, | |
| "grad_norm": 36.20637893676758, | |
| "learning_rate": 1.896880064829822e-05, | |
| "loss": 0.163, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.24972456849063532, | |
| "eval_accuracy": 0.9607969151670951, | |
| "eval_loss": 0.12955638766288757, | |
| "eval_runtime": 632.0606, | |
| "eval_samples_per_second": 12.309, | |
| "eval_steps_per_second": 3.077, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.2996694821887624, | |
| "grad_norm": 13.24213695526123, | |
| "learning_rate": 1.8624392220421394e-05, | |
| "loss": 0.1494, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.3496143958868895, | |
| "grad_norm": 7.672886371612549, | |
| "learning_rate": 1.8279983792544573e-05, | |
| "loss": 0.1403, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.3995593095850165, | |
| "grad_norm": 1.0008550882339478, | |
| "learning_rate": 1.7935575364667748e-05, | |
| "loss": 0.133, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.4495042232831436, | |
| "grad_norm": 15.496179580688477, | |
| "learning_rate": 1.7591166936790924e-05, | |
| "loss": 0.1059, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.49944913698127064, | |
| "grad_norm": 13.099618911743164, | |
| "learning_rate": 1.7246758508914102e-05, | |
| "loss": 0.1281, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.49944913698127064, | |
| "eval_accuracy": 0.9701799485861182, | |
| "eval_loss": 0.0947001576423645, | |
| "eval_runtime": 632.2141, | |
| "eval_samples_per_second": 12.306, | |
| "eval_steps_per_second": 3.076, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.5493940506793977, | |
| "grad_norm": 9.940011024475098, | |
| "learning_rate": 1.6902350081037278e-05, | |
| "loss": 0.1258, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.5993389643775248, | |
| "grad_norm": 20.614500045776367, | |
| "learning_rate": 1.6557941653160453e-05, | |
| "loss": 0.0975, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.6492838780756518, | |
| "grad_norm": 12.049532890319824, | |
| "learning_rate": 1.6213533225283632e-05, | |
| "loss": 0.1297, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.699228791773779, | |
| "grad_norm": 2.572075843811035, | |
| "learning_rate": 1.5869124797406807e-05, | |
| "loss": 0.1329, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.749173705471906, | |
| "grad_norm": 7.409031391143799, | |
| "learning_rate": 1.5524716369529983e-05, | |
| "loss": 0.1162, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.749173705471906, | |
| "eval_accuracy": 0.9699228791773779, | |
| "eval_loss": 0.08080464601516724, | |
| "eval_runtime": 632.5405, | |
| "eval_samples_per_second": 12.3, | |
| "eval_steps_per_second": 3.075, | |
| "step": 2550 | |
| } | |
| ], | |
| "logging_steps": 170, | |
| "max_steps": 10212, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 2550, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 3, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 0 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5367465529344000.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |