{ "best_global_step": 1410, "best_metric": 0.7945503472501974, "best_model_checkpoint": "./roberta-toxic-classifier-en/checkpoint-1410", "epoch": 10.0, "eval_steps": 500, "global_step": 1410, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.3546099290780142, "grad_norm": 9.611958503723145, "learning_rate": 9.884160756501183e-06, "loss": 0.653124008178711, "step": 50 }, { "epoch": 0.7092198581560284, "grad_norm": 7.586147308349609, "learning_rate": 9.765957446808511e-06, "loss": 0.5039330291748046, "step": 100 }, { "epoch": 1.0, "eval_accuracy": 0.745, "eval_f1": 0.7445706232176288, "eval_loss": 0.5029547214508057, "eval_precision": 0.7506988625409678, "eval_recall": 0.7551099231212526, "eval_runtime": 0.3569, "eval_samples_per_second": 2801.728, "eval_steps_per_second": 44.828, "step": 141 }, { "epoch": 1.0638297872340425, "grad_norm": 9.09180736541748, "learning_rate": 9.64775413711584e-06, "loss": 0.4156942367553711, "step": 150 }, { "epoch": 1.4184397163120568, "grad_norm": 7.9193925857543945, "learning_rate": 9.529550827423168e-06, "loss": 0.38147411346435545, "step": 200 }, { "epoch": 1.773049645390071, "grad_norm": 11.232121467590332, "learning_rate": 9.411347517730498e-06, "loss": 0.3642482376098633, "step": 250 }, { "epoch": 2.0, "eval_accuracy": 0.778, "eval_f1": 0.7759350171177573, "eval_loss": 0.45547381043434143, "eval_precision": 0.7752243747770282, "eval_recall": 0.7806237764181289, "eval_runtime": 0.3628, "eval_samples_per_second": 2756.355, "eval_steps_per_second": 44.102, "step": 282 }, { "epoch": 2.127659574468085, "grad_norm": 19.67840576171875, "learning_rate": 9.293144208037825e-06, "loss": 0.36101982116699216, "step": 300 }, { "epoch": 2.482269503546099, "grad_norm": 7.5036492347717285, "learning_rate": 9.174940898345155e-06, "loss": 0.3269504547119141, "step": 350 }, { "epoch": 2.8368794326241136, "grad_norm": 18.646142959594727, "learning_rate": 9.056737588652483e-06, "loss": 0.32073143005371096, "step": 400 }, { "epoch": 3.0, "eval_accuracy": 0.775, "eval_f1": 0.7745832043448335, "eval_loss": 0.5032930374145508, "eval_precision": 0.780389401846648, "eval_recall": 0.7854649713288456, "eval_runtime": 0.3639, "eval_samples_per_second": 2748.102, "eval_steps_per_second": 43.97, "step": 423 }, { "epoch": 3.1914893617021276, "grad_norm": 6.803809642791748, "learning_rate": 8.938534278959812e-06, "loss": 0.29136837005615235, "step": 450 }, { "epoch": 3.546099290780142, "grad_norm": 13.090974807739258, "learning_rate": 8.82033096926714e-06, "loss": 0.27523290634155273, "step": 500 }, { "epoch": 3.900709219858156, "grad_norm": 10.417556762695312, "learning_rate": 8.70212765957447e-06, "loss": 0.2774989700317383, "step": 550 }, { "epoch": 4.0, "eval_accuracy": 0.791, "eval_f1": 0.789619694817699, "eval_loss": 0.4875805974006653, "eval_precision": 0.7899062159912937, "eval_recall": 0.7961446186920396, "eval_runtime": 0.3651, "eval_samples_per_second": 2738.756, "eval_steps_per_second": 43.82, "step": 564 }, { "epoch": 4.25531914893617, "grad_norm": 13.110213279724121, "learning_rate": 8.583924349881797e-06, "loss": 0.240526180267334, "step": 600 }, { "epoch": 4.609929078014185, "grad_norm": 16.28187370300293, "learning_rate": 8.465721040189125e-06, "loss": 0.21295007705688476, "step": 650 }, { "epoch": 4.964539007092198, "grad_norm": 16.510469436645508, "learning_rate": 8.347517730496454e-06, "loss": 0.22765600204467773, "step": 700 }, { "epoch": 5.0, "eval_accuracy": 0.788, "eval_f1": 0.7827120812082848, "eval_loss": 0.5206102132797241, "eval_precision": 0.7836996269193908, "eval_recall": 0.781890783950693, "eval_runtime": 0.3796, "eval_samples_per_second": 2634.502, "eval_steps_per_second": 42.152, "step": 705 }, { "epoch": 5.319148936170213, "grad_norm": 24.18474006652832, "learning_rate": 8.229314420803784e-06, "loss": 0.18396900177001954, "step": 750 }, { "epoch": 5.673758865248227, "grad_norm": 21.326946258544922, "learning_rate": 8.111111111111112e-06, "loss": 0.19418764114379883, "step": 800 }, { "epoch": 6.0, "eval_accuracy": 0.796, "eval_f1": 0.7937694099378882, "eval_loss": 0.6245253086090088, "eval_precision": 0.7925987495934372, "eval_recall": 0.7978223818924188, "eval_runtime": 0.3664, "eval_samples_per_second": 2729.117, "eval_steps_per_second": 43.666, "step": 846 }, { "epoch": 6.028368794326241, "grad_norm": 7.92404842376709, "learning_rate": 7.992907801418441e-06, "loss": 0.1742597770690918, "step": 850 }, { "epoch": 6.382978723404255, "grad_norm": 11.012433052062988, "learning_rate": 7.874704491725769e-06, "loss": 0.14716711044311523, "step": 900 }, { "epoch": 6.73758865248227, "grad_norm": 9.80257511138916, "learning_rate": 7.756501182033097e-06, "loss": 0.13163516998291017, "step": 950 }, { "epoch": 7.0, "eval_accuracy": 0.796, "eval_f1": 0.7943341062607118, "eval_loss": 0.6543664932250977, "eval_precision": 0.7938576995006228, "eval_recall": 0.7999109007606132, "eval_runtime": 0.3667, "eval_samples_per_second": 2727.034, "eval_steps_per_second": 43.633, "step": 987 }, { "epoch": 7.092198581560283, "grad_norm": 2.5416817665100098, "learning_rate": 7.638297872340426e-06, "loss": 0.1342453956604004, "step": 1000 }, { "epoch": 7.446808510638298, "grad_norm": 9.653868675231934, "learning_rate": 7.520094562647755e-06, "loss": 0.11633362770080566, "step": 1050 }, { "epoch": 7.801418439716312, "grad_norm": 16.958995819091797, "learning_rate": 7.401891252955083e-06, "loss": 0.11314620018005371, "step": 1100 }, { "epoch": 8.0, "eval_accuracy": 0.777, "eval_f1": 0.77662450579424, "eval_loss": 0.788261353969574, "eval_precision": 0.7828304736199473, "eval_recall": 0.7878068917035529, "eval_runtime": 0.3673, "eval_samples_per_second": 2722.508, "eval_steps_per_second": 43.56, "step": 1128 }, { "epoch": 8.156028368794326, "grad_norm": 9.911176681518555, "learning_rate": 7.283687943262412e-06, "loss": 0.10539370536804199, "step": 1150 }, { "epoch": 8.51063829787234, "grad_norm": 14.041365623474121, "learning_rate": 7.1654846335697405e-06, "loss": 0.10781905174255371, "step": 1200 }, { "epoch": 8.865248226950355, "grad_norm": 10.613764762878418, "learning_rate": 7.047281323877069e-06, "loss": 0.10644716262817383, "step": 1250 }, { "epoch": 9.0, "eval_accuracy": 0.788, "eval_f1": 0.7864203650196856, "eval_loss": 0.7684714198112488, "eval_precision": 0.7862955357821888, "eval_recall": 0.7923333782916652, "eval_runtime": 0.3673, "eval_samples_per_second": 2722.633, "eval_steps_per_second": 43.562, "step": 1269 }, { "epoch": 9.21985815602837, "grad_norm": 15.78783130645752, "learning_rate": 6.929078014184397e-06, "loss": 0.09560779571533203, "step": 1300 }, { "epoch": 9.574468085106384, "grad_norm": 10.481488227844238, "learning_rate": 6.810874704491726e-06, "loss": 0.0669502067565918, "step": 1350 }, { "epoch": 9.929078014184396, "grad_norm": 52.73137664794922, "learning_rate": 6.692671394799055e-06, "loss": 0.07912938117980957, "step": 1400 }, { "epoch": 10.0, "eval_accuracy": 0.796, "eval_f1": 0.7945503472501974, "eval_loss": 0.9117002487182617, "eval_precision": 0.7945365556929553, "eval_recall": 0.8008059802755537, "eval_runtime": 0.3717, "eval_samples_per_second": 2690.34, "eval_steps_per_second": 43.045, "step": 1410 } ], "logging_steps": 50, "max_steps": 4230, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5919998745600000.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }