{ "best_metric": 0.7580168991143236, "best_model_checkpoint": "/nfs/production/literature/amina-mardiyyah/new_data/OT-Entity-Extraction-Pipeline/model_outputs/hf/CeLLaTe-AL-Test_1.0/base/cellate_tapt_freeze_llrd_ww_mask_LR_2e/checkpoint-920", "epoch": 30.0, "eval_steps": 500, "global_step": 3450, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 2.0460078716278076, "learning_rate": 6.434782608695652e-06, "loss": 1.3009, "step": 115 }, { "epoch": 1.0, "eval_accuracy": 0.8241595949177176, "eval_f1": 0.16903515332834707, "eval_loss": 0.6194407939910889, "eval_precision": 0.3309900410076157, "eval_recall": 0.11349939734833267, "eval_runtime": 4.4674, "eval_samples_per_second": 213.098, "eval_steps_per_second": 13.431, "step": 115 }, { "epoch": 2.0, "grad_norm": 2.8860368728637695, "learning_rate": 1.3101449275362319e-05, "loss": 0.3335, "step": 230 }, { "epoch": 2.0, "eval_accuracy": 0.9082938721927891, "eval_f1": 0.6236261015942172, "eval_loss": 0.29858899116516113, "eval_precision": 0.6149189611404022, "eval_recall": 0.6325833668139815, "eval_runtime": 4.5486, "eval_samples_per_second": 209.295, "eval_steps_per_second": 13.191, "step": 230 }, { "epoch": 3.0, "grad_norm": 2.3074285984039307, "learning_rate": 1.9768115942028986e-05, "loss": 0.1312, "step": 345 }, { "epoch": 3.0, "eval_accuracy": 0.9199915607857846, "eval_f1": 0.6880798004987531, "eval_loss": 0.26223188638687134, "eval_precision": 0.6833762631266098, "eval_recall": 0.6928485335476094, "eval_runtime": 4.4824, "eval_samples_per_second": 212.386, "eval_steps_per_second": 13.386, "step": 345 }, { "epoch": 4.0, "grad_norm": 2.443023204803467, "learning_rate": 1.9285024154589372e-05, "loss": 0.0797, "step": 460 }, { "epoch": 4.0, "eval_accuracy": 0.9264850672792911, "eval_f1": 0.7306420915556447, "eval_loss": 0.27330681681632996, "eval_precision": 0.7286713286713287, "eval_recall": 0.7326235435918039, "eval_runtime": 4.5427, "eval_samples_per_second": 209.567, "eval_steps_per_second": 13.208, "step": 460 }, { "epoch": 5.0, "grad_norm": 4.006119251251221, "learning_rate": 1.8544283413848634e-05, "loss": 0.0566, "step": 575 }, { "epoch": 5.0, "eval_accuracy": 0.9305874630784378, "eval_f1": 0.746298320580344, "eval_loss": 0.2804930508136749, "eval_precision": 0.7384464110127827, "eval_recall": 0.75431900361591, "eval_runtime": 4.4864, "eval_samples_per_second": 212.196, "eval_steps_per_second": 13.374, "step": 575 }, { "epoch": 6.0, "grad_norm": 2.0166711807250977, "learning_rate": 1.7803542673107893e-05, "loss": 0.0404, "step": 690 }, { "epoch": 6.0, "eval_accuracy": 0.927797833935018, "eval_f1": 0.7355380362444968, "eval_loss": 0.3372795581817627, "eval_precision": 0.7500522029651284, "eval_recall": 0.7215749296906389, "eval_runtime": 4.5271, "eval_samples_per_second": 210.29, "eval_steps_per_second": 13.254, "step": 690 }, { "epoch": 7.0, "grad_norm": 2.738591194152832, "learning_rate": 1.706280193236715e-05, "loss": 0.0299, "step": 805 }, { "epoch": 7.0, "eval_accuracy": 0.9269304702517699, "eval_f1": 0.7306815827636214, "eval_loss": 0.3340073227882385, "eval_precision": 0.7542771599657827, "eval_recall": 0.7085174768983528, "eval_runtime": 4.5362, "eval_samples_per_second": 209.869, "eval_steps_per_second": 13.227, "step": 805 }, { "epoch": 8.0, "grad_norm": 1.060254454612732, "learning_rate": 1.632206119162641e-05, "loss": 0.0221, "step": 920 }, { "epoch": 8.0, "eval_accuracy": 0.9331426696047634, "eval_f1": 0.7580168991143236, "eval_loss": 0.31868091225624084, "eval_precision": 0.7684210526315789, "eval_recall": 0.747890719164323, "eval_runtime": 4.4814, "eval_samples_per_second": 212.432, "eval_steps_per_second": 13.389, "step": 920 }, { "epoch": 9.0, "grad_norm": 0.8265031576156616, "learning_rate": 1.5581320450885667e-05, "loss": 0.0172, "step": 1035 }, { "epoch": 9.0, "eval_accuracy": 0.9277275071498898, "eval_f1": 0.7327145687767235, "eval_loss": 0.39539921283721924, "eval_precision": 0.7618737800910865, "eval_recall": 0.7057051024507834, "eval_runtime": 4.5117, "eval_samples_per_second": 211.008, "eval_steps_per_second": 13.299, "step": 1035 }, { "epoch": 10.0, "grad_norm": 0.03647367283701897, "learning_rate": 1.484057971014493e-05, "loss": 0.0142, "step": 1150 }, { "epoch": 10.0, "eval_accuracy": 0.9295560035632238, "eval_f1": 0.7459492509935799, "eval_loss": 0.38732069730758667, "eval_precision": 0.7569803516028956, "eval_recall": 0.7352350341502611, "eval_runtime": 4.5775, "eval_samples_per_second": 207.974, "eval_steps_per_second": 13.108, "step": 1150 }, { "epoch": 11.0, "grad_norm": 2.521304130554199, "learning_rate": 1.4099838969404187e-05, "loss": 0.0116, "step": 1265 }, { "epoch": 11.0, "eval_accuracy": 0.9310328660509166, "eval_f1": 0.7478805006055712, "eval_loss": 0.3827316462993622, "eval_precision": 0.7515212981744422, "eval_recall": 0.7442748091603053, "eval_runtime": 4.4769, "eval_samples_per_second": 212.648, "eval_steps_per_second": 13.402, "step": 1265 }, { "epoch": 12.0, "grad_norm": 1.6821355819702148, "learning_rate": 1.3359098228663448e-05, "loss": 0.0101, "step": 1380 }, { "epoch": 12.0, "eval_accuracy": 0.9300951755825402, "eval_f1": 0.7481072232453447, "eval_loss": 0.403070330619812, "eval_precision": 0.762301918265221, "eval_recall": 0.7344314985938127, "eval_runtime": 4.4976, "eval_samples_per_second": 211.666, "eval_steps_per_second": 13.34, "step": 1380 }, { "epoch": 13.0, "grad_norm": 0.06722108274698257, "learning_rate": 1.2618357487922706e-05, "loss": 0.0074, "step": 1495 }, { "epoch": 13.0, "eval_accuracy": 0.9326269398471565, "eval_f1": 0.7498711738637535, "eval_loss": 0.3884039521217346, "eval_precision": 0.7699470899470899, "eval_recall": 0.7308155885897951, "eval_runtime": 4.5432, "eval_samples_per_second": 209.544, "eval_steps_per_second": 13.207, "step": 1495 }, { "epoch": 14.0, "grad_norm": 0.2433609515428543, "learning_rate": 1.1877616747181966e-05, "loss": 0.0061, "step": 1610 }, { "epoch": 14.0, "eval_accuracy": 0.9247972244362136, "eval_f1": 0.7140626626418237, "eval_loss": 0.4754845201969147, "eval_precision": 0.7409807733851804, "eval_recall": 0.6890317396544797, "eval_runtime": 4.5526, "eval_samples_per_second": 209.113, "eval_steps_per_second": 13.179, "step": 1610 }, { "epoch": 15.0, "grad_norm": 0.013076603412628174, "learning_rate": 1.1136876006441224e-05, "loss": 0.0056, "step": 1725 }, { "epoch": 15.0, "eval_accuracy": 0.9288761779736509, "eval_f1": 0.7404103479036573, "eval_loss": 0.4164673686027527, "eval_precision": 0.7307767560164351, "eval_recall": 0.7503013258336682, "eval_runtime": 4.5295, "eval_samples_per_second": 210.177, "eval_steps_per_second": 13.246, "step": 1725 }, { "epoch": 16.0, "grad_norm": 0.03739052265882492, "learning_rate": 1.0396135265700484e-05, "loss": 0.0052, "step": 1840 }, { "epoch": 16.0, "eval_accuracy": 0.9296966571334803, "eval_f1": 0.744733952281122, "eval_loss": 0.43249383568763733, "eval_precision": 0.7402262353641595, "eval_recall": 0.7492969063881076, "eval_runtime": 4.5747, "eval_samples_per_second": 208.102, "eval_steps_per_second": 13.116, "step": 1840 }, { "epoch": 17.0, "grad_norm": 0.07633493095636368, "learning_rate": 9.655394524959744e-06, "loss": 0.0059, "step": 1955 }, { "epoch": 17.0, "eval_accuracy": 0.9313141731914295, "eval_f1": 0.7502017756255044, "eval_loss": 0.4149955213069916, "eval_precision": 0.7535468179975678, "eval_recall": 0.7468862997187625, "eval_runtime": 4.5473, "eval_samples_per_second": 209.353, "eval_steps_per_second": 13.195, "step": 1955 }, { "epoch": 18.0, "grad_norm": 0.006521929986774921, "learning_rate": 8.914653784219003e-06, "loss": 0.0034, "step": 2070 }, { "epoch": 18.0, "eval_accuracy": 0.9325097285386094, "eval_f1": 0.750355474304286, "eval_loss": 0.4321502447128296, "eval_precision": 0.7588331963845522, "eval_recall": 0.7420650863800723, "eval_runtime": 4.5005, "eval_samples_per_second": 211.53, "eval_steps_per_second": 13.332, "step": 2070 }, { "epoch": 19.0, "grad_norm": 0.045015785843133926, "learning_rate": 8.173913043478263e-06, "loss": 0.0037, "step": 2185 }, { "epoch": 19.0, "eval_accuracy": 0.9316892493787801, "eval_f1": 0.7498972461981094, "eval_loss": 0.4423995316028595, "eval_precision": 0.7675641564997896, "eval_recall": 0.7330253113700281, "eval_runtime": 4.5385, "eval_samples_per_second": 209.761, "eval_steps_per_second": 13.22, "step": 2185 }, { "epoch": 20.0, "grad_norm": 0.007699380628764629, "learning_rate": 7.43317230273752e-06, "loss": 0.0034, "step": 2300 }, { "epoch": 20.0, "eval_accuracy": 0.9285011017863003, "eval_f1": 0.7358452138492871, "eval_loss": 0.4641232490539551, "eval_precision": 0.7461792647666253, "eval_recall": 0.7257934913619928, "eval_runtime": 4.5632, "eval_samples_per_second": 208.626, "eval_steps_per_second": 13.149, "step": 2300 }, { "epoch": 21.0, "grad_norm": 0.019550923258066177, "learning_rate": 6.692431561996779e-06, "loss": 0.0028, "step": 2415 }, { "epoch": 21.0, "eval_accuracy": 0.9311031928360448, "eval_f1": 0.7502527805864508, "eval_loss": 0.4524008631706238, "eval_precision": 0.7552931596091205, "eval_recall": 0.7452792286058658, "eval_runtime": 4.5032, "eval_samples_per_second": 211.407, "eval_steps_per_second": 13.324, "step": 2415 }, { "epoch": 22.0, "grad_norm": 0.006682181265205145, "learning_rate": 5.951690821256038e-06, "loss": 0.0023, "step": 2530 }, { "epoch": 22.0, "eval_accuracy": 0.9294153499929674, "eval_f1": 0.745173745173745, "eval_loss": 0.46753570437431335, "eval_precision": 0.75390625, "eval_recall": 0.7366412213740458, "eval_runtime": 4.4674, "eval_samples_per_second": 213.099, "eval_steps_per_second": 13.431, "step": 2530 }, { "epoch": 23.0, "grad_norm": 0.005257639102637768, "learning_rate": 5.210950080515298e-06, "loss": 0.0022, "step": 2645 }, { "epoch": 23.0, "eval_accuracy": 0.9308453279572413, "eval_f1": 0.7522655534059668, "eval_loss": 0.4650319218635559, "eval_precision": 0.7627503613462729, "eval_recall": 0.7420650863800723, "eval_runtime": 4.5526, "eval_samples_per_second": 209.113, "eval_steps_per_second": 13.179, "step": 2645 }, { "epoch": 24.0, "grad_norm": 0.04593547806143761, "learning_rate": 4.4702093397745575e-06, "loss": 0.0021, "step": 2760 }, { "epoch": 24.0, "eval_accuracy": 0.9316658071170707, "eval_f1": 0.7469412724306688, "eval_loss": 0.47501814365386963, "eval_precision": 0.7583850931677019, "eval_recall": 0.7358376858175975, "eval_runtime": 26.4469, "eval_samples_per_second": 35.997, "eval_steps_per_second": 2.269, "step": 2760 }, { "epoch": 25.0, "grad_norm": 0.1780153512954712, "learning_rate": 3.729468599033817e-06, "loss": 0.0022, "step": 2875 }, { "epoch": 25.0, "eval_accuracy": 0.9307750011721131, "eval_f1": 0.7475728155339806, "eval_loss": 0.4700013995170593, "eval_precision": 0.7527494908350305, "eval_recall": 0.7424668541582965, "eval_runtime": 4.5324, "eval_samples_per_second": 210.043, "eval_steps_per_second": 13.238, "step": 2875 }, { "epoch": 26.0, "grad_norm": 0.2522285580635071, "learning_rate": 2.988727858293076e-06, "loss": 0.0015, "step": 2990 }, { "epoch": 26.0, "eval_accuracy": 0.9324628440151906, "eval_f1": 0.7531491263713938, "eval_loss": 0.47662001848220825, "eval_precision": 0.761816687217427, "eval_recall": 0.7446765769385295, "eval_runtime": 4.5408, "eval_samples_per_second": 209.653, "eval_steps_per_second": 13.213, "step": 2990 }, { "epoch": 27.0, "grad_norm": 0.290810763835907, "learning_rate": 2.247987117552335e-06, "loss": 0.0018, "step": 3105 }, { "epoch": 27.0, "eval_accuracy": 0.9317361339021989, "eval_f1": 0.7472753444375899, "eval_loss": 0.48660585284233093, "eval_precision": 0.7653748946925021, "eval_recall": 0.7300120530333467, "eval_runtime": 4.5274, "eval_samples_per_second": 210.274, "eval_steps_per_second": 13.253, "step": 3105 }, { "epoch": 28.0, "grad_norm": 1.0475250482559204, "learning_rate": 1.5072463768115944e-06, "loss": 0.0016, "step": 3220 }, { "epoch": 28.0, "eval_accuracy": 0.9320408833044213, "eval_f1": 0.752308472856418, "eval_loss": 0.4792047142982483, "eval_precision": 0.7600984211605495, "eval_recall": 0.7446765769385295, "eval_runtime": 4.5334, "eval_samples_per_second": 209.997, "eval_steps_per_second": 13.235, "step": 3220 }, { "epoch": 29.0, "grad_norm": 0.029815517365932465, "learning_rate": 7.665056360708536e-07, "loss": 0.0014, "step": 3335 }, { "epoch": 29.0, "eval_accuracy": 0.9320174410427118, "eval_f1": 0.7504327461561959, "eval_loss": 0.4807119071483612, "eval_precision": 0.7608920090852778, "eval_recall": 0.7402571313780635, "eval_runtime": 4.4183, "eval_samples_per_second": 215.467, "eval_steps_per_second": 13.58, "step": 3335 }, { "epoch": 30.0, "grad_norm": 0.06022266671061516, "learning_rate": 2.5764895330112724e-08, "loss": 0.0014, "step": 3450 }, { "epoch": 30.0, "eval_accuracy": 0.9322049791363871, "eval_f1": 0.7508629441624366, "eval_loss": 0.4801517724990845, "eval_precision": 0.7590311986863711, "eval_recall": 0.7428686219365207, "eval_runtime": 4.5052, "eval_samples_per_second": 211.31, "eval_steps_per_second": 13.318, "step": 3450 }, { "epoch": 30.0, "step": 3450, "total_flos": 1629028238600664.0, "train_loss": 0.07025153513403906, "train_runtime": 736.8598, "train_samples_per_second": 74.79, "train_steps_per_second": 4.682 } ], "logging_steps": 500, "max_steps": 3450, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1629028238600664.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }