| { | |
| "best_metric": 0.7580168991143236, | |
| "best_model_checkpoint": "/nfs/production/literature/amina-mardiyyah/new_data/OT-Entity-Extraction-Pipeline/model_outputs/hf/CeLLaTe-AL-Test_1.0/base/cellate_tapt_freeze_llrd_ww_mask_LR_2e/checkpoint-920", | |
| "epoch": 30.0, | |
| "eval_steps": 500, | |
| "global_step": 3450, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 2.0460078716278076, | |
| "learning_rate": 6.434782608695652e-06, | |
| "loss": 1.3009, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.8241595949177176, | |
| "eval_f1": 0.16903515332834707, | |
| "eval_loss": 0.6194407939910889, | |
| "eval_precision": 0.3309900410076157, | |
| "eval_recall": 0.11349939734833267, | |
| "eval_runtime": 4.4674, | |
| "eval_samples_per_second": 213.098, | |
| "eval_steps_per_second": 13.431, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 2.8860368728637695, | |
| "learning_rate": 1.3101449275362319e-05, | |
| "loss": 0.3335, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.9082938721927891, | |
| "eval_f1": 0.6236261015942172, | |
| "eval_loss": 0.29858899116516113, | |
| "eval_precision": 0.6149189611404022, | |
| "eval_recall": 0.6325833668139815, | |
| "eval_runtime": 4.5486, | |
| "eval_samples_per_second": 209.295, | |
| "eval_steps_per_second": 13.191, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 2.3074285984039307, | |
| "learning_rate": 1.9768115942028986e-05, | |
| "loss": 0.1312, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.9199915607857846, | |
| "eval_f1": 0.6880798004987531, | |
| "eval_loss": 0.26223188638687134, | |
| "eval_precision": 0.6833762631266098, | |
| "eval_recall": 0.6928485335476094, | |
| "eval_runtime": 4.4824, | |
| "eval_samples_per_second": 212.386, | |
| "eval_steps_per_second": 13.386, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 2.443023204803467, | |
| "learning_rate": 1.9285024154589372e-05, | |
| "loss": 0.0797, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.9264850672792911, | |
| "eval_f1": 0.7306420915556447, | |
| "eval_loss": 0.27330681681632996, | |
| "eval_precision": 0.7286713286713287, | |
| "eval_recall": 0.7326235435918039, | |
| "eval_runtime": 4.5427, | |
| "eval_samples_per_second": 209.567, | |
| "eval_steps_per_second": 13.208, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 4.006119251251221, | |
| "learning_rate": 1.8544283413848634e-05, | |
| "loss": 0.0566, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.9305874630784378, | |
| "eval_f1": 0.746298320580344, | |
| "eval_loss": 0.2804930508136749, | |
| "eval_precision": 0.7384464110127827, | |
| "eval_recall": 0.75431900361591, | |
| "eval_runtime": 4.4864, | |
| "eval_samples_per_second": 212.196, | |
| "eval_steps_per_second": 13.374, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 2.0166711807250977, | |
| "learning_rate": 1.7803542673107893e-05, | |
| "loss": 0.0404, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.927797833935018, | |
| "eval_f1": 0.7355380362444968, | |
| "eval_loss": 0.3372795581817627, | |
| "eval_precision": 0.7500522029651284, | |
| "eval_recall": 0.7215749296906389, | |
| "eval_runtime": 4.5271, | |
| "eval_samples_per_second": 210.29, | |
| "eval_steps_per_second": 13.254, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 2.738591194152832, | |
| "learning_rate": 1.706280193236715e-05, | |
| "loss": 0.0299, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.9269304702517699, | |
| "eval_f1": 0.7306815827636214, | |
| "eval_loss": 0.3340073227882385, | |
| "eval_precision": 0.7542771599657827, | |
| "eval_recall": 0.7085174768983528, | |
| "eval_runtime": 4.5362, | |
| "eval_samples_per_second": 209.869, | |
| "eval_steps_per_second": 13.227, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 1.060254454612732, | |
| "learning_rate": 1.632206119162641e-05, | |
| "loss": 0.0221, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.9331426696047634, | |
| "eval_f1": 0.7580168991143236, | |
| "eval_loss": 0.31868091225624084, | |
| "eval_precision": 0.7684210526315789, | |
| "eval_recall": 0.747890719164323, | |
| "eval_runtime": 4.4814, | |
| "eval_samples_per_second": 212.432, | |
| "eval_steps_per_second": 13.389, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 0.8265031576156616, | |
| "learning_rate": 1.5581320450885667e-05, | |
| "loss": 0.0172, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.9277275071498898, | |
| "eval_f1": 0.7327145687767235, | |
| "eval_loss": 0.39539921283721924, | |
| "eval_precision": 0.7618737800910865, | |
| "eval_recall": 0.7057051024507834, | |
| "eval_runtime": 4.5117, | |
| "eval_samples_per_second": 211.008, | |
| "eval_steps_per_second": 13.299, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 0.03647367283701897, | |
| "learning_rate": 1.484057971014493e-05, | |
| "loss": 0.0142, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.9295560035632238, | |
| "eval_f1": 0.7459492509935799, | |
| "eval_loss": 0.38732069730758667, | |
| "eval_precision": 0.7569803516028956, | |
| "eval_recall": 0.7352350341502611, | |
| "eval_runtime": 4.5775, | |
| "eval_samples_per_second": 207.974, | |
| "eval_steps_per_second": 13.108, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "grad_norm": 2.521304130554199, | |
| "learning_rate": 1.4099838969404187e-05, | |
| "loss": 0.0116, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.9310328660509166, | |
| "eval_f1": 0.7478805006055712, | |
| "eval_loss": 0.3827316462993622, | |
| "eval_precision": 0.7515212981744422, | |
| "eval_recall": 0.7442748091603053, | |
| "eval_runtime": 4.4769, | |
| "eval_samples_per_second": 212.648, | |
| "eval_steps_per_second": 13.402, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 1.6821355819702148, | |
| "learning_rate": 1.3359098228663448e-05, | |
| "loss": 0.0101, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.9300951755825402, | |
| "eval_f1": 0.7481072232453447, | |
| "eval_loss": 0.403070330619812, | |
| "eval_precision": 0.762301918265221, | |
| "eval_recall": 0.7344314985938127, | |
| "eval_runtime": 4.4976, | |
| "eval_samples_per_second": 211.666, | |
| "eval_steps_per_second": 13.34, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "grad_norm": 0.06722108274698257, | |
| "learning_rate": 1.2618357487922706e-05, | |
| "loss": 0.0074, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.9326269398471565, | |
| "eval_f1": 0.7498711738637535, | |
| "eval_loss": 0.3884039521217346, | |
| "eval_precision": 0.7699470899470899, | |
| "eval_recall": 0.7308155885897951, | |
| "eval_runtime": 4.5432, | |
| "eval_samples_per_second": 209.544, | |
| "eval_steps_per_second": 13.207, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "grad_norm": 0.2433609515428543, | |
| "learning_rate": 1.1877616747181966e-05, | |
| "loss": 0.0061, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.9247972244362136, | |
| "eval_f1": 0.7140626626418237, | |
| "eval_loss": 0.4754845201969147, | |
| "eval_precision": 0.7409807733851804, | |
| "eval_recall": 0.6890317396544797, | |
| "eval_runtime": 4.5526, | |
| "eval_samples_per_second": 209.113, | |
| "eval_steps_per_second": 13.179, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 0.013076603412628174, | |
| "learning_rate": 1.1136876006441224e-05, | |
| "loss": 0.0056, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.9288761779736509, | |
| "eval_f1": 0.7404103479036573, | |
| "eval_loss": 0.4164673686027527, | |
| "eval_precision": 0.7307767560164351, | |
| "eval_recall": 0.7503013258336682, | |
| "eval_runtime": 4.5295, | |
| "eval_samples_per_second": 210.177, | |
| "eval_steps_per_second": 13.246, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "grad_norm": 0.03739052265882492, | |
| "learning_rate": 1.0396135265700484e-05, | |
| "loss": 0.0052, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.9296966571334803, | |
| "eval_f1": 0.744733952281122, | |
| "eval_loss": 0.43249383568763733, | |
| "eval_precision": 0.7402262353641595, | |
| "eval_recall": 0.7492969063881076, | |
| "eval_runtime": 4.5747, | |
| "eval_samples_per_second": 208.102, | |
| "eval_steps_per_second": 13.116, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "grad_norm": 0.07633493095636368, | |
| "learning_rate": 9.655394524959744e-06, | |
| "loss": 0.0059, | |
| "step": 1955 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.9313141731914295, | |
| "eval_f1": 0.7502017756255044, | |
| "eval_loss": 0.4149955213069916, | |
| "eval_precision": 0.7535468179975678, | |
| "eval_recall": 0.7468862997187625, | |
| "eval_runtime": 4.5473, | |
| "eval_samples_per_second": 209.353, | |
| "eval_steps_per_second": 13.195, | |
| "step": 1955 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "grad_norm": 0.006521929986774921, | |
| "learning_rate": 8.914653784219003e-06, | |
| "loss": 0.0034, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.9325097285386094, | |
| "eval_f1": 0.750355474304286, | |
| "eval_loss": 0.4321502447128296, | |
| "eval_precision": 0.7588331963845522, | |
| "eval_recall": 0.7420650863800723, | |
| "eval_runtime": 4.5005, | |
| "eval_samples_per_second": 211.53, | |
| "eval_steps_per_second": 13.332, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "grad_norm": 0.045015785843133926, | |
| "learning_rate": 8.173913043478263e-06, | |
| "loss": 0.0037, | |
| "step": 2185 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.9316892493787801, | |
| "eval_f1": 0.7498972461981094, | |
| "eval_loss": 0.4423995316028595, | |
| "eval_precision": 0.7675641564997896, | |
| "eval_recall": 0.7330253113700281, | |
| "eval_runtime": 4.5385, | |
| "eval_samples_per_second": 209.761, | |
| "eval_steps_per_second": 13.22, | |
| "step": 2185 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 0.007699380628764629, | |
| "learning_rate": 7.43317230273752e-06, | |
| "loss": 0.0034, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.9285011017863003, | |
| "eval_f1": 0.7358452138492871, | |
| "eval_loss": 0.4641232490539551, | |
| "eval_precision": 0.7461792647666253, | |
| "eval_recall": 0.7257934913619928, | |
| "eval_runtime": 4.5632, | |
| "eval_samples_per_second": 208.626, | |
| "eval_steps_per_second": 13.149, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "grad_norm": 0.019550923258066177, | |
| "learning_rate": 6.692431561996779e-06, | |
| "loss": 0.0028, | |
| "step": 2415 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_accuracy": 0.9311031928360448, | |
| "eval_f1": 0.7502527805864508, | |
| "eval_loss": 0.4524008631706238, | |
| "eval_precision": 0.7552931596091205, | |
| "eval_recall": 0.7452792286058658, | |
| "eval_runtime": 4.5032, | |
| "eval_samples_per_second": 211.407, | |
| "eval_steps_per_second": 13.324, | |
| "step": 2415 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "grad_norm": 0.006682181265205145, | |
| "learning_rate": 5.951690821256038e-06, | |
| "loss": 0.0023, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.9294153499929674, | |
| "eval_f1": 0.745173745173745, | |
| "eval_loss": 0.46753570437431335, | |
| "eval_precision": 0.75390625, | |
| "eval_recall": 0.7366412213740458, | |
| "eval_runtime": 4.4674, | |
| "eval_samples_per_second": 213.099, | |
| "eval_steps_per_second": 13.431, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "grad_norm": 0.005257639102637768, | |
| "learning_rate": 5.210950080515298e-06, | |
| "loss": 0.0022, | |
| "step": 2645 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_accuracy": 0.9308453279572413, | |
| "eval_f1": 0.7522655534059668, | |
| "eval_loss": 0.4650319218635559, | |
| "eval_precision": 0.7627503613462729, | |
| "eval_recall": 0.7420650863800723, | |
| "eval_runtime": 4.5526, | |
| "eval_samples_per_second": 209.113, | |
| "eval_steps_per_second": 13.179, | |
| "step": 2645 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "grad_norm": 0.04593547806143761, | |
| "learning_rate": 4.4702093397745575e-06, | |
| "loss": 0.0021, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.9316658071170707, | |
| "eval_f1": 0.7469412724306688, | |
| "eval_loss": 0.47501814365386963, | |
| "eval_precision": 0.7583850931677019, | |
| "eval_recall": 0.7358376858175975, | |
| "eval_runtime": 26.4469, | |
| "eval_samples_per_second": 35.997, | |
| "eval_steps_per_second": 2.269, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "grad_norm": 0.1780153512954712, | |
| "learning_rate": 3.729468599033817e-06, | |
| "loss": 0.0022, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_accuracy": 0.9307750011721131, | |
| "eval_f1": 0.7475728155339806, | |
| "eval_loss": 0.4700013995170593, | |
| "eval_precision": 0.7527494908350305, | |
| "eval_recall": 0.7424668541582965, | |
| "eval_runtime": 4.5324, | |
| "eval_samples_per_second": 210.043, | |
| "eval_steps_per_second": 13.238, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "grad_norm": 0.2522285580635071, | |
| "learning_rate": 2.988727858293076e-06, | |
| "loss": 0.0015, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.9324628440151906, | |
| "eval_f1": 0.7531491263713938, | |
| "eval_loss": 0.47662001848220825, | |
| "eval_precision": 0.761816687217427, | |
| "eval_recall": 0.7446765769385295, | |
| "eval_runtime": 4.5408, | |
| "eval_samples_per_second": 209.653, | |
| "eval_steps_per_second": 13.213, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "grad_norm": 0.290810763835907, | |
| "learning_rate": 2.247987117552335e-06, | |
| "loss": 0.0018, | |
| "step": 3105 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_accuracy": 0.9317361339021989, | |
| "eval_f1": 0.7472753444375899, | |
| "eval_loss": 0.48660585284233093, | |
| "eval_precision": 0.7653748946925021, | |
| "eval_recall": 0.7300120530333467, | |
| "eval_runtime": 4.5274, | |
| "eval_samples_per_second": 210.274, | |
| "eval_steps_per_second": 13.253, | |
| "step": 3105 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "grad_norm": 1.0475250482559204, | |
| "learning_rate": 1.5072463768115944e-06, | |
| "loss": 0.0016, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.9320408833044213, | |
| "eval_f1": 0.752308472856418, | |
| "eval_loss": 0.4792047142982483, | |
| "eval_precision": 0.7600984211605495, | |
| "eval_recall": 0.7446765769385295, | |
| "eval_runtime": 4.5334, | |
| "eval_samples_per_second": 209.997, | |
| "eval_steps_per_second": 13.235, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "grad_norm": 0.029815517365932465, | |
| "learning_rate": 7.665056360708536e-07, | |
| "loss": 0.0014, | |
| "step": 3335 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_accuracy": 0.9320174410427118, | |
| "eval_f1": 0.7504327461561959, | |
| "eval_loss": 0.4807119071483612, | |
| "eval_precision": 0.7608920090852778, | |
| "eval_recall": 0.7402571313780635, | |
| "eval_runtime": 4.4183, | |
| "eval_samples_per_second": 215.467, | |
| "eval_steps_per_second": 13.58, | |
| "step": 3335 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "grad_norm": 0.06022266671061516, | |
| "learning_rate": 2.5764895330112724e-08, | |
| "loss": 0.0014, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.9322049791363871, | |
| "eval_f1": 0.7508629441624366, | |
| "eval_loss": 0.4801517724990845, | |
| "eval_precision": 0.7590311986863711, | |
| "eval_recall": 0.7428686219365207, | |
| "eval_runtime": 4.5052, | |
| "eval_samples_per_second": 211.31, | |
| "eval_steps_per_second": 13.318, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "step": 3450, | |
| "total_flos": 1629028238600664.0, | |
| "train_loss": 0.07025153513403906, | |
| "train_runtime": 736.8598, | |
| "train_samples_per_second": 74.79, | |
| "train_steps_per_second": 4.682 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 3450, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 30, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1629028238600664.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |