| { |
| "best_global_step": 1430, |
| "best_metric": 0.6789528523052879, |
| "best_model_checkpoint": "energy_intelligence_multitask_custom_ner/checkpoint-1430", |
| "epoch": 5.0, |
| "eval_steps": 500, |
| "global_step": 1430, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.06993006993006994, |
| "grad_norm": 14.66324520111084, |
| "learning_rate": 2.6573426573426574e-06, |
| "loss": 2.297128105163574, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.13986013986013987, |
| "grad_norm": 8.70977783203125, |
| "learning_rate": 5.4545454545454545e-06, |
| "loss": 1.5782511711120606, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.2097902097902098, |
| "grad_norm": 0.9631044268608093, |
| "learning_rate": 8.251748251748254e-06, |
| "loss": 0.7814332008361816, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.27972027972027974, |
| "grad_norm": 0.513141393661499, |
| "learning_rate": 1.1048951048951048e-05, |
| "loss": 0.4949788570404053, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.34965034965034963, |
| "grad_norm": 0.3357048034667969, |
| "learning_rate": 1.3846153846153847e-05, |
| "loss": 0.4088569164276123, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.4195804195804196, |
| "grad_norm": 0.44831985235214233, |
| "learning_rate": 1.6643356643356645e-05, |
| "loss": 0.3331140518188477, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.48951048951048953, |
| "grad_norm": 0.38962382078170776, |
| "learning_rate": 1.944055944055944e-05, |
| "loss": 0.28518569469451904, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.5594405594405595, |
| "grad_norm": 0.550012469291687, |
| "learning_rate": 1.9751359751359752e-05, |
| "loss": 0.255126428604126, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.6293706293706294, |
| "grad_norm": 0.4113660752773285, |
| "learning_rate": 1.944055944055944e-05, |
| "loss": 0.23595545291900635, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.6993006993006993, |
| "grad_norm": 0.5528178811073303, |
| "learning_rate": 1.912975912975913e-05, |
| "loss": 0.23385438919067383, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.7692307692307693, |
| "grad_norm": 0.5938523411750793, |
| "learning_rate": 1.8818958818958822e-05, |
| "loss": 0.217702054977417, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.8391608391608392, |
| "grad_norm": 0.5627780556678772, |
| "learning_rate": 1.850815850815851e-05, |
| "loss": 0.20575783252716065, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.9090909090909091, |
| "grad_norm": 0.6800552606582642, |
| "learning_rate": 1.81973581973582e-05, |
| "loss": 0.20057928562164307, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.9790209790209791, |
| "grad_norm": 0.5200193524360657, |
| "learning_rate": 1.7886557886557888e-05, |
| "loss": 0.1952407717704773, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_COMMODITY_f1": 0.4243, |
| "eval_COMPANY_f1": 0.6975, |
| "eval_COUNTRY_f1": 0.8103, |
| "eval_EVENT_f1": 0.0509, |
| "eval_INFRASTRUCTURE_f1": 0.0265, |
| "eval_LOCATION_f1": 0.5881, |
| "eval_MARKET_f1": 0.3048, |
| "eval_ORGANIZATION_f1": 0.5435, |
| "eval_PERSON_f1": 0.6313, |
| "eval_accuracy": 0.9383044022343244, |
| "eval_f1": 0.5853641863485661, |
| "eval_loss": 0.18643628060817719, |
| "eval_precision": 0.5841462582713691, |
| "eval_recall": 0.5865872037265378, |
| "eval_runtime": 4.3741, |
| "eval_samples_per_second": 261.541, |
| "eval_steps_per_second": 4.115, |
| "step": 286 |
| }, |
| { |
| "epoch": 1.048951048951049, |
| "grad_norm": 0.45493006706237793, |
| "learning_rate": 1.7575757575757576e-05, |
| "loss": 0.18901759386062622, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.118881118881119, |
| "grad_norm": 0.4867892861366272, |
| "learning_rate": 1.7264957264957267e-05, |
| "loss": 0.18016949892044068, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.1888111888111887, |
| "grad_norm": 0.5517728328704834, |
| "learning_rate": 1.6954156954156954e-05, |
| "loss": 0.18962303400039673, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.2587412587412588, |
| "grad_norm": 0.5174335241317749, |
| "learning_rate": 1.6643356643356645e-05, |
| "loss": 0.18070975542068482, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.3286713286713288, |
| "grad_norm": 0.5536178350448608, |
| "learning_rate": 1.6332556332556333e-05, |
| "loss": 0.17582471370697023, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.3986013986013985, |
| "grad_norm": 0.49411725997924805, |
| "learning_rate": 1.6021756021756024e-05, |
| "loss": 0.17660335302352906, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.4685314685314685, |
| "grad_norm": 0.37996432185173035, |
| "learning_rate": 1.5710955710955715e-05, |
| "loss": 0.1685216546058655, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.5384615384615383, |
| "grad_norm": 0.5632461905479431, |
| "learning_rate": 1.5400155400155402e-05, |
| "loss": 0.1708904027938843, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.6083916083916083, |
| "grad_norm": 0.4930890202522278, |
| "learning_rate": 1.5089355089355091e-05, |
| "loss": 0.16708383560180665, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.6783216783216783, |
| "grad_norm": 0.5667280554771423, |
| "learning_rate": 1.4778554778554779e-05, |
| "loss": 0.16816866397857666, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.7482517482517483, |
| "grad_norm": 0.5394991040229797, |
| "learning_rate": 1.4467754467754468e-05, |
| "loss": 0.15965052843093872, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.8181818181818183, |
| "grad_norm": 0.4395284354686737, |
| "learning_rate": 1.415695415695416e-05, |
| "loss": 0.16719096899032593, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.8881118881118881, |
| "grad_norm": 0.44819605350494385, |
| "learning_rate": 1.3846153846153847e-05, |
| "loss": 0.16433268785476685, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.958041958041958, |
| "grad_norm": 0.630233645439148, |
| "learning_rate": 1.3535353535353538e-05, |
| "loss": 0.1701101541519165, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_COMMODITY_f1": 0.5098, |
| "eval_COMPANY_f1": 0.7415, |
| "eval_COUNTRY_f1": 0.8307, |
| "eval_EVENT_f1": 0.2393, |
| "eval_INFRASTRUCTURE_f1": 0.2385, |
| "eval_LOCATION_f1": 0.6418, |
| "eval_MARKET_f1": 0.3517, |
| "eval_ORGANIZATION_f1": 0.6229, |
| "eval_PERSON_f1": 0.6953, |
| "eval_accuracy": 0.9454946483875253, |
| "eval_f1": 0.6355833913245186, |
| "eval_loss": 0.15933312475681305, |
| "eval_precision": 0.6155722446883626, |
| "eval_recall": 0.6569393067543499, |
| "eval_runtime": 4.404, |
| "eval_samples_per_second": 259.765, |
| "eval_steps_per_second": 4.087, |
| "step": 572 |
| }, |
| { |
| "epoch": 2.027972027972028, |
| "grad_norm": 0.6636055111885071, |
| "learning_rate": 1.3224553224553225e-05, |
| "loss": 0.16211626529693604, |
| "step": 580 |
| }, |
| { |
| "epoch": 2.097902097902098, |
| "grad_norm": 0.5422558188438416, |
| "learning_rate": 1.2913752913752915e-05, |
| "loss": 0.1504289388656616, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.167832167832168, |
| "grad_norm": 0.5456656813621521, |
| "learning_rate": 1.2602952602952606e-05, |
| "loss": 0.15346094369888305, |
| "step": 620 |
| }, |
| { |
| "epoch": 2.237762237762238, |
| "grad_norm": 0.5587140321731567, |
| "learning_rate": 1.2292152292152293e-05, |
| "loss": 0.15093343257904052, |
| "step": 640 |
| }, |
| { |
| "epoch": 2.3076923076923075, |
| "grad_norm": 0.4914584457874298, |
| "learning_rate": 1.1981351981351982e-05, |
| "loss": 0.14887770414352416, |
| "step": 660 |
| }, |
| { |
| "epoch": 2.3776223776223775, |
| "grad_norm": 0.42390987277030945, |
| "learning_rate": 1.1670551670551672e-05, |
| "loss": 0.15100462436676027, |
| "step": 680 |
| }, |
| { |
| "epoch": 2.4475524475524475, |
| "grad_norm": 0.42798611521720886, |
| "learning_rate": 1.1359751359751361e-05, |
| "loss": 0.14843168258666992, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.5174825174825175, |
| "grad_norm": 0.559911847114563, |
| "learning_rate": 1.1048951048951048e-05, |
| "loss": 0.14720556735992432, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.5874125874125875, |
| "grad_norm": 0.5178841948509216, |
| "learning_rate": 1.073815073815074e-05, |
| "loss": 0.14619035720825196, |
| "step": 740 |
| }, |
| { |
| "epoch": 2.6573426573426575, |
| "grad_norm": 0.6083295941352844, |
| "learning_rate": 1.0427350427350429e-05, |
| "loss": 0.14750727415084838, |
| "step": 760 |
| }, |
| { |
| "epoch": 2.7272727272727275, |
| "grad_norm": 0.46528416872024536, |
| "learning_rate": 1.0116550116550116e-05, |
| "loss": 0.14334226846694947, |
| "step": 780 |
| }, |
| { |
| "epoch": 2.797202797202797, |
| "grad_norm": 0.48938772082328796, |
| "learning_rate": 9.805749805749807e-06, |
| "loss": 0.14555764198303223, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.867132867132867, |
| "grad_norm": 0.4275546371936798, |
| "learning_rate": 9.494949494949497e-06, |
| "loss": 0.14947701692581178, |
| "step": 820 |
| }, |
| { |
| "epoch": 2.937062937062937, |
| "grad_norm": 0.5199507474899292, |
| "learning_rate": 9.184149184149184e-06, |
| "loss": 0.14035249948501588, |
| "step": 840 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_COMMODITY_f1": 0.4887, |
| "eval_COMPANY_f1": 0.7755, |
| "eval_COUNTRY_f1": 0.8455, |
| "eval_EVENT_f1": 0.3252, |
| "eval_INFRASTRUCTURE_f1": 0.3, |
| "eval_LOCATION_f1": 0.6741, |
| "eval_MARKET_f1": 0.4189, |
| "eval_ORGANIZATION_f1": 0.6693, |
| "eval_PERSON_f1": 0.7555, |
| "eval_accuracy": 0.9481756978768325, |
| "eval_f1": 0.6681568653232964, |
| "eval_loss": 0.15031008422374725, |
| "eval_precision": 0.6422928648170385, |
| "eval_recall": 0.6961912590765859, |
| "eval_runtime": 4.4312, |
| "eval_samples_per_second": 258.167, |
| "eval_steps_per_second": 4.062, |
| "step": 858 |
| }, |
| { |
| "epoch": 3.006993006993007, |
| "grad_norm": 0.5143063068389893, |
| "learning_rate": 8.873348873348873e-06, |
| "loss": 0.13901138305664062, |
| "step": 860 |
| }, |
| { |
| "epoch": 3.076923076923077, |
| "grad_norm": 0.46927395462989807, |
| "learning_rate": 8.562548562548563e-06, |
| "loss": 0.13868144750595093, |
| "step": 880 |
| }, |
| { |
| "epoch": 3.1468531468531467, |
| "grad_norm": 0.4429858922958374, |
| "learning_rate": 8.251748251748254e-06, |
| "loss": 0.13494281768798827, |
| "step": 900 |
| }, |
| { |
| "epoch": 3.2167832167832167, |
| "grad_norm": 0.4369621276855469, |
| "learning_rate": 7.940947940947941e-06, |
| "loss": 0.13719457387924194, |
| "step": 920 |
| }, |
| { |
| "epoch": 3.2867132867132867, |
| "grad_norm": 0.5295616388320923, |
| "learning_rate": 7.63014763014763e-06, |
| "loss": 0.13230640888214112, |
| "step": 940 |
| }, |
| { |
| "epoch": 3.3566433566433567, |
| "grad_norm": 0.7585546374320984, |
| "learning_rate": 7.31934731934732e-06, |
| "loss": 0.13142707347869872, |
| "step": 960 |
| }, |
| { |
| "epoch": 3.4265734265734267, |
| "grad_norm": 0.7057129740715027, |
| "learning_rate": 7.008547008547009e-06, |
| "loss": 0.13272271156311036, |
| "step": 980 |
| }, |
| { |
| "epoch": 3.4965034965034967, |
| "grad_norm": 0.4822175204753876, |
| "learning_rate": 6.697746697746699e-06, |
| "loss": 0.13051105737686158, |
| "step": 1000 |
| }, |
| { |
| "epoch": 3.5664335664335667, |
| "grad_norm": 0.6353693008422852, |
| "learning_rate": 6.3869463869463875e-06, |
| "loss": 0.1364367723464966, |
| "step": 1020 |
| }, |
| { |
| "epoch": 3.6363636363636362, |
| "grad_norm": 0.6322542428970337, |
| "learning_rate": 6.076146076146077e-06, |
| "loss": 0.13394793272018432, |
| "step": 1040 |
| }, |
| { |
| "epoch": 3.7062937062937062, |
| "grad_norm": 0.6142822504043579, |
| "learning_rate": 5.765345765345766e-06, |
| "loss": 0.13053072690963746, |
| "step": 1060 |
| }, |
| { |
| "epoch": 3.7762237762237763, |
| "grad_norm": 0.4618857800960541, |
| "learning_rate": 5.4545454545454545e-06, |
| "loss": 0.13272134065628052, |
| "step": 1080 |
| }, |
| { |
| "epoch": 3.8461538461538463, |
| "grad_norm": 0.5411708354949951, |
| "learning_rate": 5.1437451437451446e-06, |
| "loss": 0.1273587703704834, |
| "step": 1100 |
| }, |
| { |
| "epoch": 3.916083916083916, |
| "grad_norm": 0.438820481300354, |
| "learning_rate": 4.832944832944833e-06, |
| "loss": 0.13514026403427123, |
| "step": 1120 |
| }, |
| { |
| "epoch": 3.986013986013986, |
| "grad_norm": 0.580314040184021, |
| "learning_rate": 4.522144522144522e-06, |
| "loss": 0.13724528551101683, |
| "step": 1140 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_COMMODITY_f1": 0.5015, |
| "eval_COMPANY_f1": 0.7911, |
| "eval_COUNTRY_f1": 0.8487, |
| "eval_EVENT_f1": 0.3324, |
| "eval_INFRASTRUCTURE_f1": 0.3305, |
| "eval_LOCATION_f1": 0.6716, |
| "eval_MARKET_f1": 0.4217, |
| "eval_ORGANIZATION_f1": 0.68, |
| "eval_PERSON_f1": 0.7632, |
| "eval_accuracy": 0.9506155577771408, |
| "eval_f1": 0.6738893476465732, |
| "eval_loss": 0.1432034969329834, |
| "eval_precision": 0.6506984754736238, |
| "eval_recall": 0.6987943553911494, |
| "eval_runtime": 4.3982, |
| "eval_samples_per_second": 260.109, |
| "eval_steps_per_second": 4.093, |
| "step": 1144 |
| }, |
| { |
| "epoch": 4.055944055944056, |
| "grad_norm": 0.5337245464324951, |
| "learning_rate": 4.2113442113442115e-06, |
| "loss": 0.1314162254333496, |
| "step": 1160 |
| }, |
| { |
| "epoch": 4.125874125874126, |
| "grad_norm": 0.5353516936302185, |
| "learning_rate": 3.900543900543901e-06, |
| "loss": 0.1259661316871643, |
| "step": 1180 |
| }, |
| { |
| "epoch": 4.195804195804196, |
| "grad_norm": 0.5923385620117188, |
| "learning_rate": 3.58974358974359e-06, |
| "loss": 0.12636299133300782, |
| "step": 1200 |
| }, |
| { |
| "epoch": 4.265734265734266, |
| "grad_norm": 0.4649389982223511, |
| "learning_rate": 3.278943278943279e-06, |
| "loss": 0.12732148170471191, |
| "step": 1220 |
| }, |
| { |
| "epoch": 4.335664335664336, |
| "grad_norm": 0.44835126399993896, |
| "learning_rate": 2.9681429681429686e-06, |
| "loss": 0.12263227701187134, |
| "step": 1240 |
| }, |
| { |
| "epoch": 4.405594405594406, |
| "grad_norm": 0.48421064019203186, |
| "learning_rate": 2.6573426573426574e-06, |
| "loss": 0.12957746982574464, |
| "step": 1260 |
| }, |
| { |
| "epoch": 4.475524475524476, |
| "grad_norm": 0.5995394587516785, |
| "learning_rate": 2.3465423465423467e-06, |
| "loss": 0.12977392673492433, |
| "step": 1280 |
| }, |
| { |
| "epoch": 4.545454545454545, |
| "grad_norm": 0.49631068110466003, |
| "learning_rate": 2.035742035742036e-06, |
| "loss": 0.12345067262649537, |
| "step": 1300 |
| }, |
| { |
| "epoch": 4.615384615384615, |
| "grad_norm": 0.5985969305038452, |
| "learning_rate": 1.724941724941725e-06, |
| "loss": 0.12002362012863159, |
| "step": 1320 |
| }, |
| { |
| "epoch": 4.685314685314685, |
| "grad_norm": 0.5093550086021423, |
| "learning_rate": 1.4141414141414143e-06, |
| "loss": 0.12667241096496581, |
| "step": 1340 |
| }, |
| { |
| "epoch": 4.755244755244755, |
| "grad_norm": 0.5784407258033752, |
| "learning_rate": 1.1033411033411034e-06, |
| "loss": 0.12439815998077393, |
| "step": 1360 |
| }, |
| { |
| "epoch": 4.825174825174825, |
| "grad_norm": 0.4653092920780182, |
| "learning_rate": 7.925407925407925e-07, |
| "loss": 0.11679568290710449, |
| "step": 1380 |
| }, |
| { |
| "epoch": 4.895104895104895, |
| "grad_norm": 0.7826379537582397, |
| "learning_rate": 4.817404817404818e-07, |
| "loss": 0.12205266952514648, |
| "step": 1400 |
| }, |
| { |
| "epoch": 4.965034965034965, |
| "grad_norm": 0.5184548497200012, |
| "learning_rate": 1.7094017094017097e-07, |
| "loss": 0.12192339897155761, |
| "step": 1420 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_COMMODITY_f1": 0.5255, |
| "eval_COMPANY_f1": 0.7943, |
| "eval_COUNTRY_f1": 0.849, |
| "eval_EVENT_f1": 0.3433, |
| "eval_INFRASTRUCTURE_f1": 0.3473, |
| "eval_LOCATION_f1": 0.6793, |
| "eval_MARKET_f1": 0.4206, |
| "eval_ORGANIZATION_f1": 0.6913, |
| "eval_PERSON_f1": 0.7696, |
| "eval_accuracy": 0.950874224872589, |
| "eval_f1": 0.6789528523052879, |
| "eval_loss": 0.14190641045570374, |
| "eval_precision": 0.647015017996773, |
| "eval_recall": 0.7142074256747499, |
| "eval_runtime": 4.3789, |
| "eval_samples_per_second": 261.252, |
| "eval_steps_per_second": 4.111, |
| "step": 1430 |
| } |
| ], |
| "logging_steps": 20, |
| "max_steps": 1430, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2990261795389440.0, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|