{
"best_global_step": 3892,
"best_metric": 0.9585253456221198,
"best_model_checkpoint": "nb_bert_base_relevance_weighted/checkpoint-3892",
"epoch": 1.0,
"eval_steps": 500,
"global_step": 3892,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.012846865364850977,
"grad_norm": 1.7052006721496582,
"learning_rate": 1.9874100719424462e-05,
"loss": 0.6012,
"step": 50
},
{
"epoch": 0.025693730729701953,
"grad_norm": 32.50202941894531,
"learning_rate": 1.9745632065775954e-05,
"loss": 0.6445,
"step": 100
},
{
"epoch": 0.03854059609455293,
"grad_norm": 0.2601597011089325,
"learning_rate": 1.9617163412127443e-05,
"loss": 0.5417,
"step": 150
},
{
"epoch": 0.051387461459403906,
"grad_norm": 0.4800266921520233,
"learning_rate": 1.948869475847893e-05,
"loss": 0.4754,
"step": 200
},
{
"epoch": 0.06423432682425488,
"grad_norm": 0.1330161839723587,
"learning_rate": 1.9360226104830423e-05,
"loss": 0.6532,
"step": 250
},
{
"epoch": 0.07708119218910586,
"grad_norm": 15.620330810546875,
"learning_rate": 1.9231757451181915e-05,
"loss": 0.7903,
"step": 300
},
{
"epoch": 0.08992805755395683,
"grad_norm": 0.08341807126998901,
"learning_rate": 1.9103288797533403e-05,
"loss": 0.3625,
"step": 350
},
{
"epoch": 0.10277492291880781,
"grad_norm": 27.293317794799805,
"learning_rate": 1.8974820143884892e-05,
"loss": 0.5664,
"step": 400
},
{
"epoch": 0.1156217882836588,
"grad_norm": 0.09574569761753082,
"learning_rate": 1.8846351490236384e-05,
"loss": 0.4336,
"step": 450
},
{
"epoch": 0.12846865364850976,
"grad_norm": 0.04544169455766678,
"learning_rate": 1.8717882836587876e-05,
"loss": 0.5034,
"step": 500
},
{
"epoch": 0.14131551901336073,
"grad_norm": 43.972049713134766,
"learning_rate": 1.8589414182939364e-05,
"loss": 0.5966,
"step": 550
},
{
"epoch": 0.15416238437821173,
"grad_norm": 4.7578582763671875,
"learning_rate": 1.8460945529290856e-05,
"loss": 0.2444,
"step": 600
},
{
"epoch": 0.1670092497430627,
"grad_norm": 28.756206512451172,
"learning_rate": 1.8332476875642344e-05,
"loss": 0.4511,
"step": 650
},
{
"epoch": 0.17985611510791366,
"grad_norm": 0.1027965322136879,
"learning_rate": 1.8204008221993833e-05,
"loss": 0.4211,
"step": 700
},
{
"epoch": 0.19270298047276466,
"grad_norm": 0.023025257512927055,
"learning_rate": 1.8075539568345325e-05,
"loss": 0.4003,
"step": 750
},
{
"epoch": 0.20554984583761562,
"grad_norm": 11.712437629699707,
"learning_rate": 1.7947070914696817e-05,
"loss": 0.5503,
"step": 800
},
{
"epoch": 0.2183967112024666,
"grad_norm": 0.0492803193628788,
"learning_rate": 1.7818602261048305e-05,
"loss": 0.336,
"step": 850
},
{
"epoch": 0.2312435765673176,
"grad_norm": 0.8247962594032288,
"learning_rate": 1.7690133607399797e-05,
"loss": 0.4743,
"step": 900
},
{
"epoch": 0.24409044193216856,
"grad_norm": 0.03478659689426422,
"learning_rate": 1.7561664953751285e-05,
"loss": 0.4543,
"step": 950
},
{
"epoch": 0.2569373072970195,
"grad_norm": 0.3004553020000458,
"learning_rate": 1.7433196300102777e-05,
"loss": 0.2296,
"step": 1000
},
{
"epoch": 0.2697841726618705,
"grad_norm": 2.058995246887207,
"learning_rate": 1.7304727646454266e-05,
"loss": 0.3517,
"step": 1050
},
{
"epoch": 0.28263103802672146,
"grad_norm": 0.18223777413368225,
"learning_rate": 1.7176258992805758e-05,
"loss": 0.3628,
"step": 1100
},
{
"epoch": 0.2954779033915725,
"grad_norm": 30.207103729248047,
"learning_rate": 1.7047790339157246e-05,
"loss": 0.3983,
"step": 1150
},
{
"epoch": 0.30832476875642345,
"grad_norm": 0.0368342325091362,
"learning_rate": 1.6919321685508738e-05,
"loss": 0.5342,
"step": 1200
},
{
"epoch": 0.3211716341212744,
"grad_norm": 0.28241753578186035,
"learning_rate": 1.679085303186023e-05,
"loss": 0.3943,
"step": 1250
},
{
"epoch": 0.3340184994861254,
"grad_norm": 0.033875174820423126,
"learning_rate": 1.6662384378211718e-05,
"loss": 0.2127,
"step": 1300
},
{
"epoch": 0.34686536485097635,
"grad_norm": 0.11910529434680939,
"learning_rate": 1.6533915724563207e-05,
"loss": 0.4309,
"step": 1350
},
{
"epoch": 0.3597122302158273,
"grad_norm": 0.5434423089027405,
"learning_rate": 1.64054470709147e-05,
"loss": 0.1101,
"step": 1400
},
{
"epoch": 0.3725590955806783,
"grad_norm": 47.17951202392578,
"learning_rate": 1.627697841726619e-05,
"loss": 0.2418,
"step": 1450
},
{
"epoch": 0.3854059609455293,
"grad_norm": 2.3193013668060303,
"learning_rate": 1.614850976361768e-05,
"loss": 0.2157,
"step": 1500
},
{
"epoch": 0.3982528263103803,
"grad_norm": 0.10629215836524963,
"learning_rate": 1.6020041109969167e-05,
"loss": 0.4207,
"step": 1550
},
{
"epoch": 0.41109969167523125,
"grad_norm": 0.027008764445781708,
"learning_rate": 1.589157245632066e-05,
"loss": 0.2517,
"step": 1600
},
{
"epoch": 0.4239465570400822,
"grad_norm": 0.026176316663622856,
"learning_rate": 1.5763103802672148e-05,
"loss": 0.2796,
"step": 1650
},
{
"epoch": 0.4367934224049332,
"grad_norm": 0.09446433931589127,
"learning_rate": 1.563463514902364e-05,
"loss": 0.2172,
"step": 1700
},
{
"epoch": 0.44964028776978415,
"grad_norm": 0.08241437375545502,
"learning_rate": 1.550616649537513e-05,
"loss": 0.3683,
"step": 1750
},
{
"epoch": 0.4624871531346352,
"grad_norm": 44.19976806640625,
"learning_rate": 1.537769784172662e-05,
"loss": 0.4662,
"step": 1800
},
{
"epoch": 0.47533401849948614,
"grad_norm": 0.03341331705451012,
"learning_rate": 1.524922918807811e-05,
"loss": 0.2785,
"step": 1850
},
{
"epoch": 0.4881808838643371,
"grad_norm": 0.13134817779064178,
"learning_rate": 1.5120760534429599e-05,
"loss": 0.3011,
"step": 1900
},
{
"epoch": 0.501027749229188,
"grad_norm": 0.09789072722196579,
"learning_rate": 1.4992291880781092e-05,
"loss": 0.132,
"step": 1950
},
{
"epoch": 0.513874614594039,
"grad_norm": 0.5926951766014099,
"learning_rate": 1.486382322713258e-05,
"loss": 0.3846,
"step": 2000
},
{
"epoch": 0.5267214799588901,
"grad_norm": 0.016193868592381477,
"learning_rate": 1.473535457348407e-05,
"loss": 0.1759,
"step": 2050
},
{
"epoch": 0.539568345323741,
"grad_norm": 107.60330963134766,
"learning_rate": 1.4606885919835561e-05,
"loss": 0.329,
"step": 2100
},
{
"epoch": 0.552415210688592,
"grad_norm": 0.013422131538391113,
"learning_rate": 1.4478417266187053e-05,
"loss": 0.2515,
"step": 2150
},
{
"epoch": 0.5652620760534429,
"grad_norm": 0.015327083878219128,
"learning_rate": 1.4349948612538543e-05,
"loss": 0.2247,
"step": 2200
},
{
"epoch": 0.5781089414182939,
"grad_norm": 46.177696228027344,
"learning_rate": 1.4221479958890031e-05,
"loss": 0.3581,
"step": 2250
},
{
"epoch": 0.590955806783145,
"grad_norm": 0.16671152412891388,
"learning_rate": 1.4093011305241522e-05,
"loss": 0.2248,
"step": 2300
},
{
"epoch": 0.6038026721479959,
"grad_norm": 0.021253060549497604,
"learning_rate": 1.3964542651593012e-05,
"loss": 0.0853,
"step": 2350
},
{
"epoch": 0.6166495375128469,
"grad_norm": 0.0710051879286766,
"learning_rate": 1.3836073997944504e-05,
"loss": 0.4464,
"step": 2400
},
{
"epoch": 0.6294964028776978,
"grad_norm": 44.617759704589844,
"learning_rate": 1.3707605344295994e-05,
"loss": 0.261,
"step": 2450
},
{
"epoch": 0.6423432682425488,
"grad_norm": 0.012564734555780888,
"learning_rate": 1.3579136690647484e-05,
"loss": 0.0948,
"step": 2500
},
{
"epoch": 0.6551901336073997,
"grad_norm": 0.10313341021537781,
"learning_rate": 1.3450668036998972e-05,
"loss": 0.2493,
"step": 2550
},
{
"epoch": 0.6680369989722508,
"grad_norm": 0.16787320375442505,
"learning_rate": 1.3322199383350463e-05,
"loss": 0.1627,
"step": 2600
},
{
"epoch": 0.6808838643371018,
"grad_norm": 0.23290768265724182,
"learning_rate": 1.3193730729701954e-05,
"loss": 0.4074,
"step": 2650
},
{
"epoch": 0.6937307297019527,
"grad_norm": 0.15848670899868011,
"learning_rate": 1.3065262076053445e-05,
"loss": 0.2521,
"step": 2700
},
{
"epoch": 0.7065775950668037,
"grad_norm": 0.1227729544043541,
"learning_rate": 1.2936793422404935e-05,
"loss": 0.2727,
"step": 2750
},
{
"epoch": 0.7194244604316546,
"grad_norm": 14.59352970123291,
"learning_rate": 1.2808324768756423e-05,
"loss": 0.1938,
"step": 2800
},
{
"epoch": 0.7322713257965057,
"grad_norm": 0.011683103628456593,
"learning_rate": 1.2679856115107915e-05,
"loss": 0.3774,
"step": 2850
},
{
"epoch": 0.7451181911613566,
"grad_norm": 0.15291182696819305,
"learning_rate": 1.2551387461459405e-05,
"loss": 0.414,
"step": 2900
},
{
"epoch": 0.7579650565262076,
"grad_norm": 0.008359256200492382,
"learning_rate": 1.2422918807810895e-05,
"loss": 0.1985,
"step": 2950
},
{
"epoch": 0.7708119218910586,
"grad_norm": 0.20358124375343323,
"learning_rate": 1.2294450154162386e-05,
"loss": 0.3415,
"step": 3000
},
{
"epoch": 0.7836587872559095,
"grad_norm": 0.07088713347911835,
"learning_rate": 1.2165981500513874e-05,
"loss": 0.3632,
"step": 3050
},
{
"epoch": 0.7965056526207606,
"grad_norm": 0.09244630485773087,
"learning_rate": 1.2037512846865368e-05,
"loss": 0.0333,
"step": 3100
},
{
"epoch": 0.8093525179856115,
"grad_norm": 0.012910844758152962,
"learning_rate": 1.1909044193216856e-05,
"loss": 0.3435,
"step": 3150
},
{
"epoch": 0.8221993833504625,
"grad_norm": 0.17528291046619415,
"learning_rate": 1.1780575539568346e-05,
"loss": 0.4858,
"step": 3200
},
{
"epoch": 0.8350462487153134,
"grad_norm": 0.23448841273784637,
"learning_rate": 1.1652106885919836e-05,
"loss": 0.239,
"step": 3250
},
{
"epoch": 0.8478931140801644,
"grad_norm": 0.7074928879737854,
"learning_rate": 1.1523638232271327e-05,
"loss": 0.1674,
"step": 3300
},
{
"epoch": 0.8607399794450155,
"grad_norm": 0.1711515188217163,
"learning_rate": 1.1395169578622818e-05,
"loss": 0.4993,
"step": 3350
},
{
"epoch": 0.8735868448098664,
"grad_norm": 0.08787185698747635,
"learning_rate": 1.1266700924974307e-05,
"loss": 0.1309,
"step": 3400
},
{
"epoch": 0.8864337101747174,
"grad_norm": 0.12081218510866165,
"learning_rate": 1.1138232271325797e-05,
"loss": 0.3276,
"step": 3450
},
{
"epoch": 0.8992805755395683,
"grad_norm": 0.0644136592745781,
"learning_rate": 1.1009763617677287e-05,
"loss": 0.2722,
"step": 3500
},
{
"epoch": 0.9121274409044193,
"grad_norm": 0.10556616634130478,
"learning_rate": 1.0881294964028777e-05,
"loss": 0.4247,
"step": 3550
},
{
"epoch": 0.9249743062692704,
"grad_norm": 0.2076292783021927,
"learning_rate": 1.075282631038027e-05,
"loss": 0.3477,
"step": 3600
},
{
"epoch": 0.9378211716341213,
"grad_norm": 0.2808614671230316,
"learning_rate": 1.062435765673176e-05,
"loss": 0.4806,
"step": 3650
},
{
"epoch": 0.9506680369989723,
"grad_norm": 0.0461493581533432,
"learning_rate": 1.0495889003083248e-05,
"loss": 0.2213,
"step": 3700
},
{
"epoch": 0.9635149023638232,
"grad_norm": 15.367751121520996,
"learning_rate": 1.0367420349434738e-05,
"loss": 0.2639,
"step": 3750
},
{
"epoch": 0.9763617677286742,
"grad_norm": 0.3630225658416748,
"learning_rate": 1.023895169578623e-05,
"loss": 0.1761,
"step": 3800
},
{
"epoch": 0.9892086330935251,
"grad_norm": 0.12363607436418533,
"learning_rate": 1.011048304213772e-05,
"loss": 0.2654,
"step": 3850
},
{
"epoch": 1.0,
"eval_f1": 0.6698872785829307,
"eval_loss": 0.14509662985801697,
"eval_precision": 0.5148514851485149,
"eval_recall": 0.9585253456221198,
"eval_runtime": 174.2843,
"eval_samples_per_second": 89.325,
"eval_steps_per_second": 2.794,
"step": 3892
}
],
"logging_steps": 50,
"max_steps": 7784,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 8191831153105920.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}