bge_reranker_v2_m3_db_v1 / trainer_state.json
mertege's picture
Add files using upload-large-folder tool
455d6c4 verified
{
"best_global_step": 2230,
"best_metric": 0.9941502463054187,
"best_model_checkpoint": "/workspace/hallucination/bge-reranker-v2-m3/v6/checkpoint-1784",
"epoch": 2.9941225860621326,
"eval_steps": 446,
"global_step": 2676,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.055975370836831795,
"grad_norm": 4.56851863861084,
"learning_rate": 2.9037037037037038e-06,
"loss": 0.1467,
"step": 50
},
{
"epoch": 0.11195074167366359,
"grad_norm": 1.2010878324508667,
"learning_rate": 5.866666666666666e-06,
"loss": 0.0727,
"step": 100
},
{
"epoch": 0.16792611251049538,
"grad_norm": 6.644562244415283,
"learning_rate": 7.99940362815711e-06,
"loss": 0.0405,
"step": 150
},
{
"epoch": 0.22390148334732718,
"grad_norm": 3.49969744682312,
"learning_rate": 7.98754320672899e-06,
"loss": 0.0358,
"step": 200
},
{
"epoch": 0.279876854184159,
"grad_norm": 3.164804220199585,
"learning_rate": 7.960521025612183e-06,
"loss": 0.0389,
"step": 250
},
{
"epoch": 0.33585222502099077,
"grad_norm": 4.480787754058838,
"learning_rate": 7.918439830731966e-06,
"loss": 0.0543,
"step": 300
},
{
"epoch": 0.39182759585782256,
"grad_norm": 5.254024028778076,
"learning_rate": 7.861459626615215e-06,
"loss": 0.0465,
"step": 350
},
{
"epoch": 0.44780296669465436,
"grad_norm": 0.4903552532196045,
"learning_rate": 7.789797068008236e-06,
"loss": 0.037,
"step": 400
},
{
"epoch": 0.4993003078645396,
"eval_accuracy": 0.9902377578334121,
"eval_f1": 0.9904202719406675,
"eval_loss": 0.03584026172757149,
"eval_precision": 0.9925673583152679,
"eval_recall": 0.9882824545174221,
"eval_runtime": 328.1406,
"eval_samples_per_second": 19.355,
"eval_steps_per_second": 1.21,
"step": 446
},
{
"epoch": 0.5037783375314862,
"grad_norm": 12.020153045654297,
"learning_rate": 7.703724636094536e-06,
"loss": 0.0334,
"step": 450
},
{
"epoch": 0.559753708368318,
"grad_norm": 9.890530586242676,
"learning_rate": 7.603569602444819e-06,
"loss": 0.0402,
"step": 500
},
{
"epoch": 0.6157290792051497,
"grad_norm": 8.32047176361084,
"learning_rate": 7.4897127846385005e-06,
"loss": 0.0367,
"step": 550
},
{
"epoch": 0.6717044500419815,
"grad_norm": 0.729023277759552,
"learning_rate": 7.362587098288277e-06,
"loss": 0.0279,
"step": 600
},
{
"epoch": 0.7276798208788133,
"grad_norm": 0.38823333382606506,
"learning_rate": 7.222675910973328e-06,
"loss": 0.0226,
"step": 650
},
{
"epoch": 0.7836551917156451,
"grad_norm": 14.20788860321045,
"learning_rate": 7.070511204339955e-06,
"loss": 0.0285,
"step": 700
},
{
"epoch": 0.8396305625524769,
"grad_norm": 14.189713478088379,
"learning_rate": 6.906671551357899e-06,
"loss": 0.0183,
"step": 750
},
{
"epoch": 0.8956059333893087,
"grad_norm": 9.239374160766602,
"learning_rate": 6.731779916423332e-06,
"loss": 0.025,
"step": 800
},
{
"epoch": 0.9515813042261405,
"grad_norm": 7.430564880371094,
"learning_rate": 6.546501286673185e-06,
"loss": 0.0267,
"step": 850
},
{
"epoch": 0.9986006157290792,
"eval_accuracy": 0.9916548575027555,
"eval_f1": 0.9918649270913277,
"eval_loss": 0.03128722682595253,
"eval_precision": 0.9874694376528117,
"eval_recall": 0.996299722479186,
"eval_runtime": 328.6993,
"eval_samples_per_second": 19.322,
"eval_steps_per_second": 1.208,
"step": 892
},
{
"epoch": 1.0067170445004199,
"grad_norm": 1.4463914632797241,
"learning_rate": 6.351540143517212e-06,
"loss": 0.0215,
"step": 900
},
{
"epoch": 1.0626924153372517,
"grad_norm": 0.02502119354903698,
"learning_rate": 6.147637784001716e-06,
"loss": 0.0062,
"step": 950
},
{
"epoch": 1.1186677861740835,
"grad_norm": 0.039681658148765564,
"learning_rate": 5.935569502189897e-06,
"loss": 0.0034,
"step": 1000
},
{
"epoch": 1.1746431570109153,
"grad_norm": 0.005194108001887798,
"learning_rate": 5.716141641275983e-06,
"loss": 0.0087,
"step": 1050
},
{
"epoch": 1.230618527847747,
"grad_norm": 0.17214158177375793,
"learning_rate": 5.490188527641829e-06,
"loss": 0.0156,
"step": 1100
},
{
"epoch": 1.2865938986845789,
"grad_norm": 0.48181217908859253,
"learning_rate": 5.258569298513601e-06,
"loss": 0.0066,
"step": 1150
},
{
"epoch": 1.3425692695214106,
"grad_norm": 0.12410200387239456,
"learning_rate": 5.0221646352806285e-06,
"loss": 0.0085,
"step": 1200
},
{
"epoch": 1.3985446403582424,
"grad_norm": 0.03792842850089073,
"learning_rate": 4.781873414897317e-06,
"loss": 0.0179,
"step": 1250
},
{
"epoch": 1.4545200111950742,
"grad_norm": 1.4860434532165527,
"learning_rate": 4.538609292100398e-06,
"loss": 0.0112,
"step": 1300
},
{
"epoch": 1.4970612930310663,
"eval_accuracy": 0.9918123130215714,
"eval_f1": 0.9920196439533456,
"eval_loss": 0.03981148824095726,
"eval_precision": 0.9874732661167125,
"eval_recall": 0.9966080789392537,
"eval_runtime": 329.583,
"eval_samples_per_second": 19.27,
"eval_steps_per_second": 1.205,
"step": 1338
},
{
"epoch": 1.5104953820319058,
"grad_norm": 0.007759585976600647,
"learning_rate": 4.293297225436905e-06,
"loss": 0.0034,
"step": 1350
},
{
"epoch": 1.5664707528687378,
"grad_norm": 0.02019183151423931,
"learning_rate": 4.046869960311881e-06,
"loss": 0.004,
"step": 1400
},
{
"epoch": 1.6224461237055694,
"grad_norm": 0.03353915363550186,
"learning_rate": 3.8002644824282683e-06,
"loss": 0.0067,
"step": 1450
},
{
"epoch": 1.6784214945424014,
"grad_norm": 3.9431345462799072,
"learning_rate": 3.5544184551039885e-06,
"loss": 0.0062,
"step": 1500
},
{
"epoch": 1.734396865379233,
"grad_norm": 0.009291726164519787,
"learning_rate": 3.3102666540125503e-06,
"loss": 0.0044,
"step": 1550
},
{
"epoch": 1.790372236216065,
"grad_norm": 0.024871619418263435,
"learning_rate": 3.06873741290328e-06,
"loss": 0.0044,
"step": 1600
},
{
"epoch": 1.8463476070528966,
"grad_norm": 0.0040198941715061665,
"learning_rate": 2.8307490938155686e-06,
"loss": 0.0074,
"step": 1650
},
{
"epoch": 1.9023229778897286,
"grad_norm": 0.010355140082538128,
"learning_rate": 2.597206595208356e-06,
"loss": 0.007,
"step": 1700
},
{
"epoch": 1.9582983487265602,
"grad_norm": 0.05570561811327934,
"learning_rate": 2.36899791128193e-06,
"loss": 0.0114,
"step": 1750
},
{
"epoch": 1.996361600895606,
"eval_accuracy": 0.9935443237285467,
"eval_f1": 0.9936913371287891,
"eval_loss": 0.03450547158718109,
"eval_precision": 0.9917076167076168,
"eval_recall": 0.9956830095590503,
"eval_runtime": 329.5265,
"eval_samples_per_second": 19.273,
"eval_steps_per_second": 1.205,
"step": 1784
},
{
"epoch": 2.0134340890008398,
"grad_norm": 0.0031412208918482065,
"learning_rate": 2.146990755574473e-06,
"loss": 0.0049,
"step": 1800
},
{
"epoch": 2.0694094598376713,
"grad_norm": 0.022113706916570663,
"learning_rate": 1.9320292616713783e-06,
"loss": 0.0023,
"step": 1850
},
{
"epoch": 2.1253848306745033,
"grad_norm": 0.013204299844801426,
"learning_rate": 1.7249307735721739e-06,
"loss": 0.0034,
"step": 1900
},
{
"epoch": 2.181360201511335,
"grad_norm": 0.34572720527648926,
"learning_rate": 1.526482737918981e-06,
"loss": 0.0002,
"step": 1950
},
{
"epoch": 2.237335572348167,
"grad_norm": 0.003737039864063263,
"learning_rate": 1.337439709903123e-06,
"loss": 0.0022,
"step": 2000
},
{
"epoch": 2.2933109431849985,
"grad_norm": 0.0018848059698939323,
"learning_rate": 1.1585204842342755e-06,
"loss": 0.0001,
"step": 2050
},
{
"epoch": 2.3492863140218305,
"grad_norm": 0.0221543088555336,
"learning_rate": 9.904053620810118e-07,
"loss": 0.0002,
"step": 2100
},
{
"epoch": 2.405261684858662,
"grad_norm": 0.0014764212537556887,
"learning_rate": 8.337335643746119e-07,
"loss": 0.003,
"step": 2150
},
{
"epoch": 2.461237055695494,
"grad_norm": 0.24720342457294464,
"learning_rate": 6.891008013114716e-07,
"loss": 0.0025,
"step": 2200
},
{
"epoch": 2.494822278197593,
"eval_accuracy": 0.9940166902849945,
"eval_f1": 0.9941502463054187,
"eval_loss": 0.034027792513370514,
"eval_precision": 0.9926221948970181,
"eval_recall": 0.9956830095590503,
"eval_runtime": 328.5789,
"eval_samples_per_second": 19.329,
"eval_steps_per_second": 1.208,
"step": 2230
},
{
"epoch": 2.5172124265323257,
"grad_norm": 0.04605395719408989,
"learning_rate": 5.570570072955551e-07,
"loss": 0.0017,
"step": 2250
},
{
"epoch": 2.5731877973691577,
"grad_norm": 0.0023472148459404707,
"learning_rate": 4.3810424993327053e-07,
"loss": 0.0018,
"step": 2300
},
{
"epoch": 2.6291631682059893,
"grad_norm": 0.0014394799945876002,
"learning_rate": 3.326948210313607e-07,
"loss": 0.0015,
"step": 2350
},
{
"epoch": 2.6851385390428213,
"grad_norm": 0.029553143307566643,
"learning_rate": 2.412295168563667e-07,
"loss": 0.0007,
"step": 2400
},
{
"epoch": 2.741113909879653,
"grad_norm": 0.0062004872597754,
"learning_rate": 1.6405611419461017e-07,
"loss": 0.0002,
"step": 2450
},
{
"epoch": 2.797089280716485,
"grad_norm": 1.4929763078689575,
"learning_rate": 1.0146804800713304e-07,
"loss": 0.0001,
"step": 2500
},
{
"epoch": 2.8530646515533165,
"grad_norm": 0.0059923469088971615,
"learning_rate": 5.3703295707523854e-08,
"loss": 0.0011,
"step": 2550
},
{
"epoch": 2.9090400223901485,
"grad_norm": 0.00393084529787302,
"learning_rate": 2.094347230492799e-08,
"loss": 0.0014,
"step": 2600
},
{
"epoch": 2.96501539322698,
"grad_norm": 0.0017132419161498547,
"learning_rate": 3.3131398527510567e-09,
"loss": 0.0017,
"step": 2650
},
{
"epoch": 2.9941225860621326,
"eval_accuracy": 0.9938592347661785,
"eval_f1": 0.9939935314954567,
"eval_loss": 0.033541660755872726,
"eval_precision": 0.9929230769230769,
"eval_recall": 0.9950662966389145,
"eval_runtime": 329.149,
"eval_samples_per_second": 19.295,
"eval_steps_per_second": 1.206,
"step": 2676
}
],
"logging_steps": 50,
"max_steps": 2682,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 892,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.189990278329344e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}