{ "best_global_step": 2230, "best_metric": 0.9941502463054187, "best_model_checkpoint": "/workspace/hallucination/bge-reranker-v2-m3/v6/checkpoint-1784", "epoch": 2.9941225860621326, "eval_steps": 446, "global_step": 2676, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.055975370836831795, "grad_norm": 4.56851863861084, "learning_rate": 2.9037037037037038e-06, "loss": 0.1467, "step": 50 }, { "epoch": 0.11195074167366359, "grad_norm": 1.2010878324508667, "learning_rate": 5.866666666666666e-06, "loss": 0.0727, "step": 100 }, { "epoch": 0.16792611251049538, "grad_norm": 6.644562244415283, "learning_rate": 7.99940362815711e-06, "loss": 0.0405, "step": 150 }, { "epoch": 0.22390148334732718, "grad_norm": 3.49969744682312, "learning_rate": 7.98754320672899e-06, "loss": 0.0358, "step": 200 }, { "epoch": 0.279876854184159, "grad_norm": 3.164804220199585, "learning_rate": 7.960521025612183e-06, "loss": 0.0389, "step": 250 }, { "epoch": 0.33585222502099077, "grad_norm": 4.480787754058838, "learning_rate": 7.918439830731966e-06, "loss": 0.0543, "step": 300 }, { "epoch": 0.39182759585782256, "grad_norm": 5.254024028778076, "learning_rate": 7.861459626615215e-06, "loss": 0.0465, "step": 350 }, { "epoch": 0.44780296669465436, "grad_norm": 0.4903552532196045, "learning_rate": 7.789797068008236e-06, "loss": 0.037, "step": 400 }, { "epoch": 0.4993003078645396, "eval_accuracy": 0.9902377578334121, "eval_f1": 0.9904202719406675, "eval_loss": 0.03584026172757149, "eval_precision": 0.9925673583152679, "eval_recall": 0.9882824545174221, "eval_runtime": 328.1406, "eval_samples_per_second": 19.355, "eval_steps_per_second": 1.21, "step": 446 }, { "epoch": 0.5037783375314862, "grad_norm": 12.020153045654297, "learning_rate": 7.703724636094536e-06, "loss": 0.0334, "step": 450 }, { "epoch": 0.559753708368318, "grad_norm": 9.890530586242676, "learning_rate": 7.603569602444819e-06, "loss": 0.0402, "step": 500 }, { "epoch": 0.6157290792051497, "grad_norm": 8.32047176361084, "learning_rate": 7.4897127846385005e-06, "loss": 0.0367, "step": 550 }, { "epoch": 0.6717044500419815, "grad_norm": 0.729023277759552, "learning_rate": 7.362587098288277e-06, "loss": 0.0279, "step": 600 }, { "epoch": 0.7276798208788133, "grad_norm": 0.38823333382606506, "learning_rate": 7.222675910973328e-06, "loss": 0.0226, "step": 650 }, { "epoch": 0.7836551917156451, "grad_norm": 14.20788860321045, "learning_rate": 7.070511204339955e-06, "loss": 0.0285, "step": 700 }, { "epoch": 0.8396305625524769, "grad_norm": 14.189713478088379, "learning_rate": 6.906671551357899e-06, "loss": 0.0183, "step": 750 }, { "epoch": 0.8956059333893087, "grad_norm": 9.239374160766602, "learning_rate": 6.731779916423332e-06, "loss": 0.025, "step": 800 }, { "epoch": 0.9515813042261405, "grad_norm": 7.430564880371094, "learning_rate": 6.546501286673185e-06, "loss": 0.0267, "step": 850 }, { "epoch": 0.9986006157290792, "eval_accuracy": 0.9916548575027555, "eval_f1": 0.9918649270913277, "eval_loss": 0.03128722682595253, "eval_precision": 0.9874694376528117, "eval_recall": 0.996299722479186, "eval_runtime": 328.6993, "eval_samples_per_second": 19.322, "eval_steps_per_second": 1.208, "step": 892 }, { "epoch": 1.0067170445004199, "grad_norm": 1.4463914632797241, "learning_rate": 6.351540143517212e-06, "loss": 0.0215, "step": 900 }, { "epoch": 1.0626924153372517, "grad_norm": 0.02502119354903698, "learning_rate": 6.147637784001716e-06, "loss": 0.0062, "step": 950 }, { "epoch": 1.1186677861740835, "grad_norm": 0.039681658148765564, "learning_rate": 5.935569502189897e-06, "loss": 0.0034, "step": 1000 }, { "epoch": 1.1746431570109153, "grad_norm": 0.005194108001887798, "learning_rate": 5.716141641275983e-06, "loss": 0.0087, "step": 1050 }, { "epoch": 1.230618527847747, "grad_norm": 0.17214158177375793, "learning_rate": 5.490188527641829e-06, "loss": 0.0156, "step": 1100 }, { "epoch": 1.2865938986845789, "grad_norm": 0.48181217908859253, "learning_rate": 5.258569298513601e-06, "loss": 0.0066, "step": 1150 }, { "epoch": 1.3425692695214106, "grad_norm": 0.12410200387239456, "learning_rate": 5.0221646352806285e-06, "loss": 0.0085, "step": 1200 }, { "epoch": 1.3985446403582424, "grad_norm": 0.03792842850089073, "learning_rate": 4.781873414897317e-06, "loss": 0.0179, "step": 1250 }, { "epoch": 1.4545200111950742, "grad_norm": 1.4860434532165527, "learning_rate": 4.538609292100398e-06, "loss": 0.0112, "step": 1300 }, { "epoch": 1.4970612930310663, "eval_accuracy": 0.9918123130215714, "eval_f1": 0.9920196439533456, "eval_loss": 0.03981148824095726, "eval_precision": 0.9874732661167125, "eval_recall": 0.9966080789392537, "eval_runtime": 329.583, "eval_samples_per_second": 19.27, "eval_steps_per_second": 1.205, "step": 1338 }, { "epoch": 1.5104953820319058, "grad_norm": 0.007759585976600647, "learning_rate": 4.293297225436905e-06, "loss": 0.0034, "step": 1350 }, { "epoch": 1.5664707528687378, "grad_norm": 0.02019183151423931, "learning_rate": 4.046869960311881e-06, "loss": 0.004, "step": 1400 }, { "epoch": 1.6224461237055694, "grad_norm": 0.03353915363550186, "learning_rate": 3.8002644824282683e-06, "loss": 0.0067, "step": 1450 }, { "epoch": 1.6784214945424014, "grad_norm": 3.9431345462799072, "learning_rate": 3.5544184551039885e-06, "loss": 0.0062, "step": 1500 }, { "epoch": 1.734396865379233, "grad_norm": 0.009291726164519787, "learning_rate": 3.3102666540125503e-06, "loss": 0.0044, "step": 1550 }, { "epoch": 1.790372236216065, "grad_norm": 0.024871619418263435, "learning_rate": 3.06873741290328e-06, "loss": 0.0044, "step": 1600 }, { "epoch": 1.8463476070528966, "grad_norm": 0.0040198941715061665, "learning_rate": 2.8307490938155686e-06, "loss": 0.0074, "step": 1650 }, { "epoch": 1.9023229778897286, "grad_norm": 0.010355140082538128, "learning_rate": 2.597206595208356e-06, "loss": 0.007, "step": 1700 }, { "epoch": 1.9582983487265602, "grad_norm": 0.05570561811327934, "learning_rate": 2.36899791128193e-06, "loss": 0.0114, "step": 1750 }, { "epoch": 1.996361600895606, "eval_accuracy": 0.9935443237285467, "eval_f1": 0.9936913371287891, "eval_loss": 0.03450547158718109, "eval_precision": 0.9917076167076168, "eval_recall": 0.9956830095590503, "eval_runtime": 329.5265, "eval_samples_per_second": 19.273, "eval_steps_per_second": 1.205, "step": 1784 }, { "epoch": 2.0134340890008398, "grad_norm": 0.0031412208918482065, "learning_rate": 2.146990755574473e-06, "loss": 0.0049, "step": 1800 }, { "epoch": 2.0694094598376713, "grad_norm": 0.022113706916570663, "learning_rate": 1.9320292616713783e-06, "loss": 0.0023, "step": 1850 }, { "epoch": 2.1253848306745033, "grad_norm": 0.013204299844801426, "learning_rate": 1.7249307735721739e-06, "loss": 0.0034, "step": 1900 }, { "epoch": 2.181360201511335, "grad_norm": 0.34572720527648926, "learning_rate": 1.526482737918981e-06, "loss": 0.0002, "step": 1950 }, { "epoch": 2.237335572348167, "grad_norm": 0.003737039864063263, "learning_rate": 1.337439709903123e-06, "loss": 0.0022, "step": 2000 }, { "epoch": 2.2933109431849985, "grad_norm": 0.0018848059698939323, "learning_rate": 1.1585204842342755e-06, "loss": 0.0001, "step": 2050 }, { "epoch": 2.3492863140218305, "grad_norm": 0.0221543088555336, "learning_rate": 9.904053620810118e-07, "loss": 0.0002, "step": 2100 }, { "epoch": 2.405261684858662, "grad_norm": 0.0014764212537556887, "learning_rate": 8.337335643746119e-07, "loss": 0.003, "step": 2150 }, { "epoch": 2.461237055695494, "grad_norm": 0.24720342457294464, "learning_rate": 6.891008013114716e-07, "loss": 0.0025, "step": 2200 }, { "epoch": 2.494822278197593, "eval_accuracy": 0.9940166902849945, "eval_f1": 0.9941502463054187, "eval_loss": 0.034027792513370514, "eval_precision": 0.9926221948970181, "eval_recall": 0.9956830095590503, "eval_runtime": 328.5789, "eval_samples_per_second": 19.329, "eval_steps_per_second": 1.208, "step": 2230 }, { "epoch": 2.5172124265323257, "grad_norm": 0.04605395719408989, "learning_rate": 5.570570072955551e-07, "loss": 0.0017, "step": 2250 }, { "epoch": 2.5731877973691577, "grad_norm": 0.0023472148459404707, "learning_rate": 4.3810424993327053e-07, "loss": 0.0018, "step": 2300 }, { "epoch": 2.6291631682059893, "grad_norm": 0.0014394799945876002, "learning_rate": 3.326948210313607e-07, "loss": 0.0015, "step": 2350 }, { "epoch": 2.6851385390428213, "grad_norm": 0.029553143307566643, "learning_rate": 2.412295168563667e-07, "loss": 0.0007, "step": 2400 }, { "epoch": 2.741113909879653, "grad_norm": 0.0062004872597754, "learning_rate": 1.6405611419461017e-07, "loss": 0.0002, "step": 2450 }, { "epoch": 2.797089280716485, "grad_norm": 1.4929763078689575, "learning_rate": 1.0146804800713304e-07, "loss": 0.0001, "step": 2500 }, { "epoch": 2.8530646515533165, "grad_norm": 0.0059923469088971615, "learning_rate": 5.3703295707523854e-08, "loss": 0.0011, "step": 2550 }, { "epoch": 2.9090400223901485, "grad_norm": 0.00393084529787302, "learning_rate": 2.094347230492799e-08, "loss": 0.0014, "step": 2600 }, { "epoch": 2.96501539322698, "grad_norm": 0.0017132419161498547, "learning_rate": 3.3131398527510567e-09, "loss": 0.0017, "step": 2650 }, { "epoch": 2.9941225860621326, "eval_accuracy": 0.9938592347661785, "eval_f1": 0.9939935314954567, "eval_loss": 0.033541660755872726, "eval_precision": 0.9929230769230769, "eval_recall": 0.9950662966389145, "eval_runtime": 329.149, "eval_samples_per_second": 19.295, "eval_steps_per_second": 1.206, "step": 2676 } ], "logging_steps": 50, "max_steps": 2682, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 892, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.189990278329344e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }