{
  "best_global_step": 2230,
  "best_metric": 0.9941502463054187,
  "best_model_checkpoint": "/workspace/hallucination/bge-reranker-v2-m3/v6/checkpoint-1784",
  "epoch": 2.9941225860621326,
  "eval_steps": 446,
  "global_step": 2676,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.055975370836831795,
      "grad_norm": 4.56851863861084,
      "learning_rate": 2.9037037037037038e-06,
      "loss": 0.1467,
      "step": 50
    },
    {
      "epoch": 0.11195074167366359,
      "grad_norm": 1.2010878324508667,
      "learning_rate": 5.866666666666666e-06,
      "loss": 0.0727,
      "step": 100
    },
    {
      "epoch": 0.16792611251049538,
      "grad_norm": 6.644562244415283,
      "learning_rate": 7.99940362815711e-06,
      "loss": 0.0405,
      "step": 150
    },
    {
      "epoch": 0.22390148334732718,
      "grad_norm": 3.49969744682312,
      "learning_rate": 7.98754320672899e-06,
      "loss": 0.0358,
      "step": 200
    },
    {
      "epoch": 0.279876854184159,
      "grad_norm": 3.164804220199585,
      "learning_rate": 7.960521025612183e-06,
      "loss": 0.0389,
      "step": 250
    },
    {
      "epoch": 0.33585222502099077,
      "grad_norm": 4.480787754058838,
      "learning_rate": 7.918439830731966e-06,
      "loss": 0.0543,
      "step": 300
    },
    {
      "epoch": 0.39182759585782256,
      "grad_norm": 5.254024028778076,
      "learning_rate": 7.861459626615215e-06,
      "loss": 0.0465,
      "step": 350
    },
    {
      "epoch": 0.44780296669465436,
      "grad_norm": 0.4903552532196045,
      "learning_rate": 7.789797068008236e-06,
      "loss": 0.037,
      "step": 400
    },
    {
      "epoch": 0.4993003078645396,
      "eval_accuracy": 0.9902377578334121,
      "eval_f1": 0.9904202719406675,
      "eval_loss": 0.03584026172757149,
      "eval_precision": 0.9925673583152679,
      "eval_recall": 0.9882824545174221,
      "eval_runtime": 328.1406,
      "eval_samples_per_second": 19.355,
      "eval_steps_per_second": 1.21,
      "step": 446
    },
    {
      "epoch": 0.5037783375314862,
      "grad_norm": 12.020153045654297,
      "learning_rate": 7.703724636094536e-06,
      "loss": 0.0334,
      "step": 450
    },
    {
      "epoch": 0.559753708368318,
      "grad_norm": 9.890530586242676,
      "learning_rate": 7.603569602444819e-06,
      "loss": 0.0402,
      "step": 500
    },
    {
      "epoch": 0.6157290792051497,
      "grad_norm": 8.32047176361084,
      "learning_rate": 7.4897127846385005e-06,
      "loss": 0.0367,
      "step": 550
    },
    {
      "epoch": 0.6717044500419815,
      "grad_norm": 0.729023277759552,
      "learning_rate": 7.362587098288277e-06,
      "loss": 0.0279,
      "step": 600
    },
    {
      "epoch": 0.7276798208788133,
      "grad_norm": 0.38823333382606506,
      "learning_rate": 7.222675910973328e-06,
      "loss": 0.0226,
      "step": 650
    },
    {
      "epoch": 0.7836551917156451,
      "grad_norm": 14.20788860321045,
      "learning_rate": 7.070511204339955e-06,
      "loss": 0.0285,
      "step": 700
    },
    {
      "epoch": 0.8396305625524769,
      "grad_norm": 14.189713478088379,
      "learning_rate": 6.906671551357899e-06,
      "loss": 0.0183,
      "step": 750
    },
    {
      "epoch": 0.8956059333893087,
      "grad_norm": 9.239374160766602,
      "learning_rate": 6.731779916423332e-06,
      "loss": 0.025,
      "step": 800
    },
    {
      "epoch": 0.9515813042261405,
      "grad_norm": 7.430564880371094,
      "learning_rate": 6.546501286673185e-06,
      "loss": 0.0267,
      "step": 850
    },
    {
      "epoch": 0.9986006157290792,
      "eval_accuracy": 0.9916548575027555,
      "eval_f1": 0.9918649270913277,
      "eval_loss": 0.03128722682595253,
      "eval_precision": 0.9874694376528117,
      "eval_recall": 0.996299722479186,
      "eval_runtime": 328.6993,
      "eval_samples_per_second": 19.322,
      "eval_steps_per_second": 1.208,
      "step": 892
    },
    {
      "epoch": 1.0067170445004199,
      "grad_norm": 1.4463914632797241,
      "learning_rate": 6.351540143517212e-06,
      "loss": 0.0215,
      "step": 900
    },
    {
      "epoch": 1.0626924153372517,
      "grad_norm": 0.02502119354903698,
      "learning_rate": 6.147637784001716e-06,
      "loss": 0.0062,
      "step": 950
    },
    {
      "epoch": 1.1186677861740835,
      "grad_norm": 0.039681658148765564,
      "learning_rate": 5.935569502189897e-06,
      "loss": 0.0034,
      "step": 1000
    },
    {
      "epoch": 1.1746431570109153,
      "grad_norm": 0.005194108001887798,
      "learning_rate": 5.716141641275983e-06,
      "loss": 0.0087,
      "step": 1050
    },
    {
      "epoch": 1.230618527847747,
      "grad_norm": 0.17214158177375793,
      "learning_rate": 5.490188527641829e-06,
      "loss": 0.0156,
      "step": 1100
    },
    {
      "epoch": 1.2865938986845789,
      "grad_norm": 0.48181217908859253,
      "learning_rate": 5.258569298513601e-06,
      "loss": 0.0066,
      "step": 1150
    },
    {
      "epoch": 1.3425692695214106,
      "grad_norm": 0.12410200387239456,
      "learning_rate": 5.0221646352806285e-06,
      "loss": 0.0085,
      "step": 1200
    },
    {
      "epoch": 1.3985446403582424,
      "grad_norm": 0.03792842850089073,
      "learning_rate": 4.781873414897317e-06,
      "loss": 0.0179,
      "step": 1250
    },
    {
      "epoch": 1.4545200111950742,
      "grad_norm": 1.4860434532165527,
      "learning_rate": 4.538609292100398e-06,
      "loss": 0.0112,
      "step": 1300
    },
    {
      "epoch": 1.4970612930310663,
      "eval_accuracy": 0.9918123130215714,
      "eval_f1": 0.9920196439533456,
      "eval_loss": 0.03981148824095726,
      "eval_precision": 0.9874732661167125,
      "eval_recall": 0.9966080789392537,
      "eval_runtime": 329.583,
      "eval_samples_per_second": 19.27,
      "eval_steps_per_second": 1.205,
      "step": 1338
    },
    {
      "epoch": 1.5104953820319058,
      "grad_norm": 0.007759585976600647,
      "learning_rate": 4.293297225436905e-06,
      "loss": 0.0034,
      "step": 1350
    },
    {
      "epoch": 1.5664707528687378,
      "grad_norm": 0.02019183151423931,
      "learning_rate": 4.046869960311881e-06,
      "loss": 0.004,
      "step": 1400
    },
    {
      "epoch": 1.6224461237055694,
      "grad_norm": 0.03353915363550186,
      "learning_rate": 3.8002644824282683e-06,
      "loss": 0.0067,
      "step": 1450
    },
    {
      "epoch": 1.6784214945424014,
      "grad_norm": 3.9431345462799072,
      "learning_rate": 3.5544184551039885e-06,
      "loss": 0.0062,
      "step": 1500
    },
    {
      "epoch": 1.734396865379233,
      "grad_norm": 0.009291726164519787,
      "learning_rate": 3.3102666540125503e-06,
      "loss": 0.0044,
      "step": 1550
    },
    {
      "epoch": 1.790372236216065,
      "grad_norm": 0.024871619418263435,
      "learning_rate": 3.06873741290328e-06,
      "loss": 0.0044,
      "step": 1600
    },
    {
      "epoch": 1.8463476070528966,
      "grad_norm": 0.0040198941715061665,
      "learning_rate": 2.8307490938155686e-06,
      "loss": 0.0074,
      "step": 1650
    },
    {
      "epoch": 1.9023229778897286,
      "grad_norm": 0.010355140082538128,
      "learning_rate": 2.597206595208356e-06,
      "loss": 0.007,
      "step": 1700
    },
    {
      "epoch": 1.9582983487265602,
      "grad_norm": 0.05570561811327934,
      "learning_rate": 2.36899791128193e-06,
      "loss": 0.0114,
      "step": 1750
    },
    {
      "epoch": 1.996361600895606,
      "eval_accuracy": 0.9935443237285467,
      "eval_f1": 0.9936913371287891,
      "eval_loss": 0.03450547158718109,
      "eval_precision": 0.9917076167076168,
      "eval_recall": 0.9956830095590503,
      "eval_runtime": 329.5265,
      "eval_samples_per_second": 19.273,
      "eval_steps_per_second": 1.205,
      "step": 1784
    },
    {
      "epoch": 2.0134340890008398,
      "grad_norm": 0.0031412208918482065,
      "learning_rate": 2.146990755574473e-06,
      "loss": 0.0049,
      "step": 1800
    },
    {
      "epoch": 2.0694094598376713,
      "grad_norm": 0.022113706916570663,
      "learning_rate": 1.9320292616713783e-06,
      "loss": 0.0023,
      "step": 1850
    },
    {
      "epoch": 2.1253848306745033,
      "grad_norm": 0.013204299844801426,
      "learning_rate": 1.7249307735721739e-06,
      "loss": 0.0034,
      "step": 1900
    },
    {
      "epoch": 2.181360201511335,
      "grad_norm": 0.34572720527648926,
      "learning_rate": 1.526482737918981e-06,
      "loss": 0.0002,
      "step": 1950
    },
    {
      "epoch": 2.237335572348167,
      "grad_norm": 0.003737039864063263,
      "learning_rate": 1.337439709903123e-06,
      "loss": 0.0022,
      "step": 2000
    },
    {
      "epoch": 2.2933109431849985,
      "grad_norm": 0.0018848059698939323,
      "learning_rate": 1.1585204842342755e-06,
      "loss": 0.0001,
      "step": 2050
    },
    {
      "epoch": 2.3492863140218305,
      "grad_norm": 0.0221543088555336,
      "learning_rate": 9.904053620810118e-07,
      "loss": 0.0002,
      "step": 2100
    },
    {
      "epoch": 2.405261684858662,
      "grad_norm": 0.0014764212537556887,
      "learning_rate": 8.337335643746119e-07,
      "loss": 0.003,
      "step": 2150
    },
    {
      "epoch": 2.461237055695494,
      "grad_norm": 0.24720342457294464,
      "learning_rate": 6.891008013114716e-07,
      "loss": 0.0025,
      "step": 2200
    },
    {
      "epoch": 2.494822278197593,
      "eval_accuracy": 0.9940166902849945,
      "eval_f1": 0.9941502463054187,
      "eval_loss": 0.034027792513370514,
      "eval_precision": 0.9926221948970181,
      "eval_recall": 0.9956830095590503,
      "eval_runtime": 328.5789,
      "eval_samples_per_second": 19.329,
      "eval_steps_per_second": 1.208,
      "step": 2230
    },
    {
      "epoch": 2.5172124265323257,
      "grad_norm": 0.04605395719408989,
      "learning_rate": 5.570570072955551e-07,
      "loss": 0.0017,
      "step": 2250
    },
    {
      "epoch": 2.5731877973691577,
      "grad_norm": 0.0023472148459404707,
      "learning_rate": 4.3810424993327053e-07,
      "loss": 0.0018,
      "step": 2300
    },
    {
      "epoch": 2.6291631682059893,
      "grad_norm": 0.0014394799945876002,
      "learning_rate": 3.326948210313607e-07,
      "loss": 0.0015,
      "step": 2350
    },
    {
      "epoch": 2.6851385390428213,
      "grad_norm": 0.029553143307566643,
      "learning_rate": 2.412295168563667e-07,
      "loss": 0.0007,
      "step": 2400
    },
    {
      "epoch": 2.741113909879653,
      "grad_norm": 0.0062004872597754,
      "learning_rate": 1.6405611419461017e-07,
      "loss": 0.0002,
      "step": 2450
    },
    {
      "epoch": 2.797089280716485,
      "grad_norm": 1.4929763078689575,
      "learning_rate": 1.0146804800713304e-07,
      "loss": 0.0001,
      "step": 2500
    },
    {
      "epoch": 2.8530646515533165,
      "grad_norm": 0.0059923469088971615,
      "learning_rate": 5.3703295707523854e-08,
      "loss": 0.0011,
      "step": 2550
    },
    {
      "epoch": 2.9090400223901485,
      "grad_norm": 0.00393084529787302,
      "learning_rate": 2.094347230492799e-08,
      "loss": 0.0014,
      "step": 2600
    },
    {
      "epoch": 2.96501539322698,
      "grad_norm": 0.0017132419161498547,
      "learning_rate": 3.3131398527510567e-09,
      "loss": 0.0017,
      "step": 2650
    },
    {
      "epoch": 2.9941225860621326,
      "eval_accuracy": 0.9938592347661785,
      "eval_f1": 0.9939935314954567,
      "eval_loss": 0.033541660755872726,
      "eval_precision": 0.9929230769230769,
      "eval_recall": 0.9950662966389145,
      "eval_runtime": 329.149,
      "eval_samples_per_second": 19.295,
      "eval_steps_per_second": 1.206,
      "step": 2676
    }
  ],
  "logging_steps": 50,
  "max_steps": 2682,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 892,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.189990278329344e+17,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}