diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,22192 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.0, + "eval_steps": 500, + "global_step": 2215, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "contrastive_loss": 1.7576, + "epoch": 0.002257336343115124, + "grad_norm": 83.0724105834961, + "learning_rate": 2.0000000000000002e-07, + "lm_loss": 18.2929, + "loss": 5.5681, + "step": 1, + "text_contrastive_loss": 3.9624 + }, + { + "contrastive_loss": 1.7382, + "epoch": 0.004514672686230248, + "grad_norm": 80.65425109863281, + "learning_rate": 4.0000000000000003e-07, + "lm_loss": 18.5957, + "loss": 5.1631, + "step": 2, + "text_contrastive_loss": 3.1305 + }, + { + "contrastive_loss": 1.4854, + "epoch": 0.006772009029345372, + "grad_norm": 73.6375961303711, + "learning_rate": 6.000000000000001e-07, + "lm_loss": 17.8023, + "loss": 4.828, + "step": 3, + "text_contrastive_loss": 3.1249 + }, + { + "contrastive_loss": 1.4299, + "epoch": 0.009029345372460496, + "grad_norm": 68.13858032226562, + "learning_rate": 8.000000000000001e-07, + "lm_loss": 17.6495, + "loss": 4.8285, + "step": 4, + "text_contrastive_loss": 3.2673 + }, + { + "contrastive_loss": 1.4406, + "epoch": 0.011286681715575621, + "grad_norm": 57.55910110473633, + "learning_rate": 1.0000000000000002e-06, + "lm_loss": 17.7182, + "loss": 4.4308, + "step": 5, + "text_contrastive_loss": 2.4367 + }, + { + "contrastive_loss": 1.6318, + "epoch": 0.013544018058690745, + "grad_norm": 68.49832153320312, + "learning_rate": 1.2000000000000002e-06, + "lm_loss": 17.6306, + "loss": 5.1547, + "step": 6, + "text_contrastive_loss": 3.5197 + }, + { + "contrastive_loss": 1.4941, + "epoch": 0.01580135440180587, + "grad_norm": 63.47561264038086, + "learning_rate": 1.4000000000000001e-06, + "lm_loss": 17.8862, + "loss": 4.6936, + "step": 7, + "text_contrastive_loss": 2.8219 + }, + { + "contrastive_loss": 1.3978, + "epoch": 0.01805869074492099, + "grad_norm": 52.5750617980957, + "learning_rate": 1.6000000000000001e-06, + "lm_loss": 17.6667, + "loss": 4.4269, + "step": 8, + "text_contrastive_loss": 2.5248 + }, + { + "contrastive_loss": 1.3932, + "epoch": 0.020316027088036117, + "grad_norm": 58.56637954711914, + "learning_rate": 1.8000000000000001e-06, + "lm_loss": 17.3383, + "loss": 4.5758, + "step": 9, + "text_contrastive_loss": 2.8975 + }, + { + "contrastive_loss": 1.388, + "epoch": 0.022573363431151242, + "grad_norm": 51.45430374145508, + "learning_rate": 2.0000000000000003e-06, + "lm_loss": 17.4333, + "loss": 4.1629, + "step": 10, + "text_contrastive_loss": 2.063 + }, + { + "contrastive_loss": 1.453, + "epoch": 0.024830699774266364, + "grad_norm": 52.92259979248047, + "learning_rate": 2.2e-06, + "lm_loss": 16.8459, + "loss": 4.4766, + "step": 11, + "text_contrastive_loss": 2.6781 + }, + { + "contrastive_loss": 1.4529, + "epoch": 0.02708803611738149, + "grad_norm": 47.33721923828125, + "learning_rate": 2.4000000000000003e-06, + "lm_loss": 16.5753, + "loss": 4.1433, + "step": 12, + "text_contrastive_loss": 2.0658 + }, + { + "contrastive_loss": 1.3384, + "epoch": 0.029345372460496615, + "grad_norm": 43.125755310058594, + "learning_rate": 2.6e-06, + "lm_loss": 16.4345, + "loss": 4.1263, + "step": 13, + "text_contrastive_loss": 2.289 + }, + { + "contrastive_loss": 1.2533, + "epoch": 0.03160270880361174, + "grad_norm": 40.11970138549805, + "learning_rate": 2.8000000000000003e-06, + "lm_loss": 16.2871, + "loss": 3.9051, + "step": 14, + "text_contrastive_loss": 2.0462 + }, + { + "contrastive_loss": 1.3104, + "epoch": 0.033860045146726865, + "grad_norm": 36.58595275878906, + "learning_rate": 3e-06, + "lm_loss": 15.9117, + "loss": 3.8372, + "step": 15, + "text_contrastive_loss": 1.8711 + }, + { + "contrastive_loss": 1.3056, + "epoch": 0.03611738148984198, + "grad_norm": 35.10764694213867, + "learning_rate": 3.2000000000000003e-06, + "lm_loss": 15.4712, + "loss": 3.7313, + "step": 16, + "text_contrastive_loss": 1.7571 + }, + { + "contrastive_loss": 1.3351, + "epoch": 0.03837471783295711, + "grad_norm": 32.6670036315918, + "learning_rate": 3.4000000000000005e-06, + "lm_loss": 15.0543, + "loss": 3.6613, + "step": 17, + "text_contrastive_loss": 1.6414 + }, + { + "contrastive_loss": 1.4445, + "epoch": 0.040632054176072234, + "grad_norm": 37.94453811645508, + "learning_rate": 3.6000000000000003e-06, + "lm_loss": 14.7468, + "loss": 3.8162, + "step": 18, + "text_contrastive_loss": 1.794 + }, + { + "contrastive_loss": 1.3024, + "epoch": 0.04288939051918736, + "grad_norm": 30.52195930480957, + "learning_rate": 3.8000000000000005e-06, + "lm_loss": 14.5291, + "loss": 3.5248, + "step": 19, + "text_contrastive_loss": 1.539 + }, + { + "contrastive_loss": 1.3973, + "epoch": 0.045146726862302484, + "grad_norm": 28.206356048583984, + "learning_rate": 4.000000000000001e-06, + "lm_loss": 14.1728, + "loss": 3.5596, + "step": 20, + "text_contrastive_loss": 1.49 + }, + { + "contrastive_loss": 1.3414, + "epoch": 0.04740406320541761, + "grad_norm": 28.01343536376953, + "learning_rate": 4.2000000000000004e-06, + "lm_loss": 13.6672, + "loss": 3.5034, + "step": 21, + "text_contrastive_loss": 1.5904 + }, + { + "contrastive_loss": 1.4096, + "epoch": 0.04966139954853273, + "grad_norm": 30.40696907043457, + "learning_rate": 4.4e-06, + "lm_loss": 13.4134, + "loss": 3.4848, + "step": 22, + "text_contrastive_loss": 1.4676 + }, + { + "contrastive_loss": 1.3485, + "epoch": 0.05191873589164785, + "grad_norm": 27.267833709716797, + "learning_rate": 4.600000000000001e-06, + "lm_loss": 13.0796, + "loss": 3.3649, + "step": 23, + "text_contrastive_loss": 1.4167 + }, + { + "contrastive_loss": 1.2928, + "epoch": 0.05417607223476298, + "grad_norm": 26.30367660522461, + "learning_rate": 4.800000000000001e-06, + "lm_loss": 12.588, + "loss": 3.2157, + "step": 24, + "text_contrastive_loss": 1.3283 + }, + { + "contrastive_loss": 1.1886, + "epoch": 0.056433408577878104, + "grad_norm": 25.767690658569336, + "learning_rate": 5e-06, + "lm_loss": 12.3093, + "loss": 3.0822, + "step": 25, + "text_contrastive_loss": 1.3254 + }, + { + "contrastive_loss": 1.3105, + "epoch": 0.05869074492099323, + "grad_norm": 26.667863845825195, + "learning_rate": 5.2e-06, + "lm_loss": 11.9948, + "loss": 3.1951, + "step": 26, + "text_contrastive_loss": 1.3703 + }, + { + "contrastive_loss": 1.3686, + "epoch": 0.060948081264108354, + "grad_norm": 24.12013816833496, + "learning_rate": 5.400000000000001e-06, + "lm_loss": 11.6429, + "loss": 3.1517, + "step": 27, + "text_contrastive_loss": 1.2377 + }, + { + "contrastive_loss": 1.2975, + "epoch": 0.06320541760722348, + "grad_norm": 21.883312225341797, + "learning_rate": 5.600000000000001e-06, + "lm_loss": 11.3803, + "loss": 3.1306, + "step": 28, + "text_contrastive_loss": 1.3901 + }, + { + "contrastive_loss": 1.3535, + "epoch": 0.0654627539503386, + "grad_norm": 25.86800765991211, + "learning_rate": 5.8e-06, + "lm_loss": 10.9448, + "loss": 3.0264, + "step": 29, + "text_contrastive_loss": 1.1568 + }, + { + "contrastive_loss": 1.3445, + "epoch": 0.06772009029345373, + "grad_norm": 23.156831741333008, + "learning_rate": 6e-06, + "lm_loss": 10.6367, + "loss": 3.1278, + "step": 30, + "text_contrastive_loss": 1.4392 + }, + { + "contrastive_loss": 1.1378, + "epoch": 0.06997742663656885, + "grad_norm": 21.37388038635254, + "learning_rate": 6.200000000000001e-06, + "lm_loss": 10.5811, + "loss": 2.7304, + "step": 31, + "text_contrastive_loss": 1.069 + }, + { + "contrastive_loss": 1.2668, + "epoch": 0.07223476297968397, + "grad_norm": 22.608537673950195, + "learning_rate": 6.4000000000000006e-06, + "lm_loss": 10.1412, + "loss": 2.9049, + "step": 32, + "text_contrastive_loss": 1.248 + }, + { + "contrastive_loss": 1.1991, + "epoch": 0.0744920993227991, + "grad_norm": 20.33625030517578, + "learning_rate": 6.600000000000001e-06, + "lm_loss": 10.1149, + "loss": 2.7904, + "step": 33, + "text_contrastive_loss": 1.1597 + }, + { + "contrastive_loss": 1.2407, + "epoch": 0.07674943566591422, + "grad_norm": 22.88743019104004, + "learning_rate": 6.800000000000001e-06, + "lm_loss": 9.8612, + "loss": 2.8439, + "step": 34, + "text_contrastive_loss": 1.2343 + }, + { + "contrastive_loss": 1.1218, + "epoch": 0.07900677200902935, + "grad_norm": 22.587696075439453, + "learning_rate": 7e-06, + "lm_loss": 9.5444, + "loss": 2.6075, + "step": 35, + "text_contrastive_loss": 1.0625 + }, + { + "contrastive_loss": 1.2272, + "epoch": 0.08126410835214447, + "grad_norm": 22.853036880493164, + "learning_rate": 7.2000000000000005e-06, + "lm_loss": 9.3847, + "loss": 2.7395, + "step": 36, + "text_contrastive_loss": 1.1476 + }, + { + "contrastive_loss": 1.2236, + "epoch": 0.0835214446952596, + "grad_norm": 23.60159683227539, + "learning_rate": 7.4e-06, + "lm_loss": 9.1248, + "loss": 2.7053, + "step": 37, + "text_contrastive_loss": 1.1384 + }, + { + "contrastive_loss": 1.1706, + "epoch": 0.08577878103837472, + "grad_norm": 25.40546417236328, + "learning_rate": 7.600000000000001e-06, + "lm_loss": 9.15, + "loss": 2.7461, + "step": 38, + "text_contrastive_loss": 1.321 + }, + { + "contrastive_loss": 1.0864, + "epoch": 0.08803611738148984, + "grad_norm": 25.925676345825195, + "learning_rate": 7.800000000000002e-06, + "lm_loss": 9.0864, + "loss": 2.5104, + "step": 39, + "text_contrastive_loss": 1.0307 + }, + { + "contrastive_loss": 0.999, + "epoch": 0.09029345372460497, + "grad_norm": 23.36672592163086, + "learning_rate": 8.000000000000001e-06, + "lm_loss": 8.905, + "loss": 2.4223, + "step": 40, + "text_contrastive_loss": 1.0656 + }, + { + "contrastive_loss": 1.1439, + "epoch": 0.09255079006772009, + "grad_norm": 24.166732788085938, + "learning_rate": 8.2e-06, + "lm_loss": 8.8927, + "loss": 2.6421, + "step": 41, + "text_contrastive_loss": 1.218 + }, + { + "contrastive_loss": 1.2004, + "epoch": 0.09480812641083522, + "grad_norm": 25.256484985351562, + "learning_rate": 8.400000000000001e-06, + "lm_loss": 8.6496, + "loss": 2.656, + "step": 42, + "text_contrastive_loss": 1.1813 + }, + { + "contrastive_loss": 1.0696, + "epoch": 0.09706546275395034, + "grad_norm": 22.554882049560547, + "learning_rate": 8.6e-06, + "lm_loss": 8.5634, + "loss": 2.4508, + "step": 43, + "text_contrastive_loss": 1.0498 + }, + { + "contrastive_loss": 1.1158, + "epoch": 0.09932279909706546, + "grad_norm": 23.315597534179688, + "learning_rate": 8.8e-06, + "lm_loss": 8.4833, + "loss": 2.5364, + "step": 44, + "text_contrastive_loss": 1.1445 + }, + { + "contrastive_loss": 1.169, + "epoch": 0.10158013544018059, + "grad_norm": 26.591489791870117, + "learning_rate": 9e-06, + "lm_loss": 8.4115, + "loss": 2.6611, + "step": 45, + "text_contrastive_loss": 1.302 + }, + { + "contrastive_loss": 1.0582, + "epoch": 0.1038374717832957, + "grad_norm": 25.223093032836914, + "learning_rate": 9.200000000000002e-06, + "lm_loss": 8.3529, + "loss": 2.4645, + "step": 46, + "text_contrastive_loss": 1.142 + }, + { + "contrastive_loss": 1.0643, + "epoch": 0.10609480812641084, + "grad_norm": 21.448993682861328, + "learning_rate": 9.4e-06, + "lm_loss": 8.3398, + "loss": 2.4296, + "step": 47, + "text_contrastive_loss": 1.0627 + }, + { + "contrastive_loss": 1.1898, + "epoch": 0.10835214446952596, + "grad_norm": 22.973129272460938, + "learning_rate": 9.600000000000001e-06, + "lm_loss": 8.2453, + "loss": 2.5315, + "step": 48, + "text_contrastive_loss": 1.0344 + }, + { + "contrastive_loss": 1.1033, + "epoch": 0.11060948081264109, + "grad_norm": 24.533485412597656, + "learning_rate": 9.800000000000001e-06, + "lm_loss": 8.211, + "loss": 2.4239, + "step": 49, + "text_contrastive_loss": 0.9989 + }, + { + "contrastive_loss": 0.9491, + "epoch": 0.11286681715575621, + "grad_norm": 20.077064514160156, + "learning_rate": 1e-05, + "lm_loss": 8.1512, + "loss": 2.283, + "step": 50, + "text_contrastive_loss": 1.0376 + }, + { + "contrastive_loss": 1.137, + "epoch": 0.11512415349887133, + "grad_norm": 21.466537475585938, + "learning_rate": 9.999994735903083e-06, + "lm_loss": 8.0854, + "loss": 2.4723, + "step": 51, + "text_contrastive_loss": 1.0535 + }, + { + "contrastive_loss": 1.0502, + "epoch": 0.11738148984198646, + "grad_norm": 23.226255416870117, + "learning_rate": 9.999978943623417e-06, + "lm_loss": 7.9951, + "loss": 2.4097, + "step": 52, + "text_contrastive_loss": 1.12 + }, + { + "contrastive_loss": 0.9545, + "epoch": 0.11963882618510158, + "grad_norm": 20.001995086669922, + "learning_rate": 9.999952623194252e-06, + "lm_loss": 7.9469, + "loss": 2.2316, + "step": 53, + "text_contrastive_loss": 0.9648 + }, + { + "contrastive_loss": 1.0112, + "epoch": 0.12189616252821671, + "grad_norm": 25.90329360961914, + "learning_rate": 9.999915774671009e-06, + "lm_loss": 7.933, + "loss": 2.3686, + "step": 54, + "text_contrastive_loss": 1.1281 + }, + { + "contrastive_loss": 1.0171, + "epoch": 0.12415349887133183, + "grad_norm": 21.86749839782715, + "learning_rate": 9.999868398131282e-06, + "lm_loss": 8.0448, + "loss": 2.396, + "step": 55, + "text_contrastive_loss": 1.1488 + }, + { + "contrastive_loss": 0.8968, + "epoch": 0.12641083521444696, + "grad_norm": 22.552684783935547, + "learning_rate": 9.999810493674826e-06, + "lm_loss": 7.9022, + "loss": 2.1881, + "step": 56, + "text_contrastive_loss": 1.0021 + }, + { + "contrastive_loss": 0.9698, + "epoch": 0.12866817155756208, + "grad_norm": 23.647756576538086, + "learning_rate": 9.999742061423567e-06, + "lm_loss": 7.9914, + "loss": 2.3512, + "step": 57, + "text_contrastive_loss": 1.1645 + }, + { + "contrastive_loss": 1.0828, + "epoch": 0.1309255079006772, + "grad_norm": 20.83641815185547, + "learning_rate": 9.999663101521599e-06, + "lm_loss": 7.8775, + "loss": 2.401, + "step": 58, + "text_contrastive_loss": 1.0609 + }, + { + "contrastive_loss": 0.9098, + "epoch": 0.13318284424379231, + "grad_norm": 19.888952255249023, + "learning_rate": 9.999573614135183e-06, + "lm_loss": 7.8223, + "loss": 2.1864, + "step": 59, + "text_contrastive_loss": 0.9888 + }, + { + "contrastive_loss": 1.0804, + "epoch": 0.13544018058690746, + "grad_norm": 23.106822967529297, + "learning_rate": 9.999473599452746e-06, + "lm_loss": 7.6884, + "loss": 2.411, + "step": 60, + "text_contrastive_loss": 1.1234 + }, + { + "contrastive_loss": 0.962, + "epoch": 0.13769751693002258, + "grad_norm": 21.09849739074707, + "learning_rate": 9.999363057684885e-06, + "lm_loss": 7.7574, + "loss": 2.2024, + "step": 61, + "text_contrastive_loss": 0.9293 + }, + { + "contrastive_loss": 1.1026, + "epoch": 0.1399548532731377, + "grad_norm": 22.664743423461914, + "learning_rate": 9.999241989064358e-06, + "lm_loss": 7.6629, + "loss": 2.3475, + "step": 62, + "text_contrastive_loss": 0.9571 + }, + { + "contrastive_loss": 1.0655, + "epoch": 0.14221218961625282, + "grad_norm": 21.019250869750977, + "learning_rate": 9.999110393846097e-06, + "lm_loss": 7.6433, + "loss": 2.4061, + "step": 63, + "text_contrastive_loss": 1.1526 + }, + { + "contrastive_loss": 0.94, + "epoch": 0.14446952595936793, + "grad_norm": 22.08234977722168, + "learning_rate": 9.998968272307187e-06, + "lm_loss": 7.6577, + "loss": 2.18, + "step": 64, + "text_contrastive_loss": 0.9484 + }, + { + "contrastive_loss": 1.045, + "epoch": 0.14672686230248308, + "grad_norm": 20.573204040527344, + "learning_rate": 9.99881562474689e-06, + "lm_loss": 7.5849, + "loss": 2.3881, + "step": 65, + "text_contrastive_loss": 1.1692 + }, + { + "contrastive_loss": 0.9304, + "epoch": 0.1489841986455982, + "grad_norm": 19.680683135986328, + "learning_rate": 9.998652451486626e-06, + "lm_loss": 7.5698, + "loss": 2.2469, + "step": 66, + "text_contrastive_loss": 1.119 + }, + { + "contrastive_loss": 1.0139, + "epoch": 0.15124153498871332, + "grad_norm": 21.903793334960938, + "learning_rate": 9.998478752869976e-06, + "lm_loss": 7.5142, + "loss": 2.2693, + "step": 67, + "text_contrastive_loss": 1.0078 + }, + { + "contrastive_loss": 1.0315, + "epoch": 0.15349887133182843, + "grad_norm": 21.78392219543457, + "learning_rate": 9.998294529262688e-06, + "lm_loss": 7.4581, + "loss": 2.3674, + "step": 68, + "text_contrastive_loss": 1.1802 + }, + { + "contrastive_loss": 1.0517, + "epoch": 0.15575620767494355, + "grad_norm": 22.221651077270508, + "learning_rate": 9.998099781052673e-06, + "lm_loss": 7.4321, + "loss": 2.34, + "step": 69, + "text_contrastive_loss": 1.0903 + }, + { + "contrastive_loss": 0.8489, + "epoch": 0.1580135440180587, + "grad_norm": 18.843486785888672, + "learning_rate": 9.997894508649995e-06, + "lm_loss": 7.5177, + "loss": 2.0465, + "step": 70, + "text_contrastive_loss": 0.8917 + }, + { + "contrastive_loss": 0.9347, + "epoch": 0.16027088036117382, + "grad_norm": 21.679800033569336, + "learning_rate": 9.997678712486889e-06, + "lm_loss": 7.4389, + "loss": 2.1878, + "step": 71, + "text_contrastive_loss": 1.0185 + }, + { + "contrastive_loss": 0.9222, + "epoch": 0.16252821670428894, + "grad_norm": 21.853797912597656, + "learning_rate": 9.99745239301774e-06, + "lm_loss": 7.4099, + "loss": 2.1896, + "step": 72, + "text_contrastive_loss": 1.0527 + }, + { + "contrastive_loss": 0.936, + "epoch": 0.16478555304740405, + "grad_norm": 19.19109344482422, + "learning_rate": 9.997215550719097e-06, + "lm_loss": 7.504, + "loss": 2.1866, + "step": 73, + "text_contrastive_loss": 1.0003 + }, + { + "contrastive_loss": 1.017, + "epoch": 0.1670428893905192, + "grad_norm": 23.202587127685547, + "learning_rate": 9.996968186089664e-06, + "lm_loss": 7.5318, + "loss": 2.3209, + "step": 74, + "text_contrastive_loss": 1.1015 + }, + { + "contrastive_loss": 0.8918, + "epoch": 0.16930022573363432, + "grad_norm": 18.873233795166016, + "learning_rate": 9.996710299650302e-06, + "lm_loss": 7.3373, + "loss": 2.1512, + "step": 75, + "text_contrastive_loss": 1.0514 + }, + { + "contrastive_loss": 1.0035, + "epoch": 0.17155756207674944, + "grad_norm": 22.463476181030273, + "learning_rate": 9.996441891944023e-06, + "lm_loss": 7.4114, + "loss": 2.3061, + "step": 76, + "text_contrastive_loss": 1.1229 + }, + { + "contrastive_loss": 1.0282, + "epoch": 0.17381489841986456, + "grad_norm": 21.54694366455078, + "learning_rate": 9.996162963536004e-06, + "lm_loss": 7.3556, + "loss": 2.2261, + "step": 77, + "text_contrastive_loss": 0.9248 + }, + { + "contrastive_loss": 1.0397, + "epoch": 0.17607223476297967, + "grad_norm": 22.61308479309082, + "learning_rate": 9.995873515013562e-06, + "lm_loss": 7.2681, + "loss": 2.382, + "step": 78, + "text_contrastive_loss": 1.2308 + }, + { + "contrastive_loss": 0.9218, + "epoch": 0.17832957110609482, + "grad_norm": 20.243572235107422, + "learning_rate": 9.99557354698617e-06, + "lm_loss": 7.3342, + "loss": 2.2013, + "step": 79, + "text_contrastive_loss": 1.0922 + }, + { + "contrastive_loss": 0.9145, + "epoch": 0.18058690744920994, + "grad_norm": 20.536954879760742, + "learning_rate": 9.995263060085456e-06, + "lm_loss": 7.3494, + "loss": 2.1959, + "step": 80, + "text_contrastive_loss": 1.0929 + }, + { + "contrastive_loss": 0.957, + "epoch": 0.18284424379232506, + "grad_norm": 21.177587509155273, + "learning_rate": 9.99494205496519e-06, + "lm_loss": 7.2084, + "loss": 2.2445, + "step": 81, + "text_contrastive_loss": 1.1331 + }, + { + "contrastive_loss": 0.7592, + "epoch": 0.18510158013544017, + "grad_norm": 18.126768112182617, + "learning_rate": 9.994610532301296e-06, + "lm_loss": 7.3328, + "loss": 1.9956, + "step": 82, + "text_contrastive_loss": 1.0064 + }, + { + "contrastive_loss": 1.0863, + "epoch": 0.1873589164785553, + "grad_norm": 20.58598518371582, + "learning_rate": 9.99426849279184e-06, + "lm_loss": 7.1922, + "loss": 2.3667, + "step": 83, + "text_contrastive_loss": 1.1222 + }, + { + "contrastive_loss": 1.0172, + "epoch": 0.18961625282167044, + "grad_norm": 21.558956146240234, + "learning_rate": 9.993915937157033e-06, + "lm_loss": 7.2709, + "loss": 2.2399, + "step": 84, + "text_contrastive_loss": 0.9911 + }, + { + "contrastive_loss": 1.0567, + "epoch": 0.19187358916478556, + "grad_norm": 20.75426483154297, + "learning_rate": 9.99355286613923e-06, + "lm_loss": 7.1795, + "loss": 2.407, + "step": 85, + "text_contrastive_loss": 1.2648 + }, + { + "contrastive_loss": 0.8501, + "epoch": 0.19413092550790068, + "grad_norm": 18.426937103271484, + "learning_rate": 9.993179280502926e-06, + "lm_loss": 7.1365, + "loss": 2.0173, + "step": 86, + "text_contrastive_loss": 0.9071 + }, + { + "contrastive_loss": 0.9691, + "epoch": 0.1963882618510158, + "grad_norm": 24.029739379882812, + "learning_rate": 9.99279518103476e-06, + "lm_loss": 7.3336, + "loss": 2.2099, + "step": 87, + "text_contrastive_loss": 1.0147 + }, + { + "contrastive_loss": 1.0107, + "epoch": 0.1986455981941309, + "grad_norm": 22.55620765686035, + "learning_rate": 9.992400568543506e-06, + "lm_loss": 7.1243, + "loss": 2.2311, + "step": 88, + "text_contrastive_loss": 1.0161 + }, + { + "contrastive_loss": 0.9813, + "epoch": 0.20090293453724606, + "grad_norm": 23.12877082824707, + "learning_rate": 9.991995443860074e-06, + "lm_loss": 7.0373, + "loss": 2.1753, + "step": 89, + "text_contrastive_loss": 0.9805 + }, + { + "contrastive_loss": 1.0067, + "epoch": 0.20316027088036118, + "grad_norm": 20.94490623474121, + "learning_rate": 9.991579807837511e-06, + "lm_loss": 7.0666, + "loss": 2.2587, + "step": 90, + "text_contrastive_loss": 1.0907 + }, + { + "contrastive_loss": 0.8441, + "epoch": 0.2054176072234763, + "grad_norm": 17.67084503173828, + "learning_rate": 9.991153661350996e-06, + "lm_loss": 7.0421, + "loss": 2.0348, + "step": 91, + "text_contrastive_loss": 0.9731 + }, + { + "contrastive_loss": 0.8157, + "epoch": 0.2076749435665914, + "grad_norm": 19.95044708251953, + "learning_rate": 9.990717005297841e-06, + "lm_loss": 7.139, + "loss": 2.1037, + "step": 92, + "text_contrastive_loss": 1.1482 + }, + { + "contrastive_loss": 0.9876, + "epoch": 0.20993227990970656, + "grad_norm": 19.68552017211914, + "learning_rate": 9.990269840597484e-06, + "lm_loss": 7.0277, + "loss": 2.1938, + "step": 93, + "text_contrastive_loss": 1.0068 + }, + { + "contrastive_loss": 0.8988, + "epoch": 0.21218961625282168, + "grad_norm": 18.531797409057617, + "learning_rate": 9.989812168191495e-06, + "lm_loss": 7.0713, + "loss": 2.0347, + "step": 94, + "text_contrastive_loss": 0.8575 + }, + { + "contrastive_loss": 1.0284, + "epoch": 0.2144469525959368, + "grad_norm": 21.68889045715332, + "learning_rate": 9.989343989043563e-06, + "lm_loss": 6.9658, + "loss": 2.2725, + "step": 95, + "text_contrastive_loss": 1.095 + }, + { + "contrastive_loss": 0.9913, + "epoch": 0.21670428893905191, + "grad_norm": 23.28238868713379, + "learning_rate": 9.988865304139509e-06, + "lm_loss": 7.0698, + "loss": 2.2637, + "step": 96, + "text_contrastive_loss": 1.1309 + }, + { + "contrastive_loss": 1.0535, + "epoch": 0.21896162528216703, + "grad_norm": 21.559371948242188, + "learning_rate": 9.988376114487264e-06, + "lm_loss": 7.0501, + "loss": 2.3147, + "step": 97, + "text_contrastive_loss": 1.1124 + }, + { + "contrastive_loss": 0.7835, + "epoch": 0.22121896162528218, + "grad_norm": 17.92148780822754, + "learning_rate": 9.98787642111689e-06, + "lm_loss": 6.9048, + "loss": 1.9187, + "step": 98, + "text_contrastive_loss": 0.8894 + }, + { + "contrastive_loss": 1.0026, + "epoch": 0.2234762979683973, + "grad_norm": 20.566707611083984, + "learning_rate": 9.98736622508056e-06, + "lm_loss": 7.0112, + "loss": 2.1804, + "step": 99, + "text_contrastive_loss": 0.9533 + }, + { + "contrastive_loss": 1.1049, + "epoch": 0.22573363431151242, + "grad_norm": 22.571447372436523, + "learning_rate": 9.98684552745256e-06, + "lm_loss": 6.8937, + "loss": 2.3282, + "step": 100, + "text_contrastive_loss": 1.0679 + }, + { + "contrastive_loss": 0.9926, + "epoch": 0.22799097065462753, + "grad_norm": 22.88062858581543, + "learning_rate": 9.986314329329294e-06, + "lm_loss": 6.9328, + "loss": 2.2112, + "step": 101, + "text_contrastive_loss": 1.0507 + }, + { + "contrastive_loss": 0.926, + "epoch": 0.23024830699774265, + "grad_norm": 23.333864212036133, + "learning_rate": 9.985772631829272e-06, + "lm_loss": 7.0156, + "loss": 2.0837, + "step": 102, + "text_contrastive_loss": 0.9122 + }, + { + "contrastive_loss": 0.8629, + "epoch": 0.2325056433408578, + "grad_norm": 19.736459732055664, + "learning_rate": 9.985220436093112e-06, + "lm_loss": 6.8497, + "loss": 2.1504, + "step": 103, + "text_contrastive_loss": 1.2051 + }, + { + "contrastive_loss": 0.9073, + "epoch": 0.23476297968397292, + "grad_norm": 22.719139099121094, + "learning_rate": 9.984657743283543e-06, + "lm_loss": 6.9904, + "loss": 2.1453, + "step": 104, + "text_contrastive_loss": 1.0779 + }, + { + "contrastive_loss": 1.0299, + "epoch": 0.23702031602708803, + "grad_norm": 24.38851547241211, + "learning_rate": 9.984084554585387e-06, + "lm_loss": 6.9032, + "loss": 2.2046, + "step": 105, + "text_contrastive_loss": 0.9688 + }, + { + "contrastive_loss": 0.7721, + "epoch": 0.23927765237020315, + "grad_norm": 19.937759399414062, + "learning_rate": 9.983500871205577e-06, + "lm_loss": 6.8874, + "loss": 1.8608, + "step": 106, + "text_contrastive_loss": 0.8 + }, + { + "contrastive_loss": 0.9991, + "epoch": 0.24153498871331827, + "grad_norm": 22.109214782714844, + "learning_rate": 9.982906694373136e-06, + "lm_loss": 6.8954, + "loss": 2.2567, + "step": 107, + "text_contrastive_loss": 1.1362 + }, + { + "contrastive_loss": 1.0091, + "epoch": 0.24379232505643342, + "grad_norm": 23.73514175415039, + "learning_rate": 9.98230202533919e-06, + "lm_loss": 6.8097, + "loss": 2.2244, + "step": 108, + "text_contrastive_loss": 1.0687 + }, + { + "contrastive_loss": 0.8579, + "epoch": 0.24604966139954854, + "grad_norm": 19.403186798095703, + "learning_rate": 9.98168686537695e-06, + "lm_loss": 6.8249, + "loss": 1.9528, + "step": 109, + "text_contrastive_loss": 0.8249 + }, + { + "contrastive_loss": 0.8255, + "epoch": 0.24830699774266365, + "grad_norm": 22.000633239746094, + "learning_rate": 9.98106121578172e-06, + "lm_loss": 6.841, + "loss": 2.0006, + "step": 110, + "text_contrastive_loss": 0.982 + }, + { + "contrastive_loss": 0.9317, + "epoch": 0.2505643340857788, + "grad_norm": 22.940288543701172, + "learning_rate": 9.980425077870895e-06, + "lm_loss": 6.8525, + "loss": 2.1067, + "step": 111, + "text_contrastive_loss": 0.9794 + }, + { + "contrastive_loss": 0.8398, + "epoch": 0.2528216704288939, + "grad_norm": 19.426469802856445, + "learning_rate": 9.979778452983949e-06, + "lm_loss": 6.8473, + "loss": 2.0275, + "step": 112, + "text_contrastive_loss": 1.006 + }, + { + "contrastive_loss": 0.9049, + "epoch": 0.255079006772009, + "grad_norm": 21.40607452392578, + "learning_rate": 9.979121342482442e-06, + "lm_loss": 6.7444, + "loss": 2.1078, + "step": 113, + "text_contrastive_loss": 1.0568 + }, + { + "contrastive_loss": 0.8558, + "epoch": 0.25733634311512416, + "grad_norm": 22.809553146362305, + "learning_rate": 9.978453747750012e-06, + "lm_loss": 6.8781, + "loss": 1.9855, + "step": 114, + "text_contrastive_loss": 0.8837 + }, + { + "contrastive_loss": 0.8186, + "epoch": 0.2595936794582393, + "grad_norm": 17.59353256225586, + "learning_rate": 9.977775670192373e-06, + "lm_loss": 6.7933, + "loss": 1.9924, + "step": 115, + "text_contrastive_loss": 0.989 + }, + { + "contrastive_loss": 0.9305, + "epoch": 0.2618510158013544, + "grad_norm": 21.62824249267578, + "learning_rate": 9.977087111237307e-06, + "lm_loss": 6.7723, + "loss": 2.145, + "step": 116, + "text_contrastive_loss": 1.0745 + }, + { + "contrastive_loss": 0.9281, + "epoch": 0.26410835214446954, + "grad_norm": 24.133895874023438, + "learning_rate": 9.976388072334674e-06, + "lm_loss": 6.7408, + "loss": 2.0355, + "step": 117, + "text_contrastive_loss": 0.8666 + }, + { + "contrastive_loss": 0.949, + "epoch": 0.26636568848758463, + "grad_norm": 20.17030906677246, + "learning_rate": 9.975678554956397e-06, + "lm_loss": 6.7308, + "loss": 2.0432, + "step": 118, + "text_contrastive_loss": 0.8423 + }, + { + "contrastive_loss": 0.8287, + "epoch": 0.2686230248306998, + "grad_norm": 19.101581573486328, + "learning_rate": 9.974958560596464e-06, + "lm_loss": 6.7608, + "loss": 1.9723, + "step": 119, + "text_contrastive_loss": 0.935 + }, + { + "contrastive_loss": 0.94, + "epoch": 0.2708803611738149, + "grad_norm": 20.302513122558594, + "learning_rate": 9.97422809077092e-06, + "lm_loss": 6.6856, + "loss": 2.1868, + "step": 120, + "text_contrastive_loss": 1.1564 + }, + { + "contrastive_loss": 0.836, + "epoch": 0.27313769751693, + "grad_norm": 18.526567459106445, + "learning_rate": 9.973487147017874e-06, + "lm_loss": 6.7732, + "loss": 1.998, + "step": 121, + "text_contrastive_loss": 0.9693 + }, + { + "contrastive_loss": 0.8308, + "epoch": 0.27539503386004516, + "grad_norm": 20.648765563964844, + "learning_rate": 9.972735730897484e-06, + "lm_loss": 6.7667, + "loss": 2.0166, + "step": 122, + "text_contrastive_loss": 1.0183 + }, + { + "contrastive_loss": 0.9621, + "epoch": 0.27765237020316025, + "grad_norm": 20.575937271118164, + "learning_rate": 9.97197384399196e-06, + "lm_loss": 6.7598, + "loss": 2.1682, + "step": 123, + "text_contrastive_loss": 1.0602 + }, + { + "contrastive_loss": 0.7394, + "epoch": 0.2799097065462754, + "grad_norm": 19.688508987426758, + "learning_rate": 9.971201487905563e-06, + "lm_loss": 6.7787, + "loss": 1.8177, + "step": 124, + "text_contrastive_loss": 0.8007 + }, + { + "contrastive_loss": 0.8498, + "epoch": 0.28216704288939054, + "grad_norm": 19.605121612548828, + "learning_rate": 9.970418664264596e-06, + "lm_loss": 6.7351, + "loss": 2.0701, + "step": 125, + "text_contrastive_loss": 1.0937 + }, + { + "contrastive_loss": 0.9438, + "epoch": 0.28442437923250563, + "grad_norm": 20.848880767822266, + "learning_rate": 9.969625374717401e-06, + "lm_loss": 6.6819, + "loss": 2.1431, + "step": 126, + "text_contrastive_loss": 1.0621 + }, + { + "contrastive_loss": 0.9114, + "epoch": 0.2866817155756208, + "grad_norm": 21.030580520629883, + "learning_rate": 9.96882162093436e-06, + "lm_loss": 6.6468, + "loss": 2.0173, + "step": 127, + "text_contrastive_loss": 0.8825 + }, + { + "contrastive_loss": 0.8366, + "epoch": 0.28893905191873587, + "grad_norm": 20.475706100463867, + "learning_rate": 9.968007404607887e-06, + "lm_loss": 6.6789, + "loss": 2.0272, + "step": 128, + "text_contrastive_loss": 1.0453 + }, + { + "contrastive_loss": 0.8921, + "epoch": 0.291196388261851, + "grad_norm": 20.923486709594727, + "learning_rate": 9.96718272745243e-06, + "lm_loss": 6.5704, + "loss": 2.0007, + "step": 129, + "text_contrastive_loss": 0.903 + }, + { + "contrastive_loss": 0.9031, + "epoch": 0.29345372460496616, + "grad_norm": 23.098770141601562, + "learning_rate": 9.966347591204459e-06, + "lm_loss": 6.6826, + "loss": 2.0489, + "step": 130, + "text_contrastive_loss": 0.9552 + }, + { + "contrastive_loss": 0.7868, + "epoch": 0.29571106094808125, + "grad_norm": 18.975332260131836, + "learning_rate": 9.96550199762247e-06, + "lm_loss": 6.602, + "loss": 1.9859, + "step": 131, + "text_contrastive_loss": 1.0778 + }, + { + "contrastive_loss": 0.8502, + "epoch": 0.2979683972911964, + "grad_norm": 19.30949592590332, + "learning_rate": 9.964645948486978e-06, + "lm_loss": 6.5967, + "loss": 1.9755, + "step": 132, + "text_contrastive_loss": 0.9313 + }, + { + "contrastive_loss": 0.9409, + "epoch": 0.3002257336343115, + "grad_norm": 19.818702697753906, + "learning_rate": 9.963779445600512e-06, + "lm_loss": 6.655, + "loss": 2.1949, + "step": 133, + "text_contrastive_loss": 1.1771 + }, + { + "contrastive_loss": 0.7461, + "epoch": 0.30248306997742663, + "grad_norm": 17.95615005493164, + "learning_rate": 9.962902490787616e-06, + "lm_loss": 6.7079, + "loss": 1.914, + "step": 134, + "text_contrastive_loss": 0.9942 + }, + { + "contrastive_loss": 0.8042, + "epoch": 0.3047404063205418, + "grad_norm": 18.942901611328125, + "learning_rate": 9.962015085894838e-06, + "lm_loss": 6.6384, + "loss": 1.9746, + "step": 135, + "text_contrastive_loss": 1.0131 + }, + { + "contrastive_loss": 0.9972, + "epoch": 0.30699774266365687, + "grad_norm": 24.471101760864258, + "learning_rate": 9.961117232790734e-06, + "lm_loss": 6.6302, + "loss": 2.1493, + "step": 136, + "text_contrastive_loss": 0.9783 + }, + { + "contrastive_loss": 0.9711, + "epoch": 0.309255079006772, + "grad_norm": 20.733802795410156, + "learning_rate": 9.960208933365857e-06, + "lm_loss": 6.7176, + "loss": 2.1471, + "step": 137, + "text_contrastive_loss": 1.0086 + }, + { + "contrastive_loss": 0.9169, + "epoch": 0.3115124153498871, + "grad_norm": 20.384777069091797, + "learning_rate": 9.959290189532757e-06, + "lm_loss": 6.5893, + "loss": 2.0997, + "step": 138, + "text_contrastive_loss": 1.0476 + }, + { + "contrastive_loss": 0.7578, + "epoch": 0.31376975169300225, + "grad_norm": 19.190717697143555, + "learning_rate": 9.958361003225979e-06, + "lm_loss": 6.5529, + "loss": 1.8754, + "step": 139, + "text_contrastive_loss": 0.9248 + }, + { + "contrastive_loss": 0.8863, + "epoch": 0.3160270880361174, + "grad_norm": 19.11601448059082, + "learning_rate": 9.957421376402053e-06, + "lm_loss": 6.575, + "loss": 2.0286, + "step": 140, + "text_contrastive_loss": 0.9696 + }, + { + "contrastive_loss": 0.7891, + "epoch": 0.3182844243792325, + "grad_norm": 19.93131446838379, + "learning_rate": 9.956471311039491e-06, + "lm_loss": 6.571, + "loss": 1.9056, + "step": 141, + "text_contrastive_loss": 0.9187 + }, + { + "contrastive_loss": 0.9544, + "epoch": 0.32054176072234764, + "grad_norm": 20.333568572998047, + "learning_rate": 9.95551080913879e-06, + "lm_loss": 6.5818, + "loss": 2.1021, + "step": 142, + "text_contrastive_loss": 0.979 + }, + { + "contrastive_loss": 0.8276, + "epoch": 0.3227990970654628, + "grad_norm": 19.743680953979492, + "learning_rate": 9.954539872722417e-06, + "lm_loss": 6.6458, + "loss": 1.8983, + "step": 143, + "text_contrastive_loss": 0.8123 + }, + { + "contrastive_loss": 0.9204, + "epoch": 0.32505643340857787, + "grad_norm": 20.866682052612305, + "learning_rate": 9.953558503834819e-06, + "lm_loss": 6.6503, + "loss": 2.1714, + "step": 144, + "text_contrastive_loss": 1.172 + }, + { + "contrastive_loss": 0.9538, + "epoch": 0.327313769751693, + "grad_norm": 19.932788848876953, + "learning_rate": 9.9525667045424e-06, + "lm_loss": 6.5406, + "loss": 2.0964, + "step": 145, + "text_contrastive_loss": 0.9772 + }, + { + "contrastive_loss": 0.7554, + "epoch": 0.3295711060948081, + "grad_norm": 18.704795837402344, + "learning_rate": 9.951564476933534e-06, + "lm_loss": 6.6267, + "loss": 1.8488, + "step": 146, + "text_contrastive_loss": 0.8613 + }, + { + "contrastive_loss": 0.8672, + "epoch": 0.33182844243792325, + "grad_norm": 18.536409378051758, + "learning_rate": 9.950551823118544e-06, + "lm_loss": 6.5201, + "loss": 1.9945, + "step": 147, + "text_contrastive_loss": 0.9506 + }, + { + "contrastive_loss": 0.926, + "epoch": 0.3340857787810384, + "grad_norm": 21.630870819091797, + "learning_rate": 9.949528745229721e-06, + "lm_loss": 6.5951, + "loss": 2.1363, + "step": 148, + "text_contrastive_loss": 1.1015 + }, + { + "contrastive_loss": 0.8198, + "epoch": 0.3363431151241535, + "grad_norm": 20.16887664794922, + "learning_rate": 9.948495245421294e-06, + "lm_loss": 6.6474, + "loss": 2.0023, + "step": 149, + "text_contrastive_loss": 1.0355 + }, + { + "contrastive_loss": 0.9098, + "epoch": 0.33860045146726864, + "grad_norm": 21.951305389404297, + "learning_rate": 9.94745132586944e-06, + "lm_loss": 6.5247, + "loss": 2.0433, + "step": 150, + "text_contrastive_loss": 0.962 + }, + { + "contrastive_loss": 0.7625, + "epoch": 0.34085778781038373, + "grad_norm": 17.890987396240234, + "learning_rate": 9.946396988772275e-06, + "lm_loss": 6.5289, + "loss": 1.8931, + "step": 151, + "text_contrastive_loss": 0.9554 + }, + { + "contrastive_loss": 0.8016, + "epoch": 0.3431151241534989, + "grad_norm": 21.804550170898438, + "learning_rate": 9.945332236349857e-06, + "lm_loss": 6.6814, + "loss": 1.8688, + "step": 152, + "text_contrastive_loss": 0.7981 + }, + { + "contrastive_loss": 0.8857, + "epoch": 0.345372460496614, + "grad_norm": 21.859079360961914, + "learning_rate": 9.944257070844165e-06, + "lm_loss": 6.5121, + "loss": 2.0385, + "step": 153, + "text_contrastive_loss": 1.0031 + }, + { + "contrastive_loss": 0.8998, + "epoch": 0.3476297968397291, + "grad_norm": 20.89244270324707, + "learning_rate": 9.943171494519111e-06, + "lm_loss": 6.4703, + "loss": 2.0058, + "step": 154, + "text_contrastive_loss": 0.9181 + }, + { + "contrastive_loss": 0.779, + "epoch": 0.34988713318284426, + "grad_norm": 20.04449462890625, + "learning_rate": 9.942075509660527e-06, + "lm_loss": 6.4538, + "loss": 1.9743, + "step": 155, + "text_contrastive_loss": 1.0998 + }, + { + "contrastive_loss": 0.7987, + "epoch": 0.35214446952595935, + "grad_norm": 19.918045043945312, + "learning_rate": 9.94096911857616e-06, + "lm_loss": 6.6398, + "loss": 1.956, + "step": 156, + "text_contrastive_loss": 0.9865 + }, + { + "contrastive_loss": 0.8872, + "epoch": 0.3544018058690745, + "grad_norm": 22.130380630493164, + "learning_rate": 9.939852323595671e-06, + "lm_loss": 6.5633, + "loss": 1.9952, + "step": 157, + "text_contrastive_loss": 0.9034 + }, + { + "contrastive_loss": 0.8786, + "epoch": 0.35665914221218964, + "grad_norm": 22.071300506591797, + "learning_rate": 9.938725127070628e-06, + "lm_loss": 6.5936, + "loss": 2.0969, + "step": 158, + "text_contrastive_loss": 1.118 + }, + { + "contrastive_loss": 0.8317, + "epoch": 0.35891647855530473, + "grad_norm": 20.15006446838379, + "learning_rate": 9.937587531374497e-06, + "lm_loss": 6.5906, + "loss": 1.9588, + "step": 159, + "text_contrastive_loss": 0.9361 + }, + { + "contrastive_loss": 0.7938, + "epoch": 0.3611738148984199, + "grad_norm": 20.539600372314453, + "learning_rate": 9.936439538902644e-06, + "lm_loss": 6.4727, + "loss": 1.8619, + "step": 160, + "text_contrastive_loss": 0.8417 + }, + { + "contrastive_loss": 0.7934, + "epoch": 0.36343115124153497, + "grad_norm": 19.92677116394043, + "learning_rate": 9.935281152072329e-06, + "lm_loss": 6.5515, + "loss": 1.9428, + "step": 161, + "text_contrastive_loss": 0.9885 + }, + { + "contrastive_loss": 0.8887, + "epoch": 0.3656884875846501, + "grad_norm": 19.666276931762695, + "learning_rate": 9.934112373322695e-06, + "lm_loss": 6.5895, + "loss": 2.1052, + "step": 162, + "text_contrastive_loss": 1.1152 + }, + { + "contrastive_loss": 0.8653, + "epoch": 0.36794582392776526, + "grad_norm": 24.452041625976562, + "learning_rate": 9.932933205114766e-06, + "lm_loss": 6.45, + "loss": 1.9736, + "step": 163, + "text_contrastive_loss": 0.9265 + }, + { + "contrastive_loss": 0.7489, + "epoch": 0.37020316027088035, + "grad_norm": 18.748714447021484, + "learning_rate": 9.931743649931446e-06, + "lm_loss": 6.4978, + "loss": 1.8218, + "step": 164, + "text_contrastive_loss": 0.8463 + }, + { + "contrastive_loss": 0.8158, + "epoch": 0.3724604966139955, + "grad_norm": 20.521987915039062, + "learning_rate": 9.93054371027751e-06, + "lm_loss": 6.465, + "loss": 2.0003, + "step": 165, + "text_contrastive_loss": 1.076 + }, + { + "contrastive_loss": 0.7755, + "epoch": 0.3747178329571106, + "grad_norm": 18.84111976623535, + "learning_rate": 9.929333388679593e-06, + "lm_loss": 6.5212, + "loss": 1.8329, + "step": 166, + "text_contrastive_loss": 0.8105 + }, + { + "contrastive_loss": 0.8448, + "epoch": 0.37697516930022573, + "grad_norm": 18.741506576538086, + "learning_rate": 9.928112687686197e-06, + "lm_loss": 6.5177, + "loss": 1.946, + "step": 167, + "text_contrastive_loss": 0.8989 + }, + { + "contrastive_loss": 0.7236, + "epoch": 0.3792325056433409, + "grad_norm": 17.72517204284668, + "learning_rate": 9.92688160986768e-06, + "lm_loss": 6.3992, + "loss": 1.7728, + "step": 168, + "text_contrastive_loss": 0.8185 + }, + { + "contrastive_loss": 0.8869, + "epoch": 0.38148984198645597, + "grad_norm": 21.22738265991211, + "learning_rate": 9.925640157816246e-06, + "lm_loss": 6.4493, + "loss": 1.9893, + "step": 169, + "text_contrastive_loss": 0.9149 + }, + { + "contrastive_loss": 0.8058, + "epoch": 0.3837471783295711, + "grad_norm": 19.492149353027344, + "learning_rate": 9.924388334145943e-06, + "lm_loss": 6.3983, + "loss": 1.9272, + "step": 170, + "text_contrastive_loss": 0.9632 + }, + { + "contrastive_loss": 0.7366, + "epoch": 0.3860045146726862, + "grad_norm": 19.459230422973633, + "learning_rate": 9.92312614149266e-06, + "lm_loss": 6.3751, + "loss": 1.8532, + "step": 171, + "text_contrastive_loss": 0.9582 + }, + { + "contrastive_loss": 0.8346, + "epoch": 0.38826185101580135, + "grad_norm": 20.554122924804688, + "learning_rate": 9.92185358251412e-06, + "lm_loss": 6.3351, + "loss": 1.9061, + "step": 172, + "text_contrastive_loss": 0.8758 + }, + { + "contrastive_loss": 0.7572, + "epoch": 0.3905191873589165, + "grad_norm": 19.220319747924805, + "learning_rate": 9.92057065988987e-06, + "lm_loss": 6.493, + "loss": 1.8178, + "step": 173, + "text_contrastive_loss": 0.8226 + }, + { + "contrastive_loss": 0.8348, + "epoch": 0.3927765237020316, + "grad_norm": 21.056917190551758, + "learning_rate": 9.919277376321284e-06, + "lm_loss": 6.4358, + "loss": 1.947, + "step": 174, + "text_contrastive_loss": 0.9373 + }, + { + "contrastive_loss": 0.8956, + "epoch": 0.39503386004514673, + "grad_norm": 19.383411407470703, + "learning_rate": 9.917973734531549e-06, + "lm_loss": 6.4578, + "loss": 1.9908, + "step": 175, + "text_contrastive_loss": 0.8988 + }, + { + "contrastive_loss": 0.7699, + "epoch": 0.3972911963882618, + "grad_norm": 19.435169219970703, + "learning_rate": 9.916659737265664e-06, + "lm_loss": 6.4342, + "loss": 1.917, + "step": 176, + "text_contrastive_loss": 1.0074 + }, + { + "contrastive_loss": 0.757, + "epoch": 0.39954853273137697, + "grad_norm": 21.105255126953125, + "learning_rate": 9.915335387290432e-06, + "lm_loss": 6.2528, + "loss": 1.7563, + "step": 177, + "text_contrastive_loss": 0.7481 + }, + { + "contrastive_loss": 0.7856, + "epoch": 0.4018058690744921, + "grad_norm": 19.341381072998047, + "learning_rate": 9.914000687394457e-06, + "lm_loss": 6.4932, + "loss": 1.8719, + "step": 178, + "text_contrastive_loss": 0.874 + }, + { + "contrastive_loss": 0.7868, + "epoch": 0.4040632054176072, + "grad_norm": 19.426422119140625, + "learning_rate": 9.912655640388134e-06, + "lm_loss": 6.3566, + "loss": 1.8835, + "step": 179, + "text_contrastive_loss": 0.9222 + }, + { + "contrastive_loss": 0.8502, + "epoch": 0.40632054176072235, + "grad_norm": 21.145036697387695, + "learning_rate": 9.911300249103646e-06, + "lm_loss": 6.4195, + "loss": 1.9542, + "step": 180, + "text_contrastive_loss": 0.9241 + }, + { + "contrastive_loss": 0.8322, + "epoch": 0.40857787810383744, + "grad_norm": 18.03723907470703, + "learning_rate": 9.909934516394957e-06, + "lm_loss": 6.2954, + "loss": 2.0169, + "step": 181, + "text_contrastive_loss": 1.1104 + }, + { + "contrastive_loss": 1.003, + "epoch": 0.4108352144469526, + "grad_norm": 20.614885330200195, + "learning_rate": 9.908558445137807e-06, + "lm_loss": 6.4824, + "loss": 2.1626, + "step": 182, + "text_contrastive_loss": 1.0227 + }, + { + "contrastive_loss": 0.7777, + "epoch": 0.41309255079006774, + "grad_norm": 20.063800811767578, + "learning_rate": 9.907172038229706e-06, + "lm_loss": 6.3171, + "loss": 1.8316, + "step": 183, + "text_contrastive_loss": 0.8445 + }, + { + "contrastive_loss": 0.8547, + "epoch": 0.4153498871331828, + "grad_norm": 18.237977981567383, + "learning_rate": 9.905775298589923e-06, + "lm_loss": 6.4206, + "loss": 1.9172, + "step": 184, + "text_contrastive_loss": 0.8408 + }, + { + "contrastive_loss": 0.8484, + "epoch": 0.417607223476298, + "grad_norm": 20.95216941833496, + "learning_rate": 9.904368229159494e-06, + "lm_loss": 6.3509, + "loss": 1.945, + "step": 185, + "text_contrastive_loss": 0.9229 + }, + { + "contrastive_loss": 0.8764, + "epoch": 0.4198645598194131, + "grad_norm": 19.93537712097168, + "learning_rate": 9.90295083290119e-06, + "lm_loss": 6.6118, + "loss": 2.0261, + "step": 186, + "text_contrastive_loss": 0.977 + }, + { + "contrastive_loss": 1.0481, + "epoch": 0.4221218961625282, + "grad_norm": 22.335296630859375, + "learning_rate": 9.901523112799543e-06, + "lm_loss": 6.3017, + "loss": 2.2153, + "step": 187, + "text_contrastive_loss": 1.0739 + }, + { + "contrastive_loss": 0.9157, + "epoch": 0.42437923250564336, + "grad_norm": 21.24988555908203, + "learning_rate": 9.90008507186081e-06, + "lm_loss": 6.2699, + "loss": 2.0187, + "step": 188, + "text_contrastive_loss": 0.952 + }, + { + "contrastive_loss": 0.7626, + "epoch": 0.42663656884875845, + "grad_norm": 17.65534782409668, + "learning_rate": 9.898636713112992e-06, + "lm_loss": 6.3991, + "loss": 1.8415, + "step": 189, + "text_contrastive_loss": 0.878 + }, + { + "contrastive_loss": 0.8812, + "epoch": 0.4288939051918736, + "grad_norm": 20.629322052001953, + "learning_rate": 9.897178039605803e-06, + "lm_loss": 6.3606, + "loss": 1.9729, + "step": 190, + "text_contrastive_loss": 0.9112 + }, + { + "contrastive_loss": 0.8356, + "epoch": 0.43115124153498874, + "grad_norm": 19.353788375854492, + "learning_rate": 9.895709054410686e-06, + "lm_loss": 6.3502, + "loss": 1.911, + "step": 191, + "text_contrastive_loss": 0.8808 + }, + { + "contrastive_loss": 0.8544, + "epoch": 0.43340857787810383, + "grad_norm": 21.310510635375977, + "learning_rate": 9.894229760620793e-06, + "lm_loss": 6.31, + "loss": 1.9133, + "step": 192, + "text_contrastive_loss": 0.8558 + }, + { + "contrastive_loss": 0.8522, + "epoch": 0.435665914221219, + "grad_norm": 18.96470069885254, + "learning_rate": 9.892740161350981e-06, + "lm_loss": 6.3418, + "loss": 1.9353, + "step": 193, + "text_contrastive_loss": 0.8978 + }, + { + "contrastive_loss": 0.7986, + "epoch": 0.43792325056433407, + "grad_norm": 20.11381721496582, + "learning_rate": 9.891240259737809e-06, + "lm_loss": 6.3733, + "loss": 1.9381, + "step": 194, + "text_contrastive_loss": 1.0043 + }, + { + "contrastive_loss": 0.8889, + "epoch": 0.4401805869074492, + "grad_norm": 20.309608459472656, + "learning_rate": 9.889730058939529e-06, + "lm_loss": 6.4461, + "loss": 1.96, + "step": 195, + "text_contrastive_loss": 0.8529 + }, + { + "contrastive_loss": 0.8012, + "epoch": 0.44243792325056436, + "grad_norm": 20.747764587402344, + "learning_rate": 9.888209562136074e-06, + "lm_loss": 6.3646, + "loss": 1.9054, + "step": 196, + "text_contrastive_loss": 0.9354 + }, + { + "contrastive_loss": 0.841, + "epoch": 0.44469525959367945, + "grad_norm": 21.405588150024414, + "learning_rate": 9.886678772529069e-06, + "lm_loss": 6.2747, + "loss": 2.0179, + "step": 197, + "text_contrastive_loss": 1.0988 + }, + { + "contrastive_loss": 0.8754, + "epoch": 0.4469525959367946, + "grad_norm": 19.198837280273438, + "learning_rate": 9.885137693341795e-06, + "lm_loss": 6.328, + "loss": 2.1004, + "step": 198, + "text_contrastive_loss": 1.1845 + }, + { + "contrastive_loss": 0.8876, + "epoch": 0.4492099322799097, + "grad_norm": 18.697420120239258, + "learning_rate": 9.883586327819214e-06, + "lm_loss": 6.3426, + "loss": 1.9726, + "step": 199, + "text_contrastive_loss": 0.9014 + }, + { + "contrastive_loss": 0.8075, + "epoch": 0.45146726862302483, + "grad_norm": 19.029743194580078, + "learning_rate": 9.88202467922794e-06, + "lm_loss": 6.3034, + "loss": 1.9712, + "step": 200, + "text_contrastive_loss": 1.0667 + }, + { + "contrastive_loss": 0.8803, + "epoch": 0.45372460496614, + "grad_norm": 19.263851165771484, + "learning_rate": 9.880452750856239e-06, + "lm_loss": 6.3328, + "loss": 2.043, + "step": 201, + "text_contrastive_loss": 1.0589 + }, + { + "contrastive_loss": 0.8736, + "epoch": 0.45598194130925507, + "grad_norm": 19.789085388183594, + "learning_rate": 9.878870546014025e-06, + "lm_loss": 6.297, + "loss": 1.9871, + "step": 202, + "text_contrastive_loss": 0.9676 + }, + { + "contrastive_loss": 0.8993, + "epoch": 0.4582392776523702, + "grad_norm": 23.019350051879883, + "learning_rate": 9.877278068032852e-06, + "lm_loss": 6.2742, + "loss": 2.0623, + "step": 203, + "text_contrastive_loss": 1.0711 + }, + { + "contrastive_loss": 0.7822, + "epoch": 0.4604966139954853, + "grad_norm": 19.80655288696289, + "learning_rate": 9.875675320265903e-06, + "lm_loss": 6.2757, + "loss": 1.8815, + "step": 204, + "text_contrastive_loss": 0.9435 + }, + { + "contrastive_loss": 0.8734, + "epoch": 0.46275395033860045, + "grad_norm": 20.55428123474121, + "learning_rate": 9.874062306087983e-06, + "lm_loss": 6.2592, + "loss": 1.9863, + "step": 205, + "text_contrastive_loss": 0.9739 + }, + { + "contrastive_loss": 0.921, + "epoch": 0.4650112866817156, + "grad_norm": 21.780397415161133, + "learning_rate": 9.872439028895518e-06, + "lm_loss": 6.2387, + "loss": 2.0729, + "step": 206, + "text_contrastive_loss": 1.056 + }, + { + "contrastive_loss": 0.723, + "epoch": 0.4672686230248307, + "grad_norm": 17.902149200439453, + "learning_rate": 9.870805492106546e-06, + "lm_loss": 6.2261, + "loss": 1.7971, + "step": 207, + "text_contrastive_loss": 0.9029 + }, + { + "contrastive_loss": 0.8151, + "epoch": 0.46952595936794583, + "grad_norm": 17.340742111206055, + "learning_rate": 9.869161699160704e-06, + "lm_loss": 6.1862, + "loss": 1.8445, + "step": 208, + "text_contrastive_loss": 0.8216 + }, + { + "contrastive_loss": 0.7565, + "epoch": 0.4717832957110609, + "grad_norm": 17.048093795776367, + "learning_rate": 9.867507653519225e-06, + "lm_loss": 6.2923, + "loss": 1.8102, + "step": 209, + "text_contrastive_loss": 0.8489 + }, + { + "contrastive_loss": 0.6876, + "epoch": 0.47404063205417607, + "grad_norm": 17.589111328125, + "learning_rate": 9.865843358664933e-06, + "lm_loss": 6.2506, + "loss": 1.753, + "step": 210, + "text_contrastive_loss": 0.8807 + }, + { + "contrastive_loss": 0.8278, + "epoch": 0.4762979683972912, + "grad_norm": 18.502525329589844, + "learning_rate": 9.86416881810223e-06, + "lm_loss": 6.2913, + "loss": 1.9284, + "step": 211, + "text_contrastive_loss": 0.943 + }, + { + "contrastive_loss": 0.7451, + "epoch": 0.4785553047404063, + "grad_norm": 18.441038131713867, + "learning_rate": 9.862484035357095e-06, + "lm_loss": 6.3486, + "loss": 1.7617, + "step": 212, + "text_contrastive_loss": 0.7634 + }, + { + "contrastive_loss": 0.8968, + "epoch": 0.48081264108352145, + "grad_norm": 24.60144805908203, + "learning_rate": 9.860789013977074e-06, + "lm_loss": 6.3089, + "loss": 2.0082, + "step": 213, + "text_contrastive_loss": 0.961 + }, + { + "contrastive_loss": 0.7364, + "epoch": 0.48306997742663654, + "grad_norm": 18.438432693481445, + "learning_rate": 9.859083757531265e-06, + "lm_loss": 6.2724, + "loss": 1.8209, + "step": 214, + "text_contrastive_loss": 0.9144 + }, + { + "contrastive_loss": 0.8151, + "epoch": 0.4853273137697517, + "grad_norm": 19.14459228515625, + "learning_rate": 9.857368269610325e-06, + "lm_loss": 6.2602, + "loss": 1.9227, + "step": 215, + "text_contrastive_loss": 0.9632 + }, + { + "contrastive_loss": 0.8037, + "epoch": 0.48758465011286684, + "grad_norm": 19.237321853637695, + "learning_rate": 9.85564255382645e-06, + "lm_loss": 6.2941, + "loss": 1.8994, + "step": 216, + "text_contrastive_loss": 0.9326 + }, + { + "contrastive_loss": 0.7454, + "epoch": 0.4898419864559819, + "grad_norm": 18.339815139770508, + "learning_rate": 9.853906613813378e-06, + "lm_loss": 6.2437, + "loss": 1.7913, + "step": 217, + "text_contrastive_loss": 0.8431 + }, + { + "contrastive_loss": 0.7796, + "epoch": 0.49209932279909707, + "grad_norm": 20.978559494018555, + "learning_rate": 9.852160453226367e-06, + "lm_loss": 6.0887, + "loss": 1.9323, + "step": 218, + "text_contrastive_loss": 1.0878 + }, + { + "contrastive_loss": 0.8093, + "epoch": 0.49435665914221216, + "grad_norm": 19.603336334228516, + "learning_rate": 9.850404075742204e-06, + "lm_loss": 6.2458, + "loss": 1.9098, + "step": 219, + "text_contrastive_loss": 0.9519 + }, + { + "contrastive_loss": 0.8294, + "epoch": 0.4966139954853273, + "grad_norm": 19.92656707763672, + "learning_rate": 9.848637485059183e-06, + "lm_loss": 6.2273, + "loss": 1.929, + "step": 220, + "text_contrastive_loss": 0.9537 + }, + { + "contrastive_loss": 0.8109, + "epoch": 0.49887133182844245, + "grad_norm": 23.059837341308594, + "learning_rate": 9.846860684897107e-06, + "lm_loss": 6.2547, + "loss": 2.0089, + "step": 221, + "text_contrastive_loss": 1.1451 + }, + { + "contrastive_loss": 0.713, + "epoch": 0.5011286681715575, + "grad_norm": 18.783077239990234, + "learning_rate": 9.845073678997275e-06, + "lm_loss": 6.1863, + "loss": 1.722, + "step": 222, + "text_contrastive_loss": 0.7807 + }, + { + "contrastive_loss": 0.7074, + "epoch": 0.5033860045146726, + "grad_norm": 17.86538314819336, + "learning_rate": 9.843276471122473e-06, + "lm_loss": 6.1494, + "loss": 1.7629, + "step": 223, + "text_contrastive_loss": 0.8812 + }, + { + "contrastive_loss": 0.8604, + "epoch": 0.5056433408577878, + "grad_norm": 18.984697341918945, + "learning_rate": 9.84146906505698e-06, + "lm_loss": 6.3609, + "loss": 1.9277, + "step": 224, + "text_contrastive_loss": 0.8625 + }, + { + "contrastive_loss": 0.7167, + "epoch": 0.5079006772009029, + "grad_norm": 17.270008087158203, + "learning_rate": 9.83965146460653e-06, + "lm_loss": 6.2559, + "loss": 1.8291, + "step": 225, + "text_contrastive_loss": 0.9736 + }, + { + "contrastive_loss": 0.71, + "epoch": 0.510158013544018, + "grad_norm": 17.672595977783203, + "learning_rate": 9.83782367359834e-06, + "lm_loss": 6.0805, + "loss": 1.6965, + "step": 226, + "text_contrastive_loss": 0.7568 + }, + { + "contrastive_loss": 0.7668, + "epoch": 0.5124153498871332, + "grad_norm": 18.119373321533203, + "learning_rate": 9.835985695881076e-06, + "lm_loss": 6.2149, + "loss": 1.846, + "step": 227, + "text_contrastive_loss": 0.9155 + }, + { + "contrastive_loss": 0.6243, + "epoch": 0.5146726862302483, + "grad_norm": 17.03944969177246, + "learning_rate": 9.834137535324852e-06, + "lm_loss": 6.2418, + "loss": 1.6596, + "step": 228, + "text_contrastive_loss": 0.8222 + }, + { + "contrastive_loss": 0.8295, + "epoch": 0.5169300225733634, + "grad_norm": 20.065959930419922, + "learning_rate": 9.83227919582123e-06, + "lm_loss": 6.1313, + "loss": 1.9263, + "step": 229, + "text_contrastive_loss": 0.9675 + }, + { + "contrastive_loss": 0.7715, + "epoch": 0.5191873589164786, + "grad_norm": 18.148019790649414, + "learning_rate": 9.830410681283203e-06, + "lm_loss": 6.3683, + "loss": 1.9036, + "step": 230, + "text_contrastive_loss": 0.9904 + }, + { + "contrastive_loss": 0.7624, + "epoch": 0.5214446952595937, + "grad_norm": 20.754575729370117, + "learning_rate": 9.828531995645183e-06, + "lm_loss": 6.2368, + "loss": 1.7355, + "step": 231, + "text_contrastive_loss": 0.6987 + }, + { + "contrastive_loss": 0.9046, + "epoch": 0.5237020316027088, + "grad_norm": 23.928089141845703, + "learning_rate": 9.826643142863006e-06, + "lm_loss": 6.2934, + "loss": 2.1454, + "step": 232, + "text_contrastive_loss": 1.2228 + }, + { + "contrastive_loss": 0.848, + "epoch": 0.5259593679458239, + "grad_norm": 24.783737182617188, + "learning_rate": 9.824744126913914e-06, + "lm_loss": 6.2529, + "loss": 2.0252, + "step": 233, + "text_contrastive_loss": 1.104 + }, + { + "contrastive_loss": 0.7888, + "epoch": 0.5282167042889391, + "grad_norm": 17.51032257080078, + "learning_rate": 9.822834951796547e-06, + "lm_loss": 6.3361, + "loss": 1.9283, + "step": 234, + "text_contrastive_loss": 1.0118 + }, + { + "contrastive_loss": 0.8544, + "epoch": 0.5304740406320542, + "grad_norm": 19.135835647583008, + "learning_rate": 9.820915621530939e-06, + "lm_loss": 6.27, + "loss": 1.9822, + "step": 235, + "text_contrastive_loss": 1.0017 + }, + { + "contrastive_loss": 0.8286, + "epoch": 0.5327313769751693, + "grad_norm": 19.796823501586914, + "learning_rate": 9.818986140158507e-06, + "lm_loss": 6.1489, + "loss": 1.862, + "step": 236, + "text_contrastive_loss": 0.8369 + }, + { + "contrastive_loss": 0.7975, + "epoch": 0.5349887133182845, + "grad_norm": 18.388835906982422, + "learning_rate": 9.817046511742042e-06, + "lm_loss": 6.1075, + "loss": 1.854, + "step": 237, + "text_contrastive_loss": 0.8914 + }, + { + "contrastive_loss": 0.7875, + "epoch": 0.5372460496613995, + "grad_norm": 18.588680267333984, + "learning_rate": 9.815096740365698e-06, + "lm_loss": 6.1547, + "loss": 1.8664, + "step": 238, + "text_contrastive_loss": 0.927 + }, + { + "contrastive_loss": 0.7999, + "epoch": 0.5395033860045146, + "grad_norm": 19.11373519897461, + "learning_rate": 9.81313683013499e-06, + "lm_loss": 6.1818, + "loss": 1.8483, + "step": 239, + "text_contrastive_loss": 0.8605 + }, + { + "contrastive_loss": 0.8682, + "epoch": 0.5417607223476298, + "grad_norm": 17.680660247802734, + "learning_rate": 9.811166785176785e-06, + "lm_loss": 6.1513, + "loss": 1.9229, + "step": 240, + "text_contrastive_loss": 0.8791 + }, + { + "contrastive_loss": 0.7493, + "epoch": 0.5440180586907449, + "grad_norm": 17.710893630981445, + "learning_rate": 9.809186609639281e-06, + "lm_loss": 6.2055, + "loss": 1.7842, + "step": 241, + "text_contrastive_loss": 0.8287 + }, + { + "contrastive_loss": 0.8647, + "epoch": 0.54627539503386, + "grad_norm": 19.41048812866211, + "learning_rate": 9.807196307692015e-06, + "lm_loss": 6.1424, + "loss": 1.9948, + "step": 242, + "text_contrastive_loss": 1.0316 + }, + { + "contrastive_loss": 0.7898, + "epoch": 0.5485327313769752, + "grad_norm": 19.4019832611084, + "learning_rate": 9.805195883525844e-06, + "lm_loss": 6.178, + "loss": 1.8574, + "step": 243, + "text_contrastive_loss": 0.8996 + }, + { + "contrastive_loss": 0.7414, + "epoch": 0.5507900677200903, + "grad_norm": 18.429107666015625, + "learning_rate": 9.803185341352936e-06, + "lm_loss": 6.1879, + "loss": 1.7764, + "step": 244, + "text_contrastive_loss": 0.8325 + }, + { + "contrastive_loss": 0.8691, + "epoch": 0.5530474040632054, + "grad_norm": 19.68091583251953, + "learning_rate": 9.80116468540677e-06, + "lm_loss": 6.1057, + "loss": 1.9459, + "step": 245, + "text_contrastive_loss": 0.9324 + }, + { + "contrastive_loss": 0.8321, + "epoch": 0.5553047404063205, + "grad_norm": 19.439861297607422, + "learning_rate": 9.799133919942117e-06, + "lm_loss": 6.1777, + "loss": 1.9379, + "step": 246, + "text_contrastive_loss": 0.9762 + }, + { + "contrastive_loss": 0.6576, + "epoch": 0.5575620767494357, + "grad_norm": 18.352218627929688, + "learning_rate": 9.797093049235034e-06, + "lm_loss": 6.1988, + "loss": 1.7478, + "step": 247, + "text_contrastive_loss": 0.9408 + }, + { + "contrastive_loss": 0.7989, + "epoch": 0.5598194130925508, + "grad_norm": 18.497886657714844, + "learning_rate": 9.795042077582856e-06, + "lm_loss": 6.2574, + "loss": 1.8518, + "step": 248, + "text_contrastive_loss": 0.8545 + }, + { + "contrastive_loss": 0.6566, + "epoch": 0.5620767494356659, + "grad_norm": 16.71062660217285, + "learning_rate": 9.792981009304192e-06, + "lm_loss": 6.1203, + "loss": 1.7234, + "step": 249, + "text_contrastive_loss": 0.9095 + }, + { + "contrastive_loss": 0.7675, + "epoch": 0.5643340857787811, + "grad_norm": 19.55929946899414, + "learning_rate": 9.790909848738907e-06, + "lm_loss": 6.1617, + "loss": 1.9057, + "step": 250, + "text_contrastive_loss": 1.044 + }, + { + "contrastive_loss": 0.8264, + "epoch": 0.5665914221218962, + "grad_norm": 19.731369018554688, + "learning_rate": 9.788828600248114e-06, + "lm_loss": 6.1771, + "loss": 1.9228, + "step": 251, + "text_contrastive_loss": 0.9573 + }, + { + "contrastive_loss": 0.7257, + "epoch": 0.5688487584650113, + "grad_norm": 16.746164321899414, + "learning_rate": 9.786737268214172e-06, + "lm_loss": 6.1749, + "loss": 1.8192, + "step": 252, + "text_contrastive_loss": 0.9519 + }, + { + "contrastive_loss": 0.8816, + "epoch": 0.5711060948081265, + "grad_norm": 19.127241134643555, + "learning_rate": 9.784635857040672e-06, + "lm_loss": 6.086, + "loss": 1.9611, + "step": 253, + "text_contrastive_loss": 0.9417 + }, + { + "contrastive_loss": 0.8804, + "epoch": 0.5733634311512416, + "grad_norm": 20.695344924926758, + "learning_rate": 9.782524371152425e-06, + "lm_loss": 6.1488, + "loss": 2.0119, + "step": 254, + "text_contrastive_loss": 1.0332 + }, + { + "contrastive_loss": 0.844, + "epoch": 0.5756207674943566, + "grad_norm": 20.949626922607422, + "learning_rate": 9.780402814995458e-06, + "lm_loss": 6.2809, + "loss": 1.8781, + "step": 255, + "text_contrastive_loss": 0.8119 + }, + { + "contrastive_loss": 0.7005, + "epoch": 0.5778781038374717, + "grad_norm": 18.161090850830078, + "learning_rate": 9.778271193037003e-06, + "lm_loss": 6.1164, + "loss": 1.8367, + "step": 256, + "text_contrastive_loss": 1.0491 + }, + { + "contrastive_loss": 0.741, + "epoch": 0.5801354401805869, + "grad_norm": 17.87932777404785, + "learning_rate": 9.776129509765487e-06, + "lm_loss": 6.1365, + "loss": 1.7805, + "step": 257, + "text_contrastive_loss": 0.8517 + }, + { + "contrastive_loss": 0.8227, + "epoch": 0.582392776523702, + "grad_norm": 19.600927352905273, + "learning_rate": 9.773977769690517e-06, + "lm_loss": 6.1083, + "loss": 1.9466, + "step": 258, + "text_contrastive_loss": 1.0261 + }, + { + "contrastive_loss": 0.8353, + "epoch": 0.5846501128668171, + "grad_norm": 17.820899963378906, + "learning_rate": 9.771815977342882e-06, + "lm_loss": 6.0494, + "loss": 1.9368, + "step": 259, + "text_contrastive_loss": 0.9931 + }, + { + "contrastive_loss": 0.8439, + "epoch": 0.5869074492099323, + "grad_norm": 19.391754150390625, + "learning_rate": 9.76964413727454e-06, + "lm_loss": 6.1206, + "loss": 1.9207, + "step": 260, + "text_contrastive_loss": 0.9295 + }, + { + "contrastive_loss": 0.7734, + "epoch": 0.5891647855530474, + "grad_norm": 19.11463165283203, + "learning_rate": 9.767462254058593e-06, + "lm_loss": 6.0388, + "loss": 1.8577, + "step": 261, + "text_contrastive_loss": 0.961 + }, + { + "contrastive_loss": 0.8034, + "epoch": 0.5914221218961625, + "grad_norm": 19.316638946533203, + "learning_rate": 9.765270332289307e-06, + "lm_loss": 6.0882, + "loss": 1.8743, + "step": 262, + "text_contrastive_loss": 0.9242 + }, + { + "contrastive_loss": 0.84, + "epoch": 0.5936794582392777, + "grad_norm": 20.758302688598633, + "learning_rate": 9.763068376582075e-06, + "lm_loss": 6.033, + "loss": 1.8907, + "step": 263, + "text_contrastive_loss": 0.8948 + }, + { + "contrastive_loss": 0.6596, + "epoch": 0.5959367945823928, + "grad_norm": 17.379722595214844, + "learning_rate": 9.76085639157342e-06, + "lm_loss": 6.1246, + "loss": 1.7109, + "step": 264, + "text_contrastive_loss": 0.8776 + }, + { + "contrastive_loss": 0.7387, + "epoch": 0.5981941309255079, + "grad_norm": 19.40483283996582, + "learning_rate": 9.758634381920982e-06, + "lm_loss": 6.2125, + "loss": 1.7771, + "step": 265, + "text_contrastive_loss": 0.8343 + }, + { + "contrastive_loss": 0.9388, + "epoch": 0.600451467268623, + "grad_norm": 22.724979400634766, + "learning_rate": 9.756402352303513e-06, + "lm_loss": 6.1672, + "loss": 2.1706, + "step": 266, + "text_contrastive_loss": 1.2302 + }, + { + "contrastive_loss": 0.7913, + "epoch": 0.6027088036117382, + "grad_norm": 19.11801528930664, + "learning_rate": 9.754160307420858e-06, + "lm_loss": 6.2058, + "loss": 1.8721, + "step": 267, + "text_contrastive_loss": 0.9203 + }, + { + "contrastive_loss": 0.8368, + "epoch": 0.6049661399548533, + "grad_norm": 18.960634231567383, + "learning_rate": 9.751908251993956e-06, + "lm_loss": 6.153, + "loss": 1.9811, + "step": 268, + "text_contrastive_loss": 1.058 + }, + { + "contrastive_loss": 0.722, + "epoch": 0.6072234762979684, + "grad_norm": 18.81309700012207, + "learning_rate": 9.749646190764823e-06, + "lm_loss": 6.0044, + "loss": 1.737, + "step": 269, + "text_contrastive_loss": 0.8293 + }, + { + "contrastive_loss": 0.665, + "epoch": 0.6094808126410836, + "grad_norm": 18.15359115600586, + "learning_rate": 9.747374128496541e-06, + "lm_loss": 6.225, + "loss": 1.7902, + "step": 270, + "text_contrastive_loss": 1.0054 + }, + { + "contrastive_loss": 0.8478, + "epoch": 0.6117381489841986, + "grad_norm": 18.519039154052734, + "learning_rate": 9.745092069973254e-06, + "lm_loss": 6.1183, + "loss": 1.9146, + "step": 271, + "text_contrastive_loss": 0.9099 + }, + { + "contrastive_loss": 0.6826, + "epoch": 0.6139954853273137, + "grad_norm": 18.80630874633789, + "learning_rate": 9.74280002000015e-06, + "lm_loss": 6.1514, + "loss": 1.736, + "step": 272, + "text_contrastive_loss": 0.8765 + }, + { + "contrastive_loss": 0.8552, + "epoch": 0.6162528216704289, + "grad_norm": 22.33182144165039, + "learning_rate": 9.74049798340346e-06, + "lm_loss": 6.0026, + "loss": 1.942, + "step": 273, + "text_contrastive_loss": 0.9731 + }, + { + "contrastive_loss": 0.7087, + "epoch": 0.618510158013544, + "grad_norm": 18.393476486206055, + "learning_rate": 9.738185965030444e-06, + "lm_loss": 6.1862, + "loss": 1.777, + "step": 274, + "text_contrastive_loss": 0.8993 + }, + { + "contrastive_loss": 0.9343, + "epoch": 0.6207674943566591, + "grad_norm": 25.709531784057617, + "learning_rate": 9.735863969749373e-06, + "lm_loss": 5.9993, + "loss": 1.9874, + "step": 275, + "text_contrastive_loss": 0.9063 + }, + { + "contrastive_loss": 0.7387, + "epoch": 0.6230248306997742, + "grad_norm": 19.612539291381836, + "learning_rate": 9.733532002449533e-06, + "lm_loss": 6.1208, + "loss": 1.8223, + "step": 276, + "text_contrastive_loss": 0.943 + }, + { + "contrastive_loss": 0.8726, + "epoch": 0.6252821670428894, + "grad_norm": 18.62133026123047, + "learning_rate": 9.731190068041205e-06, + "lm_loss": 6.0839, + "loss": 2.0505, + "step": 277, + "text_contrastive_loss": 1.1389 + }, + { + "contrastive_loss": 0.7972, + "epoch": 0.6275395033860045, + "grad_norm": 18.76290512084961, + "learning_rate": 9.728838171455655e-06, + "lm_loss": 6.1786, + "loss": 1.8659, + "step": 278, + "text_contrastive_loss": 0.9017 + }, + { + "contrastive_loss": 0.7623, + "epoch": 0.6297968397291196, + "grad_norm": 19.95505714416504, + "learning_rate": 9.72647631764513e-06, + "lm_loss": 6.1835, + "loss": 1.8325, + "step": 279, + "text_contrastive_loss": 0.9037 + }, + { + "contrastive_loss": 0.7498, + "epoch": 0.6320541760722348, + "grad_norm": 17.03273582458496, + "learning_rate": 9.724104511582838e-06, + "lm_loss": 6.0583, + "loss": 1.8232, + "step": 280, + "text_contrastive_loss": 0.9351 + }, + { + "contrastive_loss": 0.8429, + "epoch": 0.6343115124153499, + "grad_norm": 21.37076187133789, + "learning_rate": 9.721722758262948e-06, + "lm_loss": 6.0827, + "loss": 1.896, + "step": 281, + "text_contrastive_loss": 0.8896 + }, + { + "contrastive_loss": 0.8617, + "epoch": 0.636568848758465, + "grad_norm": 19.734153747558594, + "learning_rate": 9.719331062700572e-06, + "lm_loss": 6.0058, + "loss": 1.9611, + "step": 282, + "text_contrastive_loss": 0.9976 + }, + { + "contrastive_loss": 0.8787, + "epoch": 0.6388261851015802, + "grad_norm": 21.746551513671875, + "learning_rate": 9.716929429931757e-06, + "lm_loss": 6.0947, + "loss": 1.9843, + "step": 283, + "text_contrastive_loss": 0.9922 + }, + { + "contrastive_loss": 0.832, + "epoch": 0.6410835214446953, + "grad_norm": 18.039628982543945, + "learning_rate": 9.714517865013473e-06, + "lm_loss": 6.1092, + "loss": 1.9548, + "step": 284, + "text_contrastive_loss": 1.0239 + }, + { + "contrastive_loss": 0.764, + "epoch": 0.6433408577878104, + "grad_norm": 18.836318969726562, + "learning_rate": 9.712096373023603e-06, + "lm_loss": 6.0648, + "loss": 1.832, + "step": 285, + "text_contrastive_loss": 0.9231 + }, + { + "contrastive_loss": 0.9148, + "epoch": 0.6455981941309256, + "grad_norm": 20.60382652282715, + "learning_rate": 9.70966495906094e-06, + "lm_loss": 6.0534, + "loss": 2.0786, + "step": 286, + "text_contrastive_loss": 1.1169 + }, + { + "contrastive_loss": 0.6971, + "epoch": 0.6478555304740407, + "grad_norm": 16.91483497619629, + "learning_rate": 9.707223628245157e-06, + "lm_loss": 6.0742, + "loss": 1.7743, + "step": 287, + "text_contrastive_loss": 0.9396 + }, + { + "contrastive_loss": 0.7757, + "epoch": 0.6501128668171557, + "grad_norm": 17.441919326782227, + "learning_rate": 9.70477238571682e-06, + "lm_loss": 6.1147, + "loss": 1.8067, + "step": 288, + "text_contrastive_loss": 0.8391 + }, + { + "contrastive_loss": 0.825, + "epoch": 0.6523702031602708, + "grad_norm": 18.200672149658203, + "learning_rate": 9.702311236637357e-06, + "lm_loss": 6.0121, + "loss": 1.9316, + "step": 289, + "text_contrastive_loss": 1.0108 + }, + { + "contrastive_loss": 0.764, + "epoch": 0.654627539503386, + "grad_norm": 18.553850173950195, + "learning_rate": 9.699840186189061e-06, + "lm_loss": 6.0134, + "loss": 1.8213, + "step": 290, + "text_contrastive_loss": 0.9119 + }, + { + "contrastive_loss": 0.876, + "epoch": 0.6568848758465011, + "grad_norm": 18.69213104248047, + "learning_rate": 9.697359239575069e-06, + "lm_loss": 6.1142, + "loss": 1.904, + "step": 291, + "text_contrastive_loss": 0.8331 + }, + { + "contrastive_loss": 0.7976, + "epoch": 0.6591422121896162, + "grad_norm": 19.904512405395508, + "learning_rate": 9.694868402019362e-06, + "lm_loss": 6.0478, + "loss": 1.8918, + "step": 292, + "text_contrastive_loss": 0.9788 + }, + { + "contrastive_loss": 0.8514, + "epoch": 0.6613995485327314, + "grad_norm": 20.151090621948242, + "learning_rate": 9.69236767876674e-06, + "lm_loss": 5.9712, + "loss": 2.0123, + "step": 293, + "text_contrastive_loss": 1.1275 + }, + { + "contrastive_loss": 0.7869, + "epoch": 0.6636568848758465, + "grad_norm": 19.191810607910156, + "learning_rate": 9.689857075082828e-06, + "lm_loss": 6.0753, + "loss": 1.8907, + "step": 294, + "text_contrastive_loss": 0.9925 + }, + { + "contrastive_loss": 0.9015, + "epoch": 0.6659142212189616, + "grad_norm": 18.698793411254883, + "learning_rate": 9.687336596254045e-06, + "lm_loss": 6.009, + "loss": 2.0058, + "step": 295, + "text_contrastive_loss": 1.0067 + }, + { + "contrastive_loss": 0.7328, + "epoch": 0.6681715575620768, + "grad_norm": 18.889925003051758, + "learning_rate": 9.68480624758761e-06, + "lm_loss": 6.1208, + "loss": 1.748, + "step": 296, + "text_contrastive_loss": 0.8062 + }, + { + "contrastive_loss": 0.7373, + "epoch": 0.6704288939051919, + "grad_norm": 17.773231506347656, + "learning_rate": 9.682266034411527e-06, + "lm_loss": 6.0026, + "loss": 1.7348, + "step": 297, + "text_contrastive_loss": 0.7945 + }, + { + "contrastive_loss": 0.8293, + "epoch": 0.672686230248307, + "grad_norm": 17.689512252807617, + "learning_rate": 9.679715962074566e-06, + "lm_loss": 6.0864, + "loss": 1.8706, + "step": 298, + "text_contrastive_loss": 0.8653 + }, + { + "contrastive_loss": 0.7081, + "epoch": 0.6749435665914221, + "grad_norm": 16.270689010620117, + "learning_rate": 9.677156035946253e-06, + "lm_loss": 6.1083, + "loss": 1.8451, + "step": 299, + "text_contrastive_loss": 1.0523 + }, + { + "contrastive_loss": 0.6502, + "epoch": 0.6772009029345373, + "grad_norm": 16.979660034179688, + "learning_rate": 9.674586261416874e-06, + "lm_loss": 6.0773, + "loss": 1.6506, + "step": 300, + "text_contrastive_loss": 0.7853 + }, + { + "contrastive_loss": 0.7074, + "epoch": 0.6794582392776524, + "grad_norm": 17.27545928955078, + "learning_rate": 9.672006643897444e-06, + "lm_loss": 6.1153, + "loss": 1.8001, + "step": 301, + "text_contrastive_loss": 0.9624 + }, + { + "contrastive_loss": 0.8176, + "epoch": 0.6817155756207675, + "grad_norm": 18.559659957885742, + "learning_rate": 9.669417188819704e-06, + "lm_loss": 6.0473, + "loss": 1.9087, + "step": 302, + "text_contrastive_loss": 0.9728 + }, + { + "contrastive_loss": 0.741, + "epoch": 0.6839729119638827, + "grad_norm": 16.704532623291016, + "learning_rate": 9.666817901636115e-06, + "lm_loss": 6.0039, + "loss": 1.7943, + "step": 303, + "text_contrastive_loss": 0.9059 + }, + { + "contrastive_loss": 0.8288, + "epoch": 0.6862302483069977, + "grad_norm": 20.246355056762695, + "learning_rate": 9.664208787819833e-06, + "lm_loss": 6.0545, + "loss": 1.8484, + "step": 304, + "text_contrastive_loss": 0.8284 + }, + { + "contrastive_loss": 0.6943, + "epoch": 0.6884875846501128, + "grad_norm": 17.586076736450195, + "learning_rate": 9.66158985286471e-06, + "lm_loss": 5.9785, + "loss": 1.7344, + "step": 305, + "text_contrastive_loss": 0.8846 + }, + { + "contrastive_loss": 0.6383, + "epoch": 0.690744920993228, + "grad_norm": 16.763389587402344, + "learning_rate": 9.658961102285276e-06, + "lm_loss": 6.1017, + "loss": 1.6815, + "step": 306, + "text_contrastive_loss": 0.8661 + }, + { + "contrastive_loss": 0.7344, + "epoch": 0.6930022573363431, + "grad_norm": 17.52794647216797, + "learning_rate": 9.656322541616734e-06, + "lm_loss": 5.981, + "loss": 1.7619, + "step": 307, + "text_contrastive_loss": 0.8589 + }, + { + "contrastive_loss": 0.9579, + "epoch": 0.6952595936794582, + "grad_norm": 21.528606414794922, + "learning_rate": 9.653674176414936e-06, + "lm_loss": 6.0425, + "loss": 2.0525, + "step": 308, + "text_contrastive_loss": 0.9807 + }, + { + "contrastive_loss": 0.803, + "epoch": 0.6975169300225733, + "grad_norm": 19.41025161743164, + "learning_rate": 9.651016012256382e-06, + "lm_loss": 5.9865, + "loss": 1.9478, + "step": 309, + "text_contrastive_loss": 1.0923 + }, + { + "contrastive_loss": 0.7292, + "epoch": 0.6997742663656885, + "grad_norm": 19.212434768676758, + "learning_rate": 9.648348054738208e-06, + "lm_loss": 6.2198, + "loss": 1.8035, + "step": 310, + "text_contrastive_loss": 0.9047 + }, + { + "contrastive_loss": 0.7602, + "epoch": 0.7020316027088036, + "grad_norm": 20.256837844848633, + "learning_rate": 9.64567030947817e-06, + "lm_loss": 6.1579, + "loss": 1.8317, + "step": 311, + "text_contrastive_loss": 0.9115 + }, + { + "contrastive_loss": 0.7533, + "epoch": 0.7042889390519187, + "grad_norm": 17.484106063842773, + "learning_rate": 9.642982782114628e-06, + "lm_loss": 6.0454, + "loss": 1.8201, + "step": 312, + "text_contrastive_loss": 0.9245 + }, + { + "contrastive_loss": 0.8264, + "epoch": 0.7065462753950339, + "grad_norm": 18.091968536376953, + "learning_rate": 9.640285478306546e-06, + "lm_loss": 6.1478, + "loss": 1.8968, + "step": 313, + "text_contrastive_loss": 0.9113 + }, + { + "contrastive_loss": 0.7541, + "epoch": 0.708803611738149, + "grad_norm": 24.659730911254883, + "learning_rate": 9.63757840373347e-06, + "lm_loss": 6.1216, + "loss": 1.8043, + "step": 314, + "text_contrastive_loss": 0.876 + }, + { + "contrastive_loss": 0.765, + "epoch": 0.7110609480812641, + "grad_norm": 19.639833450317383, + "learning_rate": 9.634861564095525e-06, + "lm_loss": 6.1299, + "loss": 1.9027, + "step": 315, + "text_contrastive_loss": 1.0496 + }, + { + "contrastive_loss": 0.8109, + "epoch": 0.7133182844243793, + "grad_norm": 17.972936630249023, + "learning_rate": 9.632134965113389e-06, + "lm_loss": 5.9087, + "loss": 1.9626, + "step": 316, + "text_contrastive_loss": 1.1217 + }, + { + "contrastive_loss": 0.7516, + "epoch": 0.7155756207674944, + "grad_norm": 18.55760383605957, + "learning_rate": 9.629398612528299e-06, + "lm_loss": 6.0409, + "loss": 1.8372, + "step": 317, + "text_contrastive_loss": 0.963 + }, + { + "contrastive_loss": 0.7369, + "epoch": 0.7178329571106095, + "grad_norm": 16.857271194458008, + "learning_rate": 9.626652512102021e-06, + "lm_loss": 6.049, + "loss": 1.8467, + "step": 318, + "text_contrastive_loss": 1.0097 + }, + { + "contrastive_loss": 0.7255, + "epoch": 0.7200902934537246, + "grad_norm": 17.906700134277344, + "learning_rate": 9.623896669616855e-06, + "lm_loss": 6.1112, + "loss": 1.791, + "step": 319, + "text_contrastive_loss": 0.9087 + }, + { + "contrastive_loss": 0.8361, + "epoch": 0.7223476297968398, + "grad_norm": 18.165489196777344, + "learning_rate": 9.621131090875603e-06, + "lm_loss": 6.0386, + "loss": 1.8247, + "step": 320, + "text_contrastive_loss": 0.7695 + }, + { + "contrastive_loss": 0.9146, + "epoch": 0.7246049661399548, + "grad_norm": 18.603918075561523, + "learning_rate": 9.618355781701584e-06, + "lm_loss": 5.9065, + "loss": 1.9471, + "step": 321, + "text_contrastive_loss": 0.8838 + }, + { + "contrastive_loss": 0.77, + "epoch": 0.7268623024830699, + "grad_norm": 18.002338409423828, + "learning_rate": 9.61557074793859e-06, + "lm_loss": 6.0749, + "loss": 1.8642, + "step": 322, + "text_contrastive_loss": 0.9735 + }, + { + "contrastive_loss": 0.8282, + "epoch": 0.7291196388261851, + "grad_norm": 18.4736385345459, + "learning_rate": 9.612775995450896e-06, + "lm_loss": 6.0022, + "loss": 1.899, + "step": 323, + "text_contrastive_loss": 0.9412 + }, + { + "contrastive_loss": 0.7157, + "epoch": 0.7313769751693002, + "grad_norm": 16.921104431152344, + "learning_rate": 9.609971530123243e-06, + "lm_loss": 5.9862, + "loss": 1.7669, + "step": 324, + "text_contrastive_loss": 0.9052 + }, + { + "contrastive_loss": 0.7339, + "epoch": 0.7336343115124153, + "grad_norm": 18.970903396606445, + "learning_rate": 9.607157357860823e-06, + "lm_loss": 6.0996, + "loss": 1.7801, + "step": 325, + "text_contrastive_loss": 0.8727 + }, + { + "contrastive_loss": 0.7823, + "epoch": 0.7358916478555305, + "grad_norm": 19.708547592163086, + "learning_rate": 9.604333484589266e-06, + "lm_loss": 5.9082, + "loss": 1.8611, + "step": 326, + "text_contrastive_loss": 0.976 + }, + { + "contrastive_loss": 0.7769, + "epoch": 0.7381489841986456, + "grad_norm": 18.057262420654297, + "learning_rate": 9.601499916254626e-06, + "lm_loss": 5.992, + "loss": 1.9043, + "step": 327, + "text_contrastive_loss": 1.0565 + }, + { + "contrastive_loss": 0.7535, + "epoch": 0.7404063205417607, + "grad_norm": 19.701824188232422, + "learning_rate": 9.598656658823378e-06, + "lm_loss": 6.1302, + "loss": 1.8041, + "step": 328, + "text_contrastive_loss": 0.8751 + }, + { + "contrastive_loss": 0.75, + "epoch": 0.7426636568848759, + "grad_norm": 18.57602310180664, + "learning_rate": 9.595803718282391e-06, + "lm_loss": 6.0646, + "loss": 1.7876, + "step": 329, + "text_contrastive_loss": 0.8623 + }, + { + "contrastive_loss": 0.7559, + "epoch": 0.744920993227991, + "grad_norm": 17.691495895385742, + "learning_rate": 9.59294110063893e-06, + "lm_loss": 5.971, + "loss": 1.8275, + "step": 330, + "text_contrastive_loss": 0.9489 + }, + { + "contrastive_loss": 0.7651, + "epoch": 0.7471783295711061, + "grad_norm": 19.426851272583008, + "learning_rate": 9.590068811920637e-06, + "lm_loss": 5.9845, + "loss": 1.8219, + "step": 331, + "text_contrastive_loss": 0.9168 + }, + { + "contrastive_loss": 0.7343, + "epoch": 0.7494356659142212, + "grad_norm": 18.151290893554688, + "learning_rate": 9.587186858175507e-06, + "lm_loss": 5.9706, + "loss": 1.7612, + "step": 332, + "text_contrastive_loss": 0.8596 + }, + { + "contrastive_loss": 0.7179, + "epoch": 0.7516930022573364, + "grad_norm": 16.147844314575195, + "learning_rate": 9.584295245471898e-06, + "lm_loss": 6.0191, + "loss": 1.7633, + "step": 333, + "text_contrastive_loss": 0.887 + }, + { + "contrastive_loss": 0.696, + "epoch": 0.7539503386004515, + "grad_norm": 16.67496681213379, + "learning_rate": 9.581393979898502e-06, + "lm_loss": 6.0286, + "loss": 1.7429, + "step": 334, + "text_contrastive_loss": 0.8882 + }, + { + "contrastive_loss": 0.7027, + "epoch": 0.7562076749435666, + "grad_norm": 17.9287166595459, + "learning_rate": 9.578483067564335e-06, + "lm_loss": 6.037, + "loss": 1.8146, + "step": 335, + "text_contrastive_loss": 1.0164 + }, + { + "contrastive_loss": 0.8264, + "epoch": 0.7584650112866818, + "grad_norm": 18.266008377075195, + "learning_rate": 9.575562514598727e-06, + "lm_loss": 5.93, + "loss": 1.8494, + "step": 336, + "text_contrastive_loss": 0.86 + }, + { + "contrastive_loss": 0.6572, + "epoch": 0.7607223476297968, + "grad_norm": 17.233238220214844, + "learning_rate": 9.572632327151309e-06, + "lm_loss": 6.0427, + "loss": 1.7353, + "step": 337, + "text_contrastive_loss": 0.9478 + }, + { + "contrastive_loss": 0.8304, + "epoch": 0.7629796839729119, + "grad_norm": 18.098194122314453, + "learning_rate": 9.569692511391995e-06, + "lm_loss": 5.9944, + "loss": 1.8887, + "step": 338, + "text_contrastive_loss": 0.9178 + }, + { + "contrastive_loss": 0.8, + "epoch": 0.7652370203160271, + "grad_norm": 17.877992630004883, + "learning_rate": 9.566743073510976e-06, + "lm_loss": 5.9982, + "loss": 1.8333, + "step": 339, + "text_contrastive_loss": 0.867 + }, + { + "contrastive_loss": 0.725, + "epoch": 0.7674943566591422, + "grad_norm": 17.114086151123047, + "learning_rate": 9.563784019718704e-06, + "lm_loss": 6.0061, + "loss": 1.6668, + "step": 340, + "text_contrastive_loss": 0.6822 + }, + { + "contrastive_loss": 0.7336, + "epoch": 0.7697516930022573, + "grad_norm": 17.13675308227539, + "learning_rate": 9.560815356245875e-06, + "lm_loss": 5.9689, + "loss": 1.7971, + "step": 341, + "text_contrastive_loss": 0.9332 + }, + { + "contrastive_loss": 0.6919, + "epoch": 0.7720090293453724, + "grad_norm": 17.996566772460938, + "learning_rate": 9.557837089343424e-06, + "lm_loss": 6.0123, + "loss": 1.7188, + "step": 342, + "text_contrastive_loss": 0.8514 + }, + { + "contrastive_loss": 0.795, + "epoch": 0.7742663656884876, + "grad_norm": 18.684648513793945, + "learning_rate": 9.554849225282503e-06, + "lm_loss": 5.9597, + "loss": 1.8536, + "step": 343, + "text_contrastive_loss": 0.9251 + }, + { + "contrastive_loss": 0.7855, + "epoch": 0.7765237020316027, + "grad_norm": 20.135141372680664, + "learning_rate": 9.551851770354477e-06, + "lm_loss": 6.0397, + "loss": 1.8296, + "step": 344, + "text_contrastive_loss": 0.8803 + }, + { + "contrastive_loss": 0.7646, + "epoch": 0.7787810383747178, + "grad_norm": 18.024913787841797, + "learning_rate": 9.548844730870903e-06, + "lm_loss": 5.933, + "loss": 1.82, + "step": 345, + "text_contrastive_loss": 0.9243 + }, + { + "contrastive_loss": 0.8148, + "epoch": 0.781038374717833, + "grad_norm": 19.366764068603516, + "learning_rate": 9.545828113163516e-06, + "lm_loss": 6.0345, + "loss": 1.9181, + "step": 346, + "text_contrastive_loss": 0.9997 + }, + { + "contrastive_loss": 0.6809, + "epoch": 0.7832957110609481, + "grad_norm": 16.887325286865234, + "learning_rate": 9.542801923584228e-06, + "lm_loss": 6.0031, + "loss": 1.7064, + "step": 347, + "text_contrastive_loss": 0.8503 + }, + { + "contrastive_loss": 0.8188, + "epoch": 0.7855530474040632, + "grad_norm": 19.915956497192383, + "learning_rate": 9.5397661685051e-06, + "lm_loss": 5.9961, + "loss": 1.8996, + "step": 348, + "text_contrastive_loss": 0.9623 + }, + { + "contrastive_loss": 0.8719, + "epoch": 0.7878103837471784, + "grad_norm": 17.26915168762207, + "learning_rate": 9.536720854318333e-06, + "lm_loss": 5.9717, + "loss": 1.9456, + "step": 349, + "text_contrastive_loss": 0.9532 + }, + { + "contrastive_loss": 0.7545, + "epoch": 0.7900677200902935, + "grad_norm": 19.145780563354492, + "learning_rate": 9.533665987436262e-06, + "lm_loss": 6.1077, + "loss": 1.7845, + "step": 350, + "text_contrastive_loss": 0.8383 + }, + { + "contrastive_loss": 0.71, + "epoch": 0.7923250564334086, + "grad_norm": 18.116910934448242, + "learning_rate": 9.530601574291331e-06, + "lm_loss": 5.972, + "loss": 1.7411, + "step": 351, + "text_contrastive_loss": 0.8678 + }, + { + "contrastive_loss": 0.8342, + "epoch": 0.7945823927765236, + "grad_norm": 22.180986404418945, + "learning_rate": 9.527527621336087e-06, + "lm_loss": 6.0789, + "loss": 1.9307, + "step": 352, + "text_contrastive_loss": 0.9772 + }, + { + "contrastive_loss": 0.8068, + "epoch": 0.7968397291196389, + "grad_norm": 17.729379653930664, + "learning_rate": 9.524444135043168e-06, + "lm_loss": 6.0396, + "loss": 1.9457, + "step": 353, + "text_contrastive_loss": 1.0698 + }, + { + "contrastive_loss": 0.6485, + "epoch": 0.7990970654627539, + "grad_norm": 16.314897537231445, + "learning_rate": 9.521351121905278e-06, + "lm_loss": 5.8872, + "loss": 1.6582, + "step": 354, + "text_contrastive_loss": 0.842 + }, + { + "contrastive_loss": 0.819, + "epoch": 0.801354401805869, + "grad_norm": 18.633771896362305, + "learning_rate": 9.518248588435185e-06, + "lm_loss": 6.0862, + "loss": 1.8484, + "step": 355, + "text_contrastive_loss": 0.8414 + }, + { + "contrastive_loss": 0.8041, + "epoch": 0.8036117381489842, + "grad_norm": 17.356449127197266, + "learning_rate": 9.515136541165708e-06, + "lm_loss": 5.9713, + "loss": 1.9172, + "step": 356, + "text_contrastive_loss": 1.0319 + }, + { + "contrastive_loss": 0.7013, + "epoch": 0.8058690744920993, + "grad_norm": 17.476306915283203, + "learning_rate": 9.512014986649691e-06, + "lm_loss": 6.0015, + "loss": 1.6744, + "step": 357, + "text_contrastive_loss": 0.7459 + }, + { + "contrastive_loss": 0.6759, + "epoch": 0.8081264108352144, + "grad_norm": 17.530406951904297, + "learning_rate": 9.50888393146e-06, + "lm_loss": 5.9793, + "loss": 1.7313, + "step": 358, + "text_contrastive_loss": 0.9151 + }, + { + "contrastive_loss": 0.7349, + "epoch": 0.8103837471783296, + "grad_norm": 17.47347640991211, + "learning_rate": 9.50574338218951e-06, + "lm_loss": 5.8924, + "loss": 1.7656, + "step": 359, + "text_contrastive_loss": 0.883 + }, + { + "contrastive_loss": 0.7602, + "epoch": 0.8126410835214447, + "grad_norm": 17.209794998168945, + "learning_rate": 9.502593345451078e-06, + "lm_loss": 5.8913, + "loss": 1.8603, + "step": 360, + "text_contrastive_loss": 1.0221 + }, + { + "contrastive_loss": 0.5707, + "epoch": 0.8148984198645598, + "grad_norm": 15.557596206665039, + "learning_rate": 9.499433827877547e-06, + "lm_loss": 5.9123, + "loss": 1.5769, + "step": 361, + "text_contrastive_loss": 0.83 + }, + { + "contrastive_loss": 0.7045, + "epoch": 0.8171557562076749, + "grad_norm": 17.717817306518555, + "learning_rate": 9.49626483612172e-06, + "lm_loss": 5.971, + "loss": 1.7505, + "step": 362, + "text_contrastive_loss": 0.8978 + }, + { + "contrastive_loss": 0.7613, + "epoch": 0.8194130925507901, + "grad_norm": 18.573936462402344, + "learning_rate": 9.493086376856346e-06, + "lm_loss": 5.97, + "loss": 1.8467, + "step": 363, + "text_contrastive_loss": 0.9768 + }, + { + "contrastive_loss": 0.7302, + "epoch": 0.8216704288939052, + "grad_norm": 19.44474983215332, + "learning_rate": 9.489898456774116e-06, + "lm_loss": 5.9779, + "loss": 1.751, + "step": 364, + "text_contrastive_loss": 0.846 + }, + { + "contrastive_loss": 0.7228, + "epoch": 0.8239277652370203, + "grad_norm": 16.996919631958008, + "learning_rate": 9.486701082587635e-06, + "lm_loss": 5.9198, + "loss": 1.6931, + "step": 365, + "text_contrastive_loss": 0.7567 + }, + { + "contrastive_loss": 0.7575, + "epoch": 0.8261851015801355, + "grad_norm": 19.95162010192871, + "learning_rate": 9.483494261029418e-06, + "lm_loss": 5.9313, + "loss": 1.8519, + "step": 366, + "text_contrastive_loss": 1.0027 + }, + { + "contrastive_loss": 0.7983, + "epoch": 0.8284424379232506, + "grad_norm": 21.023330688476562, + "learning_rate": 9.480277998851875e-06, + "lm_loss": 5.941, + "loss": 1.8368, + "step": 367, + "text_contrastive_loss": 0.8888 + }, + { + "contrastive_loss": 1.0023, + "epoch": 0.8306997742663657, + "grad_norm": 24.805105209350586, + "learning_rate": 9.47705230282729e-06, + "lm_loss": 5.915, + "loss": 2.1282, + "step": 368, + "text_contrastive_loss": 1.0688 + }, + { + "contrastive_loss": 0.6559, + "epoch": 0.8329571106094809, + "grad_norm": 17.166223526000977, + "learning_rate": 9.473817179747815e-06, + "lm_loss": 5.921, + "loss": 1.6759, + "step": 369, + "text_contrastive_loss": 0.8558 + }, + { + "contrastive_loss": 0.8761, + "epoch": 0.835214446952596, + "grad_norm": 19.827442169189453, + "learning_rate": 9.470572636425451e-06, + "lm_loss": 5.8163, + "loss": 1.934, + "step": 370, + "text_contrastive_loss": 0.9525 + }, + { + "contrastive_loss": 0.7552, + "epoch": 0.837471783295711, + "grad_norm": 18.570728302001953, + "learning_rate": 9.467318679692031e-06, + "lm_loss": 6.1239, + "loss": 1.7529, + "step": 371, + "text_contrastive_loss": 0.7706 + }, + { + "contrastive_loss": 0.6568, + "epoch": 0.8397291196388262, + "grad_norm": 15.911295890808105, + "learning_rate": 9.464055316399217e-06, + "lm_loss": 6.0501, + "loss": 1.6502, + "step": 372, + "text_contrastive_loss": 0.7767 + }, + { + "contrastive_loss": 0.6174, + "epoch": 0.8419864559819413, + "grad_norm": 17.889629364013672, + "learning_rate": 9.46078255341847e-06, + "lm_loss": 5.9044, + "loss": 1.5875, + "step": 373, + "text_contrastive_loss": 0.7594 + }, + { + "contrastive_loss": 0.8546, + "epoch": 0.8442437923250564, + "grad_norm": 20.568992614746094, + "learning_rate": 9.457500397641049e-06, + "lm_loss": 6.0276, + "loss": 1.9056, + "step": 374, + "text_contrastive_loss": 0.8965 + }, + { + "contrastive_loss": 0.6998, + "epoch": 0.8465011286681715, + "grad_norm": 19.322673797607422, + "learning_rate": 9.454208855977986e-06, + "lm_loss": 5.9043, + "loss": 1.657, + "step": 375, + "text_contrastive_loss": 0.7336 + }, + { + "contrastive_loss": 0.7738, + "epoch": 0.8487584650112867, + "grad_norm": 20.86590576171875, + "learning_rate": 9.450907935360081e-06, + "lm_loss": 5.931, + "loss": 1.7915, + "step": 376, + "text_contrastive_loss": 0.8493 + }, + { + "contrastive_loss": 0.7651, + "epoch": 0.8510158013544018, + "grad_norm": 17.621313095092773, + "learning_rate": 9.447597642737878e-06, + "lm_loss": 6.0016, + "loss": 1.8215, + "step": 377, + "text_contrastive_loss": 0.9126 + }, + { + "contrastive_loss": 0.7374, + "epoch": 0.8532731376975169, + "grad_norm": 18.98619842529297, + "learning_rate": 9.44427798508166e-06, + "lm_loss": 5.9121, + "loss": 1.8098, + "step": 378, + "text_contrastive_loss": 0.9625 + }, + { + "contrastive_loss": 0.7457, + "epoch": 0.8555304740406321, + "grad_norm": 19.90561866760254, + "learning_rate": 9.440948969381425e-06, + "lm_loss": 5.9216, + "loss": 1.7157, + "step": 379, + "text_contrastive_loss": 0.7558 + }, + { + "contrastive_loss": 0.7678, + "epoch": 0.8577878103837472, + "grad_norm": 18.853580474853516, + "learning_rate": 9.437610602646878e-06, + "lm_loss": 5.9362, + "loss": 1.8166, + "step": 380, + "text_contrastive_loss": 0.9103 + }, + { + "contrastive_loss": 0.6174, + "epoch": 0.8600451467268623, + "grad_norm": 16.593408584594727, + "learning_rate": 9.434262891907413e-06, + "lm_loss": 5.9121, + "loss": 1.6502, + "step": 381, + "text_contrastive_loss": 0.883 + }, + { + "contrastive_loss": 0.7372, + "epoch": 0.8623024830699775, + "grad_norm": 19.423614501953125, + "learning_rate": 9.430905844212102e-06, + "lm_loss": 5.9725, + "loss": 1.7701, + "step": 382, + "text_contrastive_loss": 0.8712 + }, + { + "contrastive_loss": 0.7376, + "epoch": 0.8645598194130926, + "grad_norm": 18.79985809326172, + "learning_rate": 9.427539466629672e-06, + "lm_loss": 5.8906, + "loss": 1.7281, + "step": 383, + "text_contrastive_loss": 0.8029 + }, + { + "contrastive_loss": 0.6192, + "epoch": 0.8668171557562077, + "grad_norm": 16.10624122619629, + "learning_rate": 9.424163766248499e-06, + "lm_loss": 5.9218, + "loss": 1.6325, + "step": 384, + "text_contrastive_loss": 0.8423 + }, + { + "contrastive_loss": 0.8226, + "epoch": 0.8690744920993227, + "grad_norm": 19.454587936401367, + "learning_rate": 9.420778750176588e-06, + "lm_loss": 5.9127, + "loss": 1.8905, + "step": 385, + "text_contrastive_loss": 0.9533 + }, + { + "contrastive_loss": 0.8053, + "epoch": 0.871331828442438, + "grad_norm": 19.90314483642578, + "learning_rate": 9.41738442554156e-06, + "lm_loss": 5.8353, + "loss": 1.8316, + "step": 386, + "text_contrastive_loss": 0.8856 + }, + { + "contrastive_loss": 0.9067, + "epoch": 0.873589164785553, + "grad_norm": 20.5092830657959, + "learning_rate": 9.41398079949064e-06, + "lm_loss": 5.97, + "loss": 1.9714, + "step": 387, + "text_contrastive_loss": 0.9354 + }, + { + "contrastive_loss": 0.7665, + "epoch": 0.8758465011286681, + "grad_norm": 19.935422897338867, + "learning_rate": 9.41056787919063e-06, + "lm_loss": 5.7969, + "loss": 1.7965, + "step": 388, + "text_contrastive_loss": 0.9006 + }, + { + "contrastive_loss": 0.7677, + "epoch": 0.8781038374717833, + "grad_norm": 18.127853393554688, + "learning_rate": 9.407145671827909e-06, + "lm_loss": 5.9307, + "loss": 1.7778, + "step": 389, + "text_contrastive_loss": 0.8342 + }, + { + "contrastive_loss": 0.7298, + "epoch": 0.8803611738148984, + "grad_norm": 17.655113220214844, + "learning_rate": 9.403714184608411e-06, + "lm_loss": 5.7757, + "loss": 1.7349, + "step": 390, + "text_contrastive_loss": 0.8551 + }, + { + "contrastive_loss": 0.8102, + "epoch": 0.8826185101580135, + "grad_norm": 18.176137924194336, + "learning_rate": 9.400273424757607e-06, + "lm_loss": 6.0719, + "loss": 1.8673, + "step": 391, + "text_contrastive_loss": 0.8999 + }, + { + "contrastive_loss": 0.6804, + "epoch": 0.8848758465011287, + "grad_norm": 19.85501480102539, + "learning_rate": 9.396823399520495e-06, + "lm_loss": 5.861, + "loss": 1.6786, + "step": 392, + "text_contrastive_loss": 0.8241 + }, + { + "contrastive_loss": 0.7525, + "epoch": 0.8871331828442438, + "grad_norm": 18.777780532836914, + "learning_rate": 9.393364116161582e-06, + "lm_loss": 5.9507, + "loss": 1.8802, + "step": 393, + "text_contrastive_loss": 1.0652 + }, + { + "contrastive_loss": 0.6989, + "epoch": 0.8893905191873589, + "grad_norm": 17.526351928710938, + "learning_rate": 9.38989558196487e-06, + "lm_loss": 5.8805, + "loss": 1.7488, + "step": 394, + "text_contrastive_loss": 0.9238 + }, + { + "contrastive_loss": 0.8578, + "epoch": 0.891647855530474, + "grad_norm": 19.801944732666016, + "learning_rate": 9.386417804233836e-06, + "lm_loss": 5.9629, + "loss": 1.9293, + "step": 395, + "text_contrastive_loss": 0.9504 + }, + { + "contrastive_loss": 0.7496, + "epoch": 0.8939051918735892, + "grad_norm": 18.684749603271484, + "learning_rate": 9.382930790291426e-06, + "lm_loss": 5.8636, + "loss": 1.7431, + "step": 396, + "text_contrastive_loss": 0.8142 + }, + { + "contrastive_loss": 0.6452, + "epoch": 0.8961625282167043, + "grad_norm": 14.384578704833984, + "learning_rate": 9.37943454748003e-06, + "lm_loss": 6.0057, + "loss": 1.6747, + "step": 397, + "text_contrastive_loss": 0.8579 + }, + { + "contrastive_loss": 0.6706, + "epoch": 0.8984198645598194, + "grad_norm": 16.447463989257812, + "learning_rate": 9.375929083161475e-06, + "lm_loss": 5.9143, + "loss": 1.6825, + "step": 398, + "text_contrastive_loss": 0.8411 + }, + { + "contrastive_loss": 0.6437, + "epoch": 0.9006772009029346, + "grad_norm": 16.03485870361328, + "learning_rate": 9.372414404717001e-06, + "lm_loss": 5.8768, + "loss": 1.6824, + "step": 399, + "text_contrastive_loss": 0.902 + }, + { + "contrastive_loss": 0.7504, + "epoch": 0.9029345372460497, + "grad_norm": 17.238916397094727, + "learning_rate": 9.36889051954725e-06, + "lm_loss": 5.8913, + "loss": 1.7632, + "step": 400, + "text_contrastive_loss": 0.8474 + }, + { + "contrastive_loss": 0.7562, + "epoch": 0.9051918735891648, + "grad_norm": 18.555673599243164, + "learning_rate": 9.365357435072255e-06, + "lm_loss": 5.9769, + "loss": 1.8466, + "step": 401, + "text_contrastive_loss": 0.9854 + }, + { + "contrastive_loss": 0.7598, + "epoch": 0.90744920993228, + "grad_norm": 17.139881134033203, + "learning_rate": 9.361815158731413e-06, + "lm_loss": 5.8767, + "loss": 1.7825, + "step": 402, + "text_contrastive_loss": 0.8701 + }, + { + "contrastive_loss": 0.7891, + "epoch": 0.909706546275395, + "grad_norm": 19.237648010253906, + "learning_rate": 9.358263697983479e-06, + "lm_loss": 5.974, + "loss": 1.849, + "step": 403, + "text_contrastive_loss": 0.9252 + }, + { + "contrastive_loss": 0.73, + "epoch": 0.9119638826185101, + "grad_norm": 17.79875373840332, + "learning_rate": 9.354703060306546e-06, + "lm_loss": 5.8671, + "loss": 1.7131, + "step": 404, + "text_contrastive_loss": 0.7927 + }, + { + "contrastive_loss": 0.7431, + "epoch": 0.9142212189616253, + "grad_norm": 17.295480728149414, + "learning_rate": 9.351133253198027e-06, + "lm_loss": 5.941, + "loss": 1.8208, + "step": 405, + "text_contrastive_loss": 0.9671 + }, + { + "contrastive_loss": 0.7145, + "epoch": 0.9164785553047404, + "grad_norm": 17.430517196655273, + "learning_rate": 9.347554284174654e-06, + "lm_loss": 5.9689, + "loss": 1.8182, + "step": 406, + "text_contrastive_loss": 1.0136 + }, + { + "contrastive_loss": 0.7077, + "epoch": 0.9187358916478555, + "grad_norm": 16.849336624145508, + "learning_rate": 9.343966160772438e-06, + "lm_loss": 5.9726, + "loss": 1.722, + "step": 407, + "text_contrastive_loss": 0.8341 + }, + { + "contrastive_loss": 0.7493, + "epoch": 0.9209932279909706, + "grad_norm": 15.629685401916504, + "learning_rate": 9.340368890546672e-06, + "lm_loss": 5.8878, + "loss": 1.7794, + "step": 408, + "text_contrastive_loss": 0.8826 + }, + { + "contrastive_loss": 0.652, + "epoch": 0.9232505643340858, + "grad_norm": 16.84798812866211, + "learning_rate": 9.336762481071906e-06, + "lm_loss": 5.9996, + "loss": 1.6227, + "step": 409, + "text_contrastive_loss": 0.7415 + }, + { + "contrastive_loss": 0.868, + "epoch": 0.9255079006772009, + "grad_norm": 19.487653732299805, + "learning_rate": 9.333146939941938e-06, + "lm_loss": 5.8464, + "loss": 1.9711, + "step": 410, + "text_contrastive_loss": 1.037 + }, + { + "contrastive_loss": 0.6776, + "epoch": 0.927765237020316, + "grad_norm": 16.648977279663086, + "learning_rate": 9.329522274769791e-06, + "lm_loss": 5.8577, + "loss": 1.6416, + "step": 411, + "text_contrastive_loss": 0.7565 + }, + { + "contrastive_loss": 0.7495, + "epoch": 0.9300225733634312, + "grad_norm": 18.50621223449707, + "learning_rate": 9.325888493187699e-06, + "lm_loss": 5.8654, + "loss": 1.7748, + "step": 412, + "text_contrastive_loss": 0.8775 + }, + { + "contrastive_loss": 0.6992, + "epoch": 0.9322799097065463, + "grad_norm": 19.02260971069336, + "learning_rate": 9.322245602847094e-06, + "lm_loss": 5.8865, + "loss": 1.734, + "step": 413, + "text_contrastive_loss": 0.8923 + }, + { + "contrastive_loss": 0.6929, + "epoch": 0.9345372460496614, + "grad_norm": 19.366676330566406, + "learning_rate": 9.31859361141859e-06, + "lm_loss": 5.9832, + "loss": 1.7528, + "step": 414, + "text_contrastive_loss": 0.9231 + }, + { + "contrastive_loss": 0.6418, + "epoch": 0.9367945823927766, + "grad_norm": 16.030540466308594, + "learning_rate": 9.314932526591956e-06, + "lm_loss": 5.9664, + "loss": 1.6605, + "step": 415, + "text_contrastive_loss": 0.8441 + }, + { + "contrastive_loss": 0.6763, + "epoch": 0.9390519187358917, + "grad_norm": 16.432239532470703, + "learning_rate": 9.311262356076118e-06, + "lm_loss": 5.9031, + "loss": 1.7403, + "step": 416, + "text_contrastive_loss": 0.9474 + }, + { + "contrastive_loss": 0.7221, + "epoch": 0.9413092550790068, + "grad_norm": 19.767297744750977, + "learning_rate": 9.30758310759913e-06, + "lm_loss": 5.8982, + "loss": 1.7693, + "step": 417, + "text_contrastive_loss": 0.9148 + }, + { + "contrastive_loss": 0.7386, + "epoch": 0.9435665914221218, + "grad_norm": 18.91975212097168, + "learning_rate": 9.303894788908158e-06, + "lm_loss": 5.7443, + "loss": 1.7289, + "step": 418, + "text_contrastive_loss": 0.8318 + }, + { + "contrastive_loss": 0.9391, + "epoch": 0.945823927765237, + "grad_norm": 20.29686737060547, + "learning_rate": 9.300197407769472e-06, + "lm_loss": 6.0597, + "loss": 1.9885, + "step": 419, + "text_contrastive_loss": 0.887 + }, + { + "contrastive_loss": 0.6991, + "epoch": 0.9480812641083521, + "grad_norm": 18.48418617248535, + "learning_rate": 9.296490971968416e-06, + "lm_loss": 5.9284, + "loss": 1.6989, + "step": 420, + "text_contrastive_loss": 0.8139 + }, + { + "contrastive_loss": 0.7461, + "epoch": 0.9503386004514672, + "grad_norm": 17.921653747558594, + "learning_rate": 9.292775489309409e-06, + "lm_loss": 5.8958, + "loss": 1.7633, + "step": 421, + "text_contrastive_loss": 0.8552 + }, + { + "contrastive_loss": 0.6148, + "epoch": 0.9525959367945824, + "grad_norm": 16.497175216674805, + "learning_rate": 9.289050967615914e-06, + "lm_loss": 5.9521, + "loss": 1.6272, + "step": 422, + "text_contrastive_loss": 0.8344 + }, + { + "contrastive_loss": 0.7885, + "epoch": 0.9548532731376975, + "grad_norm": 16.786640167236328, + "learning_rate": 9.285317414730427e-06, + "lm_loss": 5.8784, + "loss": 1.7938, + "step": 423, + "text_contrastive_loss": 0.8349 + }, + { + "contrastive_loss": 0.7501, + "epoch": 0.9571106094808126, + "grad_norm": 17.82651710510254, + "learning_rate": 9.281574838514464e-06, + "lm_loss": 5.9497, + "loss": 1.7509, + "step": 424, + "text_contrastive_loss": 0.8116 + }, + { + "contrastive_loss": 0.7176, + "epoch": 0.9593679458239278, + "grad_norm": 16.84232521057129, + "learning_rate": 9.277823246848537e-06, + "lm_loss": 5.9303, + "loss": 1.7595, + "step": 425, + "text_contrastive_loss": 0.8978 + }, + { + "contrastive_loss": 0.8188, + "epoch": 0.9616252821670429, + "grad_norm": 19.891075134277344, + "learning_rate": 9.274062647632144e-06, + "lm_loss": 5.8945, + "loss": 1.9416, + "step": 426, + "text_contrastive_loss": 1.0667 + }, + { + "contrastive_loss": 0.7146, + "epoch": 0.963882618510158, + "grad_norm": 17.92348289489746, + "learning_rate": 9.270293048783747e-06, + "lm_loss": 5.9633, + "loss": 1.6935, + "step": 427, + "text_contrastive_loss": 0.7651 + }, + { + "contrastive_loss": 0.6258, + "epoch": 0.9661399548532731, + "grad_norm": 17.62327003479004, + "learning_rate": 9.266514458240762e-06, + "lm_loss": 5.9171, + "loss": 1.5839, + "step": 428, + "text_contrastive_loss": 0.7328 + }, + { + "contrastive_loss": 0.7873, + "epoch": 0.9683972911963883, + "grad_norm": 19.740936279296875, + "learning_rate": 9.262726883959535e-06, + "lm_loss": 5.8752, + "loss": 1.773, + "step": 429, + "text_contrastive_loss": 0.7965 + }, + { + "contrastive_loss": 0.7144, + "epoch": 0.9706546275395034, + "grad_norm": 16.983598709106445, + "learning_rate": 9.258930333915325e-06, + "lm_loss": 5.8795, + "loss": 1.7912, + "step": 430, + "text_contrastive_loss": 0.9778 + }, + { + "contrastive_loss": 0.6559, + "epoch": 0.9729119638826185, + "grad_norm": 17.392724990844727, + "learning_rate": 9.2551248161023e-06, + "lm_loss": 5.762, + "loss": 1.6471, + "step": 431, + "text_contrastive_loss": 0.8301 + }, + { + "contrastive_loss": 0.6605, + "epoch": 0.9751693002257337, + "grad_norm": 17.272245407104492, + "learning_rate": 9.251310338533504e-06, + "lm_loss": 5.8766, + "loss": 1.7141, + "step": 432, + "text_contrastive_loss": 0.9318 + }, + { + "contrastive_loss": 0.7755, + "epoch": 0.9774266365688488, + "grad_norm": 18.896547317504883, + "learning_rate": 9.247486909240849e-06, + "lm_loss": 6.0408, + "loss": 1.8467, + "step": 433, + "text_contrastive_loss": 0.9341 + }, + { + "contrastive_loss": 0.745, + "epoch": 0.9796839729119639, + "grad_norm": 20.267478942871094, + "learning_rate": 9.243654536275095e-06, + "lm_loss": 5.9013, + "loss": 1.7646, + "step": 434, + "text_contrastive_loss": 0.859 + }, + { + "contrastive_loss": 0.7923, + "epoch": 0.981941309255079, + "grad_norm": 18.046537399291992, + "learning_rate": 9.23981322770584e-06, + "lm_loss": 5.8208, + "loss": 1.8683, + "step": 435, + "text_contrastive_loss": 0.9878 + }, + { + "contrastive_loss": 0.7119, + "epoch": 0.9841986455981941, + "grad_norm": 17.922704696655273, + "learning_rate": 9.235962991621484e-06, + "lm_loss": 5.933, + "loss": 1.8701, + "step": 436, + "text_contrastive_loss": 1.1298 + }, + { + "contrastive_loss": 0.8629, + "epoch": 0.9864559819413092, + "grad_norm": 18.518964767456055, + "learning_rate": 9.232103836129239e-06, + "lm_loss": 5.8936, + "loss": 1.8983, + "step": 437, + "text_contrastive_loss": 0.8921 + }, + { + "contrastive_loss": 0.8594, + "epoch": 0.9887133182844243, + "grad_norm": 20.471054077148438, + "learning_rate": 9.22823576935509e-06, + "lm_loss": 5.873, + "loss": 1.8687, + "step": 438, + "text_contrastive_loss": 0.844 + }, + { + "contrastive_loss": 0.6949, + "epoch": 0.9909706546275395, + "grad_norm": 19.415773391723633, + "learning_rate": 9.224358799443791e-06, + "lm_loss": 5.7932, + "loss": 1.6119, + "step": 439, + "text_contrastive_loss": 0.6754 + }, + { + "contrastive_loss": 0.7808, + "epoch": 0.9932279909706546, + "grad_norm": 17.44244384765625, + "learning_rate": 9.220472934558838e-06, + "lm_loss": 5.8916, + "loss": 1.8226, + "step": 440, + "text_contrastive_loss": 0.9054 + }, + { + "contrastive_loss": 0.5842, + "epoch": 0.9954853273137697, + "grad_norm": 15.602773666381836, + "learning_rate": 9.216578182882459e-06, + "lm_loss": 5.8873, + "loss": 1.6194, + "step": 441, + "text_contrastive_loss": 0.8929 + }, + { + "contrastive_loss": 0.7421, + "epoch": 0.9977426636568849, + "grad_norm": 18.081762313842773, + "learning_rate": 9.212674552615594e-06, + "lm_loss": 5.8661, + "loss": 1.7894, + "step": 442, + "text_contrastive_loss": 0.9215 + }, + { + "contrastive_loss": 0.5515, + "epoch": 1.0, + "grad_norm": 24.072240829467773, + "learning_rate": 9.208762051977879e-06, + "lm_loss": 5.969, + "loss": 1.4913, + "step": 443, + "text_contrastive_loss": 0.6858 + }, + { + "contrastive_loss": 0.6564, + "epoch": 1.002257336343115, + "grad_norm": 16.146533966064453, + "learning_rate": 9.204840689207626e-06, + "lm_loss": 5.8738, + "loss": 1.679, + "step": 444, + "text_contrastive_loss": 0.8705 + }, + { + "contrastive_loss": 0.7222, + "epoch": 1.0045146726862302, + "grad_norm": 16.364309310913086, + "learning_rate": 9.20091047256181e-06, + "lm_loss": 5.7891, + "loss": 1.7542, + "step": 445, + "text_contrastive_loss": 0.9063 + }, + { + "contrastive_loss": 0.7453, + "epoch": 1.0067720090293453, + "grad_norm": 18.12066650390625, + "learning_rate": 9.196971410316047e-06, + "lm_loss": 5.8606, + "loss": 1.7919, + "step": 446, + "text_contrastive_loss": 0.921 + }, + { + "contrastive_loss": 0.6281, + "epoch": 1.0090293453724606, + "grad_norm": 15.592330932617188, + "learning_rate": 9.193023510764578e-06, + "lm_loss": 5.7501, + "loss": 1.6208, + "step": 447, + "text_contrastive_loss": 0.8353 + }, + { + "contrastive_loss": 0.6958, + "epoch": 1.0112866817155757, + "grad_norm": 16.594594955444336, + "learning_rate": 9.189066782220253e-06, + "lm_loss": 5.9351, + "loss": 1.781, + "step": 448, + "text_contrastive_loss": 0.9834 + }, + { + "contrastive_loss": 0.6019, + "epoch": 1.0135440180586908, + "grad_norm": 14.94572925567627, + "learning_rate": 9.185101233014516e-06, + "lm_loss": 5.8615, + "loss": 1.6711, + "step": 449, + "text_contrastive_loss": 0.966 + }, + { + "contrastive_loss": 0.6496, + "epoch": 1.0158013544018059, + "grad_norm": 15.78742790222168, + "learning_rate": 9.181126871497378e-06, + "lm_loss": 5.9094, + "loss": 1.6736, + "step": 450, + "text_contrastive_loss": 0.8662 + }, + { + "contrastive_loss": 0.5917, + "epoch": 1.018058690744921, + "grad_norm": 16.283035278320312, + "learning_rate": 9.177143706037411e-06, + "lm_loss": 5.8957, + "loss": 1.6316, + "step": 451, + "text_contrastive_loss": 0.9006 + }, + { + "contrastive_loss": 0.5687, + "epoch": 1.020316027088036, + "grad_norm": 16.268436431884766, + "learning_rate": 9.173151745021722e-06, + "lm_loss": 5.8449, + "loss": 1.5149, + "step": 452, + "text_contrastive_loss": 0.7233 + }, + { + "contrastive_loss": 0.597, + "epoch": 1.0225733634311513, + "grad_norm": 17.817989349365234, + "learning_rate": 9.169150996855939e-06, + "lm_loss": 5.9054, + "loss": 1.5376, + "step": 453, + "text_contrastive_loss": 0.7 + }, + { + "contrastive_loss": 0.6642, + "epoch": 1.0248306997742664, + "grad_norm": 17.480836868286133, + "learning_rate": 9.16514146996419e-06, + "lm_loss": 5.8982, + "loss": 1.6826, + "step": 454, + "text_contrastive_loss": 0.8571 + }, + { + "contrastive_loss": 0.5013, + "epoch": 1.0270880361173815, + "grad_norm": 13.058002471923828, + "learning_rate": 9.161123172789091e-06, + "lm_loss": 5.8368, + "loss": 1.4934, + "step": 455, + "text_contrastive_loss": 0.8169 + }, + { + "contrastive_loss": 0.6949, + "epoch": 1.0293453724604966, + "grad_norm": 17.163982391357422, + "learning_rate": 9.157096113791727e-06, + "lm_loss": 5.8088, + "loss": 1.7878, + "step": 456, + "text_contrastive_loss": 1.0242 + }, + { + "contrastive_loss": 0.7183, + "epoch": 1.0316027088036117, + "grad_norm": 15.776236534118652, + "learning_rate": 9.153060301451629e-06, + "lm_loss": 5.8989, + "loss": 1.7657, + "step": 457, + "text_contrastive_loss": 0.9149 + }, + { + "contrastive_loss": 0.7033, + "epoch": 1.0338600451467268, + "grad_norm": 16.365798950195312, + "learning_rate": 9.149015744266759e-06, + "lm_loss": 5.8877, + "loss": 1.697, + "step": 458, + "text_contrastive_loss": 0.8098 + }, + { + "contrastive_loss": 0.7502, + "epoch": 1.036117381489842, + "grad_norm": 20.798824310302734, + "learning_rate": 9.144962450753491e-06, + "lm_loss": 5.8439, + "loss": 1.7898, + "step": 459, + "text_contrastive_loss": 0.9104 + }, + { + "contrastive_loss": 0.8053, + "epoch": 1.0383747178329572, + "grad_norm": 19.01142120361328, + "learning_rate": 9.140900429446601e-06, + "lm_loss": 5.7618, + "loss": 1.8092, + "step": 460, + "text_contrastive_loss": 0.8555 + }, + { + "contrastive_loss": 0.7128, + "epoch": 1.0406320541760723, + "grad_norm": 17.179027557373047, + "learning_rate": 9.136829688899236e-06, + "lm_loss": 5.8604, + "loss": 1.7158, + "step": 461, + "text_contrastive_loss": 0.8338 + }, + { + "contrastive_loss": 0.6353, + "epoch": 1.0428893905191874, + "grad_norm": 17.52668571472168, + "learning_rate": 9.132750237682907e-06, + "lm_loss": 5.8812, + "loss": 1.578, + "step": 462, + "text_contrastive_loss": 0.7092 + }, + { + "contrastive_loss": 0.7385, + "epoch": 1.0451467268623025, + "grad_norm": 17.666667938232422, + "learning_rate": 9.128662084387462e-06, + "lm_loss": 5.7808, + "loss": 1.7409, + "step": 463, + "text_contrastive_loss": 0.8487 + }, + { + "contrastive_loss": 0.6852, + "epoch": 1.0474040632054176, + "grad_norm": 17.20553207397461, + "learning_rate": 9.12456523762108e-06, + "lm_loss": 5.8279, + "loss": 1.6443, + "step": 464, + "text_contrastive_loss": 0.7527 + }, + { + "contrastive_loss": 0.6127, + "epoch": 1.0496613995485327, + "grad_norm": 15.813065528869629, + "learning_rate": 9.120459706010233e-06, + "lm_loss": 5.9009, + "loss": 1.5875, + "step": 465, + "text_contrastive_loss": 0.7696 + }, + { + "contrastive_loss": 0.579, + "epoch": 1.0519187358916477, + "grad_norm": 15.512067794799805, + "learning_rate": 9.116345498199693e-06, + "lm_loss": 5.8277, + "loss": 1.5727, + "step": 466, + "text_contrastive_loss": 0.8218 + }, + { + "contrastive_loss": 0.6469, + "epoch": 1.054176072234763, + "grad_norm": 17.455263137817383, + "learning_rate": 9.112222622852494e-06, + "lm_loss": 5.9135, + "loss": 1.7021, + "step": 467, + "text_contrastive_loss": 0.9276 + }, + { + "contrastive_loss": 0.6732, + "epoch": 1.0564334085778782, + "grad_norm": 15.996644020080566, + "learning_rate": 9.108091088649922e-06, + "lm_loss": 5.8312, + "loss": 1.6661, + "step": 468, + "text_contrastive_loss": 0.8195 + }, + { + "contrastive_loss": 0.6767, + "epoch": 1.0586907449209932, + "grad_norm": 18.72588348388672, + "learning_rate": 9.103950904291496e-06, + "lm_loss": 5.6876, + "loss": 1.7071, + "step": 469, + "text_contrastive_loss": 0.9233 + }, + { + "contrastive_loss": 0.5908, + "epoch": 1.0609480812641083, + "grad_norm": 15.662790298461914, + "learning_rate": 9.099802078494947e-06, + "lm_loss": 5.7395, + "loss": 1.5848, + "step": 470, + "text_contrastive_loss": 0.8401 + }, + { + "contrastive_loss": 0.5719, + "epoch": 1.0632054176072234, + "grad_norm": 14.050433158874512, + "learning_rate": 9.095644619996206e-06, + "lm_loss": 5.9394, + "loss": 1.6242, + "step": 471, + "text_contrastive_loss": 0.9167 + }, + { + "contrastive_loss": 0.6776, + "epoch": 1.0654627539503385, + "grad_norm": 16.951717376708984, + "learning_rate": 9.09147853754938e-06, + "lm_loss": 5.8241, + "loss": 1.7083, + "step": 472, + "text_contrastive_loss": 0.8966 + }, + { + "contrastive_loss": 0.6857, + "epoch": 1.0677200902934538, + "grad_norm": 18.096969604492188, + "learning_rate": 9.087303839926727e-06, + "lm_loss": 5.7433, + "loss": 1.7767, + "step": 473, + "text_contrastive_loss": 1.0333 + }, + { + "contrastive_loss": 0.6358, + "epoch": 1.069977426636569, + "grad_norm": 15.680360794067383, + "learning_rate": 9.08312053591866e-06, + "lm_loss": 5.8963, + "loss": 1.6427, + "step": 474, + "text_contrastive_loss": 0.8345 + }, + { + "contrastive_loss": 0.647, + "epoch": 1.072234762979684, + "grad_norm": 16.32209014892578, + "learning_rate": 9.0789286343337e-06, + "lm_loss": 5.8095, + "loss": 1.7041, + "step": 475, + "text_contrastive_loss": 0.9523 + }, + { + "contrastive_loss": 0.6131, + "epoch": 1.074492099322799, + "grad_norm": 15.483674049377441, + "learning_rate": 9.07472814399848e-06, + "lm_loss": 5.8696, + "loss": 1.6418, + "step": 476, + "text_contrastive_loss": 0.8836 + }, + { + "contrastive_loss": 0.7275, + "epoch": 1.0767494356659142, + "grad_norm": 16.682987213134766, + "learning_rate": 9.070519073757717e-06, + "lm_loss": 5.8161, + "loss": 1.7329, + "step": 477, + "text_contrastive_loss": 0.8474 + }, + { + "contrastive_loss": 0.7091, + "epoch": 1.0790067720090293, + "grad_norm": 17.670696258544922, + "learning_rate": 9.06630143247419e-06, + "lm_loss": 5.8142, + "loss": 1.6948, + "step": 478, + "text_contrastive_loss": 0.8085 + }, + { + "contrastive_loss": 0.6821, + "epoch": 1.0812641083521444, + "grad_norm": 17.2860107421875, + "learning_rate": 9.062075229028728e-06, + "lm_loss": 5.8454, + "loss": 1.7093, + "step": 479, + "text_contrastive_loss": 0.8853 + }, + { + "contrastive_loss": 0.7009, + "epoch": 1.0835214446952597, + "grad_norm": 16.963953018188477, + "learning_rate": 9.057840472320192e-06, + "lm_loss": 5.7601, + "loss": 1.6516, + "step": 480, + "text_contrastive_loss": 0.7494 + }, + { + "contrastive_loss": 0.603, + "epoch": 1.0857787810383748, + "grad_norm": 16.2852783203125, + "learning_rate": 9.053597171265447e-06, + "lm_loss": 5.8464, + "loss": 1.6351, + "step": 481, + "text_contrastive_loss": 0.8949 + }, + { + "contrastive_loss": 0.62, + "epoch": 1.0880361173814899, + "grad_norm": 17.021404266357422, + "learning_rate": 9.04934533479935e-06, + "lm_loss": 5.8477, + "loss": 1.6446, + "step": 482, + "text_contrastive_loss": 0.8795 + }, + { + "contrastive_loss": 0.6784, + "epoch": 1.090293453724605, + "grad_norm": 19.252443313598633, + "learning_rate": 9.045084971874738e-06, + "lm_loss": 5.8896, + "loss": 1.6798, + "step": 483, + "text_contrastive_loss": 0.825 + }, + { + "contrastive_loss": 0.5168, + "epoch": 1.09255079006772, + "grad_norm": 16.297832489013672, + "learning_rate": 9.040816091462393e-06, + "lm_loss": 5.8755, + "loss": 1.4908, + "step": 484, + "text_contrastive_loss": 0.7729 + }, + { + "contrastive_loss": 0.6637, + "epoch": 1.0948081264108351, + "grad_norm": 16.924846649169922, + "learning_rate": 9.036538702551037e-06, + "lm_loss": 5.8421, + "loss": 1.6557, + "step": 485, + "text_contrastive_loss": 0.8156 + }, + { + "contrastive_loss": 0.7114, + "epoch": 1.0970654627539504, + "grad_norm": 18.735641479492188, + "learning_rate": 9.032252814147302e-06, + "lm_loss": 5.8253, + "loss": 1.7244, + "step": 486, + "text_contrastive_loss": 0.861 + }, + { + "contrastive_loss": 0.7342, + "epoch": 1.0993227990970655, + "grad_norm": 18.187530517578125, + "learning_rate": 9.027958435275726e-06, + "lm_loss": 5.9273, + "loss": 1.7577, + "step": 487, + "text_contrastive_loss": 0.8616 + }, + { + "contrastive_loss": 0.618, + "epoch": 1.1015801354401806, + "grad_norm": 17.452129364013672, + "learning_rate": 9.023655574978716e-06, + "lm_loss": 5.8096, + "loss": 1.6569, + "step": 488, + "text_contrastive_loss": 0.9159 + }, + { + "contrastive_loss": 0.549, + "epoch": 1.1038374717832957, + "grad_norm": 15.237523078918457, + "learning_rate": 9.019344242316542e-06, + "lm_loss": 5.7269, + "loss": 1.509, + "step": 489, + "text_contrastive_loss": 0.7746 + }, + { + "contrastive_loss": 0.6338, + "epoch": 1.1060948081264108, + "grad_norm": 19.214378356933594, + "learning_rate": 9.015024446367315e-06, + "lm_loss": 5.9157, + "loss": 1.5921, + "step": 490, + "text_contrastive_loss": 0.7335 + }, + { + "contrastive_loss": 0.6884, + "epoch": 1.108352144469526, + "grad_norm": 17.360498428344727, + "learning_rate": 9.010696196226963e-06, + "lm_loss": 5.852, + "loss": 1.7197, + "step": 491, + "text_contrastive_loss": 0.8923 + }, + { + "contrastive_loss": 0.7024, + "epoch": 1.110609480812641, + "grad_norm": 18.37981414794922, + "learning_rate": 9.00635950100922e-06, + "lm_loss": 5.7854, + "loss": 1.7987, + "step": 492, + "text_contrastive_loss": 1.0355 + }, + { + "contrastive_loss": 0.688, + "epoch": 1.1128668171557563, + "grad_norm": 17.151948928833008, + "learning_rate": 9.002014369845592e-06, + "lm_loss": 5.7959, + "loss": 1.7285, + "step": 493, + "text_contrastive_loss": 0.9218 + }, + { + "contrastive_loss": 0.5828, + "epoch": 1.1151241534988714, + "grad_norm": 16.890501022338867, + "learning_rate": 8.997660811885367e-06, + "lm_loss": 5.8493, + "loss": 1.6414, + "step": 494, + "text_contrastive_loss": 0.9474 + }, + { + "contrastive_loss": 0.681, + "epoch": 1.1173814898419865, + "grad_norm": 16.833452224731445, + "learning_rate": 8.993298836295556e-06, + "lm_loss": 5.8133, + "loss": 1.6957, + "step": 495, + "text_contrastive_loss": 0.8668 + }, + { + "contrastive_loss": 0.5756, + "epoch": 1.1196388261851016, + "grad_norm": 15.70335578918457, + "learning_rate": 8.988928452260909e-06, + "lm_loss": 5.7206, + "loss": 1.5213, + "step": 496, + "text_contrastive_loss": 0.7472 + }, + { + "contrastive_loss": 0.5968, + "epoch": 1.1218961625282167, + "grad_norm": 18.256250381469727, + "learning_rate": 8.984549668983875e-06, + "lm_loss": 5.8204, + "loss": 1.5841, + "step": 497, + "text_contrastive_loss": 0.8105 + }, + { + "contrastive_loss": 0.6764, + "epoch": 1.1241534988713318, + "grad_norm": 19.21333885192871, + "learning_rate": 8.980162495684587e-06, + "lm_loss": 5.8301, + "loss": 1.6476, + "step": 498, + "text_contrastive_loss": 0.7764 + }, + { + "contrastive_loss": 0.688, + "epoch": 1.1264108352144468, + "grad_norm": 20.066160202026367, + "learning_rate": 8.975766941600852e-06, + "lm_loss": 5.7774, + "loss": 1.6899, + "step": 499, + "text_contrastive_loss": 0.8483 + }, + { + "contrastive_loss": 0.6225, + "epoch": 1.1286681715575622, + "grad_norm": 18.304738998413086, + "learning_rate": 8.971363015988115e-06, + "lm_loss": 5.7485, + "loss": 1.6332, + "step": 500, + "text_contrastive_loss": 0.8717 + }, + { + "contrastive_loss": 0.5801, + "epoch": 1.1309255079006773, + "grad_norm": 16.779212951660156, + "learning_rate": 8.966950728119453e-06, + "lm_loss": 5.7688, + "loss": 1.5785, + "step": 501, + "text_contrastive_loss": 0.843 + }, + { + "contrastive_loss": 0.6416, + "epoch": 1.1331828442437923, + "grad_norm": 16.1234130859375, + "learning_rate": 8.962530087285552e-06, + "lm_loss": 5.8799, + "loss": 1.6927, + "step": 502, + "text_contrastive_loss": 0.9262 + }, + { + "contrastive_loss": 0.6148, + "epoch": 1.1354401805869074, + "grad_norm": 17.34881591796875, + "learning_rate": 8.958101102794686e-06, + "lm_loss": 5.7185, + "loss": 1.6059, + "step": 503, + "text_contrastive_loss": 0.8385 + }, + { + "contrastive_loss": 0.7299, + "epoch": 1.1376975169300225, + "grad_norm": 20.895261764526367, + "learning_rate": 8.953663783972692e-06, + "lm_loss": 5.6982, + "loss": 1.7532, + "step": 504, + "text_contrastive_loss": 0.907 + }, + { + "contrastive_loss": 0.6176, + "epoch": 1.1399548532731376, + "grad_norm": 19.33408546447754, + "learning_rate": 8.949218140162965e-06, + "lm_loss": 5.8697, + "loss": 1.6283, + "step": 505, + "text_contrastive_loss": 0.8474 + }, + { + "contrastive_loss": 0.6625, + "epoch": 1.1422121896162527, + "grad_norm": 16.830759048461914, + "learning_rate": 8.944764180726423e-06, + "lm_loss": 5.8084, + "loss": 1.7575, + "step": 506, + "text_contrastive_loss": 1.0283 + }, + { + "contrastive_loss": 0.7758, + "epoch": 1.144469525959368, + "grad_norm": 20.973735809326172, + "learning_rate": 8.940301915041496e-06, + "lm_loss": 5.7669, + "loss": 1.8436, + "step": 507, + "text_contrastive_loss": 0.9822 + }, + { + "contrastive_loss": 0.7011, + "epoch": 1.146726862302483, + "grad_norm": 17.06911849975586, + "learning_rate": 8.935831352504103e-06, + "lm_loss": 5.7935, + "loss": 1.7367, + "step": 508, + "text_contrastive_loss": 0.9125 + }, + { + "contrastive_loss": 0.6511, + "epoch": 1.1489841986455982, + "grad_norm": 16.534908294677734, + "learning_rate": 8.931352502527633e-06, + "lm_loss": 5.8002, + "loss": 1.656, + "step": 509, + "text_contrastive_loss": 0.8496 + }, + { + "contrastive_loss": 0.6919, + "epoch": 1.1512415349887133, + "grad_norm": 17.590503692626953, + "learning_rate": 8.926865374542928e-06, + "lm_loss": 5.8197, + "loss": 1.7221, + "step": 510, + "text_contrastive_loss": 0.8965 + }, + { + "contrastive_loss": 0.6852, + "epoch": 1.1534988713318284, + "grad_norm": 17.848155975341797, + "learning_rate": 8.922369977998257e-06, + "lm_loss": 5.7688, + "loss": 1.827, + "step": 511, + "text_contrastive_loss": 1.1299 + }, + { + "contrastive_loss": 0.6549, + "epoch": 1.1557562076749435, + "grad_norm": 16.373695373535156, + "learning_rate": 8.917866322359303e-06, + "lm_loss": 5.7991, + "loss": 1.6557, + "step": 512, + "text_contrastive_loss": 0.8419 + }, + { + "contrastive_loss": 0.6713, + "epoch": 1.1580135440180588, + "grad_norm": 16.844192504882812, + "learning_rate": 8.913354417109136e-06, + "lm_loss": 5.7205, + "loss": 1.605, + "step": 513, + "text_contrastive_loss": 0.7233 + }, + { + "contrastive_loss": 0.5913, + "epoch": 1.1602708803611739, + "grad_norm": 16.347095489501953, + "learning_rate": 8.908834271748202e-06, + "lm_loss": 5.7978, + "loss": 1.5581, + "step": 514, + "text_contrastive_loss": 0.7742 + }, + { + "contrastive_loss": 0.6158, + "epoch": 1.162528216704289, + "grad_norm": 16.738037109375, + "learning_rate": 8.904305895794292e-06, + "lm_loss": 5.7616, + "loss": 1.5996, + "step": 515, + "text_contrastive_loss": 0.8154 + }, + { + "contrastive_loss": 0.7822, + "epoch": 1.164785553047404, + "grad_norm": 19.219724655151367, + "learning_rate": 8.899769298782528e-06, + "lm_loss": 5.8163, + "loss": 1.823, + "step": 516, + "text_contrastive_loss": 0.9184 + }, + { + "contrastive_loss": 0.6589, + "epoch": 1.1670428893905191, + "grad_norm": 18.149572372436523, + "learning_rate": 8.895224490265346e-06, + "lm_loss": 5.8049, + "loss": 1.6915, + "step": 517, + "text_contrastive_loss": 0.9041 + }, + { + "contrastive_loss": 0.58, + "epoch": 1.1693002257336342, + "grad_norm": 15.932313919067383, + "learning_rate": 8.890671479812472e-06, + "lm_loss": 5.6878, + "loss": 1.5722, + "step": 518, + "text_contrastive_loss": 0.8469 + }, + { + "contrastive_loss": 0.6245, + "epoch": 1.1715575620767495, + "grad_norm": 17.646120071411133, + "learning_rate": 8.886110277010902e-06, + "lm_loss": 5.8012, + "loss": 1.7384, + "step": 519, + "text_contrastive_loss": 1.0677 + }, + { + "contrastive_loss": 0.6592, + "epoch": 1.1738148984198646, + "grad_norm": 17.75513458251953, + "learning_rate": 8.88154089146488e-06, + "lm_loss": 5.7272, + "loss": 1.7283, + "step": 520, + "text_contrastive_loss": 0.9926 + }, + { + "contrastive_loss": 0.7284, + "epoch": 1.1760722347629797, + "grad_norm": 18.321510314941406, + "learning_rate": 8.876963332795881e-06, + "lm_loss": 5.8515, + "loss": 1.8513, + "step": 521, + "text_contrastive_loss": 1.0755 + }, + { + "contrastive_loss": 0.6343, + "epoch": 1.1783295711060948, + "grad_norm": 16.36499786376953, + "learning_rate": 8.87237761064259e-06, + "lm_loss": 5.7822, + "loss": 1.6614, + "step": 522, + "text_contrastive_loss": 0.8979 + }, + { + "contrastive_loss": 0.6272, + "epoch": 1.18058690744921, + "grad_norm": 16.643741607666016, + "learning_rate": 8.867783734660883e-06, + "lm_loss": 5.6926, + "loss": 1.6266, + "step": 523, + "text_contrastive_loss": 0.8603 + }, + { + "contrastive_loss": 0.6937, + "epoch": 1.182844243792325, + "grad_norm": 16.587600708007812, + "learning_rate": 8.8631817145238e-06, + "lm_loss": 5.7304, + "loss": 1.6685, + "step": 524, + "text_contrastive_loss": 0.8036 + }, + { + "contrastive_loss": 0.5257, + "epoch": 1.18510158013544, + "grad_norm": 16.325925827026367, + "learning_rate": 8.858571559921539e-06, + "lm_loss": 5.7365, + "loss": 1.4816, + "step": 525, + "text_contrastive_loss": 0.7644 + }, + { + "contrastive_loss": 0.6224, + "epoch": 1.1873589164785554, + "grad_norm": 15.408103942871094, + "learning_rate": 8.853953280561412e-06, + "lm_loss": 5.6735, + "loss": 1.6471, + "step": 526, + "text_contrastive_loss": 0.9147 + }, + { + "contrastive_loss": 0.552, + "epoch": 1.1896162528216705, + "grad_norm": 14.61215877532959, + "learning_rate": 8.849326886167854e-06, + "lm_loss": 5.8261, + "loss": 1.4629, + "step": 527, + "text_contrastive_loss": 0.6564 + }, + { + "contrastive_loss": 0.5832, + "epoch": 1.1918735891647856, + "grad_norm": 14.496379852294922, + "learning_rate": 8.844692386482379e-06, + "lm_loss": 5.6996, + "loss": 1.5809, + "step": 528, + "text_contrastive_loss": 0.8553 + }, + { + "contrastive_loss": 0.5086, + "epoch": 1.1941309255079007, + "grad_norm": 14.251022338867188, + "learning_rate": 8.840049791263567e-06, + "lm_loss": 5.7484, + "loss": 1.5166, + "step": 529, + "text_contrastive_loss": 0.8663 + }, + { + "contrastive_loss": 0.6403, + "epoch": 1.1963882618510158, + "grad_norm": 16.765060424804688, + "learning_rate": 8.835399110287046e-06, + "lm_loss": 5.8704, + "loss": 1.7473, + "step": 530, + "text_contrastive_loss": 1.0399 + }, + { + "contrastive_loss": 0.7202, + "epoch": 1.1986455981941309, + "grad_norm": 16.017635345458984, + "learning_rate": 8.830740353345475e-06, + "lm_loss": 5.7155, + "loss": 1.7729, + "step": 531, + "text_contrastive_loss": 0.9623 + }, + { + "contrastive_loss": 0.6431, + "epoch": 1.200902934537246, + "grad_norm": 16.272201538085938, + "learning_rate": 8.826073530248508e-06, + "lm_loss": 5.7467, + "loss": 1.6205, + "step": 532, + "text_contrastive_loss": 0.8054 + }, + { + "contrastive_loss": 0.6697, + "epoch": 1.2031602708803613, + "grad_norm": 19.03203773498535, + "learning_rate": 8.82139865082279e-06, + "lm_loss": 5.8501, + "loss": 1.6495, + "step": 533, + "text_contrastive_loss": 0.7896 + }, + { + "contrastive_loss": 0.7965, + "epoch": 1.2054176072234764, + "grad_norm": 17.053361892700195, + "learning_rate": 8.81671572491193e-06, + "lm_loss": 5.6792, + "loss": 1.7992, + "step": 534, + "text_contrastive_loss": 0.8696 + }, + { + "contrastive_loss": 0.6738, + "epoch": 1.2076749435665914, + "grad_norm": 17.29465103149414, + "learning_rate": 8.812024762376477e-06, + "lm_loss": 5.682, + "loss": 1.6524, + "step": 535, + "text_contrastive_loss": 0.8209 + }, + { + "contrastive_loss": 0.7326, + "epoch": 1.2099322799097065, + "grad_norm": 18.285282135009766, + "learning_rate": 8.807325773093904e-06, + "lm_loss": 5.7431, + "loss": 1.7493, + "step": 536, + "text_contrastive_loss": 0.8848 + }, + { + "contrastive_loss": 0.7514, + "epoch": 1.2121896162528216, + "grad_norm": 18.895790100097656, + "learning_rate": 8.802618766958586e-06, + "lm_loss": 5.7001, + "loss": 1.8127, + "step": 537, + "text_contrastive_loss": 0.9826 + }, + { + "contrastive_loss": 0.7019, + "epoch": 1.2144469525959367, + "grad_norm": 17.55310821533203, + "learning_rate": 8.797903753881775e-06, + "lm_loss": 5.701, + "loss": 1.806, + "step": 538, + "text_contrastive_loss": 1.0679 + }, + { + "contrastive_loss": 0.6178, + "epoch": 1.2167042889390518, + "grad_norm": 18.120283126831055, + "learning_rate": 8.793180743791587e-06, + "lm_loss": 5.7682, + "loss": 1.6386, + "step": 539, + "text_contrastive_loss": 0.8878 + }, + { + "contrastive_loss": 0.7747, + "epoch": 1.2189616252821671, + "grad_norm": 17.4415340423584, + "learning_rate": 8.788449746632976e-06, + "lm_loss": 5.6951, + "loss": 1.796, + "step": 540, + "text_contrastive_loss": 0.9037 + }, + { + "contrastive_loss": 0.6149, + "epoch": 1.2212189616252822, + "grad_norm": 16.051456451416016, + "learning_rate": 8.78371077236771e-06, + "lm_loss": 5.8233, + "loss": 1.5976, + "step": 541, + "text_contrastive_loss": 0.8006 + }, + { + "contrastive_loss": 0.5918, + "epoch": 1.2234762979683973, + "grad_norm": 16.21177864074707, + "learning_rate": 8.778963830974362e-06, + "lm_loss": 5.8637, + "loss": 1.7496, + "step": 542, + "text_contrastive_loss": 1.1429 + }, + { + "contrastive_loss": 0.6083, + "epoch": 1.2257336343115124, + "grad_norm": 16.857582092285156, + "learning_rate": 8.77420893244827e-06, + "lm_loss": 5.7729, + "loss": 1.6038, + "step": 543, + "text_contrastive_loss": 0.8365 + }, + { + "contrastive_loss": 0.5756, + "epoch": 1.2279909706546275, + "grad_norm": 16.42177963256836, + "learning_rate": 8.769446086801536e-06, + "lm_loss": 5.7586, + "loss": 1.5623, + "step": 544, + "text_contrastive_loss": 0.8217 + }, + { + "contrastive_loss": 0.6637, + "epoch": 1.2302483069977426, + "grad_norm": 17.359819412231445, + "learning_rate": 8.764675304062992e-06, + "lm_loss": 5.6912, + "loss": 1.6575, + "step": 545, + "text_contrastive_loss": 0.8493 + }, + { + "contrastive_loss": 0.6592, + "epoch": 1.2325056433408579, + "grad_norm": 17.608606338500977, + "learning_rate": 8.759896594278183e-06, + "lm_loss": 5.767, + "loss": 1.6739, + "step": 546, + "text_contrastive_loss": 0.8761 + }, + { + "contrastive_loss": 0.6723, + "epoch": 1.234762979683973, + "grad_norm": 16.28169822692871, + "learning_rate": 8.755109967509345e-06, + "lm_loss": 5.732, + "loss": 1.6321, + "step": 547, + "text_contrastive_loss": 0.773 + }, + { + "contrastive_loss": 0.78, + "epoch": 1.237020316027088, + "grad_norm": 17.190608978271484, + "learning_rate": 8.750315433835387e-06, + "lm_loss": 5.744, + "loss": 1.8254, + "step": 548, + "text_contrastive_loss": 0.9421 + }, + { + "contrastive_loss": 0.6585, + "epoch": 1.2392776523702032, + "grad_norm": 16.766374588012695, + "learning_rate": 8.745513003351862e-06, + "lm_loss": 5.6844, + "loss": 1.6865, + "step": 549, + "text_contrastive_loss": 0.9191 + }, + { + "contrastive_loss": 0.8379, + "epoch": 1.2415349887133182, + "grad_norm": 19.849149703979492, + "learning_rate": 8.740702686170955e-06, + "lm_loss": 5.7946, + "loss": 1.9411, + "step": 550, + "text_contrastive_loss": 1.0475 + }, + { + "contrastive_loss": 0.6246, + "epoch": 1.2437923250564333, + "grad_norm": 16.94928550720215, + "learning_rate": 8.735884492421457e-06, + "lm_loss": 5.6652, + "loss": 1.513, + "step": 551, + "text_contrastive_loss": 0.6438 + }, + { + "contrastive_loss": 0.5589, + "epoch": 1.2460496613995486, + "grad_norm": 14.843914985656738, + "learning_rate": 8.731058432248743e-06, + "lm_loss": 5.7704, + "loss": 1.5717, + "step": 552, + "text_contrastive_loss": 0.8714 + }, + { + "contrastive_loss": 0.6603, + "epoch": 1.2483069977426637, + "grad_norm": 17.591907501220703, + "learning_rate": 8.726224515814752e-06, + "lm_loss": 5.6885, + "loss": 1.6171, + "step": 553, + "text_contrastive_loss": 0.7759 + }, + { + "contrastive_loss": 0.6484, + "epoch": 1.2505643340857788, + "grad_norm": 17.86832618713379, + "learning_rate": 8.721382753297967e-06, + "lm_loss": 5.6409, + "loss": 1.67, + "step": 554, + "text_contrastive_loss": 0.915 + }, + { + "contrastive_loss": 0.6211, + "epoch": 1.252821670428894, + "grad_norm": 17.385845184326172, + "learning_rate": 8.71653315489339e-06, + "lm_loss": 5.8337, + "loss": 1.6136, + "step": 555, + "text_contrastive_loss": 0.8183 + }, + { + "contrastive_loss": 0.7168, + "epoch": 1.255079006772009, + "grad_norm": 18.187938690185547, + "learning_rate": 8.711675730812522e-06, + "lm_loss": 5.7761, + "loss": 1.746, + "step": 556, + "text_contrastive_loss": 0.9032 + }, + { + "contrastive_loss": 0.7436, + "epoch": 1.257336343115124, + "grad_norm": 20.165138244628906, + "learning_rate": 8.706810491283346e-06, + "lm_loss": 5.7146, + "loss": 1.803, + "step": 557, + "text_contrastive_loss": 0.9759 + }, + { + "contrastive_loss": 0.7407, + "epoch": 1.2595936794582392, + "grad_norm": 20.50693130493164, + "learning_rate": 8.701937446550298e-06, + "lm_loss": 5.7585, + "loss": 1.7661, + "step": 558, + "text_contrastive_loss": 0.8991 + }, + { + "contrastive_loss": 0.6625, + "epoch": 1.2618510158013545, + "grad_norm": 15.074134826660156, + "learning_rate": 8.69705660687425e-06, + "lm_loss": 5.7851, + "loss": 1.7489, + "step": 559, + "text_contrastive_loss": 1.0158 + }, + { + "contrastive_loss": 0.7133, + "epoch": 1.2641083521444696, + "grad_norm": 17.727819442749023, + "learning_rate": 8.692167982532487e-06, + "lm_loss": 5.7226, + "loss": 1.7565, + "step": 560, + "text_contrastive_loss": 0.9418 + }, + { + "contrastive_loss": 0.7011, + "epoch": 1.2663656884875847, + "grad_norm": 17.928321838378906, + "learning_rate": 8.687271583818687e-06, + "lm_loss": 5.6885, + "loss": 1.7219, + "step": 561, + "text_contrastive_loss": 0.904 + }, + { + "contrastive_loss": 0.659, + "epoch": 1.2686230248306998, + "grad_norm": 16.586898803710938, + "learning_rate": 8.682367421042895e-06, + "lm_loss": 5.8365, + "loss": 1.6248, + "step": 562, + "text_contrastive_loss": 0.7643 + }, + { + "contrastive_loss": 0.6807, + "epoch": 1.2708803611738149, + "grad_norm": 16.01237678527832, + "learning_rate": 8.677455504531507e-06, + "lm_loss": 5.6571, + "loss": 1.6628, + "step": 563, + "text_contrastive_loss": 0.8328 + }, + { + "contrastive_loss": 0.6099, + "epoch": 1.27313769751693, + "grad_norm": 14.771806716918945, + "learning_rate": 8.672535844627243e-06, + "lm_loss": 5.7449, + "loss": 1.5882, + "step": 564, + "text_contrastive_loss": 0.8076 + }, + { + "contrastive_loss": 0.6119, + "epoch": 1.275395033860045, + "grad_norm": 17.364458084106445, + "learning_rate": 8.667608451689135e-06, + "lm_loss": 5.8589, + "loss": 1.6081, + "step": 565, + "text_contrastive_loss": 0.8206 + }, + { + "contrastive_loss": 0.6576, + "epoch": 1.2776523702031604, + "grad_norm": 17.16614532470703, + "learning_rate": 8.662673336092487e-06, + "lm_loss": 5.8289, + "loss": 1.6548, + "step": 566, + "text_contrastive_loss": 0.8286 + }, + { + "contrastive_loss": 0.6646, + "epoch": 1.2799097065462754, + "grad_norm": 17.29311752319336, + "learning_rate": 8.657730508228874e-06, + "lm_loss": 5.8073, + "loss": 1.6869, + "step": 567, + "text_contrastive_loss": 0.883 + }, + { + "contrastive_loss": 0.628, + "epoch": 1.2821670428893905, + "grad_norm": 15.421435356140137, + "learning_rate": 8.652779978506103e-06, + "lm_loss": 5.91, + "loss": 1.6394, + "step": 568, + "text_contrastive_loss": 0.8408 + }, + { + "contrastive_loss": 0.6373, + "epoch": 1.2844243792325056, + "grad_norm": 15.888031005859375, + "learning_rate": 8.647821757348202e-06, + "lm_loss": 5.8405, + "loss": 1.5627, + "step": 569, + "text_contrastive_loss": 0.6827 + }, + { + "contrastive_loss": 0.7228, + "epoch": 1.2866817155756207, + "grad_norm": 17.23069953918457, + "learning_rate": 8.642855855195394e-06, + "lm_loss": 5.6198, + "loss": 1.7024, + "step": 570, + "text_contrastive_loss": 0.8352 + }, + { + "contrastive_loss": 0.5868, + "epoch": 1.2889390519187358, + "grad_norm": 15.839852333068848, + "learning_rate": 8.637882282504075e-06, + "lm_loss": 5.8711, + "loss": 1.6117, + "step": 571, + "text_contrastive_loss": 0.8756 + }, + { + "contrastive_loss": 0.6876, + "epoch": 1.291196388261851, + "grad_norm": 18.078350067138672, + "learning_rate": 8.632901049746793e-06, + "lm_loss": 5.8506, + "loss": 1.7769, + "step": 572, + "text_contrastive_loss": 1.0084 + }, + { + "contrastive_loss": 0.7239, + "epoch": 1.2934537246049662, + "grad_norm": 19.60801124572754, + "learning_rate": 8.627912167412222e-06, + "lm_loss": 5.7687, + "loss": 1.7269, + "step": 573, + "text_contrastive_loss": 0.8523 + }, + { + "contrastive_loss": 0.6649, + "epoch": 1.2957110609480813, + "grad_norm": 17.82904052734375, + "learning_rate": 8.622915646005152e-06, + "lm_loss": 5.7369, + "loss": 1.6693, + "step": 574, + "text_contrastive_loss": 0.8615 + }, + { + "contrastive_loss": 0.5279, + "epoch": 1.2979683972911964, + "grad_norm": 16.017885208129883, + "learning_rate": 8.617911496046446e-06, + "lm_loss": 5.6855, + "loss": 1.4724, + "step": 575, + "text_contrastive_loss": 0.752 + }, + { + "contrastive_loss": 0.6755, + "epoch": 1.3002257336343115, + "grad_norm": 17.49553871154785, + "learning_rate": 8.612899728073039e-06, + "lm_loss": 5.8032, + "loss": 1.6608, + "step": 576, + "text_contrastive_loss": 0.8099 + }, + { + "contrastive_loss": 0.6034, + "epoch": 1.3024830699774266, + "grad_norm": 16.62548065185547, + "learning_rate": 8.607880352637905e-06, + "lm_loss": 5.651, + "loss": 1.6213, + "step": 577, + "text_contrastive_loss": 0.9056 + }, + { + "contrastive_loss": 0.6879, + "epoch": 1.304740406320542, + "grad_norm": 18.7608699798584, + "learning_rate": 8.602853380310033e-06, + "lm_loss": 5.8627, + "loss": 1.7153, + "step": 578, + "text_contrastive_loss": 0.8822 + }, + { + "contrastive_loss": 0.5729, + "epoch": 1.3069977426636568, + "grad_norm": 16.681779861450195, + "learning_rate": 8.59781882167441e-06, + "lm_loss": 5.7276, + "loss": 1.6316, + "step": 579, + "text_contrastive_loss": 0.9718 + }, + { + "contrastive_loss": 0.6994, + "epoch": 1.309255079006772, + "grad_norm": 18.027572631835938, + "learning_rate": 8.592776687332003e-06, + "lm_loss": 5.7858, + "loss": 1.6982, + "step": 580, + "text_contrastive_loss": 0.8403 + }, + { + "contrastive_loss": 0.643, + "epoch": 1.3115124153498872, + "grad_norm": 15.352033615112305, + "learning_rate": 8.58772698789972e-06, + "lm_loss": 5.6869, + "loss": 1.669, + "step": 581, + "text_contrastive_loss": 0.9146 + }, + { + "contrastive_loss": 0.6563, + "epoch": 1.3137697516930023, + "grad_norm": 18.501941680908203, + "learning_rate": 8.582669734010407e-06, + "lm_loss": 5.8005, + "loss": 1.6796, + "step": 582, + "text_contrastive_loss": 0.8866 + }, + { + "contrastive_loss": 0.5751, + "epoch": 1.3160270880361173, + "grad_norm": 15.410369873046875, + "learning_rate": 8.577604936312813e-06, + "lm_loss": 5.6843, + "loss": 1.5664, + "step": 583, + "text_contrastive_loss": 0.8458 + }, + { + "contrastive_loss": 0.648, + "epoch": 1.3182844243792324, + "grad_norm": 15.552810668945312, + "learning_rate": 8.572532605471572e-06, + "lm_loss": 5.7188, + "loss": 1.5767, + "step": 584, + "text_contrastive_loss": 0.7137 + }, + { + "contrastive_loss": 0.6221, + "epoch": 1.3205417607223477, + "grad_norm": 18.540748596191406, + "learning_rate": 8.567452752167183e-06, + "lm_loss": 5.6481, + "loss": 1.5526, + "step": 585, + "text_contrastive_loss": 0.7313 + }, + { + "contrastive_loss": 0.5366, + "epoch": 1.3227990970654628, + "grad_norm": 16.199588775634766, + "learning_rate": 8.562365387095977e-06, + "lm_loss": 5.6682, + "loss": 1.4805, + "step": 586, + "text_contrastive_loss": 0.7541 + }, + { + "contrastive_loss": 0.6598, + "epoch": 1.325056433408578, + "grad_norm": 16.728755950927734, + "learning_rate": 8.557270520970111e-06, + "lm_loss": 5.6622, + "loss": 1.6551, + "step": 587, + "text_contrastive_loss": 0.8582 + }, + { + "contrastive_loss": 0.6284, + "epoch": 1.327313769751693, + "grad_norm": 16.7345027923584, + "learning_rate": 8.552168164517532e-06, + "lm_loss": 5.6653, + "loss": 1.6524, + "step": 588, + "text_contrastive_loss": 0.9149 + }, + { + "contrastive_loss": 0.5527, + "epoch": 1.329571106094808, + "grad_norm": 14.206117630004883, + "learning_rate": 8.547058328481959e-06, + "lm_loss": 5.6429, + "loss": 1.5285, + "step": 589, + "text_contrastive_loss": 0.823 + }, + { + "contrastive_loss": 0.6591, + "epoch": 1.3318284424379232, + "grad_norm": 15.768681526184082, + "learning_rate": 8.54194102362286e-06, + "lm_loss": 5.7572, + "loss": 1.6542, + "step": 590, + "text_contrastive_loss": 0.8387 + }, + { + "contrastive_loss": 0.7248, + "epoch": 1.3340857787810383, + "grad_norm": 19.255586624145508, + "learning_rate": 8.536816260715433e-06, + "lm_loss": 5.6369, + "loss": 1.7682, + "step": 591, + "text_contrastive_loss": 0.9594 + }, + { + "contrastive_loss": 0.6456, + "epoch": 1.3363431151241536, + "grad_norm": 16.265714645385742, + "learning_rate": 8.531684050550575e-06, + "lm_loss": 5.7152, + "loss": 1.6206, + "step": 592, + "text_contrastive_loss": 0.8069 + }, + { + "contrastive_loss": 0.6153, + "epoch": 1.3386004514672687, + "grad_norm": 16.676197052001953, + "learning_rate": 8.526544403934868e-06, + "lm_loss": 5.7057, + "loss": 1.5796, + "step": 593, + "text_contrastive_loss": 0.7875 + }, + { + "contrastive_loss": 0.6598, + "epoch": 1.3408577878103838, + "grad_norm": 17.399089813232422, + "learning_rate": 8.521397331690551e-06, + "lm_loss": 5.6061, + "loss": 1.6512, + "step": 594, + "text_contrastive_loss": 0.8617 + }, + { + "contrastive_loss": 0.6508, + "epoch": 1.3431151241534989, + "grad_norm": 18.223529815673828, + "learning_rate": 8.516242844655498e-06, + "lm_loss": 5.7351, + "loss": 1.5835, + "step": 595, + "text_contrastive_loss": 0.7186 + }, + { + "contrastive_loss": 0.6331, + "epoch": 1.345372460496614, + "grad_norm": 16.342121124267578, + "learning_rate": 8.5110809536832e-06, + "lm_loss": 5.7773, + "loss": 1.6419, + "step": 596, + "text_contrastive_loss": 0.862 + }, + { + "contrastive_loss": 0.6344, + "epoch": 1.347629796839729, + "grad_norm": 17.7054443359375, + "learning_rate": 8.50591166964273e-06, + "lm_loss": 5.7044, + "loss": 1.6997, + "step": 597, + "text_contrastive_loss": 0.9899 + }, + { + "contrastive_loss": 0.7845, + "epoch": 1.3498871331828441, + "grad_norm": 15.945330619812012, + "learning_rate": 8.500735003418734e-06, + "lm_loss": 5.8071, + "loss": 1.8835, + "step": 598, + "text_contrastive_loss": 1.0364 + }, + { + "contrastive_loss": 0.6898, + "epoch": 1.3521444695259595, + "grad_norm": 17.745105743408203, + "learning_rate": 8.495550965911403e-06, + "lm_loss": 5.8617, + "loss": 1.731, + "step": 599, + "text_contrastive_loss": 0.9101 + }, + { + "contrastive_loss": 0.6985, + "epoch": 1.3544018058690745, + "grad_norm": 18.3575382232666, + "learning_rate": 8.490359568036446e-06, + "lm_loss": 5.7694, + "loss": 1.7739, + "step": 600, + "text_contrastive_loss": 0.997 + }, + { + "contrastive_loss": 0.6558, + "epoch": 1.3566591422121896, + "grad_norm": 16.287246704101562, + "learning_rate": 8.485160820725073e-06, + "lm_loss": 5.794, + "loss": 1.672, + "step": 601, + "text_contrastive_loss": 0.8737 + }, + { + "contrastive_loss": 0.5926, + "epoch": 1.3589164785553047, + "grad_norm": 15.705453872680664, + "learning_rate": 8.479954734923967e-06, + "lm_loss": 5.7311, + "loss": 1.609, + "step": 602, + "text_contrastive_loss": 0.8867 + }, + { + "contrastive_loss": 0.6082, + "epoch": 1.3611738148984198, + "grad_norm": 15.387367248535156, + "learning_rate": 8.474741321595263e-06, + "lm_loss": 5.7703, + "loss": 1.605, + "step": 603, + "text_contrastive_loss": 0.8396 + }, + { + "contrastive_loss": 0.6713, + "epoch": 1.363431151241535, + "grad_norm": 19.237621307373047, + "learning_rate": 8.46952059171653e-06, + "lm_loss": 5.6506, + "loss": 1.7068, + "step": 604, + "text_contrastive_loss": 0.9411 + }, + { + "contrastive_loss": 0.5959, + "epoch": 1.36568848758465, + "grad_norm": 15.905624389648438, + "learning_rate": 8.464292556280734e-06, + "lm_loss": 5.6875, + "loss": 1.5965, + "step": 605, + "text_contrastive_loss": 0.8635 + }, + { + "contrastive_loss": 0.4923, + "epoch": 1.3679458239277653, + "grad_norm": 14.173022270202637, + "learning_rate": 8.459057226296232e-06, + "lm_loss": 5.8231, + "loss": 1.4332, + "step": 606, + "text_contrastive_loss": 0.7171 + }, + { + "contrastive_loss": 0.6652, + "epoch": 1.3702031602708804, + "grad_norm": 17.57984161376953, + "learning_rate": 8.453814612786736e-06, + "lm_loss": 5.7891, + "loss": 1.6656, + "step": 607, + "text_contrastive_loss": 0.8429 + }, + { + "contrastive_loss": 0.6025, + "epoch": 1.3724604966139955, + "grad_norm": 15.989808082580566, + "learning_rate": 8.4485647267913e-06, + "lm_loss": 5.7487, + "loss": 1.5859, + "step": 608, + "text_contrastive_loss": 0.8172 + }, + { + "contrastive_loss": 0.6286, + "epoch": 1.3747178329571106, + "grad_norm": 17.961231231689453, + "learning_rate": 8.443307579364282e-06, + "lm_loss": 5.7026, + "loss": 1.608, + "step": 609, + "text_contrastive_loss": 0.8183 + }, + { + "contrastive_loss": 0.6308, + "epoch": 1.3769751693002257, + "grad_norm": 17.083267211914062, + "learning_rate": 8.43804318157534e-06, + "lm_loss": 5.6939, + "loss": 1.6289, + "step": 610, + "text_contrastive_loss": 0.8573 + }, + { + "contrastive_loss": 0.6925, + "epoch": 1.379232505643341, + "grad_norm": 17.16958999633789, + "learning_rate": 8.432771544509395e-06, + "lm_loss": 5.7154, + "loss": 1.7076, + "step": 611, + "text_contrastive_loss": 0.8871 + }, + { + "contrastive_loss": 0.5918, + "epoch": 1.3814898419864559, + "grad_norm": 16.935251235961914, + "learning_rate": 8.427492679266605e-06, + "lm_loss": 5.7298, + "loss": 1.5374, + "step": 612, + "text_contrastive_loss": 0.7451 + }, + { + "contrastive_loss": 0.73, + "epoch": 1.3837471783295712, + "grad_norm": 18.650781631469727, + "learning_rate": 8.422206596962357e-06, + "lm_loss": 5.7431, + "loss": 1.7484, + "step": 613, + "text_contrastive_loss": 0.8882 + }, + { + "contrastive_loss": 0.7085, + "epoch": 1.3860045146726863, + "grad_norm": 17.921417236328125, + "learning_rate": 8.416913308727229e-06, + "lm_loss": 5.6926, + "loss": 1.6725, + "step": 614, + "text_contrastive_loss": 0.7894 + }, + { + "contrastive_loss": 0.5775, + "epoch": 1.3882618510158014, + "grad_norm": 17.152624130249023, + "learning_rate": 8.411612825706976e-06, + "lm_loss": 5.7791, + "loss": 1.598, + "step": 615, + "text_contrastive_loss": 0.8852 + }, + { + "contrastive_loss": 0.6252, + "epoch": 1.3905191873589164, + "grad_norm": 16.69524574279785, + "learning_rate": 8.4063051590625e-06, + "lm_loss": 5.6092, + "loss": 1.6112, + "step": 616, + "text_contrastive_loss": 0.8503 + }, + { + "contrastive_loss": 0.6031, + "epoch": 1.3927765237020315, + "grad_norm": 18.74748992919922, + "learning_rate": 8.400990319969829e-06, + "lm_loss": 5.6256, + "loss": 1.6014, + "step": 617, + "text_contrastive_loss": 0.8716 + }, + { + "contrastive_loss": 0.6992, + "epoch": 1.3950338600451468, + "grad_norm": 16.87995147705078, + "learning_rate": 8.395668319620092e-06, + "lm_loss": 5.8057, + "loss": 1.7324, + "step": 618, + "text_contrastive_loss": 0.9052 + }, + { + "contrastive_loss": 0.5642, + "epoch": 1.3972911963882617, + "grad_norm": 17.030317306518555, + "learning_rate": 8.390339169219504e-06, + "lm_loss": 5.8441, + "loss": 1.611, + "step": 619, + "text_contrastive_loss": 0.9249 + }, + { + "contrastive_loss": 0.6297, + "epoch": 1.399548532731377, + "grad_norm": 16.5139102935791, + "learning_rate": 8.385002879989328e-06, + "lm_loss": 5.7669, + "loss": 1.6511, + "step": 620, + "text_contrastive_loss": 0.8894 + }, + { + "contrastive_loss": 0.6669, + "epoch": 1.4018058690744921, + "grad_norm": 17.75321388244629, + "learning_rate": 8.37965946316586e-06, + "lm_loss": 5.5939, + "loss": 1.6587, + "step": 621, + "text_contrastive_loss": 0.8647 + }, + { + "contrastive_loss": 0.5724, + "epoch": 1.4040632054176072, + "grad_norm": 17.158180236816406, + "learning_rate": 8.37430893000041e-06, + "lm_loss": 5.6086, + "loss": 1.5737, + "step": 622, + "text_contrastive_loss": 0.8808 + }, + { + "contrastive_loss": 0.667, + "epoch": 1.4063205417607223, + "grad_norm": 16.76217269897461, + "learning_rate": 8.368951291759264e-06, + "lm_loss": 5.6943, + "loss": 1.6317, + "step": 623, + "text_contrastive_loss": 0.7906 + }, + { + "contrastive_loss": 0.7416, + "epoch": 1.4085778781038374, + "grad_norm": 18.57181167602539, + "learning_rate": 8.363586559723675e-06, + "lm_loss": 5.8373, + "loss": 1.7799, + "step": 624, + "text_contrastive_loss": 0.9091 + }, + { + "contrastive_loss": 0.6005, + "epoch": 1.4108352144469527, + "grad_norm": 16.08402442932129, + "learning_rate": 8.35821474518983e-06, + "lm_loss": 5.6551, + "loss": 1.4957, + "step": 625, + "text_contrastive_loss": 0.6593 + }, + { + "contrastive_loss": 0.6548, + "epoch": 1.4130925507900678, + "grad_norm": 16.66714096069336, + "learning_rate": 8.352835859468829e-06, + "lm_loss": 5.7476, + "loss": 1.6845, + "step": 626, + "text_contrastive_loss": 0.9099 + }, + { + "contrastive_loss": 0.7207, + "epoch": 1.4153498871331829, + "grad_norm": 18.51789093017578, + "learning_rate": 8.347449913886662e-06, + "lm_loss": 5.6149, + "loss": 1.7858, + "step": 627, + "text_contrastive_loss": 1.0074 + }, + { + "contrastive_loss": 0.6197, + "epoch": 1.417607223476298, + "grad_norm": 14.465909004211426, + "learning_rate": 8.34205691978419e-06, + "lm_loss": 5.6513, + "loss": 1.6331, + "step": 628, + "text_contrastive_loss": 0.8966 + }, + { + "contrastive_loss": 0.5238, + "epoch": 1.419864559819413, + "grad_norm": 15.883209228515625, + "learning_rate": 8.336656888517103e-06, + "lm_loss": 5.7084, + "loss": 1.4716, + "step": 629, + "text_contrastive_loss": 0.7539 + }, + { + "contrastive_loss": 0.5395, + "epoch": 1.4221218961625282, + "grad_norm": 15.386760711669922, + "learning_rate": 8.331249831455921e-06, + "lm_loss": 5.7436, + "loss": 1.4861, + "step": 630, + "text_contrastive_loss": 0.7443 + }, + { + "contrastive_loss": 0.6283, + "epoch": 1.4243792325056432, + "grad_norm": 15.466524124145508, + "learning_rate": 8.325835759985951e-06, + "lm_loss": 5.8448, + "loss": 1.6198, + "step": 631, + "text_contrastive_loss": 0.814 + }, + { + "contrastive_loss": 0.6742, + "epoch": 1.4266365688487586, + "grad_norm": 17.011396408081055, + "learning_rate": 8.320414685507272e-06, + "lm_loss": 5.753, + "loss": 1.6581, + "step": 632, + "text_contrastive_loss": 0.8171 + }, + { + "contrastive_loss": 0.7357, + "epoch": 1.4288939051918736, + "grad_norm": 17.184545516967773, + "learning_rate": 8.31498661943471e-06, + "lm_loss": 5.6525, + "loss": 1.7411, + "step": 633, + "text_contrastive_loss": 0.8803 + }, + { + "contrastive_loss": 0.6741, + "epoch": 1.4311512415349887, + "grad_norm": 16.616487503051758, + "learning_rate": 8.309551573197809e-06, + "lm_loss": 5.6614, + "loss": 1.6868, + "step": 634, + "text_contrastive_loss": 0.8931 + }, + { + "contrastive_loss": 0.5647, + "epoch": 1.4334085778781038, + "grad_norm": 15.058182716369629, + "learning_rate": 8.304109558240817e-06, + "lm_loss": 5.7895, + "loss": 1.5024, + "step": 635, + "text_contrastive_loss": 0.7175 + }, + { + "contrastive_loss": 0.6847, + "epoch": 1.435665914221219, + "grad_norm": 17.552400588989258, + "learning_rate": 8.298660586022646e-06, + "lm_loss": 5.6282, + "loss": 1.6699, + "step": 636, + "text_contrastive_loss": 0.8448 + }, + { + "contrastive_loss": 0.7085, + "epoch": 1.437923250564334, + "grad_norm": 16.694761276245117, + "learning_rate": 8.293204668016867e-06, + "lm_loss": 5.7376, + "loss": 1.7699, + "step": 637, + "text_contrastive_loss": 0.9752 + }, + { + "contrastive_loss": 0.7224, + "epoch": 1.440180586907449, + "grad_norm": 18.381868362426758, + "learning_rate": 8.287741815711674e-06, + "lm_loss": 5.8248, + "loss": 1.8135, + "step": 638, + "text_contrastive_loss": 1.0172 + }, + { + "contrastive_loss": 0.7324, + "epoch": 1.4424379232505644, + "grad_norm": 17.921268463134766, + "learning_rate": 8.282272040609855e-06, + "lm_loss": 5.6446, + "loss": 1.7582, + "step": 639, + "text_contrastive_loss": 0.9228 + }, + { + "contrastive_loss": 0.6363, + "epoch": 1.4446952595936795, + "grad_norm": 18.61540985107422, + "learning_rate": 8.276795354228785e-06, + "lm_loss": 5.7035, + "loss": 1.6168, + "step": 640, + "text_contrastive_loss": 0.8203 + }, + { + "contrastive_loss": 0.5566, + "epoch": 1.4469525959367946, + "grad_norm": 15.693979263305664, + "learning_rate": 8.271311768100386e-06, + "lm_loss": 5.74, + "loss": 1.5628, + "step": 641, + "text_contrastive_loss": 0.8644 + }, + { + "contrastive_loss": 0.6698, + "epoch": 1.4492099322799097, + "grad_norm": 17.22825813293457, + "learning_rate": 8.26582129377111e-06, + "lm_loss": 5.789, + "loss": 1.7538, + "step": 642, + "text_contrastive_loss": 1.0104 + }, + { + "contrastive_loss": 0.5938, + "epoch": 1.4514672686230248, + "grad_norm": 16.227344512939453, + "learning_rate": 8.26032394280191e-06, + "lm_loss": 5.8488, + "loss": 1.6316, + "step": 643, + "text_contrastive_loss": 0.9059 + }, + { + "contrastive_loss": 0.7882, + "epoch": 1.45372460496614, + "grad_norm": 18.474050521850586, + "learning_rate": 8.254819726768224e-06, + "lm_loss": 5.6374, + "loss": 1.8157, + "step": 644, + "text_contrastive_loss": 0.9275 + }, + { + "contrastive_loss": 0.6401, + "epoch": 1.455981941309255, + "grad_norm": 16.156492233276367, + "learning_rate": 8.249308657259943e-06, + "lm_loss": 5.662, + "loss": 1.6219, + "step": 645, + "text_contrastive_loss": 0.8312 + }, + { + "contrastive_loss": 0.5753, + "epoch": 1.4582392776523703, + "grad_norm": 15.180570602416992, + "learning_rate": 8.243790745881389e-06, + "lm_loss": 5.6963, + "loss": 1.5059, + "step": 646, + "text_contrastive_loss": 0.722 + }, + { + "contrastive_loss": 0.7943, + "epoch": 1.4604966139954854, + "grad_norm": 18.923797607421875, + "learning_rate": 8.238266004251284e-06, + "lm_loss": 5.6449, + "loss": 1.8869, + "step": 647, + "text_contrastive_loss": 1.0563 + }, + { + "contrastive_loss": 0.6139, + "epoch": 1.4627539503386005, + "grad_norm": 16.359981536865234, + "learning_rate": 8.232734444002748e-06, + "lm_loss": 5.7505, + "loss": 1.5844, + "step": 648, + "text_contrastive_loss": 0.7908 + }, + { + "contrastive_loss": 0.6666, + "epoch": 1.4650112866817155, + "grad_norm": 17.091907501220703, + "learning_rate": 8.22719607678324e-06, + "lm_loss": 5.576, + "loss": 1.6539, + "step": 649, + "text_contrastive_loss": 0.8594 + }, + { + "contrastive_loss": 0.5477, + "epoch": 1.4672686230248306, + "grad_norm": 15.386693000793457, + "learning_rate": 8.221650914254566e-06, + "lm_loss": 5.6408, + "loss": 1.5554, + "step": 650, + "text_contrastive_loss": 0.8871 + }, + { + "contrastive_loss": 0.6767, + "epoch": 1.469525959367946, + "grad_norm": 17.18435287475586, + "learning_rate": 8.216098968092833e-06, + "lm_loss": 5.6699, + "loss": 1.6436, + "step": 651, + "text_contrastive_loss": 0.7998 + }, + { + "contrastive_loss": 0.5608, + "epoch": 1.4717832957110608, + "grad_norm": 14.66658878326416, + "learning_rate": 8.210540249988435e-06, + "lm_loss": 5.8803, + "loss": 1.5156, + "step": 652, + "text_contrastive_loss": 0.7336 + }, + { + "contrastive_loss": 0.6009, + "epoch": 1.4740406320541761, + "grad_norm": 16.17041778564453, + "learning_rate": 8.204974771646023e-06, + "lm_loss": 5.6205, + "loss": 1.5734, + "step": 653, + "text_contrastive_loss": 0.8209 + }, + { + "contrastive_loss": 0.6915, + "epoch": 1.4762979683972912, + "grad_norm": 16.177255630493164, + "learning_rate": 8.199402544784485e-06, + "lm_loss": 5.6258, + "loss": 1.7184, + "step": 654, + "text_contrastive_loss": 0.9286 + }, + { + "contrastive_loss": 0.7241, + "epoch": 1.4785553047404063, + "grad_norm": 17.70816421508789, + "learning_rate": 8.193823581136919e-06, + "lm_loss": 5.7242, + "loss": 1.7506, + "step": 655, + "text_contrastive_loss": 0.9082 + }, + { + "contrastive_loss": 0.6462, + "epoch": 1.4808126410835214, + "grad_norm": 15.968859672546387, + "learning_rate": 8.188237892450603e-06, + "lm_loss": 5.6605, + "loss": 1.626, + "step": 656, + "text_contrastive_loss": 0.8276 + }, + { + "contrastive_loss": 0.6915, + "epoch": 1.4830699774266365, + "grad_norm": 16.231510162353516, + "learning_rate": 8.182645490486986e-06, + "lm_loss": 5.6709, + "loss": 1.6858, + "step": 657, + "text_contrastive_loss": 0.8545 + }, + { + "contrastive_loss": 0.6668, + "epoch": 1.4853273137697518, + "grad_norm": 17.567447662353516, + "learning_rate": 8.177046387021641e-06, + "lm_loss": 5.8608, + "loss": 1.6953, + "step": 658, + "text_contrastive_loss": 0.8848 + }, + { + "contrastive_loss": 0.6871, + "epoch": 1.487584650112867, + "grad_norm": 17.185976028442383, + "learning_rate": 8.17144059384426e-06, + "lm_loss": 5.7217, + "loss": 1.613, + "step": 659, + "text_contrastive_loss": 0.7075 + }, + { + "contrastive_loss": 0.6068, + "epoch": 1.489841986455982, + "grad_norm": 17.20939064025879, + "learning_rate": 8.165828122758615e-06, + "lm_loss": 5.6503, + "loss": 1.6265, + "step": 660, + "text_contrastive_loss": 0.9093 + }, + { + "contrastive_loss": 0.6366, + "epoch": 1.492099322799097, + "grad_norm": 17.39378547668457, + "learning_rate": 8.160208985582547e-06, + "lm_loss": 5.7519, + "loss": 1.7123, + "step": 661, + "text_contrastive_loss": 1.0011 + }, + { + "contrastive_loss": 0.5162, + "epoch": 1.4943566591422122, + "grad_norm": 15.506391525268555, + "learning_rate": 8.154583194147929e-06, + "lm_loss": 5.7635, + "loss": 1.438, + "step": 662, + "text_contrastive_loss": 0.6908 + }, + { + "contrastive_loss": 0.4868, + "epoch": 1.4966139954853273, + "grad_norm": 15.273134231567383, + "learning_rate": 8.148950760300642e-06, + "lm_loss": 5.7494, + "loss": 1.4756, + "step": 663, + "text_contrastive_loss": 0.8277 + }, + { + "contrastive_loss": 0.7131, + "epoch": 1.4988713318284423, + "grad_norm": 18.11568260192871, + "learning_rate": 8.14331169590056e-06, + "lm_loss": 5.667, + "loss": 1.7176, + "step": 664, + "text_contrastive_loss": 0.8756 + }, + { + "contrastive_loss": 0.562, + "epoch": 1.5011286681715577, + "grad_norm": 17.426877975463867, + "learning_rate": 8.137666012821514e-06, + "lm_loss": 5.7174, + "loss": 1.5592, + "step": 665, + "text_contrastive_loss": 0.8509 + }, + { + "contrastive_loss": 0.6659, + "epoch": 1.5033860045146725, + "grad_norm": 18.524707794189453, + "learning_rate": 8.132013722951275e-06, + "lm_loss": 5.6328, + "loss": 1.6976, + "step": 666, + "text_contrastive_loss": 0.9368 + }, + { + "contrastive_loss": 0.6454, + "epoch": 1.5056433408577878, + "grad_norm": 16.65372657775879, + "learning_rate": 8.12635483819152e-06, + "lm_loss": 5.7965, + "loss": 1.6676, + "step": 667, + "text_contrastive_loss": 0.8852 + }, + { + "contrastive_loss": 0.6684, + "epoch": 1.507900677200903, + "grad_norm": 18.827136993408203, + "learning_rate": 8.12068937045782e-06, + "lm_loss": 5.638, + "loss": 1.6413, + "step": 668, + "text_contrastive_loss": 0.8183 + }, + { + "contrastive_loss": 0.5611, + "epoch": 1.510158013544018, + "grad_norm": 15.873318672180176, + "learning_rate": 8.115017331679602e-06, + "lm_loss": 5.7432, + "loss": 1.56, + "step": 669, + "text_contrastive_loss": 0.8492 + }, + { + "contrastive_loss": 0.6918, + "epoch": 1.5124153498871333, + "grad_norm": 18.114233016967773, + "learning_rate": 8.109338733800132e-06, + "lm_loss": 5.7047, + "loss": 1.7948, + "step": 670, + "text_contrastive_loss": 1.0651 + }, + { + "contrastive_loss": 0.5811, + "epoch": 1.5146726862302482, + "grad_norm": 15.506185531616211, + "learning_rate": 8.103653588776483e-06, + "lm_loss": 5.7547, + "loss": 1.6146, + "step": 671, + "text_contrastive_loss": 0.9161 + }, + { + "contrastive_loss": 0.5985, + "epoch": 1.5169300225733635, + "grad_norm": 15.577679634094238, + "learning_rate": 8.09796190857952e-06, + "lm_loss": 5.713, + "loss": 1.594, + "step": 672, + "text_contrastive_loss": 0.8484 + }, + { + "contrastive_loss": 0.6494, + "epoch": 1.5191873589164786, + "grad_norm": 15.215145111083984, + "learning_rate": 8.09226370519386e-06, + "lm_loss": 5.5607, + "loss": 1.649, + "step": 673, + "text_contrastive_loss": 0.8871 + }, + { + "contrastive_loss": 0.4342, + "epoch": 1.5214446952595937, + "grad_norm": 13.164642333984375, + "learning_rate": 8.08655899061787e-06, + "lm_loss": 5.6408, + "loss": 1.4285, + "step": 674, + "text_contrastive_loss": 0.8605 + }, + { + "contrastive_loss": 0.6813, + "epoch": 1.5237020316027088, + "grad_norm": 17.442340850830078, + "learning_rate": 8.080847776863609e-06, + "lm_loss": 5.6544, + "loss": 1.6585, + "step": 675, + "text_contrastive_loss": 0.8235 + }, + { + "contrastive_loss": 0.6757, + "epoch": 1.5259593679458239, + "grad_norm": 17.141281127929688, + "learning_rate": 8.075130075956836e-06, + "lm_loss": 5.6888, + "loss": 1.7499, + "step": 676, + "text_contrastive_loss": 1.0107 + }, + { + "contrastive_loss": 0.6188, + "epoch": 1.5282167042889392, + "grad_norm": 16.08421516418457, + "learning_rate": 8.069405899936961e-06, + "lm_loss": 5.6586, + "loss": 1.6152, + "step": 677, + "text_contrastive_loss": 0.8611 + }, + { + "contrastive_loss": 0.5577, + "epoch": 1.530474040632054, + "grad_norm": 14.082602500915527, + "learning_rate": 8.06367526085703e-06, + "lm_loss": 5.7302, + "loss": 1.5262, + "step": 678, + "text_contrastive_loss": 0.791 + }, + { + "contrastive_loss": 0.6431, + "epoch": 1.5327313769751694, + "grad_norm": 15.874601364135742, + "learning_rate": 8.057938170783704e-06, + "lm_loss": 5.7184, + "loss": 1.6087, + "step": 679, + "text_contrastive_loss": 0.7874 + }, + { + "contrastive_loss": 0.5947, + "epoch": 1.5349887133182845, + "grad_norm": 15.756805419921875, + "learning_rate": 8.052194641797217e-06, + "lm_loss": 5.6824, + "loss": 1.5455, + "step": 680, + "text_contrastive_loss": 0.7652 + }, + { + "contrastive_loss": 0.5992, + "epoch": 1.5372460496613995, + "grad_norm": 15.742182731628418, + "learning_rate": 8.046444685991369e-06, + "lm_loss": 5.6288, + "loss": 1.5945, + "step": 681, + "text_contrastive_loss": 0.8649 + }, + { + "contrastive_loss": 0.5685, + "epoch": 1.5395033860045146, + "grad_norm": 16.00629234313965, + "learning_rate": 8.040688315473489e-06, + "lm_loss": 5.6869, + "loss": 1.5184, + "step": 682, + "text_contrastive_loss": 0.7625 + }, + { + "contrastive_loss": 0.6884, + "epoch": 1.5417607223476297, + "grad_norm": 17.776254653930664, + "learning_rate": 8.034925542364412e-06, + "lm_loss": 5.6575, + "loss": 1.636, + "step": 683, + "text_contrastive_loss": 0.7636 + }, + { + "contrastive_loss": 0.5358, + "epoch": 1.544018058690745, + "grad_norm": 14.859524726867676, + "learning_rate": 8.029156378798459e-06, + "lm_loss": 5.677, + "loss": 1.5058, + "step": 684, + "text_contrastive_loss": 0.8047 + }, + { + "contrastive_loss": 0.5631, + "epoch": 1.54627539503386, + "grad_norm": 15.648707389831543, + "learning_rate": 8.023380836923404e-06, + "lm_loss": 5.6694, + "loss": 1.5223, + "step": 685, + "text_contrastive_loss": 0.7845 + }, + { + "contrastive_loss": 0.5461, + "epoch": 1.5485327313769752, + "grad_norm": 16.77898597717285, + "learning_rate": 8.017598928900452e-06, + "lm_loss": 5.6663, + "loss": 1.4575, + "step": 686, + "text_contrastive_loss": 0.6896 + }, + { + "contrastive_loss": 0.6517, + "epoch": 1.5507900677200903, + "grad_norm": 17.933149337768555, + "learning_rate": 8.011810666904212e-06, + "lm_loss": 5.6912, + "loss": 1.668, + "step": 687, + "text_contrastive_loss": 0.8943 + }, + { + "contrastive_loss": 0.7144, + "epoch": 1.5530474040632054, + "grad_norm": 19.620168685913086, + "learning_rate": 8.006016063122672e-06, + "lm_loss": 5.634, + "loss": 1.7312, + "step": 688, + "text_contrastive_loss": 0.9069 + }, + { + "contrastive_loss": 0.7824, + "epoch": 1.5553047404063205, + "grad_norm": 19.481861114501953, + "learning_rate": 8.000215129757178e-06, + "lm_loss": 5.5779, + "loss": 1.8526, + "step": 689, + "text_contrastive_loss": 1.0248 + }, + { + "contrastive_loss": 0.6186, + "epoch": 1.5575620767494356, + "grad_norm": 17.900890350341797, + "learning_rate": 7.994407879022397e-06, + "lm_loss": 5.6997, + "loss": 1.5957, + "step": 690, + "text_contrastive_loss": 0.8142 + }, + { + "contrastive_loss": 0.69, + "epoch": 1.559819413092551, + "grad_norm": 18.235824584960938, + "learning_rate": 7.9885943231463e-06, + "lm_loss": 5.6989, + "loss": 1.7303, + "step": 691, + "text_contrastive_loss": 0.9408 + }, + { + "contrastive_loss": 0.6137, + "epoch": 1.5620767494356658, + "grad_norm": 16.703012466430664, + "learning_rate": 7.98277447437014e-06, + "lm_loss": 5.6882, + "loss": 1.6007, + "step": 692, + "text_contrastive_loss": 0.8362 + }, + { + "contrastive_loss": 0.6348, + "epoch": 1.564334085778781, + "grad_norm": 16.685638427734375, + "learning_rate": 7.976948344948412e-06, + "lm_loss": 5.5761, + "loss": 1.6125, + "step": 693, + "text_contrastive_loss": 0.8402 + }, + { + "contrastive_loss": 0.6494, + "epoch": 1.5665914221218962, + "grad_norm": 16.68299102783203, + "learning_rate": 7.971115947148842e-06, + "lm_loss": 5.6636, + "loss": 1.6867, + "step": 694, + "text_contrastive_loss": 0.942 + }, + { + "contrastive_loss": 0.7732, + "epoch": 1.5688487584650113, + "grad_norm": 18.180322647094727, + "learning_rate": 7.965277293252354e-06, + "lm_loss": 5.6107, + "loss": 1.7923, + "step": 695, + "text_contrastive_loss": 0.916 + }, + { + "contrastive_loss": 0.6828, + "epoch": 1.5711060948081266, + "grad_norm": 17.642742156982422, + "learning_rate": 7.95943239555304e-06, + "lm_loss": 5.6807, + "loss": 1.7629, + "step": 696, + "text_contrastive_loss": 1.0239 + }, + { + "contrastive_loss": 0.6149, + "epoch": 1.5733634311512414, + "grad_norm": 15.9694242477417, + "learning_rate": 7.953581266358148e-06, + "lm_loss": 5.6692, + "loss": 1.5975, + "step": 697, + "text_contrastive_loss": 0.8312 + }, + { + "contrastive_loss": 0.5002, + "epoch": 1.5756207674943568, + "grad_norm": 15.842578887939453, + "learning_rate": 7.94772391798804e-06, + "lm_loss": 5.564, + "loss": 1.382, + "step": 698, + "text_contrastive_loss": 0.6508 + }, + { + "contrastive_loss": 0.7284, + "epoch": 1.5778781038374716, + "grad_norm": 15.847796440124512, + "learning_rate": 7.941860362776176e-06, + "lm_loss": 5.622, + "loss": 1.7816, + "step": 699, + "text_contrastive_loss": 0.982 + }, + { + "contrastive_loss": 0.6855, + "epoch": 1.580135440180587, + "grad_norm": 18.121475219726562, + "learning_rate": 7.935990613069087e-06, + "lm_loss": 5.6488, + "loss": 1.6833, + "step": 700, + "text_contrastive_loss": 0.8658 + }, + { + "contrastive_loss": 0.6365, + "epoch": 1.582392776523702, + "grad_norm": 16.283931732177734, + "learning_rate": 7.930114681226341e-06, + "lm_loss": 5.6791, + "loss": 1.6332, + "step": 701, + "text_contrastive_loss": 0.8576 + }, + { + "contrastive_loss": 0.6198, + "epoch": 1.5846501128668171, + "grad_norm": 17.043893814086914, + "learning_rate": 7.924232579620533e-06, + "lm_loss": 5.5998, + "loss": 1.5759, + "step": 702, + "text_contrastive_loss": 0.7922 + }, + { + "contrastive_loss": 0.6537, + "epoch": 1.5869074492099324, + "grad_norm": 16.453500747680664, + "learning_rate": 7.91834432063724e-06, + "lm_loss": 5.574, + "loss": 1.6571, + "step": 703, + "text_contrastive_loss": 0.8919 + }, + { + "contrastive_loss": 0.6257, + "epoch": 1.5891647855530473, + "grad_norm": 16.873483657836914, + "learning_rate": 7.912449916675008e-06, + "lm_loss": 5.7479, + "loss": 1.6281, + "step": 704, + "text_contrastive_loss": 0.8553 + }, + { + "contrastive_loss": 0.5225, + "epoch": 1.5914221218961626, + "grad_norm": 14.994361877441406, + "learning_rate": 7.90654938014533e-06, + "lm_loss": 5.7882, + "loss": 1.5076, + "step": 705, + "text_contrastive_loss": 0.8125 + }, + { + "contrastive_loss": 0.5905, + "epoch": 1.5936794582392777, + "grad_norm": 16.01613426208496, + "learning_rate": 7.900642723472596e-06, + "lm_loss": 5.7974, + "loss": 1.5573, + "step": 706, + "text_contrastive_loss": 0.7741 + }, + { + "contrastive_loss": 0.5499, + "epoch": 1.5959367945823928, + "grad_norm": 15.973322868347168, + "learning_rate": 7.894729959094097e-06, + "lm_loss": 5.5895, + "loss": 1.5522, + "step": 707, + "text_contrastive_loss": 0.8867 + }, + { + "contrastive_loss": 0.7159, + "epoch": 1.5981941309255079, + "grad_norm": 17.146879196166992, + "learning_rate": 7.888811099459974e-06, + "lm_loss": 5.6006, + "loss": 1.7478, + "step": 708, + "text_contrastive_loss": 0.9437 + }, + { + "contrastive_loss": 0.5521, + "epoch": 1.600451467268623, + "grad_norm": 15.732077598571777, + "learning_rate": 7.882886157033209e-06, + "lm_loss": 5.6624, + "loss": 1.5276, + "step": 709, + "text_contrastive_loss": 0.8185 + }, + { + "contrastive_loss": 0.5883, + "epoch": 1.6027088036117383, + "grad_norm": 16.416379928588867, + "learning_rate": 7.876955144289594e-06, + "lm_loss": 5.7265, + "loss": 1.6427, + "step": 710, + "text_contrastive_loss": 0.9633 + }, + { + "contrastive_loss": 0.6321, + "epoch": 1.6049661399548532, + "grad_norm": 17.86478614807129, + "learning_rate": 7.871018073717693e-06, + "lm_loss": 5.7549, + "loss": 1.5937, + "step": 711, + "text_contrastive_loss": 0.7723 + }, + { + "contrastive_loss": 0.5866, + "epoch": 1.6072234762979685, + "grad_norm": 17.412540435791016, + "learning_rate": 7.865074957818839e-06, + "lm_loss": 5.7178, + "loss": 1.5264, + "step": 712, + "text_contrastive_loss": 0.736 + }, + { + "contrastive_loss": 0.6745, + "epoch": 1.6094808126410836, + "grad_norm": 17.987707138061523, + "learning_rate": 7.859125809107082e-06, + "lm_loss": 5.6346, + "loss": 1.6675, + "step": 713, + "text_contrastive_loss": 0.8591 + }, + { + "contrastive_loss": 0.6714, + "epoch": 1.6117381489841986, + "grad_norm": 17.884342193603516, + "learning_rate": 7.853170640109182e-06, + "lm_loss": 5.7804, + "loss": 1.6289, + "step": 714, + "text_contrastive_loss": 0.759 + }, + { + "contrastive_loss": 0.6766, + "epoch": 1.6139954853273137, + "grad_norm": 16.94292449951172, + "learning_rate": 7.847209463364574e-06, + "lm_loss": 5.7075, + "loss": 1.7594, + "step": 715, + "text_contrastive_loss": 1.024 + }, + { + "contrastive_loss": 0.6867, + "epoch": 1.6162528216704288, + "grad_norm": 19.232624053955078, + "learning_rate": 7.841242291425342e-06, + "lm_loss": 5.69, + "loss": 1.6586, + "step": 716, + "text_contrastive_loss": 0.8058 + }, + { + "contrastive_loss": 0.7001, + "epoch": 1.6185101580135441, + "grad_norm": 17.87067222595215, + "learning_rate": 7.835269136856194e-06, + "lm_loss": 5.544, + "loss": 1.7777, + "step": 717, + "text_contrastive_loss": 1.0465 + }, + { + "contrastive_loss": 0.5075, + "epoch": 1.620767494356659, + "grad_norm": 20.154897689819336, + "learning_rate": 7.829290012234438e-06, + "lm_loss": 5.6526, + "loss": 1.4962, + "step": 718, + "text_contrastive_loss": 0.8469 + }, + { + "contrastive_loss": 0.5362, + "epoch": 1.6230248306997743, + "grad_norm": 15.40334415435791, + "learning_rate": 7.823304930149949e-06, + "lm_loss": 5.6081, + "loss": 1.4894, + "step": 719, + "text_contrastive_loss": 0.7848 + }, + { + "contrastive_loss": 0.5925, + "epoch": 1.6252821670428894, + "grad_norm": 15.82955265045166, + "learning_rate": 7.817313903205148e-06, + "lm_loss": 5.6713, + "loss": 1.5427, + "step": 720, + "text_contrastive_loss": 0.7662 + }, + { + "contrastive_loss": 0.6818, + "epoch": 1.6275395033860045, + "grad_norm": 16.279958724975586, + "learning_rate": 7.811316944014974e-06, + "lm_loss": 5.6306, + "loss": 1.6209, + "step": 721, + "text_contrastive_loss": 0.7521 + }, + { + "contrastive_loss": 0.5416, + "epoch": 1.6297968397291196, + "grad_norm": 16.718181610107422, + "learning_rate": 7.805314065206857e-06, + "lm_loss": 5.5601, + "loss": 1.4453, + "step": 722, + "text_contrastive_loss": 0.6954 + }, + { + "contrastive_loss": 0.7102, + "epoch": 1.6320541760722347, + "grad_norm": 17.937105178833008, + "learning_rate": 7.799305279420691e-06, + "lm_loss": 5.5872, + "loss": 1.6505, + "step": 723, + "text_contrastive_loss": 0.7631 + }, + { + "contrastive_loss": 0.5706, + "epoch": 1.63431151241535, + "grad_norm": 16.313173294067383, + "learning_rate": 7.793290599308807e-06, + "lm_loss": 5.6602, + "loss": 1.5522, + "step": 724, + "text_contrastive_loss": 0.8312 + }, + { + "contrastive_loss": 0.7123, + "epoch": 1.6365688487584649, + "grad_norm": 20.161495208740234, + "learning_rate": 7.78727003753595e-06, + "lm_loss": 5.5927, + "loss": 1.7213, + "step": 725, + "text_contrastive_loss": 0.8995 + }, + { + "contrastive_loss": 0.5938, + "epoch": 1.6388261851015802, + "grad_norm": 16.399438858032227, + "learning_rate": 7.78124360677925e-06, + "lm_loss": 5.7231, + "loss": 1.5695, + "step": 726, + "text_contrastive_loss": 0.8068 + }, + { + "contrastive_loss": 0.6285, + "epoch": 1.6410835214446953, + "grad_norm": 15.218472480773926, + "learning_rate": 7.775211319728191e-06, + "lm_loss": 5.5433, + "loss": 1.5815, + "step": 727, + "text_contrastive_loss": 0.7972 + }, + { + "contrastive_loss": 0.5776, + "epoch": 1.6433408577878104, + "grad_norm": 15.522934913635254, + "learning_rate": 7.769173189084589e-06, + "lm_loss": 5.7274, + "loss": 1.556, + "step": 728, + "text_contrastive_loss": 0.8112 + }, + { + "contrastive_loss": 0.7049, + "epoch": 1.6455981941309257, + "grad_norm": 16.525672912597656, + "learning_rate": 7.763129227562568e-06, + "lm_loss": 5.6235, + "loss": 1.7948, + "step": 729, + "text_contrastive_loss": 1.055 + }, + { + "contrastive_loss": 0.6319, + "epoch": 1.6478555304740405, + "grad_norm": 21.969823837280273, + "learning_rate": 7.757079447888529e-06, + "lm_loss": 5.6324, + "loss": 1.5885, + "step": 730, + "text_contrastive_loss": 0.7868 + }, + { + "contrastive_loss": 0.6082, + "epoch": 1.6501128668171559, + "grad_norm": 16.272430419921875, + "learning_rate": 7.75102386280112e-06, + "lm_loss": 5.581, + "loss": 1.6565, + "step": 731, + "text_contrastive_loss": 0.9803 + }, + { + "contrastive_loss": 0.5209, + "epoch": 1.6523702031602707, + "grad_norm": 15.750184059143066, + "learning_rate": 7.744962485051217e-06, + "lm_loss": 5.6793, + "loss": 1.4867, + "step": 732, + "text_contrastive_loss": 0.7959 + }, + { + "contrastive_loss": 0.8521, + "epoch": 1.654627539503386, + "grad_norm": 19.615497589111328, + "learning_rate": 7.738895327401891e-06, + "lm_loss": 5.6483, + "loss": 1.9015, + "step": 733, + "text_contrastive_loss": 0.9692 + }, + { + "contrastive_loss": 0.5763, + "epoch": 1.6568848758465011, + "grad_norm": 14.801111221313477, + "learning_rate": 7.732822402628385e-06, + "lm_loss": 5.5696, + "loss": 1.5111, + "step": 734, + "text_contrastive_loss": 0.7557 + }, + { + "contrastive_loss": 0.7067, + "epoch": 1.6591422121896162, + "grad_norm": 16.893308639526367, + "learning_rate": 7.726743723518087e-06, + "lm_loss": 5.6091, + "loss": 1.7366, + "step": 735, + "text_contrastive_loss": 0.9378 + }, + { + "contrastive_loss": 0.7603, + "epoch": 1.6613995485327315, + "grad_norm": 16.60019302368164, + "learning_rate": 7.720659302870496e-06, + "lm_loss": 5.5871, + "loss": 1.7595, + "step": 736, + "text_contrastive_loss": 0.881 + }, + { + "contrastive_loss": 0.6263, + "epoch": 1.6636568848758464, + "grad_norm": 15.957043647766113, + "learning_rate": 7.714569153497204e-06, + "lm_loss": 5.6781, + "loss": 1.6366, + "step": 737, + "text_contrastive_loss": 0.885 + }, + { + "contrastive_loss": 0.5992, + "epoch": 1.6659142212189617, + "grad_norm": 14.994237899780273, + "learning_rate": 7.708473288221868e-06, + "lm_loss": 5.5369, + "loss": 1.4997, + "step": 738, + "text_contrastive_loss": 0.6935 + }, + { + "contrastive_loss": 0.5831, + "epoch": 1.6681715575620768, + "grad_norm": 15.554744720458984, + "learning_rate": 7.702371719880178e-06, + "lm_loss": 5.6742, + "loss": 1.5097, + "step": 739, + "text_contrastive_loss": 0.7185 + }, + { + "contrastive_loss": 0.657, + "epoch": 1.670428893905192, + "grad_norm": 14.496306419372559, + "learning_rate": 7.696264461319831e-06, + "lm_loss": 5.6038, + "loss": 1.6738, + "step": 740, + "text_contrastive_loss": 0.913 + }, + { + "contrastive_loss": 0.633, + "epoch": 1.672686230248307, + "grad_norm": 16.079214096069336, + "learning_rate": 7.69015152540051e-06, + "lm_loss": 5.7092, + "loss": 1.6277, + "step": 741, + "text_contrastive_loss": 0.8476 + }, + { + "contrastive_loss": 0.5856, + "epoch": 1.674943566591422, + "grad_norm": 16.214691162109375, + "learning_rate": 7.684032924993845e-06, + "lm_loss": 5.6346, + "loss": 1.5279, + "step": 742, + "text_contrastive_loss": 0.7577 + }, + { + "contrastive_loss": 0.6277, + "epoch": 1.6772009029345374, + "grad_norm": 15.978594779968262, + "learning_rate": 7.677908672983404e-06, + "lm_loss": 5.5738, + "loss": 1.5994, + "step": 743, + "text_contrastive_loss": 0.8286 + }, + { + "contrastive_loss": 0.6228, + "epoch": 1.6794582392776523, + "grad_norm": 14.20463752746582, + "learning_rate": 7.671778782264647e-06, + "lm_loss": 5.635, + "loss": 1.6288, + "step": 744, + "text_contrastive_loss": 0.8849 + }, + { + "contrastive_loss": 0.787, + "epoch": 1.6817155756207676, + "grad_norm": 17.111305236816406, + "learning_rate": 7.66564326574491e-06, + "lm_loss": 5.6514, + "loss": 1.8412, + "step": 745, + "text_contrastive_loss": 0.978 + }, + { + "contrastive_loss": 0.6928, + "epoch": 1.6839729119638827, + "grad_norm": 19.419923782348633, + "learning_rate": 7.65950213634337e-06, + "lm_loss": 5.6392, + "loss": 1.6916, + "step": 746, + "text_contrastive_loss": 0.8696 + }, + { + "contrastive_loss": 0.5187, + "epoch": 1.6862302483069977, + "grad_norm": 15.22336196899414, + "learning_rate": 7.653355406991034e-06, + "lm_loss": 5.6101, + "loss": 1.4763, + "step": 747, + "text_contrastive_loss": 0.7933 + }, + { + "contrastive_loss": 0.5471, + "epoch": 1.6884875846501128, + "grad_norm": 16.713504791259766, + "learning_rate": 7.64720309063069e-06, + "lm_loss": 5.5356, + "loss": 1.5305, + "step": 748, + "text_contrastive_loss": 0.8597 + }, + { + "contrastive_loss": 0.6765, + "epoch": 1.690744920993228, + "grad_norm": 15.930522918701172, + "learning_rate": 7.641045200216896e-06, + "lm_loss": 5.597, + "loss": 1.647, + "step": 749, + "text_contrastive_loss": 0.8216 + }, + { + "contrastive_loss": 0.7218, + "epoch": 1.6930022573363432, + "grad_norm": 18.222816467285156, + "learning_rate": 7.634881748715941e-06, + "lm_loss": 5.74, + "loss": 1.6928, + "step": 750, + "text_contrastive_loss": 0.7941 + }, + { + "contrastive_loss": 0.6519, + "epoch": 1.695259593679458, + "grad_norm": 17.758466720581055, + "learning_rate": 7.628712749105831e-06, + "lm_loss": 5.6711, + "loss": 1.7348, + "step": 751, + "text_contrastive_loss": 1.0316 + }, + { + "contrastive_loss": 0.5155, + "epoch": 1.6975169300225734, + "grad_norm": 16.984390258789062, + "learning_rate": 7.622538214376248e-06, + "lm_loss": 5.53, + "loss": 1.4664, + "step": 752, + "text_contrastive_loss": 0.7957 + }, + { + "contrastive_loss": 0.5652, + "epoch": 1.6997742663656885, + "grad_norm": 15.909612655639648, + "learning_rate": 7.616358157528535e-06, + "lm_loss": 5.6624, + "loss": 1.5215, + "step": 753, + "text_contrastive_loss": 0.7802 + }, + { + "contrastive_loss": 0.5905, + "epoch": 1.7020316027088036, + "grad_norm": 16.675256729125977, + "learning_rate": 7.610172591575656e-06, + "lm_loss": 5.6161, + "loss": 1.5576, + "step": 754, + "text_contrastive_loss": 0.811 + }, + { + "contrastive_loss": 0.6756, + "epoch": 1.7042889390519187, + "grad_norm": 18.014429092407227, + "learning_rate": 7.60398152954218e-06, + "lm_loss": 5.6102, + "loss": 1.6643, + "step": 755, + "text_contrastive_loss": 0.8554 + }, + { + "contrastive_loss": 0.5153, + "epoch": 1.7065462753950338, + "grad_norm": 15.344369888305664, + "learning_rate": 7.597784984464248e-06, + "lm_loss": 5.6551, + "loss": 1.4603, + "step": 756, + "text_contrastive_loss": 0.7589 + }, + { + "contrastive_loss": 0.708, + "epoch": 1.708803611738149, + "grad_norm": 19.520374298095703, + "learning_rate": 7.5915829693895435e-06, + "lm_loss": 5.5414, + "loss": 1.7017, + "step": 757, + "text_contrastive_loss": 0.879 + }, + { + "contrastive_loss": 0.7059, + "epoch": 1.711060948081264, + "grad_norm": 16.9716796875, + "learning_rate": 7.585375497377271e-06, + "lm_loss": 5.6469, + "loss": 1.778, + "step": 758, + "text_contrastive_loss": 1.0148 + }, + { + "contrastive_loss": 0.5584, + "epoch": 1.7133182844243793, + "grad_norm": 14.892501831054688, + "learning_rate": 7.579162581498125e-06, + "lm_loss": 5.5184, + "loss": 1.5484, + "step": 759, + "text_contrastive_loss": 0.8765 + }, + { + "contrastive_loss": 0.6526, + "epoch": 1.7155756207674944, + "grad_norm": 17.0418643951416, + "learning_rate": 7.572944234834261e-06, + "lm_loss": 5.5582, + "loss": 1.6884, + "step": 760, + "text_contrastive_loss": 0.9599 + }, + { + "contrastive_loss": 0.6604, + "epoch": 1.7178329571106095, + "grad_norm": 15.868025779724121, + "learning_rate": 7.5667204704792706e-06, + "lm_loss": 5.6248, + "loss": 1.6557, + "step": 761, + "text_contrastive_loss": 0.8656 + }, + { + "contrastive_loss": 0.6528, + "epoch": 1.7200902934537246, + "grad_norm": 16.643239974975586, + "learning_rate": 7.5604913015381535e-06, + "lm_loss": 5.6341, + "loss": 1.6089, + "step": 762, + "text_contrastive_loss": 0.7854 + }, + { + "contrastive_loss": 0.6528, + "epoch": 1.7223476297968396, + "grad_norm": 15.834339141845703, + "learning_rate": 7.554256741127291e-06, + "lm_loss": 5.6096, + "loss": 1.6176, + "step": 763, + "text_contrastive_loss": 0.8078 + }, + { + "contrastive_loss": 0.6601, + "epoch": 1.724604966139955, + "grad_norm": 17.527952194213867, + "learning_rate": 7.548016802374412e-06, + "lm_loss": 5.5987, + "loss": 1.6024, + "step": 764, + "text_contrastive_loss": 0.7649 + }, + { + "contrastive_loss": 0.6799, + "epoch": 1.7268623024830698, + "grad_norm": 16.515607833862305, + "learning_rate": 7.541771498418575e-06, + "lm_loss": 5.7484, + "loss": 1.7105, + "step": 765, + "text_contrastive_loss": 0.9116 + }, + { + "contrastive_loss": 0.6125, + "epoch": 1.7291196388261851, + "grad_norm": 18.52354621887207, + "learning_rate": 7.535520842410136e-06, + "lm_loss": 5.6428, + "loss": 1.5753, + "step": 766, + "text_contrastive_loss": 0.7972 + }, + { + "contrastive_loss": 0.6454, + "epoch": 1.7313769751693002, + "grad_norm": 17.109241485595703, + "learning_rate": 7.529264847510715e-06, + "lm_loss": 5.6524, + "loss": 1.6544, + "step": 767, + "text_contrastive_loss": 0.8875 + }, + { + "contrastive_loss": 0.6188, + "epoch": 1.7336343115124153, + "grad_norm": 17.155672073364258, + "learning_rate": 7.52300352689318e-06, + "lm_loss": 5.6825, + "loss": 1.6844, + "step": 768, + "text_contrastive_loss": 0.9947 + }, + { + "contrastive_loss": 0.6882, + "epoch": 1.7358916478555306, + "grad_norm": 17.156185150146484, + "learning_rate": 7.516736893741611e-06, + "lm_loss": 5.6875, + "loss": 1.6641, + "step": 769, + "text_contrastive_loss": 0.8142 + }, + { + "contrastive_loss": 0.5441, + "epoch": 1.7381489841986455, + "grad_norm": 15.328943252563477, + "learning_rate": 7.510464961251271e-06, + "lm_loss": 5.6166, + "loss": 1.4909, + "step": 770, + "text_contrastive_loss": 0.7704 + }, + { + "contrastive_loss": 0.5575, + "epoch": 1.7404063205417608, + "grad_norm": 15.422810554504395, + "learning_rate": 7.5041877426285856e-06, + "lm_loss": 5.6112, + "loss": 1.5464, + "step": 771, + "text_contrastive_loss": 0.8555 + }, + { + "contrastive_loss": 0.6555, + "epoch": 1.742663656884876, + "grad_norm": 17.159561157226562, + "learning_rate": 7.49790525109111e-06, + "lm_loss": 5.6352, + "loss": 1.6591, + "step": 772, + "text_contrastive_loss": 0.8802 + }, + { + "contrastive_loss": 0.5397, + "epoch": 1.744920993227991, + "grad_norm": 14.259885787963867, + "learning_rate": 7.491617499867502e-06, + "lm_loss": 5.5852, + "loss": 1.4501, + "step": 773, + "text_contrastive_loss": 0.7039 + }, + { + "contrastive_loss": 0.5738, + "epoch": 1.747178329571106, + "grad_norm": 14.911514282226562, + "learning_rate": 7.485324502197494e-06, + "lm_loss": 5.6263, + "loss": 1.5204, + "step": 774, + "text_contrastive_loss": 0.768 + }, + { + "contrastive_loss": 0.4498, + "epoch": 1.7494356659142212, + "grad_norm": 13.753421783447266, + "learning_rate": 7.479026271331864e-06, + "lm_loss": 5.561, + "loss": 1.4023, + "step": 775, + "text_contrastive_loss": 0.7929 + }, + { + "contrastive_loss": 0.7496, + "epoch": 1.7516930022573365, + "grad_norm": 19.0356388092041, + "learning_rate": 7.472722820532414e-06, + "lm_loss": 5.5895, + "loss": 1.7852, + "step": 776, + "text_contrastive_loss": 0.9534 + }, + { + "contrastive_loss": 0.5782, + "epoch": 1.7539503386004514, + "grad_norm": 16.46868324279785, + "learning_rate": 7.466414163071934e-06, + "lm_loss": 5.5661, + "loss": 1.6188, + "step": 777, + "text_contrastive_loss": 0.968 + }, + { + "contrastive_loss": 0.732, + "epoch": 1.7562076749435667, + "grad_norm": 18.876466751098633, + "learning_rate": 7.460100312234176e-06, + "lm_loss": 5.6912, + "loss": 1.7865, + "step": 778, + "text_contrastive_loss": 0.9707 + }, + { + "contrastive_loss": 0.6535, + "epoch": 1.7584650112866818, + "grad_norm": 17.19675636291504, + "learning_rate": 7.453781281313831e-06, + "lm_loss": 5.6308, + "loss": 1.6226, + "step": 779, + "text_contrastive_loss": 0.8121 + }, + { + "contrastive_loss": 0.6757, + "epoch": 1.7607223476297968, + "grad_norm": 16.143611907958984, + "learning_rate": 7.447457083616494e-06, + "lm_loss": 5.6272, + "loss": 1.7421, + "step": 780, + "text_contrastive_loss": 1.0074 + }, + { + "contrastive_loss": 0.572, + "epoch": 1.762979683972912, + "grad_norm": 13.709511756896973, + "learning_rate": 7.441127732458642e-06, + "lm_loss": 5.5948, + "loss": 1.5837, + "step": 781, + "text_contrastive_loss": 0.9045 + }, + { + "contrastive_loss": 0.5823, + "epoch": 1.765237020316027, + "grad_norm": 15.926129341125488, + "learning_rate": 7.434793241167601e-06, + "lm_loss": 5.6045, + "loss": 1.5605, + "step": 782, + "text_contrastive_loss": 0.8355 + }, + { + "contrastive_loss": 0.5187, + "epoch": 1.7674943566591423, + "grad_norm": 14.050180435180664, + "learning_rate": 7.428453623081522e-06, + "lm_loss": 5.5293, + "loss": 1.5309, + "step": 783, + "text_contrastive_loss": 0.9185 + }, + { + "contrastive_loss": 0.6622, + "epoch": 1.7697516930022572, + "grad_norm": 15.827361106872559, + "learning_rate": 7.422108891549349e-06, + "lm_loss": 5.7322, + "loss": 1.6548, + "step": 784, + "text_contrastive_loss": 0.8389 + }, + { + "contrastive_loss": 0.5779, + "epoch": 1.7720090293453725, + "grad_norm": 16.51972770690918, + "learning_rate": 7.415759059930799e-06, + "lm_loss": 5.6133, + "loss": 1.4832, + "step": 785, + "text_contrastive_loss": 0.688 + }, + { + "contrastive_loss": 0.5768, + "epoch": 1.7742663656884876, + "grad_norm": 15.352350234985352, + "learning_rate": 7.409404141596319e-06, + "lm_loss": 5.4875, + "loss": 1.5152, + "step": 786, + "text_contrastive_loss": 0.7793 + }, + { + "contrastive_loss": 0.4536, + "epoch": 1.7765237020316027, + "grad_norm": 15.709349632263184, + "learning_rate": 7.403044149927074e-06, + "lm_loss": 5.617, + "loss": 1.3583, + "step": 787, + "text_contrastive_loss": 0.6859 + }, + { + "contrastive_loss": 0.5617, + "epoch": 1.7787810383747178, + "grad_norm": 15.498834609985352, + "learning_rate": 7.396679098314908e-06, + "lm_loss": 5.4998, + "loss": 1.5357, + "step": 788, + "text_contrastive_loss": 0.8482 + }, + { + "contrastive_loss": 0.6754, + "epoch": 1.7810383747178329, + "grad_norm": 17.250186920166016, + "learning_rate": 7.390309000162321e-06, + "lm_loss": 5.6479, + "loss": 1.6909, + "step": 789, + "text_contrastive_loss": 0.9013 + }, + { + "contrastive_loss": 0.6052, + "epoch": 1.7832957110609482, + "grad_norm": 17.019081115722656, + "learning_rate": 7.383933868882438e-06, + "lm_loss": 5.72, + "loss": 1.5785, + "step": 790, + "text_contrastive_loss": 0.8025 + }, + { + "contrastive_loss": 0.6701, + "epoch": 1.785553047404063, + "grad_norm": 18.17104148864746, + "learning_rate": 7.377553717898983e-06, + "lm_loss": 5.5142, + "loss": 1.6041, + "step": 791, + "text_contrastive_loss": 0.7652 + }, + { + "contrastive_loss": 0.5827, + "epoch": 1.7878103837471784, + "grad_norm": 17.872495651245117, + "learning_rate": 7.37116856064625e-06, + "lm_loss": 5.7807, + "loss": 1.5565, + "step": 792, + "text_contrastive_loss": 0.7916 + }, + { + "contrastive_loss": 0.7069, + "epoch": 1.7900677200902935, + "grad_norm": 18.860488891601562, + "learning_rate": 7.364778410569071e-06, + "lm_loss": 5.5773, + "loss": 1.7215, + "step": 793, + "text_contrastive_loss": 0.9137 + }, + { + "contrastive_loss": 0.5601, + "epoch": 1.7923250564334086, + "grad_norm": 16.680278778076172, + "learning_rate": 7.358383281122797e-06, + "lm_loss": 5.5869, + "loss": 1.4951, + "step": 794, + "text_contrastive_loss": 0.7525 + }, + { + "contrastive_loss": 0.693, + "epoch": 1.7945823927765236, + "grad_norm": 16.43022918701172, + "learning_rate": 7.351983185773259e-06, + "lm_loss": 5.6024, + "loss": 1.7592, + "step": 795, + "text_contrastive_loss": 1.0118 + }, + { + "contrastive_loss": 0.6788, + "epoch": 1.7968397291196387, + "grad_norm": 16.972368240356445, + "learning_rate": 7.345578137996745e-06, + "lm_loss": 5.5441, + "loss": 1.6168, + "step": 796, + "text_contrastive_loss": 0.7673 + }, + { + "contrastive_loss": 0.6206, + "epoch": 1.799097065462754, + "grad_norm": 15.841362953186035, + "learning_rate": 7.339168151279974e-06, + "lm_loss": 5.7047, + "loss": 1.6207, + "step": 797, + "text_contrastive_loss": 0.8593 + }, + { + "contrastive_loss": 0.5893, + "epoch": 1.801354401805869, + "grad_norm": 16.630756378173828, + "learning_rate": 7.332753239120061e-06, + "lm_loss": 5.6058, + "loss": 1.5894, + "step": 798, + "text_contrastive_loss": 0.8792 + }, + { + "contrastive_loss": 0.6043, + "epoch": 1.8036117381489842, + "grad_norm": 15.179779052734375, + "learning_rate": 7.326333415024494e-06, + "lm_loss": 5.5835, + "loss": 1.5948, + "step": 799, + "text_contrastive_loss": 0.8643 + }, + { + "contrastive_loss": 0.5845, + "epoch": 1.8058690744920993, + "grad_norm": 15.992825508117676, + "learning_rate": 7.319908692511103e-06, + "lm_loss": 5.6918, + "loss": 1.5861, + "step": 800, + "text_contrastive_loss": 0.8648 + }, + { + "contrastive_loss": 0.6359, + "epoch": 1.8081264108352144, + "grad_norm": 16.51412582397461, + "learning_rate": 7.313479085108033e-06, + "lm_loss": 5.6701, + "loss": 1.6496, + "step": 801, + "text_contrastive_loss": 0.8935 + }, + { + "contrastive_loss": 0.6834, + "epoch": 1.8103837471783297, + "grad_norm": 16.19455337524414, + "learning_rate": 7.307044606353715e-06, + "lm_loss": 5.5504, + "loss": 1.6494, + "step": 802, + "text_contrastive_loss": 0.8219 + }, + { + "contrastive_loss": 0.6577, + "epoch": 1.8126410835214446, + "grad_norm": 16.510953903198242, + "learning_rate": 7.300605269796839e-06, + "lm_loss": 5.5902, + "loss": 1.6321, + "step": 803, + "text_contrastive_loss": 0.8306 + }, + { + "contrastive_loss": 0.6756, + "epoch": 1.81489841986456, + "grad_norm": 18.364416122436523, + "learning_rate": 7.2941610889963164e-06, + "lm_loss": 5.6027, + "loss": 1.7232, + "step": 804, + "text_contrastive_loss": 0.9745 + }, + { + "contrastive_loss": 0.6775, + "epoch": 1.8171557562076748, + "grad_norm": 17.008562088012695, + "learning_rate": 7.2877120775212685e-06, + "lm_loss": 5.7175, + "loss": 1.7378, + "step": 805, + "text_contrastive_loss": 0.9769 + }, + { + "contrastive_loss": 0.6402, + "epoch": 1.81941309255079, + "grad_norm": 16.21780014038086, + "learning_rate": 7.2812582489509844e-06, + "lm_loss": 5.6571, + "loss": 1.5613, + "step": 806, + "text_contrastive_loss": 0.7108 + }, + { + "contrastive_loss": 0.6218, + "epoch": 1.8216704288939052, + "grad_norm": 17.749752044677734, + "learning_rate": 7.2747996168748915e-06, + "lm_loss": 5.5993, + "loss": 1.5797, + "step": 807, + "text_contrastive_loss": 0.7959 + }, + { + "contrastive_loss": 0.5546, + "epoch": 1.8239277652370203, + "grad_norm": 14.439248085021973, + "learning_rate": 7.26833619489254e-06, + "lm_loss": 5.5922, + "loss": 1.4468, + "step": 808, + "text_contrastive_loss": 0.666 + }, + { + "contrastive_loss": 0.6521, + "epoch": 1.8261851015801356, + "grad_norm": 17.860071182250977, + "learning_rate": 7.261867996613559e-06, + "lm_loss": 5.7373, + "loss": 1.6656, + "step": 809, + "text_contrastive_loss": 0.8796 + }, + { + "contrastive_loss": 0.5127, + "epoch": 1.8284424379232505, + "grad_norm": 16.678512573242188, + "learning_rate": 7.255395035657639e-06, + "lm_loss": 5.6824, + "loss": 1.4757, + "step": 810, + "text_contrastive_loss": 0.7894 + }, + { + "contrastive_loss": 0.5386, + "epoch": 1.8306997742663658, + "grad_norm": 16.262500762939453, + "learning_rate": 7.2489173256544975e-06, + "lm_loss": 5.5996, + "loss": 1.5711, + "step": 811, + "text_contrastive_loss": 0.9452 + }, + { + "contrastive_loss": 0.5726, + "epoch": 1.8329571106094809, + "grad_norm": 15.10853099822998, + "learning_rate": 7.242434880243851e-06, + "lm_loss": 5.57, + "loss": 1.5025, + "step": 812, + "text_contrastive_loss": 0.7457 + }, + { + "contrastive_loss": 0.5137, + "epoch": 1.835214446952596, + "grad_norm": 16.65679931640625, + "learning_rate": 7.235947713075389e-06, + "lm_loss": 5.5702, + "loss": 1.3889, + "step": 813, + "text_contrastive_loss": 0.6364 + }, + { + "contrastive_loss": 0.6597, + "epoch": 1.837471783295711, + "grad_norm": 18.37490463256836, + "learning_rate": 7.229455837808741e-06, + "lm_loss": 5.6616, + "loss": 1.6702, + "step": 814, + "text_contrastive_loss": 0.8886 + }, + { + "contrastive_loss": 0.5845, + "epoch": 1.8397291196388261, + "grad_norm": 16.170198440551758, + "learning_rate": 7.222959268113452e-06, + "lm_loss": 5.5558, + "loss": 1.5941, + "step": 815, + "text_contrastive_loss": 0.908 + }, + { + "contrastive_loss": 0.5638, + "epoch": 1.8419864559819414, + "grad_norm": 16.061973571777344, + "learning_rate": 7.216458017668951e-06, + "lm_loss": 5.6042, + "loss": 1.4835, + "step": 816, + "text_contrastive_loss": 0.7185 + }, + { + "contrastive_loss": 0.5483, + "epoch": 1.8442437923250563, + "grad_norm": 16.66253662109375, + "learning_rate": 7.2099521001645225e-06, + "lm_loss": 5.5736, + "loss": 1.4765, + "step": 817, + "text_contrastive_loss": 0.7417 + }, + { + "contrastive_loss": 0.6933, + "epoch": 1.8465011286681716, + "grad_norm": 16.928455352783203, + "learning_rate": 7.20344152929928e-06, + "lm_loss": 5.7431, + "loss": 1.7452, + "step": 818, + "text_contrastive_loss": 0.9552 + }, + { + "contrastive_loss": 0.4993, + "epoch": 1.8487584650112867, + "grad_norm": 15.764592170715332, + "learning_rate": 7.19692631878213e-06, + "lm_loss": 5.6099, + "loss": 1.4026, + "step": 819, + "text_contrastive_loss": 0.6845 + }, + { + "contrastive_loss": 0.6099, + "epoch": 1.8510158013544018, + "grad_norm": 15.19287395477295, + "learning_rate": 7.190406482331757e-06, + "lm_loss": 5.6551, + "loss": 1.5898, + "step": 820, + "text_contrastive_loss": 0.8288 + }, + { + "contrastive_loss": 0.6181, + "epoch": 1.853273137697517, + "grad_norm": 17.772113800048828, + "learning_rate": 7.183882033676579e-06, + "lm_loss": 5.5202, + "loss": 1.538, + "step": 821, + "text_contrastive_loss": 0.7357 + }, + { + "contrastive_loss": 0.6082, + "epoch": 1.855530474040632, + "grad_norm": 17.984954833984375, + "learning_rate": 7.177352986554729e-06, + "lm_loss": 5.5946, + "loss": 1.6758, + "step": 822, + "text_contrastive_loss": 1.0162 + }, + { + "contrastive_loss": 0.5879, + "epoch": 1.8577878103837473, + "grad_norm": 16.008399963378906, + "learning_rate": 7.1708193547140205e-06, + "lm_loss": 5.7365, + "loss": 1.5873, + "step": 823, + "text_contrastive_loss": 0.8514 + }, + { + "contrastive_loss": 0.5878, + "epoch": 1.8600451467268622, + "grad_norm": 15.425915718078613, + "learning_rate": 7.164281151911923e-06, + "lm_loss": 5.6353, + "loss": 1.5688, + "step": 824, + "text_contrastive_loss": 0.8348 + }, + { + "contrastive_loss": 0.673, + "epoch": 1.8623024830699775, + "grad_norm": 15.963248252868652, + "learning_rate": 7.157738391915531e-06, + "lm_loss": 5.5391, + "loss": 1.6485, + "step": 825, + "text_contrastive_loss": 0.8431 + }, + { + "contrastive_loss": 0.6093, + "epoch": 1.8645598194130926, + "grad_norm": 16.09975242614746, + "learning_rate": 7.151191088501531e-06, + "lm_loss": 5.5798, + "loss": 1.591, + "step": 826, + "text_contrastive_loss": 0.8473 + }, + { + "contrastive_loss": 0.6162, + "epoch": 1.8668171557562077, + "grad_norm": 15.717811584472656, + "learning_rate": 7.14463925545618e-06, + "lm_loss": 5.6483, + "loss": 1.6201, + "step": 827, + "text_contrastive_loss": 0.878 + }, + { + "contrastive_loss": 0.5907, + "epoch": 1.8690744920993227, + "grad_norm": 15.425777435302734, + "learning_rate": 7.138082906575271e-06, + "lm_loss": 5.5912, + "loss": 1.5954, + "step": 828, + "text_contrastive_loss": 0.8913 + }, + { + "contrastive_loss": 0.6175, + "epoch": 1.8713318284424378, + "grad_norm": 16.30082893371582, + "learning_rate": 7.131522055664109e-06, + "lm_loss": 5.4914, + "loss": 1.5953, + "step": 829, + "text_contrastive_loss": 0.8574 + }, + { + "contrastive_loss": 0.5407, + "epoch": 1.8735891647855532, + "grad_norm": 14.981559753417969, + "learning_rate": 7.124956716537471e-06, + "lm_loss": 5.6236, + "loss": 1.5297, + "step": 830, + "text_contrastive_loss": 0.8534 + }, + { + "contrastive_loss": 0.5908, + "epoch": 1.875846501128668, + "grad_norm": 15.349632263183594, + "learning_rate": 7.118386903019594e-06, + "lm_loss": 5.5637, + "loss": 1.5616, + "step": 831, + "text_contrastive_loss": 0.8288 + }, + { + "contrastive_loss": 0.664, + "epoch": 1.8781038374717833, + "grad_norm": 16.663297653198242, + "learning_rate": 7.111812628944132e-06, + "lm_loss": 5.703, + "loss": 1.6699, + "step": 832, + "text_contrastive_loss": 0.8711 + }, + { + "contrastive_loss": 0.6863, + "epoch": 1.8803611738148984, + "grad_norm": 17.10484504699707, + "learning_rate": 7.105233908154128e-06, + "lm_loss": 5.494, + "loss": 1.7709, + "step": 833, + "text_contrastive_loss": 1.0703 + }, + { + "contrastive_loss": 0.6986, + "epoch": 1.8826185101580135, + "grad_norm": 16.48059844970703, + "learning_rate": 7.098650754501994e-06, + "lm_loss": 5.614, + "loss": 1.6907, + "step": 834, + "text_contrastive_loss": 0.8613 + }, + { + "contrastive_loss": 0.5455, + "epoch": 1.8848758465011288, + "grad_norm": 15.246926307678223, + "learning_rate": 7.0920631818494745e-06, + "lm_loss": 5.749, + "loss": 1.5203, + "step": 835, + "text_contrastive_loss": 0.7998 + }, + { + "contrastive_loss": 0.6555, + "epoch": 1.8871331828442437, + "grad_norm": 16.64633560180664, + "learning_rate": 7.085471204067616e-06, + "lm_loss": 5.5486, + "loss": 1.5612, + "step": 836, + "text_contrastive_loss": 0.7016 + }, + { + "contrastive_loss": 0.6638, + "epoch": 1.889390519187359, + "grad_norm": 16.877185821533203, + "learning_rate": 7.078874835036742e-06, + "lm_loss": 5.5712, + "loss": 1.6556, + "step": 837, + "text_contrastive_loss": 0.8694 + }, + { + "contrastive_loss": 0.5417, + "epoch": 1.8916478555304739, + "grad_norm": 15.430839538574219, + "learning_rate": 7.072274088646425e-06, + "lm_loss": 5.5435, + "loss": 1.5126, + "step": 838, + "text_contrastive_loss": 0.8332 + }, + { + "contrastive_loss": 0.6148, + "epoch": 1.8939051918735892, + "grad_norm": 14.822012901306152, + "learning_rate": 7.065668978795449e-06, + "lm_loss": 5.5579, + "loss": 1.6278, + "step": 839, + "text_contrastive_loss": 0.9145 + }, + { + "contrastive_loss": 0.5328, + "epoch": 1.8961625282167043, + "grad_norm": 14.90259838104248, + "learning_rate": 7.059059519391794e-06, + "lm_loss": 5.6553, + "loss": 1.482, + "step": 840, + "text_contrastive_loss": 0.7674 + }, + { + "contrastive_loss": 0.6179, + "epoch": 1.8984198645598194, + "grad_norm": 15.720355987548828, + "learning_rate": 7.05244572435259e-06, + "lm_loss": 5.5941, + "loss": 1.5282, + "step": 841, + "text_contrastive_loss": 0.7018 + }, + { + "contrastive_loss": 0.5856, + "epoch": 1.9006772009029347, + "grad_norm": 18.02252769470215, + "learning_rate": 7.045827607604103e-06, + "lm_loss": 5.5812, + "loss": 1.4887, + "step": 842, + "text_contrastive_loss": 0.6899 + }, + { + "contrastive_loss": 0.6467, + "epoch": 1.9029345372460496, + "grad_norm": 16.064016342163086, + "learning_rate": 7.039205183081694e-06, + "lm_loss": 5.6405, + "loss": 1.6037, + "step": 843, + "text_contrastive_loss": 0.7858 + }, + { + "contrastive_loss": 0.6753, + "epoch": 1.9051918735891649, + "grad_norm": 17.041418075561523, + "learning_rate": 7.0325784647298e-06, + "lm_loss": 5.65, + "loss": 1.6561, + "step": 844, + "text_contrastive_loss": 0.8316 + }, + { + "contrastive_loss": 0.5641, + "epoch": 1.90744920993228, + "grad_norm": 16.694120407104492, + "learning_rate": 7.0259474665018915e-06, + "lm_loss": 5.6036, + "loss": 1.5535, + "step": 845, + "text_contrastive_loss": 0.8581 + }, + { + "contrastive_loss": 0.5545, + "epoch": 1.909706546275395, + "grad_norm": 15.350920677185059, + "learning_rate": 7.019312202360457e-06, + "lm_loss": 5.5084, + "loss": 1.4644, + "step": 846, + "text_contrastive_loss": 0.718 + }, + { + "contrastive_loss": 0.7071, + "epoch": 1.9119638826185101, + "grad_norm": 17.851167678833008, + "learning_rate": 7.012672686276969e-06, + "lm_loss": 5.565, + "loss": 1.6721, + "step": 847, + "text_contrastive_loss": 0.8169 + }, + { + "contrastive_loss": 0.662, + "epoch": 1.9142212189616252, + "grad_norm": 17.496871948242188, + "learning_rate": 7.006028932231847e-06, + "lm_loss": 5.4837, + "loss": 1.6971, + "step": 848, + "text_contrastive_loss": 0.9735 + }, + { + "contrastive_loss": 0.6087, + "epoch": 1.9164785553047405, + "grad_norm": 15.356800079345703, + "learning_rate": 6.999380954214438e-06, + "lm_loss": 5.6388, + "loss": 1.6348, + "step": 849, + "text_contrastive_loss": 0.9244 + }, + { + "contrastive_loss": 0.5352, + "epoch": 1.9187358916478554, + "grad_norm": 16.45505142211914, + "learning_rate": 6.992728766222982e-06, + "lm_loss": 5.6371, + "loss": 1.4918, + "step": 850, + "text_contrastive_loss": 0.7856 + }, + { + "contrastive_loss": 0.6315, + "epoch": 1.9209932279909707, + "grad_norm": 16.982715606689453, + "learning_rate": 6.9860723822645825e-06, + "lm_loss": 5.5917, + "loss": 1.6422, + "step": 851, + "text_contrastive_loss": 0.9031 + }, + { + "contrastive_loss": 0.5311, + "epoch": 1.9232505643340858, + "grad_norm": 15.755159378051758, + "learning_rate": 6.979411816355183e-06, + "lm_loss": 5.5169, + "loss": 1.4205, + "step": 852, + "text_contrastive_loss": 0.6753 + }, + { + "contrastive_loss": 0.6723, + "epoch": 1.925507900677201, + "grad_norm": 17.17357063293457, + "learning_rate": 6.972747082519526e-06, + "lm_loss": 5.5413, + "loss": 1.731, + "step": 853, + "text_contrastive_loss": 1.0091 + }, + { + "contrastive_loss": 0.5499, + "epoch": 1.927765237020316, + "grad_norm": 15.229076385498047, + "learning_rate": 6.966078194791133e-06, + "lm_loss": 5.5788, + "loss": 1.4664, + "step": 854, + "text_contrastive_loss": 0.7172 + }, + { + "contrastive_loss": 0.7271, + "epoch": 1.930022573363431, + "grad_norm": 17.912700653076172, + "learning_rate": 6.959405167212278e-06, + "lm_loss": 5.6208, + "loss": 1.7654, + "step": 855, + "text_contrastive_loss": 0.9525 + }, + { + "contrastive_loss": 0.6074, + "epoch": 1.9322799097065464, + "grad_norm": 15.696358680725098, + "learning_rate": 6.952728013833941e-06, + "lm_loss": 5.6749, + "loss": 1.6688, + "step": 856, + "text_contrastive_loss": 0.9878 + }, + { + "contrastive_loss": 0.6983, + "epoch": 1.9345372460496613, + "grad_norm": 17.30682373046875, + "learning_rate": 6.946046748715796e-06, + "lm_loss": 5.6378, + "loss": 1.7168, + "step": 857, + "text_contrastive_loss": 0.9095 + }, + { + "contrastive_loss": 0.5927, + "epoch": 1.9367945823927766, + "grad_norm": 17.79252052307129, + "learning_rate": 6.9393613859261755e-06, + "lm_loss": 5.6205, + "loss": 1.5651, + "step": 858, + "text_contrastive_loss": 0.8207 + }, + { + "contrastive_loss": 0.6771, + "epoch": 1.9390519187358917, + "grad_norm": 18.796653747558594, + "learning_rate": 6.932671939542037e-06, + "lm_loss": 5.5481, + "loss": 1.7077, + "step": 859, + "text_contrastive_loss": 0.9515 + }, + { + "contrastive_loss": 0.7152, + "epoch": 1.9413092550790068, + "grad_norm": 17.163236618041992, + "learning_rate": 6.925978423648941e-06, + "lm_loss": 5.659, + "loss": 1.7527, + "step": 860, + "text_contrastive_loss": 0.9432 + }, + { + "contrastive_loss": 0.6069, + "epoch": 1.9435665914221218, + "grad_norm": 14.760443687438965, + "learning_rate": 6.919280852341011e-06, + "lm_loss": 5.5914, + "loss": 1.6069, + "step": 861, + "text_contrastive_loss": 0.8818 + }, + { + "contrastive_loss": 0.4893, + "epoch": 1.945823927765237, + "grad_norm": 14.319083213806152, + "learning_rate": 6.912579239720913e-06, + "lm_loss": 5.6018, + "loss": 1.3888, + "step": 862, + "text_contrastive_loss": 0.6787 + }, + { + "contrastive_loss": 0.6111, + "epoch": 1.9480812641083523, + "grad_norm": 15.737593650817871, + "learning_rate": 6.9058735998998224e-06, + "lm_loss": 5.5189, + "loss": 1.5841, + "step": 863, + "text_contrastive_loss": 0.8422 + }, + { + "contrastive_loss": 0.7067, + "epoch": 1.9503386004514671, + "grad_norm": 16.638887405395508, + "learning_rate": 6.899163946997396e-06, + "lm_loss": 5.5953, + "loss": 1.6981, + "step": 864, + "text_contrastive_loss": 0.8638 + }, + { + "contrastive_loss": 0.7025, + "epoch": 1.9525959367945824, + "grad_norm": 16.231441497802734, + "learning_rate": 6.892450295141737e-06, + "lm_loss": 5.6633, + "loss": 1.6773, + "step": 865, + "text_contrastive_loss": 0.8169 + }, + { + "contrastive_loss": 0.6093, + "epoch": 1.9548532731376975, + "grad_norm": 15.671695709228516, + "learning_rate": 6.885732658469374e-06, + "lm_loss": 5.6814, + "loss": 1.6418, + "step": 866, + "text_contrastive_loss": 0.9287 + }, + { + "contrastive_loss": 0.6353, + "epoch": 1.9571106094808126, + "grad_norm": 17.087011337280273, + "learning_rate": 6.8790110511252195e-06, + "lm_loss": 5.6027, + "loss": 1.5883, + "step": 867, + "text_contrastive_loss": 0.7855 + }, + { + "contrastive_loss": 0.6424, + "epoch": 1.959367945823928, + "grad_norm": 14.936962127685547, + "learning_rate": 6.872285487262555e-06, + "lm_loss": 5.5884, + "loss": 1.6137, + "step": 868, + "text_contrastive_loss": 0.8249 + }, + { + "contrastive_loss": 0.6417, + "epoch": 1.9616252821670428, + "grad_norm": 16.661287307739258, + "learning_rate": 6.865555981042983e-06, + "lm_loss": 5.6825, + "loss": 1.668, + "step": 869, + "text_contrastive_loss": 0.9159 + }, + { + "contrastive_loss": 0.5622, + "epoch": 1.963882618510158, + "grad_norm": 15.579157829284668, + "learning_rate": 6.858822546636417e-06, + "lm_loss": 5.4974, + "loss": 1.508, + "step": 870, + "text_contrastive_loss": 0.7922 + }, + { + "contrastive_loss": 0.6186, + "epoch": 1.966139954853273, + "grad_norm": 15.09514331817627, + "learning_rate": 6.852085198221035e-06, + "lm_loss": 5.5999, + "loss": 1.6754, + "step": 871, + "text_contrastive_loss": 0.9935 + }, + { + "contrastive_loss": 0.668, + "epoch": 1.9683972911963883, + "grad_norm": 18.11931037902832, + "learning_rate": 6.845343949983258e-06, + "lm_loss": 5.6957, + "loss": 1.6854, + "step": 872, + "text_contrastive_loss": 0.8957 + }, + { + "contrastive_loss": 0.6113, + "epoch": 1.9706546275395034, + "grad_norm": 15.733857154846191, + "learning_rate": 6.838598816117725e-06, + "lm_loss": 5.6184, + "loss": 1.6452, + "step": 873, + "text_contrastive_loss": 0.944 + }, + { + "contrastive_loss": 0.6003, + "epoch": 1.9729119638826185, + "grad_norm": 15.424590110778809, + "learning_rate": 6.831849810827247e-06, + "lm_loss": 5.4789, + "loss": 1.5784, + "step": 874, + "text_contrastive_loss": 0.8603 + }, + { + "contrastive_loss": 0.6459, + "epoch": 1.9751693002257338, + "grad_norm": 17.472450256347656, + "learning_rate": 6.825096948322791e-06, + "lm_loss": 5.5224, + "loss": 1.6739, + "step": 875, + "text_contrastive_loss": 0.9516 + }, + { + "contrastive_loss": 0.6078, + "epoch": 1.9774266365688487, + "grad_norm": 17.250064849853516, + "learning_rate": 6.818340242823449e-06, + "lm_loss": 5.5071, + "loss": 1.545, + "step": 876, + "text_contrastive_loss": 0.7729 + }, + { + "contrastive_loss": 0.5681, + "epoch": 1.979683972911964, + "grad_norm": 15.3363618850708, + "learning_rate": 6.8115797085564e-06, + "lm_loss": 5.5644, + "loss": 1.5219, + "step": 877, + "text_contrastive_loss": 0.7947 + }, + { + "contrastive_loss": 0.6233, + "epoch": 1.981941309255079, + "grad_norm": 16.2225341796875, + "learning_rate": 6.804815359756887e-06, + "lm_loss": 5.6397, + "loss": 1.5594, + "step": 878, + "text_contrastive_loss": 0.7442 + }, + { + "contrastive_loss": 0.5095, + "epoch": 1.9841986455981941, + "grad_norm": 14.354838371276855, + "learning_rate": 6.798047210668185e-06, + "lm_loss": 5.5914, + "loss": 1.4432, + "step": 879, + "text_contrastive_loss": 0.7491 + }, + { + "contrastive_loss": 0.5826, + "epoch": 1.9864559819413092, + "grad_norm": 18.07027244567871, + "learning_rate": 6.7912752755415716e-06, + "lm_loss": 5.6192, + "loss": 1.5915, + "step": 880, + "text_contrastive_loss": 0.8941 + }, + { + "contrastive_loss": 0.6141, + "epoch": 1.9887133182844243, + "grad_norm": 16.308622360229492, + "learning_rate": 6.7844995686362955e-06, + "lm_loss": 5.6988, + "loss": 1.5604, + "step": 881, + "text_contrastive_loss": 0.7529 + }, + { + "contrastive_loss": 0.5646, + "epoch": 1.9909706546275396, + "grad_norm": 14.187543869018555, + "learning_rate": 6.777720104219548e-06, + "lm_loss": 5.6544, + "loss": 1.5361, + "step": 882, + "text_contrastive_loss": 0.812 + }, + { + "contrastive_loss": 0.6631, + "epoch": 1.9932279909706545, + "grad_norm": 16.99749755859375, + "learning_rate": 6.770936896566434e-06, + "lm_loss": 5.6296, + "loss": 1.6973, + "step": 883, + "text_contrastive_loss": 0.9424 + }, + { + "contrastive_loss": 0.5883, + "epoch": 1.9954853273137698, + "grad_norm": 16.888206481933594, + "learning_rate": 6.7641499599599355e-06, + "lm_loss": 5.6055, + "loss": 1.5547, + "step": 884, + "text_contrastive_loss": 0.8117 + }, + { + "contrastive_loss": 0.5973, + "epoch": 1.997742663656885, + "grad_norm": 16.451189041137695, + "learning_rate": 6.757359308690889e-06, + "lm_loss": 5.5911, + "loss": 1.6725, + "step": 885, + "text_contrastive_loss": 1.0321 + }, + { + "contrastive_loss": 0.3834, + "epoch": 2.0, + "grad_norm": 14.721372604370117, + "learning_rate": 6.750564957057958e-06, + "lm_loss": 5.4662, + "loss": 1.2558, + "step": 886, + "text_contrastive_loss": 0.6516 + }, + { + "contrastive_loss": 0.5205, + "epoch": 2.0022573363431153, + "grad_norm": 14.922968864440918, + "learning_rate": 6.743766919367588e-06, + "lm_loss": 5.5173, + "loss": 1.5126, + "step": 887, + "text_contrastive_loss": 0.8808 + }, + { + "contrastive_loss": 0.5919, + "epoch": 2.00451467268623, + "grad_norm": 13.492685317993164, + "learning_rate": 6.736965209933992e-06, + "lm_loss": 5.4521, + "loss": 1.5412, + "step": 888, + "text_contrastive_loss": 0.8082 + }, + { + "contrastive_loss": 0.5223, + "epoch": 2.0067720090293455, + "grad_norm": 14.532832145690918, + "learning_rate": 6.730159843079113e-06, + "lm_loss": 5.4331, + "loss": 1.5689, + "step": 889, + "text_contrastive_loss": 1.0067 + }, + { + "contrastive_loss": 0.5552, + "epoch": 2.0090293453724604, + "grad_norm": 15.606881141662598, + "learning_rate": 6.723350833132596e-06, + "lm_loss": 5.4307, + "loss": 1.546, + "step": 890, + "text_contrastive_loss": 0.8954 + }, + { + "contrastive_loss": 0.6329, + "epoch": 2.0112866817155757, + "grad_norm": 17.337417602539062, + "learning_rate": 6.716538194431754e-06, + "lm_loss": 5.6051, + "loss": 1.6961, + "step": 891, + "text_contrastive_loss": 1.0053 + }, + { + "contrastive_loss": 0.5831, + "epoch": 2.0135440180586905, + "grad_norm": 15.493812561035156, + "learning_rate": 6.7097219413215474e-06, + "lm_loss": 5.6355, + "loss": 1.5764, + "step": 892, + "text_contrastive_loss": 0.8596 + }, + { + "contrastive_loss": 0.5551, + "epoch": 2.015801354401806, + "grad_norm": 15.401676177978516, + "learning_rate": 6.702902088154539e-06, + "lm_loss": 5.6025, + "loss": 1.5234, + "step": 893, + "text_contrastive_loss": 0.8163 + }, + { + "contrastive_loss": 0.4718, + "epoch": 2.018058690744921, + "grad_norm": 15.74850082397461, + "learning_rate": 6.696078649290878e-06, + "lm_loss": 5.5779, + "loss": 1.4007, + "step": 894, + "text_contrastive_loss": 0.7422 + }, + { + "contrastive_loss": 0.4403, + "epoch": 2.020316027088036, + "grad_norm": 14.414772033691406, + "learning_rate": 6.689251639098261e-06, + "lm_loss": 5.6971, + "loss": 1.323, + "step": 895, + "text_contrastive_loss": 0.626 + }, + { + "contrastive_loss": 0.6067, + "epoch": 2.0225733634311513, + "grad_norm": 15.891339302062988, + "learning_rate": 6.682421071951907e-06, + "lm_loss": 5.665, + "loss": 1.6, + "step": 896, + "text_contrastive_loss": 0.8535 + }, + { + "contrastive_loss": 0.5972, + "epoch": 2.024830699774266, + "grad_norm": 15.956475257873535, + "learning_rate": 6.67558696223452e-06, + "lm_loss": 5.6644, + "loss": 1.609, + "step": 897, + "text_contrastive_loss": 0.8907 + }, + { + "contrastive_loss": 0.5028, + "epoch": 2.0270880361173815, + "grad_norm": 14.930764198303223, + "learning_rate": 6.668749324336268e-06, + "lm_loss": 5.6442, + "loss": 1.4141, + "step": 898, + "text_contrastive_loss": 0.6937 + }, + { + "contrastive_loss": 0.5613, + "epoch": 2.0293453724604964, + "grad_norm": 15.869874000549316, + "learning_rate": 6.661908172654746e-06, + "lm_loss": 5.5546, + "loss": 1.4967, + "step": 899, + "text_contrastive_loss": 0.7599 + }, + { + "contrastive_loss": 0.5959, + "epoch": 2.0316027088036117, + "grad_norm": 15.18961238861084, + "learning_rate": 6.65506352159495e-06, + "lm_loss": 5.5258, + "loss": 1.5579, + "step": 900, + "text_contrastive_loss": 0.8189 + }, + { + "contrastive_loss": 0.4927, + "epoch": 2.033860045146727, + "grad_norm": 14.179841041564941, + "learning_rate": 6.6482153855692395e-06, + "lm_loss": 5.6035, + "loss": 1.4198, + "step": 901, + "text_contrastive_loss": 0.7335 + }, + { + "contrastive_loss": 0.6173, + "epoch": 2.036117381489842, + "grad_norm": 16.063549041748047, + "learning_rate": 6.64136377899732e-06, + "lm_loss": 5.6174, + "loss": 1.5094, + "step": 902, + "text_contrastive_loss": 0.6607 + }, + { + "contrastive_loss": 0.6371, + "epoch": 2.038374717832957, + "grad_norm": 17.114227294921875, + "learning_rate": 6.6345087163061935e-06, + "lm_loss": 5.5659, + "loss": 1.7515, + "step": 903, + "text_contrastive_loss": 1.1155 + }, + { + "contrastive_loss": 0.5376, + "epoch": 2.040632054176072, + "grad_norm": 15.359448432922363, + "learning_rate": 6.627650211930152e-06, + "lm_loss": 5.535, + "loss": 1.6288, + "step": 904, + "text_contrastive_loss": 1.0754 + }, + { + "contrastive_loss": 0.5696, + "epoch": 2.0428893905191874, + "grad_norm": 14.724507331848145, + "learning_rate": 6.620788280310722e-06, + "lm_loss": 5.4937, + "loss": 1.5034, + "step": 905, + "text_contrastive_loss": 0.7688 + }, + { + "contrastive_loss": 0.5079, + "epoch": 2.0451467268623027, + "grad_norm": 14.867406845092773, + "learning_rate": 6.613922935896659e-06, + "lm_loss": 5.5898, + "loss": 1.401, + "step": 906, + "text_contrastive_loss": 0.6683 + }, + { + "contrastive_loss": 0.6476, + "epoch": 2.0474040632054176, + "grad_norm": 17.842845916748047, + "learning_rate": 6.607054193143894e-06, + "lm_loss": 5.5677, + "loss": 1.6823, + "step": 907, + "text_contrastive_loss": 0.9559 + }, + { + "contrastive_loss": 0.5156, + "epoch": 2.049661399548533, + "grad_norm": 14.921574592590332, + "learning_rate": 6.600182066515519e-06, + "lm_loss": 5.6815, + "loss": 1.409, + "step": 908, + "text_contrastive_loss": 0.6505 + }, + { + "contrastive_loss": 0.5357, + "epoch": 2.0519187358916477, + "grad_norm": 15.891834259033203, + "learning_rate": 6.593306570481751e-06, + "lm_loss": 5.5604, + "loss": 1.4846, + "step": 909, + "text_contrastive_loss": 0.7856 + }, + { + "contrastive_loss": 0.5784, + "epoch": 2.054176072234763, + "grad_norm": 17.075801849365234, + "learning_rate": 6.586427719519901e-06, + "lm_loss": 5.4482, + "loss": 1.5577, + "step": 910, + "text_contrastive_loss": 0.8688 + }, + { + "contrastive_loss": 0.5847, + "epoch": 2.056433408577878, + "grad_norm": 15.731554985046387, + "learning_rate": 6.579545528114344e-06, + "lm_loss": 5.6414, + "loss": 1.5577, + "step": 911, + "text_contrastive_loss": 0.8177 + }, + { + "contrastive_loss": 0.5448, + "epoch": 2.0586907449209932, + "grad_norm": 16.339303970336914, + "learning_rate": 6.572660010756489e-06, + "lm_loss": 5.4309, + "loss": 1.4814, + "step": 912, + "text_contrastive_loss": 0.7871 + }, + { + "contrastive_loss": 0.4855, + "epoch": 2.0609480812641086, + "grad_norm": 13.657450675964355, + "learning_rate": 6.565771181944747e-06, + "lm_loss": 5.5683, + "loss": 1.3892, + "step": 913, + "text_contrastive_loss": 0.6938 + }, + { + "contrastive_loss": 0.5451, + "epoch": 2.0632054176072234, + "grad_norm": 15.438440322875977, + "learning_rate": 6.558879056184505e-06, + "lm_loss": 5.657, + "loss": 1.5392, + "step": 914, + "text_contrastive_loss": 0.8569 + }, + { + "contrastive_loss": 0.5521, + "epoch": 2.0654627539503387, + "grad_norm": 14.946206092834473, + "learning_rate": 6.551983647988089e-06, + "lm_loss": 5.5941, + "loss": 1.5225, + "step": 915, + "text_contrastive_loss": 0.822 + }, + { + "contrastive_loss": 0.6857, + "epoch": 2.0677200902934536, + "grad_norm": 16.517148971557617, + "learning_rate": 6.545084971874738e-06, + "lm_loss": 5.5931, + "loss": 1.7039, + "step": 916, + "text_contrastive_loss": 0.9178 + }, + { + "contrastive_loss": 0.4806, + "epoch": 2.069977426636569, + "grad_norm": 16.00227165222168, + "learning_rate": 6.5381830423705714e-06, + "lm_loss": 5.5547, + "loss": 1.4019, + "step": 917, + "text_contrastive_loss": 0.7317 + }, + { + "contrastive_loss": 0.5729, + "epoch": 2.072234762979684, + "grad_norm": 16.76964569091797, + "learning_rate": 6.531277874008562e-06, + "lm_loss": 5.5874, + "loss": 1.6875, + "step": 918, + "text_contrastive_loss": 1.1116 + }, + { + "contrastive_loss": 0.4385, + "epoch": 2.074492099322799, + "grad_norm": 18.411714553833008, + "learning_rate": 6.5243694813284975e-06, + "lm_loss": 5.5367, + "loss": 1.431, + "step": 919, + "text_contrastive_loss": 0.8777 + }, + { + "contrastive_loss": 0.6399, + "epoch": 2.0767494356659144, + "grad_norm": 16.799848556518555, + "learning_rate": 6.517457878876958e-06, + "lm_loss": 5.5023, + "loss": 1.6103, + "step": 920, + "text_contrastive_loss": 0.8404 + }, + { + "contrastive_loss": 0.5781, + "epoch": 2.0790067720090293, + "grad_norm": 15.620450019836426, + "learning_rate": 6.510543081207281e-06, + "lm_loss": 5.6476, + "loss": 1.6084, + "step": 921, + "text_contrastive_loss": 0.931 + }, + { + "contrastive_loss": 0.5393, + "epoch": 2.0812641083521446, + "grad_norm": 14.422009468078613, + "learning_rate": 6.503625102879534e-06, + "lm_loss": 5.6779, + "loss": 1.4782, + "step": 922, + "text_contrastive_loss": 0.7423 + }, + { + "contrastive_loss": 0.5087, + "epoch": 2.0835214446952595, + "grad_norm": 15.963953018188477, + "learning_rate": 6.496703958460479e-06, + "lm_loss": 5.5043, + "loss": 1.4632, + "step": 923, + "text_contrastive_loss": 0.8081 + }, + { + "contrastive_loss": 0.6506, + "epoch": 2.0857787810383748, + "grad_norm": 16.180904388427734, + "learning_rate": 6.489779662523545e-06, + "lm_loss": 5.3862, + "loss": 1.6508, + "step": 924, + "text_contrastive_loss": 0.9232 + }, + { + "contrastive_loss": 0.5739, + "epoch": 2.0880361173814896, + "grad_norm": 16.602985382080078, + "learning_rate": 6.4828522296488014e-06, + "lm_loss": 5.666, + "loss": 1.5834, + "step": 925, + "text_contrastive_loss": 0.8859 + }, + { + "contrastive_loss": 0.6208, + "epoch": 2.090293453724605, + "grad_norm": 16.005596160888672, + "learning_rate": 6.475921674422917e-06, + "lm_loss": 5.5651, + "loss": 1.6567, + "step": 926, + "text_contrastive_loss": 0.9587 + }, + { + "contrastive_loss": 0.5289, + "epoch": 2.0925507900677203, + "grad_norm": 14.40626335144043, + "learning_rate": 6.4689880114391375e-06, + "lm_loss": 5.687, + "loss": 1.4922, + "step": 927, + "text_contrastive_loss": 0.7891 + }, + { + "contrastive_loss": 0.593, + "epoch": 2.094808126410835, + "grad_norm": 15.942220687866211, + "learning_rate": 6.462051255297255e-06, + "lm_loss": 5.6157, + "loss": 1.5557, + "step": 928, + "text_contrastive_loss": 0.8023 + }, + { + "contrastive_loss": 0.564, + "epoch": 2.0970654627539504, + "grad_norm": 15.784947395324707, + "learning_rate": 6.455111420603568e-06, + "lm_loss": 5.6122, + "loss": 1.5411, + "step": 929, + "text_contrastive_loss": 0.8316 + }, + { + "contrastive_loss": 0.5249, + "epoch": 2.0993227990970653, + "grad_norm": 17.09646224975586, + "learning_rate": 6.448168521970865e-06, + "lm_loss": 5.5094, + "loss": 1.4558, + "step": 930, + "text_contrastive_loss": 0.7599 + }, + { + "contrastive_loss": 0.6272, + "epoch": 2.1015801354401806, + "grad_norm": 17.021900177001953, + "learning_rate": 6.441222574018378e-06, + "lm_loss": 5.6399, + "loss": 1.6545, + "step": 931, + "text_contrastive_loss": 0.9266 + }, + { + "contrastive_loss": 0.5656, + "epoch": 2.1038374717832955, + "grad_norm": 17.24850082397461, + "learning_rate": 6.434273591371771e-06, + "lm_loss": 5.5486, + "loss": 1.6227, + "step": 932, + "text_contrastive_loss": 1.0044 + }, + { + "contrastive_loss": 0.4486, + "epoch": 2.106094808126411, + "grad_norm": 14.898802757263184, + "learning_rate": 6.427321588663085e-06, + "lm_loss": 5.568, + "loss": 1.3912, + "step": 933, + "text_contrastive_loss": 0.7716 + }, + { + "contrastive_loss": 0.5385, + "epoch": 2.108352144469526, + "grad_norm": 14.44471549987793, + "learning_rate": 6.42036658053073e-06, + "lm_loss": 5.5793, + "loss": 1.5247, + "step": 934, + "text_contrastive_loss": 0.8567 + }, + { + "contrastive_loss": 0.6541, + "epoch": 2.110609480812641, + "grad_norm": 17.068880081176758, + "learning_rate": 6.41340858161944e-06, + "lm_loss": 5.5587, + "loss": 1.707, + "step": 935, + "text_contrastive_loss": 0.9941 + }, + { + "contrastive_loss": 0.5043, + "epoch": 2.1128668171557563, + "grad_norm": 13.335604667663574, + "learning_rate": 6.406447606580248e-06, + "lm_loss": 5.5333, + "loss": 1.4185, + "step": 936, + "text_contrastive_loss": 0.7216 + }, + { + "contrastive_loss": 0.4877, + "epoch": 2.115124153498871, + "grad_norm": 13.42530632019043, + "learning_rate": 6.399483670070451e-06, + "lm_loss": 5.5543, + "loss": 1.4184, + "step": 937, + "text_contrastive_loss": 0.7505 + }, + { + "contrastive_loss": 0.5192, + "epoch": 2.1173814898419865, + "grad_norm": 14.670174598693848, + "learning_rate": 6.392516786753586e-06, + "lm_loss": 5.5975, + "loss": 1.4684, + "step": 938, + "text_contrastive_loss": 0.7788 + }, + { + "contrastive_loss": 0.4451, + "epoch": 2.119638826185102, + "grad_norm": 14.285451889038086, + "learning_rate": 6.385546971299389e-06, + "lm_loss": 5.606, + "loss": 1.3854, + "step": 939, + "text_contrastive_loss": 0.7594 + }, + { + "contrastive_loss": 0.4997, + "epoch": 2.1218961625282167, + "grad_norm": 15.921854019165039, + "learning_rate": 6.378574238383776e-06, + "lm_loss": 5.4192, + "loss": 1.447, + "step": 940, + "text_contrastive_loss": 0.8108 + }, + { + "contrastive_loss": 0.47, + "epoch": 2.124153498871332, + "grad_norm": 13.7954740524292, + "learning_rate": 6.3715986026888046e-06, + "lm_loss": 5.5366, + "loss": 1.3794, + "step": 941, + "text_contrastive_loss": 0.7116 + }, + { + "contrastive_loss": 0.5892, + "epoch": 2.126410835214447, + "grad_norm": 16.45612335205078, + "learning_rate": 6.3646200789026426e-06, + "lm_loss": 5.445, + "loss": 1.6059, + "step": 942, + "text_contrastive_loss": 0.9444 + }, + { + "contrastive_loss": 0.5043, + "epoch": 2.128668171557562, + "grad_norm": 17.27448844909668, + "learning_rate": 6.35763868171954e-06, + "lm_loss": 5.6406, + "loss": 1.5254, + "step": 943, + "text_contrastive_loss": 0.9139 + }, + { + "contrastive_loss": 0.5443, + "epoch": 2.130925507900677, + "grad_norm": 16.419078826904297, + "learning_rate": 6.350654425839799e-06, + "lm_loss": 5.5499, + "loss": 1.5785, + "step": 944, + "text_contrastive_loss": 0.9585 + }, + { + "contrastive_loss": 0.6089, + "epoch": 2.1331828442437923, + "grad_norm": 16.696115493774414, + "learning_rate": 6.343667325969736e-06, + "lm_loss": 5.509, + "loss": 1.564, + "step": 945, + "text_contrastive_loss": 0.8083 + }, + { + "contrastive_loss": 0.6238, + "epoch": 2.1354401805869077, + "grad_norm": 17.05726432800293, + "learning_rate": 6.336677396821663e-06, + "lm_loss": 5.505, + "loss": 1.6649, + "step": 946, + "text_contrastive_loss": 0.9813 + }, + { + "contrastive_loss": 0.4916, + "epoch": 2.1376975169300225, + "grad_norm": 13.684940338134766, + "learning_rate": 6.3296846531138445e-06, + "lm_loss": 5.6108, + "loss": 1.4635, + "step": 947, + "text_contrastive_loss": 0.8216 + }, + { + "contrastive_loss": 0.5568, + "epoch": 2.139954853273138, + "grad_norm": 14.160411834716797, + "learning_rate": 6.322689109570472e-06, + "lm_loss": 5.6072, + "loss": 1.5434, + "step": 948, + "text_contrastive_loss": 0.8518 + }, + { + "contrastive_loss": 0.6086, + "epoch": 2.1422121896162527, + "grad_norm": 17.340606689453125, + "learning_rate": 6.315690780921634e-06, + "lm_loss": 5.5506, + "loss": 1.58, + "step": 949, + "text_contrastive_loss": 0.8325 + }, + { + "contrastive_loss": 0.541, + "epoch": 2.144469525959368, + "grad_norm": 14.390788078308105, + "learning_rate": 6.3086896819032814e-06, + "lm_loss": 5.6194, + "loss": 1.4609, + "step": 950, + "text_contrastive_loss": 0.716 + }, + { + "contrastive_loss": 0.5307, + "epoch": 2.146726862302483, + "grad_norm": 16.99388885498047, + "learning_rate": 6.301685827257202e-06, + "lm_loss": 5.6178, + "loss": 1.5383, + "step": 951, + "text_contrastive_loss": 0.8917 + }, + { + "contrastive_loss": 0.5455, + "epoch": 2.148984198645598, + "grad_norm": 14.5534029006958, + "learning_rate": 6.294679231730983e-06, + "lm_loss": 5.4725, + "loss": 1.5116, + "step": 952, + "text_contrastive_loss": 0.8377 + }, + { + "contrastive_loss": 0.593, + "epoch": 2.1512415349887135, + "grad_norm": 16.284915924072266, + "learning_rate": 6.2876699100779815e-06, + "lm_loss": 5.5311, + "loss": 1.5691, + "step": 953, + "text_contrastive_loss": 0.8461 + }, + { + "contrastive_loss": 0.5442, + "epoch": 2.1534988713318284, + "grad_norm": 14.362798690795898, + "learning_rate": 6.2806578770573e-06, + "lm_loss": 5.568, + "loss": 1.4761, + "step": 954, + "text_contrastive_loss": 0.7503 + }, + { + "contrastive_loss": 0.4771, + "epoch": 2.1557562076749437, + "grad_norm": 14.400273323059082, + "learning_rate": 6.273643147433743e-06, + "lm_loss": 5.6257, + "loss": 1.4969, + "step": 955, + "text_contrastive_loss": 0.9144 + }, + { + "contrastive_loss": 0.4491, + "epoch": 2.1580135440180586, + "grad_norm": 14.197193145751953, + "learning_rate": 6.266625735977802e-06, + "lm_loss": 5.5312, + "loss": 1.3221, + "step": 956, + "text_contrastive_loss": 0.6398 + }, + { + "contrastive_loss": 0.5357, + "epoch": 2.160270880361174, + "grad_norm": 14.38227367401123, + "learning_rate": 6.259605657465607e-06, + "lm_loss": 5.6018, + "loss": 1.5223, + "step": 957, + "text_contrastive_loss": 0.8529 + }, + { + "contrastive_loss": 0.5237, + "epoch": 2.1625282167042887, + "grad_norm": 13.620275497436523, + "learning_rate": 6.252582926678908e-06, + "lm_loss": 5.611, + "loss": 1.4757, + "step": 958, + "text_contrastive_loss": 0.7819 + }, + { + "contrastive_loss": 0.5603, + "epoch": 2.164785553047404, + "grad_norm": 16.51104164123535, + "learning_rate": 6.24555755840504e-06, + "lm_loss": 5.759, + "loss": 1.5847, + "step": 959, + "text_contrastive_loss": 0.897 + }, + { + "contrastive_loss": 0.5889, + "epoch": 2.1670428893905194, + "grad_norm": 15.760680198669434, + "learning_rate": 6.238529567436892e-06, + "lm_loss": 5.528, + "loss": 1.5646, + "step": 960, + "text_contrastive_loss": 0.8456 + }, + { + "contrastive_loss": 0.5447, + "epoch": 2.1693002257336342, + "grad_norm": 15.119182586669922, + "learning_rate": 6.231498968572872e-06, + "lm_loss": 5.5281, + "loss": 1.5874, + "step": 961, + "text_contrastive_loss": 0.9797 + }, + { + "contrastive_loss": 0.5813, + "epoch": 2.1715575620767495, + "grad_norm": 15.867561340332031, + "learning_rate": 6.224465776616883e-06, + "lm_loss": 5.5412, + "loss": 1.5815, + "step": 962, + "text_contrastive_loss": 0.8922 + }, + { + "contrastive_loss": 0.6029, + "epoch": 2.1738148984198644, + "grad_norm": 16.155677795410156, + "learning_rate": 6.217430006378285e-06, + "lm_loss": 5.6498, + "loss": 1.6473, + "step": 963, + "text_contrastive_loss": 0.9588 + }, + { + "contrastive_loss": 0.4913, + "epoch": 2.1760722347629797, + "grad_norm": 14.698906898498535, + "learning_rate": 6.210391672671869e-06, + "lm_loss": 5.4799, + "loss": 1.3923, + "step": 964, + "text_contrastive_loss": 0.7059 + }, + { + "contrastive_loss": 0.5443, + "epoch": 2.1783295711060946, + "grad_norm": 15.556617736816406, + "learning_rate": 6.203350790317825e-06, + "lm_loss": 5.6059, + "loss": 1.5384, + "step": 965, + "text_contrastive_loss": 0.8669 + }, + { + "contrastive_loss": 0.6068, + "epoch": 2.18058690744921, + "grad_norm": 15.900672912597656, + "learning_rate": 6.196307374141707e-06, + "lm_loss": 5.475, + "loss": 1.6357, + "step": 966, + "text_contrastive_loss": 0.9627 + }, + { + "contrastive_loss": 0.625, + "epoch": 2.1828442437923252, + "grad_norm": 17.61025047302246, + "learning_rate": 6.189261438974403e-06, + "lm_loss": 5.4916, + "loss": 1.5829, + "step": 967, + "text_contrastive_loss": 0.8175 + }, + { + "contrastive_loss": 0.5096, + "epoch": 2.18510158013544, + "grad_norm": 15.51711654663086, + "learning_rate": 6.1822129996521105e-06, + "lm_loss": 5.6518, + "loss": 1.449, + "step": 968, + "text_contrastive_loss": 0.7483 + }, + { + "contrastive_loss": 0.6001, + "epoch": 2.1873589164785554, + "grad_norm": 16.458173751831055, + "learning_rate": 6.175162071016295e-06, + "lm_loss": 5.6193, + "loss": 1.6752, + "step": 969, + "text_contrastive_loss": 1.0262 + }, + { + "contrastive_loss": 0.619, + "epoch": 2.1896162528216703, + "grad_norm": 16.29840087890625, + "learning_rate": 6.168108667913666e-06, + "lm_loss": 5.5032, + "loss": 1.594, + "step": 970, + "text_contrastive_loss": 0.8493 + }, + { + "contrastive_loss": 0.5649, + "epoch": 2.1918735891647856, + "grad_norm": 14.947978019714355, + "learning_rate": 6.161052805196141e-06, + "lm_loss": 5.5425, + "loss": 1.5648, + "step": 971, + "text_contrastive_loss": 0.8913 + }, + { + "contrastive_loss": 0.4871, + "epoch": 2.194130925507901, + "grad_norm": 13.491371154785156, + "learning_rate": 6.15399449772082e-06, + "lm_loss": 5.5075, + "loss": 1.4412, + "step": 972, + "text_contrastive_loss": 0.8069 + }, + { + "contrastive_loss": 0.4715, + "epoch": 2.1963882618510158, + "grad_norm": 15.550296783447266, + "learning_rate": 6.146933760349947e-06, + "lm_loss": 5.59, + "loss": 1.346, + "step": 973, + "text_contrastive_loss": 0.6311 + }, + { + "contrastive_loss": 0.5665, + "epoch": 2.198645598194131, + "grad_norm": 15.592700004577637, + "learning_rate": 6.139870607950885e-06, + "lm_loss": 5.5149, + "loss": 1.576, + "step": 974, + "text_contrastive_loss": 0.9159 + }, + { + "contrastive_loss": 0.5757, + "epoch": 2.200902934537246, + "grad_norm": 15.696521759033203, + "learning_rate": 6.1328050553960804e-06, + "lm_loss": 5.6159, + "loss": 1.5487, + "step": 975, + "text_contrastive_loss": 0.8227 + }, + { + "contrastive_loss": 0.5798, + "epoch": 2.2031602708803613, + "grad_norm": 17.08321189880371, + "learning_rate": 6.1257371175630375e-06, + "lm_loss": 5.6681, + "loss": 1.5939, + "step": 976, + "text_contrastive_loss": 0.8946 + }, + { + "contrastive_loss": 0.6711, + "epoch": 2.205417607223476, + "grad_norm": 16.547780990600586, + "learning_rate": 6.118666809334277e-06, + "lm_loss": 5.5481, + "loss": 1.7015, + "step": 977, + "text_contrastive_loss": 0.9512 + }, + { + "contrastive_loss": 0.5117, + "epoch": 2.2076749435665914, + "grad_norm": 14.462315559387207, + "learning_rate": 6.111594145597319e-06, + "lm_loss": 5.4319, + "loss": 1.4326, + "step": 978, + "text_contrastive_loss": 0.7555 + }, + { + "contrastive_loss": 0.5518, + "epoch": 2.2099322799097068, + "grad_norm": 13.699687957763672, + "learning_rate": 6.104519141244631e-06, + "lm_loss": 5.414, + "loss": 1.5038, + "step": 979, + "text_contrastive_loss": 0.8212 + }, + { + "contrastive_loss": 0.5758, + "epoch": 2.2121896162528216, + "grad_norm": 15.209568977355957, + "learning_rate": 6.0974418111736235e-06, + "lm_loss": 5.5458, + "loss": 1.5145, + "step": 980, + "text_contrastive_loss": 0.7682 + }, + { + "contrastive_loss": 0.5101, + "epoch": 2.214446952595937, + "grad_norm": 15.351068496704102, + "learning_rate": 6.090362170286591e-06, + "lm_loss": 5.5988, + "loss": 1.4326, + "step": 981, + "text_contrastive_loss": 0.7253 + }, + { + "contrastive_loss": 0.6009, + "epoch": 2.216704288939052, + "grad_norm": 16.76323699951172, + "learning_rate": 6.0832802334907044e-06, + "lm_loss": 5.5892, + "loss": 1.6298, + "step": 982, + "text_contrastive_loss": 0.94 + }, + { + "contrastive_loss": 0.4723, + "epoch": 2.218961625282167, + "grad_norm": 15.08696174621582, + "learning_rate": 6.076196015697963e-06, + "lm_loss": 5.5802, + "loss": 1.4103, + "step": 983, + "text_contrastive_loss": 0.7599 + }, + { + "contrastive_loss": 0.4897, + "epoch": 2.221218961625282, + "grad_norm": 15.753348350524902, + "learning_rate": 6.069109531825169e-06, + "lm_loss": 5.7111, + "loss": 1.4806, + "step": 984, + "text_contrastive_loss": 0.8395 + }, + { + "contrastive_loss": 0.4869, + "epoch": 2.2234762979683973, + "grad_norm": 15.647372245788574, + "learning_rate": 6.0620207967939e-06, + "lm_loss": 5.5092, + "loss": 1.4976, + "step": 985, + "text_contrastive_loss": 0.9195 + }, + { + "contrastive_loss": 0.5674, + "epoch": 2.2257336343115126, + "grad_norm": 16.478822708129883, + "learning_rate": 6.054929825530469e-06, + "lm_loss": 5.5507, + "loss": 1.5417, + "step": 986, + "text_contrastive_loss": 0.8385 + }, + { + "contrastive_loss": 0.4659, + "epoch": 2.2279909706546275, + "grad_norm": 14.923596382141113, + "learning_rate": 6.047836632965901e-06, + "lm_loss": 5.6204, + "loss": 1.4151, + "step": 987, + "text_contrastive_loss": 0.7743 + }, + { + "contrastive_loss": 0.6934, + "epoch": 2.230248306997743, + "grad_norm": 17.369243621826172, + "learning_rate": 6.040741234035898e-06, + "lm_loss": 5.6445, + "loss": 1.7237, + "step": 988, + "text_contrastive_loss": 0.9319 + }, + { + "contrastive_loss": 0.5848, + "epoch": 2.2325056433408577, + "grad_norm": 18.03710174560547, + "learning_rate": 6.0336436436808054e-06, + "lm_loss": 5.642, + "loss": 1.5677, + "step": 989, + "text_contrastive_loss": 0.8374 + }, + { + "contrastive_loss": 0.5226, + "epoch": 2.234762979683973, + "grad_norm": 14.763822555541992, + "learning_rate": 6.026543876845586e-06, + "lm_loss": 5.6278, + "loss": 1.5478, + "step": 990, + "text_contrastive_loss": 0.9248 + }, + { + "contrastive_loss": 0.5392, + "epoch": 2.237020316027088, + "grad_norm": 15.49229621887207, + "learning_rate": 6.019441948479784e-06, + "lm_loss": 5.5376, + "loss": 1.5442, + "step": 991, + "text_contrastive_loss": 0.9026 + }, + { + "contrastive_loss": 0.5638, + "epoch": 2.239277652370203, + "grad_norm": 15.575782775878906, + "learning_rate": 6.012337873537494e-06, + "lm_loss": 5.5279, + "loss": 1.4383, + "step": 992, + "text_contrastive_loss": 0.6434 + }, + { + "contrastive_loss": 0.5159, + "epoch": 2.2415349887133185, + "grad_norm": 16.054767608642578, + "learning_rate": 6.005231666977331e-06, + "lm_loss": 5.5313, + "loss": 1.4993, + "step": 993, + "text_contrastive_loss": 0.8606 + }, + { + "contrastive_loss": 0.6831, + "epoch": 2.2437923250564333, + "grad_norm": 18.615949630737305, + "learning_rate": 5.998123343762403e-06, + "lm_loss": 5.6966, + "loss": 1.6825, + "step": 994, + "text_contrastive_loss": 0.8595 + }, + { + "contrastive_loss": 0.4579, + "epoch": 2.2460496613995486, + "grad_norm": 14.217131614685059, + "learning_rate": 5.9910129188602665e-06, + "lm_loss": 5.6927, + "loss": 1.4118, + "step": 995, + "text_contrastive_loss": 0.7694 + }, + { + "contrastive_loss": 0.4881, + "epoch": 2.2483069977426635, + "grad_norm": 13.442610740661621, + "learning_rate": 5.983900407242911e-06, + "lm_loss": 5.5797, + "loss": 1.4403, + "step": 996, + "text_contrastive_loss": 0.7884 + }, + { + "contrastive_loss": 0.5378, + "epoch": 2.250564334085779, + "grad_norm": 16.705442428588867, + "learning_rate": 5.976785823886713e-06, + "lm_loss": 5.4976, + "loss": 1.5061, + "step": 997, + "text_contrastive_loss": 0.8372 + }, + { + "contrastive_loss": 0.5148, + "epoch": 2.2528216704288937, + "grad_norm": 14.701661109924316, + "learning_rate": 5.96966918377242e-06, + "lm_loss": 5.3676, + "loss": 1.4534, + "step": 998, + "text_contrastive_loss": 0.8037 + }, + { + "contrastive_loss": 0.5229, + "epoch": 2.255079006772009, + "grad_norm": 16.033830642700195, + "learning_rate": 5.9625505018851e-06, + "lm_loss": 5.4788, + "loss": 1.4674, + "step": 999, + "text_contrastive_loss": 0.7932 + }, + { + "contrastive_loss": 0.5061, + "epoch": 2.2573363431151243, + "grad_norm": 15.547621726989746, + "learning_rate": 5.955429793214129e-06, + "lm_loss": 5.5908, + "loss": 1.454, + "step": 1000, + "text_contrastive_loss": 0.7777 + }, + { + "contrastive_loss": 0.6188, + "epoch": 2.259593679458239, + "grad_norm": 18.8467960357666, + "learning_rate": 5.948307072753146e-06, + "lm_loss": 5.5699, + "loss": 1.5722, + "step": 1001, + "text_contrastive_loss": 0.7927 + }, + { + "contrastive_loss": 0.5788, + "epoch": 2.2618510158013545, + "grad_norm": 14.916223526000977, + "learning_rate": 5.941182355500028e-06, + "lm_loss": 5.6379, + "loss": 1.5869, + "step": 1002, + "text_contrastive_loss": 0.8886 + }, + { + "contrastive_loss": 0.4433, + "epoch": 2.2641083521444694, + "grad_norm": 14.932116508483887, + "learning_rate": 5.934055656456855e-06, + "lm_loss": 5.5532, + "loss": 1.4275, + "step": 1003, + "text_contrastive_loss": 0.8576 + }, + { + "contrastive_loss": 0.4633, + "epoch": 2.2663656884875847, + "grad_norm": 13.377215385437012, + "learning_rate": 5.926926990629883e-06, + "lm_loss": 5.4663, + "loss": 1.3174, + "step": 1004, + "text_contrastive_loss": 0.6149 + }, + { + "contrastive_loss": 0.5241, + "epoch": 2.2686230248307, + "grad_norm": 14.37695598602295, + "learning_rate": 5.919796373029504e-06, + "lm_loss": 5.5585, + "loss": 1.5059, + "step": 1005, + "text_contrastive_loss": 0.8521 + }, + { + "contrastive_loss": 0.4805, + "epoch": 2.270880361173815, + "grad_norm": 15.076510429382324, + "learning_rate": 5.912663818670224e-06, + "lm_loss": 5.5408, + "loss": 1.4197, + "step": 1006, + "text_contrastive_loss": 0.7702 + }, + { + "contrastive_loss": 0.4996, + "epoch": 2.27313769751693, + "grad_norm": 14.953193664550781, + "learning_rate": 5.905529342570627e-06, + "lm_loss": 5.5402, + "loss": 1.429, + "step": 1007, + "text_contrastive_loss": 0.7507 + }, + { + "contrastive_loss": 0.4851, + "epoch": 2.275395033860045, + "grad_norm": 15.045732498168945, + "learning_rate": 5.898392959753343e-06, + "lm_loss": 5.569, + "loss": 1.4581, + "step": 1008, + "text_contrastive_loss": 0.8322 + }, + { + "contrastive_loss": 0.6655, + "epoch": 2.2776523702031604, + "grad_norm": 17.517780303955078, + "learning_rate": 5.8912546852450116e-06, + "lm_loss": 5.5835, + "loss": 1.655, + "step": 1009, + "text_contrastive_loss": 0.8622 + }, + { + "contrastive_loss": 0.4815, + "epoch": 2.2799097065462752, + "grad_norm": 15.604001998901367, + "learning_rate": 5.8841145340762665e-06, + "lm_loss": 5.5648, + "loss": 1.4179, + "step": 1010, + "text_contrastive_loss": 0.7598 + }, + { + "contrastive_loss": 0.447, + "epoch": 2.2821670428893905, + "grad_norm": 14.039140701293945, + "learning_rate": 5.876972521281683e-06, + "lm_loss": 5.4931, + "loss": 1.3804, + "step": 1011, + "text_contrastive_loss": 0.7682 + }, + { + "contrastive_loss": 0.485, + "epoch": 2.2844243792325054, + "grad_norm": 14.94192886352539, + "learning_rate": 5.869828661899761e-06, + "lm_loss": 5.6206, + "loss": 1.4788, + "step": 1012, + "text_contrastive_loss": 0.8635 + }, + { + "contrastive_loss": 0.4348, + "epoch": 2.2866817155756207, + "grad_norm": 13.55521297454834, + "learning_rate": 5.862682970972888e-06, + "lm_loss": 5.5266, + "loss": 1.35, + "step": 1013, + "text_contrastive_loss": 0.725 + }, + { + "contrastive_loss": 0.572, + "epoch": 2.288939051918736, + "grad_norm": 16.7547664642334, + "learning_rate": 5.855535463547309e-06, + "lm_loss": 5.4203, + "loss": 1.5671, + "step": 1014, + "text_contrastive_loss": 0.9061 + }, + { + "contrastive_loss": 0.579, + "epoch": 2.291196388261851, + "grad_norm": 16.843368530273438, + "learning_rate": 5.8483861546730915e-06, + "lm_loss": 5.5052, + "loss": 1.5681, + "step": 1015, + "text_contrastive_loss": 0.8772 + }, + { + "contrastive_loss": 0.5231, + "epoch": 2.293453724604966, + "grad_norm": 16.329423904418945, + "learning_rate": 5.841235059404097e-06, + "lm_loss": 5.4309, + "loss": 1.473, + "step": 1016, + "text_contrastive_loss": 0.8135 + }, + { + "contrastive_loss": 0.4935, + "epoch": 2.295711060948081, + "grad_norm": 16.164878845214844, + "learning_rate": 5.834082192797948e-06, + "lm_loss": 5.5583, + "loss": 1.4542, + "step": 1017, + "text_contrastive_loss": 0.8098 + }, + { + "contrastive_loss": 0.6854, + "epoch": 2.2979683972911964, + "grad_norm": 16.56847381591797, + "learning_rate": 5.826927569915999e-06, + "lm_loss": 5.5319, + "loss": 1.7242, + "step": 1018, + "text_contrastive_loss": 0.9713 + }, + { + "contrastive_loss": 0.58, + "epoch": 2.3002257336343117, + "grad_norm": 16.325702667236328, + "learning_rate": 5.819771205823303e-06, + "lm_loss": 5.5061, + "loss": 1.6016, + "step": 1019, + "text_contrastive_loss": 0.9418 + }, + { + "contrastive_loss": 0.5142, + "epoch": 2.3024830699774266, + "grad_norm": 17.441755294799805, + "learning_rate": 5.812613115588575e-06, + "lm_loss": 5.5065, + "loss": 1.4703, + "step": 1020, + "text_contrastive_loss": 0.8108 + }, + { + "contrastive_loss": 0.447, + "epoch": 2.304740406320542, + "grad_norm": 15.045120239257812, + "learning_rate": 5.805453314284168e-06, + "lm_loss": 5.6466, + "loss": 1.3683, + "step": 1021, + "text_contrastive_loss": 0.7133 + }, + { + "contrastive_loss": 0.5044, + "epoch": 2.3069977426636568, + "grad_norm": 15.242061614990234, + "learning_rate": 5.7982918169860395e-06, + "lm_loss": 5.4769, + "loss": 1.4511, + "step": 1022, + "text_contrastive_loss": 0.798 + }, + { + "contrastive_loss": 0.4894, + "epoch": 2.309255079006772, + "grad_norm": 15.019240379333496, + "learning_rate": 5.791128638773711e-06, + "lm_loss": 5.4992, + "loss": 1.4205, + "step": 1023, + "text_contrastive_loss": 0.7624 + }, + { + "contrastive_loss": 0.4916, + "epoch": 2.311512415349887, + "grad_norm": 14.840372085571289, + "learning_rate": 5.783963794730254e-06, + "lm_loss": 5.5569, + "loss": 1.3969, + "step": 1024, + "text_contrastive_loss": 0.6992 + }, + { + "contrastive_loss": 0.56, + "epoch": 2.3137697516930023, + "grad_norm": 15.438685417175293, + "learning_rate": 5.776797299942236e-06, + "lm_loss": 5.6762, + "loss": 1.5331, + "step": 1025, + "text_contrastive_loss": 0.8109 + }, + { + "contrastive_loss": 0.5842, + "epoch": 2.3160270880361176, + "grad_norm": 16.042137145996094, + "learning_rate": 5.7696291694997105e-06, + "lm_loss": 5.5272, + "loss": 1.5411, + "step": 1026, + "text_contrastive_loss": 0.8084 + }, + { + "contrastive_loss": 0.6225, + "epoch": 2.3182844243792324, + "grad_norm": 17.81473731994629, + "learning_rate": 5.762459418496169e-06, + "lm_loss": 5.535, + "loss": 1.6085, + "step": 1027, + "text_contrastive_loss": 0.8649 + }, + { + "contrastive_loss": 0.5751, + "epoch": 2.3205417607223477, + "grad_norm": 16.053951263427734, + "learning_rate": 5.755288062028519e-06, + "lm_loss": 5.5689, + "loss": 1.5817, + "step": 1028, + "text_contrastive_loss": 0.8994 + }, + { + "contrastive_loss": 0.442, + "epoch": 2.3227990970654626, + "grad_norm": 12.914002418518066, + "learning_rate": 5.748115115197045e-06, + "lm_loss": 5.5298, + "loss": 1.3506, + "step": 1029, + "text_contrastive_loss": 0.7112 + }, + { + "contrastive_loss": 0.5847, + "epoch": 2.325056433408578, + "grad_norm": 15.449382781982422, + "learning_rate": 5.740940593105383e-06, + "lm_loss": 5.4938, + "loss": 1.4823, + "step": 1030, + "text_contrastive_loss": 0.6964 + }, + { + "contrastive_loss": 0.5351, + "epoch": 2.327313769751693, + "grad_norm": 15.321664810180664, + "learning_rate": 5.733764510860482e-06, + "lm_loss": 5.5159, + "loss": 1.4961, + "step": 1031, + "text_contrastive_loss": 0.8188 + }, + { + "contrastive_loss": 0.4763, + "epoch": 2.329571106094808, + "grad_norm": 15.322822570800781, + "learning_rate": 5.726586883572584e-06, + "lm_loss": 5.5235, + "loss": 1.4653, + "step": 1032, + "text_contrastive_loss": 0.8732 + }, + { + "contrastive_loss": 0.5442, + "epoch": 2.3318284424379234, + "grad_norm": 15.321481704711914, + "learning_rate": 5.719407726355174e-06, + "lm_loss": 5.4556, + "loss": 1.4708, + "step": 1033, + "text_contrastive_loss": 0.7621 + }, + { + "contrastive_loss": 0.5535, + "epoch": 2.3340857787810383, + "grad_norm": 17.706205368041992, + "learning_rate": 5.712227054324968e-06, + "lm_loss": 5.475, + "loss": 1.5449, + "step": 1034, + "text_contrastive_loss": 0.8878 + }, + { + "contrastive_loss": 0.5824, + "epoch": 2.3363431151241536, + "grad_norm": 15.88497543334961, + "learning_rate": 5.705044882601862e-06, + "lm_loss": 5.5598, + "loss": 1.5435, + "step": 1035, + "text_contrastive_loss": 0.8102 + }, + { + "contrastive_loss": 0.5771, + "epoch": 2.3386004514672685, + "grad_norm": 16.463577270507812, + "learning_rate": 5.697861226308923e-06, + "lm_loss": 5.6166, + "loss": 1.6027, + "step": 1036, + "text_contrastive_loss": 0.9279 + }, + { + "contrastive_loss": 0.6645, + "epoch": 2.340857787810384, + "grad_norm": 15.305360794067383, + "learning_rate": 5.69067610057233e-06, + "lm_loss": 5.4175, + "loss": 1.6381, + "step": 1037, + "text_contrastive_loss": 0.8637 + }, + { + "contrastive_loss": 0.5334, + "epoch": 2.343115124153499, + "grad_norm": 14.267290115356445, + "learning_rate": 5.683489520521365e-06, + "lm_loss": 5.6242, + "loss": 1.509, + "step": 1038, + "text_contrastive_loss": 0.8262 + }, + { + "contrastive_loss": 0.6476, + "epoch": 2.345372460496614, + "grad_norm": 16.81133460998535, + "learning_rate": 5.6763015012883686e-06, + "lm_loss": 5.4711, + "loss": 1.6709, + "step": 1039, + "text_contrastive_loss": 0.9524 + }, + { + "contrastive_loss": 0.5176, + "epoch": 2.3476297968397293, + "grad_norm": 13.270197868347168, + "learning_rate": 5.6691120580087126e-06, + "lm_loss": 5.5653, + "loss": 1.3955, + "step": 1040, + "text_contrastive_loss": 0.6428 + }, + { + "contrastive_loss": 0.5129, + "epoch": 2.349887133182844, + "grad_norm": 12.754667282104492, + "learning_rate": 5.661921205820767e-06, + "lm_loss": 5.6232, + "loss": 1.4187, + "step": 1041, + "text_contrastive_loss": 0.6868 + }, + { + "contrastive_loss": 0.5021, + "epoch": 2.3521444695259595, + "grad_norm": 14.27110767364502, + "learning_rate": 5.654728959865872e-06, + "lm_loss": 5.4533, + "loss": 1.4623, + "step": 1042, + "text_contrastive_loss": 0.8298 + }, + { + "contrastive_loss": 0.5154, + "epoch": 2.3544018058690743, + "grad_norm": 15.107547760009766, + "learning_rate": 5.647535335288296e-06, + "lm_loss": 5.6959, + "loss": 1.5089, + "step": 1043, + "text_contrastive_loss": 0.8478 + }, + { + "contrastive_loss": 0.4731, + "epoch": 2.3566591422121896, + "grad_norm": 13.888667106628418, + "learning_rate": 5.640340347235215e-06, + "lm_loss": 5.703, + "loss": 1.4613, + "step": 1044, + "text_contrastive_loss": 0.8357 + }, + { + "contrastive_loss": 0.6407, + "epoch": 2.3589164785553045, + "grad_norm": 16.244823455810547, + "learning_rate": 5.6331440108566735e-06, + "lm_loss": 5.7367, + "loss": 1.6674, + "step": 1045, + "text_contrastive_loss": 0.906 + }, + { + "contrastive_loss": 0.5418, + "epoch": 2.36117381489842, + "grad_norm": 16.122529983520508, + "learning_rate": 5.6259463413055604e-06, + "lm_loss": 5.4931, + "loss": 1.5393, + "step": 1046, + "text_contrastive_loss": 0.8965 + }, + { + "contrastive_loss": 0.4846, + "epoch": 2.363431151241535, + "grad_norm": 13.905860900878906, + "learning_rate": 5.6187473537375635e-06, + "lm_loss": 5.6189, + "loss": 1.4072, + "step": 1047, + "text_contrastive_loss": 0.7214 + }, + { + "contrastive_loss": 0.4365, + "epoch": 2.36568848758465, + "grad_norm": 15.10843563079834, + "learning_rate": 5.611547063311152e-06, + "lm_loss": 5.5181, + "loss": 1.3854, + "step": 1048, + "text_contrastive_loss": 0.7942 + }, + { + "contrastive_loss": 0.6531, + "epoch": 2.3679458239277653, + "grad_norm": 15.9534912109375, + "learning_rate": 5.604345485187535e-06, + "lm_loss": 5.5287, + "loss": 1.6077, + "step": 1049, + "text_contrastive_loss": 0.8036 + }, + { + "contrastive_loss": 0.5596, + "epoch": 2.37020316027088, + "grad_norm": 15.557417869567871, + "learning_rate": 5.597142634530639e-06, + "lm_loss": 5.4709, + "loss": 1.5331, + "step": 1050, + "text_contrastive_loss": 0.8528 + }, + { + "contrastive_loss": 0.4985, + "epoch": 2.3724604966139955, + "grad_norm": 14.158095359802246, + "learning_rate": 5.589938526507059e-06, + "lm_loss": 5.6086, + "loss": 1.4683, + "step": 1051, + "text_contrastive_loss": 0.818 + }, + { + "contrastive_loss": 0.5393, + "epoch": 2.374717832957111, + "grad_norm": 16.066301345825195, + "learning_rate": 5.582733176286048e-06, + "lm_loss": 5.5186, + "loss": 1.498, + "step": 1052, + "text_contrastive_loss": 0.8138 + }, + { + "contrastive_loss": 0.4833, + "epoch": 2.3769751693002257, + "grad_norm": 14.408507347106934, + "learning_rate": 5.575526599039472e-06, + "lm_loss": 5.4066, + "loss": 1.4347, + "step": 1053, + "text_contrastive_loss": 0.8216 + }, + { + "contrastive_loss": 0.5148, + "epoch": 2.379232505643341, + "grad_norm": 14.628676414489746, + "learning_rate": 5.568318809941777e-06, + "lm_loss": 5.578, + "loss": 1.4936, + "step": 1054, + "text_contrastive_loss": 0.8419 + }, + { + "contrastive_loss": 0.4618, + "epoch": 2.381489841986456, + "grad_norm": 13.120189666748047, + "learning_rate": 5.561109824169962e-06, + "lm_loss": 5.4411, + "loss": 1.3932, + "step": 1055, + "text_contrastive_loss": 0.7746 + }, + { + "contrastive_loss": 0.4568, + "epoch": 2.383747178329571, + "grad_norm": 14.947108268737793, + "learning_rate": 5.553899656903552e-06, + "lm_loss": 5.5084, + "loss": 1.4191, + "step": 1056, + "text_contrastive_loss": 0.8229 + }, + { + "contrastive_loss": 0.5233, + "epoch": 2.386004514672686, + "grad_norm": 14.998693466186523, + "learning_rate": 5.546688323324548e-06, + "lm_loss": 5.4731, + "loss": 1.5457, + "step": 1057, + "text_contrastive_loss": 0.9501 + }, + { + "contrastive_loss": 0.506, + "epoch": 2.3882618510158014, + "grad_norm": 14.649811744689941, + "learning_rate": 5.53947583861742e-06, + "lm_loss": 5.4863, + "loss": 1.4666, + "step": 1058, + "text_contrastive_loss": 0.824 + }, + { + "contrastive_loss": 0.589, + "epoch": 2.3905191873589167, + "grad_norm": 14.989198684692383, + "learning_rate": 5.5322622179690514e-06, + "lm_loss": 5.5289, + "loss": 1.4968, + "step": 1059, + "text_contrastive_loss": 0.7098 + }, + { + "contrastive_loss": 0.4902, + "epoch": 2.3927765237020315, + "grad_norm": 14.715935707092285, + "learning_rate": 5.525047476568722e-06, + "lm_loss": 5.471, + "loss": 1.4134, + "step": 1060, + "text_contrastive_loss": 0.7523 + }, + { + "contrastive_loss": 0.4415, + "epoch": 2.395033860045147, + "grad_norm": 12.693151473999023, + "learning_rate": 5.51783162960807e-06, + "lm_loss": 5.4789, + "loss": 1.2823, + "step": 1061, + "text_contrastive_loss": 0.586 + }, + { + "contrastive_loss": 0.531, + "epoch": 2.3972911963882617, + "grad_norm": 16.371997833251953, + "learning_rate": 5.5106146922810664e-06, + "lm_loss": 5.5396, + "loss": 1.5192, + "step": 1062, + "text_contrastive_loss": 0.8684 + }, + { + "contrastive_loss": 0.583, + "epoch": 2.399548532731377, + "grad_norm": 17.31743049621582, + "learning_rate": 5.50339667978397e-06, + "lm_loss": 5.5777, + "loss": 1.6913, + "step": 1063, + "text_contrastive_loss": 1.1012 + }, + { + "contrastive_loss": 0.695, + "epoch": 2.401805869074492, + "grad_norm": 18.16950798034668, + "learning_rate": 5.496177607315312e-06, + "lm_loss": 5.6135, + "loss": 1.6956, + "step": 1064, + "text_contrastive_loss": 0.8784 + }, + { + "contrastive_loss": 0.5599, + "epoch": 2.404063205417607, + "grad_norm": 15.148139953613281, + "learning_rate": 5.488957490075846e-06, + "lm_loss": 5.5422, + "loss": 1.4928, + "step": 1065, + "text_contrastive_loss": 0.7573 + }, + { + "contrastive_loss": 0.5054, + "epoch": 2.4063205417607225, + "grad_norm": 14.579808235168457, + "learning_rate": 5.4817363432685355e-06, + "lm_loss": 5.5686, + "loss": 1.5219, + "step": 1066, + "text_contrastive_loss": 0.9192 + }, + { + "contrastive_loss": 0.5734, + "epoch": 2.4085778781038374, + "grad_norm": 15.381420135498047, + "learning_rate": 5.474514182098504e-06, + "lm_loss": 5.5125, + "loss": 1.537, + "step": 1067, + "text_contrastive_loss": 0.8247 + }, + { + "contrastive_loss": 0.636, + "epoch": 2.4108352144469527, + "grad_norm": 15.787850379943848, + "learning_rate": 5.4672910217730155e-06, + "lm_loss": 5.5951, + "loss": 1.6146, + "step": 1068, + "text_contrastive_loss": 0.8382 + }, + { + "contrastive_loss": 0.4333, + "epoch": 2.4130925507900676, + "grad_norm": 14.761796951293945, + "learning_rate": 5.4600668775014355e-06, + "lm_loss": 5.6594, + "loss": 1.3938, + "step": 1069, + "text_contrastive_loss": 0.7891 + }, + { + "contrastive_loss": 0.4984, + "epoch": 2.415349887133183, + "grad_norm": 13.197561264038086, + "learning_rate": 5.452841764495203e-06, + "lm_loss": 5.4475, + "loss": 1.4568, + "step": 1070, + "text_contrastive_loss": 0.8273 + }, + { + "contrastive_loss": 0.4797, + "epoch": 2.417607223476298, + "grad_norm": 15.631025314331055, + "learning_rate": 5.445615697967797e-06, + "lm_loss": 5.3548, + "loss": 1.3307, + "step": 1071, + "text_contrastive_loss": 0.631 + }, + { + "contrastive_loss": 0.791, + "epoch": 2.419864559819413, + "grad_norm": 17.64402961730957, + "learning_rate": 5.438388693134702e-06, + "lm_loss": 5.5563, + "loss": 1.8377, + "step": 1072, + "text_contrastive_loss": 0.9822 + }, + { + "contrastive_loss": 0.5658, + "epoch": 2.4221218961625284, + "grad_norm": 13.911042213439941, + "learning_rate": 5.431160765213379e-06, + "lm_loss": 5.4367, + "loss": 1.5188, + "step": 1073, + "text_contrastive_loss": 0.8188 + }, + { + "contrastive_loss": 0.5071, + "epoch": 2.4243792325056432, + "grad_norm": 14.111321449279785, + "learning_rate": 5.423931929423235e-06, + "lm_loss": 5.4104, + "loss": 1.4533, + "step": 1074, + "text_contrastive_loss": 0.8102 + }, + { + "contrastive_loss": 0.5081, + "epoch": 2.4266365688487586, + "grad_norm": 13.774117469787598, + "learning_rate": 5.416702200985585e-06, + "lm_loss": 5.5223, + "loss": 1.4333, + "step": 1075, + "text_contrastive_loss": 0.746 + }, + { + "contrastive_loss": 0.6259, + "epoch": 2.4288939051918734, + "grad_norm": 15.925483703613281, + "learning_rate": 5.409471595123628e-06, + "lm_loss": 5.4057, + "loss": 1.5579, + "step": 1076, + "text_contrastive_loss": 0.7829 + }, + { + "contrastive_loss": 0.5815, + "epoch": 2.4311512415349887, + "grad_norm": 15.957097053527832, + "learning_rate": 5.4022401270624036e-06, + "lm_loss": 5.5097, + "loss": 1.521, + "step": 1077, + "text_contrastive_loss": 0.7769 + }, + { + "contrastive_loss": 0.5654, + "epoch": 2.4334085778781036, + "grad_norm": 14.975671768188477, + "learning_rate": 5.395007812028775e-06, + "lm_loss": 5.5032, + "loss": 1.4999, + "step": 1078, + "text_contrastive_loss": 0.7684 + }, + { + "contrastive_loss": 0.6256, + "epoch": 2.435665914221219, + "grad_norm": 16.601072311401367, + "learning_rate": 5.387774665251385e-06, + "lm_loss": 5.5002, + "loss": 1.6017, + "step": 1079, + "text_contrastive_loss": 0.8521 + }, + { + "contrastive_loss": 0.4895, + "epoch": 2.4379232505643342, + "grad_norm": 15.283729553222656, + "learning_rate": 5.380540701960627e-06, + "lm_loss": 5.4928, + "loss": 1.4356, + "step": 1080, + "text_contrastive_loss": 0.7936 + }, + { + "contrastive_loss": 0.6107, + "epoch": 2.440180586907449, + "grad_norm": 15.71643352508545, + "learning_rate": 5.373305937388613e-06, + "lm_loss": 5.492, + "loss": 1.5936, + "step": 1081, + "text_contrastive_loss": 0.8675 + }, + { + "contrastive_loss": 0.5536, + "epoch": 2.4424379232505644, + "grad_norm": 14.633138656616211, + "learning_rate": 5.3660703867691475e-06, + "lm_loss": 5.5287, + "loss": 1.4925, + "step": 1082, + "text_contrastive_loss": 0.7722 + }, + { + "contrastive_loss": 0.5391, + "epoch": 2.4446952595936793, + "grad_norm": 15.10629940032959, + "learning_rate": 5.358834065337684e-06, + "lm_loss": 5.5012, + "loss": 1.5241, + "step": 1083, + "text_contrastive_loss": 0.8696 + }, + { + "contrastive_loss": 0.5987, + "epoch": 2.4469525959367946, + "grad_norm": 16.985130310058594, + "learning_rate": 5.3515969883313e-06, + "lm_loss": 5.5252, + "loss": 1.5473, + "step": 1084, + "text_contrastive_loss": 0.7921 + }, + { + "contrastive_loss": 0.4536, + "epoch": 2.44920993227991, + "grad_norm": 13.985669136047363, + "learning_rate": 5.344359170988668e-06, + "lm_loss": 5.4269, + "loss": 1.349, + "step": 1085, + "text_contrastive_loss": 0.7056 + }, + { + "contrastive_loss": 0.5222, + "epoch": 2.4514672686230248, + "grad_norm": 14.75936222076416, + "learning_rate": 5.337120628550016e-06, + "lm_loss": 5.5617, + "loss": 1.4309, + "step": 1086, + "text_contrastive_loss": 0.705 + }, + { + "contrastive_loss": 0.4161, + "epoch": 2.45372460496614, + "grad_norm": 12.124979972839355, + "learning_rate": 5.329881376257098e-06, + "lm_loss": 5.4788, + "loss": 1.2727, + "step": 1087, + "text_contrastive_loss": 0.6174 + }, + { + "contrastive_loss": 0.5465, + "epoch": 2.455981941309255, + "grad_norm": 14.528797149658203, + "learning_rate": 5.322641429353167e-06, + "lm_loss": 5.6025, + "loss": 1.5016, + "step": 1088, + "text_contrastive_loss": 0.7897 + }, + { + "contrastive_loss": 0.5103, + "epoch": 2.4582392776523703, + "grad_norm": 14.936470031738281, + "learning_rate": 5.315400803082934e-06, + "lm_loss": 5.5148, + "loss": 1.4588, + "step": 1089, + "text_contrastive_loss": 0.7941 + }, + { + "contrastive_loss": 0.5379, + "epoch": 2.460496613995485, + "grad_norm": 15.522775650024414, + "learning_rate": 5.308159512692544e-06, + "lm_loss": 5.3921, + "loss": 1.472, + "step": 1090, + "text_contrastive_loss": 0.7897 + }, + { + "contrastive_loss": 0.6376, + "epoch": 2.4627539503386005, + "grad_norm": 15.824444770812988, + "learning_rate": 5.300917573429536e-06, + "lm_loss": 5.5943, + "loss": 1.5699, + "step": 1091, + "text_contrastive_loss": 0.7457 + }, + { + "contrastive_loss": 0.5642, + "epoch": 2.4650112866817158, + "grad_norm": 17.166841506958008, + "learning_rate": 5.293675000542822e-06, + "lm_loss": 5.6214, + "loss": 1.5572, + "step": 1092, + "text_contrastive_loss": 0.8616 + }, + { + "contrastive_loss": 0.5636, + "epoch": 2.4672686230248306, + "grad_norm": 16.25465202331543, + "learning_rate": 5.286431809282639e-06, + "lm_loss": 5.5358, + "loss": 1.6832, + "step": 1093, + "text_contrastive_loss": 1.1321 + }, + { + "contrastive_loss": 0.5424, + "epoch": 2.469525959367946, + "grad_norm": 17.31192970275879, + "learning_rate": 5.279188014900537e-06, + "lm_loss": 5.5631, + "loss": 1.4952, + "step": 1094, + "text_contrastive_loss": 0.793 + }, + { + "contrastive_loss": 0.5037, + "epoch": 2.471783295711061, + "grad_norm": 15.22265911102295, + "learning_rate": 5.2719436326493255e-06, + "lm_loss": 5.5699, + "loss": 1.5099, + "step": 1095, + "text_contrastive_loss": 0.8984 + }, + { + "contrastive_loss": 0.5294, + "epoch": 2.474040632054176, + "grad_norm": 14.717487335205078, + "learning_rate": 5.26469867778306e-06, + "lm_loss": 5.5627, + "loss": 1.478, + "step": 1096, + "text_contrastive_loss": 0.7848 + }, + { + "contrastive_loss": 0.5232, + "epoch": 2.476297968397291, + "grad_norm": 14.459714889526367, + "learning_rate": 5.257453165556996e-06, + "lm_loss": 5.4178, + "loss": 1.4919, + "step": 1097, + "text_contrastive_loss": 0.8539 + }, + { + "contrastive_loss": 0.6076, + "epoch": 2.4785553047404063, + "grad_norm": 16.94345474243164, + "learning_rate": 5.2502071112275675e-06, + "lm_loss": 5.4922, + "loss": 1.5671, + "step": 1098, + "text_contrastive_loss": 0.8205 + }, + { + "contrastive_loss": 0.5167, + "epoch": 2.4808126410835216, + "grad_norm": 16.165454864501953, + "learning_rate": 5.242960530052344e-06, + "lm_loss": 5.5237, + "loss": 1.5062, + "step": 1099, + "text_contrastive_loss": 0.8741 + }, + { + "contrastive_loss": 0.5703, + "epoch": 2.4830699774266365, + "grad_norm": 15.800847053527832, + "learning_rate": 5.235713437290012e-06, + "lm_loss": 5.5983, + "loss": 1.6058, + "step": 1100, + "text_contrastive_loss": 0.9515 + }, + { + "contrastive_loss": 0.5203, + "epoch": 2.485327313769752, + "grad_norm": 15.280631065368652, + "learning_rate": 5.228465848200327e-06, + "lm_loss": 5.5601, + "loss": 1.4564, + "step": 1101, + "text_contrastive_loss": 0.7602 + }, + { + "contrastive_loss": 0.5569, + "epoch": 2.4875846501128667, + "grad_norm": 16.5999755859375, + "learning_rate": 5.221217778044096e-06, + "lm_loss": 5.5037, + "loss": 1.5132, + "step": 1102, + "text_contrastive_loss": 0.8119 + }, + { + "contrastive_loss": 0.5037, + "epoch": 2.489841986455982, + "grad_norm": 16.614097595214844, + "learning_rate": 5.2139692420831325e-06, + "lm_loss": 5.5185, + "loss": 1.423, + "step": 1103, + "text_contrastive_loss": 0.7348 + }, + { + "contrastive_loss": 0.4788, + "epoch": 2.4920993227990973, + "grad_norm": 14.173137664794922, + "learning_rate": 5.206720255580241e-06, + "lm_loss": 5.5314, + "loss": 1.3925, + "step": 1104, + "text_contrastive_loss": 0.7211 + }, + { + "contrastive_loss": 0.5616, + "epoch": 2.494356659142212, + "grad_norm": 15.582025527954102, + "learning_rate": 5.199470833799164e-06, + "lm_loss": 5.5397, + "loss": 1.5229, + "step": 1105, + "text_contrastive_loss": 0.8147 + }, + { + "contrastive_loss": 0.4588, + "epoch": 2.4966139954853275, + "grad_norm": 14.748689651489258, + "learning_rate": 5.192220992004569e-06, + "lm_loss": 5.4949, + "loss": 1.3965, + "step": 1106, + "text_contrastive_loss": 0.7763 + }, + { + "contrastive_loss": 0.6206, + "epoch": 2.4988713318284423, + "grad_norm": 17.029296875, + "learning_rate": 5.184970745461998e-06, + "lm_loss": 5.5767, + "loss": 1.6424, + "step": 1107, + "text_contrastive_loss": 0.9282 + }, + { + "contrastive_loss": 0.6042, + "epoch": 2.5011286681715577, + "grad_norm": 15.634419441223145, + "learning_rate": 5.177720109437857e-06, + "lm_loss": 5.5833, + "loss": 1.6048, + "step": 1108, + "text_contrastive_loss": 0.8846 + }, + { + "contrastive_loss": 0.4785, + "epoch": 2.5033860045146725, + "grad_norm": 14.658867835998535, + "learning_rate": 5.170469099199363e-06, + "lm_loss": 5.5818, + "loss": 1.4511, + "step": 1109, + "text_contrastive_loss": 0.8288 + }, + { + "contrastive_loss": 0.5242, + "epoch": 2.505643340857788, + "grad_norm": 14.744585037231445, + "learning_rate": 5.1632177300145255e-06, + "lm_loss": 5.4845, + "loss": 1.503, + "step": 1110, + "text_contrastive_loss": 0.8606 + }, + { + "contrastive_loss": 0.6345, + "epoch": 2.5079006772009027, + "grad_norm": 16.3204288482666, + "learning_rate": 5.155966017152108e-06, + "lm_loss": 5.5486, + "loss": 1.673, + "step": 1111, + "text_contrastive_loss": 0.9673 + }, + { + "contrastive_loss": 0.6088, + "epoch": 2.510158013544018, + "grad_norm": 14.689356803894043, + "learning_rate": 5.148713975881598e-06, + "lm_loss": 5.4947, + "loss": 1.6335, + "step": 1112, + "text_contrastive_loss": 0.9503 + }, + { + "contrastive_loss": 0.4956, + "epoch": 2.5124153498871333, + "grad_norm": 13.767909049987793, + "learning_rate": 5.141461621473175e-06, + "lm_loss": 5.6347, + "loss": 1.4861, + "step": 1113, + "text_contrastive_loss": 0.854 + }, + { + "contrastive_loss": 0.6261, + "epoch": 2.514672686230248, + "grad_norm": 16.780237197875977, + "learning_rate": 5.1342089691976794e-06, + "lm_loss": 5.4666, + "loss": 1.6463, + "step": 1114, + "text_contrastive_loss": 0.9472 + }, + { + "contrastive_loss": 0.5675, + "epoch": 2.5169300225733635, + "grad_norm": 16.320783615112305, + "learning_rate": 5.126956034326573e-06, + "lm_loss": 5.4596, + "loss": 1.5824, + "step": 1115, + "text_contrastive_loss": 0.9377 + }, + { + "contrastive_loss": 0.4942, + "epoch": 2.5191873589164784, + "grad_norm": 15.041061401367188, + "learning_rate": 5.119702832131922e-06, + "lm_loss": 5.6353, + "loss": 1.4093, + "step": 1116, + "text_contrastive_loss": 0.703 + }, + { + "contrastive_loss": 0.5249, + "epoch": 2.5214446952595937, + "grad_norm": 13.427505493164062, + "learning_rate": 5.112449377886345e-06, + "lm_loss": 5.5718, + "loss": 1.5324, + "step": 1117, + "text_contrastive_loss": 0.9006 + }, + { + "contrastive_loss": 0.5309, + "epoch": 2.523702031602709, + "grad_norm": 17.36693572998047, + "learning_rate": 5.105195686863e-06, + "lm_loss": 5.5065, + "loss": 1.5013, + "step": 1118, + "text_contrastive_loss": 0.8395 + }, + { + "contrastive_loss": 0.5017, + "epoch": 2.525959367945824, + "grad_norm": 15.0901517868042, + "learning_rate": 5.097941774335538e-06, + "lm_loss": 5.5845, + "loss": 1.5407, + "step": 1119, + "text_contrastive_loss": 0.961 + }, + { + "contrastive_loss": 0.5806, + "epoch": 2.528216704288939, + "grad_norm": 15.08397388458252, + "learning_rate": 5.090687655578078e-06, + "lm_loss": 5.4375, + "loss": 1.6074, + "step": 1120, + "text_contrastive_loss": 0.9661 + }, + { + "contrastive_loss": 0.6207, + "epoch": 2.530474040632054, + "grad_norm": 15.705390930175781, + "learning_rate": 5.083433345865175e-06, + "lm_loss": 5.4951, + "loss": 1.5651, + "step": 1121, + "text_contrastive_loss": 0.7897 + }, + { + "contrastive_loss": 0.6248, + "epoch": 2.5327313769751694, + "grad_norm": 18.175708770751953, + "learning_rate": 5.076178860471787e-06, + "lm_loss": 5.5092, + "loss": 1.6338, + "step": 1122, + "text_contrastive_loss": 0.9161 + }, + { + "contrastive_loss": 0.5244, + "epoch": 2.5349887133182847, + "grad_norm": 15.123210906982422, + "learning_rate": 5.068924214673234e-06, + "lm_loss": 5.4212, + "loss": 1.4828, + "step": 1123, + "text_contrastive_loss": 0.8326 + }, + { + "contrastive_loss": 0.4389, + "epoch": 2.5372460496613995, + "grad_norm": 14.30945873260498, + "learning_rate": 5.061669423745185e-06, + "lm_loss": 5.5389, + "loss": 1.3666, + "step": 1124, + "text_contrastive_loss": 0.7477 + }, + { + "contrastive_loss": 0.4769, + "epoch": 2.5395033860045144, + "grad_norm": 16.882328033447266, + "learning_rate": 5.054414502963605e-06, + "lm_loss": 5.6542, + "loss": 1.4617, + "step": 1125, + "text_contrastive_loss": 0.8388 + }, + { + "contrastive_loss": 0.4262, + "epoch": 2.5417607223476297, + "grad_norm": 12.748918533325195, + "learning_rate": 5.0471594676047385e-06, + "lm_loss": 5.588, + "loss": 1.3344, + "step": 1126, + "text_contrastive_loss": 0.6987 + }, + { + "contrastive_loss": 0.4708, + "epoch": 2.544018058690745, + "grad_norm": 14.473797798156738, + "learning_rate": 5.039904332945069e-06, + "lm_loss": 5.5085, + "loss": 1.3946, + "step": 1127, + "text_contrastive_loss": 0.7459 + }, + { + "contrastive_loss": 0.4823, + "epoch": 2.54627539503386, + "grad_norm": 15.05002498626709, + "learning_rate": 5.03264911426129e-06, + "lm_loss": 5.4364, + "loss": 1.4151, + "step": 1128, + "text_contrastive_loss": 0.7782 + }, + { + "contrastive_loss": 0.5926, + "epoch": 2.5485327313769752, + "grad_norm": 15.238771438598633, + "learning_rate": 5.025393826830267e-06, + "lm_loss": 5.5455, + "loss": 1.6105, + "step": 1129, + "text_contrastive_loss": 0.9267 + }, + { + "contrastive_loss": 0.5255, + "epoch": 2.55079006772009, + "grad_norm": 15.349923133850098, + "learning_rate": 5.0181384859290215e-06, + "lm_loss": 5.5044, + "loss": 1.4948, + "step": 1130, + "text_contrastive_loss": 0.8377 + }, + { + "contrastive_loss": 0.5483, + "epoch": 2.5530474040632054, + "grad_norm": 15.675585746765137, + "learning_rate": 5.010883106834676e-06, + "lm_loss": 5.5256, + "loss": 1.5209, + "step": 1131, + "text_contrastive_loss": 0.84 + }, + { + "contrastive_loss": 0.5889, + "epoch": 2.5553047404063207, + "grad_norm": 16.66605567932129, + "learning_rate": 5.003627704824438e-06, + "lm_loss": 5.4589, + "loss": 1.5148, + "step": 1132, + "text_contrastive_loss": 0.7601 + }, + { + "contrastive_loss": 0.4575, + "epoch": 2.5575620767494356, + "grad_norm": 13.640859603881836, + "learning_rate": 4.996372295175563e-06, + "lm_loss": 5.536, + "loss": 1.3606, + "step": 1133, + "text_contrastive_loss": 0.6991 + }, + { + "contrastive_loss": 0.645, + "epoch": 2.559819413092551, + "grad_norm": 16.621692657470703, + "learning_rate": 4.989116893165325e-06, + "lm_loss": 5.5277, + "loss": 1.6136, + "step": 1134, + "text_contrastive_loss": 0.8318 + }, + { + "contrastive_loss": 0.6095, + "epoch": 2.5620767494356658, + "grad_norm": 15.903656005859375, + "learning_rate": 4.981861514070979e-06, + "lm_loss": 5.4128, + "loss": 1.6205, + "step": 1135, + "text_contrastive_loss": 0.9394 + }, + { + "contrastive_loss": 0.5439, + "epoch": 2.564334085778781, + "grad_norm": 15.493552207946777, + "learning_rate": 4.974606173169733e-06, + "lm_loss": 5.6034, + "loss": 1.5336, + "step": 1136, + "text_contrastive_loss": 0.8587 + }, + { + "contrastive_loss": 0.491, + "epoch": 2.5665914221218964, + "grad_norm": 13.51240348815918, + "learning_rate": 4.9673508857387115e-06, + "lm_loss": 5.3974, + "loss": 1.3996, + "step": 1137, + "text_contrastive_loss": 0.7378 + }, + { + "contrastive_loss": 0.535, + "epoch": 2.5688487584650113, + "grad_norm": 14.415314674377441, + "learning_rate": 4.9600956670549324e-06, + "lm_loss": 5.5457, + "loss": 1.5186, + "step": 1138, + "text_contrastive_loss": 0.858 + }, + { + "contrastive_loss": 0.6606, + "epoch": 2.5711060948081266, + "grad_norm": 15.831226348876953, + "learning_rate": 4.952840532395262e-06, + "lm_loss": 5.5284, + "loss": 1.689, + "step": 1139, + "text_contrastive_loss": 0.9512 + }, + { + "contrastive_loss": 0.5523, + "epoch": 2.5733634311512414, + "grad_norm": 15.637651443481445, + "learning_rate": 4.945585497036396e-06, + "lm_loss": 5.5387, + "loss": 1.489, + "step": 1140, + "text_contrastive_loss": 0.7656 + }, + { + "contrastive_loss": 0.6667, + "epoch": 2.5756207674943568, + "grad_norm": 18.41811180114746, + "learning_rate": 4.938330576254817e-06, + "lm_loss": 5.4911, + "loss": 1.677, + "step": 1141, + "text_contrastive_loss": 0.9223 + }, + { + "contrastive_loss": 0.5854, + "epoch": 2.5778781038374716, + "grad_norm": 16.636863708496094, + "learning_rate": 4.931075785326767e-06, + "lm_loss": 5.4529, + "loss": 1.5712, + "step": 1142, + "text_contrastive_loss": 0.8811 + }, + { + "contrastive_loss": 0.587, + "epoch": 2.580135440180587, + "grad_norm": 16.620792388916016, + "learning_rate": 4.9238211395282156e-06, + "lm_loss": 5.4365, + "loss": 1.5025, + "step": 1143, + "text_contrastive_loss": 0.7438 + }, + { + "contrastive_loss": 0.4897, + "epoch": 2.582392776523702, + "grad_norm": 15.157502174377441, + "learning_rate": 4.9165666541348265e-06, + "lm_loss": 5.422, + "loss": 1.4327, + "step": 1144, + "text_contrastive_loss": 0.8017 + }, + { + "contrastive_loss": 0.4966, + "epoch": 2.584650112866817, + "grad_norm": 14.812685012817383, + "learning_rate": 4.909312344421923e-06, + "lm_loss": 5.4986, + "loss": 1.4148, + "step": 1145, + "text_contrastive_loss": 0.7367 + }, + { + "contrastive_loss": 0.4962, + "epoch": 2.5869074492099324, + "grad_norm": 14.94445514678955, + "learning_rate": 4.902058225664465e-06, + "lm_loss": 5.4792, + "loss": 1.4704, + "step": 1146, + "text_contrastive_loss": 0.8526 + }, + { + "contrastive_loss": 0.4477, + "epoch": 2.5891647855530473, + "grad_norm": 14.922908782958984, + "learning_rate": 4.8948043131370025e-06, + "lm_loss": 5.4961, + "loss": 1.3859, + "step": 1147, + "text_contrastive_loss": 0.777 + }, + { + "contrastive_loss": 0.5108, + "epoch": 2.5914221218961626, + "grad_norm": 15.819107055664062, + "learning_rate": 4.887550622113657e-06, + "lm_loss": 5.4651, + "loss": 1.4422, + "step": 1148, + "text_contrastive_loss": 0.7698 + }, + { + "contrastive_loss": 0.4764, + "epoch": 2.5936794582392775, + "grad_norm": 16.320865631103516, + "learning_rate": 4.88029716786808e-06, + "lm_loss": 5.5991, + "loss": 1.448, + "step": 1149, + "text_contrastive_loss": 0.8234 + }, + { + "contrastive_loss": 0.5614, + "epoch": 2.595936794582393, + "grad_norm": 17.54962921142578, + "learning_rate": 4.873043965673427e-06, + "lm_loss": 5.5239, + "loss": 1.5026, + "step": 1150, + "text_contrastive_loss": 0.7778 + }, + { + "contrastive_loss": 0.5654, + "epoch": 2.598194130925508, + "grad_norm": 19.690631866455078, + "learning_rate": 4.8657910308023205e-06, + "lm_loss": 5.5625, + "loss": 1.5904, + "step": 1151, + "text_contrastive_loss": 0.9375 + }, + { + "contrastive_loss": 0.6657, + "epoch": 2.600451467268623, + "grad_norm": 18.43953514099121, + "learning_rate": 4.858538378526825e-06, + "lm_loss": 5.6181, + "loss": 1.6538, + "step": 1152, + "text_contrastive_loss": 0.8525 + }, + { + "contrastive_loss": 0.6193, + "epoch": 2.6027088036117383, + "grad_norm": 15.578585624694824, + "learning_rate": 4.851286024118402e-06, + "lm_loss": 5.3932, + "loss": 1.5595, + "step": 1153, + "text_contrastive_loss": 0.8018 + }, + { + "contrastive_loss": 0.5564, + "epoch": 2.604966139954853, + "grad_norm": 17.017620086669922, + "learning_rate": 4.844033982847893e-06, + "lm_loss": 5.4934, + "loss": 1.5226, + "step": 1154, + "text_contrastive_loss": 0.8338 + }, + { + "contrastive_loss": 0.5105, + "epoch": 2.6072234762979685, + "grad_norm": 16.31838035583496, + "learning_rate": 4.836782269985475e-06, + "lm_loss": 5.4835, + "loss": 1.4426, + "step": 1155, + "text_contrastive_loss": 0.7675 + }, + { + "contrastive_loss": 0.4958, + "epoch": 2.609480812641084, + "grad_norm": 14.419219017028809, + "learning_rate": 4.829530900800638e-06, + "lm_loss": 5.4756, + "loss": 1.4355, + "step": 1156, + "text_contrastive_loss": 0.7841 + }, + { + "contrastive_loss": 0.6406, + "epoch": 2.6117381489841986, + "grad_norm": 15.977461814880371, + "learning_rate": 4.8222798905621445e-06, + "lm_loss": 5.5373, + "loss": 1.6012, + "step": 1157, + "text_contrastive_loss": 0.8137 + }, + { + "contrastive_loss": 0.6028, + "epoch": 2.6139954853273135, + "grad_norm": 18.114654541015625, + "learning_rate": 4.815029254538003e-06, + "lm_loss": 5.5876, + "loss": 1.5115, + "step": 1158, + "text_contrastive_loss": 0.7 + }, + { + "contrastive_loss": 0.5203, + "epoch": 2.616252821670429, + "grad_norm": 13.368484497070312, + "learning_rate": 4.807779007995434e-06, + "lm_loss": 5.4564, + "loss": 1.4583, + "step": 1159, + "text_contrastive_loss": 0.7849 + }, + { + "contrastive_loss": 0.4534, + "epoch": 2.618510158013544, + "grad_norm": 14.692235946655273, + "learning_rate": 4.800529166200837e-06, + "lm_loss": 5.6025, + "loss": 1.3771, + "step": 1160, + "text_contrastive_loss": 0.7269 + }, + { + "contrastive_loss": 0.5249, + "epoch": 2.620767494356659, + "grad_norm": 15.356192588806152, + "learning_rate": 4.7932797444197604e-06, + "lm_loss": 5.602, + "loss": 1.4717, + "step": 1161, + "text_contrastive_loss": 0.7733 + }, + { + "contrastive_loss": 0.517, + "epoch": 2.6230248306997743, + "grad_norm": 13.836146354675293, + "learning_rate": 4.786030757916868e-06, + "lm_loss": 5.5499, + "loss": 1.4582, + "step": 1162, + "text_contrastive_loss": 0.7724 + }, + { + "contrastive_loss": 0.6164, + "epoch": 2.625282167042889, + "grad_norm": 18.457595825195312, + "learning_rate": 4.778782221955907e-06, + "lm_loss": 5.5664, + "loss": 1.5773, + "step": 1163, + "text_contrastive_loss": 0.8086 + }, + { + "contrastive_loss": 0.4743, + "epoch": 2.6275395033860045, + "grad_norm": 14.833595275878906, + "learning_rate": 4.771534151799676e-06, + "lm_loss": 5.5332, + "loss": 1.4507, + "step": 1164, + "text_contrastive_loss": 0.8461 + }, + { + "contrastive_loss": 0.4967, + "epoch": 2.62979683972912, + "grad_norm": 14.186467170715332, + "learning_rate": 4.76428656270999e-06, + "lm_loss": 5.6641, + "loss": 1.4269, + "step": 1165, + "text_contrastive_loss": 0.7277 + }, + { + "contrastive_loss": 0.5967, + "epoch": 2.6320541760722347, + "grad_norm": 16.209814071655273, + "learning_rate": 4.757039469947658e-06, + "lm_loss": 5.4752, + "loss": 1.5592, + "step": 1166, + "text_contrastive_loss": 0.8299 + }, + { + "contrastive_loss": 0.4835, + "epoch": 2.63431151241535, + "grad_norm": 15.594867706298828, + "learning_rate": 4.7497928887724325e-06, + "lm_loss": 5.55, + "loss": 1.4447, + "step": 1167, + "text_contrastive_loss": 0.8123 + }, + { + "contrastive_loss": 0.5326, + "epoch": 2.636568848758465, + "grad_norm": 15.795209884643555, + "learning_rate": 4.7425468344430035e-06, + "lm_loss": 5.5219, + "loss": 1.4649, + "step": 1168, + "text_contrastive_loss": 0.7603 + }, + { + "contrastive_loss": 0.4957, + "epoch": 2.63882618510158, + "grad_norm": 16.403053283691406, + "learning_rate": 4.73530132221694e-06, + "lm_loss": 5.4785, + "loss": 1.4122, + "step": 1169, + "text_contrastive_loss": 0.7372 + }, + { + "contrastive_loss": 0.518, + "epoch": 2.6410835214446955, + "grad_norm": 16.638166427612305, + "learning_rate": 4.7280563673506745e-06, + "lm_loss": 5.6407, + "loss": 1.5187, + "step": 1170, + "text_contrastive_loss": 0.8733 + }, + { + "contrastive_loss": 0.5433, + "epoch": 2.6433408577878104, + "grad_norm": 16.512386322021484, + "learning_rate": 4.720811985099464e-06, + "lm_loss": 5.4574, + "loss": 1.543, + "step": 1171, + "text_contrastive_loss": 0.908 + }, + { + "contrastive_loss": 0.465, + "epoch": 2.6455981941309257, + "grad_norm": 14.082274436950684, + "learning_rate": 4.713568190717362e-06, + "lm_loss": 5.4258, + "loss": 1.4103, + "step": 1172, + "text_contrastive_loss": 0.8054 + }, + { + "contrastive_loss": 0.5123, + "epoch": 2.6478555304740405, + "grad_norm": 14.941728591918945, + "learning_rate": 4.70632499945718e-06, + "lm_loss": 5.5014, + "loss": 1.4248, + "step": 1173, + "text_contrastive_loss": 0.7248 + }, + { + "contrastive_loss": 0.6703, + "epoch": 2.650112866817156, + "grad_norm": 17.750526428222656, + "learning_rate": 4.699082426570465e-06, + "lm_loss": 5.5859, + "loss": 1.6718, + "step": 1174, + "text_contrastive_loss": 0.8858 + }, + { + "contrastive_loss": 0.6189, + "epoch": 2.6523702031602707, + "grad_norm": 18.926570892333984, + "learning_rate": 4.6918404873074574e-06, + "lm_loss": 5.4535, + "loss": 1.6068, + "step": 1175, + "text_contrastive_loss": 0.8851 + }, + { + "contrastive_loss": 0.4899, + "epoch": 2.654627539503386, + "grad_norm": 14.999008178710938, + "learning_rate": 4.684599196917067e-06, + "lm_loss": 5.7192, + "loss": 1.4694, + "step": 1176, + "text_contrastive_loss": 0.8153 + }, + { + "contrastive_loss": 0.5594, + "epoch": 2.656884875846501, + "grad_norm": 14.709131240844727, + "learning_rate": 4.677358570646834e-06, + "lm_loss": 5.4593, + "loss": 1.5028, + "step": 1177, + "text_contrastive_loss": 0.795 + }, + { + "contrastive_loss": 0.4572, + "epoch": 2.659142212189616, + "grad_norm": 13.443650245666504, + "learning_rate": 4.670118623742904e-06, + "lm_loss": 5.6383, + "loss": 1.3786, + "step": 1178, + "text_contrastive_loss": 0.7151 + }, + { + "contrastive_loss": 0.4666, + "epoch": 2.6613995485327315, + "grad_norm": 15.100601196289062, + "learning_rate": 4.662879371449987e-06, + "lm_loss": 5.4287, + "loss": 1.4094, + "step": 1179, + "text_contrastive_loss": 0.8 + }, + { + "contrastive_loss": 0.546, + "epoch": 2.6636568848758464, + "grad_norm": 15.13619327545166, + "learning_rate": 4.655640829011335e-06, + "lm_loss": 5.5045, + "loss": 1.463, + "step": 1180, + "text_contrastive_loss": 0.7331 + }, + { + "contrastive_loss": 0.509, + "epoch": 2.6659142212189617, + "grad_norm": 14.496602058410645, + "learning_rate": 4.6484030116687014e-06, + "lm_loss": 5.4848, + "loss": 1.4208, + "step": 1181, + "text_contrastive_loss": 0.7267 + }, + { + "contrastive_loss": 0.518, + "epoch": 2.6681715575620766, + "grad_norm": 15.54823112487793, + "learning_rate": 4.64116593466232e-06, + "lm_loss": 5.3756, + "loss": 1.5412, + "step": 1182, + "text_contrastive_loss": 0.9713 + }, + { + "contrastive_loss": 0.4753, + "epoch": 2.670428893905192, + "grad_norm": 15.684138298034668, + "learning_rate": 4.633929613230855e-06, + "lm_loss": 5.4609, + "loss": 1.4673, + "step": 1183, + "text_contrastive_loss": 0.8917 + }, + { + "contrastive_loss": 0.5154, + "epoch": 2.672686230248307, + "grad_norm": 14.663494110107422, + "learning_rate": 4.626694062611387e-06, + "lm_loss": 5.447, + "loss": 1.4817, + "step": 1184, + "text_contrastive_loss": 0.8431 + }, + { + "contrastive_loss": 0.5792, + "epoch": 2.674943566591422, + "grad_norm": 17.431798934936523, + "learning_rate": 4.619459298039373e-06, + "lm_loss": 5.4315, + "loss": 1.5457, + "step": 1185, + "text_contrastive_loss": 0.8467 + }, + { + "contrastive_loss": 0.4407, + "epoch": 2.6772009029345374, + "grad_norm": 13.369098663330078, + "learning_rate": 4.612225334748616e-06, + "lm_loss": 5.6408, + "loss": 1.3813, + "step": 1186, + "text_contrastive_loss": 0.753 + }, + { + "contrastive_loss": 0.4911, + "epoch": 2.6794582392776523, + "grad_norm": 15.295650482177734, + "learning_rate": 4.6049921879712254e-06, + "lm_loss": 5.4293, + "loss": 1.3968, + "step": 1187, + "text_contrastive_loss": 0.7257 + }, + { + "contrastive_loss": 0.5897, + "epoch": 2.6817155756207676, + "grad_norm": 14.655820846557617, + "learning_rate": 4.597759872937597e-06, + "lm_loss": 5.5653, + "loss": 1.4946, + "step": 1188, + "text_contrastive_loss": 0.6966 + }, + { + "contrastive_loss": 0.6091, + "epoch": 2.683972911963883, + "grad_norm": 15.665904998779297, + "learning_rate": 4.590528404876374e-06, + "lm_loss": 5.5102, + "loss": 1.605, + "step": 1189, + "text_contrastive_loss": 0.8899 + }, + { + "contrastive_loss": 0.573, + "epoch": 2.6862302483069977, + "grad_norm": 15.560206413269043, + "learning_rate": 4.5832977990144165e-06, + "lm_loss": 5.5248, + "loss": 1.573, + "step": 1190, + "text_contrastive_loss": 0.895 + }, + { + "contrastive_loss": 0.5217, + "epoch": 2.6884875846501126, + "grad_norm": 14.409103393554688, + "learning_rate": 4.5760680705767665e-06, + "lm_loss": 5.4393, + "loss": 1.4393, + "step": 1191, + "text_contrastive_loss": 0.7472 + }, + { + "contrastive_loss": 0.5037, + "epoch": 2.690744920993228, + "grad_norm": 15.002391815185547, + "learning_rate": 4.5688392347866226e-06, + "lm_loss": 5.5225, + "loss": 1.4061, + "step": 1192, + "text_contrastive_loss": 0.7002 + }, + { + "contrastive_loss": 0.6246, + "epoch": 2.6930022573363432, + "grad_norm": 15.702973365783691, + "learning_rate": 4.561611306865299e-06, + "lm_loss": 5.4947, + "loss": 1.5696, + "step": 1193, + "text_contrastive_loss": 0.7912 + }, + { + "contrastive_loss": 0.4687, + "epoch": 2.695259593679458, + "grad_norm": 14.511507987976074, + "learning_rate": 4.554384302032204e-06, + "lm_loss": 5.4935, + "loss": 1.3736, + "step": 1194, + "text_contrastive_loss": 0.7112 + }, + { + "contrastive_loss": 0.5338, + "epoch": 2.6975169300225734, + "grad_norm": 14.777057647705078, + "learning_rate": 4.547158235504797e-06, + "lm_loss": 5.6297, + "loss": 1.5445, + "step": 1195, + "text_contrastive_loss": 0.8953 + }, + { + "contrastive_loss": 0.567, + "epoch": 2.6997742663656883, + "grad_norm": 14.681438446044922, + "learning_rate": 4.539933122498566e-06, + "lm_loss": 5.4765, + "loss": 1.5354, + "step": 1196, + "text_contrastive_loss": 0.8415 + }, + { + "contrastive_loss": 0.4323, + "epoch": 2.7020316027088036, + "grad_norm": 14.590365409851074, + "learning_rate": 4.532708978226987e-06, + "lm_loss": 5.4267, + "loss": 1.323, + "step": 1197, + "text_contrastive_loss": 0.696 + }, + { + "contrastive_loss": 0.4428, + "epoch": 2.704288939051919, + "grad_norm": 14.539432525634766, + "learning_rate": 4.525485817901499e-06, + "lm_loss": 5.4905, + "loss": 1.3467, + "step": 1198, + "text_contrastive_loss": 0.7098 + }, + { + "contrastive_loss": 0.5636, + "epoch": 2.706546275395034, + "grad_norm": 16.865188598632812, + "learning_rate": 4.518263656731468e-06, + "lm_loss": 5.4869, + "loss": 1.5635, + "step": 1199, + "text_contrastive_loss": 0.9024 + }, + { + "contrastive_loss": 0.6087, + "epoch": 2.708803611738149, + "grad_norm": 17.277889251708984, + "learning_rate": 4.511042509924157e-06, + "lm_loss": 5.4855, + "loss": 1.5486, + "step": 1200, + "text_contrastive_loss": 0.7826 + }, + { + "contrastive_loss": 0.5515, + "epoch": 2.711060948081264, + "grad_norm": 14.8447847366333, + "learning_rate": 4.5038223926846905e-06, + "lm_loss": 5.4666, + "loss": 1.5006, + "step": 1201, + "text_contrastive_loss": 0.8048 + }, + { + "contrastive_loss": 0.578, + "epoch": 2.7133182844243793, + "grad_norm": 16.393775939941406, + "learning_rate": 4.49660332021603e-06, + "lm_loss": 5.3864, + "loss": 1.5414, + "step": 1202, + "text_contrastive_loss": 0.8497 + }, + { + "contrastive_loss": 0.4884, + "epoch": 2.7155756207674946, + "grad_norm": 16.465248107910156, + "learning_rate": 4.489385307718934e-06, + "lm_loss": 5.523, + "loss": 1.4537, + "step": 1203, + "text_contrastive_loss": 0.826 + }, + { + "contrastive_loss": 0.494, + "epoch": 2.7178329571106095, + "grad_norm": 15.032328605651855, + "learning_rate": 4.482168370391931e-06, + "lm_loss": 5.4548, + "loss": 1.4819, + "step": 1204, + "text_contrastive_loss": 0.8847 + }, + { + "contrastive_loss": 0.5458, + "epoch": 2.7200902934537243, + "grad_norm": 15.656888008117676, + "learning_rate": 4.47495252343128e-06, + "lm_loss": 5.4434, + "loss": 1.4992, + "step": 1205, + "text_contrastive_loss": 0.8182 + }, + { + "contrastive_loss": 0.4925, + "epoch": 2.7223476297968396, + "grad_norm": 16.2528076171875, + "learning_rate": 4.467737782030951e-06, + "lm_loss": 5.5406, + "loss": 1.4561, + "step": 1206, + "text_contrastive_loss": 0.819 + }, + { + "contrastive_loss": 0.4839, + "epoch": 2.724604966139955, + "grad_norm": 14.774943351745605, + "learning_rate": 4.460524161382582e-06, + "lm_loss": 5.5624, + "loss": 1.4267, + "step": 1207, + "text_contrastive_loss": 0.7732 + }, + { + "contrastive_loss": 0.5676, + "epoch": 2.72686230248307, + "grad_norm": 17.021915435791016, + "learning_rate": 4.453311676675453e-06, + "lm_loss": 5.4838, + "loss": 1.5747, + "step": 1208, + "text_contrastive_loss": 0.9174 + }, + { + "contrastive_loss": 0.5606, + "epoch": 2.729119638826185, + "grad_norm": 15.182758331298828, + "learning_rate": 4.44610034309645e-06, + "lm_loss": 5.4755, + "loss": 1.523, + "step": 1209, + "text_contrastive_loss": 0.8298 + }, + { + "contrastive_loss": 0.4408, + "epoch": 2.7313769751693, + "grad_norm": 13.728645324707031, + "learning_rate": 4.438890175830039e-06, + "lm_loss": 5.5334, + "loss": 1.3396, + "step": 1210, + "text_contrastive_loss": 0.6909 + }, + { + "contrastive_loss": 0.545, + "epoch": 2.7336343115124153, + "grad_norm": 15.889242172241211, + "learning_rate": 4.431681190058224e-06, + "lm_loss": 5.4992, + "loss": 1.5851, + "step": 1211, + "text_contrastive_loss": 0.9804 + }, + { + "contrastive_loss": 0.4922, + "epoch": 2.7358916478555306, + "grad_norm": 14.040260314941406, + "learning_rate": 4.42447340096053e-06, + "lm_loss": 5.6347, + "loss": 1.3842, + "step": 1212, + "text_contrastive_loss": 0.6572 + }, + { + "contrastive_loss": 0.5602, + "epoch": 2.7381489841986455, + "grad_norm": 14.200431823730469, + "learning_rate": 4.417266823713953e-06, + "lm_loss": 5.3882, + "loss": 1.4308, + "step": 1213, + "text_contrastive_loss": 0.6635 + }, + { + "contrastive_loss": 0.464, + "epoch": 2.740406320541761, + "grad_norm": 13.672407150268555, + "learning_rate": 4.410061473492943e-06, + "lm_loss": 5.4964, + "loss": 1.3614, + "step": 1214, + "text_contrastive_loss": 0.6955 + }, + { + "contrastive_loss": 0.5088, + "epoch": 2.7426636568848757, + "grad_norm": 16.66963005065918, + "learning_rate": 4.402857365469364e-06, + "lm_loss": 5.5433, + "loss": 1.4142, + "step": 1215, + "text_contrastive_loss": 0.7022 + }, + { + "contrastive_loss": 0.4742, + "epoch": 2.744920993227991, + "grad_norm": 14.881044387817383, + "learning_rate": 4.3956545148124665e-06, + "lm_loss": 5.4994, + "loss": 1.4533, + "step": 1216, + "text_contrastive_loss": 0.8582 + }, + { + "contrastive_loss": 0.4697, + "epoch": 2.7471783295711063, + "grad_norm": 13.808037757873535, + "learning_rate": 4.38845293668885e-06, + "lm_loss": 5.7113, + "loss": 1.5018, + "step": 1217, + "text_contrastive_loss": 0.9219 + }, + { + "contrastive_loss": 0.5519, + "epoch": 2.749435665914221, + "grad_norm": 16.27696990966797, + "learning_rate": 4.381252646262437e-06, + "lm_loss": 5.6259, + "loss": 1.4901, + "step": 1218, + "text_contrastive_loss": 0.7512 + }, + { + "contrastive_loss": 0.4959, + "epoch": 2.7516930022573365, + "grad_norm": 14.359052658081055, + "learning_rate": 4.37405365869444e-06, + "lm_loss": 5.5, + "loss": 1.4236, + "step": 1219, + "text_contrastive_loss": 0.7554 + }, + { + "contrastive_loss": 0.4808, + "epoch": 2.7539503386004514, + "grad_norm": 13.013676643371582, + "learning_rate": 4.366855989143326e-06, + "lm_loss": 5.4649, + "loss": 1.3498, + "step": 1220, + "text_contrastive_loss": 0.6449 + }, + { + "contrastive_loss": 0.4465, + "epoch": 2.7562076749435667, + "grad_norm": 15.298121452331543, + "learning_rate": 4.359659652764786e-06, + "lm_loss": 5.408, + "loss": 1.4054, + "step": 1221, + "text_contrastive_loss": 0.8361 + }, + { + "contrastive_loss": 0.5785, + "epoch": 2.758465011286682, + "grad_norm": 14.701443672180176, + "learning_rate": 4.352464664711706e-06, + "lm_loss": 5.4825, + "loss": 1.5586, + "step": 1222, + "text_contrastive_loss": 0.8637 + }, + { + "contrastive_loss": 0.5882, + "epoch": 2.760722347629797, + "grad_norm": 16.48676872253418, + "learning_rate": 4.345271040134129e-06, + "lm_loss": 5.4937, + "loss": 1.6228, + "step": 1223, + "text_contrastive_loss": 0.9705 + }, + { + "contrastive_loss": 0.5671, + "epoch": 2.7629796839729117, + "grad_norm": 15.255523681640625, + "learning_rate": 4.338078794179234e-06, + "lm_loss": 5.505, + "loss": 1.5007, + "step": 1224, + "text_contrastive_loss": 0.7661 + }, + { + "contrastive_loss": 0.4796, + "epoch": 2.765237020316027, + "grad_norm": 15.49962043762207, + "learning_rate": 4.330887941991288e-06, + "lm_loss": 5.5146, + "loss": 1.4869, + "step": 1225, + "text_contrastive_loss": 0.9117 + }, + { + "contrastive_loss": 0.4473, + "epoch": 2.7674943566591423, + "grad_norm": 12.847675323486328, + "learning_rate": 4.323698498711634e-06, + "lm_loss": 5.3823, + "loss": 1.3547, + "step": 1226, + "text_contrastive_loss": 0.7384 + }, + { + "contrastive_loss": 0.4975, + "epoch": 2.769751693002257, + "grad_norm": 14.924741744995117, + "learning_rate": 4.316510479478636e-06, + "lm_loss": 5.4048, + "loss": 1.4483, + "step": 1227, + "text_contrastive_loss": 0.8206 + }, + { + "contrastive_loss": 0.582, + "epoch": 2.7720090293453725, + "grad_norm": 17.28886604309082, + "learning_rate": 4.309323899427671e-06, + "lm_loss": 5.5183, + "loss": 1.5399, + "step": 1228, + "text_contrastive_loss": 0.8122 + }, + { + "contrastive_loss": 0.5158, + "epoch": 2.7742663656884874, + "grad_norm": 16.77458381652832, + "learning_rate": 4.302138773691079e-06, + "lm_loss": 5.4194, + "loss": 1.4964, + "step": 1229, + "text_contrastive_loss": 0.8773 + }, + { + "contrastive_loss": 0.4536, + "epoch": 2.7765237020316027, + "grad_norm": 15.139562606811523, + "learning_rate": 4.294955117398139e-06, + "lm_loss": 5.492, + "loss": 1.3421, + "step": 1230, + "text_contrastive_loss": 0.6786 + }, + { + "contrastive_loss": 0.4736, + "epoch": 2.778781038374718, + "grad_norm": 14.490601539611816, + "learning_rate": 4.287772945675035e-06, + "lm_loss": 5.4395, + "loss": 1.4308, + "step": 1231, + "text_contrastive_loss": 0.8265 + }, + { + "contrastive_loss": 0.4915, + "epoch": 2.781038374717833, + "grad_norm": 14.887679100036621, + "learning_rate": 4.280592273644829e-06, + "lm_loss": 5.4762, + "loss": 1.397, + "step": 1232, + "text_contrastive_loss": 0.7159 + }, + { + "contrastive_loss": 0.477, + "epoch": 2.783295711060948, + "grad_norm": 15.118358612060547, + "learning_rate": 4.273413116427419e-06, + "lm_loss": 5.517, + "loss": 1.3906, + "step": 1233, + "text_contrastive_loss": 0.7237 + }, + { + "contrastive_loss": 0.5144, + "epoch": 2.785553047404063, + "grad_norm": 16.686277389526367, + "learning_rate": 4.26623548913952e-06, + "lm_loss": 5.4588, + "loss": 1.4282, + "step": 1234, + "text_contrastive_loss": 0.7359 + }, + { + "contrastive_loss": 0.5352, + "epoch": 2.7878103837471784, + "grad_norm": 14.889708518981934, + "learning_rate": 4.259059406894619e-06, + "lm_loss": 5.438, + "loss": 1.431, + "step": 1235, + "text_contrastive_loss": 0.704 + }, + { + "contrastive_loss": 0.5859, + "epoch": 2.7900677200902937, + "grad_norm": 16.397249221801758, + "learning_rate": 4.251884884802956e-06, + "lm_loss": 5.4049, + "loss": 1.5383, + "step": 1236, + "text_contrastive_loss": 0.8239 + }, + { + "contrastive_loss": 0.5136, + "epoch": 2.7923250564334086, + "grad_norm": 16.485980987548828, + "learning_rate": 4.2447119379714805e-06, + "lm_loss": 5.538, + "loss": 1.4517, + "step": 1237, + "text_contrastive_loss": 0.7685 + }, + { + "contrastive_loss": 0.4696, + "epoch": 2.7945823927765234, + "grad_norm": 13.96359634399414, + "learning_rate": 4.237540581503831e-06, + "lm_loss": 5.4367, + "loss": 1.3865, + "step": 1238, + "text_contrastive_loss": 0.7464 + }, + { + "contrastive_loss": 0.4392, + "epoch": 2.7968397291196387, + "grad_norm": 14.440736770629883, + "learning_rate": 4.23037083050029e-06, + "lm_loss": 5.5737, + "loss": 1.4151, + "step": 1239, + "text_contrastive_loss": 0.8371 + }, + { + "contrastive_loss": 0.5125, + "epoch": 2.799097065462754, + "grad_norm": 17.516603469848633, + "learning_rate": 4.223202700057765e-06, + "lm_loss": 5.5025, + "loss": 1.4648, + "step": 1240, + "text_contrastive_loss": 0.8039 + }, + { + "contrastive_loss": 0.4722, + "epoch": 2.801354401805869, + "grad_norm": 14.528120040893555, + "learning_rate": 4.216036205269748e-06, + "lm_loss": 5.3938, + "loss": 1.3782, + "step": 1241, + "text_contrastive_loss": 0.7332 + }, + { + "contrastive_loss": 0.4854, + "epoch": 2.8036117381489842, + "grad_norm": 13.95931625366211, + "learning_rate": 4.20887136122629e-06, + "lm_loss": 5.5154, + "loss": 1.4556, + "step": 1242, + "text_contrastive_loss": 0.8372 + }, + { + "contrastive_loss": 0.6281, + "epoch": 2.805869074492099, + "grad_norm": 17.318078994750977, + "learning_rate": 4.201708183013963e-06, + "lm_loss": 5.4792, + "loss": 1.6898, + "step": 1243, + "text_contrastive_loss": 1.0275 + }, + { + "contrastive_loss": 0.5044, + "epoch": 2.8081264108352144, + "grad_norm": 16.64516258239746, + "learning_rate": 4.1945466857158336e-06, + "lm_loss": 5.4547, + "loss": 1.4273, + "step": 1244, + "text_contrastive_loss": 0.7547 + }, + { + "contrastive_loss": 0.4817, + "epoch": 2.8103837471783297, + "grad_norm": 17.341358184814453, + "learning_rate": 4.187386884411426e-06, + "lm_loss": 5.5751, + "loss": 1.4236, + "step": 1245, + "text_contrastive_loss": 0.7687 + }, + { + "contrastive_loss": 0.6545, + "epoch": 2.8126410835214446, + "grad_norm": 18.0773983001709, + "learning_rate": 4.1802287941767e-06, + "lm_loss": 5.5201, + "loss": 1.6484, + "step": 1246, + "text_contrastive_loss": 0.8837 + }, + { + "contrastive_loss": 0.5701, + "epoch": 2.81489841986456, + "grad_norm": 15.543575286865234, + "learning_rate": 4.173072430084002e-06, + "lm_loss": 5.5253, + "loss": 1.588, + "step": 1247, + "text_contrastive_loss": 0.9308 + }, + { + "contrastive_loss": 0.4257, + "epoch": 2.8171557562076748, + "grad_norm": 14.513049125671387, + "learning_rate": 4.165917807202055e-06, + "lm_loss": 5.54, + "loss": 1.3121, + "step": 1248, + "text_contrastive_loss": 0.6648 + }, + { + "contrastive_loss": 0.4788, + "epoch": 2.81941309255079, + "grad_norm": 14.176787376403809, + "learning_rate": 4.1587649405959065e-06, + "lm_loss": 5.465, + "loss": 1.4523, + "step": 1249, + "text_contrastive_loss": 0.854 + }, + { + "contrastive_loss": 0.5616, + "epoch": 2.8216704288939054, + "grad_norm": 15.48112964630127, + "learning_rate": 4.151613845326912e-06, + "lm_loss": 5.4052, + "loss": 1.514, + "step": 1250, + "text_contrastive_loss": 0.8237 + }, + { + "contrastive_loss": 0.5131, + "epoch": 2.8239277652370203, + "grad_norm": 16.84682273864746, + "learning_rate": 4.144464536452693e-06, + "lm_loss": 5.4927, + "loss": 1.3859, + "step": 1251, + "text_contrastive_loss": 0.6471 + }, + { + "contrastive_loss": 0.4262, + "epoch": 2.8261851015801356, + "grad_norm": 12.922840118408203, + "learning_rate": 4.137317029027111e-06, + "lm_loss": 5.5065, + "loss": 1.3289, + "step": 1252, + "text_contrastive_loss": 0.704 + }, + { + "contrastive_loss": 0.4614, + "epoch": 2.8284424379232505, + "grad_norm": 14.069493293762207, + "learning_rate": 4.1301713381002394e-06, + "lm_loss": 5.515, + "loss": 1.4152, + "step": 1253, + "text_contrastive_loss": 0.8045 + }, + { + "contrastive_loss": 0.5484, + "epoch": 2.8306997742663658, + "grad_norm": 14.741721153259277, + "learning_rate": 4.123027478718318e-06, + "lm_loss": 5.5015, + "loss": 1.457, + "step": 1254, + "text_contrastive_loss": 0.7169 + }, + { + "contrastive_loss": 0.5105, + "epoch": 2.832957110609481, + "grad_norm": 15.50547981262207, + "learning_rate": 4.115885465923734e-06, + "lm_loss": 5.5024, + "loss": 1.4671, + "step": 1255, + "text_contrastive_loss": 0.8127 + }, + { + "contrastive_loss": 0.5651, + "epoch": 2.835214446952596, + "grad_norm": 15.123098373413086, + "learning_rate": 4.108745314754989e-06, + "lm_loss": 5.4771, + "loss": 1.4908, + "step": 1256, + "text_contrastive_loss": 0.7559 + }, + { + "contrastive_loss": 0.5935, + "epoch": 2.837471783295711, + "grad_norm": 16.420448303222656, + "learning_rate": 4.101607040246659e-06, + "lm_loss": 5.4353, + "loss": 1.5466, + "step": 1257, + "text_contrastive_loss": 0.8192 + }, + { + "contrastive_loss": 0.5077, + "epoch": 2.839729119638826, + "grad_norm": 16.60091781616211, + "learning_rate": 4.094470657429374e-06, + "lm_loss": 5.444, + "loss": 1.4865, + "step": 1258, + "text_contrastive_loss": 0.8688 + }, + { + "contrastive_loss": 0.4556, + "epoch": 2.8419864559819414, + "grad_norm": 14.131499290466309, + "learning_rate": 4.087336181329777e-06, + "lm_loss": 5.5058, + "loss": 1.4254, + "step": 1259, + "text_contrastive_loss": 0.8385 + }, + { + "contrastive_loss": 0.5435, + "epoch": 2.8442437923250563, + "grad_norm": 16.15544319152832, + "learning_rate": 4.080203626970498e-06, + "lm_loss": 5.527, + "loss": 1.5237, + "step": 1260, + "text_contrastive_loss": 0.8549 + }, + { + "contrastive_loss": 0.5164, + "epoch": 2.8465011286681716, + "grad_norm": 14.349729537963867, + "learning_rate": 4.0730730093701185e-06, + "lm_loss": 5.425, + "loss": 1.4032, + "step": 1261, + "text_contrastive_loss": 0.6887 + }, + { + "contrastive_loss": 0.5041, + "epoch": 2.8487584650112865, + "grad_norm": 15.340802192687988, + "learning_rate": 4.065944343543146e-06, + "lm_loss": 5.4794, + "loss": 1.4466, + "step": 1262, + "text_contrastive_loss": 0.7892 + }, + { + "contrastive_loss": 0.5959, + "epoch": 2.851015801354402, + "grad_norm": 16.059484481811523, + "learning_rate": 4.058817644499973e-06, + "lm_loss": 5.5768, + "loss": 1.5973, + "step": 1263, + "text_contrastive_loss": 0.8875 + }, + { + "contrastive_loss": 0.6366, + "epoch": 2.853273137697517, + "grad_norm": 15.674849510192871, + "learning_rate": 4.051692927246857e-06, + "lm_loss": 5.4997, + "loss": 1.5755, + "step": 1264, + "text_contrastive_loss": 0.7779 + }, + { + "contrastive_loss": 0.4139, + "epoch": 2.855530474040632, + "grad_norm": 16.712678909301758, + "learning_rate": 4.044570206785874e-06, + "lm_loss": 5.4635, + "loss": 1.327, + "step": 1265, + "text_contrastive_loss": 0.7334 + }, + { + "contrastive_loss": 0.4657, + "epoch": 2.8577878103837473, + "grad_norm": 14.141221046447754, + "learning_rate": 4.037449498114903e-06, + "lm_loss": 5.4901, + "loss": 1.4713, + "step": 1266, + "text_contrastive_loss": 0.9131 + }, + { + "contrastive_loss": 0.5255, + "epoch": 2.860045146726862, + "grad_norm": 15.25301742553711, + "learning_rate": 4.0303308162275835e-06, + "lm_loss": 5.5251, + "loss": 1.468, + "step": 1267, + "text_contrastive_loss": 0.7801 + }, + { + "contrastive_loss": 0.4374, + "epoch": 2.8623024830699775, + "grad_norm": 13.475282669067383, + "learning_rate": 4.0232141761132894e-06, + "lm_loss": 5.4753, + "loss": 1.3752, + "step": 1268, + "text_contrastive_loss": 0.7805 + }, + { + "contrastive_loss": 0.5645, + "epoch": 2.864559819413093, + "grad_norm": 16.055419921875, + "learning_rate": 4.016099592757091e-06, + "lm_loss": 5.4632, + "loss": 1.5468, + "step": 1269, + "text_contrastive_loss": 0.872 + }, + { + "contrastive_loss": 0.5056, + "epoch": 2.8668171557562077, + "grad_norm": 15.780978202819824, + "learning_rate": 4.008987081139734e-06, + "lm_loss": 5.5773, + "loss": 1.4674, + "step": 1270, + "text_contrastive_loss": 0.8082 + }, + { + "contrastive_loss": 0.5171, + "epoch": 2.8690744920993225, + "grad_norm": 14.984731674194336, + "learning_rate": 4.0018766562375984e-06, + "lm_loss": 5.389, + "loss": 1.499, + "step": 1271, + "text_contrastive_loss": 0.8861 + }, + { + "contrastive_loss": 0.4955, + "epoch": 2.871331828442438, + "grad_norm": 14.706584930419922, + "learning_rate": 3.994768333022669e-06, + "lm_loss": 5.4877, + "loss": 1.4122, + "step": 1272, + "text_contrastive_loss": 0.7357 + }, + { + "contrastive_loss": 0.6436, + "epoch": 2.873589164785553, + "grad_norm": 15.665273666381836, + "learning_rate": 3.987662126462507e-06, + "lm_loss": 5.483, + "loss": 1.6234, + "step": 1273, + "text_contrastive_loss": 0.863 + }, + { + "contrastive_loss": 0.5189, + "epoch": 2.875846501128668, + "grad_norm": 16.631532669067383, + "learning_rate": 3.980558051520218e-06, + "lm_loss": 5.5155, + "loss": 1.4515, + "step": 1274, + "text_contrastive_loss": 0.7621 + }, + { + "contrastive_loss": 0.5533, + "epoch": 2.8781038374717833, + "grad_norm": 14.17483901977539, + "learning_rate": 3.973456123154415e-06, + "lm_loss": 5.5582, + "loss": 1.5751, + "step": 1275, + "text_contrastive_loss": 0.932 + }, + { + "contrastive_loss": 0.4405, + "epoch": 2.880361173814898, + "grad_norm": 14.921122550964355, + "learning_rate": 3.966356356319196e-06, + "lm_loss": 5.4335, + "loss": 1.3677, + "step": 1276, + "text_contrastive_loss": 0.7678 + }, + { + "contrastive_loss": 0.4333, + "epoch": 2.8826185101580135, + "grad_norm": 14.177828788757324, + "learning_rate": 3.959258765964104e-06, + "lm_loss": 5.4802, + "loss": 1.3804, + "step": 1277, + "text_contrastive_loss": 0.7981 + }, + { + "contrastive_loss": 0.5632, + "epoch": 2.884875846501129, + "grad_norm": 16.90080451965332, + "learning_rate": 3.9521633670341005e-06, + "lm_loss": 5.524, + "loss": 1.5014, + "step": 1278, + "text_contrastive_loss": 0.7715 + }, + { + "contrastive_loss": 0.538, + "epoch": 2.8871331828442437, + "grad_norm": 16.282695770263672, + "learning_rate": 3.9450701744695325e-06, + "lm_loss": 5.5656, + "loss": 1.5157, + "step": 1279, + "text_contrastive_loss": 0.8424 + }, + { + "contrastive_loss": 0.4526, + "epoch": 2.889390519187359, + "grad_norm": 15.23658275604248, + "learning_rate": 3.937979203206103e-06, + "lm_loss": 5.5136, + "loss": 1.37, + "step": 1280, + "text_contrastive_loss": 0.732 + }, + { + "contrastive_loss": 0.4487, + "epoch": 2.891647855530474, + "grad_norm": 12.806868553161621, + "learning_rate": 3.930890468174833e-06, + "lm_loss": 5.4335, + "loss": 1.4022, + "step": 1281, + "text_contrastive_loss": 0.8204 + }, + { + "contrastive_loss": 0.551, + "epoch": 2.893905191873589, + "grad_norm": 13.964062690734863, + "learning_rate": 3.92380398430204e-06, + "lm_loss": 5.4116, + "loss": 1.5054, + "step": 1282, + "text_contrastive_loss": 0.8266 + }, + { + "contrastive_loss": 0.5569, + "epoch": 2.8961625282167045, + "grad_norm": 14.256570816040039, + "learning_rate": 3.916719766509297e-06, + "lm_loss": 5.4027, + "loss": 1.4482, + "step": 1283, + "text_contrastive_loss": 0.7021 + }, + { + "contrastive_loss": 0.5465, + "epoch": 2.8984198645598194, + "grad_norm": 16.259355545043945, + "learning_rate": 3.9096378297134115e-06, + "lm_loss": 5.4163, + "loss": 1.5062, + "step": 1284, + "text_contrastive_loss": 0.8361 + }, + { + "contrastive_loss": 0.5112, + "epoch": 2.9006772009029347, + "grad_norm": 15.341547966003418, + "learning_rate": 3.90255818882638e-06, + "lm_loss": 5.512, + "loss": 1.4347, + "step": 1285, + "text_contrastive_loss": 0.7445 + }, + { + "contrastive_loss": 0.5085, + "epoch": 2.9029345372460496, + "grad_norm": 16.757909774780273, + "learning_rate": 3.89548085875537e-06, + "lm_loss": 5.3939, + "loss": 1.4695, + "step": 1286, + "text_contrastive_loss": 0.8432 + }, + { + "contrastive_loss": 0.5826, + "epoch": 2.905191873589165, + "grad_norm": 14.489827156066895, + "learning_rate": 3.888405854402684e-06, + "lm_loss": 5.5054, + "loss": 1.6021, + "step": 1287, + "text_contrastive_loss": 0.9379 + }, + { + "contrastive_loss": 0.3929, + "epoch": 2.90744920993228, + "grad_norm": 12.609889030456543, + "learning_rate": 3.881333190665723e-06, + "lm_loss": 5.4827, + "loss": 1.2927, + "step": 1288, + "text_contrastive_loss": 0.703 + }, + { + "contrastive_loss": 0.5341, + "epoch": 2.909706546275395, + "grad_norm": 15.732161521911621, + "learning_rate": 3.8742628824369624e-06, + "lm_loss": 5.4473, + "loss": 1.4551, + "step": 1289, + "text_contrastive_loss": 0.7525 + }, + { + "contrastive_loss": 0.442, + "epoch": 2.91196388261851, + "grad_norm": 14.732170104980469, + "learning_rate": 3.86719494460392e-06, + "lm_loss": 5.5936, + "loss": 1.4205, + "step": 1290, + "text_contrastive_loss": 0.8383 + }, + { + "contrastive_loss": 0.5042, + "epoch": 2.9142212189616252, + "grad_norm": 15.174171447753906, + "learning_rate": 3.8601293920491165e-06, + "lm_loss": 5.6159, + "loss": 1.4833, + "step": 1291, + "text_contrastive_loss": 0.835 + }, + { + "contrastive_loss": 0.6062, + "epoch": 2.9164785553047405, + "grad_norm": 16.383495330810547, + "learning_rate": 3.853066239650055e-06, + "lm_loss": 5.5655, + "loss": 1.6561, + "step": 1292, + "text_contrastive_loss": 0.9867 + }, + { + "contrastive_loss": 0.5467, + "epoch": 2.9187358916478554, + "grad_norm": 17.01769256591797, + "learning_rate": 3.846005502279182e-06, + "lm_loss": 5.3907, + "loss": 1.4632, + "step": 1293, + "text_contrastive_loss": 0.7548 + }, + { + "contrastive_loss": 0.5771, + "epoch": 2.9209932279909707, + "grad_norm": 16.033763885498047, + "learning_rate": 3.83894719480386e-06, + "lm_loss": 5.4395, + "loss": 1.5986, + "step": 1294, + "text_contrastive_loss": 0.9552 + }, + { + "contrastive_loss": 0.4185, + "epoch": 2.9232505643340856, + "grad_norm": 13.943612098693848, + "learning_rate": 3.8318913320863355e-06, + "lm_loss": 5.4561, + "loss": 1.3335, + "step": 1295, + "text_contrastive_loss": 0.7388 + }, + { + "contrastive_loss": 0.4371, + "epoch": 2.925507900677201, + "grad_norm": 13.424967765808105, + "learning_rate": 3.8248379289837065e-06, + "lm_loss": 5.3721, + "loss": 1.3949, + "step": 1296, + "text_contrastive_loss": 0.841 + }, + { + "contrastive_loss": 0.5854, + "epoch": 2.927765237020316, + "grad_norm": 16.367528915405273, + "learning_rate": 3.81778700034789e-06, + "lm_loss": 5.4614, + "loss": 1.542, + "step": 1297, + "text_contrastive_loss": 0.821 + }, + { + "contrastive_loss": 0.4734, + "epoch": 2.930022573363431, + "grad_norm": 14.303869247436523, + "learning_rate": 3.810738561025599e-06, + "lm_loss": 5.4918, + "loss": 1.3774, + "step": 1298, + "text_contrastive_loss": 0.7096 + }, + { + "contrastive_loss": 0.557, + "epoch": 2.9322799097065464, + "grad_norm": 17.35906982421875, + "learning_rate": 3.803692625858295e-06, + "lm_loss": 5.4766, + "loss": 1.5341, + "step": 1299, + "text_contrastive_loss": 0.8588 + }, + { + "contrastive_loss": 0.5556, + "epoch": 2.9345372460496613, + "grad_norm": 17.419885635375977, + "learning_rate": 3.7966492096821773e-06, + "lm_loss": 5.558, + "loss": 1.5786, + "step": 1300, + "text_contrastive_loss": 0.9343 + }, + { + "contrastive_loss": 0.4632, + "epoch": 2.9367945823927766, + "grad_norm": 14.226881980895996, + "learning_rate": 3.7896083273281324e-06, + "lm_loss": 5.4989, + "loss": 1.3574, + "step": 1301, + "text_contrastive_loss": 0.6886 + }, + { + "contrastive_loss": 0.5653, + "epoch": 2.939051918735892, + "grad_norm": 16.705991744995117, + "learning_rate": 3.7825699936217183e-06, + "lm_loss": 5.434, + "loss": 1.5657, + "step": 1302, + "text_contrastive_loss": 0.9139 + }, + { + "contrastive_loss": 0.5484, + "epoch": 2.9413092550790068, + "grad_norm": 16.482421875, + "learning_rate": 3.7755342233831188e-06, + "lm_loss": 5.4384, + "loss": 1.5302, + "step": 1303, + "text_contrastive_loss": 0.876 + }, + { + "contrastive_loss": 0.4876, + "epoch": 2.9435665914221216, + "grad_norm": 14.822606086730957, + "learning_rate": 3.7685010314271287e-06, + "lm_loss": 5.5107, + "loss": 1.5152, + "step": 1304, + "text_contrastive_loss": 0.953 + }, + { + "contrastive_loss": 0.5419, + "epoch": 2.945823927765237, + "grad_norm": 14.894735336303711, + "learning_rate": 3.761470432563109e-06, + "lm_loss": 5.6487, + "loss": 1.4286, + "step": 1305, + "text_contrastive_loss": 0.6437 + }, + { + "contrastive_loss": 0.5134, + "epoch": 2.9480812641083523, + "grad_norm": 15.99804401397705, + "learning_rate": 3.75444244159496e-06, + "lm_loss": 5.461, + "loss": 1.4507, + "step": 1306, + "text_contrastive_loss": 0.7824 + }, + { + "contrastive_loss": 0.6006, + "epoch": 2.950338600451467, + "grad_norm": 15.931785583496094, + "learning_rate": 3.747417073321092e-06, + "lm_loss": 5.6079, + "loss": 1.5664, + "step": 1307, + "text_contrastive_loss": 0.8101 + }, + { + "contrastive_loss": 0.5514, + "epoch": 2.9525959367945824, + "grad_norm": 14.610923767089844, + "learning_rate": 3.740394342534394e-06, + "lm_loss": 5.4427, + "loss": 1.524, + "step": 1308, + "text_contrastive_loss": 0.8566 + }, + { + "contrastive_loss": 0.5241, + "epoch": 2.9548532731376973, + "grad_norm": 14.238372802734375, + "learning_rate": 3.7333742640221994e-06, + "lm_loss": 5.6008, + "loss": 1.5176, + "step": 1309, + "text_contrastive_loss": 0.8669 + }, + { + "contrastive_loss": 0.6472, + "epoch": 2.9571106094808126, + "grad_norm": 16.7310791015625, + "learning_rate": 3.7263568525662574e-06, + "lm_loss": 5.5729, + "loss": 1.6776, + "step": 1310, + "text_contrastive_loss": 0.9462 + }, + { + "contrastive_loss": 0.5512, + "epoch": 2.959367945823928, + "grad_norm": 15.256760597229004, + "learning_rate": 3.7193421229427017e-06, + "lm_loss": 5.418, + "loss": 1.4937, + "step": 1311, + "text_contrastive_loss": 0.8014 + }, + { + "contrastive_loss": 0.5147, + "epoch": 2.961625282167043, + "grad_norm": 15.781255722045898, + "learning_rate": 3.7123300899220193e-06, + "lm_loss": 5.4881, + "loss": 1.3988, + "step": 1312, + "text_contrastive_loss": 0.6707 + }, + { + "contrastive_loss": 0.4753, + "epoch": 2.963882618510158, + "grad_norm": 14.34984016418457, + "learning_rate": 3.7053207682690184e-06, + "lm_loss": 5.5296, + "loss": 1.3822, + "step": 1313, + "text_contrastive_loss": 0.7078 + }, + { + "contrastive_loss": 0.5374, + "epoch": 2.966139954853273, + "grad_norm": 15.308257102966309, + "learning_rate": 3.698314172742799e-06, + "lm_loss": 5.3984, + "loss": 1.5562, + "step": 1314, + "text_contrastive_loss": 0.958 + }, + { + "contrastive_loss": 0.6667, + "epoch": 2.9683972911963883, + "grad_norm": 18.594757080078125, + "learning_rate": 3.691310318096719e-06, + "lm_loss": 5.5117, + "loss": 1.669, + "step": 1315, + "text_contrastive_loss": 0.9024 + }, + { + "contrastive_loss": 0.4934, + "epoch": 2.9706546275395036, + "grad_norm": 14.322915077209473, + "learning_rate": 3.684309219078368e-06, + "lm_loss": 5.5156, + "loss": 1.3692, + "step": 1316, + "text_contrastive_loss": 0.6484 + }, + { + "contrastive_loss": 0.4379, + "epoch": 2.9729119638826185, + "grad_norm": 14.292099952697754, + "learning_rate": 3.6773108904295294e-06, + "lm_loss": 5.4977, + "loss": 1.3835, + "step": 1317, + "text_contrastive_loss": 0.7918 + }, + { + "contrastive_loss": 0.6038, + "epoch": 2.975169300225734, + "grad_norm": 15.286895751953125, + "learning_rate": 3.6703153468861585e-06, + "lm_loss": 5.4807, + "loss": 1.5857, + "step": 1318, + "text_contrastive_loss": 0.8677 + }, + { + "contrastive_loss": 0.5631, + "epoch": 2.9774266365688487, + "grad_norm": 15.211936950683594, + "learning_rate": 3.663322603178339e-06, + "lm_loss": 5.5773, + "loss": 1.593, + "step": 1319, + "text_contrastive_loss": 0.9444 + }, + { + "contrastive_loss": 0.5717, + "epoch": 2.979683972911964, + "grad_norm": 16.08132553100586, + "learning_rate": 3.6563326740302664e-06, + "lm_loss": 5.4526, + "loss": 1.5835, + "step": 1320, + "text_contrastive_loss": 0.9329 + }, + { + "contrastive_loss": 0.5526, + "epoch": 2.9819413092550793, + "grad_norm": 14.952582359313965, + "learning_rate": 3.6493455741602035e-06, + "lm_loss": 5.5379, + "loss": 1.4936, + "step": 1321, + "text_contrastive_loss": 0.7743 + }, + { + "contrastive_loss": 0.4175, + "epoch": 2.984198645598194, + "grad_norm": 15.59471321105957, + "learning_rate": 3.642361318280461e-06, + "lm_loss": 5.4184, + "loss": 1.2694, + "step": 1322, + "text_contrastive_loss": 0.6201 + }, + { + "contrastive_loss": 0.4906, + "epoch": 2.986455981941309, + "grad_norm": 14.077731132507324, + "learning_rate": 3.635379921097359e-06, + "lm_loss": 5.5053, + "loss": 1.4442, + "step": 1323, + "text_contrastive_loss": 0.806 + }, + { + "contrastive_loss": 0.536, + "epoch": 2.9887133182844243, + "grad_norm": 15.875018119812012, + "learning_rate": 3.6284013973111962e-06, + "lm_loss": 5.5958, + "loss": 1.4832, + "step": 1324, + "text_contrastive_loss": 0.7754 + }, + { + "contrastive_loss": 0.5619, + "epoch": 2.9909706546275396, + "grad_norm": 16.182844161987305, + "learning_rate": 3.621425761616224e-06, + "lm_loss": 5.4129, + "loss": 1.579, + "step": 1325, + "text_contrastive_loss": 0.9516 + }, + { + "contrastive_loss": 0.5516, + "epoch": 2.9932279909706545, + "grad_norm": 16.243816375732422, + "learning_rate": 3.614453028700613e-06, + "lm_loss": 5.4592, + "loss": 1.4853, + "step": 1326, + "text_contrastive_loss": 0.7756 + }, + { + "contrastive_loss": 0.6289, + "epoch": 2.99548532731377, + "grad_norm": 17.56565284729004, + "learning_rate": 3.6074832132464165e-06, + "lm_loss": 5.5505, + "loss": 1.5472, + "step": 1327, + "text_contrastive_loss": 0.7264 + }, + { + "contrastive_loss": 0.5773, + "epoch": 2.9977426636568847, + "grad_norm": 18.263599395751953, + "learning_rate": 3.600516329929551e-06, + "lm_loss": 5.678, + "loss": 1.5534, + "step": 1328, + "text_contrastive_loss": 0.8166 + }, + { + "contrastive_loss": 0.3188, + "epoch": 3.0, + "grad_norm": 14.68419361114502, + "learning_rate": 3.5935523934197537e-06, + "lm_loss": 5.4675, + "loss": 1.0959, + "step": 1329, + "text_contrastive_loss": 0.4607 + }, + { + "contrastive_loss": 0.5175, + "epoch": 3.0022573363431153, + "grad_norm": 14.676491737365723, + "learning_rate": 3.5865914183805606e-06, + "lm_loss": 5.485, + "loss": 1.5562, + "step": 1330, + "text_contrastive_loss": 0.9802 + }, + { + "contrastive_loss": 0.4993, + "epoch": 3.00451467268623, + "grad_norm": 13.769871711730957, + "learning_rate": 3.5796334194692704e-06, + "lm_loss": 5.5265, + "loss": 1.4622, + "step": 1331, + "text_contrastive_loss": 0.8205 + }, + { + "contrastive_loss": 0.4971, + "epoch": 3.0067720090293455, + "grad_norm": 13.985860824584961, + "learning_rate": 3.572678411336916e-06, + "lm_loss": 5.4778, + "loss": 1.3986, + "step": 1332, + "text_contrastive_loss": 0.7073 + }, + { + "contrastive_loss": 0.5364, + "epoch": 3.0090293453724604, + "grad_norm": 14.060493469238281, + "learning_rate": 3.5657264086282317e-06, + "lm_loss": 5.4879, + "loss": 1.4643, + "step": 1333, + "text_contrastive_loss": 0.7582 + }, + { + "contrastive_loss": 0.4358, + "epoch": 3.0112866817155757, + "grad_norm": 14.356574058532715, + "learning_rate": 3.5587774259816234e-06, + "lm_loss": 5.5635, + "loss": 1.3742, + "step": 1334, + "text_contrastive_loss": 0.7641 + }, + { + "contrastive_loss": 0.5886, + "epoch": 3.0135440180586905, + "grad_norm": 13.934671401977539, + "learning_rate": 3.5518314780291384e-06, + "lm_loss": 5.5427, + "loss": 1.5663, + "step": 1335, + "text_contrastive_loss": 0.8469 + }, + { + "contrastive_loss": 0.4827, + "epoch": 3.015801354401806, + "grad_norm": 15.489354133605957, + "learning_rate": 3.544888579396435e-06, + "lm_loss": 5.4781, + "loss": 1.5782, + "step": 1336, + "text_contrastive_loss": 1.0954 + }, + { + "contrastive_loss": 0.4895, + "epoch": 3.018058690744921, + "grad_norm": 14.8477201461792, + "learning_rate": 3.5379487447027483e-06, + "lm_loss": 5.4953, + "loss": 1.3889, + "step": 1337, + "text_contrastive_loss": 0.6998 + }, + { + "contrastive_loss": 0.39, + "epoch": 3.020316027088036, + "grad_norm": 12.811365127563477, + "learning_rate": 3.5310119885608625e-06, + "lm_loss": 5.5246, + "loss": 1.25, + "step": 1338, + "text_contrastive_loss": 0.615 + }, + { + "contrastive_loss": 0.4704, + "epoch": 3.0225733634311513, + "grad_norm": 15.107719421386719, + "learning_rate": 3.524078325577084e-06, + "lm_loss": 5.5308, + "loss": 1.3804, + "step": 1339, + "text_contrastive_loss": 0.7139 + }, + { + "contrastive_loss": 0.5516, + "epoch": 3.024830699774266, + "grad_norm": 15.045534133911133, + "learning_rate": 3.517147770351199e-06, + "lm_loss": 5.4304, + "loss": 1.495, + "step": 1340, + "text_contrastive_loss": 0.8007 + }, + { + "contrastive_loss": 0.5238, + "epoch": 3.0270880361173815, + "grad_norm": 14.877632141113281, + "learning_rate": 3.5102203374764555e-06, + "lm_loss": 5.4377, + "loss": 1.4971, + "step": 1341, + "text_contrastive_loss": 0.859 + }, + { + "contrastive_loss": 0.5307, + "epoch": 3.0293453724604964, + "grad_norm": 14.242051124572754, + "learning_rate": 3.503296041539522e-06, + "lm_loss": 5.6175, + "loss": 1.471, + "step": 1342, + "text_contrastive_loss": 0.7571 + }, + { + "contrastive_loss": 0.4857, + "epoch": 3.0316027088036117, + "grad_norm": 14.8358736038208, + "learning_rate": 3.496374897120467e-06, + "lm_loss": 5.4324, + "loss": 1.4134, + "step": 1343, + "text_contrastive_loss": 0.769 + }, + { + "contrastive_loss": 0.4495, + "epoch": 3.033860045146727, + "grad_norm": 12.73051929473877, + "learning_rate": 3.4894569187927204e-06, + "lm_loss": 5.4313, + "loss": 1.2726, + "step": 1344, + "text_contrastive_loss": 0.56 + }, + { + "contrastive_loss": 0.4155, + "epoch": 3.036117381489842, + "grad_norm": 13.076519012451172, + "learning_rate": 3.4825421211230437e-06, + "lm_loss": 5.4435, + "loss": 1.3203, + "step": 1345, + "text_contrastive_loss": 0.7209 + }, + { + "contrastive_loss": 0.4479, + "epoch": 3.038374717832957, + "grad_norm": 13.648487091064453, + "learning_rate": 3.4756305186715046e-06, + "lm_loss": 5.4931, + "loss": 1.361, + "step": 1346, + "text_contrastive_loss": 0.7275 + }, + { + "contrastive_loss": 0.5394, + "epoch": 3.040632054176072, + "grad_norm": 14.72055721282959, + "learning_rate": 3.4687221259914394e-06, + "lm_loss": 5.3795, + "loss": 1.4644, + "step": 1347, + "text_contrastive_loss": 0.7739 + }, + { + "contrastive_loss": 0.4843, + "epoch": 3.0428893905191874, + "grad_norm": 15.072990417480469, + "learning_rate": 3.461816957629429e-06, + "lm_loss": 5.4639, + "loss": 1.4175, + "step": 1348, + "text_contrastive_loss": 0.7737 + }, + { + "contrastive_loss": 0.4437, + "epoch": 3.0451467268623027, + "grad_norm": 14.582855224609375, + "learning_rate": 3.4549150281252635e-06, + "lm_loss": 5.4765, + "loss": 1.3774, + "step": 1349, + "text_contrastive_loss": 0.7721 + }, + { + "contrastive_loss": 0.5707, + "epoch": 3.0474040632054176, + "grad_norm": 16.179119110107422, + "learning_rate": 3.448016352011914e-06, + "lm_loss": 5.5304, + "loss": 1.5403, + "step": 1350, + "text_contrastive_loss": 0.833 + }, + { + "contrastive_loss": 0.475, + "epoch": 3.049661399548533, + "grad_norm": 15.216938018798828, + "learning_rate": 3.441120943815497e-06, + "lm_loss": 5.4244, + "loss": 1.4228, + "step": 1351, + "text_contrastive_loss": 0.8107 + }, + { + "contrastive_loss": 0.5152, + "epoch": 3.0519187358916477, + "grad_norm": 16.394363403320312, + "learning_rate": 3.4342288180552556e-06, + "lm_loss": 5.4509, + "loss": 1.4574, + "step": 1352, + "text_contrastive_loss": 0.7942 + }, + { + "contrastive_loss": 0.4183, + "epoch": 3.054176072234763, + "grad_norm": 14.682599067687988, + "learning_rate": 3.427339989243514e-06, + "lm_loss": 5.4245, + "loss": 1.3967, + "step": 1353, + "text_contrastive_loss": 0.8719 + }, + { + "contrastive_loss": 0.3864, + "epoch": 3.056433408577878, + "grad_norm": 12.30835247039795, + "learning_rate": 3.420454471885659e-06, + "lm_loss": 5.4758, + "loss": 1.3097, + "step": 1354, + "text_contrastive_loss": 0.7515 + }, + { + "contrastive_loss": 0.5665, + "epoch": 3.0586907449209932, + "grad_norm": 16.540470123291016, + "learning_rate": 3.4135722804801004e-06, + "lm_loss": 5.4518, + "loss": 1.5741, + "step": 1355, + "text_contrastive_loss": 0.9249 + }, + { + "contrastive_loss": 0.5511, + "epoch": 3.0609480812641086, + "grad_norm": 14.620187759399414, + "learning_rate": 3.4066934295182496e-06, + "lm_loss": 5.4517, + "loss": 1.4554, + "step": 1356, + "text_contrastive_loss": 0.7182 + }, + { + "contrastive_loss": 0.3712, + "epoch": 3.0632054176072234, + "grad_norm": 12.636406898498535, + "learning_rate": 3.3998179334844823e-06, + "lm_loss": 5.4696, + "loss": 1.2953, + "step": 1357, + "text_contrastive_loss": 0.7543 + }, + { + "contrastive_loss": 0.5408, + "epoch": 3.0654627539503387, + "grad_norm": 18.53982925415039, + "learning_rate": 3.3929458068561073e-06, + "lm_loss": 5.4789, + "loss": 1.5216, + "step": 1358, + "text_contrastive_loss": 0.8658 + }, + { + "contrastive_loss": 0.4477, + "epoch": 3.0677200902934536, + "grad_norm": 15.201266288757324, + "learning_rate": 3.3860770641033417e-06, + "lm_loss": 5.4602, + "loss": 1.3675, + "step": 1359, + "text_contrastive_loss": 0.7477 + }, + { + "contrastive_loss": 0.5169, + "epoch": 3.069977426636569, + "grad_norm": 15.906881332397461, + "learning_rate": 3.379211719689278e-06, + "lm_loss": 5.4948, + "loss": 1.492, + "step": 1360, + "text_contrastive_loss": 0.8512 + }, + { + "contrastive_loss": 0.5008, + "epoch": 3.072234762979684, + "grad_norm": 14.120617866516113, + "learning_rate": 3.37234978806985e-06, + "lm_loss": 5.4651, + "loss": 1.4897, + "step": 1361, + "text_contrastive_loss": 0.8846 + }, + { + "contrastive_loss": 0.4614, + "epoch": 3.074492099322799, + "grad_norm": 14.68795394897461, + "learning_rate": 3.365491283693807e-06, + "lm_loss": 5.4428, + "loss": 1.3755, + "step": 1362, + "text_contrastive_loss": 0.7397 + }, + { + "contrastive_loss": 0.5015, + "epoch": 3.0767494356659144, + "grad_norm": 14.28650188446045, + "learning_rate": 3.358636221002682e-06, + "lm_loss": 5.5232, + "loss": 1.3865, + "step": 1363, + "text_contrastive_loss": 0.6653 + }, + { + "contrastive_loss": 0.3761, + "epoch": 3.0790067720090293, + "grad_norm": 13.642817497253418, + "learning_rate": 3.351784614430761e-06, + "lm_loss": 5.6015, + "loss": 1.2892, + "step": 1364, + "text_contrastive_loss": 0.706 + }, + { + "contrastive_loss": 0.4728, + "epoch": 3.0812641083521446, + "grad_norm": 13.771718978881836, + "learning_rate": 3.3449364784050515e-06, + "lm_loss": 5.393, + "loss": 1.438, + "step": 1365, + "text_contrastive_loss": 0.8518 + }, + { + "contrastive_loss": 0.3946, + "epoch": 3.0835214446952595, + "grad_norm": 13.04595947265625, + "learning_rate": 3.3380918273452557e-06, + "lm_loss": 5.5928, + "loss": 1.3011, + "step": 1366, + "text_contrastive_loss": 0.6945 + }, + { + "contrastive_loss": 0.5262, + "epoch": 3.0857787810383748, + "grad_norm": 16.817808151245117, + "learning_rate": 3.3312506756637343e-06, + "lm_loss": 5.5432, + "loss": 1.5065, + "step": 1367, + "text_contrastive_loss": 0.8521 + }, + { + "contrastive_loss": 0.4632, + "epoch": 3.0880361173814896, + "grad_norm": 13.763033866882324, + "learning_rate": 3.324413037765483e-06, + "lm_loss": 5.4797, + "loss": 1.4042, + "step": 1368, + "text_contrastive_loss": 0.786 + }, + { + "contrastive_loss": 0.5539, + "epoch": 3.090293453724605, + "grad_norm": 18.377771377563477, + "learning_rate": 3.317578928048096e-06, + "lm_loss": 5.4347, + "loss": 1.4958, + "step": 1369, + "text_contrastive_loss": 0.7969 + }, + { + "contrastive_loss": 0.4844, + "epoch": 3.0925507900677203, + "grad_norm": 15.015218734741211, + "learning_rate": 3.310748360901741e-06, + "lm_loss": 5.4362, + "loss": 1.4578, + "step": 1370, + "text_contrastive_loss": 0.8594 + }, + { + "contrastive_loss": 0.3774, + "epoch": 3.094808126410835, + "grad_norm": 12.887053489685059, + "learning_rate": 3.303921350709124e-06, + "lm_loss": 5.4698, + "loss": 1.2701, + "step": 1371, + "text_contrastive_loss": 0.6914 + }, + { + "contrastive_loss": 0.4761, + "epoch": 3.0970654627539504, + "grad_norm": 15.221031188964844, + "learning_rate": 3.2970979118454616e-06, + "lm_loss": 5.3706, + "loss": 1.3838, + "step": 1372, + "text_contrastive_loss": 0.7412 + }, + { + "contrastive_loss": 0.4335, + "epoch": 3.0993227990970653, + "grad_norm": 15.106535911560059, + "learning_rate": 3.2902780586784542e-06, + "lm_loss": 5.6387, + "loss": 1.4091, + "step": 1373, + "text_contrastive_loss": 0.8233 + }, + { + "contrastive_loss": 0.6104, + "epoch": 3.1015801354401806, + "grad_norm": 15.874143600463867, + "learning_rate": 3.283461805568246e-06, + "lm_loss": 5.6024, + "loss": 1.6476, + "step": 1374, + "text_contrastive_loss": 0.954 + }, + { + "contrastive_loss": 0.5049, + "epoch": 3.1038374717832955, + "grad_norm": 13.519198417663574, + "learning_rate": 3.276649166867406e-06, + "lm_loss": 5.4215, + "loss": 1.4626, + "step": 1375, + "text_contrastive_loss": 0.831 + }, + { + "contrastive_loss": 0.4709, + "epoch": 3.106094808126411, + "grad_norm": 13.302258491516113, + "learning_rate": 3.2698401569208883e-06, + "lm_loss": 5.3711, + "loss": 1.3215, + "step": 1376, + "text_contrastive_loss": 0.6268 + }, + { + "contrastive_loss": 0.4418, + "epoch": 3.108352144469526, + "grad_norm": 14.467670440673828, + "learning_rate": 3.2630347900660094e-06, + "lm_loss": 5.6234, + "loss": 1.3432, + "step": 1377, + "text_contrastive_loss": 0.678 + }, + { + "contrastive_loss": 0.4821, + "epoch": 3.110609480812641, + "grad_norm": 15.450533866882324, + "learning_rate": 3.256233080632414e-06, + "lm_loss": 5.5193, + "loss": 1.3991, + "step": 1378, + "text_contrastive_loss": 0.7302 + }, + { + "contrastive_loss": 0.4553, + "epoch": 3.1128668171557563, + "grad_norm": 13.368943214416504, + "learning_rate": 3.249435042942043e-06, + "lm_loss": 5.4157, + "loss": 1.3467, + "step": 1379, + "text_contrastive_loss": 0.6997 + }, + { + "contrastive_loss": 0.516, + "epoch": 3.115124153498871, + "grad_norm": 14.798224449157715, + "learning_rate": 3.242640691309111e-06, + "lm_loss": 5.5178, + "loss": 1.4446, + "step": 1380, + "text_contrastive_loss": 0.7537 + }, + { + "contrastive_loss": 0.5371, + "epoch": 3.1173814898419865, + "grad_norm": 15.642748832702637, + "learning_rate": 3.235850040040066e-06, + "lm_loss": 5.5772, + "loss": 1.4784, + "step": 1381, + "text_contrastive_loss": 0.7672 + }, + { + "contrastive_loss": 0.505, + "epoch": 3.119638826185102, + "grad_norm": 15.201095581054688, + "learning_rate": 3.2290631034335684e-06, + "lm_loss": 5.4741, + "loss": 1.5187, + "step": 1382, + "text_contrastive_loss": 0.9325 + }, + { + "contrastive_loss": 0.5862, + "epoch": 3.1218961625282167, + "grad_norm": 17.422672271728516, + "learning_rate": 3.2222798957804524e-06, + "lm_loss": 5.4733, + "loss": 1.5078, + "step": 1383, + "text_contrastive_loss": 0.7486 + }, + { + "contrastive_loss": 0.4441, + "epoch": 3.124153498871332, + "grad_norm": 13.599923133850098, + "learning_rate": 3.215500431363706e-06, + "lm_loss": 5.4271, + "loss": 1.3811, + "step": 1384, + "text_contrastive_loss": 0.7885 + }, + { + "contrastive_loss": 0.5033, + "epoch": 3.126410835214447, + "grad_norm": 14.964048385620117, + "learning_rate": 3.20872472445843e-06, + "lm_loss": 5.4844, + "loss": 1.4989, + "step": 1385, + "text_contrastive_loss": 0.8943 + }, + { + "contrastive_loss": 0.5057, + "epoch": 3.128668171557562, + "grad_norm": 15.891070365905762, + "learning_rate": 3.2019527893318177e-06, + "lm_loss": 5.3869, + "loss": 1.4726, + "step": 1386, + "text_contrastive_loss": 0.8565 + }, + { + "contrastive_loss": 0.4319, + "epoch": 3.130925507900677, + "grad_norm": 13.052963256835938, + "learning_rate": 3.195184640243115e-06, + "lm_loss": 5.4589, + "loss": 1.3472, + "step": 1387, + "text_contrastive_loss": 0.7387 + }, + { + "contrastive_loss": 0.544, + "epoch": 3.1331828442437923, + "grad_norm": 16.660829544067383, + "learning_rate": 3.1884202914436024e-06, + "lm_loss": 5.4135, + "loss": 1.455, + "step": 1388, + "text_contrastive_loss": 0.7393 + }, + { + "contrastive_loss": 0.4587, + "epoch": 3.1354401805869077, + "grad_norm": 14.902560234069824, + "learning_rate": 3.1816597571765517e-06, + "lm_loss": 5.4346, + "loss": 1.4785, + "step": 1389, + "text_contrastive_loss": 0.9526 + }, + { + "contrastive_loss": 0.4568, + "epoch": 3.1376975169300225, + "grad_norm": 13.48047924041748, + "learning_rate": 3.1749030516772084e-06, + "lm_loss": 5.4984, + "loss": 1.4147, + "step": 1390, + "text_contrastive_loss": 0.8161 + }, + { + "contrastive_loss": 0.4536, + "epoch": 3.139954853273138, + "grad_norm": 14.445048332214355, + "learning_rate": 3.168150189172754e-06, + "lm_loss": 5.5247, + "loss": 1.393, + "step": 1391, + "text_contrastive_loss": 0.7738 + }, + { + "contrastive_loss": 0.4446, + "epoch": 3.1422121896162527, + "grad_norm": 15.271405220031738, + "learning_rate": 3.1614011838822755e-06, + "lm_loss": 5.592, + "loss": 1.4035, + "step": 1392, + "text_contrastive_loss": 0.7994 + }, + { + "contrastive_loss": 0.4147, + "epoch": 3.144469525959368, + "grad_norm": 13.864374160766602, + "learning_rate": 3.154656050016742e-06, + "lm_loss": 5.4321, + "loss": 1.4075, + "step": 1393, + "text_contrastive_loss": 0.8991 + }, + { + "contrastive_loss": 0.4614, + "epoch": 3.146726862302483, + "grad_norm": 15.20340347290039, + "learning_rate": 3.1479148017789673e-06, + "lm_loss": 5.4993, + "loss": 1.3472, + "step": 1394, + "text_contrastive_loss": 0.6717 + }, + { + "contrastive_loss": 0.3928, + "epoch": 3.148984198645598, + "grad_norm": 13.848822593688965, + "learning_rate": 3.1411774533635854e-06, + "lm_loss": 5.5124, + "loss": 1.3486, + "step": 1395, + "text_contrastive_loss": 0.8091 + }, + { + "contrastive_loss": 0.3748, + "epoch": 3.1512415349887135, + "grad_norm": 12.218823432922363, + "learning_rate": 3.134444018957019e-06, + "lm_loss": 5.5252, + "loss": 1.3103, + "step": 1396, + "text_contrastive_loss": 0.7659 + }, + { + "contrastive_loss": 0.4481, + "epoch": 3.1534988713318284, + "grad_norm": 14.697312355041504, + "learning_rate": 3.1277145127374475e-06, + "lm_loss": 5.5859, + "loss": 1.4681, + "step": 1397, + "text_contrastive_loss": 0.9228 + }, + { + "contrastive_loss": 0.4735, + "epoch": 3.1557562076749437, + "grad_norm": 15.564329147338867, + "learning_rate": 3.1209889488747813e-06, + "lm_loss": 5.5729, + "loss": 1.4461, + "step": 1398, + "text_contrastive_loss": 0.8308 + }, + { + "contrastive_loss": 0.4814, + "epoch": 3.1580135440180586, + "grad_norm": 14.367502212524414, + "learning_rate": 3.114267341530627e-06, + "lm_loss": 5.4576, + "loss": 1.4155, + "step": 1399, + "text_contrastive_loss": 0.7766 + }, + { + "contrastive_loss": 0.4437, + "epoch": 3.160270880361174, + "grad_norm": 13.590703010559082, + "learning_rate": 3.1075497048582635e-06, + "lm_loss": 5.3726, + "loss": 1.3244, + "step": 1400, + "text_contrastive_loss": 0.687 + }, + { + "contrastive_loss": 0.4385, + "epoch": 3.1625282167042887, + "grad_norm": 13.9979887008667, + "learning_rate": 3.1008360530026053e-06, + "lm_loss": 5.4082, + "loss": 1.4658, + "step": 1401, + "text_contrastive_loss": 0.9728 + }, + { + "contrastive_loss": 0.437, + "epoch": 3.164785553047404, + "grad_norm": 12.361220359802246, + "learning_rate": 3.0941264001001796e-06, + "lm_loss": 5.4415, + "loss": 1.3352, + "step": 1402, + "text_contrastive_loss": 0.7081 + }, + { + "contrastive_loss": 0.4989, + "epoch": 3.1670428893905194, + "grad_norm": 14.86351203918457, + "learning_rate": 3.0874207602790895e-06, + "lm_loss": 5.435, + "loss": 1.467, + "step": 1403, + "text_contrastive_loss": 0.8492 + }, + { + "contrastive_loss": 0.5322, + "epoch": 3.1693002257336342, + "grad_norm": 14.946853637695312, + "learning_rate": 3.0807191476589926e-06, + "lm_loss": 5.4772, + "loss": 1.5568, + "step": 1404, + "text_contrastive_loss": 0.9538 + }, + { + "contrastive_loss": 0.518, + "epoch": 3.1715575620767495, + "grad_norm": 15.552018165588379, + "learning_rate": 3.0740215763510617e-06, + "lm_loss": 5.5036, + "loss": 1.455, + "step": 1405, + "text_contrastive_loss": 0.7732 + }, + { + "contrastive_loss": 0.418, + "epoch": 3.1738148984198644, + "grad_norm": 13.696687698364258, + "learning_rate": 3.0673280604579623e-06, + "lm_loss": 5.3499, + "loss": 1.36, + "step": 1406, + "text_contrastive_loss": 0.814 + }, + { + "contrastive_loss": 0.5179, + "epoch": 3.1760722347629797, + "grad_norm": 16.097196578979492, + "learning_rate": 3.0606386140738253e-06, + "lm_loss": 5.5417, + "loss": 1.4344, + "step": 1407, + "text_contrastive_loss": 0.7247 + }, + { + "contrastive_loss": 0.3777, + "epoch": 3.1783295711060946, + "grad_norm": 15.0361328125, + "learning_rate": 3.053953251284205e-06, + "lm_loss": 5.4739, + "loss": 1.2988, + "step": 1408, + "text_contrastive_loss": 0.7475 + }, + { + "contrastive_loss": 0.4927, + "epoch": 3.18058690744921, + "grad_norm": 13.873917579650879, + "learning_rate": 3.047271986166061e-06, + "lm_loss": 5.546, + "loss": 1.4229, + "step": 1409, + "text_contrastive_loss": 0.7513 + }, + { + "contrastive_loss": 0.4725, + "epoch": 3.1828442437923252, + "grad_norm": 15.101540565490723, + "learning_rate": 3.0405948327877233e-06, + "lm_loss": 5.4231, + "loss": 1.477, + "step": 1410, + "text_contrastive_loss": 0.9243 + }, + { + "contrastive_loss": 0.4674, + "epoch": 3.18510158013544, + "grad_norm": 13.89570140838623, + "learning_rate": 3.033921805208867e-06, + "lm_loss": 5.4773, + "loss": 1.4004, + "step": 1411, + "text_contrastive_loss": 0.7706 + }, + { + "contrastive_loss": 0.4747, + "epoch": 3.1873589164785554, + "grad_norm": 14.832307815551758, + "learning_rate": 3.027252917480476e-06, + "lm_loss": 5.5482, + "loss": 1.384, + "step": 1412, + "text_contrastive_loss": 0.7089 + }, + { + "contrastive_loss": 0.5005, + "epoch": 3.1896162528216703, + "grad_norm": 14.91125202178955, + "learning_rate": 3.0205881836448186e-06, + "lm_loss": 5.6158, + "loss": 1.413, + "step": 1413, + "text_contrastive_loss": 0.7018 + }, + { + "contrastive_loss": 0.4625, + "epoch": 3.1918735891647856, + "grad_norm": 13.466875076293945, + "learning_rate": 3.0139276177354188e-06, + "lm_loss": 5.4973, + "loss": 1.4075, + "step": 1414, + "text_contrastive_loss": 0.7905 + }, + { + "contrastive_loss": 0.4334, + "epoch": 3.194130925507901, + "grad_norm": 12.072528839111328, + "learning_rate": 3.00727123377702e-06, + "lm_loss": 5.5764, + "loss": 1.3598, + "step": 1415, + "text_contrastive_loss": 0.7375 + }, + { + "contrastive_loss": 0.4601, + "epoch": 3.1963882618510158, + "grad_norm": 14.04871654510498, + "learning_rate": 3.0006190457855643e-06, + "lm_loss": 5.5244, + "loss": 1.4103, + "step": 1416, + "text_contrastive_loss": 0.7956 + }, + { + "contrastive_loss": 0.4577, + "epoch": 3.198645598194131, + "grad_norm": 16.075223922729492, + "learning_rate": 2.9939710677681545e-06, + "lm_loss": 5.4323, + "loss": 1.3909, + "step": 1417, + "text_contrastive_loss": 0.7801 + }, + { + "contrastive_loss": 0.433, + "epoch": 3.200902934537246, + "grad_norm": 14.013437271118164, + "learning_rate": 2.987327313723033e-06, + "lm_loss": 5.4048, + "loss": 1.3656, + "step": 1418, + "text_contrastive_loss": 0.7842 + }, + { + "contrastive_loss": 0.4274, + "epoch": 3.2031602708803613, + "grad_norm": 12.726801872253418, + "learning_rate": 2.980687797639543e-06, + "lm_loss": 5.4843, + "loss": 1.3715, + "step": 1419, + "text_contrastive_loss": 0.7914 + }, + { + "contrastive_loss": 0.4272, + "epoch": 3.205417607223476, + "grad_norm": 15.397507667541504, + "learning_rate": 2.9740525334981105e-06, + "lm_loss": 5.5576, + "loss": 1.3759, + "step": 1420, + "text_contrastive_loss": 0.7858 + }, + { + "contrastive_loss": 0.4926, + "epoch": 3.2076749435665914, + "grad_norm": 14.691521644592285, + "learning_rate": 2.967421535270203e-06, + "lm_loss": 5.556, + "loss": 1.4491, + "step": 1421, + "text_contrastive_loss": 0.8018 + }, + { + "contrastive_loss": 0.5022, + "epoch": 3.2099322799097068, + "grad_norm": 16.048423767089844, + "learning_rate": 2.9607948169183077e-06, + "lm_loss": 5.4641, + "loss": 1.4238, + "step": 1422, + "text_contrastive_loss": 0.7504 + }, + { + "contrastive_loss": 0.4289, + "epoch": 3.2121896162528216, + "grad_norm": 13.062213897705078, + "learning_rate": 2.9541723923958975e-06, + "lm_loss": 5.4684, + "loss": 1.354, + "step": 1423, + "text_contrastive_loss": 0.7564 + }, + { + "contrastive_loss": 0.5075, + "epoch": 3.214446952595937, + "grad_norm": 15.797381401062012, + "learning_rate": 2.94755427564741e-06, + "lm_loss": 5.6064, + "loss": 1.515, + "step": 1424, + "text_contrastive_loss": 0.8937 + }, + { + "contrastive_loss": 0.491, + "epoch": 3.216704288939052, + "grad_norm": 16.2073917388916, + "learning_rate": 2.9409404806082077e-06, + "lm_loss": 5.3949, + "loss": 1.4089, + "step": 1425, + "text_contrastive_loss": 0.7569 + }, + { + "contrastive_loss": 0.4289, + "epoch": 3.218961625282167, + "grad_norm": 14.816788673400879, + "learning_rate": 2.934331021204551e-06, + "lm_loss": 5.4471, + "loss": 1.3935, + "step": 1426, + "text_contrastive_loss": 0.8399 + }, + { + "contrastive_loss": 0.4099, + "epoch": 3.221218961625282, + "grad_norm": 13.100132942199707, + "learning_rate": 2.9277259113535774e-06, + "lm_loss": 5.5277, + "loss": 1.2779, + "step": 1427, + "text_contrastive_loss": 0.6304 + }, + { + "contrastive_loss": 0.4655, + "epoch": 3.2234762979683973, + "grad_norm": 13.787766456604004, + "learning_rate": 2.9211251649632587e-06, + "lm_loss": 5.4817, + "loss": 1.4278, + "step": 1428, + "text_contrastive_loss": 0.8283 + }, + { + "contrastive_loss": 0.4856, + "epoch": 3.2257336343115126, + "grad_norm": 14.164327621459961, + "learning_rate": 2.9145287959323852e-06, + "lm_loss": 5.4339, + "loss": 1.3871, + "step": 1429, + "text_contrastive_loss": 0.7163 + }, + { + "contrastive_loss": 0.4384, + "epoch": 3.2279909706546275, + "grad_norm": 14.701132774353027, + "learning_rate": 2.9079368181505263e-06, + "lm_loss": 5.4552, + "loss": 1.3807, + "step": 1430, + "text_contrastive_loss": 0.7937 + }, + { + "contrastive_loss": 0.4371, + "epoch": 3.230248306997743, + "grad_norm": 13.563252449035645, + "learning_rate": 2.9013492454980074e-06, + "lm_loss": 5.4159, + "loss": 1.3341, + "step": 1431, + "text_contrastive_loss": 0.7107 + }, + { + "contrastive_loss": 0.501, + "epoch": 3.2325056433408577, + "grad_norm": 14.329377174377441, + "learning_rate": 2.894766091845873e-06, + "lm_loss": 5.51, + "loss": 1.4502, + "step": 1432, + "text_contrastive_loss": 0.7963 + }, + { + "contrastive_loss": 0.5936, + "epoch": 3.234762979683973, + "grad_norm": 17.089048385620117, + "learning_rate": 2.88818737105587e-06, + "lm_loss": 5.5394, + "loss": 1.6195, + "step": 1433, + "text_contrastive_loss": 0.9439 + }, + { + "contrastive_loss": 0.4673, + "epoch": 3.237020316027088, + "grad_norm": 13.321817398071289, + "learning_rate": 2.881613096980407e-06, + "lm_loss": 5.4206, + "loss": 1.4222, + "step": 1434, + "text_contrastive_loss": 0.8256 + }, + { + "contrastive_loss": 0.6084, + "epoch": 3.239277652370203, + "grad_norm": 16.38990020751953, + "learning_rate": 2.8750432834625312e-06, + "lm_loss": 5.4195, + "loss": 1.6313, + "step": 1435, + "text_contrastive_loss": 0.962 + }, + { + "contrastive_loss": 0.4847, + "epoch": 3.2415349887133185, + "grad_norm": 13.881186485290527, + "learning_rate": 2.8684779443358945e-06, + "lm_loss": 5.5713, + "loss": 1.4127, + "step": 1436, + "text_contrastive_loss": 0.7417 + }, + { + "contrastive_loss": 0.48, + "epoch": 3.2437923250564333, + "grad_norm": 14.018843650817871, + "learning_rate": 2.861917093424731e-06, + "lm_loss": 5.46, + "loss": 1.5133, + "step": 1437, + "text_contrastive_loss": 0.9746 + }, + { + "contrastive_loss": 0.4335, + "epoch": 3.2460496613995486, + "grad_norm": 16.42011260986328, + "learning_rate": 2.855360744543822e-06, + "lm_loss": 5.4131, + "loss": 1.3443, + "step": 1438, + "text_contrastive_loss": 0.7389 + }, + { + "contrastive_loss": 0.5224, + "epoch": 3.2483069977426635, + "grad_norm": 14.962552070617676, + "learning_rate": 2.8488089114984725e-06, + "lm_loss": 5.4925, + "loss": 1.4171, + "step": 1439, + "text_contrastive_loss": 0.691 + }, + { + "contrastive_loss": 0.4703, + "epoch": 3.250564334085779, + "grad_norm": 14.868867874145508, + "learning_rate": 2.84226160808447e-06, + "lm_loss": 5.5117, + "loss": 1.4661, + "step": 1440, + "text_contrastive_loss": 0.8893 + }, + { + "contrastive_loss": 0.4934, + "epoch": 3.2528216704288937, + "grad_norm": 14.345926284790039, + "learning_rate": 2.835718848088076e-06, + "lm_loss": 5.4378, + "loss": 1.3861, + "step": 1441, + "text_contrastive_loss": 0.6978 + }, + { + "contrastive_loss": 0.434, + "epoch": 3.255079006772009, + "grad_norm": 14.553913116455078, + "learning_rate": 2.8291806452859803e-06, + "lm_loss": 5.5351, + "loss": 1.387, + "step": 1442, + "text_contrastive_loss": 0.799 + }, + { + "contrastive_loss": 0.4189, + "epoch": 3.2573363431151243, + "grad_norm": 12.91666030883789, + "learning_rate": 2.822647013445272e-06, + "lm_loss": 5.4574, + "loss": 1.3539, + "step": 1443, + "text_contrastive_loss": 0.7785 + }, + { + "contrastive_loss": 0.4741, + "epoch": 3.259593679458239, + "grad_norm": 13.636680603027344, + "learning_rate": 2.8161179663234215e-06, + "lm_loss": 5.3573, + "loss": 1.4103, + "step": 1444, + "text_contrastive_loss": 0.8009 + }, + { + "contrastive_loss": 0.479, + "epoch": 3.2618510158013545, + "grad_norm": 14.111836433410645, + "learning_rate": 2.809593517668243e-06, + "lm_loss": 5.4982, + "loss": 1.4668, + "step": 1445, + "text_contrastive_loss": 0.8761 + }, + { + "contrastive_loss": 0.544, + "epoch": 3.2641083521444694, + "grad_norm": 15.635238647460938, + "learning_rate": 2.8030736812178717e-06, + "lm_loss": 5.4932, + "loss": 1.5298, + "step": 1446, + "text_contrastive_loss": 0.873 + }, + { + "contrastive_loss": 0.4661, + "epoch": 3.2663656884875847, + "grad_norm": 15.782267570495605, + "learning_rate": 2.796558470700723e-06, + "lm_loss": 5.4626, + "loss": 1.4277, + "step": 1447, + "text_contrastive_loss": 0.8306 + }, + { + "contrastive_loss": 0.4545, + "epoch": 3.2686230248307, + "grad_norm": 12.718178749084473, + "learning_rate": 2.790047899835479e-06, + "lm_loss": 5.3625, + "loss": 1.4277, + "step": 1448, + "text_contrastive_loss": 0.8739 + }, + { + "contrastive_loss": 0.5246, + "epoch": 3.270880361173815, + "grad_norm": 15.982274055480957, + "learning_rate": 2.7835419823310507e-06, + "lm_loss": 5.5273, + "loss": 1.5151, + "step": 1449, + "text_contrastive_loss": 0.8756 + }, + { + "contrastive_loss": 0.454, + "epoch": 3.27313769751693, + "grad_norm": 16.382341384887695, + "learning_rate": 2.777040731886549e-06, + "lm_loss": 5.4664, + "loss": 1.377, + "step": 1450, + "text_contrastive_loss": 0.7526 + }, + { + "contrastive_loss": 0.4552, + "epoch": 3.275395033860045, + "grad_norm": 15.161858558654785, + "learning_rate": 2.770544162191261e-06, + "lm_loss": 5.4801, + "loss": 1.3577, + "step": 1451, + "text_contrastive_loss": 0.7091 + }, + { + "contrastive_loss": 0.4925, + "epoch": 3.2776523702031604, + "grad_norm": 14.238808631896973, + "learning_rate": 2.7640522869246134e-06, + "lm_loss": 5.4234, + "loss": 1.4124, + "step": 1452, + "text_contrastive_loss": 0.7551 + }, + { + "contrastive_loss": 0.4706, + "epoch": 3.2799097065462752, + "grad_norm": 15.547906875610352, + "learning_rate": 2.7575651197561504e-06, + "lm_loss": 5.5024, + "loss": 1.4493, + "step": 1453, + "text_contrastive_loss": 0.8567 + }, + { + "contrastive_loss": 0.4395, + "epoch": 3.2821670428893905, + "grad_norm": 14.469873428344727, + "learning_rate": 2.7510826743455037e-06, + "lm_loss": 5.4523, + "loss": 1.3504, + "step": 1454, + "text_contrastive_loss": 0.7314 + }, + { + "contrastive_loss": 0.5703, + "epoch": 3.2844243792325054, + "grad_norm": 15.518226623535156, + "learning_rate": 2.744604964342364e-06, + "lm_loss": 5.4507, + "loss": 1.5544, + "step": 1455, + "text_contrastive_loss": 0.8782 + }, + { + "contrastive_loss": 0.495, + "epoch": 3.2866817155756207, + "grad_norm": 13.762090682983398, + "learning_rate": 2.7381320033864434e-06, + "lm_loss": 5.5043, + "loss": 1.4291, + "step": 1456, + "text_contrastive_loss": 0.7674 + }, + { + "contrastive_loss": 0.4654, + "epoch": 3.288939051918736, + "grad_norm": 13.592652320861816, + "learning_rate": 2.7316638051074605e-06, + "lm_loss": 5.4472, + "loss": 1.3406, + "step": 1457, + "text_contrastive_loss": 0.661 + }, + { + "contrastive_loss": 0.4706, + "epoch": 3.291196388261851, + "grad_norm": 14.618680953979492, + "learning_rate": 2.72520038312511e-06, + "lm_loss": 5.4162, + "loss": 1.4038, + "step": 1458, + "text_contrastive_loss": 0.7831 + }, + { + "contrastive_loss": 0.4144, + "epoch": 3.293453724604966, + "grad_norm": 13.54752254486084, + "learning_rate": 2.7187417510490176e-06, + "lm_loss": 5.5561, + "loss": 1.3636, + "step": 1459, + "text_contrastive_loss": 0.7872 + }, + { + "contrastive_loss": 0.4795, + "epoch": 3.295711060948081, + "grad_norm": 15.078756332397461, + "learning_rate": 2.7122879224787315e-06, + "lm_loss": 5.5425, + "loss": 1.4632, + "step": 1460, + "text_contrastive_loss": 0.8588 + }, + { + "contrastive_loss": 0.4911, + "epoch": 3.2979683972911964, + "grad_norm": 16.779354095458984, + "learning_rate": 2.7058389110036835e-06, + "lm_loss": 5.4832, + "loss": 1.4307, + "step": 1461, + "text_contrastive_loss": 0.7826 + }, + { + "contrastive_loss": 0.5161, + "epoch": 3.3002257336343117, + "grad_norm": 14.62807846069336, + "learning_rate": 2.6993947302031643e-06, + "lm_loss": 5.483, + "loss": 1.523, + "step": 1462, + "text_contrastive_loss": 0.9172 + }, + { + "contrastive_loss": 0.4469, + "epoch": 3.3024830699774266, + "grad_norm": 14.627190589904785, + "learning_rate": 2.692955393646286e-06, + "lm_loss": 5.4985, + "loss": 1.343, + "step": 1463, + "text_contrastive_loss": 0.6924 + }, + { + "contrastive_loss": 0.4104, + "epoch": 3.304740406320542, + "grad_norm": 14.061870574951172, + "learning_rate": 2.686520914891968e-06, + "lm_loss": 5.4354, + "loss": 1.2653, + "step": 1464, + "text_contrastive_loss": 0.6227 + }, + { + "contrastive_loss": 0.4294, + "epoch": 3.3069977426636568, + "grad_norm": 13.842485427856445, + "learning_rate": 2.6800913074888984e-06, + "lm_loss": 5.4847, + "loss": 1.3673, + "step": 1465, + "text_contrastive_loss": 0.7789 + }, + { + "contrastive_loss": 0.3909, + "epoch": 3.309255079006772, + "grad_norm": 13.694429397583008, + "learning_rate": 2.6736665849755073e-06, + "lm_loss": 5.4691, + "loss": 1.3706, + "step": 1466, + "text_contrastive_loss": 0.8657 + }, + { + "contrastive_loss": 0.4237, + "epoch": 3.311512415349887, + "grad_norm": 13.254097938537598, + "learning_rate": 2.6672467608799413e-06, + "lm_loss": 5.4571, + "loss": 1.3693, + "step": 1467, + "text_contrastive_loss": 0.7997 + }, + { + "contrastive_loss": 0.5489, + "epoch": 3.3137697516930023, + "grad_norm": 14.583216667175293, + "learning_rate": 2.660831848720028e-06, + "lm_loss": 5.4687, + "loss": 1.5559, + "step": 1468, + "text_contrastive_loss": 0.9203 + }, + { + "contrastive_loss": 0.4414, + "epoch": 3.3160270880361176, + "grad_norm": 19.19298553466797, + "learning_rate": 2.654421862003256e-06, + "lm_loss": 5.425, + "loss": 1.3671, + "step": 1469, + "text_contrastive_loss": 0.7665 + }, + { + "contrastive_loss": 0.4941, + "epoch": 3.3182844243792324, + "grad_norm": 15.22370433807373, + "learning_rate": 2.648016814226742e-06, + "lm_loss": 5.5326, + "loss": 1.4579, + "step": 1470, + "text_contrastive_loss": 0.8211 + }, + { + "contrastive_loss": 0.5334, + "epoch": 3.3205417607223477, + "grad_norm": 15.47024917602539, + "learning_rate": 2.6416167188772052e-06, + "lm_loss": 5.4499, + "loss": 1.48, + "step": 1471, + "text_contrastive_loss": 0.8033 + }, + { + "contrastive_loss": 0.4386, + "epoch": 3.3227990970654626, + "grad_norm": 13.895892143249512, + "learning_rate": 2.6352215894309306e-06, + "lm_loss": 5.4307, + "loss": 1.3779, + "step": 1472, + "text_contrastive_loss": 0.7924 + }, + { + "contrastive_loss": 0.5044, + "epoch": 3.325056433408578, + "grad_norm": 13.95846176147461, + "learning_rate": 2.6288314393537522e-06, + "lm_loss": 5.4381, + "loss": 1.4103, + "step": 1473, + "text_contrastive_loss": 0.7243 + }, + { + "contrastive_loss": 0.4356, + "epoch": 3.327313769751693, + "grad_norm": 12.028575897216797, + "learning_rate": 2.6224462821010185e-06, + "lm_loss": 5.4458, + "loss": 1.4117, + "step": 1474, + "text_contrastive_loss": 0.8631 + }, + { + "contrastive_loss": 0.4626, + "epoch": 3.329571106094808, + "grad_norm": 13.620333671569824, + "learning_rate": 2.616066131117563e-06, + "lm_loss": 5.4997, + "loss": 1.4582, + "step": 1475, + "text_contrastive_loss": 0.8913 + }, + { + "contrastive_loss": 0.5076, + "epoch": 3.3318284424379234, + "grad_norm": 15.211213111877441, + "learning_rate": 2.6096909998376794e-06, + "lm_loss": 5.4001, + "loss": 1.4919, + "step": 1476, + "text_contrastive_loss": 0.8886 + }, + { + "contrastive_loss": 0.5028, + "epoch": 3.3340857787810383, + "grad_norm": 15.72059154510498, + "learning_rate": 2.6033209016850926e-06, + "lm_loss": 5.4267, + "loss": 1.4654, + "step": 1477, + "text_contrastive_loss": 0.8399 + }, + { + "contrastive_loss": 0.4581, + "epoch": 3.3363431151241536, + "grad_norm": 15.193544387817383, + "learning_rate": 2.596955850072928e-06, + "lm_loss": 5.4303, + "loss": 1.4232, + "step": 1478, + "text_contrastive_loss": 0.8442 + }, + { + "contrastive_loss": 0.59, + "epoch": 3.3386004514672685, + "grad_norm": 15.448929786682129, + "learning_rate": 2.5905958584036826e-06, + "lm_loss": 5.4836, + "loss": 1.5784, + "step": 1479, + "text_contrastive_loss": 0.8801 + }, + { + "contrastive_loss": 0.4063, + "epoch": 3.340857787810384, + "grad_norm": 12.013140678405762, + "learning_rate": 2.5842409400692026e-06, + "lm_loss": 5.4711, + "loss": 1.2616, + "step": 1480, + "text_contrastive_loss": 0.6164 + }, + { + "contrastive_loss": 0.472, + "epoch": 3.343115124153499, + "grad_norm": 14.958868026733398, + "learning_rate": 2.577891108450651e-06, + "lm_loss": 5.4646, + "loss": 1.4138, + "step": 1481, + "text_contrastive_loss": 0.7907 + }, + { + "contrastive_loss": 0.4594, + "epoch": 3.345372460496614, + "grad_norm": 14.461827278137207, + "learning_rate": 2.571546376918479e-06, + "lm_loss": 5.4562, + "loss": 1.4113, + "step": 1482, + "text_contrastive_loss": 0.8125 + }, + { + "contrastive_loss": 0.4867, + "epoch": 3.3476297968397293, + "grad_norm": 13.656875610351562, + "learning_rate": 2.5652067588324015e-06, + "lm_loss": 5.6341, + "loss": 1.4406, + "step": 1483, + "text_contrastive_loss": 0.7811 + }, + { + "contrastive_loss": 0.3927, + "epoch": 3.349887133182844, + "grad_norm": 13.251200675964355, + "learning_rate": 2.55887226754136e-06, + "lm_loss": 5.5251, + "loss": 1.3182, + "step": 1484, + "text_contrastive_loss": 0.7459 + }, + { + "contrastive_loss": 0.4508, + "epoch": 3.3521444695259595, + "grad_norm": 14.159133911132812, + "learning_rate": 2.552542916383507e-06, + "lm_loss": 5.3832, + "loss": 1.4053, + "step": 1485, + "text_contrastive_loss": 0.8324 + }, + { + "contrastive_loss": 0.5345, + "epoch": 3.3544018058690743, + "grad_norm": 13.861654281616211, + "learning_rate": 2.5462187186861697e-06, + "lm_loss": 5.6045, + "loss": 1.5416, + "step": 1486, + "text_contrastive_loss": 0.8932 + }, + { + "contrastive_loss": 0.4247, + "epoch": 3.3566591422121896, + "grad_norm": 13.136606216430664, + "learning_rate": 2.5398996877658256e-06, + "lm_loss": 5.4987, + "loss": 1.3594, + "step": 1487, + "text_contrastive_loss": 0.7698 + }, + { + "contrastive_loss": 0.5246, + "epoch": 3.3589164785553045, + "grad_norm": 15.686463356018066, + "learning_rate": 2.5335858369280674e-06, + "lm_loss": 5.4922, + "loss": 1.5206, + "step": 1488, + "text_contrastive_loss": 0.8934 + }, + { + "contrastive_loss": 0.4245, + "epoch": 3.36117381489842, + "grad_norm": 14.053102493286133, + "learning_rate": 2.5272771794675866e-06, + "lm_loss": 5.473, + "loss": 1.3263, + "step": 1489, + "text_contrastive_loss": 0.7091 + }, + { + "contrastive_loss": 0.551, + "epoch": 3.363431151241535, + "grad_norm": 14.97412109375, + "learning_rate": 2.5209737286681367e-06, + "lm_loss": 5.3077, + "loss": 1.4425, + "step": 1490, + "text_contrastive_loss": 0.7216 + }, + { + "contrastive_loss": 0.5847, + "epoch": 3.36568848758465, + "grad_norm": 16.91216278076172, + "learning_rate": 2.514675497802508e-06, + "lm_loss": 5.4616, + "loss": 1.6223, + "step": 1491, + "text_contrastive_loss": 0.983 + }, + { + "contrastive_loss": 0.5068, + "epoch": 3.3679458239277653, + "grad_norm": 16.04525375366211, + "learning_rate": 2.508382500132499e-06, + "lm_loss": 5.5927, + "loss": 1.4422, + "step": 1492, + "text_contrastive_loss": 0.7522 + }, + { + "contrastive_loss": 0.5001, + "epoch": 3.37020316027088, + "grad_norm": 14.205605506896973, + "learning_rate": 2.50209474890889e-06, + "lm_loss": 5.3962, + "loss": 1.454, + "step": 1493, + "text_contrastive_loss": 0.8286 + }, + { + "contrastive_loss": 0.3953, + "epoch": 3.3724604966139955, + "grad_norm": 13.9257173538208, + "learning_rate": 2.495812257371416e-06, + "lm_loss": 5.4938, + "loss": 1.3433, + "step": 1494, + "text_contrastive_loss": 0.7973 + }, + { + "contrastive_loss": 0.47, + "epoch": 3.374717832957111, + "grad_norm": 14.477286338806152, + "learning_rate": 2.4895350387487304e-06, + "lm_loss": 5.5802, + "loss": 1.4061, + "step": 1495, + "text_contrastive_loss": 0.7561 + }, + { + "contrastive_loss": 0.5315, + "epoch": 3.3769751693002257, + "grad_norm": 15.752272605895996, + "learning_rate": 2.4832631062583906e-06, + "lm_loss": 5.4693, + "loss": 1.5519, + "step": 1496, + "text_contrastive_loss": 0.9469 + }, + { + "contrastive_loss": 0.586, + "epoch": 3.379232505643341, + "grad_norm": 16.18108558654785, + "learning_rate": 2.47699647310682e-06, + "lm_loss": 5.4499, + "loss": 1.5836, + "step": 1497, + "text_contrastive_loss": 0.9054 + }, + { + "contrastive_loss": 0.4066, + "epoch": 3.381489841986456, + "grad_norm": 12.718653678894043, + "learning_rate": 2.470735152489287e-06, + "lm_loss": 5.4281, + "loss": 1.3057, + "step": 1498, + "text_contrastive_loss": 0.7126 + }, + { + "contrastive_loss": 0.4779, + "epoch": 3.383747178329571, + "grad_norm": 15.179163932800293, + "learning_rate": 2.4644791575898665e-06, + "lm_loss": 5.4779, + "loss": 1.453, + "step": 1499, + "text_contrastive_loss": 0.8545 + }, + { + "contrastive_loss": 0.4282, + "epoch": 3.386004514672686, + "grad_norm": 14.62775707244873, + "learning_rate": 2.4582285015814263e-06, + "lm_loss": 5.4897, + "loss": 1.3416, + "step": 1500, + "text_contrastive_loss": 0.7288 + }, + { + "contrastive_loss": 0.4135, + "epoch": 3.3882618510158014, + "grad_norm": 13.477630615234375, + "learning_rate": 2.4519831976255892e-06, + "lm_loss": 5.4605, + "loss": 1.2748, + "step": 1501, + "text_contrastive_loss": 0.6304 + }, + { + "contrastive_loss": 0.4456, + "epoch": 3.3905191873589167, + "grad_norm": 15.13442325592041, + "learning_rate": 2.445743258872711e-06, + "lm_loss": 5.4527, + "loss": 1.3953, + "step": 1502, + "text_contrastive_loss": 0.809 + }, + { + "contrastive_loss": 0.443, + "epoch": 3.3927765237020315, + "grad_norm": 16.368581771850586, + "learning_rate": 2.4395086984618486e-06, + "lm_loss": 5.5224, + "loss": 1.4082, + "step": 1503, + "text_contrastive_loss": 0.8261 + }, + { + "contrastive_loss": 0.4459, + "epoch": 3.395033860045147, + "grad_norm": 14.504566192626953, + "learning_rate": 2.433279529520732e-06, + "lm_loss": 5.4485, + "loss": 1.393, + "step": 1504, + "text_contrastive_loss": 0.8045 + }, + { + "contrastive_loss": 0.5416, + "epoch": 3.3972911963882617, + "grad_norm": 16.19035530090332, + "learning_rate": 2.427055765165741e-06, + "lm_loss": 5.4202, + "loss": 1.4919, + "step": 1505, + "text_contrastive_loss": 0.8166 + }, + { + "contrastive_loss": 0.5178, + "epoch": 3.399548532731377, + "grad_norm": 15.244782447814941, + "learning_rate": 2.420837418501876e-06, + "lm_loss": 5.4967, + "loss": 1.5043, + "step": 1506, + "text_contrastive_loss": 0.8738 + }, + { + "contrastive_loss": 0.4833, + "epoch": 3.401805869074492, + "grad_norm": 15.166763305664062, + "learning_rate": 2.414624502622731e-06, + "lm_loss": 5.416, + "loss": 1.4109, + "step": 1507, + "text_contrastive_loss": 0.7721 + }, + { + "contrastive_loss": 0.4482, + "epoch": 3.404063205417607, + "grad_norm": 15.023187637329102, + "learning_rate": 2.408417030610457e-06, + "lm_loss": 5.5333, + "loss": 1.3832, + "step": 1508, + "text_contrastive_loss": 0.7634 + }, + { + "contrastive_loss": 0.4076, + "epoch": 3.4063205417607225, + "grad_norm": 16.45237159729004, + "learning_rate": 2.4022150155357526e-06, + "lm_loss": 5.5212, + "loss": 1.3757, + "step": 1509, + "text_contrastive_loss": 0.8319 + }, + { + "contrastive_loss": 0.4563, + "epoch": 3.4085778781038374, + "grad_norm": 15.986237525939941, + "learning_rate": 2.396018470457821e-06, + "lm_loss": 5.5014, + "loss": 1.4568, + "step": 1510, + "text_contrastive_loss": 0.9007 + }, + { + "contrastive_loss": 0.442, + "epoch": 3.4108352144469527, + "grad_norm": 14.491031646728516, + "learning_rate": 2.389827408424345e-06, + "lm_loss": 5.4717, + "loss": 1.3683, + "step": 1511, + "text_contrastive_loss": 0.7582 + }, + { + "contrastive_loss": 0.5759, + "epoch": 3.4130925507900676, + "grad_norm": 17.080780029296875, + "learning_rate": 2.3836418424714665e-06, + "lm_loss": 5.427, + "loss": 1.5746, + "step": 1512, + "text_contrastive_loss": 0.9121 + }, + { + "contrastive_loss": 0.5488, + "epoch": 3.415349887133183, + "grad_norm": 15.472517967224121, + "learning_rate": 2.377461785623752e-06, + "lm_loss": 5.465, + "loss": 1.4835, + "step": 1513, + "text_contrastive_loss": 0.7765 + }, + { + "contrastive_loss": 0.4572, + "epoch": 3.417607223476298, + "grad_norm": 14.8615140914917, + "learning_rate": 2.3712872508941714e-06, + "lm_loss": 5.4163, + "loss": 1.3947, + "step": 1514, + "text_contrastive_loss": 0.7918 + }, + { + "contrastive_loss": 0.4622, + "epoch": 3.419864559819413, + "grad_norm": 16.12051010131836, + "learning_rate": 2.3651182512840604e-06, + "lm_loss": 5.4557, + "loss": 1.3376, + "step": 1515, + "text_contrastive_loss": 0.6597 + }, + { + "contrastive_loss": 0.5526, + "epoch": 3.4221218961625284, + "grad_norm": 16.446517944335938, + "learning_rate": 2.358954799783106e-06, + "lm_loss": 5.5447, + "loss": 1.6239, + "step": 1516, + "text_contrastive_loss": 1.0335 + }, + { + "contrastive_loss": 0.5071, + "epoch": 3.4243792325056432, + "grad_norm": 14.582303047180176, + "learning_rate": 2.3527969093693105e-06, + "lm_loss": 5.3842, + "loss": 1.377, + "step": 1517, + "text_contrastive_loss": 0.6629 + }, + { + "contrastive_loss": 0.4184, + "epoch": 3.4266365688487586, + "grad_norm": 12.374114036560059, + "learning_rate": 2.346644593008966e-06, + "lm_loss": 5.3269, + "loss": 1.3535, + "step": 1518, + "text_contrastive_loss": 0.8049 + }, + { + "contrastive_loss": 0.5105, + "epoch": 3.4288939051918734, + "grad_norm": 15.860591888427734, + "learning_rate": 2.3404978636566312e-06, + "lm_loss": 5.5244, + "loss": 1.4897, + "step": 1519, + "text_contrastive_loss": 0.8536 + }, + { + "contrastive_loss": 0.4791, + "epoch": 3.4311512415349887, + "grad_norm": 15.427270889282227, + "learning_rate": 2.3343567342550933e-06, + "lm_loss": 5.5524, + "loss": 1.4218, + "step": 1520, + "text_contrastive_loss": 0.775 + }, + { + "contrastive_loss": 0.5032, + "epoch": 3.4334085778781036, + "grad_norm": 16.81085205078125, + "learning_rate": 2.328221217735355e-06, + "lm_loss": 5.4353, + "loss": 1.4378, + "step": 1521, + "text_contrastive_loss": 0.7823 + }, + { + "contrastive_loss": 0.5521, + "epoch": 3.435665914221219, + "grad_norm": 14.574199676513672, + "learning_rate": 2.322091327016597e-06, + "lm_loss": 5.3922, + "loss": 1.4772, + "step": 1522, + "text_contrastive_loss": 0.7718 + }, + { + "contrastive_loss": 0.4961, + "epoch": 3.4379232505643342, + "grad_norm": 14.520478248596191, + "learning_rate": 2.3159670750061563e-06, + "lm_loss": 5.5431, + "loss": 1.4631, + "step": 1523, + "text_contrastive_loss": 0.8255 + }, + { + "contrastive_loss": 0.5214, + "epoch": 3.440180586907449, + "grad_norm": 17.026172637939453, + "learning_rate": 2.3098484745994933e-06, + "lm_loss": 5.3637, + "loss": 1.489, + "step": 1524, + "text_contrastive_loss": 0.8624 + }, + { + "contrastive_loss": 0.4649, + "epoch": 3.4424379232505644, + "grad_norm": 14.461862564086914, + "learning_rate": 2.3037355386801683e-06, + "lm_loss": 5.5148, + "loss": 1.4059, + "step": 1525, + "text_contrastive_loss": 0.7791 + }, + { + "contrastive_loss": 0.4097, + "epoch": 3.4446952595936793, + "grad_norm": 14.151453018188477, + "learning_rate": 2.2976282801198237e-06, + "lm_loss": 5.5114, + "loss": 1.3445, + "step": 1526, + "text_contrastive_loss": 0.7673 + }, + { + "contrastive_loss": 0.5351, + "epoch": 3.4469525959367946, + "grad_norm": 15.692597389221191, + "learning_rate": 2.2915267117781328e-06, + "lm_loss": 5.4158, + "loss": 1.5236, + "step": 1527, + "text_contrastive_loss": 0.8938 + }, + { + "contrastive_loss": 0.4498, + "epoch": 3.44920993227991, + "grad_norm": 13.88866138458252, + "learning_rate": 2.2854308465027963e-06, + "lm_loss": 5.4396, + "loss": 1.3871, + "step": 1528, + "text_contrastive_loss": 0.7867 + }, + { + "contrastive_loss": 0.463, + "epoch": 3.4514672686230248, + "grad_norm": 14.166723251342773, + "learning_rate": 2.279340697129505e-06, + "lm_loss": 5.4298, + "loss": 1.3945, + "step": 1529, + "text_contrastive_loss": 0.7769 + }, + { + "contrastive_loss": 0.5447, + "epoch": 3.45372460496614, + "grad_norm": 15.951334953308105, + "learning_rate": 2.2732562764819157e-06, + "lm_loss": 5.4896, + "loss": 1.5022, + "step": 1530, + "text_contrastive_loss": 0.817 + }, + { + "contrastive_loss": 0.4087, + "epoch": 3.455981941309255, + "grad_norm": 15.156458854675293, + "learning_rate": 2.267177597371616e-06, + "lm_loss": 5.4176, + "loss": 1.2951, + "step": 1531, + "text_contrastive_loss": 0.6893 + }, + { + "contrastive_loss": 0.5003, + "epoch": 3.4582392776523703, + "grad_norm": 15.428759574890137, + "learning_rate": 2.26110467259811e-06, + "lm_loss": 5.3475, + "loss": 1.4773, + "step": 1532, + "text_contrastive_loss": 0.8846 + }, + { + "contrastive_loss": 0.4233, + "epoch": 3.460496613995485, + "grad_norm": 15.227729797363281, + "learning_rate": 2.255037514948785e-06, + "lm_loss": 5.3626, + "loss": 1.3801, + "step": 1533, + "text_contrastive_loss": 0.841 + }, + { + "contrastive_loss": 0.5161, + "epoch": 3.4627539503386005, + "grad_norm": 15.018134117126465, + "learning_rate": 2.2489761371988826e-06, + "lm_loss": 5.4664, + "loss": 1.4959, + "step": 1534, + "text_contrastive_loss": 0.8663 + }, + { + "contrastive_loss": 0.4129, + "epoch": 3.4650112866817158, + "grad_norm": 14.654855728149414, + "learning_rate": 2.242920552111473e-06, + "lm_loss": 5.4464, + "loss": 1.3298, + "step": 1535, + "text_contrastive_loss": 0.7445 + }, + { + "contrastive_loss": 0.4956, + "epoch": 3.4672686230248306, + "grad_norm": 15.13311767578125, + "learning_rate": 2.236870772437433e-06, + "lm_loss": 5.5042, + "loss": 1.4198, + "step": 1536, + "text_contrastive_loss": 0.7476 + }, + { + "contrastive_loss": 0.378, + "epoch": 3.469525959367946, + "grad_norm": 13.35870361328125, + "learning_rate": 2.2308268109154126e-06, + "lm_loss": 5.4243, + "loss": 1.3046, + "step": 1537, + "text_contrastive_loss": 0.7683 + }, + { + "contrastive_loss": 0.4847, + "epoch": 3.471783295711061, + "grad_norm": 15.677458763122559, + "learning_rate": 2.224788680271811e-06, + "lm_loss": 5.4965, + "loss": 1.3943, + "step": 1538, + "text_contrastive_loss": 0.7197 + }, + { + "contrastive_loss": 0.4336, + "epoch": 3.474040632054176, + "grad_norm": 13.910303115844727, + "learning_rate": 2.218756393220753e-06, + "lm_loss": 5.4606, + "loss": 1.3446, + "step": 1539, + "text_contrastive_loss": 0.7298 + }, + { + "contrastive_loss": 0.4165, + "epoch": 3.476297968397291, + "grad_norm": 14.130895614624023, + "learning_rate": 2.212729962464051e-06, + "lm_loss": 5.5941, + "loss": 1.282, + "step": 1540, + "text_contrastive_loss": 0.6122 + }, + { + "contrastive_loss": 0.5003, + "epoch": 3.4785553047404063, + "grad_norm": 14.862709999084473, + "learning_rate": 2.2067094006911943e-06, + "lm_loss": 5.4334, + "loss": 1.4298, + "step": 1541, + "text_contrastive_loss": 0.7724 + }, + { + "contrastive_loss": 0.5442, + "epoch": 3.4808126410835216, + "grad_norm": 15.639609336853027, + "learning_rate": 2.2006947205793107e-06, + "lm_loss": 5.4148, + "loss": 1.4872, + "step": 1542, + "text_contrastive_loss": 0.803 + }, + { + "contrastive_loss": 0.4212, + "epoch": 3.4830699774266365, + "grad_norm": 14.766778945922852, + "learning_rate": 2.1946859347931442e-06, + "lm_loss": 5.3569, + "loss": 1.2715, + "step": 1543, + "text_contrastive_loss": 0.6293 + }, + { + "contrastive_loss": 0.5174, + "epoch": 3.485327313769752, + "grad_norm": 15.845047950744629, + "learning_rate": 2.1886830559850264e-06, + "lm_loss": 5.4649, + "loss": 1.5043, + "step": 1544, + "text_contrastive_loss": 0.8808 + }, + { + "contrastive_loss": 0.4867, + "epoch": 3.4875846501128667, + "grad_norm": 14.702009201049805, + "learning_rate": 2.182686096794852e-06, + "lm_loss": 5.4745, + "loss": 1.4068, + "step": 1545, + "text_contrastive_loss": 0.7454 + }, + { + "contrastive_loss": 0.5007, + "epoch": 3.489841986455982, + "grad_norm": 16.13895034790039, + "learning_rate": 2.176695069850053e-06, + "lm_loss": 5.3923, + "loss": 1.513, + "step": 1546, + "text_contrastive_loss": 0.9463 + }, + { + "contrastive_loss": 0.5704, + "epoch": 3.4920993227990973, + "grad_norm": 15.552277565002441, + "learning_rate": 2.1707099877655634e-06, + "lm_loss": 5.406, + "loss": 1.5892, + "step": 1547, + "text_contrastive_loss": 0.9565 + }, + { + "contrastive_loss": 0.4615, + "epoch": 3.494356659142212, + "grad_norm": 14.597433090209961, + "learning_rate": 2.1647308631438068e-06, + "lm_loss": 5.4116, + "loss": 1.3862, + "step": 1548, + "text_contrastive_loss": 0.7671 + }, + { + "contrastive_loss": 0.3948, + "epoch": 3.4966139954853275, + "grad_norm": 12.96075439453125, + "learning_rate": 2.1587577085746596e-06, + "lm_loss": 5.4675, + "loss": 1.3205, + "step": 1549, + "text_contrastive_loss": 0.7579 + }, + { + "contrastive_loss": 0.5114, + "epoch": 3.4988713318284423, + "grad_norm": 15.659757614135742, + "learning_rate": 2.1527905366354292e-06, + "lm_loss": 5.4302, + "loss": 1.3715, + "step": 1550, + "text_contrastive_loss": 0.6342 + }, + { + "contrastive_loss": 0.6357, + "epoch": 3.5011286681715577, + "grad_norm": 16.875946044921875, + "learning_rate": 2.14682935989082e-06, + "lm_loss": 5.4462, + "loss": 1.6049, + "step": 1551, + "text_contrastive_loss": 0.8491 + }, + { + "contrastive_loss": 0.5353, + "epoch": 3.5033860045146725, + "grad_norm": 16.93952751159668, + "learning_rate": 2.14087419089292e-06, + "lm_loss": 5.5169, + "loss": 1.5205, + "step": 1552, + "text_contrastive_loss": 0.8669 + }, + { + "contrastive_loss": 0.5005, + "epoch": 3.505643340857788, + "grad_norm": 15.430221557617188, + "learning_rate": 2.1349250421811622e-06, + "lm_loss": 5.5929, + "loss": 1.4884, + "step": 1553, + "text_contrastive_loss": 0.8571 + }, + { + "contrastive_loss": 0.4847, + "epoch": 3.5079006772009027, + "grad_norm": 15.394756317138672, + "learning_rate": 2.1289819262823065e-06, + "lm_loss": 5.4703, + "loss": 1.386, + "step": 1554, + "text_contrastive_loss": 0.7087 + }, + { + "contrastive_loss": 0.4992, + "epoch": 3.510158013544018, + "grad_norm": 15.481775283813477, + "learning_rate": 2.1230448557104087e-06, + "lm_loss": 5.3952, + "loss": 1.4936, + "step": 1555, + "text_contrastive_loss": 0.9097 + }, + { + "contrastive_loss": 0.4316, + "epoch": 3.5124153498871333, + "grad_norm": 15.534514427185059, + "learning_rate": 2.117113842966792e-06, + "lm_loss": 5.4969, + "loss": 1.3996, + "step": 1556, + "text_contrastive_loss": 0.8367 + }, + { + "contrastive_loss": 0.4614, + "epoch": 3.514672686230248, + "grad_norm": 15.321314811706543, + "learning_rate": 2.111188900540028e-06, + "lm_loss": 5.3665, + "loss": 1.3687, + "step": 1557, + "text_contrastive_loss": 0.7413 + }, + { + "contrastive_loss": 0.4875, + "epoch": 3.5169300225733635, + "grad_norm": 13.656997680664062, + "learning_rate": 2.1052700409059057e-06, + "lm_loss": 5.4849, + "loss": 1.3841, + "step": 1558, + "text_contrastive_loss": 0.6963 + }, + { + "contrastive_loss": 0.423, + "epoch": 3.5191873589164784, + "grad_norm": 15.110286712646484, + "learning_rate": 2.0993572765274044e-06, + "lm_loss": 5.426, + "loss": 1.3611, + "step": 1559, + "text_contrastive_loss": 0.7909 + }, + { + "contrastive_loss": 0.4749, + "epoch": 3.5214446952595937, + "grad_norm": 13.659746170043945, + "learning_rate": 2.093450619854671e-06, + "lm_loss": 5.4491, + "loss": 1.514, + "step": 1560, + "text_contrastive_loss": 0.9883 + }, + { + "contrastive_loss": 0.3576, + "epoch": 3.523702031602709, + "grad_norm": 12.51785945892334, + "learning_rate": 2.08755008332499e-06, + "lm_loss": 5.4431, + "loss": 1.2728, + "step": 1561, + "text_contrastive_loss": 0.7418 + }, + { + "contrastive_loss": 0.4406, + "epoch": 3.525959367945824, + "grad_norm": 14.191641807556152, + "learning_rate": 2.0816556793627624e-06, + "lm_loss": 5.5272, + "loss": 1.3843, + "step": 1562, + "text_contrastive_loss": 0.7819 + }, + { + "contrastive_loss": 0.5504, + "epoch": 3.528216704288939, + "grad_norm": 14.523859024047852, + "learning_rate": 2.0757674203794696e-06, + "lm_loss": 5.5149, + "loss": 1.5177, + "step": 1563, + "text_contrastive_loss": 0.8316 + }, + { + "contrastive_loss": 0.4686, + "epoch": 3.530474040632054, + "grad_norm": 14.75478744506836, + "learning_rate": 2.06988531877366e-06, + "lm_loss": 5.511, + "loss": 1.3882, + "step": 1564, + "text_contrastive_loss": 0.7371 + }, + { + "contrastive_loss": 0.5425, + "epoch": 3.5327313769751694, + "grad_norm": 15.918438911437988, + "learning_rate": 2.064009386930915e-06, + "lm_loss": 5.4933, + "loss": 1.4855, + "step": 1565, + "text_contrastive_loss": 0.7873 + }, + { + "contrastive_loss": 0.5333, + "epoch": 3.5349887133182847, + "grad_norm": 16.604145050048828, + "learning_rate": 2.0581396372238254e-06, + "lm_loss": 5.4734, + "loss": 1.5831, + "step": 1566, + "text_contrastive_loss": 1.0049 + }, + { + "contrastive_loss": 0.4561, + "epoch": 3.5372460496613995, + "grad_norm": 13.487076759338379, + "learning_rate": 2.0522760820119615e-06, + "lm_loss": 5.3955, + "loss": 1.3157, + "step": 1567, + "text_contrastive_loss": 0.64 + }, + { + "contrastive_loss": 0.4488, + "epoch": 3.5395033860045144, + "grad_norm": 14.998278617858887, + "learning_rate": 2.046418733641853e-06, + "lm_loss": 5.4563, + "loss": 1.3863, + "step": 1568, + "text_contrastive_loss": 0.7837 + }, + { + "contrastive_loss": 0.4252, + "epoch": 3.5417607223476297, + "grad_norm": 14.402261734008789, + "learning_rate": 2.04056760444696e-06, + "lm_loss": 5.5006, + "loss": 1.3672, + "step": 1569, + "text_contrastive_loss": 0.7838 + }, + { + "contrastive_loss": 0.5558, + "epoch": 3.544018058690745, + "grad_norm": 16.445056915283203, + "learning_rate": 2.0347227067476478e-06, + "lm_loss": 5.5556, + "loss": 1.5293, + "step": 1570, + "text_contrastive_loss": 0.8359 + }, + { + "contrastive_loss": 0.3906, + "epoch": 3.54627539503386, + "grad_norm": 13.13399600982666, + "learning_rate": 2.02888405285116e-06, + "lm_loss": 5.5058, + "loss": 1.3473, + "step": 1571, + "text_contrastive_loss": 0.8122 + }, + { + "contrastive_loss": 0.4879, + "epoch": 3.5485327313769752, + "grad_norm": 15.91758918762207, + "learning_rate": 2.02305165505159e-06, + "lm_loss": 5.4396, + "loss": 1.514, + "step": 1572, + "text_contrastive_loss": 0.9643 + }, + { + "contrastive_loss": 0.4369, + "epoch": 3.55079006772009, + "grad_norm": 13.412368774414062, + "learning_rate": 2.0172255256298623e-06, + "lm_loss": 5.3833, + "loss": 1.4127, + "step": 1573, + "text_contrastive_loss": 0.8749 + }, + { + "contrastive_loss": 0.4322, + "epoch": 3.5530474040632054, + "grad_norm": 13.076878547668457, + "learning_rate": 2.0114056768537005e-06, + "lm_loss": 5.4023, + "loss": 1.2969, + "step": 1574, + "text_contrastive_loss": 0.6489 + }, + { + "contrastive_loss": 0.4718, + "epoch": 3.5553047404063207, + "grad_norm": 15.465753555297852, + "learning_rate": 2.005592120977606e-06, + "lm_loss": 5.4437, + "loss": 1.4883, + "step": 1575, + "text_contrastive_loss": 0.9444 + }, + { + "contrastive_loss": 0.5009, + "epoch": 3.5575620767494356, + "grad_norm": 15.188689231872559, + "learning_rate": 1.9997848702428226e-06, + "lm_loss": 5.4143, + "loss": 1.4225, + "step": 1576, + "text_contrastive_loss": 0.7604 + }, + { + "contrastive_loss": 0.5332, + "epoch": 3.559819413092551, + "grad_norm": 16.304014205932617, + "learning_rate": 1.9939839368773267e-06, + "lm_loss": 5.6139, + "loss": 1.5483, + "step": 1577, + "text_contrastive_loss": 0.9075 + }, + { + "contrastive_loss": 0.5034, + "epoch": 3.5620767494356658, + "grad_norm": 15.200770378112793, + "learning_rate": 1.9881893330957893e-06, + "lm_loss": 5.5008, + "loss": 1.5508, + "step": 1578, + "text_contrastive_loss": 0.9947 + }, + { + "contrastive_loss": 0.4698, + "epoch": 3.564334085778781, + "grad_norm": 13.521749496459961, + "learning_rate": 1.982401071099549e-06, + "lm_loss": 5.4551, + "loss": 1.3238, + "step": 1579, + "text_contrastive_loss": 0.6172 + }, + { + "contrastive_loss": 0.5775, + "epoch": 3.5665914221218964, + "grad_norm": 15.051464080810547, + "learning_rate": 1.9766191630765964e-06, + "lm_loss": 5.5055, + "loss": 1.584, + "step": 1580, + "text_contrastive_loss": 0.9119 + }, + { + "contrastive_loss": 0.4802, + "epoch": 3.5688487584650113, + "grad_norm": 14.562134742736816, + "learning_rate": 1.970843621201541e-06, + "lm_loss": 5.5413, + "loss": 1.4693, + "step": 1581, + "text_contrastive_loss": 0.8698 + }, + { + "contrastive_loss": 0.5069, + "epoch": 3.5711060948081266, + "grad_norm": 14.539229393005371, + "learning_rate": 1.9650744576355894e-06, + "lm_loss": 5.3543, + "loss": 1.4229, + "step": 1582, + "text_contrastive_loss": 0.7612 + }, + { + "contrastive_loss": 0.4969, + "epoch": 3.5733634311512414, + "grad_norm": 14.167746543884277, + "learning_rate": 1.959311684526513e-06, + "lm_loss": 5.3595, + "loss": 1.4696, + "step": 1583, + "text_contrastive_loss": 0.8736 + }, + { + "contrastive_loss": 0.4733, + "epoch": 3.5756207674943568, + "grad_norm": 13.857242584228516, + "learning_rate": 1.9535553140086322e-06, + "lm_loss": 5.3496, + "loss": 1.3703, + "step": 1584, + "text_contrastive_loss": 0.724 + }, + { + "contrastive_loss": 0.5158, + "epoch": 3.5778781038374716, + "grad_norm": 16.7324275970459, + "learning_rate": 1.9478053582027826e-06, + "lm_loss": 5.3888, + "loss": 1.486, + "step": 1585, + "text_contrastive_loss": 0.8625 + }, + { + "contrastive_loss": 0.4899, + "epoch": 3.580135440180587, + "grad_norm": 14.35672378540039, + "learning_rate": 1.9420618292162974e-06, + "lm_loss": 5.4006, + "loss": 1.4499, + "step": 1586, + "text_contrastive_loss": 0.84 + }, + { + "contrastive_loss": 0.449, + "epoch": 3.582392776523702, + "grad_norm": 15.682668685913086, + "learning_rate": 1.9363247391429695e-06, + "lm_loss": 5.36, + "loss": 1.3723, + "step": 1587, + "text_contrastive_loss": 0.7747 + }, + { + "contrastive_loss": 0.3653, + "epoch": 3.584650112866817, + "grad_norm": 11.676576614379883, + "learning_rate": 1.93059410006304e-06, + "lm_loss": 5.4248, + "loss": 1.2342, + "step": 1588, + "text_contrastive_loss": 0.6528 + }, + { + "contrastive_loss": 0.4217, + "epoch": 3.5869074492099324, + "grad_norm": 14.936031341552734, + "learning_rate": 1.924869924043165e-06, + "lm_loss": 5.3893, + "loss": 1.3702, + "step": 1589, + "text_contrastive_loss": 0.8192 + }, + { + "contrastive_loss": 0.4403, + "epoch": 3.5891647855530473, + "grad_norm": 15.025103569030762, + "learning_rate": 1.919152223136391e-06, + "lm_loss": 5.5393, + "loss": 1.3577, + "step": 1590, + "text_contrastive_loss": 0.7269 + }, + { + "contrastive_loss": 0.4997, + "epoch": 3.5914221218961626, + "grad_norm": 15.543709754943848, + "learning_rate": 1.913441009382133e-06, + "lm_loss": 5.4648, + "loss": 1.4077, + "step": 1591, + "text_contrastive_loss": 0.7231 + }, + { + "contrastive_loss": 0.542, + "epoch": 3.5936794582392775, + "grad_norm": 13.94593620300293, + "learning_rate": 1.9077362948061404e-06, + "lm_loss": 5.4439, + "loss": 1.4549, + "step": 1592, + "text_contrastive_loss": 0.7371 + }, + { + "contrastive_loss": 0.4923, + "epoch": 3.595936794582393, + "grad_norm": 13.830196380615234, + "learning_rate": 1.902038091420481e-06, + "lm_loss": 5.6238, + "loss": 1.4659, + "step": 1593, + "text_contrastive_loss": 0.8224 + }, + { + "contrastive_loss": 0.5044, + "epoch": 3.598194130925508, + "grad_norm": 14.951301574707031, + "learning_rate": 1.8963464112235185e-06, + "lm_loss": 5.4754, + "loss": 1.4658, + "step": 1594, + "text_contrastive_loss": 0.8278 + }, + { + "contrastive_loss": 0.4535, + "epoch": 3.600451467268623, + "grad_norm": 15.162139892578125, + "learning_rate": 1.8906612661998698e-06, + "lm_loss": 5.4616, + "loss": 1.3023, + "step": 1595, + "text_contrastive_loss": 0.6052 + }, + { + "contrastive_loss": 0.4793, + "epoch": 3.6027088036117383, + "grad_norm": 14.030896186828613, + "learning_rate": 1.884982668320398e-06, + "lm_loss": 5.4521, + "loss": 1.3707, + "step": 1596, + "text_contrastive_loss": 0.6923 + }, + { + "contrastive_loss": 0.4435, + "epoch": 3.604966139954853, + "grad_norm": 15.813409805297852, + "learning_rate": 1.8793106295421797e-06, + "lm_loss": 5.4842, + "loss": 1.3921, + "step": 1597, + "text_contrastive_loss": 0.8005 + }, + { + "contrastive_loss": 0.5051, + "epoch": 3.6072234762979685, + "grad_norm": 15.220564842224121, + "learning_rate": 1.873645161808481e-06, + "lm_loss": 5.4737, + "loss": 1.4929, + "step": 1598, + "text_contrastive_loss": 0.8807 + }, + { + "contrastive_loss": 0.4523, + "epoch": 3.609480812641084, + "grad_norm": 14.675507545471191, + "learning_rate": 1.8679862770487273e-06, + "lm_loss": 5.5892, + "loss": 1.4142, + "step": 1599, + "text_contrastive_loss": 0.806 + }, + { + "contrastive_loss": 0.5317, + "epoch": 3.6117381489841986, + "grad_norm": 15.481648445129395, + "learning_rate": 1.8623339871784869e-06, + "lm_loss": 5.375, + "loss": 1.5374, + "step": 1600, + "text_contrastive_loss": 0.9363 + }, + { + "contrastive_loss": 0.6098, + "epoch": 3.6139954853273135, + "grad_norm": 18.212594985961914, + "learning_rate": 1.8566883040994411e-06, + "lm_loss": 5.43, + "loss": 1.6688, + "step": 1601, + "text_contrastive_loss": 1.032 + }, + { + "contrastive_loss": 0.4442, + "epoch": 3.616252821670429, + "grad_norm": 14.503066062927246, + "learning_rate": 1.8510492396993595e-06, + "lm_loss": 5.5556, + "loss": 1.3495, + "step": 1602, + "text_contrastive_loss": 0.6995 + }, + { + "contrastive_loss": 0.5378, + "epoch": 3.618510158013544, + "grad_norm": 14.18012523651123, + "learning_rate": 1.8454168058520732e-06, + "lm_loss": 5.4036, + "loss": 1.4484, + "step": 1603, + "text_contrastive_loss": 0.7403 + }, + { + "contrastive_loss": 0.4311, + "epoch": 3.620767494356659, + "grad_norm": 13.57884407043457, + "learning_rate": 1.8397910144174536e-06, + "lm_loss": 5.3306, + "loss": 1.3593, + "step": 1604, + "text_contrastive_loss": 0.7902 + }, + { + "contrastive_loss": 0.4251, + "epoch": 3.6230248306997743, + "grad_norm": 14.99509048461914, + "learning_rate": 1.8341718772413852e-06, + "lm_loss": 5.2957, + "loss": 1.3066, + "step": 1605, + "text_contrastive_loss": 0.7039 + }, + { + "contrastive_loss": 0.4451, + "epoch": 3.625282167042889, + "grad_norm": 13.73076343536377, + "learning_rate": 1.8285594061557421e-06, + "lm_loss": 5.4181, + "loss": 1.4453, + "step": 1606, + "text_contrastive_loss": 0.9168 + }, + { + "contrastive_loss": 0.5404, + "epoch": 3.6275395033860045, + "grad_norm": 15.812504768371582, + "learning_rate": 1.822953612978362e-06, + "lm_loss": 5.3931, + "loss": 1.4733, + "step": 1607, + "text_contrastive_loss": 0.7871 + }, + { + "contrastive_loss": 0.4427, + "epoch": 3.62979683972912, + "grad_norm": 13.716792106628418, + "learning_rate": 1.817354509513017e-06, + "lm_loss": 5.4027, + "loss": 1.3851, + "step": 1608, + "text_contrastive_loss": 0.8042 + }, + { + "contrastive_loss": 0.5463, + "epoch": 3.6320541760722347, + "grad_norm": 17.64068031311035, + "learning_rate": 1.8117621075493979e-06, + "lm_loss": 5.4712, + "loss": 1.4617, + "step": 1609, + "text_contrastive_loss": 0.7365 + }, + { + "contrastive_loss": 0.4548, + "epoch": 3.63431151241535, + "grad_norm": 15.060933113098145, + "learning_rate": 1.8061764188630831e-06, + "lm_loss": 5.4258, + "loss": 1.3966, + "step": 1610, + "text_contrastive_loss": 0.7985 + }, + { + "contrastive_loss": 0.4503, + "epoch": 3.636568848758465, + "grad_norm": 13.883279800415039, + "learning_rate": 1.8005974552155158e-06, + "lm_loss": 5.5137, + "loss": 1.371, + "step": 1611, + "text_contrastive_loss": 0.7387 + }, + { + "contrastive_loss": 0.5283, + "epoch": 3.63882618510158, + "grad_norm": 14.469231605529785, + "learning_rate": 1.7950252283539776e-06, + "lm_loss": 5.4306, + "loss": 1.5237, + "step": 1612, + "text_contrastive_loss": 0.9048 + }, + { + "contrastive_loss": 0.4296, + "epoch": 3.6410835214446955, + "grad_norm": 14.495342254638672, + "learning_rate": 1.7894597500115657e-06, + "lm_loss": 5.431, + "loss": 1.3127, + "step": 1613, + "text_contrastive_loss": 0.68 + }, + { + "contrastive_loss": 0.4475, + "epoch": 3.6433408577878104, + "grad_norm": 14.319993019104004, + "learning_rate": 1.7839010319071687e-06, + "lm_loss": 5.4516, + "loss": 1.4174, + "step": 1614, + "text_contrastive_loss": 0.8496 + }, + { + "contrastive_loss": 0.4959, + "epoch": 3.6455981941309257, + "grad_norm": 15.90335464477539, + "learning_rate": 1.7783490857454354e-06, + "lm_loss": 5.4921, + "loss": 1.4638, + "step": 1615, + "text_contrastive_loss": 0.8374 + }, + { + "contrastive_loss": 0.5409, + "epoch": 3.6478555304740405, + "grad_norm": 15.516691207885742, + "learning_rate": 1.7728039232167603e-06, + "lm_loss": 5.4983, + "loss": 1.4933, + "step": 1616, + "text_contrastive_loss": 0.8051 + }, + { + "contrastive_loss": 0.449, + "epoch": 3.650112866817156, + "grad_norm": 13.99092960357666, + "learning_rate": 1.7672655559972535e-06, + "lm_loss": 5.3946, + "loss": 1.3597, + "step": 1617, + "text_contrastive_loss": 0.7424 + }, + { + "contrastive_loss": 0.3678, + "epoch": 3.6523702031602707, + "grad_norm": 13.596165657043457, + "learning_rate": 1.7617339957487167e-06, + "lm_loss": 5.6572, + "loss": 1.3028, + "step": 1618, + "text_contrastive_loss": 0.7386 + }, + { + "contrastive_loss": 0.496, + "epoch": 3.654627539503386, + "grad_norm": 15.550728797912598, + "learning_rate": 1.7562092541186144e-06, + "lm_loss": 5.512, + "loss": 1.4366, + "step": 1619, + "text_contrastive_loss": 0.7788 + }, + { + "contrastive_loss": 0.3968, + "epoch": 3.656884875846501, + "grad_norm": 12.663150787353516, + "learning_rate": 1.750691342740058e-06, + "lm_loss": 5.3956, + "loss": 1.2821, + "step": 1620, + "text_contrastive_loss": 0.6915 + }, + { + "contrastive_loss": 0.4398, + "epoch": 3.659142212189616, + "grad_norm": 13.776789665222168, + "learning_rate": 1.7451802732317763e-06, + "lm_loss": 5.4691, + "loss": 1.3938, + "step": 1621, + "text_contrastive_loss": 0.8142 + }, + { + "contrastive_loss": 0.3829, + "epoch": 3.6613995485327315, + "grad_norm": 13.244256973266602, + "learning_rate": 1.7396760571980902e-06, + "lm_loss": 5.5434, + "loss": 1.3086, + "step": 1622, + "text_contrastive_loss": 0.7428 + }, + { + "contrastive_loss": 0.4854, + "epoch": 3.6636568848758464, + "grad_norm": 14.853069305419922, + "learning_rate": 1.7341787062288928e-06, + "lm_loss": 5.4619, + "loss": 1.3947, + "step": 1623, + "text_contrastive_loss": 0.7262 + }, + { + "contrastive_loss": 0.6319, + "epoch": 3.6659142212189617, + "grad_norm": 16.079355239868164, + "learning_rate": 1.7286882318996162e-06, + "lm_loss": 5.5133, + "loss": 1.5815, + "step": 1624, + "text_contrastive_loss": 0.7966 + }, + { + "contrastive_loss": 0.437, + "epoch": 3.6681715575620766, + "grad_norm": 13.949938774108887, + "learning_rate": 1.7232046457712164e-06, + "lm_loss": 5.4726, + "loss": 1.3366, + "step": 1625, + "text_contrastive_loss": 0.7048 + }, + { + "contrastive_loss": 0.5804, + "epoch": 3.670428893905192, + "grad_norm": 19.09020233154297, + "learning_rate": 1.7177279593901463e-06, + "lm_loss": 5.358, + "loss": 1.5091, + "step": 1626, + "text_contrastive_loss": 0.7858 + }, + { + "contrastive_loss": 0.4591, + "epoch": 3.672686230248307, + "grad_norm": 13.983572959899902, + "learning_rate": 1.712258184288328e-06, + "lm_loss": 5.3888, + "loss": 1.3874, + "step": 1627, + "text_contrastive_loss": 0.7789 + }, + { + "contrastive_loss": 0.486, + "epoch": 3.674943566591422, + "grad_norm": 14.134383201599121, + "learning_rate": 1.7067953319831327e-06, + "lm_loss": 5.371, + "loss": 1.423, + "step": 1628, + "text_contrastive_loss": 0.7999 + }, + { + "contrastive_loss": 0.5198, + "epoch": 3.6772009029345374, + "grad_norm": 15.602533340454102, + "learning_rate": 1.7013394139773537e-06, + "lm_loss": 5.478, + "loss": 1.458, + "step": 1629, + "text_contrastive_loss": 0.7809 + }, + { + "contrastive_loss": 0.4249, + "epoch": 3.6794582392776523, + "grad_norm": 14.587101936340332, + "learning_rate": 1.6958904417591853e-06, + "lm_loss": 5.4295, + "loss": 1.3875, + "step": 1630, + "text_contrastive_loss": 0.8394 + }, + { + "contrastive_loss": 0.6462, + "epoch": 3.6817155756207676, + "grad_norm": 17.65336036682129, + "learning_rate": 1.6904484268021915e-06, + "lm_loss": 5.4553, + "loss": 1.6541, + "step": 1631, + "text_contrastive_loss": 0.9248 + }, + { + "contrastive_loss": 0.5325, + "epoch": 3.683972911963883, + "grad_norm": 16.566082000732422, + "learning_rate": 1.6850133805652907e-06, + "lm_loss": 5.3911, + "loss": 1.5751, + "step": 1632, + "text_contrastive_loss": 1.0071 + }, + { + "contrastive_loss": 0.38, + "epoch": 3.6862302483069977, + "grad_norm": 13.386906623840332, + "learning_rate": 1.6795853144927282e-06, + "lm_loss": 5.45, + "loss": 1.2994, + "step": 1633, + "text_contrastive_loss": 0.7489 + }, + { + "contrastive_loss": 0.3964, + "epoch": 3.6884875846501126, + "grad_norm": 12.794499397277832, + "learning_rate": 1.6741642400140513e-06, + "lm_loss": 5.4881, + "loss": 1.3298, + "step": 1634, + "text_contrastive_loss": 0.7691 + }, + { + "contrastive_loss": 0.4154, + "epoch": 3.690744920993228, + "grad_norm": 13.860358238220215, + "learning_rate": 1.668750168544081e-06, + "lm_loss": 5.5105, + "loss": 1.3183, + "step": 1635, + "text_contrastive_loss": 0.7037 + }, + { + "contrastive_loss": 0.5132, + "epoch": 3.6930022573363432, + "grad_norm": 16.342418670654297, + "learning_rate": 1.663343111482898e-06, + "lm_loss": 5.342, + "loss": 1.4531, + "step": 1636, + "text_contrastive_loss": 0.8115 + }, + { + "contrastive_loss": 0.4659, + "epoch": 3.695259593679458, + "grad_norm": 14.466636657714844, + "learning_rate": 1.657943080215812e-06, + "lm_loss": 5.5518, + "loss": 1.3731, + "step": 1637, + "text_contrastive_loss": 0.704 + }, + { + "contrastive_loss": 0.3998, + "epoch": 3.6975169300225734, + "grad_norm": 13.353544235229492, + "learning_rate": 1.6525500861133386e-06, + "lm_loss": 5.4991, + "loss": 1.2143, + "step": 1638, + "text_contrastive_loss": 0.5293 + }, + { + "contrastive_loss": 0.5195, + "epoch": 3.6997742663656883, + "grad_norm": 15.653892517089844, + "learning_rate": 1.6471641405311727e-06, + "lm_loss": 5.3331, + "loss": 1.4522, + "step": 1639, + "text_contrastive_loss": 0.7987 + }, + { + "contrastive_loss": 0.3807, + "epoch": 3.7020316027088036, + "grad_norm": 13.191279411315918, + "learning_rate": 1.641785254810172e-06, + "lm_loss": 5.3584, + "loss": 1.2324, + "step": 1640, + "text_contrastive_loss": 0.6317 + }, + { + "contrastive_loss": 0.4739, + "epoch": 3.704288939051919, + "grad_norm": 14.209907531738281, + "learning_rate": 1.636413440276326e-06, + "lm_loss": 5.4345, + "loss": 1.4423, + "step": 1641, + "text_contrastive_loss": 0.8498 + }, + { + "contrastive_loss": 0.527, + "epoch": 3.706546275395034, + "grad_norm": 15.565753936767578, + "learning_rate": 1.631048708240736e-06, + "lm_loss": 5.4348, + "loss": 1.5435, + "step": 1642, + "text_contrastive_loss": 0.9461 + }, + { + "contrastive_loss": 0.5093, + "epoch": 3.708803611738149, + "grad_norm": 16.196430206298828, + "learning_rate": 1.6256910699995921e-06, + "lm_loss": 5.4427, + "loss": 1.4312, + "step": 1643, + "text_contrastive_loss": 0.7552 + }, + { + "contrastive_loss": 0.5041, + "epoch": 3.711060948081264, + "grad_norm": 16.06947898864746, + "learning_rate": 1.620340536834139e-06, + "lm_loss": 5.4575, + "loss": 1.4531, + "step": 1644, + "text_contrastive_loss": 0.8063 + }, + { + "contrastive_loss": 0.4474, + "epoch": 3.7133182844243793, + "grad_norm": 15.422207832336426, + "learning_rate": 1.6149971200106723e-06, + "lm_loss": 5.4493, + "loss": 1.3903, + "step": 1645, + "text_contrastive_loss": 0.7959 + }, + { + "contrastive_loss": 0.4229, + "epoch": 3.7155756207674946, + "grad_norm": 13.07422924041748, + "learning_rate": 1.6096608307804973e-06, + "lm_loss": 5.3305, + "loss": 1.3167, + "step": 1646, + "text_contrastive_loss": 0.7215 + }, + { + "contrastive_loss": 0.4428, + "epoch": 3.7178329571106095, + "grad_norm": 13.395735740661621, + "learning_rate": 1.604331680379908e-06, + "lm_loss": 5.5048, + "loss": 1.3651, + "step": 1647, + "text_contrastive_loss": 0.7435 + }, + { + "contrastive_loss": 0.3978, + "epoch": 3.7200902934537243, + "grad_norm": 12.62199878692627, + "learning_rate": 1.599009680030173e-06, + "lm_loss": 5.4844, + "loss": 1.2788, + "step": 1648, + "text_contrastive_loss": 0.665 + }, + { + "contrastive_loss": 0.4683, + "epoch": 3.7223476297968396, + "grad_norm": 13.023398399353027, + "learning_rate": 1.5936948409375007e-06, + "lm_loss": 5.3911, + "loss": 1.4088, + "step": 1649, + "text_contrastive_loss": 0.8027 + }, + { + "contrastive_loss": 0.5661, + "epoch": 3.724604966139955, + "grad_norm": 14.816571235656738, + "learning_rate": 1.5883871742930257e-06, + "lm_loss": 5.4983, + "loss": 1.6129, + "step": 1650, + "text_contrastive_loss": 0.9938 + }, + { + "contrastive_loss": 0.5046, + "epoch": 3.72686230248307, + "grad_norm": 16.056814193725586, + "learning_rate": 1.5830866912727722e-06, + "lm_loss": 5.343, + "loss": 1.4311, + "step": 1651, + "text_contrastive_loss": 0.7844 + }, + { + "contrastive_loss": 0.4904, + "epoch": 3.729119638826185, + "grad_norm": 15.371965408325195, + "learning_rate": 1.5777934030376445e-06, + "lm_loss": 5.3956, + "loss": 1.5193, + "step": 1652, + "text_contrastive_loss": 0.9785 + }, + { + "contrastive_loss": 0.4133, + "epoch": 3.7313769751693, + "grad_norm": 14.072219848632812, + "learning_rate": 1.5725073207333963e-06, + "lm_loss": 5.4368, + "loss": 1.2556, + "step": 1653, + "text_contrastive_loss": 0.5972 + }, + { + "contrastive_loss": 0.4406, + "epoch": 3.7336343115124153, + "grad_norm": 14.953420639038086, + "learning_rate": 1.5672284554906087e-06, + "lm_loss": 5.4728, + "loss": 1.3302, + "step": 1654, + "text_contrastive_loss": 0.6846 + }, + { + "contrastive_loss": 0.4477, + "epoch": 3.7358916478555306, + "grad_norm": 13.6334228515625, + "learning_rate": 1.561956818424661e-06, + "lm_loss": 5.4214, + "loss": 1.4501, + "step": 1655, + "text_contrastive_loss": 0.9206 + }, + { + "contrastive_loss": 0.386, + "epoch": 3.7381489841986455, + "grad_norm": 13.051822662353516, + "learning_rate": 1.5566924206357187e-06, + "lm_loss": 5.4257, + "loss": 1.3043, + "step": 1656, + "text_contrastive_loss": 0.7516 + }, + { + "contrastive_loss": 0.4568, + "epoch": 3.740406320541761, + "grad_norm": 14.413710594177246, + "learning_rate": 1.5514352732087024e-06, + "lm_loss": 5.5269, + "loss": 1.3322, + "step": 1657, + "text_contrastive_loss": 0.6453 + }, + { + "contrastive_loss": 0.3845, + "epoch": 3.7426636568848757, + "grad_norm": 13.425925254821777, + "learning_rate": 1.5461853872132648e-06, + "lm_loss": 5.3478, + "loss": 1.2498, + "step": 1658, + "text_contrastive_loss": 0.6612 + }, + { + "contrastive_loss": 0.4097, + "epoch": 3.744920993227991, + "grad_norm": 12.664298057556152, + "learning_rate": 1.5409427737037713e-06, + "lm_loss": 5.41, + "loss": 1.326, + "step": 1659, + "text_contrastive_loss": 0.7505 + }, + { + "contrastive_loss": 0.5343, + "epoch": 3.7471783295711063, + "grad_norm": 16.94868278503418, + "learning_rate": 1.5357074437192688e-06, + "lm_loss": 5.5243, + "loss": 1.5483, + "step": 1660, + "text_contrastive_loss": 0.9232 + }, + { + "contrastive_loss": 0.4782, + "epoch": 3.749435665914221, + "grad_norm": 13.815977096557617, + "learning_rate": 1.5304794082834713e-06, + "lm_loss": 5.4108, + "loss": 1.4028, + "step": 1661, + "text_contrastive_loss": 0.7669 + }, + { + "contrastive_loss": 0.4463, + "epoch": 3.7516930022573365, + "grad_norm": 13.913975715637207, + "learning_rate": 1.5252586784047374e-06, + "lm_loss": 5.4865, + "loss": 1.383, + "step": 1662, + "text_contrastive_loss": 0.7761 + }, + { + "contrastive_loss": 0.4646, + "epoch": 3.7539503386004514, + "grad_norm": 13.805709838867188, + "learning_rate": 1.520045265076034e-06, + "lm_loss": 5.4509, + "loss": 1.3843, + "step": 1663, + "text_contrastive_loss": 0.7493 + }, + { + "contrastive_loss": 0.4252, + "epoch": 3.7562076749435667, + "grad_norm": 15.102557182312012, + "learning_rate": 1.5148391792749272e-06, + "lm_loss": 5.3517, + "loss": 1.3715, + "step": 1664, + "text_contrastive_loss": 0.8222 + }, + { + "contrastive_loss": 0.5332, + "epoch": 3.758465011286682, + "grad_norm": 16.021533966064453, + "learning_rate": 1.5096404319635533e-06, + "lm_loss": 5.3574, + "loss": 1.4838, + "step": 1665, + "text_contrastive_loss": 0.8297 + }, + { + "contrastive_loss": 0.3996, + "epoch": 3.760722347629797, + "grad_norm": 13.671859741210938, + "learning_rate": 1.5044490340885987e-06, + "lm_loss": 5.5058, + "loss": 1.2973, + "step": 1666, + "text_contrastive_loss": 0.6942 + }, + { + "contrastive_loss": 0.5671, + "epoch": 3.7629796839729117, + "grad_norm": 15.433830261230469, + "learning_rate": 1.4992649965812673e-06, + "lm_loss": 5.3907, + "loss": 1.5554, + "step": 1667, + "text_contrastive_loss": 0.8985 + }, + { + "contrastive_loss": 0.4496, + "epoch": 3.765237020316027, + "grad_norm": 12.787394523620605, + "learning_rate": 1.4940883303572724e-06, + "lm_loss": 5.2853, + "loss": 1.3244, + "step": 1668, + "text_contrastive_loss": 0.6927 + }, + { + "contrastive_loss": 0.5049, + "epoch": 3.7674943566591423, + "grad_norm": 15.6350736618042, + "learning_rate": 1.4889190463168019e-06, + "lm_loss": 5.3913, + "loss": 1.4614, + "step": 1669, + "text_contrastive_loss": 0.8348 + }, + { + "contrastive_loss": 0.463, + "epoch": 3.769751693002257, + "grad_norm": 14.160258293151855, + "learning_rate": 1.483757155344503e-06, + "lm_loss": 5.4456, + "loss": 1.4444, + "step": 1670, + "text_contrastive_loss": 0.8737 + }, + { + "contrastive_loss": 0.464, + "epoch": 3.7720090293453725, + "grad_norm": 14.008391380310059, + "learning_rate": 1.47860266830945e-06, + "lm_loss": 5.3581, + "loss": 1.3778, + "step": 1671, + "text_contrastive_loss": 0.756 + }, + { + "contrastive_loss": 0.3969, + "epoch": 3.7742663656884874, + "grad_norm": 12.669832229614258, + "learning_rate": 1.473455596065133e-06, + "lm_loss": 5.3824, + "loss": 1.2799, + "step": 1672, + "text_contrastive_loss": 0.6896 + }, + { + "contrastive_loss": 0.4465, + "epoch": 3.7765237020316027, + "grad_norm": 14.399201393127441, + "learning_rate": 1.4683159494494259e-06, + "lm_loss": 5.4547, + "loss": 1.3726, + "step": 1673, + "text_contrastive_loss": 0.7614 + }, + { + "contrastive_loss": 0.5326, + "epoch": 3.778781038374718, + "grad_norm": 15.133414268493652, + "learning_rate": 1.4631837392845694e-06, + "lm_loss": 5.474, + "loss": 1.4792, + "step": 1674, + "text_contrastive_loss": 0.7982 + }, + { + "contrastive_loss": 0.4698, + "epoch": 3.781038374717833, + "grad_norm": 14.398970603942871, + "learning_rate": 1.4580589763771413e-06, + "lm_loss": 5.4299, + "loss": 1.4172, + "step": 1675, + "text_contrastive_loss": 0.8089 + }, + { + "contrastive_loss": 0.4365, + "epoch": 3.783295711060948, + "grad_norm": 12.78225040435791, + "learning_rate": 1.4529416715180434e-06, + "lm_loss": 5.3764, + "loss": 1.3824, + "step": 1676, + "text_contrastive_loss": 0.8166 + }, + { + "contrastive_loss": 0.4425, + "epoch": 3.785553047404063, + "grad_norm": 15.15576457977295, + "learning_rate": 1.44783183548247e-06, + "lm_loss": 5.4167, + "loss": 1.3419, + "step": 1677, + "text_contrastive_loss": 0.7154 + }, + { + "contrastive_loss": 0.4026, + "epoch": 3.7878103837471784, + "grad_norm": 14.194446563720703, + "learning_rate": 1.4427294790298902e-06, + "lm_loss": 5.3794, + "loss": 1.259, + "step": 1678, + "text_contrastive_loss": 0.6371 + }, + { + "contrastive_loss": 0.4905, + "epoch": 3.7900677200902937, + "grad_norm": 15.428753852844238, + "learning_rate": 1.4376346129040243e-06, + "lm_loss": 5.5568, + "loss": 1.4511, + "step": 1679, + "text_contrastive_loss": 0.8099 + }, + { + "contrastive_loss": 0.4435, + "epoch": 3.7923250564334086, + "grad_norm": 14.056903839111328, + "learning_rate": 1.432547247832819e-06, + "lm_loss": 5.4258, + "loss": 1.3487, + "step": 1680, + "text_contrastive_loss": 0.7252 + }, + { + "contrastive_loss": 0.4828, + "epoch": 3.7945823927765234, + "grad_norm": 15.9234037399292, + "learning_rate": 1.4274673945284278e-06, + "lm_loss": 5.404, + "loss": 1.3773, + "step": 1681, + "text_contrastive_loss": 0.7082 + }, + { + "contrastive_loss": 0.5236, + "epoch": 3.7968397291196387, + "grad_norm": 16.38911247253418, + "learning_rate": 1.422395063687188e-06, + "lm_loss": 5.4339, + "loss": 1.4378, + "step": 1682, + "text_contrastive_loss": 0.7415 + }, + { + "contrastive_loss": 0.4982, + "epoch": 3.799097065462754, + "grad_norm": 15.284442901611328, + "learning_rate": 1.4173302659895938e-06, + "lm_loss": 5.3332, + "loss": 1.4302, + "step": 1683, + "text_contrastive_loss": 0.7973 + }, + { + "contrastive_loss": 0.5244, + "epoch": 3.801354401805869, + "grad_norm": 16.61286735534668, + "learning_rate": 1.4122730121002808e-06, + "lm_loss": 5.4125, + "loss": 1.4997, + "step": 1684, + "text_contrastive_loss": 0.8681 + }, + { + "contrastive_loss": 0.355, + "epoch": 3.8036117381489842, + "grad_norm": 13.047539710998535, + "learning_rate": 1.4072233126679985e-06, + "lm_loss": 5.4868, + "loss": 1.1981, + "step": 1685, + "text_contrastive_loss": 0.5888 + }, + { + "contrastive_loss": 0.4249, + "epoch": 3.805869074492099, + "grad_norm": 13.029471397399902, + "learning_rate": 1.4021811783255912e-06, + "lm_loss": 5.5234, + "loss": 1.3422, + "step": 1686, + "text_contrastive_loss": 0.73 + }, + { + "contrastive_loss": 0.3907, + "epoch": 3.8081264108352144, + "grad_norm": 14.358359336853027, + "learning_rate": 1.3971466196899697e-06, + "lm_loss": 5.4404, + "loss": 1.3179, + "step": 1687, + "text_contrastive_loss": 0.7664 + }, + { + "contrastive_loss": 0.4448, + "epoch": 3.8103837471783297, + "grad_norm": 13.757017135620117, + "learning_rate": 1.3921196473620975e-06, + "lm_loss": 5.4708, + "loss": 1.3456, + "step": 1688, + "text_contrastive_loss": 0.7073 + }, + { + "contrastive_loss": 0.4341, + "epoch": 3.8126410835214446, + "grad_norm": 14.16545295715332, + "learning_rate": 1.3871002719269616e-06, + "lm_loss": 5.5009, + "loss": 1.3487, + "step": 1689, + "text_contrastive_loss": 0.7291 + }, + { + "contrastive_loss": 0.413, + "epoch": 3.81489841986456, + "grad_norm": 13.017946243286133, + "learning_rate": 1.3820885039535564e-06, + "lm_loss": 5.4397, + "loss": 1.3781, + "step": 1690, + "text_contrastive_loss": 0.8422 + }, + { + "contrastive_loss": 0.4212, + "epoch": 3.8171557562076748, + "grad_norm": 14.79958438873291, + "learning_rate": 1.3770843539948508e-06, + "lm_loss": 5.4827, + "loss": 1.3174, + "step": 1691, + "text_contrastive_loss": 0.6958 + }, + { + "contrastive_loss": 0.4541, + "epoch": 3.81941309255079, + "grad_norm": 15.317020416259766, + "learning_rate": 1.3720878325877785e-06, + "lm_loss": 5.2869, + "loss": 1.3768, + "step": 1692, + "text_contrastive_loss": 0.788 + }, + { + "contrastive_loss": 0.4699, + "epoch": 3.8216704288939054, + "grad_norm": 15.10213565826416, + "learning_rate": 1.3670989502532089e-06, + "lm_loss": 5.4177, + "loss": 1.3706, + "step": 1693, + "text_contrastive_loss": 0.718 + }, + { + "contrastive_loss": 0.5646, + "epoch": 3.8239277652370203, + "grad_norm": 15.355016708374023, + "learning_rate": 1.362117717495926e-06, + "lm_loss": 5.5147, + "loss": 1.4691, + "step": 1694, + "text_contrastive_loss": 0.7061 + }, + { + "contrastive_loss": 0.4912, + "epoch": 3.8261851015801356, + "grad_norm": 16.03980255126953, + "learning_rate": 1.3571441448046086e-06, + "lm_loss": 5.3597, + "loss": 1.3763, + "step": 1695, + "text_contrastive_loss": 0.6984 + }, + { + "contrastive_loss": 0.5484, + "epoch": 3.8284424379232505, + "grad_norm": 15.086762428283691, + "learning_rate": 1.3521782426517988e-06, + "lm_loss": 5.4318, + "loss": 1.5481, + "step": 1696, + "text_contrastive_loss": 0.913 + }, + { + "contrastive_loss": 0.4006, + "epoch": 3.8306997742663658, + "grad_norm": 13.688698768615723, + "learning_rate": 1.3472200214938974e-06, + "lm_loss": 5.3465, + "loss": 1.2766, + "step": 1697, + "text_contrastive_loss": 0.6826 + }, + { + "contrastive_loss": 0.4939, + "epoch": 3.832957110609481, + "grad_norm": 14.000205993652344, + "learning_rate": 1.3422694917711276e-06, + "lm_loss": 5.3961, + "loss": 1.4655, + "step": 1698, + "text_contrastive_loss": 0.8639 + }, + { + "contrastive_loss": 0.4755, + "epoch": 3.835214446952596, + "grad_norm": 14.355860710144043, + "learning_rate": 1.3373266639075134e-06, + "lm_loss": 5.4054, + "loss": 1.445, + "step": 1699, + "text_contrastive_loss": 0.8579 + }, + { + "contrastive_loss": 0.5761, + "epoch": 3.837471783295711, + "grad_norm": 16.03438377380371, + "learning_rate": 1.3323915483108662e-06, + "lm_loss": 5.4396, + "loss": 1.4326, + "step": 1700, + "text_contrastive_loss": 0.6251 + }, + { + "contrastive_loss": 0.4676, + "epoch": 3.839729119638826, + "grad_norm": 13.848464012145996, + "learning_rate": 1.3274641553727568e-06, + "lm_loss": 5.5739, + "loss": 1.4439, + "step": 1701, + "text_contrastive_loss": 0.8377 + }, + { + "contrastive_loss": 0.5247, + "epoch": 3.8419864559819414, + "grad_norm": 15.230380058288574, + "learning_rate": 1.3225444954684962e-06, + "lm_loss": 5.4977, + "loss": 1.4957, + "step": 1702, + "text_contrastive_loss": 0.8424 + }, + { + "contrastive_loss": 0.5274, + "epoch": 3.8442437923250563, + "grad_norm": 15.571111679077148, + "learning_rate": 1.3176325789571075e-06, + "lm_loss": 5.5268, + "loss": 1.5071, + "step": 1703, + "text_contrastive_loss": 0.854 + }, + { + "contrastive_loss": 0.4379, + "epoch": 3.8465011286681716, + "grad_norm": 13.863898277282715, + "learning_rate": 1.3127284161813153e-06, + "lm_loss": 5.4114, + "loss": 1.294, + "step": 1704, + "text_contrastive_loss": 0.6299 + }, + { + "contrastive_loss": 0.4852, + "epoch": 3.8487584650112865, + "grad_norm": 15.147299766540527, + "learning_rate": 1.3078320174675141e-06, + "lm_loss": 5.4927, + "loss": 1.4492, + "step": 1705, + "text_contrastive_loss": 0.8295 + }, + { + "contrastive_loss": 0.3782, + "epoch": 3.851015801354402, + "grad_norm": 12.823390007019043, + "learning_rate": 1.3029433931257524e-06, + "lm_loss": 5.4188, + "loss": 1.2186, + "step": 1706, + "text_contrastive_loss": 0.5971 + }, + { + "contrastive_loss": 0.4107, + "epoch": 3.853273137697517, + "grad_norm": 12.695087432861328, + "learning_rate": 1.2980625534497037e-06, + "lm_loss": 5.4325, + "loss": 1.321, + "step": 1707, + "text_contrastive_loss": 0.7341 + }, + { + "contrastive_loss": 0.3792, + "epoch": 3.855530474040632, + "grad_norm": 12.667981147766113, + "learning_rate": 1.2931895087166551e-06, + "lm_loss": 5.4338, + "loss": 1.346, + "step": 1708, + "text_contrastive_loss": 0.8469 + }, + { + "contrastive_loss": 0.442, + "epoch": 3.8577878103837473, + "grad_norm": 13.558056831359863, + "learning_rate": 1.2883242691874792e-06, + "lm_loss": 5.4484, + "loss": 1.354, + "step": 1709, + "text_contrastive_loss": 0.7343 + }, + { + "contrastive_loss": 0.511, + "epoch": 3.860045146726862, + "grad_norm": 14.185285568237305, + "learning_rate": 1.2834668451066118e-06, + "lm_loss": 5.4079, + "loss": 1.4592, + "step": 1710, + "text_contrastive_loss": 0.8148 + }, + { + "contrastive_loss": 0.4396, + "epoch": 3.8623024830699775, + "grad_norm": 13.763110160827637, + "learning_rate": 1.2786172467020357e-06, + "lm_loss": 5.4503, + "loss": 1.323, + "step": 1711, + "text_contrastive_loss": 0.6768 + }, + { + "contrastive_loss": 0.597, + "epoch": 3.864559819413093, + "grad_norm": 14.601582527160645, + "learning_rate": 1.2737754841852501e-06, + "lm_loss": 5.3727, + "loss": 1.5593, + "step": 1712, + "text_contrastive_loss": 0.8499 + }, + { + "contrastive_loss": 0.4817, + "epoch": 3.8668171557562077, + "grad_norm": 13.016423225402832, + "learning_rate": 1.2689415677512574e-06, + "lm_loss": 5.4965, + "loss": 1.4231, + "step": 1713, + "text_contrastive_loss": 0.7835 + }, + { + "contrastive_loss": 0.4452, + "epoch": 3.8690744920993225, + "grad_norm": 14.020245552062988, + "learning_rate": 1.2641155075785444e-06, + "lm_loss": 5.5019, + "loss": 1.3958, + "step": 1714, + "text_contrastive_loss": 0.8008 + }, + { + "contrastive_loss": 0.5186, + "epoch": 3.871331828442438, + "grad_norm": 14.728551864624023, + "learning_rate": 1.259297313829046e-06, + "lm_loss": 5.3166, + "loss": 1.4605, + "step": 1715, + "text_contrastive_loss": 0.8205 + }, + { + "contrastive_loss": 0.5546, + "epoch": 3.873589164785553, + "grad_norm": 15.889209747314453, + "learning_rate": 1.2544869966481389e-06, + "lm_loss": 5.3853, + "loss": 1.4501, + "step": 1716, + "text_contrastive_loss": 0.714 + }, + { + "contrastive_loss": 0.375, + "epoch": 3.875846501128668, + "grad_norm": 12.50887393951416, + "learning_rate": 1.249684566164614e-06, + "lm_loss": 5.2945, + "loss": 1.2586, + "step": 1717, + "text_contrastive_loss": 0.7083 + }, + { + "contrastive_loss": 0.5103, + "epoch": 3.8781038374717833, + "grad_norm": 14.486296653747559, + "learning_rate": 1.2448900324906559e-06, + "lm_loss": 5.3843, + "loss": 1.5037, + "step": 1718, + "text_contrastive_loss": 0.91 + }, + { + "contrastive_loss": 0.4905, + "epoch": 3.880361173814898, + "grad_norm": 14.827803611755371, + "learning_rate": 1.2401034057218181e-06, + "lm_loss": 5.3555, + "loss": 1.4405, + "step": 1719, + "text_contrastive_loss": 0.8288 + }, + { + "contrastive_loss": 0.524, + "epoch": 3.8826185101580135, + "grad_norm": 16.471458435058594, + "learning_rate": 1.2353246959370086e-06, + "lm_loss": 5.4732, + "loss": 1.5214, + "step": 1720, + "text_contrastive_loss": 0.9001 + }, + { + "contrastive_loss": 0.5092, + "epoch": 3.884875846501129, + "grad_norm": 16.233083724975586, + "learning_rate": 1.2305539131984646e-06, + "lm_loss": 5.4302, + "loss": 1.4203, + "step": 1721, + "text_contrastive_loss": 0.7361 + }, + { + "contrastive_loss": 0.4494, + "epoch": 3.8871331828442437, + "grad_norm": 16.18627166748047, + "learning_rate": 1.2257910675517315e-06, + "lm_loss": 5.3978, + "loss": 1.3565, + "step": 1722, + "text_contrastive_loss": 0.7346 + }, + { + "contrastive_loss": 0.4763, + "epoch": 3.889390519187359, + "grad_norm": 14.904261589050293, + "learning_rate": 1.22103616902564e-06, + "lm_loss": 5.4004, + "loss": 1.4777, + "step": 1723, + "text_contrastive_loss": 0.9226 + }, + { + "contrastive_loss": 0.5008, + "epoch": 3.891647855530474, + "grad_norm": 15.24119758605957, + "learning_rate": 1.21628922763229e-06, + "lm_loss": 5.5289, + "loss": 1.4381, + "step": 1724, + "text_contrastive_loss": 0.7689 + }, + { + "contrastive_loss": 0.4041, + "epoch": 3.893905191873589, + "grad_norm": 13.18508529663086, + "learning_rate": 1.2115502533670253e-06, + "lm_loss": 5.4764, + "loss": 1.2925, + "step": 1725, + "text_contrastive_loss": 0.6815 + }, + { + "contrastive_loss": 0.5578, + "epoch": 3.8961625282167045, + "grad_norm": 15.574223518371582, + "learning_rate": 1.2068192562084146e-06, + "lm_loss": 5.3439, + "loss": 1.5831, + "step": 1726, + "text_contrastive_loss": 0.9817 + }, + { + "contrastive_loss": 0.481, + "epoch": 3.8984198645598194, + "grad_norm": 13.740864753723145, + "learning_rate": 1.2020962461182268e-06, + "lm_loss": 5.3276, + "loss": 1.3377, + "step": 1727, + "text_contrastive_loss": 0.6479 + }, + { + "contrastive_loss": 0.3554, + "epoch": 3.9006772009029347, + "grad_norm": 13.950639724731445, + "learning_rate": 1.1973812330414159e-06, + "lm_loss": 5.3181, + "loss": 1.1667, + "step": 1728, + "text_contrastive_loss": 0.559 + }, + { + "contrastive_loss": 0.5262, + "epoch": 3.9029345372460496, + "grad_norm": 14.745194435119629, + "learning_rate": 1.1926742269060965e-06, + "lm_loss": 5.391, + "loss": 1.4259, + "step": 1729, + "text_contrastive_loss": 0.7212 + }, + { + "contrastive_loss": 0.4077, + "epoch": 3.905191873589165, + "grad_norm": 13.899352073669434, + "learning_rate": 1.1879752376235231e-06, + "lm_loss": 5.5298, + "loss": 1.3059, + "step": 1730, + "text_contrastive_loss": 0.6904 + }, + { + "contrastive_loss": 0.4328, + "epoch": 3.90744920993228, + "grad_norm": 13.672224044799805, + "learning_rate": 1.1832842750880702e-06, + "lm_loss": 5.425, + "loss": 1.3616, + "step": 1731, + "text_contrastive_loss": 0.7726 + }, + { + "contrastive_loss": 0.4109, + "epoch": 3.909706546275395, + "grad_norm": 13.264911651611328, + "learning_rate": 1.1786013491772103e-06, + "lm_loss": 5.3754, + "loss": 1.3222, + "step": 1732, + "text_contrastive_loss": 0.7474 + }, + { + "contrastive_loss": 0.4658, + "epoch": 3.91196388261851, + "grad_norm": 14.345077514648438, + "learning_rate": 1.173926469751493e-06, + "lm_loss": 5.3543, + "loss": 1.426, + "step": 1733, + "text_contrastive_loss": 0.8496 + }, + { + "contrastive_loss": 0.5206, + "epoch": 3.9142212189616252, + "grad_norm": 14.702255249023438, + "learning_rate": 1.1692596466545275e-06, + "lm_loss": 5.4292, + "loss": 1.5241, + "step": 1734, + "text_contrastive_loss": 0.9211 + }, + { + "contrastive_loss": 0.4251, + "epoch": 3.9164785553047405, + "grad_norm": 15.130386352539062, + "learning_rate": 1.1646008897129546e-06, + "lm_loss": 5.4999, + "loss": 1.4109, + "step": 1735, + "text_contrastive_loss": 0.8716 + }, + { + "contrastive_loss": 0.4766, + "epoch": 3.9187358916478554, + "grad_norm": 14.442639350891113, + "learning_rate": 1.1599502087364345e-06, + "lm_loss": 5.4516, + "loss": 1.3841, + "step": 1736, + "text_contrastive_loss": 0.7246 + }, + { + "contrastive_loss": 0.5367, + "epoch": 3.9209932279909707, + "grad_norm": 14.633401870727539, + "learning_rate": 1.1553076135176222e-06, + "lm_loss": 5.4943, + "loss": 1.5827, + "step": 1737, + "text_contrastive_loss": 0.9931 + }, + { + "contrastive_loss": 0.4812, + "epoch": 3.9232505643340856, + "grad_norm": 15.330426216125488, + "learning_rate": 1.1506731138321474e-06, + "lm_loss": 5.3996, + "loss": 1.4129, + "step": 1738, + "text_contrastive_loss": 0.7835 + }, + { + "contrastive_loss": 0.4586, + "epoch": 3.925507900677201, + "grad_norm": 14.180137634277344, + "learning_rate": 1.1460467194385889e-06, + "lm_loss": 5.5286, + "loss": 1.4647, + "step": 1739, + "text_contrastive_loss": 0.9064 + }, + { + "contrastive_loss": 0.5139, + "epoch": 3.927765237020316, + "grad_norm": 15.674978256225586, + "learning_rate": 1.1414284400784643e-06, + "lm_loss": 5.4489, + "loss": 1.5258, + "step": 1740, + "text_contrastive_loss": 0.934 + }, + { + "contrastive_loss": 0.4641, + "epoch": 3.930022573363431, + "grad_norm": 15.997124671936035, + "learning_rate": 1.1368182854762005e-06, + "lm_loss": 5.4158, + "loss": 1.3675, + "step": 1741, + "text_contrastive_loss": 0.7236 + }, + { + "contrastive_loss": 0.3942, + "epoch": 3.9322799097065464, + "grad_norm": 14.213823318481445, + "learning_rate": 1.13221626533912e-06, + "lm_loss": 5.4638, + "loss": 1.2717, + "step": 1742, + "text_contrastive_loss": 0.6622 + }, + { + "contrastive_loss": 0.4433, + "epoch": 3.9345372460496613, + "grad_norm": 15.289708137512207, + "learning_rate": 1.1276223893574123e-06, + "lm_loss": 5.4448, + "loss": 1.3599, + "step": 1743, + "text_contrastive_loss": 0.7442 + }, + { + "contrastive_loss": 0.3326, + "epoch": 3.9367945823927766, + "grad_norm": 11.817400932312012, + "learning_rate": 1.1230366672041216e-06, + "lm_loss": 5.3922, + "loss": 1.2229, + "step": 1744, + "text_contrastive_loss": 0.7022 + }, + { + "contrastive_loss": 0.4143, + "epoch": 3.939051918735892, + "grad_norm": 14.098214149475098, + "learning_rate": 1.118459108535122e-06, + "lm_loss": 5.3779, + "loss": 1.324, + "step": 1745, + "text_contrastive_loss": 0.7438 + }, + { + "contrastive_loss": 0.4169, + "epoch": 3.9413092550790068, + "grad_norm": 12.802600860595703, + "learning_rate": 1.1138897229890995e-06, + "lm_loss": 5.5226, + "loss": 1.2836, + "step": 1746, + "text_contrastive_loss": 0.6289 + }, + { + "contrastive_loss": 0.5109, + "epoch": 3.9435665914221216, + "grad_norm": 15.707575798034668, + "learning_rate": 1.109328520187528e-06, + "lm_loss": 5.4749, + "loss": 1.5525, + "step": 1747, + "text_contrastive_loss": 0.9882 + }, + { + "contrastive_loss": 0.4281, + "epoch": 3.945823927765237, + "grad_norm": 13.964192390441895, + "learning_rate": 1.1047755097346541e-06, + "lm_loss": 5.4598, + "loss": 1.3548, + "step": 1748, + "text_contrastive_loss": 0.7615 + }, + { + "contrastive_loss": 0.5166, + "epoch": 3.9480812641083523, + "grad_norm": 15.360565185546875, + "learning_rate": 1.100230701217473e-06, + "lm_loss": 5.3465, + "loss": 1.5531, + "step": 1749, + "text_contrastive_loss": 1.0037 + }, + { + "contrastive_loss": 0.4775, + "epoch": 3.950338600451467, + "grad_norm": 13.847414016723633, + "learning_rate": 1.0956941042057106e-06, + "lm_loss": 5.4561, + "loss": 1.4327, + "step": 1750, + "text_contrastive_loss": 0.819 + }, + { + "contrastive_loss": 0.505, + "epoch": 3.9525959367945824, + "grad_norm": 14.46722412109375, + "learning_rate": 1.091165728251799e-06, + "lm_loss": 5.526, + "loss": 1.4573, + "step": 1751, + "text_contrastive_loss": 0.7993 + }, + { + "contrastive_loss": 0.4729, + "epoch": 3.9548532731376973, + "grad_norm": 13.864908218383789, + "learning_rate": 1.0866455828908634e-06, + "lm_loss": 5.5209, + "loss": 1.3877, + "step": 1752, + "text_contrastive_loss": 0.7255 + }, + { + "contrastive_loss": 0.4516, + "epoch": 3.9571106094808126, + "grad_norm": 13.434297561645508, + "learning_rate": 1.082133677640697e-06, + "lm_loss": 5.6237, + "loss": 1.3943, + "step": 1753, + "text_contrastive_loss": 0.7606 + }, + { + "contrastive_loss": 0.5079, + "epoch": 3.959367945823928, + "grad_norm": 14.11734676361084, + "learning_rate": 1.0776300220017437e-06, + "lm_loss": 5.4648, + "loss": 1.5113, + "step": 1754, + "text_contrastive_loss": 0.914 + }, + { + "contrastive_loss": 0.5111, + "epoch": 3.961625282167043, + "grad_norm": 15.088600158691406, + "learning_rate": 1.0731346254570735e-06, + "lm_loss": 5.4019, + "loss": 1.5318, + "step": 1755, + "text_contrastive_loss": 0.961 + }, + { + "contrastive_loss": 0.4083, + "epoch": 3.963882618510158, + "grad_norm": 12.624308586120605, + "learning_rate": 1.068647497472368e-06, + "lm_loss": 5.3553, + "loss": 1.3268, + "step": 1756, + "text_contrastive_loss": 0.7659 + }, + { + "contrastive_loss": 0.5247, + "epoch": 3.966139954853273, + "grad_norm": 15.862508773803711, + "learning_rate": 1.064168647495899e-06, + "lm_loss": 5.4139, + "loss": 1.4569, + "step": 1757, + "text_contrastive_loss": 0.7817 + }, + { + "contrastive_loss": 0.5116, + "epoch": 3.9683972911963883, + "grad_norm": 16.503145217895508, + "learning_rate": 1.0596980849585065e-06, + "lm_loss": 5.4638, + "loss": 1.4759, + "step": 1758, + "text_contrastive_loss": 0.8359 + }, + { + "contrastive_loss": 0.4919, + "epoch": 3.9706546275395036, + "grad_norm": 16.222732543945312, + "learning_rate": 1.0552358192735784e-06, + "lm_loss": 5.3278, + "loss": 1.4059, + "step": 1759, + "text_contrastive_loss": 0.7624 + }, + { + "contrastive_loss": 0.5374, + "epoch": 3.9729119638826185, + "grad_norm": 15.1146240234375, + "learning_rate": 1.0507818598370355e-06, + "lm_loss": 5.5162, + "loss": 1.4554, + "step": 1760, + "text_contrastive_loss": 0.7328 + }, + { + "contrastive_loss": 0.4234, + "epoch": 3.975169300225734, + "grad_norm": 13.736013412475586, + "learning_rate": 1.0463362160273076e-06, + "lm_loss": 5.3969, + "loss": 1.3239, + "step": 1761, + "text_contrastive_loss": 0.7215 + }, + { + "contrastive_loss": 0.504, + "epoch": 3.9774266365688487, + "grad_norm": 21.926189422607422, + "learning_rate": 1.0418988972053162e-06, + "lm_loss": 5.4649, + "loss": 1.4763, + "step": 1762, + "text_contrastive_loss": 0.8517 + }, + { + "contrastive_loss": 0.4926, + "epoch": 3.979683972911964, + "grad_norm": 16.662925720214844, + "learning_rate": 1.037469912714449e-06, + "lm_loss": 5.4606, + "loss": 1.4339, + "step": 1763, + "text_contrastive_loss": 0.7906 + }, + { + "contrastive_loss": 0.4355, + "epoch": 3.9819413092550793, + "grad_norm": 14.614496231079102, + "learning_rate": 1.0330492718805469e-06, + "lm_loss": 5.4014, + "loss": 1.3031, + "step": 1764, + "text_contrastive_loss": 0.6549 + }, + { + "contrastive_loss": 0.4375, + "epoch": 3.984198645598194, + "grad_norm": 14.14395809173584, + "learning_rate": 1.0286369840118859e-06, + "lm_loss": 5.3914, + "loss": 1.374, + "step": 1765, + "text_contrastive_loss": 0.7948 + }, + { + "contrastive_loss": 0.5569, + "epoch": 3.986455981941309, + "grad_norm": 15.8093843460083, + "learning_rate": 1.0242330583991507e-06, + "lm_loss": 5.2657, + "loss": 1.5593, + "step": 1766, + "text_contrastive_loss": 0.9518 + }, + { + "contrastive_loss": 0.4489, + "epoch": 3.9887133182844243, + "grad_norm": 15.425673484802246, + "learning_rate": 1.0198375043154142e-06, + "lm_loss": 5.4266, + "loss": 1.376, + "step": 1767, + "text_contrastive_loss": 0.7689 + }, + { + "contrastive_loss": 0.4843, + "epoch": 3.9909706546275396, + "grad_norm": 15.343976974487305, + "learning_rate": 1.0154503310161269e-06, + "lm_loss": 5.4899, + "loss": 1.4064, + "step": 1768, + "text_contrastive_loss": 0.7462 + }, + { + "contrastive_loss": 0.4129, + "epoch": 3.9932279909706545, + "grad_norm": 13.66511344909668, + "learning_rate": 1.0110715477390915e-06, + "lm_loss": 5.4581, + "loss": 1.349, + "step": 1769, + "text_contrastive_loss": 0.7806 + }, + { + "contrastive_loss": 0.4695, + "epoch": 3.99548532731377, + "grad_norm": 15.749361038208008, + "learning_rate": 1.006701163704445e-06, + "lm_loss": 5.4947, + "loss": 1.4914, + "step": 1770, + "text_contrastive_loss": 0.9449 + }, + { + "contrastive_loss": 0.3889, + "epoch": 3.9977426636568847, + "grad_norm": 13.284844398498535, + "learning_rate": 1.0023391881146349e-06, + "lm_loss": 5.504, + "loss": 1.3642, + "step": 1771, + "text_contrastive_loss": 0.8498 + }, + { + "contrastive_loss": 0.3234, + "epoch": 4.0, + "grad_norm": 17.181509017944336, + "learning_rate": 9.97985630154407e-07, + "lm_loss": 5.4794, + "loss": 1.1494, + "step": 1772, + "text_contrastive_loss": 0.5561 + }, + { + "contrastive_loss": 0.4377, + "epoch": 4.002257336343115, + "grad_norm": 13.104743003845215, + "learning_rate": 9.936404989907828e-07, + "lm_loss": 5.4367, + "loss": 1.3376, + "step": 1773, + "text_contrastive_loss": 0.7125 + }, + { + "contrastive_loss": 0.5083, + "epoch": 4.004514672686231, + "grad_norm": 15.707433700561523, + "learning_rate": 9.89303803773039e-07, + "lm_loss": 5.4245, + "loss": 1.4998, + "step": 1774, + "text_contrastive_loss": 0.8981 + }, + { + "contrastive_loss": 0.4177, + "epoch": 4.006772009029345, + "grad_norm": 14.663115501403809, + "learning_rate": 9.849755536326866e-07, + "lm_loss": 5.3359, + "loss": 1.3495, + "step": 1775, + "text_contrastive_loss": 0.7963 + }, + { + "contrastive_loss": 0.4854, + "epoch": 4.00902934537246, + "grad_norm": 15.18082046508789, + "learning_rate": 9.806557576834591e-07, + "lm_loss": 5.4073, + "loss": 1.4111, + "step": 1776, + "text_contrastive_loss": 0.77 + }, + { + "contrastive_loss": 0.471, + "epoch": 4.011286681715576, + "grad_norm": 14.365391731262207, + "learning_rate": 9.763444250212855e-07, + "lm_loss": 5.4356, + "loss": 1.4296, + "step": 1777, + "text_contrastive_loss": 0.8301 + }, + { + "contrastive_loss": 0.4621, + "epoch": 4.013544018058691, + "grad_norm": 13.901593208312988, + "learning_rate": 9.72041564724277e-07, + "lm_loss": 5.5117, + "loss": 1.4068, + "step": 1778, + "text_contrastive_loss": 0.7871 + }, + { + "contrastive_loss": 0.4065, + "epoch": 4.015801354401806, + "grad_norm": 11.42273235321045, + "learning_rate": 9.677471858526998e-07, + "lm_loss": 5.5015, + "loss": 1.3673, + "step": 1779, + "text_contrastive_loss": 0.8212 + }, + { + "contrastive_loss": 0.4544, + "epoch": 4.018058690744921, + "grad_norm": 13.887290000915527, + "learning_rate": 9.63461297448966e-07, + "lm_loss": 5.3918, + "loss": 1.3662, + "step": 1780, + "text_contrastive_loss": 0.7452 + }, + { + "contrastive_loss": 0.5371, + "epoch": 4.020316027088036, + "grad_norm": 15.941572189331055, + "learning_rate": 9.59183908537607e-07, + "lm_loss": 5.4437, + "loss": 1.5493, + "step": 1781, + "text_contrastive_loss": 0.9356 + }, + { + "contrastive_loss": 0.4149, + "epoch": 4.022573363431151, + "grad_norm": 15.179197311401367, + "learning_rate": 9.549150281252633e-07, + "lm_loss": 5.5243, + "loss": 1.3193, + "step": 1782, + "text_contrastive_loss": 0.7039 + }, + { + "contrastive_loss": 0.4376, + "epoch": 4.024830699774267, + "grad_norm": 14.96658706665039, + "learning_rate": 9.506546652006504e-07, + "lm_loss": 5.4864, + "loss": 1.3762, + "step": 1783, + "text_contrastive_loss": 0.78 + }, + { + "contrastive_loss": 0.4023, + "epoch": 4.027088036117381, + "grad_norm": 14.274824142456055, + "learning_rate": 9.464028287345551e-07, + "lm_loss": 5.4383, + "loss": 1.32, + "step": 1784, + "text_contrastive_loss": 0.7477 + }, + { + "contrastive_loss": 0.4299, + "epoch": 4.029345372460496, + "grad_norm": 13.89789867401123, + "learning_rate": 9.421595276798084e-07, + "lm_loss": 5.5811, + "loss": 1.4355, + "step": 1785, + "text_contrastive_loss": 0.895 + }, + { + "contrastive_loss": 0.4944, + "epoch": 4.031602708803612, + "grad_norm": 15.849641799926758, + "learning_rate": 9.379247709712725e-07, + "lm_loss": 5.4862, + "loss": 1.5002, + "step": 1786, + "text_contrastive_loss": 0.9144 + }, + { + "contrastive_loss": 0.5184, + "epoch": 4.033860045146727, + "grad_norm": 13.822864532470703, + "learning_rate": 9.336985675258109e-07, + "lm_loss": 5.3553, + "loss": 1.5108, + "step": 1787, + "text_contrastive_loss": 0.9138 + }, + { + "contrastive_loss": 0.4489, + "epoch": 4.036117381489842, + "grad_norm": 13.175774574279785, + "learning_rate": 9.294809262422838e-07, + "lm_loss": 5.5385, + "loss": 1.392, + "step": 1788, + "text_contrastive_loss": 0.7784 + }, + { + "contrastive_loss": 0.4705, + "epoch": 4.038374717832957, + "grad_norm": 15.42026138305664, + "learning_rate": 9.2527185600152e-07, + "lm_loss": 5.3664, + "loss": 1.3918, + "step": 1789, + "text_contrastive_loss": 0.7694 + }, + { + "contrastive_loss": 0.376, + "epoch": 4.040632054176072, + "grad_norm": 12.850754737854004, + "learning_rate": 9.210713656663023e-07, + "lm_loss": 5.3777, + "loss": 1.3102, + "step": 1790, + "text_contrastive_loss": 0.7928 + }, + { + "contrastive_loss": 0.417, + "epoch": 4.042889390519187, + "grad_norm": 14.971044540405273, + "learning_rate": 9.168794640813428e-07, + "lm_loss": 5.3417, + "loss": 1.2681, + "step": 1791, + "text_contrastive_loss": 0.6338 + }, + { + "contrastive_loss": 0.4723, + "epoch": 4.045146726862303, + "grad_norm": 14.143229484558105, + "learning_rate": 9.126961600732742e-07, + "lm_loss": 5.4334, + "loss": 1.3816, + "step": 1792, + "text_contrastive_loss": 0.7319 + }, + { + "contrastive_loss": 0.4157, + "epoch": 4.047404063205418, + "grad_norm": 12.646199226379395, + "learning_rate": 9.085214624506228e-07, + "lm_loss": 5.5195, + "loss": 1.3676, + "step": 1793, + "text_contrastive_loss": 0.8 + }, + { + "contrastive_loss": 0.4584, + "epoch": 4.049661399548532, + "grad_norm": 13.476517677307129, + "learning_rate": 9.043553800037952e-07, + "lm_loss": 5.4054, + "loss": 1.3187, + "step": 1794, + "text_contrastive_loss": 0.6395 + }, + { + "contrastive_loss": 0.489, + "epoch": 4.051918735891648, + "grad_norm": 15.695509910583496, + "learning_rate": 9.001979215050544e-07, + "lm_loss": 5.417, + "loss": 1.4629, + "step": 1795, + "text_contrastive_loss": 0.8644 + }, + { + "contrastive_loss": 0.4725, + "epoch": 4.054176072234763, + "grad_norm": 14.158413887023926, + "learning_rate": 8.960490957085061e-07, + "lm_loss": 5.2968, + "loss": 1.3294, + "step": 1796, + "text_contrastive_loss": 0.6545 + }, + { + "contrastive_loss": 0.5789, + "epoch": 4.056433408577878, + "grad_norm": 16.090099334716797, + "learning_rate": 8.919089113500795e-07, + "lm_loss": 5.4087, + "loss": 1.5167, + "step": 1797, + "text_contrastive_loss": 0.794 + }, + { + "contrastive_loss": 0.5213, + "epoch": 4.058690744920993, + "grad_norm": 14.36949348449707, + "learning_rate": 8.877773771475074e-07, + "lm_loss": 5.5536, + "loss": 1.453, + "step": 1798, + "text_contrastive_loss": 0.7525 + }, + { + "contrastive_loss": 0.4, + "epoch": 4.060948081264108, + "grad_norm": 12.922706604003906, + "learning_rate": 8.836545018003084e-07, + "lm_loss": 5.5052, + "loss": 1.3011, + "step": 1799, + "text_contrastive_loss": 0.7011 + }, + { + "contrastive_loss": 0.4973, + "epoch": 4.063205417607223, + "grad_norm": 14.795793533325195, + "learning_rate": 8.795402939897679e-07, + "lm_loss": 5.4265, + "loss": 1.4791, + "step": 1800, + "text_contrastive_loss": 0.8782 + }, + { + "contrastive_loss": 0.4619, + "epoch": 4.065462753950339, + "grad_norm": 14.072027206420898, + "learning_rate": 8.754347623789222e-07, + "lm_loss": 5.4428, + "loss": 1.4786, + "step": 1801, + "text_contrastive_loss": 0.9448 + }, + { + "contrastive_loss": 0.3627, + "epoch": 4.067720090293454, + "grad_norm": 13.702142715454102, + "learning_rate": 8.713379156125385e-07, + "lm_loss": 5.537, + "loss": 1.2641, + "step": 1802, + "text_contrastive_loss": 0.6955 + }, + { + "contrastive_loss": 0.5504, + "epoch": 4.0699774266365685, + "grad_norm": 15.301467895507812, + "learning_rate": 8.672497623170944e-07, + "lm_loss": 5.341, + "loss": 1.4963, + "step": 1803, + "text_contrastive_loss": 0.8237 + }, + { + "contrastive_loss": 0.3903, + "epoch": 4.072234762979684, + "grad_norm": 13.139961242675781, + "learning_rate": 8.631703111007645e-07, + "lm_loss": 5.4437, + "loss": 1.2991, + "step": 1804, + "text_contrastive_loss": 0.7289 + }, + { + "contrastive_loss": 0.4969, + "epoch": 4.074492099322799, + "grad_norm": 13.641329765319824, + "learning_rate": 8.590995705533994e-07, + "lm_loss": 5.458, + "loss": 1.4525, + "step": 1805, + "text_contrastive_loss": 0.8197 + }, + { + "contrastive_loss": 0.4982, + "epoch": 4.076749435665914, + "grad_norm": 13.534671783447266, + "learning_rate": 8.550375492465102e-07, + "lm_loss": 5.5236, + "loss": 1.384, + "step": 1806, + "text_contrastive_loss": 0.6671 + }, + { + "contrastive_loss": 0.4624, + "epoch": 4.07900677200903, + "grad_norm": 13.669743537902832, + "learning_rate": 8.509842557332437e-07, + "lm_loss": 5.5115, + "loss": 1.4749, + "step": 1807, + "text_contrastive_loss": 0.9227 + }, + { + "contrastive_loss": 0.4852, + "epoch": 4.081264108352144, + "grad_norm": 16.874771118164062, + "learning_rate": 8.469396985483724e-07, + "lm_loss": 5.3365, + "loss": 1.4552, + "step": 1808, + "text_contrastive_loss": 0.8728 + }, + { + "contrastive_loss": 0.5253, + "epoch": 4.0835214446952595, + "grad_norm": 15.528621673583984, + "learning_rate": 8.429038862082734e-07, + "lm_loss": 5.52, + "loss": 1.5023, + "step": 1809, + "text_contrastive_loss": 0.8501 + }, + { + "contrastive_loss": 0.409, + "epoch": 4.085778781038375, + "grad_norm": 13.915301322937012, + "learning_rate": 8.388768272109105e-07, + "lm_loss": 5.4367, + "loss": 1.3735, + "step": 1810, + "text_contrastive_loss": 0.8417 + }, + { + "contrastive_loss": 0.4184, + "epoch": 4.08803611738149, + "grad_norm": 14.407670974731445, + "learning_rate": 8.34858530035813e-07, + "lm_loss": 5.4788, + "loss": 1.2872, + "step": 1811, + "text_contrastive_loss": 0.6419 + }, + { + "contrastive_loss": 0.6038, + "epoch": 4.090293453724605, + "grad_norm": 16.431411743164062, + "learning_rate": 8.308490031440641e-07, + "lm_loss": 5.4824, + "loss": 1.6084, + "step": 1812, + "text_contrastive_loss": 0.9129 + }, + { + "contrastive_loss": 0.3561, + "epoch": 4.09255079006772, + "grad_norm": 13.252738952636719, + "learning_rate": 8.268482549782797e-07, + "lm_loss": 5.4143, + "loss": 1.3345, + "step": 1813, + "text_contrastive_loss": 0.874 + }, + { + "contrastive_loss": 0.4037, + "epoch": 4.094808126410835, + "grad_norm": 14.096956253051758, + "learning_rate": 8.228562939625906e-07, + "lm_loss": 5.3922, + "loss": 1.2912, + "step": 1814, + "text_contrastive_loss": 0.6966 + }, + { + "contrastive_loss": 0.4045, + "epoch": 4.0970654627539504, + "grad_norm": 13.017566680908203, + "learning_rate": 8.188731285026219e-07, + "lm_loss": 5.3951, + "loss": 1.3021, + "step": 1815, + "text_contrastive_loss": 0.7161 + }, + { + "contrastive_loss": 0.5328, + "epoch": 4.099322799097066, + "grad_norm": 14.632208824157715, + "learning_rate": 8.148987669854846e-07, + "lm_loss": 5.4135, + "loss": 1.4236, + "step": 1816, + "text_contrastive_loss": 0.699 + }, + { + "contrastive_loss": 0.4509, + "epoch": 4.10158013544018, + "grad_norm": 13.238189697265625, + "learning_rate": 8.109332177797469e-07, + "lm_loss": 5.553, + "loss": 1.4275, + "step": 1817, + "text_contrastive_loss": 0.8426 + }, + { + "contrastive_loss": 0.4242, + "epoch": 4.1038374717832955, + "grad_norm": 14.228952407836914, + "learning_rate": 8.069764892354237e-07, + "lm_loss": 5.3318, + "loss": 1.4155, + "step": 1818, + "text_contrastive_loss": 0.9163 + }, + { + "contrastive_loss": 0.4922, + "epoch": 4.106094808126411, + "grad_norm": 14.51209831237793, + "learning_rate": 8.030285896839546e-07, + "lm_loss": 5.4793, + "loss": 1.4443, + "step": 1819, + "text_contrastive_loss": 0.8084 + }, + { + "contrastive_loss": 0.5215, + "epoch": 4.108352144469526, + "grad_norm": 16.435653686523438, + "learning_rate": 7.99089527438191e-07, + "lm_loss": 5.3626, + "loss": 1.461, + "step": 1820, + "text_contrastive_loss": 0.8065 + }, + { + "contrastive_loss": 0.4164, + "epoch": 4.110609480812641, + "grad_norm": 14.234169960021973, + "learning_rate": 7.951593107923744e-07, + "lm_loss": 5.4942, + "loss": 1.3652, + "step": 1821, + "text_contrastive_loss": 0.7987 + }, + { + "contrastive_loss": 0.4092, + "epoch": 4.112866817155756, + "grad_norm": 13.296266555786133, + "learning_rate": 7.912379480221228e-07, + "lm_loss": 5.3685, + "loss": 1.3136, + "step": 1822, + "text_contrastive_loss": 0.7351 + }, + { + "contrastive_loss": 0.4893, + "epoch": 4.115124153498871, + "grad_norm": 15.535466194152832, + "learning_rate": 7.873254473844077e-07, + "lm_loss": 5.4085, + "loss": 1.5374, + "step": 1823, + "text_contrastive_loss": 1.0144 + }, + { + "contrastive_loss": 0.4283, + "epoch": 4.1173814898419865, + "grad_norm": 12.822464942932129, + "learning_rate": 7.834218171175428e-07, + "lm_loss": 5.3586, + "loss": 1.2899, + "step": 1824, + "text_contrastive_loss": 0.6514 + }, + { + "contrastive_loss": 0.5152, + "epoch": 4.119638826185102, + "grad_norm": 14.885557174682617, + "learning_rate": 7.795270654411635e-07, + "lm_loss": 5.4649, + "loss": 1.4202, + "step": 1825, + "text_contrastive_loss": 0.7169 + }, + { + "contrastive_loss": 0.501, + "epoch": 4.121896162528217, + "grad_norm": 14.53622817993164, + "learning_rate": 7.756412005562114e-07, + "lm_loss": 5.4078, + "loss": 1.4516, + "step": 1826, + "text_contrastive_loss": 0.8195 + }, + { + "contrastive_loss": 0.4134, + "epoch": 4.1241534988713315, + "grad_norm": 13.508371353149414, + "learning_rate": 7.717642306449113e-07, + "lm_loss": 5.4248, + "loss": 1.3355, + "step": 1827, + "text_contrastive_loss": 0.7593 + }, + { + "contrastive_loss": 0.521, + "epoch": 4.126410835214447, + "grad_norm": 15.592608451843262, + "learning_rate": 7.678961638707633e-07, + "lm_loss": 5.4752, + "loss": 1.4903, + "step": 1828, + "text_contrastive_loss": 0.8436 + }, + { + "contrastive_loss": 0.4885, + "epoch": 4.128668171557562, + "grad_norm": 14.605595588684082, + "learning_rate": 7.640370083785175e-07, + "lm_loss": 5.3554, + "loss": 1.4552, + "step": 1829, + "text_contrastive_loss": 0.8622 + }, + { + "contrastive_loss": 0.454, + "epoch": 4.1309255079006775, + "grad_norm": 15.124018669128418, + "learning_rate": 7.601867722941642e-07, + "lm_loss": 5.5599, + "loss": 1.4615, + "step": 1830, + "text_contrastive_loss": 0.903 + }, + { + "contrastive_loss": 0.3847, + "epoch": 4.133182844243792, + "grad_norm": 12.90140438079834, + "learning_rate": 7.563454637249056e-07, + "lm_loss": 5.6135, + "loss": 1.352, + "step": 1831, + "text_contrastive_loss": 0.8119 + }, + { + "contrastive_loss": 0.4153, + "epoch": 4.135440180586907, + "grad_norm": 13.231061935424805, + "learning_rate": 7.52513090759151e-07, + "lm_loss": 5.419, + "loss": 1.3291, + "step": 1832, + "text_contrastive_loss": 0.7439 + }, + { + "contrastive_loss": 0.4059, + "epoch": 4.1376975169300225, + "grad_norm": 12.05694580078125, + "learning_rate": 7.486896614664962e-07, + "lm_loss": 5.5334, + "loss": 1.3346, + "step": 1833, + "text_contrastive_loss": 0.7508 + }, + { + "contrastive_loss": 0.3801, + "epoch": 4.139954853273138, + "grad_norm": 11.814329147338867, + "learning_rate": 7.448751838977014e-07, + "lm_loss": 5.4661, + "loss": 1.2422, + "step": 1834, + "text_contrastive_loss": 0.6309 + }, + { + "contrastive_loss": 0.4647, + "epoch": 4.142212189616253, + "grad_norm": 14.641359329223633, + "learning_rate": 7.410696660846761e-07, + "lm_loss": 5.5061, + "loss": 1.4345, + "step": 1835, + "text_contrastive_loss": 0.8383 + }, + { + "contrastive_loss": 0.4706, + "epoch": 4.144469525959368, + "grad_norm": 14.823846817016602, + "learning_rate": 7.372731160404672e-07, + "lm_loss": 5.3886, + "loss": 1.3952, + "step": 1836, + "text_contrastive_loss": 0.7714 + }, + { + "contrastive_loss": 0.4976, + "epoch": 4.146726862302483, + "grad_norm": 14.605664253234863, + "learning_rate": 7.334855417592385e-07, + "lm_loss": 5.41, + "loss": 1.4627, + "step": 1837, + "text_contrastive_loss": 0.8481 + }, + { + "contrastive_loss": 0.4269, + "epoch": 4.148984198645598, + "grad_norm": 13.060043334960938, + "learning_rate": 7.297069512162535e-07, + "lm_loss": 5.5036, + "loss": 1.3205, + "step": 1838, + "text_contrastive_loss": 0.6865 + }, + { + "contrastive_loss": 0.485, + "epoch": 4.1512415349887135, + "grad_norm": 15.681782722473145, + "learning_rate": 7.25937352367857e-07, + "lm_loss": 5.4305, + "loss": 1.4929, + "step": 1839, + "text_contrastive_loss": 0.9297 + }, + { + "contrastive_loss": 0.4293, + "epoch": 4.153498871331829, + "grad_norm": 13.646442413330078, + "learning_rate": 7.22176753151464e-07, + "lm_loss": 5.515, + "loss": 1.3904, + "step": 1840, + "text_contrastive_loss": 0.8192 + }, + { + "contrastive_loss": 0.3938, + "epoch": 4.155756207674943, + "grad_norm": 13.667914390563965, + "learning_rate": 7.184251614855369e-07, + "lm_loss": 5.5125, + "loss": 1.319, + "step": 1841, + "text_contrastive_loss": 0.7477 + }, + { + "contrastive_loss": 0.4512, + "epoch": 4.158013544018059, + "grad_norm": 15.908001899719238, + "learning_rate": 7.146825852695749e-07, + "lm_loss": 5.4195, + "loss": 1.4085, + "step": 1842, + "text_contrastive_loss": 0.8306 + }, + { + "contrastive_loss": 0.4323, + "epoch": 4.160270880361174, + "grad_norm": 13.84862232208252, + "learning_rate": 7.109490323840884e-07, + "lm_loss": 5.4441, + "loss": 1.35, + "step": 1843, + "text_contrastive_loss": 0.7466 + }, + { + "contrastive_loss": 0.3973, + "epoch": 4.162528216704289, + "grad_norm": 13.460807800292969, + "learning_rate": 7.072245106905928e-07, + "lm_loss": 5.5048, + "loss": 1.3119, + "step": 1844, + "text_contrastive_loss": 0.7281 + }, + { + "contrastive_loss": 0.3929, + "epoch": 4.164785553047404, + "grad_norm": 12.910388946533203, + "learning_rate": 7.035090280315854e-07, + "lm_loss": 5.4203, + "loss": 1.3607, + "step": 1845, + "text_contrastive_loss": 0.8514 + }, + { + "contrastive_loss": 0.4592, + "epoch": 4.167042889390519, + "grad_norm": 13.615018844604492, + "learning_rate": 6.998025922305313e-07, + "lm_loss": 5.4446, + "loss": 1.3625, + "step": 1846, + "text_contrastive_loss": 0.7177 + }, + { + "contrastive_loss": 0.4183, + "epoch": 4.169300225733634, + "grad_norm": 12.962539672851562, + "learning_rate": 6.961052110918432e-07, + "lm_loss": 5.385, + "loss": 1.3024, + "step": 1847, + "text_contrastive_loss": 0.6911 + }, + { + "contrastive_loss": 0.3318, + "epoch": 4.1715575620767495, + "grad_norm": 13.18665885925293, + "learning_rate": 6.924168924008712e-07, + "lm_loss": 5.3562, + "loss": 1.2534, + "step": 1848, + "text_contrastive_loss": 0.772 + }, + { + "contrastive_loss": 0.4751, + "epoch": 4.173814898419865, + "grad_norm": 16.380186080932617, + "learning_rate": 6.887376439238813e-07, + "lm_loss": 5.4731, + "loss": 1.4056, + "step": 1849, + "text_contrastive_loss": 0.7664 + }, + { + "contrastive_loss": 0.3609, + "epoch": 4.176072234762979, + "grad_norm": 13.097086906433105, + "learning_rate": 6.850674734080454e-07, + "lm_loss": 5.2816, + "loss": 1.2457, + "step": 1850, + "text_contrastive_loss": 0.7133 + }, + { + "contrastive_loss": 0.4522, + "epoch": 4.178329571106095, + "grad_norm": 14.39322280883789, + "learning_rate": 6.814063885814127e-07, + "lm_loss": 5.4431, + "loss": 1.3514, + "step": 1851, + "text_contrastive_loss": 0.7098 + }, + { + "contrastive_loss": 0.4811, + "epoch": 4.18058690744921, + "grad_norm": 14.79831600189209, + "learning_rate": 6.77754397152906e-07, + "lm_loss": 5.3433, + "loss": 1.387, + "step": 1852, + "text_contrastive_loss": 0.7431 + }, + { + "contrastive_loss": 0.5088, + "epoch": 4.182844243792325, + "grad_norm": 14.93602466583252, + "learning_rate": 6.741115068123017e-07, + "lm_loss": 5.4505, + "loss": 1.499, + "step": 1853, + "text_contrastive_loss": 0.8904 + }, + { + "contrastive_loss": 0.4484, + "epoch": 4.1851015801354405, + "grad_norm": 16.18085289001465, + "learning_rate": 6.704777252302108e-07, + "lm_loss": 5.3618, + "loss": 1.3419, + "step": 1854, + "text_contrastive_loss": 0.7147 + }, + { + "contrastive_loss": 0.3328, + "epoch": 4.187358916478555, + "grad_norm": 12.081189155578613, + "learning_rate": 6.66853060058063e-07, + "lm_loss": 5.4285, + "loss": 1.189, + "step": 1855, + "text_contrastive_loss": 0.6266 + }, + { + "contrastive_loss": 0.371, + "epoch": 4.18961625282167, + "grad_norm": 12.349919319152832, + "learning_rate": 6.632375189280948e-07, + "lm_loss": 5.4435, + "loss": 1.2288, + "step": 1856, + "text_contrastive_loss": 0.6268 + }, + { + "contrastive_loss": 0.4886, + "epoch": 4.191873589164786, + "grad_norm": 15.420004844665527, + "learning_rate": 6.596311094533292e-07, + "lm_loss": 5.4754, + "loss": 1.5055, + "step": 1857, + "text_contrastive_loss": 0.9387 + }, + { + "contrastive_loss": 0.4205, + "epoch": 4.194130925507901, + "grad_norm": 13.639395713806152, + "learning_rate": 6.56033839227564e-07, + "lm_loss": 5.4007, + "loss": 1.2925, + "step": 1858, + "text_contrastive_loss": 0.6639 + }, + { + "contrastive_loss": 0.4923, + "epoch": 4.196388261851016, + "grad_norm": 14.98202133178711, + "learning_rate": 6.524457158253472e-07, + "lm_loss": 5.397, + "loss": 1.4891, + "step": 1859, + "text_contrastive_loss": 0.9143 + }, + { + "contrastive_loss": 0.5228, + "epoch": 4.198645598194131, + "grad_norm": 15.258172988891602, + "learning_rate": 6.488667468019727e-07, + "lm_loss": 5.4108, + "loss": 1.4718, + "step": 1860, + "text_contrastive_loss": 0.8158 + }, + { + "contrastive_loss": 0.3381, + "epoch": 4.200902934537246, + "grad_norm": 11.965526580810547, + "learning_rate": 6.452969396934567e-07, + "lm_loss": 5.6014, + "loss": 1.2549, + "step": 1861, + "text_contrastive_loss": 0.7134 + }, + { + "contrastive_loss": 0.33, + "epoch": 4.203160270880361, + "grad_norm": 12.12585735321045, + "learning_rate": 6.417363020165235e-07, + "lm_loss": 5.3321, + "loss": 1.2319, + "step": 1862, + "text_contrastive_loss": 0.7373 + }, + { + "contrastive_loss": 0.361, + "epoch": 4.205417607223477, + "grad_norm": 11.505969047546387, + "learning_rate": 6.381848412685882e-07, + "lm_loss": 5.4543, + "loss": 1.3033, + "step": 1863, + "text_contrastive_loss": 0.7938 + }, + { + "contrastive_loss": 0.4826, + "epoch": 4.207674943566591, + "grad_norm": 15.115910530090332, + "learning_rate": 6.346425649277454e-07, + "lm_loss": 5.3651, + "loss": 1.4377, + "step": 1864, + "text_contrastive_loss": 0.8371 + }, + { + "contrastive_loss": 0.5046, + "epoch": 4.209932279909706, + "grad_norm": 17.10780143737793, + "learning_rate": 6.31109480452749e-07, + "lm_loss": 5.4801, + "loss": 1.4573, + "step": 1865, + "text_contrastive_loss": 0.8095 + }, + { + "contrastive_loss": 0.4326, + "epoch": 4.212189616252822, + "grad_norm": 13.92415714263916, + "learning_rate": 6.275855952829995e-07, + "lm_loss": 5.4767, + "loss": 1.3341, + "step": 1866, + "text_contrastive_loss": 0.7076 + }, + { + "contrastive_loss": 0.4305, + "epoch": 4.214446952595937, + "grad_norm": 13.554123878479004, + "learning_rate": 6.240709168385251e-07, + "lm_loss": 5.3376, + "loss": 1.3276, + "step": 1867, + "text_contrastive_loss": 0.7266 + }, + { + "contrastive_loss": 0.4668, + "epoch": 4.216704288939052, + "grad_norm": 14.565821647644043, + "learning_rate": 6.2056545251997e-07, + "lm_loss": 5.4353, + "loss": 1.3705, + "step": 1868, + "text_contrastive_loss": 0.7203 + }, + { + "contrastive_loss": 0.5562, + "epoch": 4.218961625282167, + "grad_norm": 17.39376449584961, + "learning_rate": 6.170692097085751e-07, + "lm_loss": 5.4903, + "loss": 1.5995, + "step": 1869, + "text_contrastive_loss": 0.9887 + }, + { + "contrastive_loss": 0.3762, + "epoch": 4.221218961625282, + "grad_norm": 13.919958114624023, + "learning_rate": 6.135821957661658e-07, + "lm_loss": 5.5311, + "loss": 1.2931, + "step": 1870, + "text_contrastive_loss": 0.7276 + }, + { + "contrastive_loss": 0.4136, + "epoch": 4.223476297968397, + "grad_norm": 14.79681396484375, + "learning_rate": 6.101044180351318e-07, + "lm_loss": 5.4694, + "loss": 1.3332, + "step": 1871, + "text_contrastive_loss": 0.7453 + }, + { + "contrastive_loss": 0.4591, + "epoch": 4.225733634311513, + "grad_norm": 14.416868209838867, + "learning_rate": 6.066358838384184e-07, + "lm_loss": 5.4351, + "loss": 1.3984, + "step": 1872, + "text_contrastive_loss": 0.7916 + }, + { + "contrastive_loss": 0.4307, + "epoch": 4.227990970654628, + "grad_norm": 14.388350486755371, + "learning_rate": 6.031766004795047e-07, + "lm_loss": 5.4554, + "loss": 1.3488, + "step": 1873, + "text_contrastive_loss": 0.7452 + }, + { + "contrastive_loss": 0.4374, + "epoch": 4.230248306997742, + "grad_norm": 14.363672256469727, + "learning_rate": 5.997265752423936e-07, + "lm_loss": 5.463, + "loss": 1.3909, + "step": 1874, + "text_contrastive_loss": 0.8144 + }, + { + "contrastive_loss": 0.4589, + "epoch": 4.232505643340858, + "grad_norm": 13.792438507080078, + "learning_rate": 5.962858153915896e-07, + "lm_loss": 5.5655, + "loss": 1.345, + "step": 1875, + "text_contrastive_loss": 0.659 + }, + { + "contrastive_loss": 0.5754, + "epoch": 4.234762979683973, + "grad_norm": 16.32940673828125, + "learning_rate": 5.928543281720917e-07, + "lm_loss": 5.4759, + "loss": 1.5221, + "step": 1876, + "text_contrastive_loss": 0.7982 + }, + { + "contrastive_loss": 0.4951, + "epoch": 4.237020316027088, + "grad_norm": 15.022712707519531, + "learning_rate": 5.894321208093712e-07, + "lm_loss": 5.4656, + "loss": 1.4397, + "step": 1877, + "text_contrastive_loss": 0.796 + }, + { + "contrastive_loss": 0.4523, + "epoch": 4.239277652370204, + "grad_norm": 13.635482788085938, + "learning_rate": 5.860192005093624e-07, + "lm_loss": 5.4288, + "loss": 1.3612, + "step": 1878, + "text_contrastive_loss": 0.7321 + }, + { + "contrastive_loss": 0.4383, + "epoch": 4.241534988713318, + "grad_norm": 15.492339134216309, + "learning_rate": 5.826155744584405e-07, + "lm_loss": 5.3379, + "loss": 1.3359, + "step": 1879, + "text_contrastive_loss": 0.7277 + }, + { + "contrastive_loss": 0.3843, + "epoch": 4.243792325056433, + "grad_norm": 12.946342468261719, + "learning_rate": 5.792212498234134e-07, + "lm_loss": 5.39, + "loss": 1.3303, + "step": 1880, + "text_contrastive_loss": 0.8139 + }, + { + "contrastive_loss": 0.3976, + "epoch": 4.246049661399549, + "grad_norm": 13.186134338378906, + "learning_rate": 5.758362337515028e-07, + "lm_loss": 5.3961, + "loss": 1.2939, + "step": 1881, + "text_contrastive_loss": 0.7133 + }, + { + "contrastive_loss": 0.4201, + "epoch": 4.248306997742664, + "grad_norm": 13.884965896606445, + "learning_rate": 5.724605333703303e-07, + "lm_loss": 5.5192, + "loss": 1.4111, + "step": 1882, + "text_contrastive_loss": 0.878 + }, + { + "contrastive_loss": 0.395, + "epoch": 4.250564334085778, + "grad_norm": 13.937795639038086, + "learning_rate": 5.690941557878988e-07, + "lm_loss": 5.4367, + "loss": 1.3737, + "step": 1883, + "text_contrastive_loss": 0.87 + }, + { + "contrastive_loss": 0.3783, + "epoch": 4.252821670428894, + "grad_norm": 12.562926292419434, + "learning_rate": 5.657371080925866e-07, + "lm_loss": 5.4335, + "loss": 1.2505, + "step": 1884, + "text_contrastive_loss": 0.6577 + }, + { + "contrastive_loss": 0.4003, + "epoch": 4.255079006772009, + "grad_norm": 13.565262794494629, + "learning_rate": 5.623893973531225e-07, + "lm_loss": 5.4687, + "loss": 1.2524, + "step": 1885, + "text_contrastive_loss": 0.6104 + }, + { + "contrastive_loss": 0.4286, + "epoch": 4.257336343115124, + "grad_norm": 13.630789756774902, + "learning_rate": 5.590510306185765e-07, + "lm_loss": 5.5025, + "loss": 1.3563, + "step": 1886, + "text_contrastive_loss": 0.755 + }, + { + "contrastive_loss": 0.444, + "epoch": 4.25959367945824, + "grad_norm": 14.704750061035156, + "learning_rate": 5.557220149183412e-07, + "lm_loss": 5.4745, + "loss": 1.4715, + "step": 1887, + "text_contrastive_loss": 0.9602 + }, + { + "contrastive_loss": 0.4543, + "epoch": 4.261851015801354, + "grad_norm": 15.774658203125, + "learning_rate": 5.524023572621229e-07, + "lm_loss": 5.5554, + "loss": 1.4817, + "step": 1888, + "text_contrastive_loss": 0.9437 + }, + { + "contrastive_loss": 0.4249, + "epoch": 4.264108352144469, + "grad_norm": 14.661408424377441, + "learning_rate": 5.4909206463992e-07, + "lm_loss": 5.5236, + "loss": 1.4554, + "step": 1889, + "text_contrastive_loss": 0.9563 + }, + { + "contrastive_loss": 0.4631, + "epoch": 4.266365688487585, + "grad_norm": 14.910243034362793, + "learning_rate": 5.457911440220154e-07, + "lm_loss": 5.3929, + "loss": 1.392, + "step": 1890, + "text_contrastive_loss": 0.7791 + }, + { + "contrastive_loss": 0.4282, + "epoch": 4.2686230248307, + "grad_norm": 15.006203651428223, + "learning_rate": 5.424996023589524e-07, + "lm_loss": 5.4791, + "loss": 1.3596, + "step": 1891, + "text_contrastive_loss": 0.7669 + }, + { + "contrastive_loss": 0.447, + "epoch": 4.270880361173815, + "grad_norm": 14.607666969299316, + "learning_rate": 5.392174465815308e-07, + "lm_loss": 5.4387, + "loss": 1.3757, + "step": 1892, + "text_contrastive_loss": 0.7698 + }, + { + "contrastive_loss": 0.2817, + "epoch": 4.27313769751693, + "grad_norm": 10.532715797424316, + "learning_rate": 5.359446836007842e-07, + "lm_loss": 5.496, + "loss": 1.1333, + "step": 1893, + "text_contrastive_loss": 0.6041 + }, + { + "contrastive_loss": 0.421, + "epoch": 4.275395033860045, + "grad_norm": 15.028196334838867, + "learning_rate": 5.326813203079706e-07, + "lm_loss": 5.5297, + "loss": 1.3248, + "step": 1894, + "text_contrastive_loss": 0.7017 + }, + { + "contrastive_loss": 0.4315, + "epoch": 4.27765237020316, + "grad_norm": 12.929876327514648, + "learning_rate": 5.294273635745517e-07, + "lm_loss": 5.3855, + "loss": 1.3244, + "step": 1895, + "text_contrastive_loss": 0.7088 + }, + { + "contrastive_loss": 0.4986, + "epoch": 4.279909706546276, + "grad_norm": 13.516950607299805, + "learning_rate": 5.261828202521868e-07, + "lm_loss": 5.3402, + "loss": 1.4781, + "step": 1896, + "text_contrastive_loss": 0.891 + }, + { + "contrastive_loss": 0.3615, + "epoch": 4.282167042889391, + "grad_norm": 12.600197792053223, + "learning_rate": 5.229476971727115e-07, + "lm_loss": 5.392, + "loss": 1.289, + "step": 1897, + "text_contrastive_loss": 0.7765 + }, + { + "contrastive_loss": 0.4494, + "epoch": 4.284424379232505, + "grad_norm": 14.624226570129395, + "learning_rate": 5.197220011481274e-07, + "lm_loss": 5.3513, + "loss": 1.3867, + "step": 1898, + "text_contrastive_loss": 0.8043 + }, + { + "contrastive_loss": 0.4321, + "epoch": 4.286681715575621, + "grad_norm": 12.813630104064941, + "learning_rate": 5.165057389705835e-07, + "lm_loss": 5.4004, + "loss": 1.42, + "step": 1899, + "text_contrastive_loss": 0.8957 + }, + { + "contrastive_loss": 0.433, + "epoch": 4.288939051918736, + "grad_norm": 14.062405586242676, + "learning_rate": 5.132989174123659e-07, + "lm_loss": 5.4482, + "loss": 1.3866, + "step": 1900, + "text_contrastive_loss": 0.8175 + }, + { + "contrastive_loss": 0.4673, + "epoch": 4.291196388261851, + "grad_norm": 14.387012481689453, + "learning_rate": 5.101015432258843e-07, + "lm_loss": 5.3963, + "loss": 1.3872, + "step": 1901, + "text_contrastive_loss": 0.7604 + }, + { + "contrastive_loss": 0.4905, + "epoch": 4.293453724604966, + "grad_norm": 14.758625030517578, + "learning_rate": 5.069136231436539e-07, + "lm_loss": 5.4778, + "loss": 1.4374, + "step": 1902, + "text_contrastive_loss": 0.7982 + }, + { + "contrastive_loss": 0.4708, + "epoch": 4.295711060948081, + "grad_norm": 12.979969024658203, + "learning_rate": 5.037351638782812e-07, + "lm_loss": 5.4252, + "loss": 1.4259, + "step": 1903, + "text_contrastive_loss": 0.8251 + }, + { + "contrastive_loss": 0.4759, + "epoch": 4.297968397291196, + "grad_norm": 15.118889808654785, + "learning_rate": 5.00566172122453e-07, + "lm_loss": 5.4265, + "loss": 1.4689, + "step": 1904, + "text_contrastive_loss": 0.9008 + }, + { + "contrastive_loss": 0.4953, + "epoch": 4.300225733634312, + "grad_norm": 14.978907585144043, + "learning_rate": 4.97406654548922e-07, + "lm_loss": 5.4912, + "loss": 1.4967, + "step": 1905, + "text_contrastive_loss": 0.9045 + }, + { + "contrastive_loss": 0.5469, + "epoch": 4.302483069977427, + "grad_norm": 15.992609024047852, + "learning_rate": 4.942566178104924e-07, + "lm_loss": 5.3889, + "loss": 1.5341, + "step": 1906, + "text_contrastive_loss": 0.8967 + }, + { + "contrastive_loss": 0.431, + "epoch": 4.3047404063205414, + "grad_norm": 13.045210838317871, + "learning_rate": 4.911160685400008e-07, + "lm_loss": 5.3889, + "loss": 1.3757, + "step": 1907, + "text_contrastive_loss": 0.8116 + }, + { + "contrastive_loss": 0.4624, + "epoch": 4.306997742663657, + "grad_norm": 15.070562362670898, + "learning_rate": 4.879850133503106e-07, + "lm_loss": 5.4262, + "loss": 1.3649, + "step": 1908, + "text_contrastive_loss": 0.7199 + }, + { + "contrastive_loss": 0.4193, + "epoch": 4.309255079006772, + "grad_norm": 14.346759796142578, + "learning_rate": 4.848634588342932e-07, + "lm_loss": 5.6188, + "loss": 1.3668, + "step": 1909, + "text_contrastive_loss": 0.7712 + }, + { + "contrastive_loss": 0.4336, + "epoch": 4.311512415349887, + "grad_norm": 13.917705535888672, + "learning_rate": 4.817514115648164e-07, + "lm_loss": 5.3995, + "loss": 1.3475, + "step": 1910, + "text_contrastive_loss": 0.748 + }, + { + "contrastive_loss": 0.4357, + "epoch": 4.313769751693002, + "grad_norm": 14.126520156860352, + "learning_rate": 4.786488780947246e-07, + "lm_loss": 5.4209, + "loss": 1.337, + "step": 1911, + "text_contrastive_loss": 0.7184 + }, + { + "contrastive_loss": 0.4058, + "epoch": 4.316027088036117, + "grad_norm": 13.53493595123291, + "learning_rate": 4.755558649568337e-07, + "lm_loss": 5.5103, + "loss": 1.2778, + "step": 1912, + "text_contrastive_loss": 0.642 + }, + { + "contrastive_loss": 0.4, + "epoch": 4.318284424379232, + "grad_norm": 13.509313583374023, + "learning_rate": 4.7247237866391236e-07, + "lm_loss": 5.3604, + "loss": 1.2979, + "step": 1913, + "text_contrastive_loss": 0.7236 + }, + { + "contrastive_loss": 0.3733, + "epoch": 4.320541760722348, + "grad_norm": 12.044815063476562, + "learning_rate": 4.6939842570867034e-07, + "lm_loss": 5.4313, + "loss": 1.2297, + "step": 1914, + "text_contrastive_loss": 0.6266 + }, + { + "contrastive_loss": 0.4015, + "epoch": 4.322799097065463, + "grad_norm": 13.225977897644043, + "learning_rate": 4.663340125637389e-07, + "lm_loss": 5.458, + "loss": 1.3096, + "step": 1915, + "text_contrastive_loss": 0.7246 + }, + { + "contrastive_loss": 0.504, + "epoch": 4.3250564334085775, + "grad_norm": 15.951153755187988, + "learning_rate": 4.6327914568166763e-07, + "lm_loss": 5.4381, + "loss": 1.4387, + "step": 1916, + "text_contrastive_loss": 0.7817 + }, + { + "contrastive_loss": 0.4944, + "epoch": 4.327313769751693, + "grad_norm": 15.046382904052734, + "learning_rate": 4.6023383149490066e-07, + "lm_loss": 5.4797, + "loss": 1.5171, + "step": 1917, + "text_contrastive_loss": 0.9493 + }, + { + "contrastive_loss": 0.373, + "epoch": 4.329571106094808, + "grad_norm": 14.629324913024902, + "learning_rate": 4.571980764157724e-07, + "lm_loss": 5.5032, + "loss": 1.2773, + "step": 1918, + "text_contrastive_loss": 0.7081 + }, + { + "contrastive_loss": 0.3948, + "epoch": 4.331828442437923, + "grad_norm": 13.322248458862305, + "learning_rate": 4.5417188683648417e-07, + "lm_loss": 5.4454, + "loss": 1.3763, + "step": 1919, + "text_contrastive_loss": 0.8739 + }, + { + "contrastive_loss": 0.3949, + "epoch": 4.334085778781039, + "grad_norm": 12.811241149902344, + "learning_rate": 4.511552691290988e-07, + "lm_loss": 5.425, + "loss": 1.298, + "step": 1920, + "text_contrastive_loss": 0.7213 + }, + { + "contrastive_loss": 0.4061, + "epoch": 4.336343115124153, + "grad_norm": 13.924341201782227, + "learning_rate": 4.4814822964552363e-07, + "lm_loss": 5.4122, + "loss": 1.3042, + "step": 1921, + "text_contrastive_loss": 0.7136 + }, + { + "contrastive_loss": 0.4002, + "epoch": 4.3386004514672685, + "grad_norm": 14.01033878326416, + "learning_rate": 4.4515077471749767e-07, + "lm_loss": 5.472, + "loss": 1.3447, + "step": 1922, + "text_contrastive_loss": 0.7946 + }, + { + "contrastive_loss": 0.6312, + "epoch": 4.340857787810384, + "grad_norm": 15.406018257141113, + "learning_rate": 4.421629106565778e-07, + "lm_loss": 5.4587, + "loss": 1.6234, + "step": 1923, + "text_contrastive_loss": 0.8926 + }, + { + "contrastive_loss": 0.4491, + "epoch": 4.343115124153499, + "grad_norm": 13.877806663513184, + "learning_rate": 4.391846437541258e-07, + "lm_loss": 5.552, + "loss": 1.4634, + "step": 1924, + "text_contrastive_loss": 0.9181 + }, + { + "contrastive_loss": 0.5328, + "epoch": 4.345372460496614, + "grad_norm": 13.972939491271973, + "learning_rate": 4.362159802812971e-07, + "lm_loss": 5.3688, + "loss": 1.4885, + "step": 1925, + "text_contrastive_loss": 0.8374 + }, + { + "contrastive_loss": 0.485, + "epoch": 4.347629796839729, + "grad_norm": 14.351852416992188, + "learning_rate": 4.332569264890252e-07, + "lm_loss": 5.3837, + "loss": 1.4136, + "step": 1926, + "text_contrastive_loss": 0.7806 + }, + { + "contrastive_loss": 0.4459, + "epoch": 4.349887133182844, + "grad_norm": 13.421630859375, + "learning_rate": 4.3030748860800606e-07, + "lm_loss": 5.4281, + "loss": 1.3481, + "step": 1927, + "text_contrastive_loss": 0.7187 + }, + { + "contrastive_loss": 0.437, + "epoch": 4.3521444695259595, + "grad_norm": 14.19968032836914, + "learning_rate": 4.273676728486925e-07, + "lm_loss": 5.4662, + "loss": 1.3448, + "step": 1928, + "text_contrastive_loss": 0.7223 + }, + { + "contrastive_loss": 0.3847, + "epoch": 4.354401805869075, + "grad_norm": 14.895120620727539, + "learning_rate": 4.244374854012734e-07, + "lm_loss": 5.5523, + "loss": 1.2974, + "step": 1929, + "text_contrastive_loss": 0.7148 + }, + { + "contrastive_loss": 0.4926, + "epoch": 4.356659142212189, + "grad_norm": 14.713385581970215, + "learning_rate": 4.215169324356666e-07, + "lm_loss": 5.5129, + "loss": 1.4233, + "step": 1930, + "text_contrastive_loss": 0.7587 + }, + { + "contrastive_loss": 0.4918, + "epoch": 4.3589164785553045, + "grad_norm": 16.201128005981445, + "learning_rate": 4.186060201014991e-07, + "lm_loss": 5.5518, + "loss": 1.4261, + "step": 1931, + "text_contrastive_loss": 0.7584 + }, + { + "contrastive_loss": 0.3521, + "epoch": 4.36117381489842, + "grad_norm": 11.698371887207031, + "learning_rate": 4.157047545281029e-07, + "lm_loss": 5.4341, + "loss": 1.2211, + "step": 1932, + "text_contrastive_loss": 0.6511 + }, + { + "contrastive_loss": 0.5779, + "epoch": 4.363431151241535, + "grad_norm": 15.930716514587402, + "learning_rate": 4.1281314182449405e-07, + "lm_loss": 5.4676, + "loss": 1.5558, + "step": 1933, + "text_contrastive_loss": 0.8623 + }, + { + "contrastive_loss": 0.5238, + "epoch": 4.3656884875846504, + "grad_norm": 17.162410736083984, + "learning_rate": 4.099311880793655e-07, + "lm_loss": 5.4837, + "loss": 1.5025, + "step": 1934, + "text_contrastive_loss": 0.8608 + }, + { + "contrastive_loss": 0.3549, + "epoch": 4.367945823927765, + "grad_norm": 11.071837425231934, + "learning_rate": 4.070588993610697e-07, + "lm_loss": 5.4856, + "loss": 1.2363, + "step": 1935, + "text_contrastive_loss": 0.6657 + }, + { + "contrastive_loss": 0.4307, + "epoch": 4.37020316027088, + "grad_norm": 14.291321754455566, + "learning_rate": 4.0419628171760927e-07, + "lm_loss": 5.3843, + "loss": 1.3906, + "step": 1936, + "text_contrastive_loss": 0.843 + }, + { + "contrastive_loss": 0.4539, + "epoch": 4.3724604966139955, + "grad_norm": 13.059618949890137, + "learning_rate": 4.0134334117662375e-07, + "lm_loss": 5.3896, + "loss": 1.3542, + "step": 1937, + "text_contrastive_loss": 0.7226 + }, + { + "contrastive_loss": 0.4515, + "epoch": 4.374717832957111, + "grad_norm": 14.538610458374023, + "learning_rate": 3.985000837453756e-07, + "lm_loss": 5.3757, + "loss": 1.3529, + "step": 1938, + "text_contrastive_loss": 0.7277 + }, + { + "contrastive_loss": 0.4556, + "epoch": 4.376975169300226, + "grad_norm": 13.206327438354492, + "learning_rate": 3.9566651541073586e-07, + "lm_loss": 5.3729, + "loss": 1.3516, + "step": 1939, + "text_contrastive_loss": 0.7175 + }, + { + "contrastive_loss": 0.4274, + "epoch": 4.3792325056433405, + "grad_norm": 12.887798309326172, + "learning_rate": 3.928426421391773e-07, + "lm_loss": 5.4968, + "loss": 1.3883, + "step": 1940, + "text_contrastive_loss": 0.8225 + }, + { + "contrastive_loss": 0.4148, + "epoch": 4.381489841986456, + "grad_norm": 13.589632987976074, + "learning_rate": 3.9002846987675704e-07, + "lm_loss": 5.3314, + "loss": 1.2816, + "step": 1941, + "text_contrastive_loss": 0.6673 + }, + { + "contrastive_loss": 0.4093, + "epoch": 4.383747178329571, + "grad_norm": 14.307594299316406, + "learning_rate": 3.872240045491055e-07, + "lm_loss": 5.3974, + "loss": 1.3727, + "step": 1942, + "text_contrastive_loss": 0.8474 + }, + { + "contrastive_loss": 0.4296, + "epoch": 4.3860045146726865, + "grad_norm": 15.387022972106934, + "learning_rate": 3.8442925206141237e-07, + "lm_loss": 5.4738, + "loss": 1.3277, + "step": 1943, + "text_contrastive_loss": 0.7015 + }, + { + "contrastive_loss": 0.4235, + "epoch": 4.388261851015802, + "grad_norm": 13.895306587219238, + "learning_rate": 3.8164421829841756e-07, + "lm_loss": 5.4586, + "loss": 1.3625, + "step": 1944, + "text_contrastive_loss": 0.7863 + }, + { + "contrastive_loss": 0.5274, + "epoch": 4.390519187358916, + "grad_norm": 16.344566345214844, + "learning_rate": 3.7886890912439633e-07, + "lm_loss": 5.3447, + "loss": 1.4867, + "step": 1945, + "text_contrastive_loss": 0.8496 + }, + { + "contrastive_loss": 0.3899, + "epoch": 4.3927765237020315, + "grad_norm": 12.419146537780762, + "learning_rate": 3.761033303831474e-07, + "lm_loss": 5.3228, + "loss": 1.2334, + "step": 1946, + "text_contrastive_loss": 0.6224 + }, + { + "contrastive_loss": 0.4346, + "epoch": 4.395033860045147, + "grad_norm": 14.739822387695312, + "learning_rate": 3.733474878979798e-07, + "lm_loss": 5.4051, + "loss": 1.403, + "step": 1947, + "text_contrastive_loss": 0.8557 + }, + { + "contrastive_loss": 0.4122, + "epoch": 4.397291196388262, + "grad_norm": 13.91802978515625, + "learning_rate": 3.706013874717024e-07, + "lm_loss": 5.4228, + "loss": 1.2934, + "step": 1948, + "text_contrastive_loss": 0.6778 + }, + { + "contrastive_loss": 0.4537, + "epoch": 4.399548532731377, + "grad_norm": 14.394438743591309, + "learning_rate": 3.678650348866114e-07, + "lm_loss": 5.4463, + "loss": 1.374, + "step": 1949, + "text_contrastive_loss": 0.7513 + }, + { + "contrastive_loss": 0.4394, + "epoch": 4.401805869074492, + "grad_norm": 13.035497665405273, + "learning_rate": 3.651384359044774e-07, + "lm_loss": 5.4935, + "loss": 1.3769, + "step": 1950, + "text_contrastive_loss": 0.7764 + }, + { + "contrastive_loss": 0.3555, + "epoch": 4.404063205417607, + "grad_norm": 13.068267822265625, + "learning_rate": 3.6242159626653004e-07, + "lm_loss": 5.4236, + "loss": 1.2316, + "step": 1951, + "text_contrastive_loss": 0.6675 + }, + { + "contrastive_loss": 0.4441, + "epoch": 4.4063205417607225, + "grad_norm": 13.434308052062988, + "learning_rate": 3.597145216934556e-07, + "lm_loss": 5.4971, + "loss": 1.3334, + "step": 1952, + "text_contrastive_loss": 0.6792 + }, + { + "contrastive_loss": 0.4748, + "epoch": 4.408577878103838, + "grad_norm": 15.370001792907715, + "learning_rate": 3.570172178853731e-07, + "lm_loss": 5.4621, + "loss": 1.4576, + "step": 1953, + "text_contrastive_loss": 0.8732 + }, + { + "contrastive_loss": 0.4463, + "epoch": 4.410835214446952, + "grad_norm": 13.672945022583008, + "learning_rate": 3.5432969052183186e-07, + "lm_loss": 5.3621, + "loss": 1.394, + "step": 1954, + "text_contrastive_loss": 0.823 + }, + { + "contrastive_loss": 0.3904, + "epoch": 4.413092550790068, + "grad_norm": 12.61204719543457, + "learning_rate": 3.516519452617922e-07, + "lm_loss": 5.492, + "loss": 1.3419, + "step": 1955, + "text_contrastive_loss": 0.8045 + }, + { + "contrastive_loss": 0.5187, + "epoch": 4.415349887133183, + "grad_norm": 14.892425537109375, + "learning_rate": 3.4898398774361854e-07, + "lm_loss": 5.3201, + "loss": 1.491, + "step": 1956, + "text_contrastive_loss": 0.8804 + }, + { + "contrastive_loss": 0.3911, + "epoch": 4.417607223476298, + "grad_norm": 13.570329666137695, + "learning_rate": 3.463258235850653e-07, + "lm_loss": 5.4478, + "loss": 1.3248, + "step": 1957, + "text_contrastive_loss": 0.7779 + }, + { + "contrastive_loss": 0.474, + "epoch": 4.4198645598194135, + "grad_norm": 14.859395980834961, + "learning_rate": 3.4367745838326807e-07, + "lm_loss": 5.4708, + "loss": 1.3871, + "step": 1958, + "text_contrastive_loss": 0.7321 + }, + { + "contrastive_loss": 0.4756, + "epoch": 4.422121896162528, + "grad_norm": 14.346101760864258, + "learning_rate": 3.410388977147244e-07, + "lm_loss": 5.3331, + "loss": 1.4394, + "step": 1959, + "text_contrastive_loss": 0.8611 + }, + { + "contrastive_loss": 0.4861, + "epoch": 4.424379232505643, + "grad_norm": 15.08126163482666, + "learning_rate": 3.3841014713529184e-07, + "lm_loss": 5.4299, + "loss": 1.4246, + "step": 1960, + "text_contrastive_loss": 0.7911 + }, + { + "contrastive_loss": 0.5389, + "epoch": 4.426636568848759, + "grad_norm": 14.744633674621582, + "learning_rate": 3.357912121801682e-07, + "lm_loss": 5.49, + "loss": 1.526, + "step": 1961, + "text_contrastive_loss": 0.8762 + }, + { + "contrastive_loss": 0.4777, + "epoch": 4.428893905191874, + "grad_norm": 14.078417778015137, + "learning_rate": 3.331820983638867e-07, + "lm_loss": 5.5039, + "loss": 1.4177, + "step": 1962, + "text_contrastive_loss": 0.7793 + }, + { + "contrastive_loss": 0.5299, + "epoch": 4.431151241534989, + "grad_norm": 16.73001480102539, + "learning_rate": 3.3058281118029553e-07, + "lm_loss": 5.5428, + "loss": 1.4909, + "step": 1963, + "text_contrastive_loss": 0.8135 + }, + { + "contrastive_loss": 0.4935, + "epoch": 4.433408577878104, + "grad_norm": 13.877079010009766, + "learning_rate": 3.279933561025567e-07, + "lm_loss": 5.465, + "loss": 1.4524, + "step": 1964, + "text_contrastive_loss": 0.8249 + }, + { + "contrastive_loss": 0.43, + "epoch": 4.435665914221219, + "grad_norm": 12.670683860778809, + "learning_rate": 3.254137385831263e-07, + "lm_loss": 5.4102, + "loss": 1.3859, + "step": 1965, + "text_contrastive_loss": 0.8298 + }, + { + "contrastive_loss": 0.5592, + "epoch": 4.437923250564334, + "grad_norm": 15.805489540100098, + "learning_rate": 3.2284396405374787e-07, + "lm_loss": 5.4256, + "loss": 1.532, + "step": 1966, + "text_contrastive_loss": 0.8603 + }, + { + "contrastive_loss": 0.3286, + "epoch": 4.4401805869074495, + "grad_norm": 11.388947486877441, + "learning_rate": 3.202840379254374e-07, + "lm_loss": 5.4651, + "loss": 1.2775, + "step": 1967, + "text_contrastive_loss": 0.8049 + }, + { + "contrastive_loss": 0.4544, + "epoch": 4.442437923250564, + "grad_norm": 13.956562995910645, + "learning_rate": 3.177339655884737e-07, + "lm_loss": 5.5186, + "loss": 1.3878, + "step": 1968, + "text_contrastive_loss": 0.763 + }, + { + "contrastive_loss": 0.4719, + "epoch": 4.444695259593679, + "grad_norm": 13.878486633300781, + "learning_rate": 3.151937524123905e-07, + "lm_loss": 5.4145, + "loss": 1.4145, + "step": 1969, + "text_contrastive_loss": 0.8022 + }, + { + "contrastive_loss": 0.3935, + "epoch": 4.446952595936795, + "grad_norm": 13.50877571105957, + "learning_rate": 3.1266340374595693e-07, + "lm_loss": 5.4862, + "loss": 1.3173, + "step": 1970, + "text_contrastive_loss": 0.7503 + }, + { + "contrastive_loss": 0.3619, + "epoch": 4.44920993227991, + "grad_norm": 13.413561820983887, + "learning_rate": 3.1014292491717444e-07, + "lm_loss": 5.5391, + "loss": 1.2586, + "step": 1971, + "text_contrastive_loss": 0.6856 + }, + { + "contrastive_loss": 0.5004, + "epoch": 4.451467268623025, + "grad_norm": 14.72677993774414, + "learning_rate": 3.076323212332605e-07, + "lm_loss": 5.5368, + "loss": 1.4929, + "step": 1972, + "text_contrastive_loss": 0.8777 + }, + { + "contrastive_loss": 0.3491, + "epoch": 4.45372460496614, + "grad_norm": 12.470561027526855, + "learning_rate": 3.0513159798063906e-07, + "lm_loss": 5.4153, + "loss": 1.3191, + "step": 1973, + "text_contrastive_loss": 0.857 + }, + { + "contrastive_loss": 0.4606, + "epoch": 4.455981941309255, + "grad_norm": 14.206003189086914, + "learning_rate": 3.026407604249315e-07, + "lm_loss": 5.4837, + "loss": 1.3799, + "step": 1974, + "text_contrastive_loss": 0.7418 + }, + { + "contrastive_loss": 0.4276, + "epoch": 4.45823927765237, + "grad_norm": 13.35498046875, + "learning_rate": 3.0015981381094073e-07, + "lm_loss": 5.3522, + "loss": 1.388, + "step": 1975, + "text_contrastive_loss": 0.8504 + }, + { + "contrastive_loss": 0.4814, + "epoch": 4.460496613995486, + "grad_norm": 14.195591926574707, + "learning_rate": 2.976887633626435e-07, + "lm_loss": 5.5419, + "loss": 1.4163, + "step": 1976, + "text_contrastive_loss": 0.7613 + }, + { + "contrastive_loss": 0.361, + "epoch": 4.4627539503386, + "grad_norm": 13.182605743408203, + "learning_rate": 2.952276142831806e-07, + "lm_loss": 5.4325, + "loss": 1.2021, + "step": 1977, + "text_contrastive_loss": 0.5957 + }, + { + "contrastive_loss": 0.3725, + "epoch": 4.465011286681715, + "grad_norm": 14.167166709899902, + "learning_rate": 2.9277637175484376e-07, + "lm_loss": 5.4348, + "loss": 1.2838, + "step": 1978, + "text_contrastive_loss": 0.7356 + }, + { + "contrastive_loss": 0.424, + "epoch": 4.467268623024831, + "grad_norm": 13.755749702453613, + "learning_rate": 2.9033504093906207e-07, + "lm_loss": 5.4003, + "loss": 1.3916, + "step": 1979, + "text_contrastive_loss": 0.8553 + }, + { + "contrastive_loss": 0.5471, + "epoch": 4.469525959367946, + "grad_norm": 17.518085479736328, + "learning_rate": 2.8790362697639685e-07, + "lm_loss": 5.4183, + "loss": 1.559, + "step": 1980, + "text_contrastive_loss": 0.9402 + }, + { + "contrastive_loss": 0.4565, + "epoch": 4.471783295711061, + "grad_norm": 15.208088874816895, + "learning_rate": 2.854821349865289e-07, + "lm_loss": 5.492, + "loss": 1.4434, + "step": 1981, + "text_contrastive_loss": 0.8754 + }, + { + "contrastive_loss": 0.3749, + "epoch": 4.474040632054176, + "grad_norm": 13.097951889038086, + "learning_rate": 2.8307057006824514e-07, + "lm_loss": 5.4126, + "loss": 1.3196, + "step": 1982, + "text_contrastive_loss": 0.8068 + }, + { + "contrastive_loss": 0.4867, + "epoch": 4.476297968397291, + "grad_norm": 15.0460205078125, + "learning_rate": 2.806689372994292e-07, + "lm_loss": 5.3848, + "loss": 1.4281, + "step": 1983, + "text_contrastive_loss": 0.8059 + }, + { + "contrastive_loss": 0.4521, + "epoch": 4.478555304740406, + "grad_norm": 14.740790367126465, + "learning_rate": 2.7827724173705273e-07, + "lm_loss": 5.418, + "loss": 1.3498, + "step": 1984, + "text_contrastive_loss": 0.7118 + }, + { + "contrastive_loss": 0.4936, + "epoch": 4.480812641083522, + "grad_norm": 15.868513107299805, + "learning_rate": 2.7589548841716274e-07, + "lm_loss": 5.3949, + "loss": 1.432, + "step": 1985, + "text_contrastive_loss": 0.7978 + }, + { + "contrastive_loss": 0.4035, + "epoch": 4.483069977426637, + "grad_norm": 14.197154998779297, + "learning_rate": 2.735236823548715e-07, + "lm_loss": 5.4923, + "loss": 1.3214, + "step": 1986, + "text_contrastive_loss": 0.7372 + }, + { + "contrastive_loss": 0.4937, + "epoch": 4.485327313769751, + "grad_norm": 15.719579696655273, + "learning_rate": 2.711618285443457e-07, + "lm_loss": 5.4, + "loss": 1.4754, + "step": 1987, + "text_contrastive_loss": 0.8834 + }, + { + "contrastive_loss": 0.5175, + "epoch": 4.487584650112867, + "grad_norm": 15.776280403137207, + "learning_rate": 2.6880993195879614e-07, + "lm_loss": 5.5162, + "loss": 1.539, + "step": 1988, + "text_contrastive_loss": 0.9399 + }, + { + "contrastive_loss": 0.3959, + "epoch": 4.489841986455982, + "grad_norm": 14.887728691101074, + "learning_rate": 2.6646799755046746e-07, + "lm_loss": 5.4649, + "loss": 1.3176, + "step": 1989, + "text_contrastive_loss": 0.7504 + }, + { + "contrastive_loss": 0.3833, + "epoch": 4.492099322799097, + "grad_norm": 12.696367263793945, + "learning_rate": 2.64136030250628e-07, + "lm_loss": 5.3242, + "loss": 1.2353, + "step": 1990, + "text_contrastive_loss": 0.6392 + }, + { + "contrastive_loss": 0.4416, + "epoch": 4.494356659142213, + "grad_norm": 13.526455879211426, + "learning_rate": 2.618140349695575e-07, + "lm_loss": 5.4823, + "loss": 1.3949, + "step": 1991, + "text_contrastive_loss": 0.8103 + }, + { + "contrastive_loss": 0.3788, + "epoch": 4.496613995485327, + "grad_norm": 13.053594589233398, + "learning_rate": 2.595020165965401e-07, + "lm_loss": 5.4288, + "loss": 1.3021, + "step": 1992, + "text_contrastive_loss": 0.7609 + }, + { + "contrastive_loss": 0.4455, + "epoch": 4.498871331828442, + "grad_norm": 13.829413414001465, + "learning_rate": 2.571999799998509e-07, + "lm_loss": 5.4231, + "loss": 1.4108, + "step": 1993, + "text_contrastive_loss": 0.846 + }, + { + "contrastive_loss": 0.437, + "epoch": 4.501128668171558, + "grad_norm": 13.574397087097168, + "learning_rate": 2.549079300267482e-07, + "lm_loss": 5.4456, + "loss": 1.3316, + "step": 1994, + "text_contrastive_loss": 0.7002 + }, + { + "contrastive_loss": 0.5266, + "epoch": 4.503386004514673, + "grad_norm": 15.497942924499512, + "learning_rate": 2.526258715034602e-07, + "lm_loss": 5.3979, + "loss": 1.5456, + "step": 1995, + "text_contrastive_loss": 0.9583 + }, + { + "contrastive_loss": 0.5077, + "epoch": 4.505643340857787, + "grad_norm": 15.3302583694458, + "learning_rate": 2.503538092351782e-07, + "lm_loss": 5.466, + "loss": 1.4918, + "step": 1996, + "text_contrastive_loss": 0.8749 + }, + { + "contrastive_loss": 0.4095, + "epoch": 4.507900677200903, + "grad_norm": 13.3062162399292, + "learning_rate": 2.480917480060441e-07, + "lm_loss": 5.4016, + "loss": 1.3543, + "step": 1997, + "text_contrastive_loss": 0.8092 + }, + { + "contrastive_loss": 0.4771, + "epoch": 4.510158013544018, + "grad_norm": 14.244848251342773, + "learning_rate": 2.458396925791434e-07, + "lm_loss": 5.5374, + "loss": 1.445, + "step": 1998, + "text_contrastive_loss": 0.8283 + }, + { + "contrastive_loss": 0.4325, + "epoch": 4.512415349887133, + "grad_norm": 13.032058715820312, + "learning_rate": 2.4359764769648907e-07, + "lm_loss": 5.4126, + "loss": 1.3252, + "step": 1999, + "text_contrastive_loss": 0.703 + }, + { + "contrastive_loss": 0.4971, + "epoch": 4.514672686230249, + "grad_norm": 15.64871597290039, + "learning_rate": 2.4136561807901916e-07, + "lm_loss": 5.42, + "loss": 1.5228, + "step": 2000, + "text_contrastive_loss": 0.9674 + }, + { + "contrastive_loss": 0.4192, + "epoch": 4.516930022573363, + "grad_norm": 14.64123249053955, + "learning_rate": 2.391436084265814e-07, + "lm_loss": 5.3506, + "loss": 1.3261, + "step": 2001, + "text_contrastive_loss": 0.7437 + }, + { + "contrastive_loss": 0.4378, + "epoch": 4.519187358916478, + "grad_norm": 13.714446067810059, + "learning_rate": 2.3693162341792532e-07, + "lm_loss": 5.3923, + "loss": 1.364, + "step": 2002, + "text_contrastive_loss": 0.774 + }, + { + "contrastive_loss": 0.4487, + "epoch": 4.521444695259594, + "grad_norm": 13.786931991577148, + "learning_rate": 2.347296677106925e-07, + "lm_loss": 5.4813, + "loss": 1.3724, + "step": 2003, + "text_contrastive_loss": 0.7511 + }, + { + "contrastive_loss": 0.4898, + "epoch": 4.523702031602709, + "grad_norm": 13.873068809509277, + "learning_rate": 2.3253774594140633e-07, + "lm_loss": 5.4082, + "loss": 1.4093, + "step": 2004, + "text_contrastive_loss": 0.7574 + }, + { + "contrastive_loss": 0.4589, + "epoch": 4.525959367945823, + "grad_norm": 14.35563850402832, + "learning_rate": 2.3035586272546207e-07, + "lm_loss": 5.3493, + "loss": 1.3454, + "step": 2005, + "text_contrastive_loss": 0.703 + }, + { + "contrastive_loss": 0.3581, + "epoch": 4.528216704288939, + "grad_norm": 12.581876754760742, + "learning_rate": 2.2818402265711858e-07, + "lm_loss": 5.4672, + "loss": 1.2163, + "step": 2006, + "text_contrastive_loss": 0.6229 + }, + { + "contrastive_loss": 0.4342, + "epoch": 4.530474040632054, + "grad_norm": 14.916787147521973, + "learning_rate": 2.2602223030948445e-07, + "lm_loss": 5.3392, + "loss": 1.4251, + "step": 2007, + "text_contrastive_loss": 0.914 + }, + { + "contrastive_loss": 0.4943, + "epoch": 4.532731376975169, + "grad_norm": 16.06678009033203, + "learning_rate": 2.2387049023451458e-07, + "lm_loss": 5.4612, + "loss": 1.4213, + "step": 2008, + "text_contrastive_loss": 0.7617 + }, + { + "contrastive_loss": 0.447, + "epoch": 4.534988713318285, + "grad_norm": 14.13612174987793, + "learning_rate": 2.2172880696299692e-07, + "lm_loss": 5.375, + "loss": 1.3563, + "step": 2009, + "text_contrastive_loss": 0.7437 + }, + { + "contrastive_loss": 0.4334, + "epoch": 4.5372460496614, + "grad_norm": 14.04653263092041, + "learning_rate": 2.1959718500454196e-07, + "lm_loss": 5.4325, + "loss": 1.3352, + "step": 2010, + "text_contrastive_loss": 0.7172 + }, + { + "contrastive_loss": 0.3929, + "epoch": 4.539503386004514, + "grad_norm": 13.984199523925781, + "learning_rate": 2.17475628847576e-07, + "lm_loss": 5.4223, + "loss": 1.3207, + "step": 2011, + "text_contrastive_loss": 0.7711 + }, + { + "contrastive_loss": 0.3626, + "epoch": 4.54176072234763, + "grad_norm": 11.853575706481934, + "learning_rate": 2.1536414295932896e-07, + "lm_loss": 5.3687, + "loss": 1.2356, + "step": 2012, + "text_contrastive_loss": 0.6723 + }, + { + "contrastive_loss": 0.4397, + "epoch": 4.544018058690745, + "grad_norm": 13.724562644958496, + "learning_rate": 2.1326273178582822e-07, + "lm_loss": 5.4594, + "loss": 1.4015, + "step": 2013, + "text_contrastive_loss": 0.8316 + }, + { + "contrastive_loss": 0.5173, + "epoch": 4.54627539503386, + "grad_norm": 14.897256851196289, + "learning_rate": 2.1117139975188716e-07, + "lm_loss": 5.4468, + "loss": 1.4539, + "step": 2014, + "text_contrastive_loss": 0.7838 + }, + { + "contrastive_loss": 0.4113, + "epoch": 4.548532731376975, + "grad_norm": 15.196002960205078, + "learning_rate": 2.0909015126109488e-07, + "lm_loss": 5.411, + "loss": 1.3154, + "step": 2015, + "text_contrastive_loss": 0.726 + }, + { + "contrastive_loss": 0.4638, + "epoch": 4.55079006772009, + "grad_norm": 14.481385231018066, + "learning_rate": 2.070189906958081e-07, + "lm_loss": 5.4344, + "loss": 1.3786, + "step": 2016, + "text_contrastive_loss": 0.7428 + }, + { + "contrastive_loss": 0.4465, + "epoch": 4.553047404063205, + "grad_norm": 14.491253852844238, + "learning_rate": 2.0495792241714386e-07, + "lm_loss": 5.315, + "loss": 1.3858, + "step": 2017, + "text_contrastive_loss": 0.8157 + }, + { + "contrastive_loss": 0.5093, + "epoch": 4.555304740406321, + "grad_norm": 16.6595401763916, + "learning_rate": 2.029069507649678e-07, + "lm_loss": 5.4377, + "loss": 1.4429, + "step": 2018, + "text_contrastive_loss": 0.7795 + }, + { + "contrastive_loss": 0.4251, + "epoch": 4.557562076749436, + "grad_norm": 14.268233299255371, + "learning_rate": 2.0086608005788376e-07, + "lm_loss": 5.4478, + "loss": 1.3497, + "step": 2019, + "text_contrastive_loss": 0.7597 + }, + { + "contrastive_loss": 0.3755, + "epoch": 4.5598194130925505, + "grad_norm": 13.330026626586914, + "learning_rate": 1.988353145932298e-07, + "lm_loss": 5.4278, + "loss": 1.1776, + "step": 2020, + "text_contrastive_loss": 0.5188 + }, + { + "contrastive_loss": 0.3888, + "epoch": 4.562076749435666, + "grad_norm": 12.81385612487793, + "learning_rate": 1.9681465864706372e-07, + "lm_loss": 5.5862, + "loss": 1.3231, + "step": 2021, + "text_contrastive_loss": 0.7513 + }, + { + "contrastive_loss": 0.4296, + "epoch": 4.564334085778781, + "grad_norm": 13.457114219665527, + "learning_rate": 1.9480411647415708e-07, + "lm_loss": 5.4892, + "loss": 1.3491, + "step": 2022, + "text_contrastive_loss": 0.7413 + }, + { + "contrastive_loss": 0.4774, + "epoch": 4.566591422121896, + "grad_norm": 13.965457916259766, + "learning_rate": 1.9280369230798568e-07, + "lm_loss": 5.4485, + "loss": 1.4685, + "step": 2023, + "text_contrastive_loss": 0.8926 + }, + { + "contrastive_loss": 0.4722, + "epoch": 4.568848758465011, + "grad_norm": 13.905646324157715, + "learning_rate": 1.9081339036071956e-07, + "lm_loss": 5.4528, + "loss": 1.4211, + "step": 2024, + "text_contrastive_loss": 0.8073 + }, + { + "contrastive_loss": 0.4237, + "epoch": 4.571106094808126, + "grad_norm": 12.96377944946289, + "learning_rate": 1.8883321482321583e-07, + "lm_loss": 5.4854, + "loss": 1.4255, + "step": 2025, + "text_contrastive_loss": 0.9064 + }, + { + "contrastive_loss": 0.4817, + "epoch": 4.573363431151241, + "grad_norm": 15.496378898620605, + "learning_rate": 1.8686316986500974e-07, + "lm_loss": 5.4471, + "loss": 1.4965, + "step": 2026, + "text_contrastive_loss": 0.9402 + }, + { + "contrastive_loss": 0.4595, + "epoch": 4.575620767494357, + "grad_norm": 13.194942474365234, + "learning_rate": 1.8490325963430368e-07, + "lm_loss": 5.5628, + "loss": 1.4637, + "step": 2027, + "text_contrastive_loss": 0.8958 + }, + { + "contrastive_loss": 0.4527, + "epoch": 4.577878103837472, + "grad_norm": 14.945164680480957, + "learning_rate": 1.829534882579598e-07, + "lm_loss": 5.4007, + "loss": 1.4171, + "step": 2028, + "text_contrastive_loss": 0.8487 + }, + { + "contrastive_loss": 0.4101, + "epoch": 4.580135440180587, + "grad_norm": 12.962059020996094, + "learning_rate": 1.8101385984149343e-07, + "lm_loss": 5.5119, + "loss": 1.3662, + "step": 2029, + "text_contrastive_loss": 0.8098 + }, + { + "contrastive_loss": 0.472, + "epoch": 4.582392776523702, + "grad_norm": 14.130294799804688, + "learning_rate": 1.7908437846906158e-07, + "lm_loss": 5.5662, + "loss": 1.5369, + "step": 2030, + "text_contrastive_loss": 1.0166 + }, + { + "contrastive_loss": 0.5411, + "epoch": 4.584650112866817, + "grad_norm": 14.8324613571167, + "learning_rate": 1.7716504820345427e-07, + "lm_loss": 5.5394, + "loss": 1.4772, + "step": 2031, + "text_contrastive_loss": 0.7645 + }, + { + "contrastive_loss": 0.36, + "epoch": 4.586907449209932, + "grad_norm": 14.10505485534668, + "learning_rate": 1.752558730860876e-07, + "lm_loss": 5.454, + "loss": 1.2786, + "step": 2032, + "text_contrastive_loss": 0.7464 + }, + { + "contrastive_loss": 0.4411, + "epoch": 4.589164785553048, + "grad_norm": 14.178133964538574, + "learning_rate": 1.733568571369948e-07, + "lm_loss": 5.4993, + "loss": 1.4287, + "step": 2033, + "text_contrastive_loss": 0.8754 + }, + { + "contrastive_loss": 0.4109, + "epoch": 4.591422121896162, + "grad_norm": 12.664596557617188, + "learning_rate": 1.7146800435481837e-07, + "lm_loss": 5.4682, + "loss": 1.4096, + "step": 2034, + "text_contrastive_loss": 0.9037 + }, + { + "contrastive_loss": 0.477, + "epoch": 4.5936794582392775, + "grad_norm": 15.237536430358887, + "learning_rate": 1.6958931871679908e-07, + "lm_loss": 5.4776, + "loss": 1.4628, + "step": 2035, + "text_contrastive_loss": 0.8761 + }, + { + "contrastive_loss": 0.4118, + "epoch": 4.595936794582393, + "grad_norm": 13.368701934814453, + "learning_rate": 1.677208041787698e-07, + "lm_loss": 5.3856, + "loss": 1.3221, + "step": 2036, + "text_contrastive_loss": 0.7435 + }, + { + "contrastive_loss": 0.3885, + "epoch": 4.598194130925508, + "grad_norm": 14.1590576171875, + "learning_rate": 1.6586246467514833e-07, + "lm_loss": 5.3744, + "loss": 1.3235, + "step": 2037, + "text_contrastive_loss": 0.795 + }, + { + "contrastive_loss": 0.4713, + "epoch": 4.600451467268623, + "grad_norm": 15.29771614074707, + "learning_rate": 1.6401430411892572e-07, + "lm_loss": 5.3639, + "loss": 1.4432, + "step": 2038, + "text_contrastive_loss": 0.871 + }, + { + "contrastive_loss": 0.5518, + "epoch": 4.602708803611738, + "grad_norm": 14.823325157165527, + "learning_rate": 1.621763264016607e-07, + "lm_loss": 5.2741, + "loss": 1.506, + "step": 2039, + "text_contrastive_loss": 0.8535 + }, + { + "contrastive_loss": 0.4574, + "epoch": 4.604966139954853, + "grad_norm": 15.320375442504883, + "learning_rate": 1.603485353934703e-07, + "lm_loss": 5.319, + "loss": 1.4113, + "step": 2040, + "text_contrastive_loss": 0.8439 + }, + { + "contrastive_loss": 0.3564, + "epoch": 4.6072234762979685, + "grad_norm": 12.8389253616333, + "learning_rate": 1.5853093494302195e-07, + "lm_loss": 5.6329, + "loss": 1.2724, + "step": 2041, + "text_contrastive_loss": 0.7054 + }, + { + "contrastive_loss": 0.4384, + "epoch": 4.609480812641084, + "grad_norm": 14.57319450378418, + "learning_rate": 1.567235288775265e-07, + "lm_loss": 5.4662, + "loss": 1.3948, + "step": 2042, + "text_contrastive_loss": 0.8196 + }, + { + "contrastive_loss": 0.4363, + "epoch": 4.611738148984198, + "grad_norm": 13.848459243774414, + "learning_rate": 1.5492632100272686e-07, + "lm_loss": 5.4411, + "loss": 1.3514, + "step": 2043, + "text_contrastive_loss": 0.7419 + }, + { + "contrastive_loss": 0.477, + "epoch": 4.6139954853273135, + "grad_norm": 14.179244041442871, + "learning_rate": 1.5313931510289482e-07, + "lm_loss": 5.3852, + "loss": 1.4202, + "step": 2044, + "text_contrastive_loss": 0.8094 + }, + { + "contrastive_loss": 0.4274, + "epoch": 4.616252821670429, + "grad_norm": 13.810185432434082, + "learning_rate": 1.5136251494081822e-07, + "lm_loss": 5.3967, + "loss": 1.3777, + "step": 2045, + "text_contrastive_loss": 0.8212 + }, + { + "contrastive_loss": 0.4711, + "epoch": 4.618510158013544, + "grad_norm": 14.85326862335205, + "learning_rate": 1.4959592425779768e-07, + "lm_loss": 5.5282, + "loss": 1.3928, + "step": 2046, + "text_contrastive_loss": 0.7378 + }, + { + "contrastive_loss": 0.472, + "epoch": 4.6207674943566595, + "grad_norm": 13.674016952514648, + "learning_rate": 1.4783954677363376e-07, + "lm_loss": 5.5314, + "loss": 1.3513, + "step": 2047, + "text_contrastive_loss": 0.6524 + }, + { + "contrastive_loss": 0.4051, + "epoch": 4.623024830699774, + "grad_norm": 12.20019245147705, + "learning_rate": 1.4609338618662318e-07, + "lm_loss": 5.4559, + "loss": 1.3385, + "step": 2048, + "text_contrastive_loss": 0.7756 + }, + { + "contrastive_loss": 0.532, + "epoch": 4.625282167042889, + "grad_norm": 15.878229141235352, + "learning_rate": 1.4435744617354975e-07, + "lm_loss": 5.4619, + "loss": 1.5532, + "step": 2049, + "text_contrastive_loss": 0.9501 + }, + { + "contrastive_loss": 0.399, + "epoch": 4.6275395033860045, + "grad_norm": 12.863652229309082, + "learning_rate": 1.4263173038967627e-07, + "lm_loss": 5.3891, + "loss": 1.3012, + "step": 2050, + "text_contrastive_loss": 0.7265 + }, + { + "contrastive_loss": 0.4699, + "epoch": 4.62979683972912, + "grad_norm": 16.129262924194336, + "learning_rate": 1.409162424687366e-07, + "lm_loss": 5.465, + "loss": 1.4483, + "step": 2051, + "text_contrastive_loss": 0.8638 + }, + { + "contrastive_loss": 0.4412, + "epoch": 4.632054176072235, + "grad_norm": 15.443099021911621, + "learning_rate": 1.3921098602292793e-07, + "lm_loss": 5.4736, + "loss": 1.443, + "step": 2052, + "text_contrastive_loss": 0.9088 + }, + { + "contrastive_loss": 0.4351, + "epoch": 4.6343115124153496, + "grad_norm": 14.124246597290039, + "learning_rate": 1.3751596464290529e-07, + "lm_loss": 5.4517, + "loss": 1.3379, + "step": 2053, + "text_contrastive_loss": 0.7153 + }, + { + "contrastive_loss": 0.419, + "epoch": 4.636568848758465, + "grad_norm": 14.496289253234863, + "learning_rate": 1.358311818977709e-07, + "lm_loss": 5.4731, + "loss": 1.3683, + "step": 2054, + "text_contrastive_loss": 0.804 + }, + { + "contrastive_loss": 0.5128, + "epoch": 4.63882618510158, + "grad_norm": 14.910599708557129, + "learning_rate": 1.3415664133506812e-07, + "lm_loss": 5.4918, + "loss": 1.4829, + "step": 2055, + "text_contrastive_loss": 0.8417 + }, + { + "contrastive_loss": 0.3839, + "epoch": 4.6410835214446955, + "grad_norm": 13.312881469726562, + "learning_rate": 1.324923464807759e-07, + "lm_loss": 5.3817, + "loss": 1.3029, + "step": 2056, + "text_contrastive_loss": 0.7616 + }, + { + "contrastive_loss": 0.529, + "epoch": 4.643340857787811, + "grad_norm": 15.78528118133545, + "learning_rate": 1.308383008392977e-07, + "lm_loss": 5.4242, + "loss": 1.5217, + "step": 2057, + "text_contrastive_loss": 0.9006 + }, + { + "contrastive_loss": 0.4262, + "epoch": 4.645598194130925, + "grad_norm": 13.879262924194336, + "learning_rate": 1.2919450789345477e-07, + "lm_loss": 5.4463, + "loss": 1.3103, + "step": 2058, + "text_contrastive_loss": 0.6789 + }, + { + "contrastive_loss": 0.3579, + "epoch": 4.6478555304740405, + "grad_norm": 13.028387069702148, + "learning_rate": 1.275609711044823e-07, + "lm_loss": 5.4102, + "loss": 1.2543, + "step": 2059, + "text_contrastive_loss": 0.7108 + }, + { + "contrastive_loss": 0.4899, + "epoch": 4.650112866817156, + "grad_norm": 13.89792251586914, + "learning_rate": 1.2593769391201827e-07, + "lm_loss": 5.5376, + "loss": 1.4622, + "step": 2060, + "text_contrastive_loss": 0.8369 + }, + { + "contrastive_loss": 0.4765, + "epoch": 4.652370203160271, + "grad_norm": 13.671670913696289, + "learning_rate": 1.2432467973409857e-07, + "lm_loss": 5.3557, + "loss": 1.4552, + "step": 2061, + "text_contrastive_loss": 0.8864 + }, + { + "contrastive_loss": 0.4409, + "epoch": 4.654627539503386, + "grad_norm": 16.313514709472656, + "learning_rate": 1.2272193196714854e-07, + "lm_loss": 5.5179, + "loss": 1.3112, + "step": 2062, + "text_contrastive_loss": 0.6372 + }, + { + "contrastive_loss": 0.4862, + "epoch": 4.656884875846501, + "grad_norm": 14.998929977416992, + "learning_rate": 1.211294539859753e-07, + "lm_loss": 5.4963, + "loss": 1.4207, + "step": 2063, + "text_contrastive_loss": 0.7698 + }, + { + "contrastive_loss": 0.346, + "epoch": 4.659142212189616, + "grad_norm": 12.416764259338379, + "learning_rate": 1.1954724914376215e-07, + "lm_loss": 5.558, + "loss": 1.2345, + "step": 2064, + "text_contrastive_loss": 0.6653 + }, + { + "contrastive_loss": 0.3529, + "epoch": 4.6613995485327315, + "grad_norm": 11.774885177612305, + "learning_rate": 1.1797532077206187e-07, + "lm_loss": 5.4785, + "loss": 1.2877, + "step": 2065, + "text_contrastive_loss": 0.774 + }, + { + "contrastive_loss": 0.4346, + "epoch": 4.663656884875847, + "grad_norm": 13.302604675292969, + "learning_rate": 1.1641367218078736e-07, + "lm_loss": 5.3816, + "loss": 1.4156, + "step": 2066, + "text_contrastive_loss": 0.8857 + }, + { + "contrastive_loss": 0.4733, + "epoch": 4.665914221218961, + "grad_norm": 13.181002616882324, + "learning_rate": 1.1486230665820552e-07, + "lm_loss": 5.371, + "loss": 1.4199, + "step": 2067, + "text_contrastive_loss": 0.8189 + }, + { + "contrastive_loss": 0.5697, + "epoch": 4.668171557562077, + "grad_norm": 15.390506744384766, + "learning_rate": 1.1332122747093277e-07, + "lm_loss": 5.3907, + "loss": 1.6271, + "step": 2068, + "text_contrastive_loss": 1.0366 + }, + { + "contrastive_loss": 0.4268, + "epoch": 4.670428893905192, + "grad_norm": 14.155954360961914, + "learning_rate": 1.1179043786392507e-07, + "lm_loss": 5.3104, + "loss": 1.337, + "step": 2069, + "text_contrastive_loss": 0.7584 + }, + { + "contrastive_loss": 0.5129, + "epoch": 4.672686230248307, + "grad_norm": 14.72008228302002, + "learning_rate": 1.1026994106047296e-07, + "lm_loss": 5.3694, + "loss": 1.5167, + "step": 2070, + "text_contrastive_loss": 0.9338 + }, + { + "contrastive_loss": 0.477, + "epoch": 4.674943566591422, + "grad_norm": 13.70339298248291, + "learning_rate": 1.0875974026219149e-07, + "lm_loss": 5.3814, + "loss": 1.41, + "step": 2071, + "text_contrastive_loss": 0.7897 + }, + { + "contrastive_loss": 0.4021, + "epoch": 4.677200902934537, + "grad_norm": 13.836319923400879, + "learning_rate": 1.0725983864901978e-07, + "lm_loss": 5.404, + "loss": 1.3078, + "step": 2072, + "text_contrastive_loss": 0.7307 + }, + { + "contrastive_loss": 0.4337, + "epoch": 4.679458239277652, + "grad_norm": 12.894781112670898, + "learning_rate": 1.0577023937920816e-07, + "lm_loss": 5.4553, + "loss": 1.3727, + "step": 2073, + "text_contrastive_loss": 0.787 + }, + { + "contrastive_loss": 0.4722, + "epoch": 4.681715575620768, + "grad_norm": 14.383455276489258, + "learning_rate": 1.0429094558931485e-07, + "lm_loss": 5.4431, + "loss": 1.4711, + "step": 2074, + "text_contrastive_loss": 0.9092 + }, + { + "contrastive_loss": 0.4462, + "epoch": 4.683972911963883, + "grad_norm": 14.542683601379395, + "learning_rate": 1.0282196039419823e-07, + "lm_loss": 5.4639, + "loss": 1.3895, + "step": 2075, + "text_contrastive_loss": 0.7937 + }, + { + "contrastive_loss": 0.4731, + "epoch": 4.686230248306998, + "grad_norm": 14.17236614227295, + "learning_rate": 1.0136328688700958e-07, + "lm_loss": 5.4445, + "loss": 1.3846, + "step": 2076, + "text_contrastive_loss": 0.7341 + }, + { + "contrastive_loss": 0.4429, + "epoch": 4.688487584650113, + "grad_norm": 14.122315406799316, + "learning_rate": 9.99149281391898e-08, + "lm_loss": 5.3135, + "loss": 1.343, + "step": 2077, + "text_contrastive_loss": 0.7374 + }, + { + "contrastive_loss": 0.5026, + "epoch": 4.690744920993228, + "grad_norm": 15.895790100097656, + "learning_rate": 9.847688720045878e-08, + "lm_loss": 5.5192, + "loss": 1.4753, + "step": 2078, + "text_contrastive_loss": 0.8416 + }, + { + "contrastive_loss": 0.3838, + "epoch": 4.693002257336343, + "grad_norm": 13.768829345703125, + "learning_rate": 9.704916709881052e-08, + "lm_loss": 5.501, + "loss": 1.2695, + "step": 2079, + "text_contrastive_loss": 0.6712 + }, + { + "contrastive_loss": 0.4364, + "epoch": 4.6952595936794586, + "grad_norm": 15.006620407104492, + "learning_rate": 9.5631770840508e-08, + "lm_loss": 5.341, + "loss": 1.3842, + "step": 2080, + "text_contrastive_loss": 0.8275 + }, + { + "contrastive_loss": 0.4271, + "epoch": 4.697516930022573, + "grad_norm": 11.774499893188477, + "learning_rate": 9.422470141007667e-08, + "lm_loss": 5.3438, + "loss": 1.2992, + "step": 2081, + "text_contrastive_loss": 0.6754 + }, + { + "contrastive_loss": 0.4275, + "epoch": 4.699774266365688, + "grad_norm": 12.590245246887207, + "learning_rate": 9.282796177029596e-08, + "lm_loss": 5.4998, + "loss": 1.4736, + "step": 2082, + "text_contrastive_loss": 0.9923 + }, + { + "contrastive_loss": 0.4735, + "epoch": 4.702031602708804, + "grad_norm": 16.8325252532959, + "learning_rate": 9.144155486219442e-08, + "lm_loss": 5.4661, + "loss": 1.4121, + "step": 2083, + "text_contrastive_loss": 0.7839 + }, + { + "contrastive_loss": 0.4601, + "epoch": 4.704288939051919, + "grad_norm": 14.064495086669922, + "learning_rate": 9.006548360504463e-08, + "lm_loss": 5.478, + "loss": 1.3478, + "step": 2084, + "text_contrastive_loss": 0.6799 + }, + { + "contrastive_loss": 0.3832, + "epoch": 4.706546275395034, + "grad_norm": 13.605438232421875, + "learning_rate": 8.869975089635552e-08, + "lm_loss": 5.4232, + "loss": 1.3083, + "step": 2085, + "text_contrastive_loss": 0.7655 + }, + { + "contrastive_loss": 0.4496, + "epoch": 4.708803611738149, + "grad_norm": 13.64784049987793, + "learning_rate": 8.734435961186782e-08, + "lm_loss": 5.3262, + "loss": 1.4139, + "step": 2086, + "text_contrastive_loss": 0.8633 + }, + { + "contrastive_loss": 0.4386, + "epoch": 4.711060948081264, + "grad_norm": 14.360736846923828, + "learning_rate": 8.599931260554417e-08, + "lm_loss": 5.4841, + "loss": 1.3418, + "step": 2087, + "text_contrastive_loss": 0.7097 + }, + { + "contrastive_loss": 0.4388, + "epoch": 4.713318284424379, + "grad_norm": 14.426239013671875, + "learning_rate": 8.466461270956794e-08, + "lm_loss": 5.5207, + "loss": 1.4497, + "step": 2088, + "text_contrastive_loss": 0.9176 + }, + { + "contrastive_loss": 0.4233, + "epoch": 4.715575620767495, + "grad_norm": 14.370715141296387, + "learning_rate": 8.334026273433659e-08, + "lm_loss": 5.413, + "loss": 1.4366, + "step": 2089, + "text_contrastive_loss": 0.9438 + }, + { + "contrastive_loss": 0.4106, + "epoch": 4.717832957110609, + "grad_norm": 14.648252487182617, + "learning_rate": 8.202626546845172e-08, + "lm_loss": 5.4194, + "loss": 1.2998, + "step": 2090, + "text_contrastive_loss": 0.6945 + }, + { + "contrastive_loss": 0.3891, + "epoch": 4.720090293453724, + "grad_norm": 12.321579933166504, + "learning_rate": 8.072262367871675e-08, + "lm_loss": 5.5332, + "loss": 1.3766, + "step": 2091, + "text_contrastive_loss": 0.8684 + }, + { + "contrastive_loss": 0.3416, + "epoch": 4.72234762979684, + "grad_norm": 14.964371681213379, + "learning_rate": 7.942934011013037e-08, + "lm_loss": 5.4944, + "loss": 1.2942, + "step": 2092, + "text_contrastive_loss": 0.8062 + }, + { + "contrastive_loss": 0.4328, + "epoch": 4.724604966139955, + "grad_norm": 13.795408248901367, + "learning_rate": 7.814641748588148e-08, + "lm_loss": 5.4023, + "loss": 1.3785, + "step": 2093, + "text_contrastive_loss": 0.811 + }, + { + "contrastive_loss": 0.3735, + "epoch": 4.72686230248307, + "grad_norm": 13.68954086303711, + "learning_rate": 7.687385850734086e-08, + "lm_loss": 5.4513, + "loss": 1.3088, + "step": 2094, + "text_contrastive_loss": 0.7805 + }, + { + "contrastive_loss": 0.3887, + "epoch": 4.729119638826186, + "grad_norm": 12.677016258239746, + "learning_rate": 7.561166585405789e-08, + "lm_loss": 5.3759, + "loss": 1.2922, + "step": 2095, + "text_contrastive_loss": 0.7319 + }, + { + "contrastive_loss": 0.4128, + "epoch": 4.7313769751693, + "grad_norm": 15.801119804382324, + "learning_rate": 7.435984218375436e-08, + "lm_loss": 5.4852, + "loss": 1.3634, + "step": 2096, + "text_contrastive_loss": 0.8041 + }, + { + "contrastive_loss": 0.3626, + "epoch": 4.733634311512415, + "grad_norm": 12.709342002868652, + "learning_rate": 7.311839013231959e-08, + "lm_loss": 5.4356, + "loss": 1.2718, + "step": 2097, + "text_contrastive_loss": 0.7314 + }, + { + "contrastive_loss": 0.4318, + "epoch": 4.735891647855531, + "grad_norm": 14.638545036315918, + "learning_rate": 7.188731231380253e-08, + "lm_loss": 5.4465, + "loss": 1.4102, + "step": 2098, + "text_contrastive_loss": 0.8676 + }, + { + "contrastive_loss": 0.4499, + "epoch": 4.738148984198646, + "grad_norm": 15.033775329589844, + "learning_rate": 7.066661132040853e-08, + "lm_loss": 5.4048, + "loss": 1.3958, + "step": 2099, + "text_contrastive_loss": 0.8108 + }, + { + "contrastive_loss": 0.4161, + "epoch": 4.74040632054176, + "grad_norm": 15.725586891174316, + "learning_rate": 6.945628972249208e-08, + "lm_loss": 5.442, + "loss": 1.3051, + "step": 2100, + "text_contrastive_loss": 0.6896 + }, + { + "contrastive_loss": 0.3725, + "epoch": 4.742663656884876, + "grad_norm": 12.941192626953125, + "learning_rate": 6.825635006855458e-08, + "lm_loss": 5.398, + "loss": 1.2867, + "step": 2101, + "text_contrastive_loss": 0.7488 + }, + { + "contrastive_loss": 0.4289, + "epoch": 4.744920993227991, + "grad_norm": 14.20491886138916, + "learning_rate": 6.706679488523494e-08, + "lm_loss": 5.4437, + "loss": 1.2981, + "step": 2102, + "text_contrastive_loss": 0.6496 + }, + { + "contrastive_loss": 0.447, + "epoch": 4.747178329571106, + "grad_norm": 14.286320686340332, + "learning_rate": 6.58876266773062e-08, + "lm_loss": 5.3617, + "loss": 1.3436, + "step": 2103, + "text_contrastive_loss": 0.7208 + }, + { + "contrastive_loss": 0.492, + "epoch": 4.749435665914222, + "grad_norm": 15.338074684143066, + "learning_rate": 6.471884792767169e-08, + "lm_loss": 5.5514, + "loss": 1.4135, + "step": 2104, + "text_contrastive_loss": 0.7327 + }, + { + "contrastive_loss": 0.4855, + "epoch": 4.751693002257336, + "grad_norm": 14.050557136535645, + "learning_rate": 6.356046109735614e-08, + "lm_loss": 5.4199, + "loss": 1.4453, + "step": 2105, + "text_contrastive_loss": 0.8356 + }, + { + "contrastive_loss": 0.4435, + "epoch": 4.753950338600451, + "grad_norm": 14.893415451049805, + "learning_rate": 6.241246862550398e-08, + "lm_loss": 5.5084, + "loss": 1.4007, + "step": 2106, + "text_contrastive_loss": 0.8127 + }, + { + "contrastive_loss": 0.453, + "epoch": 4.756207674943567, + "grad_norm": 14.830306053161621, + "learning_rate": 6.127487292937328e-08, + "lm_loss": 5.4497, + "loss": 1.4174, + "step": 2107, + "text_contrastive_loss": 0.8388 + }, + { + "contrastive_loss": 0.4327, + "epoch": 4.758465011286682, + "grad_norm": 14.190763473510742, + "learning_rate": 6.014767640432905e-08, + "lm_loss": 5.4272, + "loss": 1.2878, + "step": 2108, + "text_contrastive_loss": 0.6248 + }, + { + "contrastive_loss": 0.4893, + "epoch": 4.760722347629796, + "grad_norm": 13.900871276855469, + "learning_rate": 5.903088142384106e-08, + "lm_loss": 5.4392, + "loss": 1.5198, + "step": 2109, + "text_contrastive_loss": 0.9731 + }, + { + "contrastive_loss": 0.4402, + "epoch": 4.762979683972912, + "grad_norm": 13.574312210083008, + "learning_rate": 5.7924490339474335e-08, + "lm_loss": 5.6821, + "loss": 1.4436, + "step": 2110, + "text_contrastive_loss": 0.8703 + }, + { + "contrastive_loss": 0.4133, + "epoch": 4.765237020316027, + "grad_norm": 12.468347549438477, + "learning_rate": 5.682850548089036e-08, + "lm_loss": 5.4131, + "loss": 1.2932, + "step": 2111, + "text_contrastive_loss": 0.6772 + }, + { + "contrastive_loss": 0.443, + "epoch": 4.767494356659142, + "grad_norm": 13.637884140014648, + "learning_rate": 5.574292915583646e-08, + "lm_loss": 5.4979, + "loss": 1.413, + "step": 2112, + "text_contrastive_loss": 0.8404 + }, + { + "contrastive_loss": 0.4399, + "epoch": 4.769751693002258, + "grad_norm": 14.228924751281738, + "learning_rate": 5.46677636501447e-08, + "lm_loss": 5.5151, + "loss": 1.3432, + "step": 2113, + "text_contrastive_loss": 0.7036 + }, + { + "contrastive_loss": 0.4076, + "epoch": 4.772009029345372, + "grad_norm": 13.731237411499023, + "learning_rate": 5.3603011227725265e-08, + "lm_loss": 5.333, + "loss": 1.3463, + "step": 2114, + "text_contrastive_loss": 0.8109 + }, + { + "contrastive_loss": 0.4588, + "epoch": 4.774266365688487, + "grad_norm": 15.329798698425293, + "learning_rate": 5.2548674130561974e-08, + "lm_loss": 5.4235, + "loss": 1.4041, + "step": 2115, + "text_contrastive_loss": 0.8059 + }, + { + "contrastive_loss": 0.4198, + "epoch": 4.776523702031603, + "grad_norm": 14.6963472366333, + "learning_rate": 5.1504754578707294e-08, + "lm_loss": 5.3975, + "loss": 1.2944, + "step": 2116, + "text_contrastive_loss": 0.6697 + }, + { + "contrastive_loss": 0.466, + "epoch": 4.778781038374718, + "grad_norm": 13.653057098388672, + "learning_rate": 5.047125477027959e-08, + "lm_loss": 5.3148, + "loss": 1.3894, + "step": 2117, + "text_contrastive_loss": 0.7838 + }, + { + "contrastive_loss": 0.4982, + "epoch": 4.781038374717833, + "grad_norm": 14.952482223510742, + "learning_rate": 4.944817688145642e-08, + "lm_loss": 5.4016, + "loss": 1.4058, + "step": 2118, + "text_contrastive_loss": 0.735 + }, + { + "contrastive_loss": 0.5136, + "epoch": 4.783295711060948, + "grad_norm": 16.02672576904297, + "learning_rate": 4.843552306646904e-08, + "lm_loss": 5.5164, + "loss": 1.54, + "step": 2119, + "text_contrastive_loss": 0.9495 + }, + { + "contrastive_loss": 0.4505, + "epoch": 4.785553047404063, + "grad_norm": 15.640702247619629, + "learning_rate": 4.743329545760122e-08, + "lm_loss": 5.5695, + "loss": 1.4046, + "step": 2120, + "text_contrastive_loss": 0.7943 + }, + { + "contrastive_loss": 0.4595, + "epoch": 4.787810383747178, + "grad_norm": 13.84422779083252, + "learning_rate": 4.644149616518212e-08, + "lm_loss": 5.4796, + "loss": 1.3819, + "step": 2121, + "text_contrastive_loss": 0.7487 + }, + { + "contrastive_loss": 0.4441, + "epoch": 4.790067720090294, + "grad_norm": 14.773959159851074, + "learning_rate": 4.5460127277582863e-08, + "lm_loss": 5.3661, + "loss": 1.3248, + "step": 2122, + "text_contrastive_loss": 0.6881 + }, + { + "contrastive_loss": 0.3099, + "epoch": 4.792325056433409, + "grad_norm": 12.641318321228027, + "learning_rate": 4.448919086121217e-08, + "lm_loss": 5.4332, + "loss": 1.2295, + "step": 2123, + "text_contrastive_loss": 0.7525 + }, + { + "contrastive_loss": 0.4525, + "epoch": 4.794582392776523, + "grad_norm": 14.078239440917969, + "learning_rate": 4.352868896051077e-08, + "lm_loss": 5.3568, + "loss": 1.4022, + "step": 2124, + "text_contrastive_loss": 0.828 + }, + { + "contrastive_loss": 0.5084, + "epoch": 4.796839729119639, + "grad_norm": 14.259356498718262, + "learning_rate": 4.2578623597949174e-08, + "lm_loss": 5.4509, + "loss": 1.4819, + "step": 2125, + "text_contrastive_loss": 0.8568 + }, + { + "contrastive_loss": 0.4212, + "epoch": 4.799097065462754, + "grad_norm": 13.38056468963623, + "learning_rate": 4.163899677402161e-08, + "lm_loss": 5.3855, + "loss": 1.2987, + "step": 2126, + "text_contrastive_loss": 0.678 + }, + { + "contrastive_loss": 0.3706, + "epoch": 4.801354401805869, + "grad_norm": 13.210601806640625, + "learning_rate": 4.0709810467243204e-08, + "lm_loss": 5.4815, + "loss": 1.2833, + "step": 2127, + "text_contrastive_loss": 0.7292 + }, + { + "contrastive_loss": 0.4523, + "epoch": 4.803611738148984, + "grad_norm": 15.537960052490234, + "learning_rate": 3.979106663414389e-08, + "lm_loss": 5.5434, + "loss": 1.4061, + "step": 2128, + "text_contrastive_loss": 0.799 + }, + { + "contrastive_loss": 0.53, + "epoch": 4.805869074492099, + "grad_norm": 14.932734489440918, + "learning_rate": 3.8882767209266756e-08, + "lm_loss": 5.4925, + "loss": 1.5161, + "step": 2129, + "text_contrastive_loss": 0.8738 + }, + { + "contrastive_loss": 0.3426, + "epoch": 4.808126410835214, + "grad_norm": 12.81087589263916, + "learning_rate": 3.7984914105162474e-08, + "lm_loss": 5.421, + "loss": 1.2241, + "step": 2130, + "text_contrastive_loss": 0.6787 + }, + { + "contrastive_loss": 0.5529, + "epoch": 4.81038374717833, + "grad_norm": 15.956156730651855, + "learning_rate": 3.709750921238486e-08, + "lm_loss": 5.3988, + "loss": 1.4407, + "step": 2131, + "text_contrastive_loss": 0.6957 + }, + { + "contrastive_loss": 0.3956, + "epoch": 4.812641083521445, + "grad_norm": 12.624953269958496, + "learning_rate": 3.622055439948813e-08, + "lm_loss": 5.4586, + "loss": 1.3469, + "step": 2132, + "text_contrastive_loss": 0.8108 + }, + { + "contrastive_loss": 0.4179, + "epoch": 4.8148984198645595, + "grad_norm": 14.218852996826172, + "learning_rate": 3.5354051513022405e-08, + "lm_loss": 5.5242, + "loss": 1.3791, + "step": 2133, + "text_contrastive_loss": 0.8176 + }, + { + "contrastive_loss": 0.4733, + "epoch": 4.817155756207675, + "grad_norm": 13.357524871826172, + "learning_rate": 3.449800237753043e-08, + "lm_loss": 5.4931, + "loss": 1.4689, + "step": 2134, + "text_contrastive_loss": 0.8926 + }, + { + "contrastive_loss": 0.3953, + "epoch": 4.81941309255079, + "grad_norm": 13.375908851623535, + "learning_rate": 3.365240879554144e-08, + "lm_loss": 5.4471, + "loss": 1.316, + "step": 2135, + "text_contrastive_loss": 0.752 + }, + { + "contrastive_loss": 0.4906, + "epoch": 4.821670428893905, + "grad_norm": 15.428407669067383, + "learning_rate": 3.281727254757061e-08, + "lm_loss": 5.458, + "loss": 1.4649, + "step": 2136, + "text_contrastive_loss": 0.8569 + }, + { + "contrastive_loss": 0.4583, + "epoch": 4.82392776523702, + "grad_norm": 13.634210586547852, + "learning_rate": 3.1992595392112966e-08, + "lm_loss": 5.4425, + "loss": 1.3316, + "step": 2137, + "text_contrastive_loss": 0.6581 + }, + { + "contrastive_loss": 0.4418, + "epoch": 4.826185101580135, + "grad_norm": 13.692935943603516, + "learning_rate": 3.117837906564114e-08, + "lm_loss": 5.4512, + "loss": 1.3735, + "step": 2138, + "text_contrastive_loss": 0.7731 + }, + { + "contrastive_loss": 0.3172, + "epoch": 4.8284424379232505, + "grad_norm": 12.065055847167969, + "learning_rate": 3.0374625282599826e-08, + "lm_loss": 5.4717, + "loss": 1.2044, + "step": 2139, + "text_contrastive_loss": 0.6802 + }, + { + "contrastive_loss": 0.5013, + "epoch": 4.830699774266366, + "grad_norm": 15.22729206085205, + "learning_rate": 2.9581335735404672e-08, + "lm_loss": 5.4379, + "loss": 1.4651, + "step": 2140, + "text_contrastive_loss": 0.84 + }, + { + "contrastive_loss": 0.4933, + "epoch": 4.832957110609481, + "grad_norm": 14.925759315490723, + "learning_rate": 2.8798512094436738e-08, + "lm_loss": 5.4871, + "loss": 1.4504, + "step": 2141, + "text_contrastive_loss": 0.8167 + }, + { + "contrastive_loss": 0.4151, + "epoch": 4.835214446952596, + "grad_norm": 13.641921997070312, + "learning_rate": 2.802615600804026e-08, + "lm_loss": 5.4401, + "loss": 1.3243, + "step": 2142, + "text_contrastive_loss": 0.7305 + }, + { + "contrastive_loss": 0.4343, + "epoch": 4.837471783295711, + "grad_norm": 13.87121868133545, + "learning_rate": 2.7264269102517117e-08, + "lm_loss": 5.4836, + "loss": 1.3676, + "step": 2143, + "text_contrastive_loss": 0.77 + }, + { + "contrastive_loss": 0.3371, + "epoch": 4.839729119638826, + "grad_norm": 11.819186210632324, + "learning_rate": 2.6512852982127357e-08, + "lm_loss": 5.342, + "loss": 1.2904, + "step": 2144, + "text_contrastive_loss": 0.8383 + }, + { + "contrastive_loss": 0.4695, + "epoch": 4.841986455981941, + "grad_norm": 16.108856201171875, + "learning_rate": 2.577190922908035e-08, + "lm_loss": 5.4491, + "loss": 1.4813, + "step": 2145, + "text_contrastive_loss": 0.9338 + }, + { + "contrastive_loss": 0.539, + "epoch": 4.844243792325057, + "grad_norm": 15.733048439025879, + "learning_rate": 2.5041439403537537e-08, + "lm_loss": 5.3733, + "loss": 1.4817, + "step": 2146, + "text_contrastive_loss": 0.8109 + }, + { + "contrastive_loss": 0.4406, + "epoch": 4.846501128668171, + "grad_norm": 13.685016632080078, + "learning_rate": 2.4321445043603565e-08, + "lm_loss": 5.4232, + "loss": 1.3548, + "step": 2147, + "text_contrastive_loss": 0.7437 + }, + { + "contrastive_loss": 0.3976, + "epoch": 4.8487584650112865, + "grad_norm": 13.296514511108398, + "learning_rate": 2.3611927665326275e-08, + "lm_loss": 5.4631, + "loss": 1.3468, + "step": 2148, + "text_contrastive_loss": 0.8058 + }, + { + "contrastive_loss": 0.4797, + "epoch": 4.851015801354402, + "grad_norm": 13.962272644042969, + "learning_rate": 2.291288876269393e-08, + "lm_loss": 5.4876, + "loss": 1.455, + "step": 2149, + "text_contrastive_loss": 0.8533 + }, + { + "contrastive_loss": 0.495, + "epoch": 4.853273137697517, + "grad_norm": 14.14433479309082, + "learning_rate": 2.222432980762912e-08, + "lm_loss": 5.3203, + "loss": 1.5115, + "step": 2150, + "text_contrastive_loss": 0.969 + }, + { + "contrastive_loss": 0.4221, + "epoch": 4.855530474040632, + "grad_norm": 14.23910903930664, + "learning_rate": 2.1546252249988186e-08, + "lm_loss": 5.4121, + "loss": 1.3148, + "step": 2151, + "text_contrastive_loss": 0.7029 + }, + { + "contrastive_loss": 0.4442, + "epoch": 4.857787810383747, + "grad_norm": 14.589170455932617, + "learning_rate": 2.087865751755791e-08, + "lm_loss": 5.452, + "loss": 1.361, + "step": 2152, + "text_contrastive_loss": 0.7431 + }, + { + "contrastive_loss": 0.4601, + "epoch": 4.860045146726862, + "grad_norm": 14.748631477355957, + "learning_rate": 2.0221547016051614e-08, + "lm_loss": 5.4208, + "loss": 1.4126, + "step": 2153, + "text_contrastive_loss": 0.8209 + }, + { + "contrastive_loss": 0.4349, + "epoch": 4.8623024830699775, + "grad_norm": 14.336165428161621, + "learning_rate": 1.957492212910639e-08, + "lm_loss": 5.4313, + "loss": 1.4636, + "step": 2154, + "text_contrastive_loss": 0.9711 + }, + { + "contrastive_loss": 0.3987, + "epoch": 4.864559819413093, + "grad_norm": 15.184710502624512, + "learning_rate": 1.8938784218281435e-08, + "lm_loss": 5.5225, + "loss": 1.3297, + "step": 2155, + "text_contrastive_loss": 0.7576 + }, + { + "contrastive_loss": 0.4392, + "epoch": 4.866817155756207, + "grad_norm": 14.413093566894531, + "learning_rate": 1.8313134623051955e-08, + "lm_loss": 5.4948, + "loss": 1.3908, + "step": 2156, + "text_contrastive_loss": 0.8041 + }, + { + "contrastive_loss": 0.4065, + "epoch": 4.8690744920993225, + "grad_norm": 12.513361930847168, + "learning_rate": 1.7697974660811357e-08, + "lm_loss": 5.225, + "loss": 1.2455, + "step": 2157, + "text_contrastive_loss": 0.6329 + }, + { + "contrastive_loss": 0.4739, + "epoch": 4.871331828442438, + "grad_norm": 14.576942443847656, + "learning_rate": 1.7093305626864065e-08, + "lm_loss": 5.3395, + "loss": 1.4442, + "step": 2158, + "text_contrastive_loss": 0.8727 + }, + { + "contrastive_loss": 0.5077, + "epoch": 4.873589164785553, + "grad_norm": 15.830459594726562, + "learning_rate": 1.6499128794423836e-08, + "lm_loss": 5.4957, + "loss": 1.5044, + "step": 2159, + "text_contrastive_loss": 0.8943 + }, + { + "contrastive_loss": 0.5232, + "epoch": 4.8758465011286685, + "grad_norm": 16.970827102661133, + "learning_rate": 1.5915445414613208e-08, + "lm_loss": 5.4019, + "loss": 1.5436, + "step": 2160, + "text_contrastive_loss": 0.9604 + }, + { + "contrastive_loss": 0.4526, + "epoch": 4.878103837471784, + "grad_norm": 14.501419067382812, + "learning_rate": 1.5342256716459058e-08, + "lm_loss": 5.4434, + "loss": 1.4074, + "step": 2161, + "text_contrastive_loss": 0.821 + }, + { + "contrastive_loss": 0.4276, + "epoch": 4.880361173814898, + "grad_norm": 13.567630767822266, + "learning_rate": 1.4779563906888172e-08, + "lm_loss": 5.4065, + "loss": 1.3312, + "step": 2162, + "text_contrastive_loss": 0.726 + }, + { + "contrastive_loss": 0.4361, + "epoch": 4.8826185101580135, + "grad_norm": 13.970993041992188, + "learning_rate": 1.4227368170728894e-08, + "lm_loss": 5.3345, + "loss": 1.3425, + "step": 2163, + "text_contrastive_loss": 0.7458 + }, + { + "contrastive_loss": 0.4098, + "epoch": 4.884875846501129, + "grad_norm": 13.189488410949707, + "learning_rate": 1.3685670670706697e-08, + "lm_loss": 5.5338, + "loss": 1.3498, + "step": 2164, + "text_contrastive_loss": 0.7734 + }, + { + "contrastive_loss": 0.4967, + "epoch": 4.887133182844244, + "grad_norm": 15.030165672302246, + "learning_rate": 1.3154472547440289e-08, + "lm_loss": 5.5263, + "loss": 1.4195, + "step": 2165, + "text_contrastive_loss": 0.7402 + }, + { + "contrastive_loss": 0.4859, + "epoch": 4.889390519187359, + "grad_norm": 13.789488792419434, + "learning_rate": 1.2633774919441622e-08, + "lm_loss": 5.4352, + "loss": 1.46, + "step": 2166, + "text_contrastive_loss": 0.8613 + }, + { + "contrastive_loss": 0.3811, + "epoch": 4.891647855530474, + "grad_norm": 12.546195983886719, + "learning_rate": 1.2123578883110887e-08, + "lm_loss": 5.5252, + "loss": 1.324, + "step": 2167, + "text_contrastive_loss": 0.7807 + }, + { + "contrastive_loss": 0.4371, + "epoch": 4.893905191873589, + "grad_norm": 13.049041748046875, + "learning_rate": 1.1623885512737076e-08, + "lm_loss": 5.4636, + "loss": 1.3888, + "step": 2168, + "text_contrastive_loss": 0.8107 + }, + { + "contrastive_loss": 0.3575, + "epoch": 4.8961625282167045, + "grad_norm": 13.92898178100586, + "learning_rate": 1.1134695860493539e-08, + "lm_loss": 5.356, + "loss": 1.2361, + "step": 2169, + "text_contrastive_loss": 0.686 + }, + { + "contrastive_loss": 0.5755, + "epoch": 4.89841986455982, + "grad_norm": 15.284348487854004, + "learning_rate": 1.0656010956437979e-08, + "lm_loss": 5.3665, + "loss": 1.4893, + "step": 2170, + "text_contrastive_loss": 0.7544 + }, + { + "contrastive_loss": 0.4703, + "epoch": 4.900677200902934, + "grad_norm": 14.312403678894043, + "learning_rate": 1.018783180850691e-08, + "lm_loss": 5.4431, + "loss": 1.3639, + "step": 2171, + "text_contrastive_loss": 0.6985 + }, + { + "contrastive_loss": 0.5322, + "epoch": 4.9029345372460496, + "grad_norm": 14.89530086517334, + "learning_rate": 9.73015940251676e-09, + "lm_loss": 5.364, + "loss": 1.5712, + "step": 2172, + "text_contrastive_loss": 1.0051 + }, + { + "contrastive_loss": 0.5007, + "epoch": 4.905191873589165, + "grad_norm": 14.06528091430664, + "learning_rate": 9.282994702159986e-09, + "lm_loss": 5.4174, + "loss": 1.4469, + "step": 2173, + "text_contrastive_loss": 0.8088 + }, + { + "contrastive_loss": 0.5287, + "epoch": 4.90744920993228, + "grad_norm": 14.88776969909668, + "learning_rate": 8.846338649005082e-09, + "lm_loss": 5.4686, + "loss": 1.5223, + "step": 2174, + "text_contrastive_loss": 0.8934 + }, + { + "contrastive_loss": 0.4793, + "epoch": 4.909706546275395, + "grad_norm": 17.28993797302246, + "learning_rate": 8.42019216249046e-09, + "lm_loss": 5.5607, + "loss": 1.4531, + "step": 2175, + "text_contrastive_loss": 0.8355 + }, + { + "contrastive_loss": 0.4204, + "epoch": 4.91196388261851, + "grad_norm": 13.668623924255371, + "learning_rate": 8.004556139927788e-09, + "lm_loss": 5.4568, + "loss": 1.3353, + "step": 2176, + "text_contrastive_loss": 0.7385 + }, + { + "contrastive_loss": 0.4881, + "epoch": 4.914221218961625, + "grad_norm": 15.048455238342285, + "learning_rate": 7.599431456495888e-09, + "lm_loss": 5.4685, + "loss": 1.3765, + "step": 2177, + "text_contrastive_loss": 0.683 + }, + { + "contrastive_loss": 0.3824, + "epoch": 4.9164785553047405, + "grad_norm": 13.137067794799805, + "learning_rate": 7.2048189652412784e-09, + "lm_loss": 5.5002, + "loss": 1.3224, + "step": 2178, + "text_contrastive_loss": 0.7798 + }, + { + "contrastive_loss": 0.4013, + "epoch": 4.918735891647856, + "grad_norm": 12.891266822814941, + "learning_rate": 6.820719497074857e-09, + "lm_loss": 5.4584, + "loss": 1.3154, + "step": 2179, + "text_contrastive_loss": 0.7366 + }, + { + "contrastive_loss": 0.4835, + "epoch": 4.92099322799097, + "grad_norm": 14.461901664733887, + "learning_rate": 6.447133860771893e-09, + "lm_loss": 5.393, + "loss": 1.4567, + "step": 2180, + "text_contrastive_loss": 0.8678 + }, + { + "contrastive_loss": 0.44, + "epoch": 4.923250564334086, + "grad_norm": 13.45708179473877, + "learning_rate": 6.084062842968696e-09, + "lm_loss": 5.339, + "loss": 1.3715, + "step": 2181, + "text_contrastive_loss": 0.7952 + }, + { + "contrastive_loss": 0.4562, + "epoch": 4.925507900677201, + "grad_norm": 14.697256088256836, + "learning_rate": 5.731507208160958e-09, + "lm_loss": 5.4975, + "loss": 1.4098, + "step": 2182, + "text_contrastive_loss": 0.8077 + }, + { + "contrastive_loss": 0.5158, + "epoch": 4.927765237020316, + "grad_norm": 15.932068824768066, + "learning_rate": 5.389467698704298e-09, + "lm_loss": 5.4076, + "loss": 1.4842, + "step": 2183, + "text_contrastive_loss": 0.8553 + }, + { + "contrastive_loss": 0.481, + "epoch": 4.9300225733634315, + "grad_norm": 13.238265991210938, + "learning_rate": 5.057945034810385e-09, + "lm_loss": 5.3901, + "loss": 1.3689, + "step": 2184, + "text_contrastive_loss": 0.6978 + }, + { + "contrastive_loss": 0.3412, + "epoch": 4.932279909706546, + "grad_norm": 11.94501781463623, + "learning_rate": 4.736939914545824e-09, + "lm_loss": 5.3867, + "loss": 1.29, + "step": 2185, + "text_contrastive_loss": 0.8203 + }, + { + "contrastive_loss": 0.4008, + "epoch": 4.934537246049661, + "grad_norm": 12.516329765319824, + "learning_rate": 4.4264530138310445e-09, + "lm_loss": 5.3825, + "loss": 1.3353, + "step": 2186, + "text_contrastive_loss": 0.7925 + }, + { + "contrastive_loss": 0.4752, + "epoch": 4.936794582392777, + "grad_norm": 14.929271697998047, + "learning_rate": 4.1264849864403044e-09, + "lm_loss": 5.4827, + "loss": 1.4275, + "step": 2187, + "text_contrastive_loss": 0.8081 + }, + { + "contrastive_loss": 0.5297, + "epoch": 4.939051918735892, + "grad_norm": 15.888978958129883, + "learning_rate": 3.837036463997246e-09, + "lm_loss": 5.4956, + "loss": 1.5813, + "step": 2188, + "text_contrastive_loss": 1.004 + }, + { + "contrastive_loss": 0.5268, + "epoch": 4.941309255079007, + "grad_norm": 16.01238250732422, + "learning_rate": 3.558108055976006e-09, + "lm_loss": 5.4604, + "loss": 1.5329, + "step": 2189, + "text_contrastive_loss": 0.9199 + }, + { + "contrastive_loss": 0.474, + "epoch": 4.943566591422122, + "grad_norm": 14.927141189575195, + "learning_rate": 3.289700349698999e-09, + "lm_loss": 5.4481, + "loss": 1.3322, + "step": 2190, + "text_contrastive_loss": 0.6269 + }, + { + "contrastive_loss": 0.3963, + "epoch": 4.945823927765237, + "grad_norm": 13.972898483276367, + "learning_rate": 3.0318139103363564e-09, + "lm_loss": 5.4095, + "loss": 1.3252, + "step": 2191, + "text_contrastive_loss": 0.7757 + }, + { + "contrastive_loss": 0.4645, + "epoch": 4.948081264108352, + "grad_norm": 15.38901424407959, + "learning_rate": 2.7844492809031567e-09, + "lm_loss": 5.5284, + "loss": 1.4136, + "step": 2192, + "text_contrastive_loss": 0.7926 + }, + { + "contrastive_loss": 0.4929, + "epoch": 4.950338600451468, + "grad_norm": 14.944534301757812, + "learning_rate": 2.547606982260531e-09, + "lm_loss": 5.4075, + "loss": 1.5026, + "step": 2193, + "text_contrastive_loss": 0.9379 + }, + { + "contrastive_loss": 0.4853, + "epoch": 4.952595936794582, + "grad_norm": 16.36857032775879, + "learning_rate": 2.3212875131117805e-09, + "lm_loss": 5.3899, + "loss": 1.5077, + "step": 2194, + "text_contrastive_loss": 0.9668 + }, + { + "contrastive_loss": 0.4602, + "epoch": 4.954853273137697, + "grad_norm": 16.05774688720703, + "learning_rate": 2.1054913500051512e-09, + "lm_loss": 5.3544, + "loss": 1.3854, + "step": 2195, + "text_contrastive_loss": 0.7795 + }, + { + "contrastive_loss": 0.4751, + "epoch": 4.957110609480813, + "grad_norm": 13.837918281555176, + "learning_rate": 1.9002189473288356e-09, + "lm_loss": 5.4565, + "loss": 1.4566, + "step": 2196, + "text_contrastive_loss": 0.8716 + }, + { + "contrastive_loss": 0.4103, + "epoch": 4.959367945823928, + "grad_norm": 13.76887035369873, + "learning_rate": 1.7054707373126423e-09, + "lm_loss": 5.4632, + "loss": 1.3389, + "step": 2197, + "text_contrastive_loss": 0.7646 + }, + { + "contrastive_loss": 0.3864, + "epoch": 4.961625282167043, + "grad_norm": 12.906379699707031, + "learning_rate": 1.5212471300252163e-09, + "lm_loss": 5.385, + "loss": 1.2397, + "step": 2198, + "text_contrastive_loss": 0.6297 + }, + { + "contrastive_loss": 0.3989, + "epoch": 4.963882618510158, + "grad_norm": 13.055458068847656, + "learning_rate": 1.347548513375707e-09, + "lm_loss": 5.4405, + "loss": 1.2693, + "step": 2199, + "text_contrastive_loss": 0.6528 + }, + { + "contrastive_loss": 0.4608, + "epoch": 4.966139954853273, + "grad_norm": 14.787433624267578, + "learning_rate": 1.1843752531104368e-09, + "lm_loss": 5.4163, + "loss": 1.4305, + "step": 2200, + "text_contrastive_loss": 0.8561 + }, + { + "contrastive_loss": 0.4724, + "epoch": 4.968397291196388, + "grad_norm": 14.985630989074707, + "learning_rate": 1.0317276928134557e-09, + "lm_loss": 5.4577, + "loss": 1.3722, + "step": 2201, + "text_contrastive_loss": 0.7079 + }, + { + "contrastive_loss": 0.5348, + "epoch": 4.970654627539504, + "grad_norm": 14.82340145111084, + "learning_rate": 8.896061539048762e-10, + "lm_loss": 5.4347, + "loss": 1.4779, + "step": 2202, + "text_contrastive_loss": 0.7993 + }, + { + "contrastive_loss": 0.4554, + "epoch": 4.972911963882618, + "grad_norm": 14.87088680267334, + "learning_rate": 7.580109356419841e-10, + "lm_loss": 5.4693, + "loss": 1.4443, + "step": 2203, + "text_contrastive_loss": 0.8839 + }, + { + "contrastive_loss": 0.4641, + "epoch": 4.975169300225733, + "grad_norm": 14.46483039855957, + "learning_rate": 6.369423151164622e-10, + "lm_loss": 5.4776, + "loss": 1.3967, + "step": 2204, + "text_contrastive_loss": 0.7697 + }, + { + "contrastive_loss": 0.4639, + "epoch": 4.977426636568849, + "grad_norm": 15.03017807006836, + "learning_rate": 5.264005472549461e-10, + "lm_loss": 5.4595, + "loss": 1.3375, + "step": 2205, + "text_contrastive_loss": 0.6554 + }, + { + "contrastive_loss": 0.3595, + "epoch": 4.979683972911964, + "grad_norm": 12.984131813049316, + "learning_rate": 4.2638586481846823e-10, + "lm_loss": 5.4768, + "loss": 1.2663, + "step": 2206, + "text_contrastive_loss": 0.7183 + }, + { + "contrastive_loss": 0.406, + "epoch": 4.981941309255079, + "grad_norm": 12.96347713470459, + "learning_rate": 3.368984784024587e-10, + "lm_loss": 5.5238, + "loss": 1.3241, + "step": 2207, + "text_contrastive_loss": 0.7313 + }, + { + "contrastive_loss": 0.4922, + "epoch": 4.984198645598195, + "grad_norm": 14.127623558044434, + "learning_rate": 2.5793857643396924e-10, + "lm_loss": 5.4, + "loss": 1.4226, + "step": 2208, + "text_contrastive_loss": 0.7808 + }, + { + "contrastive_loss": 0.4765, + "epoch": 4.986455981941309, + "grad_norm": 14.21364974975586, + "learning_rate": 1.89506325175004e-10, + "lm_loss": 5.4665, + "loss": 1.4513, + "step": 2209, + "text_contrastive_loss": 0.8562 + }, + { + "contrastive_loss": 0.4283, + "epoch": 4.988713318284424, + "grad_norm": 14.55248737335205, + "learning_rate": 1.316018687191889e-10, + "lm_loss": 5.4608, + "loss": 1.4691, + "step": 2210, + "text_contrastive_loss": 0.9894 + }, + { + "contrastive_loss": 0.4747, + "epoch": 4.99097065462754, + "grad_norm": 14.030537605285645, + "learning_rate": 8.422532899121649e-11, + "lm_loss": 5.3691, + "loss": 1.3971, + "step": 2211, + "text_contrastive_loss": 0.7709 + }, + { + "contrastive_loss": 0.4853, + "epoch": 4.993227990970655, + "grad_norm": 16.008319854736328, + "learning_rate": 4.737680575017667e-11, + "lm_loss": 5.347, + "loss": 1.5166, + "step": 2212, + "text_contrastive_loss": 0.9931 + }, + { + "contrastive_loss": 0.4301, + "epoch": 4.995485327313769, + "grad_norm": 15.602864265441895, + "learning_rate": 2.1056376585115723e-11, + "lm_loss": 5.4221, + "loss": 1.3487, + "step": 2213, + "text_contrastive_loss": 0.7528 + }, + { + "contrastive_loss": 0.4854, + "epoch": 4.997742663656885, + "grad_norm": 14.93419361114502, + "learning_rate": 5.2640969172568225e-12, + "lm_loss": 5.5717, + "loss": 1.4927, + "step": 2214, + "text_contrastive_loss": 0.9002 + }, + { + "contrastive_loss": 0.3065, + "epoch": 5.0, + "grad_norm": 16.354124069213867, + "learning_rate": 0.0, + "lm_loss": 5.4002, + "loss": 1.0928, + "step": 2215, + "text_contrastive_loss": 0.4926 + }, + { + "epoch": 5.0, + "step": 2215, + "total_flos": 5.949279369691136e+17, + "train_loss": 1.6049966390730297, + "train_runtime": 4868.8105, + "train_samples_per_second": 116.34, + "train_steps_per_second": 0.455 + } + ], + "logging_steps": 1, + "max_steps": 2215, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 5.949279369691136e+17, + "train_batch_size": 256, + "trial_name": null, + "trial_params": null +}