| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.46860356138706655, |
| "eval_steps": 250, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.004686035613870665, |
| "grad_norm": 2.907787561416626, |
| "learning_rate": 9.997071227741332e-06, |
| "loss": 3.3815, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.00937207122774133, |
| "grad_norm": 2.2910118103027344, |
| "learning_rate": 9.994142455482663e-06, |
| "loss": 3.3605, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.014058106841611996, |
| "grad_norm": 2.791727066040039, |
| "learning_rate": 9.991213683223994e-06, |
| "loss": 3.3338, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.01874414245548266, |
| "grad_norm": 2.881253242492676, |
| "learning_rate": 9.988284910965324e-06, |
| "loss": 3.3047, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.023430178069353328, |
| "grad_norm": 3.5495920181274414, |
| "learning_rate": 9.985356138706655e-06, |
| "loss": 3.266, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.028116213683223992, |
| "grad_norm": 3.8195812702178955, |
| "learning_rate": 9.982427366447985e-06, |
| "loss": 3.2116, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.03280224929709466, |
| "grad_norm": 5.006792068481445, |
| "learning_rate": 9.979498594189316e-06, |
| "loss": 3.1271, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.03748828491096532, |
| "grad_norm": 5.206729412078857, |
| "learning_rate": 9.976569821930647e-06, |
| "loss": 3.0472, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.04217432052483599, |
| "grad_norm": 6.317724227905273, |
| "learning_rate": 9.973641049671978e-06, |
| "loss": 2.9458, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.046860356138706656, |
| "grad_norm": 7.30826997756958, |
| "learning_rate": 9.97071227741331e-06, |
| "loss": 2.9002, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.05154639175257732, |
| "grad_norm": 7.05161190032959, |
| "learning_rate": 9.96778350515464e-06, |
| "loss": 2.8379, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.056232427366447985, |
| "grad_norm": 12.389013290405273, |
| "learning_rate": 9.964854732895972e-06, |
| "loss": 2.7637, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.06091846298031865, |
| "grad_norm": 19.661762237548828, |
| "learning_rate": 9.961925960637301e-06, |
| "loss": 2.7413, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.06560449859418932, |
| "grad_norm": 7.9712018966674805, |
| "learning_rate": 9.958997188378632e-06, |
| "loss": 2.6953, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.07029053420805999, |
| "grad_norm": 44.79791259765625, |
| "learning_rate": 9.956068416119962e-06, |
| "loss": 2.6795, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.07497656982193064, |
| "grad_norm": 7.748485565185547, |
| "learning_rate": 9.953139643861293e-06, |
| "loss": 2.6179, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.07966260543580131, |
| "grad_norm": 7.135361194610596, |
| "learning_rate": 9.950210871602624e-06, |
| "loss": 2.5714, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.08434864104967198, |
| "grad_norm": 5.464244365692139, |
| "learning_rate": 9.947282099343956e-06, |
| "loss": 2.4817, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.08903467666354264, |
| "grad_norm": 10.304727554321289, |
| "learning_rate": 9.944353327085287e-06, |
| "loss": 2.3939, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.09372071227741331, |
| "grad_norm": 8.390380859375, |
| "learning_rate": 9.941424554826618e-06, |
| "loss": 2.3162, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.09840674789128398, |
| "grad_norm": 7.206277847290039, |
| "learning_rate": 9.938495782567949e-06, |
| "loss": 2.2413, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.10309278350515463, |
| "grad_norm": 10.72529411315918, |
| "learning_rate": 9.935567010309279e-06, |
| "loss": 2.1816, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.1077788191190253, |
| "grad_norm": 8.411327362060547, |
| "learning_rate": 9.93263823805061e-06, |
| "loss": 2.0204, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.11246485473289597, |
| "grad_norm": 9.118602752685547, |
| "learning_rate": 9.929709465791941e-06, |
| "loss": 1.9329, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.11715089034676664, |
| "grad_norm": 11.883502960205078, |
| "learning_rate": 9.92678069353327e-06, |
| "loss": 1.8041, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.11715089034676664, |
| "eval_loss": 0.20095524191856384, |
| "eval_pearson_cosine": 0.5629603652959432, |
| "eval_pearson_dot": 0.32442021258601983, |
| "eval_pearson_euclidean": 0.5948642130310873, |
| "eval_pearson_manhattan": 0.5931866084570743, |
| "eval_runtime": 46.3498, |
| "eval_samples_per_second": 32.363, |
| "eval_spearman_cosine": 0.5645428688364399, |
| "eval_spearman_dot": 0.3123519595505677, |
| "eval_spearman_euclidean": 0.5966715855304487, |
| "eval_spearman_manhattan": 0.5951499296436052, |
| "eval_steps_per_second": 32.363, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.1218369259606373, |
| "grad_norm": 9.455839157104492, |
| "learning_rate": 9.923851921274602e-06, |
| "loss": 1.7175, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.12652296157450796, |
| "grad_norm": 9.907763481140137, |
| "learning_rate": 9.920923149015933e-06, |
| "loss": 1.5752, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.13120899718837864, |
| "grad_norm": 10.268372535705566, |
| "learning_rate": 9.917994376757264e-06, |
| "loss": 1.5905, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.1358950328022493, |
| "grad_norm": 12.264440536499023, |
| "learning_rate": 9.915065604498595e-06, |
| "loss": 1.4994, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.14058106841611998, |
| "grad_norm": 10.21927547454834, |
| "learning_rate": 9.912136832239926e-06, |
| "loss": 1.4741, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.14526710402999063, |
| "grad_norm": 12.204063415527344, |
| "learning_rate": 9.909208059981256e-06, |
| "loss": 1.3685, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.14995313964386128, |
| "grad_norm": 8.701486587524414, |
| "learning_rate": 9.906279287722587e-06, |
| "loss": 1.3407, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.15463917525773196, |
| "grad_norm": 11.478012084960938, |
| "learning_rate": 9.903350515463918e-06, |
| "loss": 1.3996, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.15932521087160262, |
| "grad_norm": 8.862137794494629, |
| "learning_rate": 9.90042174320525e-06, |
| "loss": 1.2921, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.1640112464854733, |
| "grad_norm": 8.181413650512695, |
| "learning_rate": 9.897492970946579e-06, |
| "loss": 1.2948, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.16869728209934395, |
| "grad_norm": 12.891910552978516, |
| "learning_rate": 9.89456419868791e-06, |
| "loss": 1.2444, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.1733833177132146, |
| "grad_norm": 9.783638000488281, |
| "learning_rate": 9.891635426429241e-06, |
| "loss": 1.1765, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.1780693533270853, |
| "grad_norm": 10.521812438964844, |
| "learning_rate": 9.888706654170573e-06, |
| "loss": 1.2163, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.18275538894095594, |
| "grad_norm": 9.507091522216797, |
| "learning_rate": 9.885777881911904e-06, |
| "loss": 1.1555, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.18744142455482662, |
| "grad_norm": 10.072102546691895, |
| "learning_rate": 9.882849109653235e-06, |
| "loss": 1.1631, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.19212746016869728, |
| "grad_norm": 12.557927131652832, |
| "learning_rate": 9.879920337394564e-06, |
| "loss": 1.1319, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.19681349578256796, |
| "grad_norm": 7.743768692016602, |
| "learning_rate": 9.876991565135896e-06, |
| "loss": 1.2022, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.2014995313964386, |
| "grad_norm": 9.258079528808594, |
| "learning_rate": 9.874062792877227e-06, |
| "loss": 1.1219, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.20618556701030927, |
| "grad_norm": 8.362629890441895, |
| "learning_rate": 9.871134020618558e-06, |
| "loss": 1.1138, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.21087160262417995, |
| "grad_norm": 8.71789264678955, |
| "learning_rate": 9.868205248359888e-06, |
| "loss": 1.0473, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2155576382380506, |
| "grad_norm": 8.710640907287598, |
| "learning_rate": 9.865276476101219e-06, |
| "loss": 1.0933, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.22024367385192128, |
| "grad_norm": 7.57949686050415, |
| "learning_rate": 9.86234770384255e-06, |
| "loss": 1.0429, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.22492970946579194, |
| "grad_norm": 8.775091171264648, |
| "learning_rate": 9.859418931583881e-06, |
| "loss": 1.0406, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.2296157450796626, |
| "grad_norm": 9.942752838134766, |
| "learning_rate": 9.856490159325212e-06, |
| "loss": 1.0526, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.23430178069353327, |
| "grad_norm": 10.166437149047852, |
| "learning_rate": 9.853561387066542e-06, |
| "loss": 1.0265, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.23430178069353327, |
| "eval_loss": 0.09848710149526596, |
| "eval_pearson_cosine": 0.7114527090607083, |
| "eval_pearson_dot": 0.5814656567702485, |
| "eval_pearson_euclidean": 0.7022168021213133, |
| "eval_pearson_manhattan": 0.7010309676073874, |
| "eval_runtime": 48.356, |
| "eval_samples_per_second": 31.02, |
| "eval_spearman_cosine": 0.7098203386273151, |
| "eval_spearman_dot": 0.5861254786395066, |
| "eval_spearman_euclidean": 0.7102590115372712, |
| "eval_spearman_manhattan": 0.7094011853041999, |
| "eval_steps_per_second": 31.02, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.23898781630740393, |
| "grad_norm": 6.910321235656738, |
| "learning_rate": 9.850632614807873e-06, |
| "loss": 1.0267, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.2436738519212746, |
| "grad_norm": 8.010503768920898, |
| "learning_rate": 9.847703842549204e-06, |
| "loss": 0.97, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.24835988753514526, |
| "grad_norm": 8.340336799621582, |
| "learning_rate": 9.844775070290535e-06, |
| "loss": 0.9773, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.2530459231490159, |
| "grad_norm": 6.75998592376709, |
| "learning_rate": 9.841846298031867e-06, |
| "loss": 0.9694, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.25773195876288657, |
| "grad_norm": 6.592973709106445, |
| "learning_rate": 9.838917525773196e-06, |
| "loss": 0.9101, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.2624179943767573, |
| "grad_norm": 8.13701343536377, |
| "learning_rate": 9.835988753514527e-06, |
| "loss": 0.9693, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.26710402999062793, |
| "grad_norm": 10.256951332092285, |
| "learning_rate": 9.833059981255859e-06, |
| "loss": 0.9405, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.2717900656044986, |
| "grad_norm": 9.521321296691895, |
| "learning_rate": 9.83013120899719e-06, |
| "loss": 0.8731, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.27647610121836924, |
| "grad_norm": 7.164852142333984, |
| "learning_rate": 9.82720243673852e-06, |
| "loss": 0.9387, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.28116213683223995, |
| "grad_norm": 8.326433181762695, |
| "learning_rate": 9.82427366447985e-06, |
| "loss": 0.8388, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.2858481724461106, |
| "grad_norm": 8.819974899291992, |
| "learning_rate": 9.821344892221182e-06, |
| "loss": 0.9034, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.29053420805998126, |
| "grad_norm": 6.0674052238464355, |
| "learning_rate": 9.818416119962513e-06, |
| "loss": 0.8225, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.2952202436738519, |
| "grad_norm": 7.898690223693848, |
| "learning_rate": 9.815487347703844e-06, |
| "loss": 0.8916, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.29990627928772257, |
| "grad_norm": 9.459305763244629, |
| "learning_rate": 9.812558575445175e-06, |
| "loss": 0.8771, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.3045923149015933, |
| "grad_norm": 7.231110095977783, |
| "learning_rate": 9.809629803186505e-06, |
| "loss": 0.8575, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.30927835051546393, |
| "grad_norm": 5.850890159606934, |
| "learning_rate": 9.806701030927836e-06, |
| "loss": 0.8294, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.3139643861293346, |
| "grad_norm": 12.532159805297852, |
| "learning_rate": 9.803772258669167e-06, |
| "loss": 0.8745, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.31865042174320524, |
| "grad_norm": 6.576635837554932, |
| "learning_rate": 9.800843486410497e-06, |
| "loss": 0.8167, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.3233364573570759, |
| "grad_norm": 7.243174076080322, |
| "learning_rate": 9.797914714151828e-06, |
| "loss": 0.8886, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.3280224929709466, |
| "grad_norm": 6.775111675262451, |
| "learning_rate": 9.794985941893159e-06, |
| "loss": 0.8205, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.33270852858481725, |
| "grad_norm": 7.494016647338867, |
| "learning_rate": 9.79205716963449e-06, |
| "loss": 0.7778, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.3373945641986879, |
| "grad_norm": 5.593213081359863, |
| "learning_rate": 9.789128397375821e-06, |
| "loss": 0.7875, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.34208059981255856, |
| "grad_norm": 7.325387001037598, |
| "learning_rate": 9.786199625117153e-06, |
| "loss": 0.7839, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.3467666354264292, |
| "grad_norm": 5.411241054534912, |
| "learning_rate": 9.783270852858484e-06, |
| "loss": 0.8363, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.3514526710402999, |
| "grad_norm": 5.667125225067139, |
| "learning_rate": 9.780342080599813e-06, |
| "loss": 0.7904, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.3514526710402999, |
| "eval_loss": 0.07609602808952332, |
| "eval_pearson_cosine": 0.7390127527190131, |
| "eval_pearson_dot": 0.6193519334256266, |
| "eval_pearson_euclidean": 0.7286540107637123, |
| "eval_pearson_manhattan": 0.7280163166143723, |
| "eval_runtime": 48.6286, |
| "eval_samples_per_second": 30.846, |
| "eval_spearman_cosine": 0.7392385981828663, |
| "eval_spearman_dot": 0.6275059521836013, |
| "eval_spearman_euclidean": 0.7379755721813188, |
| "eval_spearman_manhattan": 0.7372480627669395, |
| "eval_steps_per_second": 30.846, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.3561387066541706, |
| "grad_norm": 5.931227207183838, |
| "learning_rate": 9.777413308341144e-06, |
| "loss": 0.7801, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.36082474226804123, |
| "grad_norm": 5.550874710083008, |
| "learning_rate": 9.774484536082474e-06, |
| "loss": 0.7466, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.3655107778819119, |
| "grad_norm": 5.67214298248291, |
| "learning_rate": 9.771555763823805e-06, |
| "loss": 0.7561, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.3701968134957826, |
| "grad_norm": 5.121714115142822, |
| "learning_rate": 9.768626991565136e-06, |
| "loss": 0.7395, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.37488284910965325, |
| "grad_norm": 4.957924842834473, |
| "learning_rate": 9.765698219306467e-06, |
| "loss": 0.7368, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.3795688847235239, |
| "grad_norm": 6.30219030380249, |
| "learning_rate": 9.762769447047799e-06, |
| "loss": 0.8091, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.38425492033739456, |
| "grad_norm": 6.518470287322998, |
| "learning_rate": 9.75984067478913e-06, |
| "loss": 0.7525, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.3889409559512652, |
| "grad_norm": 6.101437568664551, |
| "learning_rate": 9.756911902530461e-06, |
| "loss": 0.7263, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.3936269915651359, |
| "grad_norm": 5.428840160369873, |
| "learning_rate": 9.75398313027179e-06, |
| "loss": 0.7881, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.3983130271790066, |
| "grad_norm": 7.170475482940674, |
| "learning_rate": 9.751054358013122e-06, |
| "loss": 0.7218, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.4029990627928772, |
| "grad_norm": 6.153990745544434, |
| "learning_rate": 9.748125585754453e-06, |
| "loss": 0.748, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.4076850984067479, |
| "grad_norm": 5.364086151123047, |
| "learning_rate": 9.745196813495782e-06, |
| "loss": 0.786, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.41237113402061853, |
| "grad_norm": 5.541423797607422, |
| "learning_rate": 9.742268041237114e-06, |
| "loss": 0.7427, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.41705716963448924, |
| "grad_norm": 5.1667022705078125, |
| "learning_rate": 9.739339268978445e-06, |
| "loss": 0.6918, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.4217432052483599, |
| "grad_norm": 4.839612007141113, |
| "learning_rate": 9.736410496719776e-06, |
| "loss": 0.7056, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.42642924086223055, |
| "grad_norm": 4.407963275909424, |
| "learning_rate": 9.733481724461107e-06, |
| "loss": 0.6313, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.4311152764761012, |
| "grad_norm": 7.052595138549805, |
| "learning_rate": 9.730552952202438e-06, |
| "loss": 0.7489, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.43580131208997186, |
| "grad_norm": 5.71290397644043, |
| "learning_rate": 9.727624179943768e-06, |
| "loss": 0.6578, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.44048734770384257, |
| "grad_norm": 6.3575825691223145, |
| "learning_rate": 9.724695407685099e-06, |
| "loss": 0.6914, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.4451733833177132, |
| "grad_norm": 5.223476886749268, |
| "learning_rate": 9.72176663542643e-06, |
| "loss": 0.6494, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.4498594189315839, |
| "grad_norm": 6.220378398895264, |
| "learning_rate": 9.71883786316776e-06, |
| "loss": 0.6996, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.45454545454545453, |
| "grad_norm": 6.475409507751465, |
| "learning_rate": 9.715909090909091e-06, |
| "loss": 0.721, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.4592314901593252, |
| "grad_norm": 5.10095739364624, |
| "learning_rate": 9.712980318650422e-06, |
| "loss": 0.6734, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.4639175257731959, |
| "grad_norm": 7.8438801765441895, |
| "learning_rate": 9.710051546391753e-06, |
| "loss": 0.7409, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.46860356138706655, |
| "grad_norm": 5.446135997772217, |
| "learning_rate": 9.707122774133085e-06, |
| "loss": 0.6772, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.46860356138706655, |
| "eval_loss": 0.06938865035772324, |
| "eval_pearson_cosine": 0.7523242546763527, |
| "eval_pearson_dot": 0.6339033623348058, |
| "eval_pearson_euclidean": 0.7449881727323344, |
| "eval_pearson_manhattan": 0.7443626147120028, |
| "eval_runtime": 47.885, |
| "eval_samples_per_second": 31.325, |
| "eval_spearman_cosine": 0.7542578168613095, |
| "eval_spearman_dot": 0.6408093688850417, |
| "eval_spearman_euclidean": 0.7532432307302356, |
| "eval_spearman_manhattan": 0.7526380381288565, |
| "eval_steps_per_second": 31.325, |
| "step": 1000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 4268, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|