{ "best_global_step": 4324, "best_metric": 8.351966857910156, "best_model_checkpoint": "printing_press/author-paraphrase/models/intfloat/multilingual-e5-base/checkpoint-4324", "epoch": 3.0, "eval_steps": 500, "global_step": 12972, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02312673450508788, "grad_norm": 10.87702751159668, "learning_rate": 4.977104532839963e-05, "loss": 2.4824, "step": 100 }, { "epoch": 0.04625346901017576, "grad_norm": 11.214370727539062, "learning_rate": 4.953977798334876e-05, "loss": 2.2607, "step": 200 }, { "epoch": 0.06938020351526364, "grad_norm": 9.89275074005127, "learning_rate": 4.930851063829787e-05, "loss": 2.1716, "step": 300 }, { "epoch": 0.09250693802035152, "grad_norm": 8.298206329345703, "learning_rate": 4.9077243293247e-05, "loss": 2.0981, "step": 400 }, { "epoch": 0.11563367252543941, "grad_norm": 9.424484252929688, "learning_rate": 4.8845975948196116e-05, "loss": 1.9617, "step": 500 }, { "epoch": 0.13876040703052728, "grad_norm": 8.752665519714355, "learning_rate": 4.8614708603145235e-05, "loss": 1.987, "step": 600 }, { "epoch": 0.16188714153561518, "grad_norm": 9.41073989868164, "learning_rate": 4.838344125809436e-05, "loss": 1.9429, "step": 700 }, { "epoch": 0.18501387604070305, "grad_norm": 11.025175094604492, "learning_rate": 4.815217391304348e-05, "loss": 1.9398, "step": 800 }, { "epoch": 0.20814061054579094, "grad_norm": 9.1474027633667, "learning_rate": 4.79209065679926e-05, "loss": 1.8745, "step": 900 }, { "epoch": 0.23126734505087881, "grad_norm": 8.528326034545898, "learning_rate": 4.7689639222941726e-05, "loss": 1.8484, "step": 1000 }, { "epoch": 0.2543940795559667, "grad_norm": 9.510236740112305, "learning_rate": 4.7458371877890846e-05, "loss": 1.846, "step": 1100 }, { "epoch": 0.27752081406105455, "grad_norm": 10.6268892288208, "learning_rate": 4.7227104532839965e-05, "loss": 1.7953, "step": 1200 }, { "epoch": 0.30064754856614245, "grad_norm": 9.342752456665039, "learning_rate": 4.699583718778909e-05, "loss": 1.8189, "step": 1300 }, { "epoch": 0.32377428307123035, "grad_norm": 8.43892765045166, "learning_rate": 4.676456984273821e-05, "loss": 1.7775, "step": 1400 }, { "epoch": 0.34690101757631825, "grad_norm": 12.730334281921387, "learning_rate": 4.653330249768733e-05, "loss": 1.76, "step": 1500 }, { "epoch": 0.3700277520814061, "grad_norm": 27.328882217407227, "learning_rate": 4.6302035152636456e-05, "loss": 1.7638, "step": 1600 }, { "epoch": 0.393154486586494, "grad_norm": 9.745387077331543, "learning_rate": 4.607076780758557e-05, "loss": 1.7039, "step": 1700 }, { "epoch": 0.4162812210915819, "grad_norm": 9.242189407348633, "learning_rate": 4.583950046253469e-05, "loss": 1.706, "step": 1800 }, { "epoch": 0.43940795559666973, "grad_norm": 9.079887390136719, "learning_rate": 4.5608233117483814e-05, "loss": 1.7255, "step": 1900 }, { "epoch": 0.46253469010175763, "grad_norm": 8.81753158569336, "learning_rate": 4.537696577243293e-05, "loss": 1.705, "step": 2000 }, { "epoch": 0.4856614246068455, "grad_norm": 10.990808486938477, "learning_rate": 4.514569842738205e-05, "loss": 1.6823, "step": 2100 }, { "epoch": 0.5087881591119334, "grad_norm": 9.342223167419434, "learning_rate": 4.491443108233118e-05, "loss": 1.6921, "step": 2200 }, { "epoch": 0.5319148936170213, "grad_norm": 9.898025512695312, "learning_rate": 4.46831637372803e-05, "loss": 1.6801, "step": 2300 }, { "epoch": 0.5550416281221091, "grad_norm": 8.3154296875, "learning_rate": 4.445189639222942e-05, "loss": 1.6547, "step": 2400 }, { "epoch": 0.5781683626271971, "grad_norm": 8.5817232131958, "learning_rate": 4.422062904717854e-05, "loss": 1.6512, "step": 2500 }, { "epoch": 0.6012950971322849, "grad_norm": 8.931965827941895, "learning_rate": 4.398936170212766e-05, "loss": 1.6466, "step": 2600 }, { "epoch": 0.6244218316373727, "grad_norm": 10.271759033203125, "learning_rate": 4.375809435707678e-05, "loss": 1.6545, "step": 2700 }, { "epoch": 0.6475485661424607, "grad_norm": 8.621456146240234, "learning_rate": 4.352682701202591e-05, "loss": 1.5985, "step": 2800 }, { "epoch": 0.6706753006475485, "grad_norm": 9.487942695617676, "learning_rate": 4.329555966697503e-05, "loss": 1.5941, "step": 2900 }, { "epoch": 0.6938020351526365, "grad_norm": 9.156566619873047, "learning_rate": 4.306429232192415e-05, "loss": 1.6178, "step": 3000 }, { "epoch": 0.7169287696577243, "grad_norm": 11.126986503601074, "learning_rate": 4.2833024976873266e-05, "loss": 1.6035, "step": 3100 }, { "epoch": 0.7400555041628122, "grad_norm": 10.304364204406738, "learning_rate": 4.2601757631822385e-05, "loss": 1.568, "step": 3200 }, { "epoch": 0.7631822386679001, "grad_norm": 9.863882064819336, "learning_rate": 4.2370490286771505e-05, "loss": 1.5733, "step": 3300 }, { "epoch": 0.786308973172988, "grad_norm": 10.20065689086914, "learning_rate": 4.213922294172063e-05, "loss": 1.5841, "step": 3400 }, { "epoch": 0.8094357076780758, "grad_norm": 9.240017890930176, "learning_rate": 4.190795559666975e-05, "loss": 1.5869, "step": 3500 }, { "epoch": 0.8325624421831638, "grad_norm": 9.452214241027832, "learning_rate": 4.167668825161887e-05, "loss": 1.5806, "step": 3600 }, { "epoch": 0.8556891766882516, "grad_norm": 10.292157173156738, "learning_rate": 4.1445420906567996e-05, "loss": 1.5682, "step": 3700 }, { "epoch": 0.8788159111933395, "grad_norm": 12.675426483154297, "learning_rate": 4.1214153561517115e-05, "loss": 1.553, "step": 3800 }, { "epoch": 0.9019426456984274, "grad_norm": 11.213452339172363, "learning_rate": 4.0982886216466234e-05, "loss": 1.5564, "step": 3900 }, { "epoch": 0.9250693802035153, "grad_norm": 11.688211441040039, "learning_rate": 4.075161887141536e-05, "loss": 1.5389, "step": 4000 }, { "epoch": 0.9481961147086031, "grad_norm": 19.469762802124023, "learning_rate": 4.052035152636448e-05, "loss": 1.5091, "step": 4100 }, { "epoch": 0.971322849213691, "grad_norm": 11.896566390991211, "learning_rate": 4.02890841813136e-05, "loss": 1.5358, "step": 4200 }, { "epoch": 0.9944495837187789, "grad_norm": 26.442358016967773, "learning_rate": 4.0057816836262725e-05, "loss": 1.512, "step": 4300 }, { "epoch": 1.0, "eval_cosine_accuracy": 0.9215032424577613, "eval_cosine_accuracy_threshold": 0.8687731027603149, "eval_cosine_ap": 0.844612185583288, "eval_cosine_f1": 0.7527066450567261, "eval_cosine_f1_threshold": 0.8616929054260254, "eval_cosine_mcc": 0.7030390290848069, "eval_cosine_precision": 0.7499838511724048, "eval_cosine_recall": 0.7554492810202356, "eval_loss": 8.351966857910156, "eval_runtime": 1467.219, "eval_samples_per_second": 125.699, "eval_steps_per_second": 1.964, "step": 4324 }, { "epoch": 1.0175763182238668, "grad_norm": 10.624114990234375, "learning_rate": 3.9826549491211844e-05, "loss": 1.4186, "step": 4400 }, { "epoch": 1.0407030527289547, "grad_norm": 10.404448509216309, "learning_rate": 3.9595282146160964e-05, "loss": 1.4075, "step": 4500 }, { "epoch": 1.0638297872340425, "grad_norm": 9.519079208374023, "learning_rate": 3.936401480111008e-05, "loss": 1.3934, "step": 4600 }, { "epoch": 1.0869565217391304, "grad_norm": 9.599648475646973, "learning_rate": 3.91327474560592e-05, "loss": 1.3799, "step": 4700 }, { "epoch": 1.1100832562442182, "grad_norm": 14.585043907165527, "learning_rate": 3.890148011100833e-05, "loss": 1.3597, "step": 4800 }, { "epoch": 1.1332099907493063, "grad_norm": 11.30241584777832, "learning_rate": 3.867021276595745e-05, "loss": 1.3351, "step": 4900 }, { "epoch": 1.1563367252543941, "grad_norm": 10.888731002807617, "learning_rate": 3.843894542090657e-05, "loss": 1.3082, "step": 5000 }, { "epoch": 1.179463459759482, "grad_norm": 9.09694766998291, "learning_rate": 3.820767807585569e-05, "loss": 1.3105, "step": 5100 }, { "epoch": 1.2025901942645698, "grad_norm": 13.162363052368164, "learning_rate": 3.797641073080481e-05, "loss": 1.2948, "step": 5200 }, { "epoch": 1.2257169287696577, "grad_norm": 26.262020111083984, "learning_rate": 3.774514338575393e-05, "loss": 1.3486, "step": 5300 }, { "epoch": 1.2488436632747457, "grad_norm": 21.114444732666016, "learning_rate": 3.751387604070306e-05, "loss": 1.3155, "step": 5400 }, { "epoch": 1.2719703977798336, "grad_norm": 10.668533325195312, "learning_rate": 3.728260869565218e-05, "loss": 1.2761, "step": 5500 }, { "epoch": 1.2950971322849214, "grad_norm": 12.978555679321289, "learning_rate": 3.70513413506013e-05, "loss": 1.2541, "step": 5600 }, { "epoch": 1.3182238667900092, "grad_norm": 13.066253662109375, "learning_rate": 3.682007400555042e-05, "loss": 1.2346, "step": 5700 }, { "epoch": 1.341350601295097, "grad_norm": 11.974534034729004, "learning_rate": 3.658880666049954e-05, "loss": 1.2285, "step": 5800 }, { "epoch": 1.364477335800185, "grad_norm": 12.733871459960938, "learning_rate": 3.6357539315448655e-05, "loss": 1.2013, "step": 5900 }, { "epoch": 1.3876040703052728, "grad_norm": 13.184294700622559, "learning_rate": 3.612627197039778e-05, "loss": 1.1986, "step": 6000 }, { "epoch": 1.4107308048103608, "grad_norm": 10.159133911132812, "learning_rate": 3.58950046253469e-05, "loss": 1.1755, "step": 6100 }, { "epoch": 1.4338575393154487, "grad_norm": 12.212769508361816, "learning_rate": 3.566373728029602e-05, "loss": 1.1937, "step": 6200 }, { "epoch": 1.4569842738205365, "grad_norm": 15.231562614440918, "learning_rate": 3.5432469935245146e-05, "loss": 1.202, "step": 6300 }, { "epoch": 1.4801110083256244, "grad_norm": 14.845349311828613, "learning_rate": 3.5201202590194265e-05, "loss": 1.1607, "step": 6400 }, { "epoch": 1.5032377428307124, "grad_norm": 12.119277954101562, "learning_rate": 3.4969935245143384e-05, "loss": 1.2116, "step": 6500 }, { "epoch": 1.5263644773358003, "grad_norm": 12.19071102142334, "learning_rate": 3.473866790009251e-05, "loss": 1.1797, "step": 6600 }, { "epoch": 1.5494912118408881, "grad_norm": 12.373006820678711, "learning_rate": 3.450740055504163e-05, "loss": 1.1571, "step": 6700 }, { "epoch": 1.572617946345976, "grad_norm": 14.413765907287598, "learning_rate": 3.427613320999075e-05, "loss": 1.1526, "step": 6800 }, { "epoch": 1.5957446808510638, "grad_norm": 12.39973258972168, "learning_rate": 3.4044865864939875e-05, "loss": 1.1438, "step": 6900 }, { "epoch": 1.6188714153561516, "grad_norm": 15.030281066894531, "learning_rate": 3.3813598519888994e-05, "loss": 1.1634, "step": 7000 }, { "epoch": 1.6419981498612395, "grad_norm": 15.445487022399902, "learning_rate": 3.3582331174838114e-05, "loss": 1.1367, "step": 7100 }, { "epoch": 1.6651248843663273, "grad_norm": 15.726140022277832, "learning_rate": 3.335106382978724e-05, "loss": 1.1133, "step": 7200 }, { "epoch": 1.6882516188714154, "grad_norm": 12.940756797790527, "learning_rate": 3.311979648473636e-05, "loss": 1.1156, "step": 7300 }, { "epoch": 1.7113783533765032, "grad_norm": 14.717055320739746, "learning_rate": 3.288852913968548e-05, "loss": 1.1102, "step": 7400 }, { "epoch": 1.734505087881591, "grad_norm": 12.775771141052246, "learning_rate": 3.26572617946346e-05, "loss": 1.1123, "step": 7500 }, { "epoch": 1.7576318223866791, "grad_norm": 12.529901504516602, "learning_rate": 3.242599444958372e-05, "loss": 1.1066, "step": 7600 }, { "epoch": 1.780758556891767, "grad_norm": 12.506126403808594, "learning_rate": 3.219472710453284e-05, "loss": 1.1291, "step": 7700 }, { "epoch": 1.8038852913968548, "grad_norm": 16.9326114654541, "learning_rate": 3.196345975948196e-05, "loss": 1.1094, "step": 7800 }, { "epoch": 1.8270120259019427, "grad_norm": 12.257229804992676, "learning_rate": 3.173219241443108e-05, "loss": 1.094, "step": 7900 }, { "epoch": 1.8501387604070305, "grad_norm": 15.346363067626953, "learning_rate": 3.150092506938021e-05, "loss": 1.1585, "step": 8000 }, { "epoch": 1.8732654949121184, "grad_norm": 12.800529479980469, "learning_rate": 3.126965772432933e-05, "loss": 1.077, "step": 8100 }, { "epoch": 1.8963922294172062, "grad_norm": 14.989642143249512, "learning_rate": 3.103839037927845e-05, "loss": 1.108, "step": 8200 }, { "epoch": 1.919518963922294, "grad_norm": 14.930624008178711, "learning_rate": 3.080712303422757e-05, "loss": 1.1431, "step": 8300 }, { "epoch": 1.942645698427382, "grad_norm": 13.488320350646973, "learning_rate": 3.057585568917669e-05, "loss": 1.0784, "step": 8400 }, { "epoch": 1.96577243293247, "grad_norm": 12.30305004119873, "learning_rate": 3.034458834412581e-05, "loss": 1.0834, "step": 8500 }, { "epoch": 1.9888991674375578, "grad_norm": 13.152817726135254, "learning_rate": 3.0113320999074934e-05, "loss": 1.1268, "step": 8600 }, { "epoch": 2.0, "eval_cosine_accuracy": 0.9209068037391286, "eval_cosine_accuracy_threshold": 0.8271753191947937, "eval_cosine_ap": 0.8450278341834471, "eval_cosine_f1": 0.7521474761123443, "eval_cosine_f1_threshold": 0.8189181089401245, "eval_cosine_mcc": 0.701977809831327, "eval_cosine_precision": 0.7438907980145093, "eval_cosine_recall": 0.7605894983408159, "eval_loss": 9.69921588897705, "eval_runtime": 1472.4233, "eval_samples_per_second": 125.255, "eval_steps_per_second": 1.957, "step": 8648 }, { "epoch": 2.012025901942646, "grad_norm": 15.71314811706543, "learning_rate": 2.9882053654024057e-05, "loss": 1.0443, "step": 8700 }, { "epoch": 2.0351526364477337, "grad_norm": 14.261523246765137, "learning_rate": 2.9650786308973173e-05, "loss": 0.9715, "step": 8800 }, { "epoch": 2.0582793709528215, "grad_norm": 13.405384063720703, "learning_rate": 2.9419518963922292e-05, "loss": 0.957, "step": 8900 }, { "epoch": 2.0814061054579094, "grad_norm": 13.5853853225708, "learning_rate": 2.9188251618871415e-05, "loss": 0.9784, "step": 9000 }, { "epoch": 2.1045328399629972, "grad_norm": 14.918572425842285, "learning_rate": 2.8956984273820538e-05, "loss": 0.9581, "step": 9100 }, { "epoch": 2.127659574468085, "grad_norm": 13.354079246520996, "learning_rate": 2.8725716928769657e-05, "loss": 0.9569, "step": 9200 }, { "epoch": 2.150786308973173, "grad_norm": 15.024975776672363, "learning_rate": 2.849444958371878e-05, "loss": 0.9518, "step": 9300 }, { "epoch": 2.1739130434782608, "grad_norm": 13.5723876953125, "learning_rate": 2.8263182238667902e-05, "loss": 0.9485, "step": 9400 }, { "epoch": 2.1970397779833486, "grad_norm": 12.383338928222656, "learning_rate": 2.8031914893617022e-05, "loss": 0.9433, "step": 9500 }, { "epoch": 2.2201665124884364, "grad_norm": 13.54079532623291, "learning_rate": 2.7800647548566144e-05, "loss": 0.9392, "step": 9600 }, { "epoch": 2.2432932469935247, "grad_norm": 15.325400352478027, "learning_rate": 2.7569380203515267e-05, "loss": 0.9248, "step": 9700 }, { "epoch": 2.2664199814986126, "grad_norm": 15.651702880859375, "learning_rate": 2.7338112858464387e-05, "loss": 0.9105, "step": 9800 }, { "epoch": 2.2895467160037004, "grad_norm": 15.196932792663574, "learning_rate": 2.710684551341351e-05, "loss": 0.9769, "step": 9900 }, { "epoch": 2.3126734505087883, "grad_norm": 13.820411682128906, "learning_rate": 2.6875578168362632e-05, "loss": 0.9502, "step": 10000 }, { "epoch": 2.335800185013876, "grad_norm": 15.181200981140137, "learning_rate": 2.664431082331175e-05, "loss": 0.9604, "step": 10100 }, { "epoch": 2.358926919518964, "grad_norm": 17.202287673950195, "learning_rate": 2.6413043478260867e-05, "loss": 0.9291, "step": 10200 }, { "epoch": 2.382053654024052, "grad_norm": 16.69881248474121, "learning_rate": 2.618177613320999e-05, "loss": 0.9552, "step": 10300 }, { "epoch": 2.4051803885291396, "grad_norm": 16.424978256225586, "learning_rate": 2.5950508788159113e-05, "loss": 0.9621, "step": 10400 }, { "epoch": 2.4283071230342275, "grad_norm": 17.192264556884766, "learning_rate": 2.5719241443108232e-05, "loss": 0.9357, "step": 10500 }, { "epoch": 2.4514338575393153, "grad_norm": 16.9178524017334, "learning_rate": 2.5487974098057355e-05, "loss": 0.9323, "step": 10600 }, { "epoch": 2.474560592044403, "grad_norm": 11.07499885559082, "learning_rate": 2.5256706753006477e-05, "loss": 0.9327, "step": 10700 }, { "epoch": 2.4976873265494914, "grad_norm": 28.121694564819336, "learning_rate": 2.5025439407955597e-05, "loss": 0.9067, "step": 10800 }, { "epoch": 2.520814061054579, "grad_norm": 15.262548446655273, "learning_rate": 2.479417206290472e-05, "loss": 0.9411, "step": 10900 }, { "epoch": 2.543940795559667, "grad_norm": 16.59368133544922, "learning_rate": 2.4562904717853842e-05, "loss": 0.9305, "step": 11000 }, { "epoch": 2.567067530064755, "grad_norm": 12.39183235168457, "learning_rate": 2.433163737280296e-05, "loss": 0.9378, "step": 11100 }, { "epoch": 2.590194264569843, "grad_norm": 19.034120559692383, "learning_rate": 2.4100370027752084e-05, "loss": 0.9171, "step": 11200 }, { "epoch": 2.6133209990749307, "grad_norm": 16.727380752563477, "learning_rate": 2.3869102682701204e-05, "loss": 0.9074, "step": 11300 }, { "epoch": 2.6364477335800185, "grad_norm": 17.257230758666992, "learning_rate": 2.3637835337650323e-05, "loss": 0.9262, "step": 11400 }, { "epoch": 2.6595744680851063, "grad_norm": 18.956735610961914, "learning_rate": 2.3406567992599446e-05, "loss": 0.9063, "step": 11500 }, { "epoch": 2.682701202590194, "grad_norm": 15.463052749633789, "learning_rate": 2.317530064754857e-05, "loss": 0.8814, "step": 11600 }, { "epoch": 2.705827937095282, "grad_norm": 13.263724327087402, "learning_rate": 2.2944033302497688e-05, "loss": 0.9089, "step": 11700 }, { "epoch": 2.72895467160037, "grad_norm": 15.596953392028809, "learning_rate": 2.271276595744681e-05, "loss": 0.9048, "step": 11800 }, { "epoch": 2.752081406105458, "grad_norm": 16.61760711669922, "learning_rate": 2.2481498612395933e-05, "loss": 0.9268, "step": 11900 }, { "epoch": 2.7752081406105455, "grad_norm": 16.409652709960938, "learning_rate": 2.2250231267345052e-05, "loss": 0.8913, "step": 12000 }, { "epoch": 2.798334875115634, "grad_norm": 14.509052276611328, "learning_rate": 2.2018963922294172e-05, "loss": 0.9064, "step": 12100 }, { "epoch": 2.8214616096207217, "grad_norm": 13.914097785949707, "learning_rate": 2.1787696577243295e-05, "loss": 0.8585, "step": 12200 }, { "epoch": 2.8445883441258095, "grad_norm": 13.852704048156738, "learning_rate": 2.1556429232192414e-05, "loss": 0.878, "step": 12300 }, { "epoch": 2.8677150786308974, "grad_norm": 15.329246520996094, "learning_rate": 2.1325161887141537e-05, "loss": 0.8612, "step": 12400 }, { "epoch": 2.890841813135985, "grad_norm": 15.046784400939941, "learning_rate": 2.109389454209066e-05, "loss": 0.8799, "step": 12500 }, { "epoch": 2.913968547641073, "grad_norm": 15.824640274047852, "learning_rate": 2.086262719703978e-05, "loss": 0.8541, "step": 12600 }, { "epoch": 2.937095282146161, "grad_norm": 15.991775512695312, "learning_rate": 2.0631359851988898e-05, "loss": 0.8521, "step": 12700 }, { "epoch": 2.9602220166512487, "grad_norm": 16.13051986694336, "learning_rate": 2.040009250693802e-05, "loss": 0.8582, "step": 12800 }, { "epoch": 2.9833487511563366, "grad_norm": 14.818473815917969, "learning_rate": 2.0168825161887143e-05, "loss": 0.869, "step": 12900 }, { "epoch": 3.0, "eval_cosine_accuracy": 0.9225280326197758, "eval_cosine_accuracy_threshold": 0.7901061773300171, "eval_cosine_ap": 0.8478615501518483, "eval_cosine_f1": 0.7559554803436604, "eval_cosine_f1_threshold": 0.7817596793174744, "eval_cosine_mcc": 0.7071656901034916, "eval_cosine_precision": 0.756201575623413, "eval_cosine_recall": 0.7557095451883662, "eval_loss": 10.411548614501953, "eval_runtime": 1486.9605, "eval_samples_per_second": 124.03, "eval_steps_per_second": 1.938, "step": 12972 } ], "logging_steps": 100, "max_steps": 21620, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }