{
  "best_global_step": 4324,
  "best_metric": 8.351966857910156,
  "best_model_checkpoint": "printing_press/author-paraphrase/models/intfloat/multilingual-e5-base/checkpoint-4324",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 12972,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02312673450508788,
      "grad_norm": 10.87702751159668,
      "learning_rate": 4.977104532839963e-05,
      "loss": 2.4824,
      "step": 100
    },
    {
      "epoch": 0.04625346901017576,
      "grad_norm": 11.214370727539062,
      "learning_rate": 4.953977798334876e-05,
      "loss": 2.2607,
      "step": 200
    },
    {
      "epoch": 0.06938020351526364,
      "grad_norm": 9.89275074005127,
      "learning_rate": 4.930851063829787e-05,
      "loss": 2.1716,
      "step": 300
    },
    {
      "epoch": 0.09250693802035152,
      "grad_norm": 8.298206329345703,
      "learning_rate": 4.9077243293247e-05,
      "loss": 2.0981,
      "step": 400
    },
    {
      "epoch": 0.11563367252543941,
      "grad_norm": 9.424484252929688,
      "learning_rate": 4.8845975948196116e-05,
      "loss": 1.9617,
      "step": 500
    },
    {
      "epoch": 0.13876040703052728,
      "grad_norm": 8.752665519714355,
      "learning_rate": 4.8614708603145235e-05,
      "loss": 1.987,
      "step": 600
    },
    {
      "epoch": 0.16188714153561518,
      "grad_norm": 9.41073989868164,
      "learning_rate": 4.838344125809436e-05,
      "loss": 1.9429,
      "step": 700
    },
    {
      "epoch": 0.18501387604070305,
      "grad_norm": 11.025175094604492,
      "learning_rate": 4.815217391304348e-05,
      "loss": 1.9398,
      "step": 800
    },
    {
      "epoch": 0.20814061054579094,
      "grad_norm": 9.1474027633667,
      "learning_rate": 4.79209065679926e-05,
      "loss": 1.8745,
      "step": 900
    },
    {
      "epoch": 0.23126734505087881,
      "grad_norm": 8.528326034545898,
      "learning_rate": 4.7689639222941726e-05,
      "loss": 1.8484,
      "step": 1000
    },
    {
      "epoch": 0.2543940795559667,
      "grad_norm": 9.510236740112305,
      "learning_rate": 4.7458371877890846e-05,
      "loss": 1.846,
      "step": 1100
    },
    {
      "epoch": 0.27752081406105455,
      "grad_norm": 10.6268892288208,
      "learning_rate": 4.7227104532839965e-05,
      "loss": 1.7953,
      "step": 1200
    },
    {
      "epoch": 0.30064754856614245,
      "grad_norm": 9.342752456665039,
      "learning_rate": 4.699583718778909e-05,
      "loss": 1.8189,
      "step": 1300
    },
    {
      "epoch": 0.32377428307123035,
      "grad_norm": 8.43892765045166,
      "learning_rate": 4.676456984273821e-05,
      "loss": 1.7775,
      "step": 1400
    },
    {
      "epoch": 0.34690101757631825,
      "grad_norm": 12.730334281921387,
      "learning_rate": 4.653330249768733e-05,
      "loss": 1.76,
      "step": 1500
    },
    {
      "epoch": 0.3700277520814061,
      "grad_norm": 27.328882217407227,
      "learning_rate": 4.6302035152636456e-05,
      "loss": 1.7638,
      "step": 1600
    },
    {
      "epoch": 0.393154486586494,
      "grad_norm": 9.745387077331543,
      "learning_rate": 4.607076780758557e-05,
      "loss": 1.7039,
      "step": 1700
    },
    {
      "epoch": 0.4162812210915819,
      "grad_norm": 9.242189407348633,
      "learning_rate": 4.583950046253469e-05,
      "loss": 1.706,
      "step": 1800
    },
    {
      "epoch": 0.43940795559666973,
      "grad_norm": 9.079887390136719,
      "learning_rate": 4.5608233117483814e-05,
      "loss": 1.7255,
      "step": 1900
    },
    {
      "epoch": 0.46253469010175763,
      "grad_norm": 8.81753158569336,
      "learning_rate": 4.537696577243293e-05,
      "loss": 1.705,
      "step": 2000
    },
    {
      "epoch": 0.4856614246068455,
      "grad_norm": 10.990808486938477,
      "learning_rate": 4.514569842738205e-05,
      "loss": 1.6823,
      "step": 2100
    },
    {
      "epoch": 0.5087881591119334,
      "grad_norm": 9.342223167419434,
      "learning_rate": 4.491443108233118e-05,
      "loss": 1.6921,
      "step": 2200
    },
    {
      "epoch": 0.5319148936170213,
      "grad_norm": 9.898025512695312,
      "learning_rate": 4.46831637372803e-05,
      "loss": 1.6801,
      "step": 2300
    },
    {
      "epoch": 0.5550416281221091,
      "grad_norm": 8.3154296875,
      "learning_rate": 4.445189639222942e-05,
      "loss": 1.6547,
      "step": 2400
    },
    {
      "epoch": 0.5781683626271971,
      "grad_norm": 8.5817232131958,
      "learning_rate": 4.422062904717854e-05,
      "loss": 1.6512,
      "step": 2500
    },
    {
      "epoch": 0.6012950971322849,
      "grad_norm": 8.931965827941895,
      "learning_rate": 4.398936170212766e-05,
      "loss": 1.6466,
      "step": 2600
    },
    {
      "epoch": 0.6244218316373727,
      "grad_norm": 10.271759033203125,
      "learning_rate": 4.375809435707678e-05,
      "loss": 1.6545,
      "step": 2700
    },
    {
      "epoch": 0.6475485661424607,
      "grad_norm": 8.621456146240234,
      "learning_rate": 4.352682701202591e-05,
      "loss": 1.5985,
      "step": 2800
    },
    {
      "epoch": 0.6706753006475485,
      "grad_norm": 9.487942695617676,
      "learning_rate": 4.329555966697503e-05,
      "loss": 1.5941,
      "step": 2900
    },
    {
      "epoch": 0.6938020351526365,
      "grad_norm": 9.156566619873047,
      "learning_rate": 4.306429232192415e-05,
      "loss": 1.6178,
      "step": 3000
    },
    {
      "epoch": 0.7169287696577243,
      "grad_norm": 11.126986503601074,
      "learning_rate": 4.2833024976873266e-05,
      "loss": 1.6035,
      "step": 3100
    },
    {
      "epoch": 0.7400555041628122,
      "grad_norm": 10.304364204406738,
      "learning_rate": 4.2601757631822385e-05,
      "loss": 1.568,
      "step": 3200
    },
    {
      "epoch": 0.7631822386679001,
      "grad_norm": 9.863882064819336,
      "learning_rate": 4.2370490286771505e-05,
      "loss": 1.5733,
      "step": 3300
    },
    {
      "epoch": 0.786308973172988,
      "grad_norm": 10.20065689086914,
      "learning_rate": 4.213922294172063e-05,
      "loss": 1.5841,
      "step": 3400
    },
    {
      "epoch": 0.8094357076780758,
      "grad_norm": 9.240017890930176,
      "learning_rate": 4.190795559666975e-05,
      "loss": 1.5869,
      "step": 3500
    },
    {
      "epoch": 0.8325624421831638,
      "grad_norm": 9.452214241027832,
      "learning_rate": 4.167668825161887e-05,
      "loss": 1.5806,
      "step": 3600
    },
    {
      "epoch": 0.8556891766882516,
      "grad_norm": 10.292157173156738,
      "learning_rate": 4.1445420906567996e-05,
      "loss": 1.5682,
      "step": 3700
    },
    {
      "epoch": 0.8788159111933395,
      "grad_norm": 12.675426483154297,
      "learning_rate": 4.1214153561517115e-05,
      "loss": 1.553,
      "step": 3800
    },
    {
      "epoch": 0.9019426456984274,
      "grad_norm": 11.213452339172363,
      "learning_rate": 4.0982886216466234e-05,
      "loss": 1.5564,
      "step": 3900
    },
    {
      "epoch": 0.9250693802035153,
      "grad_norm": 11.688211441040039,
      "learning_rate": 4.075161887141536e-05,
      "loss": 1.5389,
      "step": 4000
    },
    {
      "epoch": 0.9481961147086031,
      "grad_norm": 19.469762802124023,
      "learning_rate": 4.052035152636448e-05,
      "loss": 1.5091,
      "step": 4100
    },
    {
      "epoch": 0.971322849213691,
      "grad_norm": 11.896566390991211,
      "learning_rate": 4.02890841813136e-05,
      "loss": 1.5358,
      "step": 4200
    },
    {
      "epoch": 0.9944495837187789,
      "grad_norm": 26.442358016967773,
      "learning_rate": 4.0057816836262725e-05,
      "loss": 1.512,
      "step": 4300
    },
    {
      "epoch": 1.0,
      "eval_cosine_accuracy": 0.9215032424577613,
      "eval_cosine_accuracy_threshold": 0.8687731027603149,
      "eval_cosine_ap": 0.844612185583288,
      "eval_cosine_f1": 0.7527066450567261,
      "eval_cosine_f1_threshold": 0.8616929054260254,
      "eval_cosine_mcc": 0.7030390290848069,
      "eval_cosine_precision": 0.7499838511724048,
      "eval_cosine_recall": 0.7554492810202356,
      "eval_loss": 8.351966857910156,
      "eval_runtime": 1467.219,
      "eval_samples_per_second": 125.699,
      "eval_steps_per_second": 1.964,
      "step": 4324
    },
    {
      "epoch": 1.0175763182238668,
      "grad_norm": 10.624114990234375,
      "learning_rate": 3.9826549491211844e-05,
      "loss": 1.4186,
      "step": 4400
    },
    {
      "epoch": 1.0407030527289547,
      "grad_norm": 10.404448509216309,
      "learning_rate": 3.9595282146160964e-05,
      "loss": 1.4075,
      "step": 4500
    },
    {
      "epoch": 1.0638297872340425,
      "grad_norm": 9.519079208374023,
      "learning_rate": 3.936401480111008e-05,
      "loss": 1.3934,
      "step": 4600
    },
    {
      "epoch": 1.0869565217391304,
      "grad_norm": 9.599648475646973,
      "learning_rate": 3.91327474560592e-05,
      "loss": 1.3799,
      "step": 4700
    },
    {
      "epoch": 1.1100832562442182,
      "grad_norm": 14.585043907165527,
      "learning_rate": 3.890148011100833e-05,
      "loss": 1.3597,
      "step": 4800
    },
    {
      "epoch": 1.1332099907493063,
      "grad_norm": 11.30241584777832,
      "learning_rate": 3.867021276595745e-05,
      "loss": 1.3351,
      "step": 4900
    },
    {
      "epoch": 1.1563367252543941,
      "grad_norm": 10.888731002807617,
      "learning_rate": 3.843894542090657e-05,
      "loss": 1.3082,
      "step": 5000
    },
    {
      "epoch": 1.179463459759482,
      "grad_norm": 9.09694766998291,
      "learning_rate": 3.820767807585569e-05,
      "loss": 1.3105,
      "step": 5100
    },
    {
      "epoch": 1.2025901942645698,
      "grad_norm": 13.162363052368164,
      "learning_rate": 3.797641073080481e-05,
      "loss": 1.2948,
      "step": 5200
    },
    {
      "epoch": 1.2257169287696577,
      "grad_norm": 26.262020111083984,
      "learning_rate": 3.774514338575393e-05,
      "loss": 1.3486,
      "step": 5300
    },
    {
      "epoch": 1.2488436632747457,
      "grad_norm": 21.114444732666016,
      "learning_rate": 3.751387604070306e-05,
      "loss": 1.3155,
      "step": 5400
    },
    {
      "epoch": 1.2719703977798336,
      "grad_norm": 10.668533325195312,
      "learning_rate": 3.728260869565218e-05,
      "loss": 1.2761,
      "step": 5500
    },
    {
      "epoch": 1.2950971322849214,
      "grad_norm": 12.978555679321289,
      "learning_rate": 3.70513413506013e-05,
      "loss": 1.2541,
      "step": 5600
    },
    {
      "epoch": 1.3182238667900092,
      "grad_norm": 13.066253662109375,
      "learning_rate": 3.682007400555042e-05,
      "loss": 1.2346,
      "step": 5700
    },
    {
      "epoch": 1.341350601295097,
      "grad_norm": 11.974534034729004,
      "learning_rate": 3.658880666049954e-05,
      "loss": 1.2285,
      "step": 5800
    },
    {
      "epoch": 1.364477335800185,
      "grad_norm": 12.733871459960938,
      "learning_rate": 3.6357539315448655e-05,
      "loss": 1.2013,
      "step": 5900
    },
    {
      "epoch": 1.3876040703052728,
      "grad_norm": 13.184294700622559,
      "learning_rate": 3.612627197039778e-05,
      "loss": 1.1986,
      "step": 6000
    },
    {
      "epoch": 1.4107308048103608,
      "grad_norm": 10.159133911132812,
      "learning_rate": 3.58950046253469e-05,
      "loss": 1.1755,
      "step": 6100
    },
    {
      "epoch": 1.4338575393154487,
      "grad_norm": 12.212769508361816,
      "learning_rate": 3.566373728029602e-05,
      "loss": 1.1937,
      "step": 6200
    },
    {
      "epoch": 1.4569842738205365,
      "grad_norm": 15.231562614440918,
      "learning_rate": 3.5432469935245146e-05,
      "loss": 1.202,
      "step": 6300
    },
    {
      "epoch": 1.4801110083256244,
      "grad_norm": 14.845349311828613,
      "learning_rate": 3.5201202590194265e-05,
      "loss": 1.1607,
      "step": 6400
    },
    {
      "epoch": 1.5032377428307124,
      "grad_norm": 12.119277954101562,
      "learning_rate": 3.4969935245143384e-05,
      "loss": 1.2116,
      "step": 6500
    },
    {
      "epoch": 1.5263644773358003,
      "grad_norm": 12.19071102142334,
      "learning_rate": 3.473866790009251e-05,
      "loss": 1.1797,
      "step": 6600
    },
    {
      "epoch": 1.5494912118408881,
      "grad_norm": 12.373006820678711,
      "learning_rate": 3.450740055504163e-05,
      "loss": 1.1571,
      "step": 6700
    },
    {
      "epoch": 1.572617946345976,
      "grad_norm": 14.413765907287598,
      "learning_rate": 3.427613320999075e-05,
      "loss": 1.1526,
      "step": 6800
    },
    {
      "epoch": 1.5957446808510638,
      "grad_norm": 12.39973258972168,
      "learning_rate": 3.4044865864939875e-05,
      "loss": 1.1438,
      "step": 6900
    },
    {
      "epoch": 1.6188714153561516,
      "grad_norm": 15.030281066894531,
      "learning_rate": 3.3813598519888994e-05,
      "loss": 1.1634,
      "step": 7000
    },
    {
      "epoch": 1.6419981498612395,
      "grad_norm": 15.445487022399902,
      "learning_rate": 3.3582331174838114e-05,
      "loss": 1.1367,
      "step": 7100
    },
    {
      "epoch": 1.6651248843663273,
      "grad_norm": 15.726140022277832,
      "learning_rate": 3.335106382978724e-05,
      "loss": 1.1133,
      "step": 7200
    },
    {
      "epoch": 1.6882516188714154,
      "grad_norm": 12.940756797790527,
      "learning_rate": 3.311979648473636e-05,
      "loss": 1.1156,
      "step": 7300
    },
    {
      "epoch": 1.7113783533765032,
      "grad_norm": 14.717055320739746,
      "learning_rate": 3.288852913968548e-05,
      "loss": 1.1102,
      "step": 7400
    },
    {
      "epoch": 1.734505087881591,
      "grad_norm": 12.775771141052246,
      "learning_rate": 3.26572617946346e-05,
      "loss": 1.1123,
      "step": 7500
    },
    {
      "epoch": 1.7576318223866791,
      "grad_norm": 12.529901504516602,
      "learning_rate": 3.242599444958372e-05,
      "loss": 1.1066,
      "step": 7600
    },
    {
      "epoch": 1.780758556891767,
      "grad_norm": 12.506126403808594,
      "learning_rate": 3.219472710453284e-05,
      "loss": 1.1291,
      "step": 7700
    },
    {
      "epoch": 1.8038852913968548,
      "grad_norm": 16.9326114654541,
      "learning_rate": 3.196345975948196e-05,
      "loss": 1.1094,
      "step": 7800
    },
    {
      "epoch": 1.8270120259019427,
      "grad_norm": 12.257229804992676,
      "learning_rate": 3.173219241443108e-05,
      "loss": 1.094,
      "step": 7900
    },
    {
      "epoch": 1.8501387604070305,
      "grad_norm": 15.346363067626953,
      "learning_rate": 3.150092506938021e-05,
      "loss": 1.1585,
      "step": 8000
    },
    {
      "epoch": 1.8732654949121184,
      "grad_norm": 12.800529479980469,
      "learning_rate": 3.126965772432933e-05,
      "loss": 1.077,
      "step": 8100
    },
    {
      "epoch": 1.8963922294172062,
      "grad_norm": 14.989642143249512,
      "learning_rate": 3.103839037927845e-05,
      "loss": 1.108,
      "step": 8200
    },
    {
      "epoch": 1.919518963922294,
      "grad_norm": 14.930624008178711,
      "learning_rate": 3.080712303422757e-05,
      "loss": 1.1431,
      "step": 8300
    },
    {
      "epoch": 1.942645698427382,
      "grad_norm": 13.488320350646973,
      "learning_rate": 3.057585568917669e-05,
      "loss": 1.0784,
      "step": 8400
    },
    {
      "epoch": 1.96577243293247,
      "grad_norm": 12.30305004119873,
      "learning_rate": 3.034458834412581e-05,
      "loss": 1.0834,
      "step": 8500
    },
    {
      "epoch": 1.9888991674375578,
      "grad_norm": 13.152817726135254,
      "learning_rate": 3.0113320999074934e-05,
      "loss": 1.1268,
      "step": 8600
    },
    {
      "epoch": 2.0,
      "eval_cosine_accuracy": 0.9209068037391286,
      "eval_cosine_accuracy_threshold": 0.8271753191947937,
      "eval_cosine_ap": 0.8450278341834471,
      "eval_cosine_f1": 0.7521474761123443,
      "eval_cosine_f1_threshold": 0.8189181089401245,
      "eval_cosine_mcc": 0.701977809831327,
      "eval_cosine_precision": 0.7438907980145093,
      "eval_cosine_recall": 0.7605894983408159,
      "eval_loss": 9.69921588897705,
      "eval_runtime": 1472.4233,
      "eval_samples_per_second": 125.255,
      "eval_steps_per_second": 1.957,
      "step": 8648
    },
    {
      "epoch": 2.012025901942646,
      "grad_norm": 15.71314811706543,
      "learning_rate": 2.9882053654024057e-05,
      "loss": 1.0443,
      "step": 8700
    },
    {
      "epoch": 2.0351526364477337,
      "grad_norm": 14.261523246765137,
      "learning_rate": 2.9650786308973173e-05,
      "loss": 0.9715,
      "step": 8800
    },
    {
      "epoch": 2.0582793709528215,
      "grad_norm": 13.405384063720703,
      "learning_rate": 2.9419518963922292e-05,
      "loss": 0.957,
      "step": 8900
    },
    {
      "epoch": 2.0814061054579094,
      "grad_norm": 13.5853853225708,
      "learning_rate": 2.9188251618871415e-05,
      "loss": 0.9784,
      "step": 9000
    },
    {
      "epoch": 2.1045328399629972,
      "grad_norm": 14.918572425842285,
      "learning_rate": 2.8956984273820538e-05,
      "loss": 0.9581,
      "step": 9100
    },
    {
      "epoch": 2.127659574468085,
      "grad_norm": 13.354079246520996,
      "learning_rate": 2.8725716928769657e-05,
      "loss": 0.9569,
      "step": 9200
    },
    {
      "epoch": 2.150786308973173,
      "grad_norm": 15.024975776672363,
      "learning_rate": 2.849444958371878e-05,
      "loss": 0.9518,
      "step": 9300
    },
    {
      "epoch": 2.1739130434782608,
      "grad_norm": 13.5723876953125,
      "learning_rate": 2.8263182238667902e-05,
      "loss": 0.9485,
      "step": 9400
    },
    {
      "epoch": 2.1970397779833486,
      "grad_norm": 12.383338928222656,
      "learning_rate": 2.8031914893617022e-05,
      "loss": 0.9433,
      "step": 9500
    },
    {
      "epoch": 2.2201665124884364,
      "grad_norm": 13.54079532623291,
      "learning_rate": 2.7800647548566144e-05,
      "loss": 0.9392,
      "step": 9600
    },
    {
      "epoch": 2.2432932469935247,
      "grad_norm": 15.325400352478027,
      "learning_rate": 2.7569380203515267e-05,
      "loss": 0.9248,
      "step": 9700
    },
    {
      "epoch": 2.2664199814986126,
      "grad_norm": 15.651702880859375,
      "learning_rate": 2.7338112858464387e-05,
      "loss": 0.9105,
      "step": 9800
    },
    {
      "epoch": 2.2895467160037004,
      "grad_norm": 15.196932792663574,
      "learning_rate": 2.710684551341351e-05,
      "loss": 0.9769,
      "step": 9900
    },
    {
      "epoch": 2.3126734505087883,
      "grad_norm": 13.820411682128906,
      "learning_rate": 2.6875578168362632e-05,
      "loss": 0.9502,
      "step": 10000
    },
    {
      "epoch": 2.335800185013876,
      "grad_norm": 15.181200981140137,
      "learning_rate": 2.664431082331175e-05,
      "loss": 0.9604,
      "step": 10100
    },
    {
      "epoch": 2.358926919518964,
      "grad_norm": 17.202287673950195,
      "learning_rate": 2.6413043478260867e-05,
      "loss": 0.9291,
      "step": 10200
    },
    {
      "epoch": 2.382053654024052,
      "grad_norm": 16.69881248474121,
      "learning_rate": 2.618177613320999e-05,
      "loss": 0.9552,
      "step": 10300
    },
    {
      "epoch": 2.4051803885291396,
      "grad_norm": 16.424978256225586,
      "learning_rate": 2.5950508788159113e-05,
      "loss": 0.9621,
      "step": 10400
    },
    {
      "epoch": 2.4283071230342275,
      "grad_norm": 17.192264556884766,
      "learning_rate": 2.5719241443108232e-05,
      "loss": 0.9357,
      "step": 10500
    },
    {
      "epoch": 2.4514338575393153,
      "grad_norm": 16.9178524017334,
      "learning_rate": 2.5487974098057355e-05,
      "loss": 0.9323,
      "step": 10600
    },
    {
      "epoch": 2.474560592044403,
      "grad_norm": 11.07499885559082,
      "learning_rate": 2.5256706753006477e-05,
      "loss": 0.9327,
      "step": 10700
    },
    {
      "epoch": 2.4976873265494914,
      "grad_norm": 28.121694564819336,
      "learning_rate": 2.5025439407955597e-05,
      "loss": 0.9067,
      "step": 10800
    },
    {
      "epoch": 2.520814061054579,
      "grad_norm": 15.262548446655273,
      "learning_rate": 2.479417206290472e-05,
      "loss": 0.9411,
      "step": 10900
    },
    {
      "epoch": 2.543940795559667,
      "grad_norm": 16.59368133544922,
      "learning_rate": 2.4562904717853842e-05,
      "loss": 0.9305,
      "step": 11000
    },
    {
      "epoch": 2.567067530064755,
      "grad_norm": 12.39183235168457,
      "learning_rate": 2.433163737280296e-05,
      "loss": 0.9378,
      "step": 11100
    },
    {
      "epoch": 2.590194264569843,
      "grad_norm": 19.034120559692383,
      "learning_rate": 2.4100370027752084e-05,
      "loss": 0.9171,
      "step": 11200
    },
    {
      "epoch": 2.6133209990749307,
      "grad_norm": 16.727380752563477,
      "learning_rate": 2.3869102682701204e-05,
      "loss": 0.9074,
      "step": 11300
    },
    {
      "epoch": 2.6364477335800185,
      "grad_norm": 17.257230758666992,
      "learning_rate": 2.3637835337650323e-05,
      "loss": 0.9262,
      "step": 11400
    },
    {
      "epoch": 2.6595744680851063,
      "grad_norm": 18.956735610961914,
      "learning_rate": 2.3406567992599446e-05,
      "loss": 0.9063,
      "step": 11500
    },
    {
      "epoch": 2.682701202590194,
      "grad_norm": 15.463052749633789,
      "learning_rate": 2.317530064754857e-05,
      "loss": 0.8814,
      "step": 11600
    },
    {
      "epoch": 2.705827937095282,
      "grad_norm": 13.263724327087402,
      "learning_rate": 2.2944033302497688e-05,
      "loss": 0.9089,
      "step": 11700
    },
    {
      "epoch": 2.72895467160037,
      "grad_norm": 15.596953392028809,
      "learning_rate": 2.271276595744681e-05,
      "loss": 0.9048,
      "step": 11800
    },
    {
      "epoch": 2.752081406105458,
      "grad_norm": 16.61760711669922,
      "learning_rate": 2.2481498612395933e-05,
      "loss": 0.9268,
      "step": 11900
    },
    {
      "epoch": 2.7752081406105455,
      "grad_norm": 16.409652709960938,
      "learning_rate": 2.2250231267345052e-05,
      "loss": 0.8913,
      "step": 12000
    },
    {
      "epoch": 2.798334875115634,
      "grad_norm": 14.509052276611328,
      "learning_rate": 2.2018963922294172e-05,
      "loss": 0.9064,
      "step": 12100
    },
    {
      "epoch": 2.8214616096207217,
      "grad_norm": 13.914097785949707,
      "learning_rate": 2.1787696577243295e-05,
      "loss": 0.8585,
      "step": 12200
    },
    {
      "epoch": 2.8445883441258095,
      "grad_norm": 13.852704048156738,
      "learning_rate": 2.1556429232192414e-05,
      "loss": 0.878,
      "step": 12300
    },
    {
      "epoch": 2.8677150786308974,
      "grad_norm": 15.329246520996094,
      "learning_rate": 2.1325161887141537e-05,
      "loss": 0.8612,
      "step": 12400
    },
    {
      "epoch": 2.890841813135985,
      "grad_norm": 15.046784400939941,
      "learning_rate": 2.109389454209066e-05,
      "loss": 0.8799,
      "step": 12500
    },
    {
      "epoch": 2.913968547641073,
      "grad_norm": 15.824640274047852,
      "learning_rate": 2.086262719703978e-05,
      "loss": 0.8541,
      "step": 12600
    },
    {
      "epoch": 2.937095282146161,
      "grad_norm": 15.991775512695312,
      "learning_rate": 2.0631359851988898e-05,
      "loss": 0.8521,
      "step": 12700
    },
    {
      "epoch": 2.9602220166512487,
      "grad_norm": 16.13051986694336,
      "learning_rate": 2.040009250693802e-05,
      "loss": 0.8582,
      "step": 12800
    },
    {
      "epoch": 2.9833487511563366,
      "grad_norm": 14.818473815917969,
      "learning_rate": 2.0168825161887143e-05,
      "loss": 0.869,
      "step": 12900
    },
    {
      "epoch": 3.0,
      "eval_cosine_accuracy": 0.9225280326197758,
      "eval_cosine_accuracy_threshold": 0.7901061773300171,
      "eval_cosine_ap": 0.8478615501518483,
      "eval_cosine_f1": 0.7559554803436604,
      "eval_cosine_f1_threshold": 0.7817596793174744,
      "eval_cosine_mcc": 0.7071656901034916,
      "eval_cosine_precision": 0.756201575623413,
      "eval_cosine_recall": 0.7557095451883662,
      "eval_loss": 10.411548614501953,
      "eval_runtime": 1486.9605,
      "eval_samples_per_second": 124.03,
      "eval_steps_per_second": 1.938,
      "step": 12972
    }
  ],
  "logging_steps": 100,
  "max_steps": 21620,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}