Upload training_logs/dpo_log.jsonl with huggingface_hub
Browse files- training_logs/dpo_log.jsonl +95 -0
training_logs/dpo_log.jsonl
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"step": 10, "loss": 0.7798, "accuracy": 0.4375, "reward_margin": 0.0867, "lr": 4.5e-08, "elapsed_s": 11.5}
|
| 2 |
+
{"step": 20, "loss": 0.7714, "accuracy": 0.375, "reward_margin": -0.0647, "lr": 9.499999999999999e-08, "elapsed_s": 19.4}
|
| 3 |
+
{"step": 30, "loss": 0.7522, "accuracy": 0.4125, "reward_margin": -0.0699, "lr": 1.45e-07, "elapsed_s": 27.2}
|
| 4 |
+
{"step": 40, "loss": 0.6921, "accuracy": 0.525, "reward_margin": 0.0268, "lr": 1.9499999999999999e-07, "elapsed_s": 35.0}
|
| 5 |
+
{"step": 50, "loss": 0.6667, "accuracy": 0.525, "reward_margin": 0.1128, "lr": 2.45e-07, "elapsed_s": 42.7}
|
| 6 |
+
{"step": 60, "loss": 0.6777, "accuracy": 0.4625, "reward_margin": 0.0764, "lr": 2.95e-07, "elapsed_s": 50.5}
|
| 7 |
+
{"step": 70, "loss": 0.6515, "accuracy": 0.55, "reward_margin": 0.1272, "lr": 3.45e-07, "elapsed_s": 58.3}
|
| 8 |
+
{"step": 80, "loss": 0.6387, "accuracy": 0.6, "reward_margin": 0.1564, "lr": 3.95e-07, "elapsed_s": 66.2}
|
| 9 |
+
{"step": 90, "loss": 0.586, "accuracy": 0.675, "reward_margin": 0.2776, "lr": 4.45e-07, "elapsed_s": 74.0}
|
| 10 |
+
{"step": 100, "loss": 0.5986, "accuracy": 0.675, "reward_margin": 0.2868, "lr": 4.949999999999999e-07, "elapsed_s": 81.8}
|
| 11 |
+
{"step": 110, "loss": 0.6282, "accuracy": 0.6125, "reward_margin": 0.2265, "lr": 4.998898801231603e-07, "elapsed_s": 89.6}
|
| 12 |
+
{"step": 120, "loss": 0.5832, "accuracy": 0.6375, "reward_margin": 0.3602, "lr": 4.995093745023968e-07, "elapsed_s": 97.5}
|
| 13 |
+
{"step": 130, "loss": 0.6263, "accuracy": 0.6375, "reward_margin": 0.2447, "lr": 4.988576407978475e-07, "elapsed_s": 105.3}
|
| 14 |
+
{"step": 140, "loss": 0.5755, "accuracy": 0.7125, "reward_margin": 0.4094, "lr": 4.979355650254416e-07, "elapsed_s": 113.0}
|
| 15 |
+
{"step": 150, "loss": 0.6593, "accuracy": 0.6125, "reward_margin": 0.271, "lr": 4.967444007244951e-07, "elapsed_s": 120.8}
|
| 16 |
+
{"step": 160, "loss": 0.6044, "accuracy": 0.6, "reward_margin": 0.3641, "lr": 4.952857672535551e-07, "elapsed_s": 128.7}
|
| 17 |
+
{"step": 170, "loss": 0.5297, "accuracy": 0.7, "reward_margin": 0.5777, "lr": 4.935616475889216e-07, "elapsed_s": 136.5}
|
| 18 |
+
{"step": 180, "loss": 0.6645, "accuracy": 0.625, "reward_margin": 0.3387, "lr": 4.9157438562884e-07, "elapsed_s": 144.3}
|
| 19 |
+
{"step": 190, "loss": 0.5507, "accuracy": 0.625, "reward_margin": 0.509, "lr": 4.893266830070295e-07, "elapsed_s": 152.1}
|
| 20 |
+
{"step": 200, "loss": 0.5935, "accuracy": 0.6125, "reward_margin": 0.4919, "lr": 4.86821595419878e-07, "elapsed_s": 159.8}
|
| 21 |
+
{"step": 210, "loss": 0.5545, "accuracy": 0.6125, "reward_margin": 0.486, "lr": 4.840625284722983e-07, "elapsed_s": 179.6}
|
| 22 |
+
{"step": 220, "loss": 0.601, "accuracy": 0.6, "reward_margin": 0.3861, "lr": 4.810532330478923e-07, "elapsed_s": 187.5}
|
| 23 |
+
{"step": 230, "loss": 0.5981, "accuracy": 0.5875, "reward_margin": 0.3972, "lr": 4.777978002097169e-07, "elapsed_s": 195.4}
|
| 24 |
+
{"step": 240, "loss": 0.552, "accuracy": 0.6625, "reward_margin": 0.5146, "lr": 4.743006556385841e-07, "elapsed_s": 203.3}
|
| 25 |
+
{"step": 250, "loss": 0.5747, "accuracy": 0.6, "reward_margin": 0.5023, "lr": 4.7056655361645756e-07, "elapsed_s": 211.1}
|
| 26 |
+
{"step": 260, "loss": 0.6258, "accuracy": 0.575, "reward_margin": 0.3457, "lr": 4.666005705631227e-07, "elapsed_s": 219.0}
|
| 27 |
+
{"step": 270, "loss": 0.5409, "accuracy": 0.6625, "reward_margin": 0.7137, "lr": 4.6240809813491944e-07, "elapsed_s": 226.9}
|
| 28 |
+
{"step": 280, "loss": 0.6261, "accuracy": 0.55, "reward_margin": 0.3915, "lr": 4.579948358949176e-07, "elapsed_s": 234.7}
|
| 29 |
+
{"step": 290, "loss": 0.5672, "accuracy": 0.5875, "reward_margin": 0.5594, "lr": 4.5336678356450174e-07, "elapsed_s": 242.6}
|
| 30 |
+
{"step": 300, "loss": 0.5656, "accuracy": 0.65, "reward_margin": 0.5763, "lr": 4.485302328668972e-07, "elapsed_s": 250.5}
|
| 31 |
+
{"step": 310, "loss": 0.5736, "accuracy": 0.6625, "reward_margin": 0.5858, "lr": 4.4349175897372746e-07, "elapsed_s": 258.4}
|
| 32 |
+
{"step": 320, "loss": 0.5536, "accuracy": 0.675, "reward_margin": 0.5922, "lr": 4.3825821156623e-07, "elapsed_s": 266.3}
|
| 33 |
+
{"step": 330, "loss": 0.5818, "accuracy": 0.65, "reward_margin": 0.4938, "lr": 4.328367055232836e-07, "elapsed_s": 276.1}
|
| 34 |
+
{"step": 340, "loss": 0.517, "accuracy": 0.7375, "reward_margin": 0.8002, "lr": 4.2723461124890523e-07, "elapsed_s": 284.0}
|
| 35 |
+
{"step": 350, "loss": 0.54, "accuracy": 0.7375, "reward_margin": 0.6545, "lr": 4.2145954465236736e-07, "elapsed_s": 291.8}
|
| 36 |
+
{"step": 360, "loss": 0.5317, "accuracy": 0.7125, "reward_margin": 0.6229, "lr": 4.155193567945568e-07, "elapsed_s": 299.6}
|
| 37 |
+
{"step": 370, "loss": 0.6032, "accuracy": 0.65, "reward_margin": 0.4373, "lr": 4.094221232146508e-07, "elapsed_s": 307.5}
|
| 38 |
+
{"step": 380, "loss": 0.6854, "accuracy": 0.575, "reward_margin": 0.2799, "lr": 4.0317613295162e-07, "elapsed_s": 315.4}
|
| 39 |
+
{"step": 390, "loss": 0.5968, "accuracy": 0.6125, "reward_margin": 0.4253, "lr": 3.967898772754842e-07, "elapsed_s": 323.3}
|
| 40 |
+
{"step": 400, "loss": 0.5062, "accuracy": 0.7625, "reward_margin": 0.7942, "lr": 3.9027203814363984e-07, "elapsed_s": 331.2}
|
| 41 |
+
{"step": 410, "loss": 0.648, "accuracy": 0.5625, "reward_margin": 0.2849, "lr": 3.8363147639795234e-07, "elapsed_s": 353.3}
|
| 42 |
+
{"step": 420, "loss": 0.6355, "accuracy": 0.625, "reward_margin": 0.4522, "lr": 3.7687721971866007e-07, "elapsed_s": 361.3}
|
| 43 |
+
{"step": 430, "loss": 0.5253, "accuracy": 0.7, "reward_margin": 0.5549, "lr": 3.7001845035146485e-07, "elapsed_s": 369.2}
|
| 44 |
+
{"step": 440, "loss": 0.5343, "accuracy": 0.7125, "reward_margin": 0.6107, "lr": 3.6306449262449543e-07, "elapsed_s": 377.1}
|
| 45 |
+
{"step": 450, "loss": 0.479, "accuracy": 0.775, "reward_margin": 0.7882, "lr": 3.560248002721124e-07, "elapsed_s": 385.0}
|
| 46 |
+
{"step": 460, "loss": 0.5344, "accuracy": 0.65, "reward_margin": 0.6866, "lr": 3.4890894358278937e-07, "elapsed_s": 392.9}
|
| 47 |
+
{"step": 470, "loss": 0.534, "accuracy": 0.675, "reward_margin": 0.6002, "lr": 3.417265963885413e-07, "elapsed_s": 400.8}
|
| 48 |
+
{"step": 480, "loss": 0.5583, "accuracy": 0.5625, "reward_margin": 0.7706, "lr": 3.3448752291358786e-07, "elapsed_s": 408.7}
|
| 49 |
+
{"step": 490, "loss": 0.5412, "accuracy": 0.65, "reward_margin": 0.7216, "lr": 3.272015645001312e-07, "elapsed_s": 416.6}
|
| 50 |
+
{"step": 500, "loss": 0.542, "accuracy": 0.6625, "reward_margin": 0.8168, "lr": 3.1987862622929316e-07, "elapsed_s": 424.5}
|
| 51 |
+
{"step": 510, "loss": 0.6497, "accuracy": 0.5625, "reward_margin": 0.5525, "lr": 3.125286634554015e-07, "elapsed_s": 432.4}
|
| 52 |
+
{"step": 520, "loss": 0.5132, "accuracy": 0.7125, "reward_margin": 0.712, "lr": 3.0516166827193075e-07, "elapsed_s": 440.4}
|
| 53 |
+
{"step": 530, "loss": 0.5197, "accuracy": 0.6375, "reward_margin": 0.7373, "lr": 2.977876559274969e-07, "elapsed_s": 448.3}
|
| 54 |
+
{"step": 540, "loss": 0.4932, "accuracy": 0.7125, "reward_margin": 0.8599, "lr": 2.9041665121037345e-07, "elapsed_s": 456.2}
|
| 55 |
+
{"step": 550, "loss": 0.5508, "accuracy": 0.7, "reward_margin": 0.6573, "lr": 2.8305867482003896e-07, "elapsed_s": 464.1}
|
| 56 |
+
{"step": 560, "loss": 0.5707, "accuracy": 0.6125, "reward_margin": 0.8398, "lr": 2.757237297442821e-07, "elapsed_s": 472.1}
|
| 57 |
+
{"step": 570, "loss": 0.5083, "accuracy": 0.7375, "reward_margin": 0.8327, "lr": 2.6842178766038637e-07, "elapsed_s": 480.0}
|
| 58 |
+
{"step": 580, "loss": 0.5289, "accuracy": 0.675, "reward_margin": 0.8025, "lr": 2.611627753788802e-07, "elapsed_s": 487.9}
|
| 59 |
+
{"step": 590, "loss": 0.5546, "accuracy": 0.6625, "reward_margin": 0.5996, "lr": 2.5395656134828237e-07, "elapsed_s": 495.8}
|
| 60 |
+
{"step": 600, "loss": 0.5843, "accuracy": 0.65, "reward_margin": 0.5105, "lr": 2.468129422391892e-07, "elapsed_s": 503.8}
|
| 61 |
+
{"step": 610, "loss": 0.4989, "accuracy": 0.675, "reward_margin": 0.73, "lr": 2.3974162962594177e-07, "elapsed_s": 526.7}
|
| 62 |
+
{"step": 620, "loss": 0.5662, "accuracy": 0.6875, "reward_margin": 0.5796, "lr": 2.3275223678398024e-07, "elapsed_s": 534.6}
|
| 63 |
+
{"step": 630, "loss": 0.5067, "accuracy": 0.7, "reward_margin": 0.7825, "lr": 2.2585426562083175e-07, "elapsed_s": 542.5}
|
| 64 |
+
{"step": 640, "loss": 0.5426, "accuracy": 0.6375, "reward_margin": 0.6038, "lr": 2.1905709375850164e-07, "elapsed_s": 550.4}
|
| 65 |
+
{"step": 650, "loss": 0.5449, "accuracy": 0.6625, "reward_margin": 0.6423, "lr": 2.1236996178482677e-07, "elapsed_s": 558.3}
|
| 66 |
+
{"step": 660, "loss": 0.4974, "accuracy": 0.725, "reward_margin": 0.9135, "lr": 2.058019606911242e-07, "elapsed_s": 566.2}
|
| 67 |
+
{"step": 670, "loss": 0.4447, "accuracy": 0.7125, "reward_margin": 0.955, "lr": 1.9936201951321162e-07, "elapsed_s": 574.1}
|
| 68 |
+
{"step": 680, "loss": 0.5137, "accuracy": 0.6875, "reward_margin": 0.8216, "lr": 1.9305889319260398e-07, "elapsed_s": 582.0}
|
| 69 |
+
{"step": 690, "loss": 0.4977, "accuracy": 0.6875, "reward_margin": 0.9253, "lr": 1.869011506743846e-07, "elapsed_s": 589.9}
|
| 70 |
+
{"step": 700, "loss": 0.5017, "accuracy": 0.675, "reward_margin": 0.8083, "lr": 1.8089716325793666e-07, "elapsed_s": 597.8}
|
| 71 |
+
{"step": 710, "loss": 0.5133, "accuracy": 0.7125, "reward_margin": 0.872, "lr": 1.7505509321636675e-07, "elapsed_s": 605.7}
|
| 72 |
+
{"step": 720, "loss": 0.5824, "accuracy": 0.7, "reward_margin": 0.7471, "lr": 1.6938288270009618e-07, "elapsed_s": 613.5}
|
| 73 |
+
{"step": 730, "loss": 0.5589, "accuracy": 0.6, "reward_margin": 0.6109, "lr": 1.638882429397021e-07, "elapsed_s": 621.4}
|
| 74 |
+
{"step": 740, "loss": 0.5899, "accuracy": 0.625, "reward_margin": 0.5541, "lr": 1.585786437626905e-07, "elapsed_s": 629.4}
|
| 75 |
+
{"step": 750, "loss": 0.506, "accuracy": 0.7125, "reward_margin": 0.8685, "lr": 1.5346130343844857e-07, "elapsed_s": 637.3}
|
| 76 |
+
{"step": 760, "loss": 0.571, "accuracy": 0.6875, "reward_margin": 0.6311, "lr": 1.485431788651856e-07, "elapsed_s": 645.3}
|
| 77 |
+
{"step": 770, "loss": 0.5197, "accuracy": 0.6875, "reward_margin": 0.7544, "lr": 1.438309561122013e-07, "elapsed_s": 653.2}
|
| 78 |
+
{"step": 780, "loss": 0.5176, "accuracy": 0.675, "reward_margin": 0.8367, "lr": 1.3933104133033846e-07, "elapsed_s": 661.2}
|
| 79 |
+
{"step": 790, "loss": 0.6066, "accuracy": 0.6, "reward_margin": 0.5395, "lr": 1.3504955204297946e-07, "elapsed_s": 669.1}
|
| 80 |
+
{"step": 800, "loss": 0.5212, "accuracy": 0.6875, "reward_margin": 0.7758, "lr": 1.3099230882942304e-07, "elapsed_s": 677.0}
|
| 81 |
+
{"step": 810, "loss": 0.4471, "accuracy": 0.7625, "reward_margin": 1.008, "lr": 1.2716482741195066e-07, "elapsed_s": 698.5}
|
| 82 |
+
{"step": 820, "loss": 0.5211, "accuracy": 0.675, "reward_margin": 0.7755, "lr": 1.235723111573371e-07, "elapsed_s": 708.0}
|
| 83 |
+
{"step": 830, "loss": 0.5618, "accuracy": 0.7, "reward_margin": 0.6765, "lr": 1.2021964400300216e-07, "elapsed_s": 715.8}
|
| 84 |
+
{"step": 840, "loss": 0.4606, "accuracy": 0.7, "reward_margin": 1.1556, "lr": 1.171113838174174e-07, "elapsed_s": 723.8}
|
| 85 |
+
{"step": 850, "loss": 0.5364, "accuracy": 0.6625, "reward_margin": 0.7852, "lr": 1.1425175620379659e-07, "elapsed_s": 731.7}
|
| 86 |
+
{"step": 860, "loss": 0.4544, "accuracy": 0.75, "reward_margin": 1.0015, "lr": 1.1164464875549158e-07, "elapsed_s": 739.6}
|
| 87 |
+
{"step": 870, "loss": 0.4745, "accuracy": 0.7375, "reward_margin": 0.9336, "lr": 1.0929360577090547e-07, "elapsed_s": 747.4}
|
| 88 |
+
{"step": 880, "loss": 0.5352, "accuracy": 0.7375, "reward_margin": 0.6781, "lr": 1.0720182343510565e-07, "elapsed_s": 755.4}
|
| 89 |
+
{"step": 890, "loss": 0.5363, "accuracy": 0.75, "reward_margin": 0.8074, "lr": 1.0537214547468929e-07, "elapsed_s": 763.3}
|
| 90 |
+
{"step": 900, "loss": 0.5094, "accuracy": 0.625, "reward_margin": 0.8066, "lr": 1.0380705929180662e-07, "elapsed_s": 771.2}
|
| 91 |
+
{"step": 910, "loss": 0.4909, "accuracy": 0.7125, "reward_margin": 1.0196, "lr": 1.0250869258259928e-07, "elapsed_s": 779.0}
|
| 92 |
+
{"step": 920, "loss": 0.4845, "accuracy": 0.725, "reward_margin": 0.8083, "lr": 1.0147881044464963e-07, "elapsed_s": 786.9}
|
| 93 |
+
{"step": 930, "loss": 0.4922, "accuracy": 0.7375, "reward_margin": 0.8589, "lr": 1.0071881297737406e-07, "elapsed_s": 794.8}
|
| 94 |
+
{"step": 940, "loss": 0.4391, "accuracy": 0.725, "reward_margin": 1.084, "lr": 1.0022973337862222e-07, "elapsed_s": 802.7}
|
| 95 |
+
{"step": 950, "loss": 0.5421, "accuracy": 0.625, "reward_margin": 0.8424, "lr": 1.0001223654007014e-07, "elapsed_s": 810.6}
|