| { | |
| "model_base": "Qwen/Qwen2.5-14B-Instruct", | |
| "model_name": "RegTech-14B-Instruct", | |
| "dataset": "./train.jsonl", | |
| "env_file": "/home/ubuntu/sophia-core-server/.tuning/.env.14B", | |
| "train_samples": 923, | |
| "eval_samples": 102, | |
| "params": { | |
| "rank": 16, | |
| "alpha": 32, | |
| "dropout": 0.1, | |
| "lr": 5e-06, | |
| "scheduler": "cosine", | |
| "epochs": 3, | |
| "effective_batch": 4, | |
| "max_seq_length": 4096, | |
| "neftune_alpha": 5.0, | |
| "target_modules": [ | |
| "q_proj", | |
| "k_proj", | |
| "v_proj", | |
| "o_proj", | |
| "gate_proj", | |
| "up_proj", | |
| "down_proj" | |
| ] | |
| }, | |
| "results": { | |
| "total_steps": 693, | |
| "final_train_loss": 1.1265, | |
| "best_eval_loss": 1.2247475385665894, | |
| "best_eval_step": 640, | |
| "elapsed_minutes": 23.5 | |
| }, | |
| "loss_history": { | |
| "train": [ | |
| [ | |
| 5, | |
| 1.7295 | |
| ], | |
| [ | |
| 10, | |
| 1.802 | |
| ], | |
| [ | |
| 15, | |
| 2.0237 | |
| ], | |
| [ | |
| 20, | |
| 1.7917 | |
| ], | |
| [ | |
| 25, | |
| 2.0068 | |
| ], | |
| [ | |
| 30, | |
| 1.9094 | |
| ], | |
| [ | |
| 35, | |
| 1.8299 | |
| ], | |
| [ | |
| 40, | |
| 1.7277 | |
| ], | |
| [ | |
| 45, | |
| 1.8172 | |
| ], | |
| [ | |
| 50, | |
| 1.7058 | |
| ], | |
| [ | |
| 55, | |
| 1.3853 | |
| ], | |
| [ | |
| 60, | |
| 1.7634 | |
| ], | |
| [ | |
| 65, | |
| 1.5767 | |
| ], | |
| [ | |
| 70, | |
| 1.754 | |
| ], | |
| [ | |
| 75, | |
| 1.7128 | |
| ], | |
| [ | |
| 80, | |
| 1.5807 | |
| ], | |
| [ | |
| 85, | |
| 1.5355 | |
| ], | |
| [ | |
| 90, | |
| 1.4244 | |
| ], | |
| [ | |
| 95, | |
| 1.5826 | |
| ], | |
| [ | |
| 100, | |
| 1.5446 | |
| ], | |
| [ | |
| 105, | |
| 1.4907 | |
| ], | |
| [ | |
| 110, | |
| 1.432 | |
| ], | |
| [ | |
| 115, | |
| 1.5543 | |
| ], | |
| [ | |
| 120, | |
| 1.3322 | |
| ], | |
| [ | |
| 125, | |
| 1.2579 | |
| ], | |
| [ | |
| 130, | |
| 1.7049 | |
| ], | |
| [ | |
| 135, | |
| 1.2563 | |
| ], | |
| [ | |
| 140, | |
| 1.6188 | |
| ], | |
| [ | |
| 145, | |
| 1.3941 | |
| ], | |
| [ | |
| 150, | |
| 1.456 | |
| ], | |
| [ | |
| 155, | |
| 1.5397 | |
| ], | |
| [ | |
| 160, | |
| 1.3587 | |
| ], | |
| [ | |
| 165, | |
| 1.4827 | |
| ], | |
| [ | |
| 170, | |
| 1.4644 | |
| ], | |
| [ | |
| 175, | |
| 1.1337 | |
| ], | |
| [ | |
| 180, | |
| 1.302 | |
| ], | |
| [ | |
| 185, | |
| 1.4177 | |
| ], | |
| [ | |
| 190, | |
| 1.3671 | |
| ], | |
| [ | |
| 195, | |
| 1.3418 | |
| ], | |
| [ | |
| 200, | |
| 1.0722 | |
| ], | |
| [ | |
| 205, | |
| 1.315 | |
| ], | |
| [ | |
| 210, | |
| 1.1977 | |
| ], | |
| [ | |
| 215, | |
| 1.2221 | |
| ], | |
| [ | |
| 220, | |
| 1.1184 | |
| ], | |
| [ | |
| 225, | |
| 1.282 | |
| ], | |
| [ | |
| 230, | |
| 1.1874 | |
| ], | |
| [ | |
| 235, | |
| 1.4661 | |
| ], | |
| [ | |
| 240, | |
| 1.1914 | |
| ], | |
| [ | |
| 245, | |
| 1.1856 | |
| ], | |
| [ | |
| 250, | |
| 1.0897 | |
| ], | |
| [ | |
| 255, | |
| 1.1284 | |
| ], | |
| [ | |
| 260, | |
| 1.27 | |
| ], | |
| [ | |
| 265, | |
| 1.378 | |
| ], | |
| [ | |
| 270, | |
| 1.1756 | |
| ], | |
| [ | |
| 275, | |
| 1.1799 | |
| ], | |
| [ | |
| 280, | |
| 1.3092 | |
| ], | |
| [ | |
| 285, | |
| 1.3576 | |
| ], | |
| [ | |
| 290, | |
| 1.2254 | |
| ], | |
| [ | |
| 295, | |
| 1.076 | |
| ], | |
| [ | |
| 300, | |
| 1.0677 | |
| ], | |
| [ | |
| 305, | |
| 1.3067 | |
| ], | |
| [ | |
| 310, | |
| 1.1462 | |
| ], | |
| [ | |
| 315, | |
| 1.2189 | |
| ], | |
| [ | |
| 320, | |
| 1.1138 | |
| ], | |
| [ | |
| 325, | |
| 1.2075 | |
| ], | |
| [ | |
| 330, | |
| 1.1388 | |
| ], | |
| [ | |
| 335, | |
| 1.1792 | |
| ], | |
| [ | |
| 340, | |
| 1.1284 | |
| ], | |
| [ | |
| 345, | |
| 1.3012 | |
| ], | |
| [ | |
| 350, | |
| 0.9941 | |
| ], | |
| [ | |
| 355, | |
| 1.1375 | |
| ], | |
| [ | |
| 360, | |
| 1.2087 | |
| ], | |
| [ | |
| 365, | |
| 1.2662 | |
| ], | |
| [ | |
| 370, | |
| 1.2091 | |
| ], | |
| [ | |
| 375, | |
| 1.0686 | |
| ], | |
| [ | |
| 380, | |
| 0.9329 | |
| ], | |
| [ | |
| 385, | |
| 1.0478 | |
| ], | |
| [ | |
| 390, | |
| 1.0606 | |
| ], | |
| [ | |
| 395, | |
| 1.0607 | |
| ], | |
| [ | |
| 400, | |
| 1.1947 | |
| ], | |
| [ | |
| 405, | |
| 1.158 | |
| ], | |
| [ | |
| 410, | |
| 1.2246 | |
| ], | |
| [ | |
| 415, | |
| 1.0888 | |
| ], | |
| [ | |
| 420, | |
| 1.1178 | |
| ], | |
| [ | |
| 425, | |
| 1.0749 | |
| ], | |
| [ | |
| 430, | |
| 1.3176 | |
| ], | |
| [ | |
| 435, | |
| 1.0919 | |
| ], | |
| [ | |
| 440, | |
| 1.0837 | |
| ], | |
| [ | |
| 445, | |
| 0.9845 | |
| ], | |
| [ | |
| 450, | |
| 1.0481 | |
| ], | |
| [ | |
| 455, | |
| 1.1088 | |
| ], | |
| [ | |
| 460, | |
| 0.9743 | |
| ], | |
| [ | |
| 465, | |
| 1.3255 | |
| ], | |
| [ | |
| 470, | |
| 1.0583 | |
| ], | |
| [ | |
| 475, | |
| 1.1048 | |
| ], | |
| [ | |
| 480, | |
| 0.9751 | |
| ], | |
| [ | |
| 485, | |
| 0.9574 | |
| ], | |
| [ | |
| 490, | |
| 1.1434 | |
| ], | |
| [ | |
| 495, | |
| 1.2023 | |
| ], | |
| [ | |
| 500, | |
| 1.1073 | |
| ], | |
| [ | |
| 505, | |
| 1.053 | |
| ], | |
| [ | |
| 510, | |
| 1.1742 | |
| ], | |
| [ | |
| 515, | |
| 0.9444 | |
| ], | |
| [ | |
| 520, | |
| 1.088 | |
| ], | |
| [ | |
| 525, | |
| 1.0151 | |
| ], | |
| [ | |
| 530, | |
| 1.0707 | |
| ], | |
| [ | |
| 535, | |
| 1.0992 | |
| ], | |
| [ | |
| 540, | |
| 1.0729 | |
| ], | |
| [ | |
| 545, | |
| 0.9759 | |
| ], | |
| [ | |
| 550, | |
| 1.0995 | |
| ], | |
| [ | |
| 555, | |
| 1.1917 | |
| ], | |
| [ | |
| 560, | |
| 1.0671 | |
| ], | |
| [ | |
| 565, | |
| 1.1079 | |
| ], | |
| [ | |
| 570, | |
| 0.8946 | |
| ], | |
| [ | |
| 575, | |
| 1.0232 | |
| ], | |
| [ | |
| 580, | |
| 1.0634 | |
| ], | |
| [ | |
| 585, | |
| 1.0558 | |
| ], | |
| [ | |
| 590, | |
| 1.1866 | |
| ], | |
| [ | |
| 595, | |
| 0.9822 | |
| ], | |
| [ | |
| 600, | |
| 1.0456 | |
| ], | |
| [ | |
| 605, | |
| 0.8698 | |
| ], | |
| [ | |
| 610, | |
| 1.1024 | |
| ], | |
| [ | |
| 615, | |
| 1.1079 | |
| ], | |
| [ | |
| 620, | |
| 1.0469 | |
| ], | |
| [ | |
| 625, | |
| 1.0726 | |
| ], | |
| [ | |
| 630, | |
| 1.0963 | |
| ], | |
| [ | |
| 635, | |
| 1.0431 | |
| ], | |
| [ | |
| 640, | |
| 1.0866 | |
| ], | |
| [ | |
| 645, | |
| 1.0284 | |
| ], | |
| [ | |
| 650, | |
| 1.2035 | |
| ], | |
| [ | |
| 655, | |
| 1.0262 | |
| ], | |
| [ | |
| 660, | |
| 0.9705 | |
| ], | |
| [ | |
| 665, | |
| 1.0958 | |
| ], | |
| [ | |
| 670, | |
| 1.1898 | |
| ], | |
| [ | |
| 675, | |
| 1.0921 | |
| ], | |
| [ | |
| 680, | |
| 0.9948 | |
| ], | |
| [ | |
| 685, | |
| 1.1798 | |
| ], | |
| [ | |
| 690, | |
| 1.1265 | |
| ] | |
| ], | |
| "eval": [ | |
| [ | |
| 40, | |
| 2.2252511978149414 | |
| ], | |
| [ | |
| 80, | |
| 1.9501206874847412 | |
| ], | |
| [ | |
| 120, | |
| 1.708126187324524 | |
| ], | |
| [ | |
| 160, | |
| 1.5921595096588135 | |
| ], | |
| [ | |
| 200, | |
| 1.513823390007019 | |
| ], | |
| [ | |
| 240, | |
| 1.4531652927398682 | |
| ], | |
| [ | |
| 280, | |
| 1.400567650794983 | |
| ], | |
| [ | |
| 320, | |
| 1.356279969215393 | |
| ], | |
| [ | |
| 360, | |
| 1.3178586959838867 | |
| ], | |
| [ | |
| 400, | |
| 1.288638949394226 | |
| ], | |
| [ | |
| 440, | |
| 1.2657065391540527 | |
| ], | |
| [ | |
| 480, | |
| 1.250612735748291 | |
| ], | |
| [ | |
| 520, | |
| 1.2387887239456177 | |
| ], | |
| [ | |
| 560, | |
| 1.2310446500778198 | |
| ], | |
| [ | |
| 600, | |
| 1.2271382808685303 | |
| ], | |
| [ | |
| 640, | |
| 1.2247475385665894 | |
| ], | |
| [ | |
| 680, | |
| 1.2248592376708984 | |
| ] | |
| ] | |
| } | |
| } |