| { | |
| "model_base": "Qwen/Qwen3-4B-Instruct-2507", | |
| "model_name": "RegTech-4B-Instruct", | |
| "dataset": "./train.jsonl", | |
| "env_file": "/home/ubuntu/sophia-core-server/.tuning/.env.4B", | |
| "train_samples": 923, | |
| "eval_samples": 102, | |
| "params": { | |
| "rank": 16, | |
| "alpha": 32, | |
| "dropout": 0.1, | |
| "lr": 5e-06, | |
| "scheduler": "cosine", | |
| "epochs": 3, | |
| "effective_batch": 4, | |
| "max_seq_length": 4096, | |
| "neftune_alpha": 5.0, | |
| "target_modules": [ | |
| "q_proj", | |
| "k_proj", | |
| "v_proj", | |
| "o_proj", | |
| "gate_proj", | |
| "up_proj", | |
| "down_proj" | |
| ] | |
| }, | |
| "results": { | |
| "total_steps": 693, | |
| "final_train_loss": 1.241, | |
| "best_eval_loss": 1.1907687187194824, | |
| "best_eval_step": 680, | |
| "elapsed_minutes": 11.9 | |
| }, | |
| "loss_history": { | |
| "train": [ | |
| [ | |
| 5, | |
| 1.9266 | |
| ], | |
| [ | |
| 10, | |
| 2.1546 | |
| ], | |
| [ | |
| 15, | |
| 2.377 | |
| ], | |
| [ | |
| 20, | |
| 2.1251 | |
| ], | |
| [ | |
| 25, | |
| 2.3294 | |
| ], | |
| [ | |
| 30, | |
| 2.2016 | |
| ], | |
| [ | |
| 35, | |
| 2.1198 | |
| ], | |
| [ | |
| 40, | |
| 2.0536 | |
| ], | |
| [ | |
| 45, | |
| 2.0804 | |
| ], | |
| [ | |
| 50, | |
| 1.98 | |
| ], | |
| [ | |
| 55, | |
| 1.6711 | |
| ], | |
| [ | |
| 60, | |
| 2.043 | |
| ], | |
| [ | |
| 65, | |
| 1.7905 | |
| ], | |
| [ | |
| 70, | |
| 1.9725 | |
| ], | |
| [ | |
| 75, | |
| 1.8905 | |
| ], | |
| [ | |
| 80, | |
| 1.8013 | |
| ], | |
| [ | |
| 85, | |
| 1.6943 | |
| ], | |
| [ | |
| 90, | |
| 1.5479 | |
| ], | |
| [ | |
| 95, | |
| 1.71 | |
| ], | |
| [ | |
| 100, | |
| 1.7296 | |
| ], | |
| [ | |
| 105, | |
| 1.6177 | |
| ], | |
| [ | |
| 110, | |
| 1.5224 | |
| ], | |
| [ | |
| 115, | |
| 1.6453 | |
| ], | |
| [ | |
| 120, | |
| 1.4372 | |
| ], | |
| [ | |
| 125, | |
| 1.3471 | |
| ], | |
| [ | |
| 130, | |
| 1.7257 | |
| ], | |
| [ | |
| 135, | |
| 1.307 | |
| ], | |
| [ | |
| 140, | |
| 1.6563 | |
| ], | |
| [ | |
| 145, | |
| 1.4156 | |
| ], | |
| [ | |
| 150, | |
| 1.5117 | |
| ], | |
| [ | |
| 155, | |
| 1.5545 | |
| ], | |
| [ | |
| 160, | |
| 1.3888 | |
| ], | |
| [ | |
| 165, | |
| 1.5327 | |
| ], | |
| [ | |
| 170, | |
| 1.4855 | |
| ], | |
| [ | |
| 175, | |
| 1.1613 | |
| ], | |
| [ | |
| 180, | |
| 1.2964 | |
| ], | |
| [ | |
| 185, | |
| 1.443 | |
| ], | |
| [ | |
| 190, | |
| 1.4158 | |
| ], | |
| [ | |
| 195, | |
| 1.3793 | |
| ], | |
| [ | |
| 200, | |
| 1.1051 | |
| ], | |
| [ | |
| 205, | |
| 1.3441 | |
| ], | |
| [ | |
| 210, | |
| 1.2532 | |
| ], | |
| [ | |
| 215, | |
| 1.296 | |
| ], | |
| [ | |
| 220, | |
| 1.1679 | |
| ], | |
| [ | |
| 225, | |
| 1.3165 | |
| ], | |
| [ | |
| 230, | |
| 1.2472 | |
| ], | |
| [ | |
| 235, | |
| 1.5172 | |
| ], | |
| [ | |
| 240, | |
| 1.2431 | |
| ], | |
| [ | |
| 245, | |
| 1.2391 | |
| ], | |
| [ | |
| 250, | |
| 1.1359 | |
| ], | |
| [ | |
| 255, | |
| 1.181 | |
| ], | |
| [ | |
| 260, | |
| 1.3262 | |
| ], | |
| [ | |
| 265, | |
| 1.4351 | |
| ], | |
| [ | |
| 270, | |
| 1.2283 | |
| ], | |
| [ | |
| 275, | |
| 1.2195 | |
| ], | |
| [ | |
| 280, | |
| 1.3892 | |
| ], | |
| [ | |
| 285, | |
| 1.4254 | |
| ], | |
| [ | |
| 290, | |
| 1.2606 | |
| ], | |
| [ | |
| 295, | |
| 1.1506 | |
| ], | |
| [ | |
| 300, | |
| 1.138 | |
| ], | |
| [ | |
| 305, | |
| 1.3738 | |
| ], | |
| [ | |
| 310, | |
| 1.2216 | |
| ], | |
| [ | |
| 315, | |
| 1.2873 | |
| ], | |
| [ | |
| 320, | |
| 1.1959 | |
| ], | |
| [ | |
| 325, | |
| 1.2746 | |
| ], | |
| [ | |
| 330, | |
| 1.2089 | |
| ], | |
| [ | |
| 335, | |
| 1.2466 | |
| ], | |
| [ | |
| 340, | |
| 1.1855 | |
| ], | |
| [ | |
| 345, | |
| 1.3954 | |
| ], | |
| [ | |
| 350, | |
| 1.0918 | |
| ], | |
| [ | |
| 355, | |
| 1.2062 | |
| ], | |
| [ | |
| 360, | |
| 1.2809 | |
| ], | |
| [ | |
| 365, | |
| 1.3698 | |
| ], | |
| [ | |
| 370, | |
| 1.3147 | |
| ], | |
| [ | |
| 375, | |
| 1.1451 | |
| ], | |
| [ | |
| 380, | |
| 1.0194 | |
| ], | |
| [ | |
| 385, | |
| 1.1412 | |
| ], | |
| [ | |
| 390, | |
| 1.1606 | |
| ], | |
| [ | |
| 395, | |
| 1.1469 | |
| ], | |
| [ | |
| 400, | |
| 1.2781 | |
| ], | |
| [ | |
| 405, | |
| 1.2447 | |
| ], | |
| [ | |
| 410, | |
| 1.3158 | |
| ], | |
| [ | |
| 415, | |
| 1.1832 | |
| ], | |
| [ | |
| 420, | |
| 1.2036 | |
| ], | |
| [ | |
| 425, | |
| 1.1602 | |
| ], | |
| [ | |
| 430, | |
| 1.4217 | |
| ], | |
| [ | |
| 435, | |
| 1.1954 | |
| ], | |
| [ | |
| 440, | |
| 1.1913 | |
| ], | |
| [ | |
| 445, | |
| 1.0779 | |
| ], | |
| [ | |
| 450, | |
| 1.1522 | |
| ], | |
| [ | |
| 455, | |
| 1.2223 | |
| ], | |
| [ | |
| 460, | |
| 1.0818 | |
| ], | |
| [ | |
| 465, | |
| 1.429 | |
| ], | |
| [ | |
| 470, | |
| 1.145 | |
| ], | |
| [ | |
| 475, | |
| 1.1874 | |
| ], | |
| [ | |
| 480, | |
| 1.0495 | |
| ], | |
| [ | |
| 485, | |
| 1.0664 | |
| ], | |
| [ | |
| 490, | |
| 1.2633 | |
| ], | |
| [ | |
| 495, | |
| 1.3179 | |
| ], | |
| [ | |
| 500, | |
| 1.2056 | |
| ], | |
| [ | |
| 505, | |
| 1.1259 | |
| ], | |
| [ | |
| 510, | |
| 1.2683 | |
| ], | |
| [ | |
| 515, | |
| 1.0229 | |
| ], | |
| [ | |
| 520, | |
| 1.2201 | |
| ], | |
| [ | |
| 525, | |
| 1.1291 | |
| ], | |
| [ | |
| 530, | |
| 1.1688 | |
| ], | |
| [ | |
| 535, | |
| 1.2019 | |
| ], | |
| [ | |
| 540, | |
| 1.1733 | |
| ], | |
| [ | |
| 545, | |
| 1.0913 | |
| ], | |
| [ | |
| 550, | |
| 1.2309 | |
| ], | |
| [ | |
| 555, | |
| 1.315 | |
| ], | |
| [ | |
| 560, | |
| 1.1578 | |
| ], | |
| [ | |
| 565, | |
| 1.2228 | |
| ], | |
| [ | |
| 570, | |
| 0.9872 | |
| ], | |
| [ | |
| 575, | |
| 1.1328 | |
| ], | |
| [ | |
| 580, | |
| 1.1709 | |
| ], | |
| [ | |
| 585, | |
| 1.1675 | |
| ], | |
| [ | |
| 590, | |
| 1.3192 | |
| ], | |
| [ | |
| 595, | |
| 1.0701 | |
| ], | |
| [ | |
| 600, | |
| 1.1484 | |
| ], | |
| [ | |
| 605, | |
| 0.9753 | |
| ], | |
| [ | |
| 610, | |
| 1.2143 | |
| ], | |
| [ | |
| 615, | |
| 1.238 | |
| ], | |
| [ | |
| 620, | |
| 1.149 | |
| ], | |
| [ | |
| 625, | |
| 1.1859 | |
| ], | |
| [ | |
| 630, | |
| 1.225 | |
| ], | |
| [ | |
| 635, | |
| 1.1635 | |
| ], | |
| [ | |
| 640, | |
| 1.2214 | |
| ], | |
| [ | |
| 645, | |
| 1.1366 | |
| ], | |
| [ | |
| 650, | |
| 1.3328 | |
| ], | |
| [ | |
| 655, | |
| 1.1442 | |
| ], | |
| [ | |
| 660, | |
| 1.0937 | |
| ], | |
| [ | |
| 665, | |
| 1.1968 | |
| ], | |
| [ | |
| 670, | |
| 1.2908 | |
| ], | |
| [ | |
| 675, | |
| 1.2003 | |
| ], | |
| [ | |
| 680, | |
| 1.0809 | |
| ], | |
| [ | |
| 685, | |
| 1.2908 | |
| ], | |
| [ | |
| 690, | |
| 1.241 | |
| ] | |
| ], | |
| "eval": [ | |
| [ | |
| 40, | |
| 2.1869590282440186 | |
| ], | |
| [ | |
| 80, | |
| 1.8702703714370728 | |
| ], | |
| [ | |
| 120, | |
| 1.5918081998825073 | |
| ], | |
| [ | |
| 160, | |
| 1.460960865020752 | |
| ], | |
| [ | |
| 200, | |
| 1.3858165740966797 | |
| ], | |
| [ | |
| 240, | |
| 1.3407992124557495 | |
| ], | |
| [ | |
| 280, | |
| 1.3039580583572388 | |
| ], | |
| [ | |
| 320, | |
| 1.2727303504943848 | |
| ], | |
| [ | |
| 360, | |
| 1.245557188987732 | |
| ], | |
| [ | |
| 400, | |
| 1.2276334762573242 | |
| ], | |
| [ | |
| 440, | |
| 1.213688850402832 | |
| ], | |
| [ | |
| 480, | |
| 1.2049111127853394 | |
| ], | |
| [ | |
| 520, | |
| 1.1985464096069336 | |
| ], | |
| [ | |
| 560, | |
| 1.1946879625320435 | |
| ], | |
| [ | |
| 600, | |
| 1.1919087171554565 | |
| ], | |
| [ | |
| 640, | |
| 1.1909754276275635 | |
| ], | |
| [ | |
| 680, | |
| 1.1907687187194824 | |
| ] | |
| ] | |
| } | |
| } |