| { | |
| "model_base": "Qwen/Qwen2.5-7B-Instruct", | |
| "model_name": "RegTech-7B-Instruct", | |
| "dataset": "./train.jsonl", | |
| "env_file": "/home/ubuntu/sophia-core-server/.tuning/.env.7B", | |
| "train_samples": 923, | |
| "eval_samples": 102, | |
| "params": { | |
| "rank": 16, | |
| "alpha": 32, | |
| "dropout": 0.1, | |
| "lr": 5e-06, | |
| "scheduler": "cosine", | |
| "epochs": 3, | |
| "effective_batch": 4, | |
| "max_seq_length": 4096, | |
| "neftune_alpha": 5.0, | |
| "target_modules": [ | |
| "q_proj", | |
| "k_proj", | |
| "v_proj", | |
| "o_proj", | |
| "gate_proj", | |
| "up_proj", | |
| "down_proj" | |
| ] | |
| }, | |
| "results": { | |
| "total_steps": 693, | |
| "final_train_loss": 1.2468, | |
| "best_eval_loss": 1.330103874206543, | |
| "best_eval_step": 680, | |
| "elapsed_minutes": 13.2 | |
| }, | |
| "loss_history": { | |
| "train": [ | |
| [ | |
| 5, | |
| 1.8757 | |
| ], | |
| [ | |
| 10, | |
| 2.0909 | |
| ], | |
| [ | |
| 15, | |
| 2.2852 | |
| ], | |
| [ | |
| 20, | |
| 2.0089 | |
| ], | |
| [ | |
| 25, | |
| 2.2477 | |
| ], | |
| [ | |
| 30, | |
| 2.1666 | |
| ], | |
| [ | |
| 35, | |
| 2.067 | |
| ], | |
| [ | |
| 40, | |
| 1.9636 | |
| ], | |
| [ | |
| 45, | |
| 2.033 | |
| ], | |
| [ | |
| 50, | |
| 1.9022 | |
| ], | |
| [ | |
| 55, | |
| 1.5956 | |
| ], | |
| [ | |
| 60, | |
| 1.9338 | |
| ], | |
| [ | |
| 65, | |
| 1.7406 | |
| ], | |
| [ | |
| 70, | |
| 1.9361 | |
| ], | |
| [ | |
| 75, | |
| 1.8629 | |
| ], | |
| [ | |
| 80, | |
| 1.7614 | |
| ], | |
| [ | |
| 85, | |
| 1.6929 | |
| ], | |
| [ | |
| 90, | |
| 1.5479 | |
| ], | |
| [ | |
| 95, | |
| 1.7109 | |
| ], | |
| [ | |
| 100, | |
| 1.711 | |
| ], | |
| [ | |
| 105, | |
| 1.6211 | |
| ], | |
| [ | |
| 110, | |
| 1.5504 | |
| ], | |
| [ | |
| 115, | |
| 1.6812 | |
| ], | |
| [ | |
| 120, | |
| 1.448 | |
| ], | |
| [ | |
| 125, | |
| 1.3824 | |
| ], | |
| [ | |
| 130, | |
| 1.8145 | |
| ], | |
| [ | |
| 135, | |
| 1.3646 | |
| ], | |
| [ | |
| 140, | |
| 1.7156 | |
| ], | |
| [ | |
| 145, | |
| 1.506 | |
| ], | |
| [ | |
| 150, | |
| 1.5769 | |
| ], | |
| [ | |
| 155, | |
| 1.6277 | |
| ], | |
| [ | |
| 160, | |
| 1.4456 | |
| ], | |
| [ | |
| 165, | |
| 1.6294 | |
| ], | |
| [ | |
| 170, | |
| 1.5673 | |
| ], | |
| [ | |
| 175, | |
| 1.2307 | |
| ], | |
| [ | |
| 180, | |
| 1.3767 | |
| ], | |
| [ | |
| 185, | |
| 1.5116 | |
| ], | |
| [ | |
| 190, | |
| 1.4807 | |
| ], | |
| [ | |
| 195, | |
| 1.448 | |
| ], | |
| [ | |
| 200, | |
| 1.1518 | |
| ], | |
| [ | |
| 205, | |
| 1.4285 | |
| ], | |
| [ | |
| 210, | |
| 1.3302 | |
| ], | |
| [ | |
| 215, | |
| 1.37 | |
| ], | |
| [ | |
| 220, | |
| 1.2196 | |
| ], | |
| [ | |
| 225, | |
| 1.359 | |
| ], | |
| [ | |
| 230, | |
| 1.3188 | |
| ], | |
| [ | |
| 235, | |
| 1.5809 | |
| ], | |
| [ | |
| 240, | |
| 1.3002 | |
| ], | |
| [ | |
| 245, | |
| 1.3007 | |
| ], | |
| [ | |
| 250, | |
| 1.1849 | |
| ], | |
| [ | |
| 255, | |
| 1.2327 | |
| ], | |
| [ | |
| 260, | |
| 1.3838 | |
| ], | |
| [ | |
| 265, | |
| 1.4939 | |
| ], | |
| [ | |
| 270, | |
| 1.2799 | |
| ], | |
| [ | |
| 275, | |
| 1.2842 | |
| ], | |
| [ | |
| 280, | |
| 1.4464 | |
| ], | |
| [ | |
| 285, | |
| 1.4758 | |
| ], | |
| [ | |
| 290, | |
| 1.3162 | |
| ], | |
| [ | |
| 295, | |
| 1.2075 | |
| ], | |
| [ | |
| 300, | |
| 1.1628 | |
| ], | |
| [ | |
| 305, | |
| 1.4353 | |
| ], | |
| [ | |
| 310, | |
| 1.2514 | |
| ], | |
| [ | |
| 315, | |
| 1.3283 | |
| ], | |
| [ | |
| 320, | |
| 1.235 | |
| ], | |
| [ | |
| 325, | |
| 1.342 | |
| ], | |
| [ | |
| 330, | |
| 1.2538 | |
| ], | |
| [ | |
| 335, | |
| 1.2831 | |
| ], | |
| [ | |
| 340, | |
| 1.233 | |
| ], | |
| [ | |
| 345, | |
| 1.4519 | |
| ], | |
| [ | |
| 350, | |
| 1.1003 | |
| ], | |
| [ | |
| 355, | |
| 1.2341 | |
| ], | |
| [ | |
| 360, | |
| 1.3318 | |
| ], | |
| [ | |
| 365, | |
| 1.3949 | |
| ], | |
| [ | |
| 370, | |
| 1.3434 | |
| ], | |
| [ | |
| 375, | |
| 1.205 | |
| ], | |
| [ | |
| 380, | |
| 1.0517 | |
| ], | |
| [ | |
| 385, | |
| 1.1666 | |
| ], | |
| [ | |
| 390, | |
| 1.1897 | |
| ], | |
| [ | |
| 395, | |
| 1.1931 | |
| ], | |
| [ | |
| 400, | |
| 1.3111 | |
| ], | |
| [ | |
| 405, | |
| 1.2772 | |
| ], | |
| [ | |
| 410, | |
| 1.3447 | |
| ], | |
| [ | |
| 415, | |
| 1.2165 | |
| ], | |
| [ | |
| 420, | |
| 1.2571 | |
| ], | |
| [ | |
| 425, | |
| 1.1914 | |
| ], | |
| [ | |
| 430, | |
| 1.4423 | |
| ], | |
| [ | |
| 435, | |
| 1.2113 | |
| ], | |
| [ | |
| 440, | |
| 1.2164 | |
| ], | |
| [ | |
| 445, | |
| 1.0846 | |
| ], | |
| [ | |
| 450, | |
| 1.1764 | |
| ], | |
| [ | |
| 455, | |
| 1.2395 | |
| ], | |
| [ | |
| 460, | |
| 1.0939 | |
| ], | |
| [ | |
| 465, | |
| 1.4491 | |
| ], | |
| [ | |
| 470, | |
| 1.1819 | |
| ], | |
| [ | |
| 475, | |
| 1.22 | |
| ], | |
| [ | |
| 480, | |
| 1.0822 | |
| ], | |
| [ | |
| 485, | |
| 1.0775 | |
| ], | |
| [ | |
| 490, | |
| 1.2643 | |
| ], | |
| [ | |
| 495, | |
| 1.3388 | |
| ], | |
| [ | |
| 500, | |
| 1.2206 | |
| ], | |
| [ | |
| 505, | |
| 1.1611 | |
| ], | |
| [ | |
| 510, | |
| 1.2877 | |
| ], | |
| [ | |
| 515, | |
| 1.0433 | |
| ], | |
| [ | |
| 520, | |
| 1.2022 | |
| ], | |
| [ | |
| 525, | |
| 1.1367 | |
| ], | |
| [ | |
| 530, | |
| 1.1948 | |
| ], | |
| [ | |
| 535, | |
| 1.2277 | |
| ], | |
| [ | |
| 540, | |
| 1.2089 | |
| ], | |
| [ | |
| 545, | |
| 1.0989 | |
| ], | |
| [ | |
| 550, | |
| 1.2395 | |
| ], | |
| [ | |
| 555, | |
| 1.3229 | |
| ], | |
| [ | |
| 560, | |
| 1.1935 | |
| ], | |
| [ | |
| 565, | |
| 1.2408 | |
| ], | |
| [ | |
| 570, | |
| 1.0056 | |
| ], | |
| [ | |
| 575, | |
| 1.1342 | |
| ], | |
| [ | |
| 580, | |
| 1.1848 | |
| ], | |
| [ | |
| 585, | |
| 1.1714 | |
| ], | |
| [ | |
| 590, | |
| 1.3105 | |
| ], | |
| [ | |
| 595, | |
| 1.0947 | |
| ], | |
| [ | |
| 600, | |
| 1.1644 | |
| ], | |
| [ | |
| 605, | |
| 0.9748 | |
| ], | |
| [ | |
| 610, | |
| 1.2325 | |
| ], | |
| [ | |
| 615, | |
| 1.2575 | |
| ], | |
| [ | |
| 620, | |
| 1.1819 | |
| ], | |
| [ | |
| 625, | |
| 1.1815 | |
| ], | |
| [ | |
| 630, | |
| 1.2208 | |
| ], | |
| [ | |
| 635, | |
| 1.17 | |
| ], | |
| [ | |
| 640, | |
| 1.2175 | |
| ], | |
| [ | |
| 645, | |
| 1.1482 | |
| ], | |
| [ | |
| 650, | |
| 1.3592 | |
| ], | |
| [ | |
| 655, | |
| 1.15 | |
| ], | |
| [ | |
| 660, | |
| 1.0846 | |
| ], | |
| [ | |
| 665, | |
| 1.2221 | |
| ], | |
| [ | |
| 670, | |
| 1.3214 | |
| ], | |
| [ | |
| 675, | |
| 1.2098 | |
| ], | |
| [ | |
| 680, | |
| 1.1029 | |
| ], | |
| [ | |
| 685, | |
| 1.291 | |
| ], | |
| [ | |
| 690, | |
| 1.2468 | |
| ] | |
| ], | |
| "eval": [ | |
| [ | |
| 40, | |
| 2.1986565589904785 | |
| ], | |
| [ | |
| 80, | |
| 1.9078620672225952 | |
| ], | |
| [ | |
| 120, | |
| 1.7110655307769775 | |
| ], | |
| [ | |
| 160, | |
| 1.6229565143585205 | |
| ], | |
| [ | |
| 200, | |
| 1.5677918195724487 | |
| ], | |
| [ | |
| 240, | |
| 1.5240660905838013 | |
| ], | |
| [ | |
| 280, | |
| 1.483543038368225 | |
| ], | |
| [ | |
| 320, | |
| 1.447533369064331 | |
| ], | |
| [ | |
| 360, | |
| 1.4138736724853516 | |
| ], | |
| [ | |
| 400, | |
| 1.3881839513778687 | |
| ], | |
| [ | |
| 440, | |
| 1.3691993951797485 | |
| ], | |
| [ | |
| 480, | |
| 1.3529773950576782 | |
| ], | |
| [ | |
| 520, | |
| 1.343029499053955 | |
| ], | |
| [ | |
| 560, | |
| 1.3368796110153198 | |
| ], | |
| [ | |
| 600, | |
| 1.3322473764419556 | |
| ], | |
| [ | |
| 640, | |
| 1.3307619094848633 | |
| ], | |
| [ | |
| 680, | |
| 1.330103874206543 | |
| ] | |
| ] | |
| } | |
| } |