| { | |
| "model_base": "Qwen/Qwen2.5-32B-Instruct", | |
| "model_name": "RegTech-32B-Instruct", | |
| "dataset": "./train.jsonl", | |
| "env_file": "/home/ubuntu/sophia-core-server/.tuning/.env.32B", | |
| "train_samples": 923, | |
| "eval_samples": 102, | |
| "params": { | |
| "rank": 16, | |
| "alpha": 32, | |
| "dropout": 0.1, | |
| "lr": 1e-05, | |
| "scheduler": "cosine", | |
| "epochs": 3, | |
| "effective_batch": 4, | |
| "max_seq_length": 4096, | |
| "neftune_alpha": 5.0, | |
| "target_modules": [ | |
| "q_proj", | |
| "k_proj", | |
| "v_proj", | |
| "o_proj", | |
| "gate_proj", | |
| "up_proj", | |
| "down_proj" | |
| ] | |
| }, | |
| "results": { | |
| "total_steps": 693, | |
| "final_train_loss": 0.8432, | |
| "best_eval_loss": 0.8133148550987244, | |
| "best_eval_step": 640, | |
| "elapsed_minutes": 40.0 | |
| }, | |
| "loss_history": { | |
| "train": [ | |
| [ | |
| 5, | |
| 1.7606 | |
| ], | |
| [ | |
| 10, | |
| 1.8257 | |
| ], | |
| [ | |
| 15, | |
| 2.0366 | |
| ], | |
| [ | |
| 20, | |
| 1.8063 | |
| ], | |
| [ | |
| 25, | |
| 1.9763 | |
| ], | |
| [ | |
| 30, | |
| 1.9014 | |
| ], | |
| [ | |
| 35, | |
| 1.8005 | |
| ], | |
| [ | |
| 40, | |
| 1.6563 | |
| ], | |
| [ | |
| 45, | |
| 1.6997 | |
| ], | |
| [ | |
| 50, | |
| 1.5501 | |
| ], | |
| [ | |
| 55, | |
| 1.273 | |
| ], | |
| [ | |
| 60, | |
| 1.5863 | |
| ], | |
| [ | |
| 65, | |
| 1.3741 | |
| ], | |
| [ | |
| 70, | |
| 1.5504 | |
| ], | |
| [ | |
| 75, | |
| 1.5004 | |
| ], | |
| [ | |
| 80, | |
| 1.3517 | |
| ], | |
| [ | |
| 85, | |
| 1.3193 | |
| ], | |
| [ | |
| 90, | |
| 1.2519 | |
| ], | |
| [ | |
| 95, | |
| 1.4066 | |
| ], | |
| [ | |
| 100, | |
| 1.3787 | |
| ], | |
| [ | |
| 105, | |
| 1.317 | |
| ], | |
| [ | |
| 110, | |
| 1.273 | |
| ], | |
| [ | |
| 115, | |
| 1.366 | |
| ], | |
| [ | |
| 120, | |
| 1.1712 | |
| ], | |
| [ | |
| 125, | |
| 1.1309 | |
| ], | |
| [ | |
| 130, | |
| 1.5067 | |
| ], | |
| [ | |
| 135, | |
| 1.1212 | |
| ], | |
| [ | |
| 140, | |
| 1.4059 | |
| ], | |
| [ | |
| 145, | |
| 1.2398 | |
| ], | |
| [ | |
| 150, | |
| 1.2904 | |
| ], | |
| [ | |
| 155, | |
| 1.3055 | |
| ], | |
| [ | |
| 160, | |
| 1.1672 | |
| ], | |
| [ | |
| 165, | |
| 1.2585 | |
| ], | |
| [ | |
| 170, | |
| 1.252 | |
| ], | |
| [ | |
| 175, | |
| 0.9654 | |
| ], | |
| [ | |
| 180, | |
| 1.1089 | |
| ], | |
| [ | |
| 185, | |
| 1.1518 | |
| ], | |
| [ | |
| 190, | |
| 1.1216 | |
| ], | |
| [ | |
| 195, | |
| 1.1329 | |
| ], | |
| [ | |
| 200, | |
| 0.9045 | |
| ], | |
| [ | |
| 205, | |
| 1.0882 | |
| ], | |
| [ | |
| 210, | |
| 1.0312 | |
| ], | |
| [ | |
| 215, | |
| 0.9879 | |
| ], | |
| [ | |
| 220, | |
| 0.9716 | |
| ], | |
| [ | |
| 225, | |
| 1.0379 | |
| ], | |
| [ | |
| 230, | |
| 1.0032 | |
| ], | |
| [ | |
| 235, | |
| 1.1349 | |
| ], | |
| [ | |
| 240, | |
| 0.9745 | |
| ], | |
| [ | |
| 245, | |
| 0.9717 | |
| ], | |
| [ | |
| 250, | |
| 0.8808 | |
| ], | |
| [ | |
| 255, | |
| 0.9055 | |
| ], | |
| [ | |
| 260, | |
| 1.0285 | |
| ], | |
| [ | |
| 265, | |
| 1.0887 | |
| ], | |
| [ | |
| 270, | |
| 0.8846 | |
| ], | |
| [ | |
| 275, | |
| 0.9121 | |
| ], | |
| [ | |
| 280, | |
| 1.032 | |
| ], | |
| [ | |
| 285, | |
| 1.043 | |
| ], | |
| [ | |
| 290, | |
| 0.9623 | |
| ], | |
| [ | |
| 295, | |
| 0.8949 | |
| ], | |
| [ | |
| 300, | |
| 0.8456 | |
| ], | |
| [ | |
| 305, | |
| 0.8745 | |
| ], | |
| [ | |
| 310, | |
| 0.94 | |
| ], | |
| [ | |
| 315, | |
| 0.8265 | |
| ], | |
| [ | |
| 320, | |
| 0.9407 | |
| ], | |
| [ | |
| 325, | |
| 0.9309 | |
| ], | |
| [ | |
| 330, | |
| 0.8971 | |
| ], | |
| [ | |
| 335, | |
| 0.8191 | |
| ], | |
| [ | |
| 340, | |
| 0.7918 | |
| ], | |
| [ | |
| 345, | |
| 0.9414 | |
| ], | |
| [ | |
| 350, | |
| 0.783 | |
| ], | |
| [ | |
| 355, | |
| 0.8103 | |
| ], | |
| [ | |
| 360, | |
| 0.8443 | |
| ], | |
| [ | |
| 365, | |
| 0.9374 | |
| ], | |
| [ | |
| 370, | |
| 0.9564 | |
| ], | |
| [ | |
| 375, | |
| 0.8063 | |
| ], | |
| [ | |
| 380, | |
| 0.7323 | |
| ], | |
| [ | |
| 385, | |
| 0.826 | |
| ], | |
| [ | |
| 390, | |
| 0.8682 | |
| ], | |
| [ | |
| 395, | |
| 0.8559 | |
| ], | |
| [ | |
| 400, | |
| 0.9216 | |
| ], | |
| [ | |
| 405, | |
| 0.8368 | |
| ], | |
| [ | |
| 410, | |
| 0.9346 | |
| ], | |
| [ | |
| 415, | |
| 0.8632 | |
| ], | |
| [ | |
| 420, | |
| 0.8698 | |
| ], | |
| [ | |
| 425, | |
| 0.7973 | |
| ], | |
| [ | |
| 430, | |
| 0.8807 | |
| ], | |
| [ | |
| 435, | |
| 0.7736 | |
| ], | |
| [ | |
| 440, | |
| 0.7795 | |
| ], | |
| [ | |
| 445, | |
| 0.768 | |
| ], | |
| [ | |
| 450, | |
| 0.8426 | |
| ], | |
| [ | |
| 455, | |
| 0.8065 | |
| ], | |
| [ | |
| 460, | |
| 0.7986 | |
| ], | |
| [ | |
| 465, | |
| 1.0072 | |
| ], | |
| [ | |
| 470, | |
| 0.7926 | |
| ], | |
| [ | |
| 475, | |
| 0.7541 | |
| ], | |
| [ | |
| 480, | |
| 0.7158 | |
| ], | |
| [ | |
| 485, | |
| 0.7268 | |
| ], | |
| [ | |
| 490, | |
| 0.793 | |
| ], | |
| [ | |
| 495, | |
| 0.9075 | |
| ], | |
| [ | |
| 500, | |
| 0.8299 | |
| ], | |
| [ | |
| 505, | |
| 0.8684 | |
| ], | |
| [ | |
| 510, | |
| 0.8688 | |
| ], | |
| [ | |
| 515, | |
| 0.6847 | |
| ], | |
| [ | |
| 520, | |
| 0.7814 | |
| ], | |
| [ | |
| 525, | |
| 0.7811 | |
| ], | |
| [ | |
| 530, | |
| 0.7919 | |
| ], | |
| [ | |
| 535, | |
| 0.8829 | |
| ], | |
| [ | |
| 540, | |
| 0.7775 | |
| ], | |
| [ | |
| 545, | |
| 0.7112 | |
| ], | |
| [ | |
| 550, | |
| 0.7917 | |
| ], | |
| [ | |
| 555, | |
| 0.769 | |
| ], | |
| [ | |
| 560, | |
| 0.7328 | |
| ], | |
| [ | |
| 565, | |
| 0.7695 | |
| ], | |
| [ | |
| 570, | |
| 0.5831 | |
| ], | |
| [ | |
| 575, | |
| 0.7399 | |
| ], | |
| [ | |
| 580, | |
| 0.8321 | |
| ], | |
| [ | |
| 585, | |
| 0.8167 | |
| ], | |
| [ | |
| 590, | |
| 0.832 | |
| ], | |
| [ | |
| 595, | |
| 0.7446 | |
| ], | |
| [ | |
| 600, | |
| 0.698 | |
| ], | |
| [ | |
| 605, | |
| 0.5891 | |
| ], | |
| [ | |
| 610, | |
| 0.8106 | |
| ], | |
| [ | |
| 615, | |
| 0.7455 | |
| ], | |
| [ | |
| 620, | |
| 0.7738 | |
| ], | |
| [ | |
| 625, | |
| 0.7939 | |
| ], | |
| [ | |
| 630, | |
| 0.8003 | |
| ], | |
| [ | |
| 635, | |
| 0.7349 | |
| ], | |
| [ | |
| 640, | |
| 0.7863 | |
| ], | |
| [ | |
| 645, | |
| 0.7968 | |
| ], | |
| [ | |
| 650, | |
| 0.8229 | |
| ], | |
| [ | |
| 655, | |
| 0.7529 | |
| ], | |
| [ | |
| 660, | |
| 0.6354 | |
| ], | |
| [ | |
| 665, | |
| 0.8064 | |
| ], | |
| [ | |
| 670, | |
| 0.9099 | |
| ], | |
| [ | |
| 675, | |
| 0.8156 | |
| ], | |
| [ | |
| 680, | |
| 0.7462 | |
| ], | |
| [ | |
| 685, | |
| 0.907 | |
| ], | |
| [ | |
| 690, | |
| 0.8432 | |
| ] | |
| ], | |
| "eval": [ | |
| [ | |
| 40, | |
| 2.072058916091919 | |
| ], | |
| [ | |
| 80, | |
| 1.625858187675476 | |
| ], | |
| [ | |
| 120, | |
| 1.4893486499786377 | |
| ], | |
| [ | |
| 160, | |
| 1.359459400177002 | |
| ], | |
| [ | |
| 200, | |
| 1.2217525243759155 | |
| ], | |
| [ | |
| 240, | |
| 1.113280177116394 | |
| ], | |
| [ | |
| 280, | |
| 1.0186439752578735 | |
| ], | |
| [ | |
| 320, | |
| 0.937186598777771 | |
| ], | |
| [ | |
| 360, | |
| 0.8894771337509155 | |
| ], | |
| [ | |
| 400, | |
| 0.862065315246582 | |
| ], | |
| [ | |
| 440, | |
| 0.8410201072692871 | |
| ], | |
| [ | |
| 480, | |
| 0.8306860327720642 | |
| ], | |
| [ | |
| 520, | |
| 0.8223150372505188 | |
| ], | |
| [ | |
| 560, | |
| 0.8172081112861633 | |
| ], | |
| [ | |
| 600, | |
| 0.8150315880775452 | |
| ], | |
| [ | |
| 640, | |
| 0.8133148550987244 | |
| ], | |
| [ | |
| 680, | |
| 0.8135352730751038 | |
| ] | |
| ] | |
| } | |
| } |