{
  "best_global_step": 244,
  "best_metric": 0.34599506855010986,
  "best_model_checkpoint": "./lora_qwen7b_java_ab_v1/checkpoint-244",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 732,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.041109969167523124,
      "grad_norm": 0.23909567296504974,
      "learning_rate": 3.648648648648649e-06,
      "loss": 0.6695,
      "step": 10
    },
    {
      "epoch": 0.08221993833504625,
      "grad_norm": 0.19790810346603394,
      "learning_rate": 7.702702702702703e-06,
      "loss": 0.6695,
      "step": 20
    },
    {
      "epoch": 0.12332990750256938,
      "grad_norm": 0.1794859915971756,
      "learning_rate": 1.1756756756756757e-05,
      "loss": 0.6502,
      "step": 30
    },
    {
      "epoch": 0.1644398766700925,
      "grad_norm": 0.2528444528579712,
      "learning_rate": 1.5810810810810808e-05,
      "loss": 0.6094,
      "step": 40
    },
    {
      "epoch": 0.20554984583761562,
      "grad_norm": 0.30964621901512146,
      "learning_rate": 1.9864864864864866e-05,
      "loss": 0.5216,
      "step": 50
    },
    {
      "epoch": 0.24665981500513876,
      "grad_norm": 0.13139910995960236,
      "learning_rate": 2.3918918918918917e-05,
      "loss": 0.4225,
      "step": 60
    },
    {
      "epoch": 0.28776978417266186,
      "grad_norm": 0.09227403998374939,
      "learning_rate": 2.7972972972972975e-05,
      "loss": 0.3913,
      "step": 70
    },
    {
      "epoch": 0.328879753340185,
      "grad_norm": 0.08170370757579803,
      "learning_rate": 2.9772036474164135e-05,
      "loss": 0.3778,
      "step": 80
    },
    {
      "epoch": 0.3699897225077081,
      "grad_norm": 0.07981819659471512,
      "learning_rate": 2.9316109422492404e-05,
      "loss": 0.3634,
      "step": 90
    },
    {
      "epoch": 0.41109969167523125,
      "grad_norm": 0.06885957717895508,
      "learning_rate": 2.886018237082067e-05,
      "loss": 0.3447,
      "step": 100
    },
    {
      "epoch": 0.4522096608427544,
      "grad_norm": 0.08964607119560242,
      "learning_rate": 2.8404255319148935e-05,
      "loss": 0.3475,
      "step": 110
    },
    {
      "epoch": 0.4933196300102775,
      "grad_norm": 0.08145549893379211,
      "learning_rate": 2.7948328267477204e-05,
      "loss": 0.3471,
      "step": 120
    },
    {
      "epoch": 0.5344295991778006,
      "grad_norm": 0.07885731011629105,
      "learning_rate": 2.7492401215805473e-05,
      "loss": 0.3376,
      "step": 130
    },
    {
      "epoch": 0.5755395683453237,
      "grad_norm": 0.10354865342378616,
      "learning_rate": 2.7036474164133738e-05,
      "loss": 0.3311,
      "step": 140
    },
    {
      "epoch": 0.6166495375128469,
      "grad_norm": 0.08575163781642914,
      "learning_rate": 2.6580547112462007e-05,
      "loss": 0.3269,
      "step": 150
    },
    {
      "epoch": 0.65775950668037,
      "grad_norm": 0.12186736613512039,
      "learning_rate": 2.6124620060790272e-05,
      "loss": 0.3217,
      "step": 160
    },
    {
      "epoch": 0.6988694758478932,
      "grad_norm": 0.10888725519180298,
      "learning_rate": 2.566869300911854e-05,
      "loss": 0.3098,
      "step": 170
    },
    {
      "epoch": 0.7399794450154162,
      "grad_norm": 0.10800354182720184,
      "learning_rate": 2.521276595744681e-05,
      "loss": 0.304,
      "step": 180
    },
    {
      "epoch": 0.7810894141829393,
      "grad_norm": 0.11221329867839813,
      "learning_rate": 2.4756838905775076e-05,
      "loss": 0.3146,
      "step": 190
    },
    {
      "epoch": 0.8221993833504625,
      "grad_norm": 0.14716410636901855,
      "learning_rate": 2.4300911854103345e-05,
      "loss": 0.3093,
      "step": 200
    },
    {
      "epoch": 0.8633093525179856,
      "grad_norm": 0.12797018885612488,
      "learning_rate": 2.3844984802431613e-05,
      "loss": 0.2895,
      "step": 210
    },
    {
      "epoch": 0.9044193216855088,
      "grad_norm": 0.15831203758716583,
      "learning_rate": 2.338905775075988e-05,
      "loss": 0.3014,
      "step": 220
    },
    {
      "epoch": 0.9455292908530318,
      "grad_norm": 0.14818865060806274,
      "learning_rate": 2.2933130699088144e-05,
      "loss": 0.2765,
      "step": 230
    },
    {
      "epoch": 0.986639260020555,
      "grad_norm": 0.1700122207403183,
      "learning_rate": 2.2477203647416413e-05,
      "loss": 0.2776,
      "step": 240
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.34599506855010986,
      "eval_runtime": 41.9519,
      "eval_samples_per_second": 15.542,
      "eval_steps_per_second": 7.771,
      "step": 244
    },
    {
      "epoch": 1.024665981500514,
      "grad_norm": 0.17512771487236023,
      "learning_rate": 2.2021276595744682e-05,
      "loss": 0.2825,
      "step": 250
    },
    {
      "epoch": 1.065775950668037,
      "grad_norm": 0.16911087930202484,
      "learning_rate": 2.156534954407295e-05,
      "loss": 0.2623,
      "step": 260
    },
    {
      "epoch": 1.10688591983556,
      "grad_norm": 0.2555592656135559,
      "learning_rate": 2.1109422492401216e-05,
      "loss": 0.2553,
      "step": 270
    },
    {
      "epoch": 1.1479958890030832,
      "grad_norm": 0.248292475938797,
      "learning_rate": 2.0653495440729482e-05,
      "loss": 0.2346,
      "step": 280
    },
    {
      "epoch": 1.1891058581706064,
      "grad_norm": 0.33669453859329224,
      "learning_rate": 2.019756838905775e-05,
      "loss": 0.2298,
      "step": 290
    },
    {
      "epoch": 1.2302158273381294,
      "grad_norm": 0.2764478921890259,
      "learning_rate": 1.974164133738602e-05,
      "loss": 0.2368,
      "step": 300
    },
    {
      "epoch": 1.2713257965056526,
      "grad_norm": 0.2844967842102051,
      "learning_rate": 1.928571428571429e-05,
      "loss": 0.2325,
      "step": 310
    },
    {
      "epoch": 1.3124357656731758,
      "grad_norm": 0.38718295097351074,
      "learning_rate": 1.8829787234042554e-05,
      "loss": 0.2168,
      "step": 320
    },
    {
      "epoch": 1.353545734840699,
      "grad_norm": 0.4138449430465698,
      "learning_rate": 1.837386018237082e-05,
      "loss": 0.2059,
      "step": 330
    },
    {
      "epoch": 1.394655704008222,
      "grad_norm": 0.3654138445854187,
      "learning_rate": 1.7917933130699088e-05,
      "loss": 0.2075,
      "step": 340
    },
    {
      "epoch": 1.435765673175745,
      "grad_norm": 0.38186466693878174,
      "learning_rate": 1.7462006079027357e-05,
      "loss": 0.1942,
      "step": 350
    },
    {
      "epoch": 1.4768756423432683,
      "grad_norm": 0.3314580023288727,
      "learning_rate": 1.7006079027355622e-05,
      "loss": 0.1847,
      "step": 360
    },
    {
      "epoch": 1.5179856115107913,
      "grad_norm": 0.36531996726989746,
      "learning_rate": 1.655015197568389e-05,
      "loss": 0.1844,
      "step": 370
    },
    {
      "epoch": 1.5590955806783144,
      "grad_norm": 0.43975991010665894,
      "learning_rate": 1.609422492401216e-05,
      "loss": 0.1717,
      "step": 380
    },
    {
      "epoch": 1.6002055498458376,
      "grad_norm": 0.5350055694580078,
      "learning_rate": 1.5638297872340426e-05,
      "loss": 0.1771,
      "step": 390
    },
    {
      "epoch": 1.6413155190133608,
      "grad_norm": 0.6035364270210266,
      "learning_rate": 1.5182370820668691e-05,
      "loss": 0.1659,
      "step": 400
    },
    {
      "epoch": 1.682425488180884,
      "grad_norm": 0.45260411500930786,
      "learning_rate": 1.4726443768996962e-05,
      "loss": 0.1457,
      "step": 410
    },
    {
      "epoch": 1.723535457348407,
      "grad_norm": 0.5686559677124023,
      "learning_rate": 1.4270516717325229e-05,
      "loss": 0.1627,
      "step": 420
    },
    {
      "epoch": 1.7646454265159301,
      "grad_norm": 0.5539414882659912,
      "learning_rate": 1.3814589665653496e-05,
      "loss": 0.1351,
      "step": 430
    },
    {
      "epoch": 1.8057553956834531,
      "grad_norm": 0.6315564513206482,
      "learning_rate": 1.3358662613981763e-05,
      "loss": 0.1581,
      "step": 440
    },
    {
      "epoch": 1.8468653648509763,
      "grad_norm": 0.5697396397590637,
      "learning_rate": 1.2902735562310032e-05,
      "loss": 0.1412,
      "step": 450
    },
    {
      "epoch": 1.8879753340184995,
      "grad_norm": 0.5553473830223083,
      "learning_rate": 1.2446808510638298e-05,
      "loss": 0.1204,
      "step": 460
    },
    {
      "epoch": 1.9290853031860227,
      "grad_norm": 0.5264490246772766,
      "learning_rate": 1.1990881458966566e-05,
      "loss": 0.1371,
      "step": 470
    },
    {
      "epoch": 1.9701952723535459,
      "grad_norm": 0.5465613007545471,
      "learning_rate": 1.1534954407294832e-05,
      "loss": 0.1311,
      "step": 480
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.4124738574028015,
      "eval_runtime": 41.761,
      "eval_samples_per_second": 15.613,
      "eval_steps_per_second": 7.806,
      "step": 488
    },
    {
      "epoch": 2.0082219938335046,
      "grad_norm": 0.5448176860809326,
      "learning_rate": 1.10790273556231e-05,
      "loss": 0.1197,
      "step": 490
    },
    {
      "epoch": 2.049331963001028,
      "grad_norm": 0.48690664768218994,
      "learning_rate": 1.0623100303951368e-05,
      "loss": 0.122,
      "step": 500
    },
    {
      "epoch": 2.090441932168551,
      "grad_norm": 0.5225591659545898,
      "learning_rate": 1.0167173252279635e-05,
      "loss": 0.0963,
      "step": 510
    },
    {
      "epoch": 2.131551901336074,
      "grad_norm": 0.4576282799243927,
      "learning_rate": 9.711246200607902e-06,
      "loss": 0.1118,
      "step": 520
    },
    {
      "epoch": 2.172661870503597,
      "grad_norm": 0.5207735300064087,
      "learning_rate": 9.255319148936171e-06,
      "loss": 0.1188,
      "step": 530
    },
    {
      "epoch": 2.21377183967112,
      "grad_norm": 0.753419816493988,
      "learning_rate": 8.799392097264438e-06,
      "loss": 0.1031,
      "step": 540
    },
    {
      "epoch": 2.2548818088386433,
      "grad_norm": 0.5905725359916687,
      "learning_rate": 8.343465045592705e-06,
      "loss": 0.1174,
      "step": 550
    },
    {
      "epoch": 2.2959917780061665,
      "grad_norm": 0.4930141270160675,
      "learning_rate": 7.887537993920974e-06,
      "loss": 0.0954,
      "step": 560
    },
    {
      "epoch": 2.3371017471736897,
      "grad_norm": 0.6282473206520081,
      "learning_rate": 7.43161094224924e-06,
      "loss": 0.0983,
      "step": 570
    },
    {
      "epoch": 2.378211716341213,
      "grad_norm": 0.5251069068908691,
      "learning_rate": 6.975683890577508e-06,
      "loss": 0.0958,
      "step": 580
    },
    {
      "epoch": 2.419321685508736,
      "grad_norm": 0.4728357195854187,
      "learning_rate": 6.519756838905775e-06,
      "loss": 0.103,
      "step": 590
    },
    {
      "epoch": 2.460431654676259,
      "grad_norm": 0.481865257024765,
      "learning_rate": 6.063829787234042e-06,
      "loss": 0.0883,
      "step": 600
    },
    {
      "epoch": 2.501541623843782,
      "grad_norm": 0.4945089519023895,
      "learning_rate": 5.607902735562311e-06,
      "loss": 0.0987,
      "step": 610
    },
    {
      "epoch": 2.542651593011305,
      "grad_norm": 0.5637158751487732,
      "learning_rate": 5.151975683890578e-06,
      "loss": 0.085,
      "step": 620
    },
    {
      "epoch": 2.5837615621788284,
      "grad_norm": 0.5263553261756897,
      "learning_rate": 4.696048632218845e-06,
      "loss": 0.0954,
      "step": 630
    },
    {
      "epoch": 2.6248715313463515,
      "grad_norm": 0.6571027040481567,
      "learning_rate": 4.240121580547112e-06,
      "loss": 0.0905,
      "step": 640
    },
    {
      "epoch": 2.6659815005138747,
      "grad_norm": 0.4254496991634369,
      "learning_rate": 3.7841945288753804e-06,
      "loss": 0.0844,
      "step": 650
    },
    {
      "epoch": 2.707091469681398,
      "grad_norm": 0.3524133861064911,
      "learning_rate": 3.3282674772036475e-06,
      "loss": 0.0989,
      "step": 660
    },
    {
      "epoch": 2.7482014388489207,
      "grad_norm": 0.3972431719303131,
      "learning_rate": 2.872340425531915e-06,
      "loss": 0.0802,
      "step": 670
    },
    {
      "epoch": 2.789311408016444,
      "grad_norm": 0.543141782283783,
      "learning_rate": 2.4164133738601823e-06,
      "loss": 0.0926,
      "step": 680
    },
    {
      "epoch": 2.830421377183967,
      "grad_norm": 0.44491565227508545,
      "learning_rate": 1.96048632218845e-06,
      "loss": 0.092,
      "step": 690
    },
    {
      "epoch": 2.87153134635149,
      "grad_norm": 0.5631129741668701,
      "learning_rate": 1.5045592705167174e-06,
      "loss": 0.0902,
      "step": 700
    },
    {
      "epoch": 2.9126413155190134,
      "grad_norm": 0.44580066204071045,
      "learning_rate": 1.0486322188449848e-06,
      "loss": 0.086,
      "step": 710
    },
    {
      "epoch": 2.9537512846865366,
      "grad_norm": 0.587339460849762,
      "learning_rate": 5.927051671732523e-07,
      "loss": 0.082,
      "step": 720
    },
    {
      "epoch": 2.9948612538540598,
      "grad_norm": 0.43604910373687744,
      "learning_rate": 1.3677811550151974e-07,
      "loss": 0.0996,
      "step": 730
    },
    {
      "epoch": 3.0,
      "eval_loss": 0.4389069080352783,
      "eval_runtime": 41.7604,
      "eval_samples_per_second": 15.613,
      "eval_steps_per_second": 7.806,
      "step": 732
    }
  ],
  "logging_steps": 10,
  "max_steps": 732,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 7.928802006486467e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}