| { |
| "best_global_step": 244, |
| "best_metric": 0.2577258348464966, |
| "best_model_checkpoint": "lora_qwen7b_java_adiff_v2/checkpoint-244", |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 732, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.041109969167523124, |
| "grad_norm": 0.19480174779891968, |
| "learning_rate": 3.648648648648649e-06, |
| "loss": 0.5966, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.08221993833504625, |
| "grad_norm": 0.15218280255794525, |
| "learning_rate": 7.702702702702703e-06, |
| "loss": 0.5917, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.12332990750256938, |
| "grad_norm": 0.19138699769973755, |
| "learning_rate": 1.1756756756756757e-05, |
| "loss": 0.566, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.1644398766700925, |
| "grad_norm": 0.3009093403816223, |
| "learning_rate": 1.5810810810810808e-05, |
| "loss": 0.5276, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.20554984583761562, |
| "grad_norm": 0.2303069680929184, |
| "learning_rate": 1.9864864864864866e-05, |
| "loss": 0.4148, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.24665981500513876, |
| "grad_norm": 0.10509632527828217, |
| "learning_rate": 2.3918918918918917e-05, |
| "loss": 0.344, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.28776978417266186, |
| "grad_norm": 0.09443257004022598, |
| "learning_rate": 2.7972972972972975e-05, |
| "loss": 0.3244, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.328879753340185, |
| "grad_norm": 0.07592365890741348, |
| "learning_rate": 2.9772036474164135e-05, |
| "loss": 0.3107, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.3699897225077081, |
| "grad_norm": 0.06483208388090134, |
| "learning_rate": 2.9316109422492404e-05, |
| "loss": 0.2978, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.41109969167523125, |
| "grad_norm": 0.07318229973316193, |
| "learning_rate": 2.886018237082067e-05, |
| "loss": 0.2807, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.4522096608427544, |
| "grad_norm": 0.07056762278079987, |
| "learning_rate": 2.8404255319148935e-05, |
| "loss": 0.2844, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.4933196300102775, |
| "grad_norm": 0.0744948536157608, |
| "learning_rate": 2.7948328267477204e-05, |
| "loss": 0.2819, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.5344295991778006, |
| "grad_norm": 0.06527914851903915, |
| "learning_rate": 2.7492401215805473e-05, |
| "loss": 0.2682, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.5755395683453237, |
| "grad_norm": 0.0773576870560646, |
| "learning_rate": 2.7036474164133738e-05, |
| "loss": 0.2673, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.6166495375128469, |
| "grad_norm": 0.06686092913150787, |
| "learning_rate": 2.6580547112462007e-05, |
| "loss": 0.2675, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.65775950668037, |
| "grad_norm": 0.10009180009365082, |
| "learning_rate": 2.6124620060790272e-05, |
| "loss": 0.2597, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.6988694758478932, |
| "grad_norm": 0.08303786814212799, |
| "learning_rate": 2.566869300911854e-05, |
| "loss": 0.2582, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.7399794450154162, |
| "grad_norm": 0.10950975120067596, |
| "learning_rate": 2.521276595744681e-05, |
| "loss": 0.2513, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.7810894141829393, |
| "grad_norm": 0.09281215071678162, |
| "learning_rate": 2.4756838905775076e-05, |
| "loss": 0.2579, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.8221993833504625, |
| "grad_norm": 0.11291969567537308, |
| "learning_rate": 2.4300911854103345e-05, |
| "loss": 0.2638, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.8633093525179856, |
| "grad_norm": 0.09836319833993912, |
| "learning_rate": 2.3844984802431613e-05, |
| "loss": 0.247, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.9044193216855088, |
| "grad_norm": 0.11001324653625488, |
| "learning_rate": 2.338905775075988e-05, |
| "loss": 0.2478, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.9455292908530318, |
| "grad_norm": 0.109553761780262, |
| "learning_rate": 2.2933130699088144e-05, |
| "loss": 0.2347, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.986639260020555, |
| "grad_norm": 0.18094155192375183, |
| "learning_rate": 2.2477203647416413e-05, |
| "loss": 0.2381, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.2577258348464966, |
| "eval_runtime": 47.9357, |
| "eval_samples_per_second": 13.602, |
| "eval_steps_per_second": 3.4, |
| "step": 244 |
| }, |
| { |
| "epoch": 1.024665981500514, |
| "grad_norm": 0.1261935979127884, |
| "learning_rate": 2.2021276595744682e-05, |
| "loss": 0.2378, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.065775950668037, |
| "grad_norm": 0.12793003022670746, |
| "learning_rate": 2.156534954407295e-05, |
| "loss": 0.2232, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.10688591983556, |
| "grad_norm": 0.1802845448255539, |
| "learning_rate": 2.1109422492401216e-05, |
| "loss": 0.2329, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.1479958890030832, |
| "grad_norm": 0.14601245522499084, |
| "learning_rate": 2.0653495440729482e-05, |
| "loss": 0.2107, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.1891058581706064, |
| "grad_norm": 0.23713846504688263, |
| "learning_rate": 2.019756838905775e-05, |
| "loss": 0.2065, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.2302158273381294, |
| "grad_norm": 0.18008890748023987, |
| "learning_rate": 1.974164133738602e-05, |
| "loss": 0.2168, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.2713257965056526, |
| "grad_norm": 0.2030894011259079, |
| "learning_rate": 1.928571428571429e-05, |
| "loss": 0.214, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.3124357656731758, |
| "grad_norm": 0.2112809121608734, |
| "learning_rate": 1.8829787234042554e-05, |
| "loss": 0.1999, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.353545734840699, |
| "grad_norm": 0.22484244406223297, |
| "learning_rate": 1.837386018237082e-05, |
| "loss": 0.195, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.394655704008222, |
| "grad_norm": 0.2325150966644287, |
| "learning_rate": 1.7917933130699088e-05, |
| "loss": 0.1917, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.435765673175745, |
| "grad_norm": 0.23104484379291534, |
| "learning_rate": 1.7462006079027357e-05, |
| "loss": 0.183, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.4768756423432683, |
| "grad_norm": 0.25936955213546753, |
| "learning_rate": 1.7006079027355622e-05, |
| "loss": 0.1785, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.5179856115107913, |
| "grad_norm": 0.23920413851737976, |
| "learning_rate": 1.655015197568389e-05, |
| "loss": 0.1775, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.5590955806783144, |
| "grad_norm": 0.2573305666446686, |
| "learning_rate": 1.609422492401216e-05, |
| "loss": 0.1698, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.6002055498458376, |
| "grad_norm": 0.3216232359409332, |
| "learning_rate": 1.5638297872340426e-05, |
| "loss": 0.1739, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.6413155190133608, |
| "grad_norm": 0.33290883898735046, |
| "learning_rate": 1.5182370820668691e-05, |
| "loss": 0.1702, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.682425488180884, |
| "grad_norm": 0.32269543409347534, |
| "learning_rate": 1.4726443768996962e-05, |
| "loss": 0.1611, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.723535457348407, |
| "grad_norm": 0.3431931734085083, |
| "learning_rate": 1.4270516717325229e-05, |
| "loss": 0.1666, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.7646454265159301, |
| "grad_norm": 0.33378058671951294, |
| "learning_rate": 1.3814589665653496e-05, |
| "loss": 0.1486, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.8057553956834531, |
| "grad_norm": 0.32158151268959045, |
| "learning_rate": 1.3358662613981763e-05, |
| "loss": 0.1626, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.8468653648509763, |
| "grad_norm": 0.4073275327682495, |
| "learning_rate": 1.2902735562310032e-05, |
| "loss": 0.1459, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.8879753340184995, |
| "grad_norm": 0.33566728234291077, |
| "learning_rate": 1.2446808510638298e-05, |
| "loss": 0.1328, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.9290853031860227, |
| "grad_norm": 0.40398478507995605, |
| "learning_rate": 1.1990881458966566e-05, |
| "loss": 0.1469, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.9701952723535459, |
| "grad_norm": 0.39873984456062317, |
| "learning_rate": 1.1534954407294832e-05, |
| "loss": 0.1403, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 0.29330888390541077, |
| "eval_runtime": 47.6643, |
| "eval_samples_per_second": 13.679, |
| "eval_steps_per_second": 3.42, |
| "step": 488 |
| }, |
| { |
| "epoch": 2.0082219938335046, |
| "grad_norm": 0.3640258312225342, |
| "learning_rate": 1.10790273556231e-05, |
| "loss": 0.1332, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.049331963001028, |
| "grad_norm": 0.35593941807746887, |
| "learning_rate": 1.0623100303951368e-05, |
| "loss": 0.1315, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.090441932168551, |
| "grad_norm": 0.478542298078537, |
| "learning_rate": 1.0167173252279635e-05, |
| "loss": 0.1126, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.131551901336074, |
| "grad_norm": 0.4178543984889984, |
| "learning_rate": 9.711246200607902e-06, |
| "loss": 0.1266, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.172661870503597, |
| "grad_norm": 0.468169242143631, |
| "learning_rate": 9.255319148936171e-06, |
| "loss": 0.1282, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.21377183967112, |
| "grad_norm": 0.3565237820148468, |
| "learning_rate": 8.799392097264438e-06, |
| "loss": 0.1138, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.2548818088386433, |
| "grad_norm": 0.4303134083747864, |
| "learning_rate": 8.343465045592705e-06, |
| "loss": 0.1314, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.2959917780061665, |
| "grad_norm": 0.39298295974731445, |
| "learning_rate": 7.887537993920974e-06, |
| "loss": 0.1109, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.3371017471736897, |
| "grad_norm": 0.4263976812362671, |
| "learning_rate": 7.43161094224924e-06, |
| "loss": 0.1081, |
| "step": 570 |
| }, |
| { |
| "epoch": 2.378211716341213, |
| "grad_norm": 0.42722198367118835, |
| "learning_rate": 6.975683890577508e-06, |
| "loss": 0.1093, |
| "step": 580 |
| }, |
| { |
| "epoch": 2.419321685508736, |
| "grad_norm": 0.3445146977901459, |
| "learning_rate": 6.519756838905775e-06, |
| "loss": 0.1166, |
| "step": 590 |
| }, |
| { |
| "epoch": 2.460431654676259, |
| "grad_norm": 0.3934553563594818, |
| "learning_rate": 6.063829787234042e-06, |
| "loss": 0.1061, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.501541623843782, |
| "grad_norm": 0.4379635155200958, |
| "learning_rate": 5.607902735562311e-06, |
| "loss": 0.1103, |
| "step": 610 |
| }, |
| { |
| "epoch": 2.542651593011305, |
| "grad_norm": 0.36748257279396057, |
| "learning_rate": 5.151975683890578e-06, |
| "loss": 0.1011, |
| "step": 620 |
| }, |
| { |
| "epoch": 2.5837615621788284, |
| "grad_norm": 0.4660473167896271, |
| "learning_rate": 4.696048632218845e-06, |
| "loss": 0.1114, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.6248715313463515, |
| "grad_norm": 0.4990362226963043, |
| "learning_rate": 4.240121580547112e-06, |
| "loss": 0.1035, |
| "step": 640 |
| }, |
| { |
| "epoch": 2.6659815005138747, |
| "grad_norm": 0.37723225355148315, |
| "learning_rate": 3.7841945288753804e-06, |
| "loss": 0.0989, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.707091469681398, |
| "grad_norm": 0.37326356768608093, |
| "learning_rate": 3.3282674772036475e-06, |
| "loss": 0.1084, |
| "step": 660 |
| }, |
| { |
| "epoch": 2.7482014388489207, |
| "grad_norm": 0.38979873061180115, |
| "learning_rate": 2.872340425531915e-06, |
| "loss": 0.0918, |
| "step": 670 |
| }, |
| { |
| "epoch": 2.789311408016444, |
| "grad_norm": 0.42690524458885193, |
| "learning_rate": 2.4164133738601823e-06, |
| "loss": 0.1025, |
| "step": 680 |
| }, |
| { |
| "epoch": 2.830421377183967, |
| "grad_norm": 0.326016366481781, |
| "learning_rate": 1.96048632218845e-06, |
| "loss": 0.1079, |
| "step": 690 |
| }, |
| { |
| "epoch": 2.87153134635149, |
| "grad_norm": 0.472915917634964, |
| "learning_rate": 1.5045592705167174e-06, |
| "loss": 0.1005, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.9126413155190134, |
| "grad_norm": 0.47058117389678955, |
| "learning_rate": 1.0486322188449848e-06, |
| "loss": 0.1026, |
| "step": 710 |
| }, |
| { |
| "epoch": 2.9537512846865366, |
| "grad_norm": 0.4792344570159912, |
| "learning_rate": 5.927051671732523e-07, |
| "loss": 0.0951, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.9948612538540598, |
| "grad_norm": 0.4005649983882904, |
| "learning_rate": 1.3677811550151974e-07, |
| "loss": 0.1113, |
| "step": 730 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_loss": 0.30852633714675903, |
| "eval_runtime": 47.6701, |
| "eval_samples_per_second": 13.677, |
| "eval_steps_per_second": 3.419, |
| "step": 732 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 732, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9.924221268359946e+17, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |