| { | |
| "best_metric": 1.798671007156372, | |
| "best_model_checkpoint": "output/kasta/checkpoint-246", | |
| "epoch": 1.0, | |
| "global_step": 246, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00013706019712792517, | |
| "loss": 2.7165, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00013664135833219747, | |
| "loss": 2.3564, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00013594519075178427, | |
| "loss": 2.2801, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00013497453188602036, | |
| "loss": 2.095, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00013373333802928601, | |
| "loss": 2.1713, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0001322266681456037, | |
| "loss": 2.0746, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00013046066324888032, | |
| "loss": 2.1268, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00012844252137283782, | |
| "loss": 2.1756, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00012618046823265178, | |
| "loss": 2.0614, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00012368372369787862, | |
| "loss": 1.9928, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00012096246421332296, | |
| "loss": 2.0832, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00011802778132101399, | |
| "loss": 1.9764, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00011489163645235038, | |
| "loss": 2.0674, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00011156681217467561, | |
| "loss": 2.0935, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00010806686009099738, | |
| "loss": 1.9101, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00010440604560520553, | |
| "loss": 1.9785, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00010059928977791948, | |
| "loss": 1.9156, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 9.666210850995393e-05, | |
| "loss": 2.0267, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 9.261054930128376e-05, | |
| "loss": 1.8933, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 8.846112584327212e-05, | |
| "loss": 1.9379, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 8.423075071075525e-05, | |
| "loss": 2.0375, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 7.993666642832438e-05, | |
| "loss": 2.014, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 7.55963751917701e-05, | |
| "loss": 1.9119, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 7.122756753113643e-05, | |
| "loss": 1.9155, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 6.684805020614639e-05, | |
| "loss": 1.9652, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 6.247567362788848e-05, | |
| "loss": 1.8941, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 5.81282591025852e-05, | |
| "loss": 1.8734, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 5.382352619398988e-05, | |
| "loss": 1.866, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.957902050047381e-05, | |
| "loss": 1.9107, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.541204214117672e-05, | |
| "loss": 1.8795, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.1339575242702164e-05, | |
| "loss": 1.9314, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 3.7378218713762616e-05, | |
| "loss": 1.8708, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 3.354411858992822e-05, | |
| "loss": 1.9518, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 2.985290222423505e-05, | |
| "loss": 1.9226, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 2.6319614591883445e-05, | |
| "loss": 1.9602, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 2.2958656968642224e-05, | |
| "loss": 1.8641, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 1.9783728232897674e-05, | |
| "loss": 1.8891, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 1.6807769030594122e-05, | |
| "loss": 1.7877, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.4042909030642942e-05, | |
| "loss": 1.8873, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.1500417485781092e-05, | |
| "loss": 1.9452, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 9.190657300387505e-06, | |
| "loss": 1.8256, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 7.123042792471594e-06, | |
| "loss": 1.817, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 5.306001321991061e-06, | |
| "loss": 1.9063, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.7469389418978793e-06, | |
| "loss": 1.9345, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.4522102119145282e-06, | |
| "loss": 1.8102, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 1.42709229807627e-06, | |
| "loss": 1.8263, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 6.757634636067098e-07, | |
| "loss": 1.9225, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 2.0128603879540573e-07, | |
| "loss": 1.8347, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 5.593939290255423e-09, | |
| "loss": 1.8634, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 1.798671007156372, | |
| "eval_runtime": 14.7214, | |
| "eval_samples_per_second": 21.058, | |
| "eval_steps_per_second": 2.649, | |
| "step": 246 | |
| } | |
| ], | |
| "max_steps": 246, | |
| "num_train_epochs": 1, | |
| "total_flos": 256588775424000.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |