| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 50, |
| "global_step": 448, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0447427293064877, |
| "grad_norm": 3.718431234359741, |
| "learning_rate": 4e-07, |
| "loss": 1.7667, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0894854586129754, |
| "grad_norm": 4.964531421661377, |
| "learning_rate": 8.444444444444444e-07, |
| "loss": 1.9644, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.1342281879194631, |
| "grad_norm": 3.4113519191741943, |
| "learning_rate": 1.2888888888888889e-06, |
| "loss": 1.5387, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.1789709172259508, |
| "grad_norm": 3.774691343307495, |
| "learning_rate": 1.7333333333333334e-06, |
| "loss": 1.7512, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.22371364653243847, |
| "grad_norm": 3.0480496883392334, |
| "learning_rate": 1.999513878924193e-06, |
| "loss": 1.4911, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.22371364653243847, |
| "eval_loss": 1.5801321268081665, |
| "eval_runtime": 14.3639, |
| "eval_samples_per_second": 13.019, |
| "eval_steps_per_second": 6.544, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.2684563758389262, |
| "grad_norm": 1.273130178451538, |
| "learning_rate": 1.994050443200529e-06, |
| "loss": 1.5055, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.3131991051454139, |
| "grad_norm": 1.0292898416519165, |
| "learning_rate": 1.9825492157072085e-06, |
| "loss": 1.2169, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.3579418344519016, |
| "grad_norm": 0.6090050339698792, |
| "learning_rate": 1.96508005408292e-06, |
| "loss": 1.143, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.40268456375838924, |
| "grad_norm": 0.6865583658218384, |
| "learning_rate": 1.9417490647742737e-06, |
| "loss": 1.3681, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.44742729306487694, |
| "grad_norm": 0.5751066207885742, |
| "learning_rate": 1.9126979585527774e-06, |
| "loss": 1.3044, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.44742729306487694, |
| "eval_loss": 1.3282246589660645, |
| "eval_runtime": 14.2866, |
| "eval_samples_per_second": 13.089, |
| "eval_steps_per_second": 6.58, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.49217002237136465, |
| "grad_norm": 0.5603207945823669, |
| "learning_rate": 1.878103189773686e-06, |
| "loss": 1.2144, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.5369127516778524, |
| "grad_norm": 0.35719171166419983, |
| "learning_rate": 1.8381748846047758e-06, |
| "loss": 1.144, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.5816554809843401, |
| "grad_norm": 0.42039912939071655, |
| "learning_rate": 1.7931555647349358e-06, |
| "loss": 1.1254, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.6263982102908278, |
| "grad_norm": 0.5204155445098877, |
| "learning_rate": 1.7433186743146559e-06, |
| "loss": 1.1564, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.6711409395973155, |
| "grad_norm": 0.5500156879425049, |
| "learning_rate": 1.6889669190756866e-06, |
| "loss": 1.3011, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.6711409395973155, |
| "eval_loss": 1.2590997219085693, |
| "eval_runtime": 14.1462, |
| "eval_samples_per_second": 13.219, |
| "eval_steps_per_second": 6.645, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.7158836689038032, |
| "grad_norm": 0.49012938141822815, |
| "learning_rate": 1.6304304277179263e-06, |
| "loss": 1.1263, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.7606263982102909, |
| "grad_norm": 0.5430646538734436, |
| "learning_rate": 1.5680647467311555e-06, |
| "loss": 1.1312, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.8053691275167785, |
| "grad_norm": 0.41151630878448486, |
| "learning_rate": 1.5022486808309168e-06, |
| "loss": 1.2291, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.8501118568232662, |
| "grad_norm": 0.5034804940223694, |
| "learning_rate": 1.4333819921255834e-06, |
| "loss": 1.1628, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.8948545861297539, |
| "grad_norm": 0.427683025598526, |
| "learning_rate": 1.3618829719897156e-06, |
| "loss": 1.1956, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.8948545861297539, |
| "eval_loss": 1.208958387374878, |
| "eval_runtime": 14.4714, |
| "eval_samples_per_second": 12.922, |
| "eval_steps_per_second": 6.496, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.9395973154362416, |
| "grad_norm": 0.4355259835720062, |
| "learning_rate": 1.2881859003919686e-06, |
| "loss": 0.9802, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.9843400447427293, |
| "grad_norm": 0.492121160030365, |
| "learning_rate": 1.2127384081094166e-06, |
| "loss": 1.2044, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.0268456375838926, |
| "grad_norm": 0.5313476920127869, |
| "learning_rate": 1.1359987578500148e-06, |
| "loss": 1.0695, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.0715883668903803, |
| "grad_norm": 0.45777904987335205, |
| "learning_rate": 1.0584330607974673e-06, |
| "loss": 0.9792, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.116331096196868, |
| "grad_norm": 0.4507655203342438, |
| "learning_rate": 9.805124454850148e-07, |
| "loss": 1.3124, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.116331096196868, |
| "eval_loss": 1.1702429056167603, |
| "eval_runtime": 14.4003, |
| "eval_samples_per_second": 12.986, |
| "eval_steps_per_second": 6.528, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.1610738255033557, |
| "grad_norm": 0.723739743232727, |
| "learning_rate": 9.027101961941923e-07, |
| "loss": 1.2004, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.2058165548098434, |
| "grad_norm": 0.5017468929290771, |
| "learning_rate": 8.254988782597293e-07, |
| "loss": 1.0577, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.250559284116331, |
| "grad_norm": 0.5562602281570435, |
| "learning_rate": 7.493474677412793e-07, |
| "loss": 1.0526, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.2953020134228188, |
| "grad_norm": 0.49207189679145813, |
| "learning_rate": 6.747185028961523e-07, |
| "loss": 1.0995, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.3400447427293065, |
| "grad_norm": 0.45156824588775635, |
| "learning_rate": 6.020652747548007e-07, |
| "loss": 0.9151, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.3400447427293065, |
| "eval_loss": 1.146506667137146, |
| "eval_runtime": 14.3216, |
| "eval_samples_per_second": 13.057, |
| "eval_steps_per_second": 6.564, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.3847874720357942, |
| "grad_norm": 0.45592594146728516, |
| "learning_rate": 5.31829073863304e-07, |
| "loss": 0.9596, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.429530201342282, |
| "grad_norm": 0.4450604021549225, |
| "learning_rate": 4.644365099159442e-07, |
| "loss": 1.1611, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.4742729306487696, |
| "grad_norm": 0.5685081481933594, |
| "learning_rate": 4.002969205582313e-07, |
| "loss": 0.9714, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.5190156599552571, |
| "grad_norm": 0.6313215494155884, |
| "learning_rate": 3.3979988509912437e-07, |
| "loss": 1.1366, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.563758389261745, |
| "grad_norm": 0.5398988127708435, |
| "learning_rate": 2.833128582339887e-07, |
| "loss": 1.0271, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.563758389261745, |
| "eval_loss": 1.1347407102584839, |
| "eval_runtime": 14.3855, |
| "eval_samples_per_second": 12.999, |
| "eval_steps_per_second": 6.534, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.6085011185682325, |
| "grad_norm": 0.6401699781417847, |
| "learning_rate": 2.3117893815088062e-07, |
| "loss": 1.1358, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.6532438478747205, |
| "grad_norm": 0.44730058312416077, |
| "learning_rate": 1.8371478257652906e-07, |
| "loss": 1.0872, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.697986577181208, |
| "grad_norm": 0.5303833484649658, |
| "learning_rate": 1.4120868541980025e-07, |
| "loss": 1.195, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.7427293064876959, |
| "grad_norm": 0.5479047894477844, |
| "learning_rate": 1.0391882569497757e-07, |
| "loss": 0.9301, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.7874720357941833, |
| "grad_norm": 0.4859919846057892, |
| "learning_rate": 7.207169936076973e-08, |
| "loss": 1.0629, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.7874720357941833, |
| "eval_loss": 1.1303904056549072, |
| "eval_runtime": 14.442, |
| "eval_samples_per_second": 12.948, |
| "eval_steps_per_second": 6.509, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.8322147651006713, |
| "grad_norm": 0.5255556106567383, |
| "learning_rate": 4.586074359995118e-08, |
| "loss": 1.0262, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.8769574944071588, |
| "grad_norm": 0.3805781602859497, |
| "learning_rate": 2.544516189565482e-08, |
| "loss": 1.1142, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.9217002237136467, |
| "grad_norm": 0.6308678984642029, |
| "learning_rate": 1.094895704072707e-08, |
| "loss": 0.9937, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.9664429530201342, |
| "grad_norm": 0.439409464597702, |
| "learning_rate": 2.460177953573339e-09, |
| "loss": 0.9789, |
| "step": 440 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 448, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.065477369807667e+16, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|