{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 375, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "grad_norm": 2.356316089630127, "learning_rate": 2e-05, "loss": 0.9561, "step": 10 }, { "epoch": 0.16, "grad_norm": 2.9643399715423584, "learning_rate": 2e-05, "loss": 0.5253, "step": 20 }, { "epoch": 0.24, "grad_norm": 3.6455039978027344, "learning_rate": 2e-05, "loss": 0.3936, "step": 30 }, { "epoch": 0.32, "grad_norm": 10.778894424438477, "learning_rate": 2e-05, "loss": 0.3738, "step": 40 }, { "epoch": 0.4, "grad_norm": 2.7228519916534424, "learning_rate": 2e-05, "loss": 0.3413, "step": 50 }, { "epoch": 0.48, "grad_norm": 5.166695594787598, "learning_rate": 2e-05, "loss": 0.345, "step": 60 }, { "epoch": 0.56, "grad_norm": 3.4323439598083496, "learning_rate": 2e-05, "loss": 0.3115, "step": 70 }, { "epoch": 0.64, "grad_norm": 4.534356594085693, "learning_rate": 2e-05, "loss": 0.2928, "step": 80 }, { "epoch": 0.72, "grad_norm": 6.345835208892822, "learning_rate": 2e-05, "loss": 0.2943, "step": 90 }, { "epoch": 0.8, "grad_norm": 5.384378433227539, "learning_rate": 2e-05, "loss": 0.3092, "step": 100 }, { "epoch": 0.88, "grad_norm": 6.48573637008667, "learning_rate": 2e-05, "loss": 0.287, "step": 110 }, { "epoch": 0.96, "grad_norm": 5.112663745880127, "learning_rate": 2e-05, "loss": 0.2571, "step": 120 }, { "epoch": 1.0, "eval_accuracy": 0.8957646462084707, "eval_f1": 0.8177339901477834, "eval_loss": 0.288971871137619, "eval_precision": 0.7918902802623733, "eval_recall": 0.8453214513049013, "eval_runtime": 0.4071, "eval_samples_per_second": 368.482, "eval_steps_per_second": 24.565, "step": 125 }, { "epoch": 1.04, "grad_norm": 2.791001558303833, "learning_rate": 2e-05, "loss": 0.2704, "step": 130 }, { "epoch": 1.12, "grad_norm": 5.1042585372924805, "learning_rate": 2e-05, "loss": 0.2193, "step": 140 }, { "epoch": 1.2, "grad_norm": 3.7543392181396484, "learning_rate": 2e-05, "loss": 0.2822, "step": 150 }, { "epoch": 1.28, "grad_norm": 3.875131130218506, "learning_rate": 2e-05, "loss": 0.2661, "step": 160 }, { "epoch": 1.3599999999999999, "grad_norm": 10.078240394592285, "learning_rate": 2e-05, "loss": 0.2568, "step": 170 }, { "epoch": 1.44, "grad_norm": 4.0438055992126465, "learning_rate": 2e-05, "loss": 0.2369, "step": 180 }, { "epoch": 1.52, "grad_norm": 2.991152048110962, "learning_rate": 2e-05, "loss": 0.2568, "step": 190 }, { "epoch": 1.6, "grad_norm": 5.471771717071533, "learning_rate": 2e-05, "loss": 0.2741, "step": 200 }, { "epoch": 1.6800000000000002, "grad_norm": 2.9698643684387207, "learning_rate": 2e-05, "loss": 0.2338, "step": 210 }, { "epoch": 1.76, "grad_norm": 2.496001958847046, "learning_rate": 2e-05, "loss": 0.2254, "step": 220 }, { "epoch": 1.8399999999999999, "grad_norm": 2.994489908218384, "learning_rate": 2e-05, "loss": 0.2305, "step": 230 }, { "epoch": 1.92, "grad_norm": 5.702557563781738, "learning_rate": 2e-05, "loss": 0.2227, "step": 240 }, { "epoch": 2.0, "grad_norm": 5.2754130363464355, "learning_rate": 2e-05, "loss": 0.2274, "step": 250 }, { "epoch": 2.0, "eval_accuracy": 0.8923408572153183, "eval_f1": 0.8256227758007116, "eval_loss": 0.33088892698287964, "eval_precision": 0.7729039422543031, "eval_recall": 0.8860598345003182, "eval_runtime": 0.4096, "eval_samples_per_second": 366.202, "eval_steps_per_second": 24.413, "step": 250 }, { "epoch": 2.08, "grad_norm": 2.392334461212158, "learning_rate": 2e-05, "loss": 0.1498, "step": 260 }, { "epoch": 2.16, "grad_norm": 3.8733561038970947, "learning_rate": 2e-05, "loss": 0.2306, "step": 270 }, { "epoch": 2.24, "grad_norm": 1.9787098169326782, "learning_rate": 2e-05, "loss": 0.1853, "step": 280 }, { "epoch": 2.32, "grad_norm": 4.077596187591553, "learning_rate": 2e-05, "loss": 0.1701, "step": 290 }, { "epoch": 2.4, "grad_norm": 7.104099750518799, "learning_rate": 2e-05, "loss": 0.1887, "step": 300 }, { "epoch": 2.48, "grad_norm": 3.239741086959839, "learning_rate": 2e-05, "loss": 0.183, "step": 310 }, { "epoch": 2.56, "grad_norm": 2.7079355716705322, "learning_rate": 2e-05, "loss": 0.1804, "step": 320 }, { "epoch": 2.64, "grad_norm": 4.828829288482666, "learning_rate": 2e-05, "loss": 0.2166, "step": 330 }, { "epoch": 2.7199999999999998, "grad_norm": 2.2557036876678467, "learning_rate": 2e-05, "loss": 0.168, "step": 340 }, { "epoch": 2.8, "grad_norm": 4.428867816925049, "learning_rate": 2e-05, "loss": 0.296, "step": 350 }, { "epoch": 2.88, "grad_norm": 4.281528949737549, "learning_rate": 2e-05, "loss": 0.2209, "step": 360 }, { "epoch": 2.96, "grad_norm": 4.2655792236328125, "learning_rate": 2e-05, "loss": 0.2238, "step": 370 } ], "logging_steps": 10, "max_steps": 375, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 473453787413376.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }