{ "best_metric": 0.39622641509433965, "best_model_checkpoint": "./Validated_Balanced_Raw_Data_model_boost8_outputs/checkpoint-80", "epoch": 5.0, "eval_steps": 500, "global_step": 400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.625, "grad_norm": 1.026680588722229, "learning_rate": 2.9541003989089956e-05, "loss": 1.3729, "step": 50 }, { "epoch": 1.0, "eval_accuracy": 0.39622641509433965, "eval_loss": 1.3302552700042725, "eval_runtime": 1.4024, "eval_samples_per_second": 151.165, "eval_steps_per_second": 9.983, "step": 80 }, { "epoch": 1.25, "grad_norm": 1.0671523809432983, "learning_rate": 2.6837107640945904e-05, "loss": 1.3422, "step": 100 }, { "epoch": 1.875, "grad_norm": 1.1258270740509033, "learning_rate": 2.2139210895556104e-05, "loss": 1.3243, "step": 150 }, { "epoch": 2.0, "eval_accuracy": 0.38207547169811323, "eval_loss": 1.3050692081451416, "eval_runtime": 1.3895, "eval_samples_per_second": 152.569, "eval_steps_per_second": 10.075, "step": 160 }, { "epoch": 2.5, "grad_norm": 1.1245783567428589, "learning_rate": 1.623869018208499e-05, "loss": 1.3093, "step": 200 }, { "epoch": 3.0, "eval_accuracy": 0.37735849056603776, "eval_loss": 1.2962387800216675, "eval_runtime": 1.3825, "eval_samples_per_second": 153.341, "eval_steps_per_second": 10.126, "step": 240 }, { "epoch": 3.125, "grad_norm": 0.9223591685295105, "learning_rate": 1.0129507961929749e-05, "loss": 1.3138, "step": 250 }, { "epoch": 3.75, "grad_norm": 1.2660913467407227, "learning_rate": 4.840776425613887e-06, "loss": 1.2908, "step": 300 }, { "epoch": 4.0, "eval_accuracy": 0.38207547169811323, "eval_loss": 1.2940889596939087, "eval_runtime": 1.3786, "eval_samples_per_second": 153.781, "eval_steps_per_second": 10.155, "step": 320 }, { "epoch": 4.375, "grad_norm": 1.12974214553833, "learning_rate": 1.2634001001741375e-06, "loss": 1.3193, "step": 350 }, { "epoch": 5.0, "grad_norm": 1.500447392463684, "learning_rate": 0.0, "loss": 1.2836, "step": 400 }, { "epoch": 5.0, "eval_accuracy": 0.37735849056603776, "eval_loss": 1.2936153411865234, "eval_runtime": 1.4054, "eval_samples_per_second": 150.848, "eval_steps_per_second": 9.962, "step": 400 }, { "epoch": 5.0, "step": 400, "total_flos": 4.2409746528731136e+17, "train_loss": 1.319512176513672, "train_runtime": 85.175, "train_samples_per_second": 37.335, "train_steps_per_second": 4.696 } ], "logging_steps": 50, "max_steps": 400, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.2409746528731136e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }