| { | |
| "best_global_step": 2000, | |
| "best_metric": 0.9999500948197181, | |
| "best_model_checkpoint": "trained_models/intent_classifier/checkpoint-2000", | |
| "epoch": 1.596169193934557, | |
| "eval_steps": 1000, | |
| "global_step": 2000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0007980845969672786, | |
| "grad_norm": 51713.75390625, | |
| "learning_rate": 0.0, | |
| "loss": 0.695, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.07980845969672785, | |
| "grad_norm": 91372.3984375, | |
| "learning_rate": 1.98e-06, | |
| "loss": 0.6689, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.1596169193934557, | |
| "grad_norm": 47582.55078125, | |
| "learning_rate": 3.980000000000001e-06, | |
| "loss": 0.2889, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.23942537909018355, | |
| "grad_norm": 2023.180908203125, | |
| "learning_rate": 5.98e-06, | |
| "loss": 0.0206, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.3192338387869114, | |
| "grad_norm": 105.5784912109375, | |
| "learning_rate": 7.980000000000002e-06, | |
| "loss": 0.0006, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.39904229848363926, | |
| "grad_norm": 2.8826353549957275, | |
| "learning_rate": 9.980000000000001e-06, | |
| "loss": 0.0016, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.4788507581803671, | |
| "grad_norm": 0.18906153738498688, | |
| "learning_rate": 1.198e-05, | |
| "loss": 0.0, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.5586592178770949, | |
| "grad_norm": 0.06490982323884964, | |
| "learning_rate": 1.398e-05, | |
| "loss": 0.001, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.6384676775738228, | |
| "grad_norm": 0.07483379542827606, | |
| "learning_rate": 1.5980000000000003e-05, | |
| "loss": 0.0026, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.7182761372705507, | |
| "grad_norm": 0.029934018850326538, | |
| "learning_rate": 1.798e-05, | |
| "loss": 0.0027, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.7980845969672785, | |
| "grad_norm": 0.023010307922959328, | |
| "learning_rate": 1.9980000000000002e-05, | |
| "loss": 0.0049, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.7980845969672785, | |
| "eval_accuracy": 0.9999001896396846, | |
| "eval_f1": 0.9999001896386902, | |
| "eval_f1_macro": 0.9999001896386903, | |
| "eval_loss": 0.0016707783797755837, | |
| "eval_runtime": 88.2175, | |
| "eval_samples_per_second": 227.143, | |
| "eval_steps_per_second": 1.78, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.8778930566640064, | |
| "grad_norm": 0.030565178021788597, | |
| "learning_rate": 1.9282348677056906e-05, | |
| "loss": 0.0091, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.9577015163607342, | |
| "grad_norm": 0.025367770344018936, | |
| "learning_rate": 1.8557448350851758e-05, | |
| "loss": 0.0029, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.037509976057462, | |
| "grad_norm": 0.01835496723651886, | |
| "learning_rate": 1.7832548024646613e-05, | |
| "loss": 0.0025, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.1173184357541899, | |
| "grad_norm": 0.013528961688280106, | |
| "learning_rate": 1.7107647698441465e-05, | |
| "loss": 0.0004, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.1971268954509178, | |
| "grad_norm": 0.013094124384224415, | |
| "learning_rate": 1.638274737223632e-05, | |
| "loss": 0.0043, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.2769353551476457, | |
| "grad_norm": 0.010992957279086113, | |
| "learning_rate": 1.565784704603117e-05, | |
| "loss": 0.0001, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.3567438148443736, | |
| "grad_norm": 0.009696166031062603, | |
| "learning_rate": 1.4932946719826025e-05, | |
| "loss": 0.0001, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.4365522745411012, | |
| "grad_norm": 0.008244643919169903, | |
| "learning_rate": 1.4208046393620877e-05, | |
| "loss": 0.0, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.5163607342378294, | |
| "grad_norm": 0.00833104643970728, | |
| "learning_rate": 1.348314606741573e-05, | |
| "loss": 0.0, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.596169193934557, | |
| "grad_norm": 0.008447665721178055, | |
| "learning_rate": 1.2758245741210584e-05, | |
| "loss": 0.0031, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.596169193934557, | |
| "eval_accuracy": 0.9999500948198423, | |
| "eval_f1": 0.9999500948197181, | |
| "eval_f1_macro": 0.999950094819718, | |
| "eval_loss": 0.000961420766543597, | |
| "eval_runtime": 88.1534, | |
| "eval_samples_per_second": 227.308, | |
| "eval_steps_per_second": 1.781, | |
| "step": 2000 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 3759, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 3, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 0 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8475131698612224.0, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |