ThreatScan / checkpoint-2000 /trainer_state.json
Addyk24's picture
Upload fine-tuned Intent Classifier model -- 23:43
642da08 verified
{
"best_global_step": 2000,
"best_metric": 0.9999500948197181,
"best_model_checkpoint": "trained_models/intent_classifier/checkpoint-2000",
"epoch": 1.596169193934557,
"eval_steps": 1000,
"global_step": 2000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0007980845969672786,
"grad_norm": 51713.75390625,
"learning_rate": 0.0,
"loss": 0.695,
"step": 1
},
{
"epoch": 0.07980845969672785,
"grad_norm": 91372.3984375,
"learning_rate": 1.98e-06,
"loss": 0.6689,
"step": 100
},
{
"epoch": 0.1596169193934557,
"grad_norm": 47582.55078125,
"learning_rate": 3.980000000000001e-06,
"loss": 0.2889,
"step": 200
},
{
"epoch": 0.23942537909018355,
"grad_norm": 2023.180908203125,
"learning_rate": 5.98e-06,
"loss": 0.0206,
"step": 300
},
{
"epoch": 0.3192338387869114,
"grad_norm": 105.5784912109375,
"learning_rate": 7.980000000000002e-06,
"loss": 0.0006,
"step": 400
},
{
"epoch": 0.39904229848363926,
"grad_norm": 2.8826353549957275,
"learning_rate": 9.980000000000001e-06,
"loss": 0.0016,
"step": 500
},
{
"epoch": 0.4788507581803671,
"grad_norm": 0.18906153738498688,
"learning_rate": 1.198e-05,
"loss": 0.0,
"step": 600
},
{
"epoch": 0.5586592178770949,
"grad_norm": 0.06490982323884964,
"learning_rate": 1.398e-05,
"loss": 0.001,
"step": 700
},
{
"epoch": 0.6384676775738228,
"grad_norm": 0.07483379542827606,
"learning_rate": 1.5980000000000003e-05,
"loss": 0.0026,
"step": 800
},
{
"epoch": 0.7182761372705507,
"grad_norm": 0.029934018850326538,
"learning_rate": 1.798e-05,
"loss": 0.0027,
"step": 900
},
{
"epoch": 0.7980845969672785,
"grad_norm": 0.023010307922959328,
"learning_rate": 1.9980000000000002e-05,
"loss": 0.0049,
"step": 1000
},
{
"epoch": 0.7980845969672785,
"eval_accuracy": 0.9999001896396846,
"eval_f1": 0.9999001896386902,
"eval_f1_macro": 0.9999001896386903,
"eval_loss": 0.0016707783797755837,
"eval_runtime": 88.2175,
"eval_samples_per_second": 227.143,
"eval_steps_per_second": 1.78,
"step": 1000
},
{
"epoch": 0.8778930566640064,
"grad_norm": 0.030565178021788597,
"learning_rate": 1.9282348677056906e-05,
"loss": 0.0091,
"step": 1100
},
{
"epoch": 0.9577015163607342,
"grad_norm": 0.025367770344018936,
"learning_rate": 1.8557448350851758e-05,
"loss": 0.0029,
"step": 1200
},
{
"epoch": 1.037509976057462,
"grad_norm": 0.01835496723651886,
"learning_rate": 1.7832548024646613e-05,
"loss": 0.0025,
"step": 1300
},
{
"epoch": 1.1173184357541899,
"grad_norm": 0.013528961688280106,
"learning_rate": 1.7107647698441465e-05,
"loss": 0.0004,
"step": 1400
},
{
"epoch": 1.1971268954509178,
"grad_norm": 0.013094124384224415,
"learning_rate": 1.638274737223632e-05,
"loss": 0.0043,
"step": 1500
},
{
"epoch": 1.2769353551476457,
"grad_norm": 0.010992957279086113,
"learning_rate": 1.565784704603117e-05,
"loss": 0.0001,
"step": 1600
},
{
"epoch": 1.3567438148443736,
"grad_norm": 0.009696166031062603,
"learning_rate": 1.4932946719826025e-05,
"loss": 0.0001,
"step": 1700
},
{
"epoch": 1.4365522745411012,
"grad_norm": 0.008244643919169903,
"learning_rate": 1.4208046393620877e-05,
"loss": 0.0,
"step": 1800
},
{
"epoch": 1.5163607342378294,
"grad_norm": 0.00833104643970728,
"learning_rate": 1.348314606741573e-05,
"loss": 0.0,
"step": 1900
},
{
"epoch": 1.596169193934557,
"grad_norm": 0.008447665721178055,
"learning_rate": 1.2758245741210584e-05,
"loss": 0.0031,
"step": 2000
},
{
"epoch": 1.596169193934557,
"eval_accuracy": 0.9999500948198423,
"eval_f1": 0.9999500948197181,
"eval_f1_macro": 0.999950094819718,
"eval_loss": 0.000961420766543597,
"eval_runtime": 88.1534,
"eval_samples_per_second": 227.308,
"eval_steps_per_second": 1.781,
"step": 2000
}
],
"logging_steps": 100,
"max_steps": 3759,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 1000,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 8475131698612224.0,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}