Barzillian's picture
Upload folder using huggingface_hub
4b52572 verified
{
"best_metric": 0.646373450756073,
"best_model_checkpoint": "knowledge-Distillation/checkpoint-500",
"epoch": 2.0,
"eval_steps": 500,
"global_step": 500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.048,
"grad_norm": 4.009671688079834,
"learning_rate": 4.800000000000001e-06,
"loss": 1.1378,
"step": 12
},
{
"epoch": 0.096,
"grad_norm": 1.55279541015625,
"learning_rate": 9.600000000000001e-06,
"loss": 1.1198,
"step": 24
},
{
"epoch": 0.144,
"grad_norm": 2.4240620136260986,
"learning_rate": 1.44e-05,
"loss": 1.0868,
"step": 36
},
{
"epoch": 0.192,
"grad_norm": 4.712751388549805,
"learning_rate": 1.9200000000000003e-05,
"loss": 1.0798,
"step": 48
},
{
"epoch": 0.24,
"grad_norm": 12.531917572021484,
"learning_rate": 2.4e-05,
"loss": 1.0255,
"step": 60
},
{
"epoch": 0.288,
"grad_norm": 7.460992336273193,
"learning_rate": 2.88e-05,
"loss": 1.0152,
"step": 72
},
{
"epoch": 0.336,
"grad_norm": 12.766624450683594,
"learning_rate": 3.3600000000000004e-05,
"loss": 0.8645,
"step": 84
},
{
"epoch": 0.384,
"grad_norm": 8.186197280883789,
"learning_rate": 3.8400000000000005e-05,
"loss": 0.8678,
"step": 96
},
{
"epoch": 0.432,
"grad_norm": 16.78915786743164,
"learning_rate": 4.32e-05,
"loss": 0.7689,
"step": 108
},
{
"epoch": 0.48,
"grad_norm": 22.758197784423828,
"learning_rate": 4.8e-05,
"loss": 0.7054,
"step": 120
},
{
"epoch": 0.528,
"grad_norm": 10.546210289001465,
"learning_rate": 4.968888888888889e-05,
"loss": 0.8524,
"step": 132
},
{
"epoch": 0.576,
"grad_norm": 9.998793601989746,
"learning_rate": 4.915555555555556e-05,
"loss": 0.7466,
"step": 144
},
{
"epoch": 0.624,
"grad_norm": 16.722980499267578,
"learning_rate": 4.862222222222222e-05,
"loss": 0.8522,
"step": 156
},
{
"epoch": 0.672,
"grad_norm": 9.654285430908203,
"learning_rate": 4.808888888888889e-05,
"loss": 0.748,
"step": 168
},
{
"epoch": 0.72,
"grad_norm": 23.497644424438477,
"learning_rate": 4.755555555555556e-05,
"loss": 0.8113,
"step": 180
},
{
"epoch": 0.768,
"grad_norm": 29.93133544921875,
"learning_rate": 4.702222222222222e-05,
"loss": 0.7375,
"step": 192
},
{
"epoch": 0.816,
"grad_norm": 21.097806930541992,
"learning_rate": 4.648888888888889e-05,
"loss": 0.6061,
"step": 204
},
{
"epoch": 0.864,
"grad_norm": 11.148388862609863,
"learning_rate": 4.5955555555555555e-05,
"loss": 0.8176,
"step": 216
},
{
"epoch": 0.912,
"grad_norm": 41.14344787597656,
"learning_rate": 4.5422222222222225e-05,
"loss": 0.7304,
"step": 228
},
{
"epoch": 0.96,
"grad_norm": 11.007186889648438,
"learning_rate": 4.4888888888888894e-05,
"loss": 0.8662,
"step": 240
},
{
"epoch": 1.0,
"eval_accuracy": 0.7207207207207207,
"eval_f1_macro": 0.6400173200358859,
"eval_f1_micro": 0.7207207207207207,
"eval_f1_weighted": 0.6863741252230461,
"eval_loss": 0.8032302260398865,
"eval_precision_macro": 0.7771056423671171,
"eval_precision_micro": 0.7207207207207207,
"eval_precision_weighted": 0.754485589161179,
"eval_recall_macro": 0.64472593537768,
"eval_recall_micro": 0.7207207207207207,
"eval_recall_weighted": 0.7207207207207207,
"eval_runtime": 162.5414,
"eval_samples_per_second": 6.146,
"eval_steps_per_second": 0.197,
"step": 250
},
{
"epoch": 1.008,
"grad_norm": 8.793386459350586,
"learning_rate": 4.435555555555556e-05,
"loss": 0.6726,
"step": 252
},
{
"epoch": 1.056,
"grad_norm": 8.431066513061523,
"learning_rate": 4.3822222222222227e-05,
"loss": 0.728,
"step": 264
},
{
"epoch": 1.104,
"grad_norm": 13.071714401245117,
"learning_rate": 4.328888888888889e-05,
"loss": 0.5935,
"step": 276
},
{
"epoch": 1.152,
"grad_norm": 11.652353286743164,
"learning_rate": 4.275555555555556e-05,
"loss": 0.5623,
"step": 288
},
{
"epoch": 1.2,
"grad_norm": 15.519989013671875,
"learning_rate": 4.222222222222222e-05,
"loss": 0.5258,
"step": 300
},
{
"epoch": 1.248,
"grad_norm": 12.270814895629883,
"learning_rate": 4.168888888888889e-05,
"loss": 0.5467,
"step": 312
},
{
"epoch": 1.296,
"grad_norm": 45.41197967529297,
"learning_rate": 4.115555555555556e-05,
"loss": 0.687,
"step": 324
},
{
"epoch": 1.3439999999999999,
"grad_norm": 18.447298049926758,
"learning_rate": 4.062222222222222e-05,
"loss": 0.7523,
"step": 336
},
{
"epoch": 1.392,
"grad_norm": 16.430402755737305,
"learning_rate": 4.008888888888889e-05,
"loss": 0.5786,
"step": 348
},
{
"epoch": 1.44,
"grad_norm": 13.083454132080078,
"learning_rate": 3.9555555555555556e-05,
"loss": 0.4946,
"step": 360
},
{
"epoch": 1.488,
"grad_norm": 17.238567352294922,
"learning_rate": 3.9022222222222225e-05,
"loss": 0.6058,
"step": 372
},
{
"epoch": 1.536,
"grad_norm": 18.45892906188965,
"learning_rate": 3.848888888888889e-05,
"loss": 0.5246,
"step": 384
},
{
"epoch": 1.584,
"grad_norm": 23.937816619873047,
"learning_rate": 3.795555555555556e-05,
"loss": 0.6662,
"step": 396
},
{
"epoch": 1.6320000000000001,
"grad_norm": 17.260282516479492,
"learning_rate": 3.742222222222223e-05,
"loss": 0.6118,
"step": 408
},
{
"epoch": 1.6800000000000002,
"grad_norm": 15.230131149291992,
"learning_rate": 3.688888888888889e-05,
"loss": 0.6536,
"step": 420
},
{
"epoch": 1.728,
"grad_norm": 12.828449249267578,
"learning_rate": 3.635555555555556e-05,
"loss": 0.6201,
"step": 432
},
{
"epoch": 1.776,
"grad_norm": 20.50193214416504,
"learning_rate": 3.582222222222222e-05,
"loss": 0.5495,
"step": 444
},
{
"epoch": 1.8239999999999998,
"grad_norm": 15.018689155578613,
"learning_rate": 3.528888888888889e-05,
"loss": 0.4981,
"step": 456
},
{
"epoch": 1.8719999999999999,
"grad_norm": 20.459606170654297,
"learning_rate": 3.475555555555556e-05,
"loss": 0.5907,
"step": 468
},
{
"epoch": 1.92,
"grad_norm": 14.490087509155273,
"learning_rate": 3.4222222222222224e-05,
"loss": 0.519,
"step": 480
},
{
"epoch": 1.968,
"grad_norm": 23.004623413085938,
"learning_rate": 3.368888888888889e-05,
"loss": 0.5737,
"step": 492
},
{
"epoch": 2.0,
"eval_accuracy": 0.7447447447447447,
"eval_f1_macro": 0.7298437618326189,
"eval_f1_micro": 0.7447447447447447,
"eval_f1_weighted": 0.7467396946425192,
"eval_loss": 0.646373450756073,
"eval_precision_macro": 0.7348636077090247,
"eval_precision_micro": 0.7447447447447447,
"eval_precision_weighted": 0.7644157119297172,
"eval_recall_macro": 0.7382166799546654,
"eval_recall_micro": 0.7447447447447447,
"eval_recall_weighted": 0.7447447447447447,
"eval_runtime": 163.7809,
"eval_samples_per_second": 6.1,
"eval_steps_per_second": 0.195,
"step": 500
}
],
"logging_steps": 12,
"max_steps": 1250,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 525305938493952.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}