junaid1993's picture
Upload 8 files
24b1771 verified
{
"best_metric": 0.5962514877319336,
"best_model_checkpoint": "./ttm_finetuned_models/output/checkpoint-60",
"epoch": 30.0,
"eval_steps": 500,
"global_step": 60,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 0.7443529367446899,
"learning_rate": 4.732772046676698e-05,
"loss": 0.8431,
"step": 2
},
{
"epoch": 1.0,
"eval_loss": 0.6096403002738953,
"eval_runtime": 0.3627,
"eval_samples_per_second": 192.988,
"eval_steps_per_second": 2.757,
"step": 2
},
{
"epoch": 2.0,
"grad_norm": 0.8946840763092041,
"learning_rate": 4.809321690693591e-05,
"loss": 0.8393,
"step": 4
},
{
"epoch": 2.0,
"eval_loss": 0.60591721534729,
"eval_runtime": 0.36,
"eval_samples_per_second": 194.421,
"eval_steps_per_second": 2.777,
"step": 4
},
{
"epoch": 3.0,
"grad_norm": 0.7669185996055603,
"learning_rate": 4.936827562256963e-05,
"loss": 0.8184,
"step": 6
},
{
"epoch": 3.0,
"eval_loss": 0.6019852757453918,
"eval_runtime": 0.3543,
"eval_samples_per_second": 197.593,
"eval_steps_per_second": 2.823,
"step": 6
},
{
"epoch": 4.0,
"grad_norm": 0.7224565744400024,
"learning_rate": 5.115174431611605e-05,
"loss": 0.8012,
"step": 8
},
{
"epoch": 4.0,
"eval_loss": 0.6119012832641602,
"eval_runtime": 0.3476,
"eval_samples_per_second": 201.402,
"eval_steps_per_second": 2.877,
"step": 8
},
{
"epoch": 5.0,
"grad_norm": 0.5908293128013611,
"learning_rate": 5.344201122916398e-05,
"loss": 0.7874,
"step": 10
},
{
"epoch": 5.0,
"eval_loss": 0.6097100973129272,
"eval_runtime": 0.3479,
"eval_samples_per_second": 201.225,
"eval_steps_per_second": 2.875,
"step": 10
},
{
"epoch": 6.0,
"grad_norm": 0.6504310369491577,
"learning_rate": 5.623700659902384e-05,
"loss": 0.8027,
"step": 12
},
{
"epoch": 6.0,
"eval_loss": 0.6008864045143127,
"eval_runtime": 0.3626,
"eval_samples_per_second": 193.042,
"eval_steps_per_second": 2.758,
"step": 12
},
{
"epoch": 7.0,
"grad_norm": 0.4692302346229553,
"learning_rate": 5.9534204529214504e-05,
"loss": 0.7916,
"step": 14
},
{
"epoch": 7.0,
"eval_loss": 0.6066852807998657,
"eval_runtime": 0.3625,
"eval_samples_per_second": 193.12,
"eval_steps_per_second": 2.759,
"step": 14
},
{
"epoch": 8.0,
"grad_norm": 1.273665189743042,
"learning_rate": 6.333062527217074e-05,
"loss": 0.79,
"step": 16
},
{
"epoch": 8.0,
"eval_loss": 0.6052869558334351,
"eval_runtime": 0.3656,
"eval_samples_per_second": 191.441,
"eval_steps_per_second": 2.735,
"step": 16
},
{
"epoch": 9.0,
"grad_norm": 0.6313425302505493,
"learning_rate": 6.762283792210435e-05,
"loss": 0.7741,
"step": 18
},
{
"epoch": 9.0,
"eval_loss": 0.6067262887954712,
"eval_runtime": 0.3465,
"eval_samples_per_second": 201.993,
"eval_steps_per_second": 2.886,
"step": 18
},
{
"epoch": 10.0,
"grad_norm": 1.0959845781326294,
"learning_rate": 7.240696351558515e-05,
"loss": 0.7734,
"step": 20
},
{
"epoch": 10.0,
"eval_loss": 0.6191284656524658,
"eval_runtime": 0.7504,
"eval_samples_per_second": 93.288,
"eval_steps_per_second": 1.333,
"step": 20
},
{
"epoch": 11.0,
"grad_norm": 0.440314382314682,
"learning_rate": 7.767867853704304e-05,
"loss": 0.7732,
"step": 22
},
{
"epoch": 11.0,
"eval_loss": 0.6069210171699524,
"eval_runtime": 0.3562,
"eval_samples_per_second": 196.544,
"eval_steps_per_second": 2.808,
"step": 22
},
{
"epoch": 12.0,
"grad_norm": 0.5499612092971802,
"learning_rate": 8.343321882602062e-05,
"loss": 0.7621,
"step": 24
},
{
"epoch": 12.0,
"eval_loss": 0.6009647250175476,
"eval_runtime": 0.3571,
"eval_samples_per_second": 196.025,
"eval_steps_per_second": 2.8,
"step": 24
},
{
"epoch": 13.0,
"grad_norm": 0.5899090766906738,
"learning_rate": 8.966538388264578e-05,
"loss": 0.7535,
"step": 26
},
{
"epoch": 13.0,
"eval_loss": 0.6042256355285645,
"eval_runtime": 0.3477,
"eval_samples_per_second": 201.309,
"eval_steps_per_second": 2.876,
"step": 26
},
{
"epoch": 14.0,
"grad_norm": 0.5770731568336487,
"learning_rate": 9.636954156743349e-05,
"loss": 0.7807,
"step": 28
},
{
"epoch": 14.0,
"eval_loss": 0.6018546223640442,
"eval_runtime": 0.355,
"eval_samples_per_second": 197.191,
"eval_steps_per_second": 2.817,
"step": 28
},
{
"epoch": 15.0,
"grad_norm": 0.423060804605484,
"learning_rate": 0.00010353963319116892,
"loss": 0.7376,
"step": 30
},
{
"epoch": 15.0,
"eval_loss": 0.6036869287490845,
"eval_runtime": 0.3443,
"eval_samples_per_second": 203.313,
"eval_steps_per_second": 2.904,
"step": 30
},
{
"epoch": 16.0,
"grad_norm": 0.444051593542099,
"learning_rate": 0.00011116917899027415,
"loss": 0.7431,
"step": 32
},
{
"epoch": 16.0,
"eval_loss": 0.5995596051216125,
"eval_runtime": 0.4433,
"eval_samples_per_second": 157.902,
"eval_steps_per_second": 2.256,
"step": 32
},
{
"epoch": 17.0,
"grad_norm": 0.7095910906791687,
"learning_rate": 0.00011925128398270726,
"loss": 0.7551,
"step": 34
},
{
"epoch": 17.0,
"eval_loss": 0.6076721549034119,
"eval_runtime": 0.3753,
"eval_samples_per_second": 186.526,
"eval_steps_per_second": 2.665,
"step": 34
},
{
"epoch": 18.0,
"grad_norm": 0.938398540019989,
"learning_rate": 0.0001277786441991034,
"loss": 0.741,
"step": 36
},
{
"epoch": 18.0,
"eval_loss": 0.6111529469490051,
"eval_runtime": 0.3616,
"eval_samples_per_second": 193.562,
"eval_steps_per_second": 2.765,
"step": 36
},
{
"epoch": 19.0,
"grad_norm": 0.45169830322265625,
"learning_rate": 0.00013674355328352613,
"loss": 0.7409,
"step": 38
},
{
"epoch": 19.0,
"eval_loss": 0.5968735814094543,
"eval_runtime": 0.3676,
"eval_samples_per_second": 190.435,
"eval_steps_per_second": 2.72,
"step": 38
},
{
"epoch": 20.0,
"grad_norm": 0.3840300440788269,
"learning_rate": 0.0001461379094578651,
"loss": 0.7308,
"step": 40
},
{
"epoch": 20.0,
"eval_loss": 0.6049572229385376,
"eval_runtime": 0.7422,
"eval_samples_per_second": 94.316,
"eval_steps_per_second": 1.347,
"step": 40
},
{
"epoch": 21.0,
"grad_norm": 1.2706289291381836,
"learning_rate": 0.00015595322284358464,
"loss": 0.7472,
"step": 42
},
{
"epoch": 21.0,
"eval_loss": 0.6062923669815063,
"eval_runtime": 0.3735,
"eval_samples_per_second": 187.409,
"eval_steps_per_second": 2.677,
"step": 42
},
{
"epoch": 22.0,
"grad_norm": 0.5400757193565369,
"learning_rate": 0.00016618062313420584,
"loss": 0.7378,
"step": 44
},
{
"epoch": 22.0,
"eval_loss": 0.5963739156723022,
"eval_runtime": 0.3635,
"eval_samples_per_second": 192.591,
"eval_steps_per_second": 2.751,
"step": 44
},
{
"epoch": 23.0,
"grad_norm": 0.42090097069740295,
"learning_rate": 0.00017681086761159098,
"loss": 0.7316,
"step": 46
},
{
"epoch": 23.0,
"eval_loss": 0.6015997529029846,
"eval_runtime": 0.3627,
"eval_samples_per_second": 192.989,
"eval_steps_per_second": 2.757,
"step": 46
},
{
"epoch": 24.0,
"grad_norm": 0.47120270133018494,
"learning_rate": 0.00018783434949878235,
"loss": 0.7405,
"step": 48
},
{
"epoch": 24.0,
"eval_loss": 0.5992478728294373,
"eval_runtime": 0.3755,
"eval_samples_per_second": 186.405,
"eval_steps_per_second": 2.663,
"step": 48
},
{
"epoch": 25.0,
"grad_norm": 0.3660564422607422,
"learning_rate": 0.00019924110664185264,
"loss": 0.713,
"step": 50
},
{
"epoch": 25.0,
"eval_loss": 0.5973361134529114,
"eval_runtime": 0.371,
"eval_samples_per_second": 188.686,
"eval_steps_per_second": 2.696,
"step": 50
},
{
"epoch": 26.0,
"grad_norm": 0.4799431562423706,
"learning_rate": 0.00021102083051291085,
"loss": 0.7258,
"step": 52
},
{
"epoch": 26.0,
"eval_loss": 0.5997373461723328,
"eval_runtime": 0.3657,
"eval_samples_per_second": 191.402,
"eval_steps_per_second": 2.734,
"step": 52
},
{
"epoch": 27.0,
"grad_norm": 0.30694785714149475,
"learning_rate": 0.0002231628755261397,
"loss": 0.7112,
"step": 54
},
{
"epoch": 27.0,
"eval_loss": 0.5968739986419678,
"eval_runtime": 0.3648,
"eval_samples_per_second": 191.882,
"eval_steps_per_second": 2.741,
"step": 54
},
{
"epoch": 28.0,
"grad_norm": 0.22820888459682465,
"learning_rate": 0.0002356562686584362,
"loss": 0.7121,
"step": 56
},
{
"epoch": 28.0,
"eval_loss": 0.5964872241020203,
"eval_runtime": 0.3458,
"eval_samples_per_second": 202.446,
"eval_steps_per_second": 2.892,
"step": 56
},
{
"epoch": 29.0,
"grad_norm": 0.55999755859375,
"learning_rate": 0.0002484897193659642,
"loss": 0.717,
"step": 58
},
{
"epoch": 29.0,
"eval_loss": 0.597991943359375,
"eval_runtime": 0.3373,
"eval_samples_per_second": 207.519,
"eval_steps_per_second": 2.965,
"step": 58
},
{
"epoch": 30.0,
"grad_norm": 0.25612083077430725,
"learning_rate": 0.0002616516297876592,
"loss": 0.7091,
"step": 60
},
{
"epoch": 30.0,
"eval_loss": 0.5962514877319336,
"eval_runtime": 0.3376,
"eval_samples_per_second": 207.368,
"eval_steps_per_second": 2.962,
"step": 60
}
],
"logging_steps": 500,
"max_steps": 100,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 10,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 15922917144000.0,
"train_batch_size": 288,
"trial_name": null,
"trial_params": null
}