talentfilterdeployment / trainer_state.json
chiemekakalu's picture
Rename model/trainer_state.json to trainer_state.json
26f908e verified
{
"best_metric": 0.20053359866142273,
"best_model_checkpoint": "models/startup-team-phi-qlora/checkpoint-100",
"epoch": 2.937728937728938,
"eval_steps": 10,
"global_step": 102,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.029304029304029304,
"grad_norm": 0.14737844467163086,
"learning_rate": 3.3333333333333335e-05,
"loss": 1.1363,
"mean_token_accuracy": 0.7657624632120132,
"step": 1
},
{
"epoch": 0.14652014652014653,
"grad_norm": 0.18032211065292358,
"learning_rate": 0.0001666666666666667,
"loss": 1.1028,
"mean_token_accuracy": 0.771841392852366,
"step": 5
},
{
"epoch": 0.29304029304029305,
"grad_norm": 0.2304220199584961,
"learning_rate": 0.00019914448613738106,
"loss": 1.0475,
"mean_token_accuracy": 0.7810361601412297,
"step": 10
},
{
"epoch": 0.29304029304029305,
"eval_loss": 0.9339649081230164,
"eval_mean_token_accuracy": 0.7965992987155914,
"eval_runtime": 50.6795,
"eval_samples_per_second": 1.204,
"eval_steps_per_second": 0.158,
"step": 10
},
{
"epoch": 0.43956043956043955,
"grad_norm": 0.30109331011772156,
"learning_rate": 0.0001956940335732209,
"loss": 0.8973,
"mean_token_accuracy": 0.8004154480993748,
"step": 15
},
{
"epoch": 0.5860805860805861,
"grad_norm": 0.3704577684402466,
"learning_rate": 0.00018968727415326884,
"loss": 0.703,
"mean_token_accuracy": 0.8325344368815422,
"step": 20
},
{
"epoch": 0.5860805860805861,
"eval_loss": 0.5746913552284241,
"eval_mean_token_accuracy": 0.8676818311214447,
"eval_runtime": 44.3992,
"eval_samples_per_second": 1.374,
"eval_steps_per_second": 0.18,
"step": 20
},
{
"epoch": 0.7326007326007326,
"grad_norm": 0.3730122447013855,
"learning_rate": 0.00018128466845916154,
"loss": 0.5533,
"mean_token_accuracy": 0.8719696968793869,
"step": 25
},
{
"epoch": 0.8791208791208791,
"grad_norm": 0.30677032470703125,
"learning_rate": 0.00017071067811865476,
"loss": 0.4128,
"mean_token_accuracy": 0.9108748823404312,
"step": 30
},
{
"epoch": 0.8791208791208791,
"eval_loss": 0.32779568433761597,
"eval_mean_token_accuracy": 0.9334485232830048,
"eval_runtime": 49.8784,
"eval_samples_per_second": 1.223,
"eval_steps_per_second": 0.16,
"step": 30
},
{
"epoch": 1.0,
"grad_norm": 0.5578669905662537,
"learning_rate": 0.00015824776968678024,
"loss": 0.335,
"mean_token_accuracy": 0.9305518567562103,
"step": 35
},
{
"epoch": 1.1465201465201464,
"grad_norm": 0.2113618403673172,
"learning_rate": 0.00014422886902190014,
"loss": 0.306,
"mean_token_accuracy": 0.9388440825045109,
"step": 40
},
{
"epoch": 1.1465201465201464,
"eval_loss": 0.2558988034725189,
"eval_mean_token_accuracy": 0.9488431662321091,
"eval_runtime": 42.7021,
"eval_samples_per_second": 1.428,
"eval_steps_per_second": 0.187,
"step": 40
},
{
"epoch": 1.293040293040293,
"grad_norm": 0.16832281649112701,
"learning_rate": 0.00012902846772544624,
"loss": 0.2482,
"mean_token_accuracy": 0.9499588944017887,
"step": 45
},
{
"epoch": 1.4395604395604396,
"grad_norm": 0.1472930908203125,
"learning_rate": 0.00011305261922200519,
"loss": 0.2318,
"mean_token_accuracy": 0.9537023350596427,
"step": 50
},
{
"epoch": 1.4395604395604396,
"eval_loss": 0.22639355063438416,
"eval_mean_token_accuracy": 0.953661359846592,
"eval_runtime": 42.6645,
"eval_samples_per_second": 1.43,
"eval_steps_per_second": 0.188,
"step": 50
},
{
"epoch": 1.5860805860805862,
"grad_norm": 0.16793935000896454,
"learning_rate": 9.67280917178224e-05,
"loss": 0.2364,
"mean_token_accuracy": 0.9529081016778946,
"step": 55
},
{
"epoch": 1.7326007326007327,
"grad_norm": 0.15153780579566956,
"learning_rate": 8.049096779838719e-05,
"loss": 0.2431,
"mean_token_accuracy": 0.9506231568753719,
"step": 60
},
{
"epoch": 1.7326007326007327,
"eval_loss": 0.2124478667974472,
"eval_mean_token_accuracy": 0.9563756883144379,
"eval_runtime": 42.4992,
"eval_samples_per_second": 1.435,
"eval_steps_per_second": 0.188,
"step": 60
},
{
"epoch": 1.879120879120879,
"grad_norm": 0.1492297649383545,
"learning_rate": 6.477499520787665e-05,
"loss": 0.2357,
"mean_token_accuracy": 0.9522971525788307,
"step": 65
},
{
"epoch": 2.0,
"grad_norm": 0.5314327478408813,
"learning_rate": 5.000000000000002e-05,
"loss": 0.2323,
"mean_token_accuracy": 0.9523700963367115,
"step": 70
},
{
"epoch": 2.0,
"eval_loss": 0.20574581623077393,
"eval_mean_token_accuracy": 0.9567286148667336,
"eval_runtime": 43.0255,
"eval_samples_per_second": 1.418,
"eval_steps_per_second": 0.186,
"step": 70
},
{
"epoch": 2.1465201465201464,
"grad_norm": 0.15407854318618774,
"learning_rate": 3.6560671583635467e-05,
"loss": 0.2353,
"mean_token_accuracy": 0.9516373299062252,
"step": 75
},
{
"epoch": 2.293040293040293,
"grad_norm": 0.21263441443443298,
"learning_rate": 2.4816019252102273e-05,
"loss": 0.2195,
"mean_token_accuracy": 0.9543178603053093,
"step": 80
},
{
"epoch": 2.293040293040293,
"eval_loss": 0.202470600605011,
"eval_mean_token_accuracy": 0.957635909318924,
"eval_runtime": 42.2023,
"eval_samples_per_second": 1.445,
"eval_steps_per_second": 0.19,
"step": 80
},
{
"epoch": 2.4395604395604398,
"grad_norm": 0.21033723652362823,
"learning_rate": 1.5079781847342123e-05,
"loss": 0.2149,
"mean_token_accuracy": 0.9557917781174183,
"step": 85
},
{
"epoch": 2.586080586080586,
"grad_norm": 0.131508931517601,
"learning_rate": 7.612046748871327e-06,
"loss": 0.2072,
"mean_token_accuracy": 0.9576979361474514,
"step": 90
},
{
"epoch": 2.586080586080586,
"eval_loss": 0.20078732073307037,
"eval_mean_token_accuracy": 0.9580192342400551,
"eval_runtime": 42.6779,
"eval_samples_per_second": 1.429,
"eval_steps_per_second": 0.187,
"step": 90
},
{
"epoch": 2.7326007326007327,
"grad_norm": 0.1332683563232422,
"learning_rate": 2.612302072266637e-06,
"loss": 0.2166,
"mean_token_accuracy": 0.9556084908545017,
"step": 95
},
{
"epoch": 2.879120879120879,
"grad_norm": 0.13702718913555145,
"learning_rate": 2.141076761396521e-07,
"loss": 0.2034,
"mean_token_accuracy": 0.9584921665489674,
"step": 100
},
{
"epoch": 2.879120879120879,
"eval_loss": 0.20053359866142273,
"eval_mean_token_accuracy": 0.9581777453422546,
"eval_runtime": 43.3904,
"eval_samples_per_second": 1.406,
"eval_steps_per_second": 0.184,
"step": 100
}
],
"logging_steps": 5,
"max_steps": 102,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.61256195259904e+16,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}