{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9961439588688946,
"eval_steps": 500,
"global_step": 15500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.032133676092544985,
"grad_norm": 7.696083068847656,
"learning_rate": 2.9039845758354757e-06,
"loss": 0.4976,
"step": 500
},
{
"epoch": 0.06426735218508997,
"grad_norm": 4.741117000579834,
"learning_rate": 2.807583547557841e-06,
"loss": 0.3532,
"step": 1000
},
{
"epoch": 0.09640102827763496,
"grad_norm": 6.354885101318359,
"learning_rate": 2.711182519280206e-06,
"loss": 0.3195,
"step": 1500
},
{
"epoch": 0.12853470437017994,
"grad_norm": 4.841858386993408,
"learning_rate": 2.614781491002571e-06,
"loss": 0.3079,
"step": 2000
},
{
"epoch": 0.16066838046272494,
"grad_norm": 4.776275634765625,
"learning_rate": 2.518573264781491e-06,
"loss": 0.3067,
"step": 2500
},
{
"epoch": 0.1928020565552699,
"grad_norm": 5.285233974456787,
"learning_rate": 2.422172236503856e-06,
"loss": 0.2957,
"step": 3000
},
{
"epoch": 0.2249357326478149,
"grad_norm": 5.628823757171631,
"learning_rate": 2.3257712082262213e-06,
"loss": 0.3086,
"step": 3500
},
{
"epoch": 0.2570694087403599,
"grad_norm": 4.082389831542969,
"learning_rate": 2.229370179948586e-06,
"loss": 0.2927,
"step": 4000
},
{
"epoch": 0.2892030848329049,
"grad_norm": 5.4696478843688965,
"learning_rate": 2.1331619537275066e-06,
"loss": 0.2922,
"step": 4500
},
{
"epoch": 0.3213367609254499,
"grad_norm": 4.862800598144531,
"learning_rate": 2.0367609254498712e-06,
"loss": 0.2931,
"step": 5000
},
{
"epoch": 0.35347043701799485,
"grad_norm": 4.961813449859619,
"learning_rate": 1.9403598971722367e-06,
"loss": 0.2957,
"step": 5500
},
{
"epoch": 0.3856041131105398,
"grad_norm": 4.734184741973877,
"learning_rate": 1.8439588688946016e-06,
"loss": 0.2809,
"step": 6000
},
{
"epoch": 0.41773778920308485,
"grad_norm": 4.716980934143066,
"learning_rate": 1.7477506426735218e-06,
"loss": 0.2773,
"step": 6500
},
{
"epoch": 0.4498714652956298,
"grad_norm": 4.844335079193115,
"learning_rate": 1.651349614395887e-06,
"loss": 0.2728,
"step": 7000
},
{
"epoch": 0.4820051413881748,
"grad_norm": 5.491813659667969,
"learning_rate": 1.554948586118252e-06,
"loss": 0.2888,
"step": 7500
},
{
"epoch": 0.5141388174807198,
"grad_norm": 4.701641082763672,
"learning_rate": 1.458547557840617e-06,
"loss": 0.2863,
"step": 8000
},
{
"epoch": 0.5462724935732648,
"grad_norm": 5.017972469329834,
"learning_rate": 1.3623393316195374e-06,
"loss": 0.2813,
"step": 8500
},
{
"epoch": 0.5784061696658098,
"grad_norm": 5.8628764152526855,
"learning_rate": 1.2659383033419025e-06,
"loss": 0.2695,
"step": 9000
},
{
"epoch": 0.6105398457583547,
"grad_norm": 5.396206378936768,
"learning_rate": 1.1695372750642673e-06,
"loss": 0.2834,
"step": 9500
},
{
"epoch": 0.6426735218508998,
"grad_norm": 4.796625137329102,
"learning_rate": 1.0731362467866324e-06,
"loss": 0.2739,
"step": 10000
},
{
"epoch": 0.6748071979434447,
"grad_norm": 3.604219436645508,
"learning_rate": 9.769280205655526e-07,
"loss": 0.2744,
"step": 10500
},
{
"epoch": 0.7069408740359897,
"grad_norm": 4.8642048835754395,
"learning_rate": 8.80719794344473e-07,
"loss": 0.2849,
"step": 11000
},
{
"epoch": 0.7390745501285347,
"grad_norm": 4.076746940612793,
"learning_rate": 7.84318766066838e-07,
"loss": 0.2808,
"step": 11500
},
{
"epoch": 0.7712082262210797,
"grad_norm": 2.8937087059020996,
"learning_rate": 6.879177377892031e-07,
"loss": 0.2796,
"step": 12000
},
{
"epoch": 0.8033419023136247,
"grad_norm": 4.379210948944092,
"learning_rate": 5.915167095115681e-07,
"loss": 0.2772,
"step": 12500
},
{
"epoch": 0.8354755784061697,
"grad_norm": 6.368309020996094,
"learning_rate": 4.951156812339331e-07,
"loss": 0.2813,
"step": 13000
},
{
"epoch": 0.8676092544987146,
"grad_norm": 5.409502029418945,
"learning_rate": 3.9871465295629823e-07,
"loss": 0.2756,
"step": 13500
},
{
"epoch": 0.8997429305912596,
"grad_norm": 2.8725733757019043,
"learning_rate": 3.0231362467866326e-07,
"loss": 0.2771,
"step": 14000
},
{
"epoch": 0.9318766066838047,
"grad_norm": 8.13409423828125,
"learning_rate": 2.059125964010283e-07,
"loss": 0.283,
"step": 14500
},
{
"epoch": 0.9640102827763496,
"grad_norm": 5.422169208526611,
"learning_rate": 1.0970437017994858e-07,
"loss": 0.2731,
"step": 15000
},
{
"epoch": 0.9961439588688946,
"grad_norm": 4.597813606262207,
"learning_rate": 1.3303341902313626e-08,
"loss": 0.2865,
"step": 15500
}
],
"logging_steps": 500,
"max_steps": 15560,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}