{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9986996098829649,
"eval_steps": 500,
"global_step": 384,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.002600780234070221,
"grad_norm": 0.02962934412062168,
"learning_rate": 1.282051282051282e-06,
"loss": 0.619,
"step": 1
},
{
"epoch": 0.02600780234070221,
"grad_norm": 0.06379027664661407,
"learning_rate": 1.282051282051282e-05,
"loss": 0.6962,
"step": 10
},
{
"epoch": 0.05201560468140442,
"grad_norm": 0.0363883376121521,
"learning_rate": 2.564102564102564e-05,
"loss": 0.7759,
"step": 20
},
{
"epoch": 0.07802340702210664,
"grad_norm": 0.03419478237628937,
"learning_rate": 3.846153846153846e-05,
"loss": 0.8087,
"step": 30
},
{
"epoch": 0.10403120936280884,
"grad_norm": 0.04424262419342995,
"learning_rate": 4.985507246376812e-05,
"loss": 0.7775,
"step": 40
},
{
"epoch": 0.13003901170351106,
"grad_norm": 0.22272075712680817,
"learning_rate": 4.840579710144928e-05,
"loss": 0.7476,
"step": 50
},
{
"epoch": 0.15604681404421328,
"grad_norm": 0.049193304032087326,
"learning_rate": 4.695652173913044e-05,
"loss": 0.6617,
"step": 60
},
{
"epoch": 0.18205461638491546,
"grad_norm": 0.04189423844218254,
"learning_rate": 4.5507246376811595e-05,
"loss": 0.7254,
"step": 70
},
{
"epoch": 0.20806241872561768,
"grad_norm": 0.033223457634449005,
"learning_rate": 4.405797101449275e-05,
"loss": 0.7454,
"step": 80
},
{
"epoch": 0.2340702210663199,
"grad_norm": 0.023022688925266266,
"learning_rate": 4.2608695652173916e-05,
"loss": 0.7263,
"step": 90
},
{
"epoch": 0.26007802340702213,
"grad_norm": 0.1517011970281601,
"learning_rate": 4.115942028985507e-05,
"loss": 0.7241,
"step": 100
},
{
"epoch": 0.28608582574772434,
"grad_norm": 0.041623640805482864,
"learning_rate": 3.971014492753624e-05,
"loss": 0.647,
"step": 110
},
{
"epoch": 0.31209362808842656,
"grad_norm": 0.03412195295095444,
"learning_rate": 3.8260869565217395e-05,
"loss": 0.6991,
"step": 120
},
{
"epoch": 0.3381014304291287,
"grad_norm": 0.02426602691411972,
"learning_rate": 3.681159420289855e-05,
"loss": 0.7115,
"step": 130
},
{
"epoch": 0.3641092327698309,
"grad_norm": 0.023634808138012886,
"learning_rate": 3.536231884057971e-05,
"loss": 0.6992,
"step": 140
},
{
"epoch": 0.39011703511053314,
"grad_norm": 0.1857312172651291,
"learning_rate": 3.3913043478260867e-05,
"loss": 0.7133,
"step": 150
},
{
"epoch": 0.41612483745123535,
"grad_norm": 0.057914506644010544,
"learning_rate": 3.246376811594203e-05,
"loss": 0.637,
"step": 160
},
{
"epoch": 0.44213263979193757,
"grad_norm": 0.0314478725194931,
"learning_rate": 3.1014492753623195e-05,
"loss": 0.69,
"step": 170
},
{
"epoch": 0.4681404421326398,
"grad_norm": 0.02375701256096363,
"learning_rate": 2.9565217391304352e-05,
"loss": 0.7052,
"step": 180
},
{
"epoch": 0.494148244473342,
"grad_norm": 0.017046812921762466,
"learning_rate": 2.811594202898551e-05,
"loss": 0.6963,
"step": 190
},
{
"epoch": 0.5201560468140443,
"grad_norm": 0.14757999777793884,
"learning_rate": 2.6666666666666667e-05,
"loss": 0.699,
"step": 200
},
{
"epoch": 0.5461638491547465,
"grad_norm": 0.03953570872545242,
"learning_rate": 2.5217391304347827e-05,
"loss": 0.6362,
"step": 210
},
{
"epoch": 0.5721716514954487,
"grad_norm": 0.031761154532432556,
"learning_rate": 2.3768115942028988e-05,
"loss": 0.6929,
"step": 220
},
{
"epoch": 0.5981794538361509,
"grad_norm": 0.019830092787742615,
"learning_rate": 2.2318840579710145e-05,
"loss": 0.6936,
"step": 230
},
{
"epoch": 0.6241872561768531,
"grad_norm": 0.017688650637865067,
"learning_rate": 2.0869565217391303e-05,
"loss": 0.692,
"step": 240
},
{
"epoch": 0.6501950585175552,
"grad_norm": 0.18702688813209534,
"learning_rate": 1.9420289855072467e-05,
"loss": 0.7103,
"step": 250
},
{
"epoch": 0.6762028608582574,
"grad_norm": 0.03623680770397186,
"learning_rate": 1.7971014492753624e-05,
"loss": 0.6185,
"step": 260
},
{
"epoch": 0.7022106631989596,
"grad_norm": 0.026319777593016624,
"learning_rate": 1.652173913043478e-05,
"loss": 0.7065,
"step": 270
},
{
"epoch": 0.7282184655396619,
"grad_norm": 0.018396981060504913,
"learning_rate": 1.5072463768115944e-05,
"loss": 0.6869,
"step": 280
},
{
"epoch": 0.7542262678803641,
"grad_norm": 0.016413649544119835,
"learning_rate": 1.3623188405797103e-05,
"loss": 0.6865,
"step": 290
},
{
"epoch": 0.7802340702210663,
"grad_norm": 0.1341114193201065,
"learning_rate": 1.2173913043478261e-05,
"loss": 0.7022,
"step": 300
},
{
"epoch": 0.8062418725617685,
"grad_norm": 0.03741007670760155,
"learning_rate": 1.072463768115942e-05,
"loss": 0.6272,
"step": 310
},
{
"epoch": 0.8322496749024707,
"grad_norm": 0.024399157613515854,
"learning_rate": 9.27536231884058e-06,
"loss": 0.6793,
"step": 320
},
{
"epoch": 0.8582574772431729,
"grad_norm": 0.016972342506051064,
"learning_rate": 7.82608695652174e-06,
"loss": 0.7078,
"step": 330
},
{
"epoch": 0.8842652795838751,
"grad_norm": 0.014587855897843838,
"learning_rate": 6.376811594202898e-06,
"loss": 0.7041,
"step": 340
},
{
"epoch": 0.9102730819245773,
"grad_norm": 0.13855686783790588,
"learning_rate": 4.927536231884058e-06,
"loss": 0.6831,
"step": 350
},
{
"epoch": 0.9362808842652796,
"grad_norm": 0.03484239801764488,
"learning_rate": 3.4782608695652175e-06,
"loss": 0.6321,
"step": 360
},
{
"epoch": 0.9622886866059818,
"grad_norm": 0.022825093939900398,
"learning_rate": 2.028985507246377e-06,
"loss": 0.6889,
"step": 370
},
{
"epoch": 0.988296488946684,
"grad_norm": 0.019488025456666946,
"learning_rate": 5.797101449275362e-07,
"loss": 0.6797,
"step": 380
}
],
"logging_steps": 10,
"max_steps": 384,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8.415557240450187e+18,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}