{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9710982658959537,
"eval_steps": 500,
"global_step": 258,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.057803468208092484,
"grad_norm": 0.7586865425109863,
"learning_rate": 9.615384615384616e-06,
"loss": 1.5305,
"step": 5
},
{
"epoch": 0.11560693641618497,
"grad_norm": 1.5047568082809448,
"learning_rate": 1.923076923076923e-05,
"loss": 1.3586,
"step": 10
},
{
"epoch": 0.17341040462427745,
"grad_norm": 1.0730587244033813,
"learning_rate": 2.8846153846153845e-05,
"loss": 1.3282,
"step": 15
},
{
"epoch": 0.23121387283236994,
"grad_norm": 1.0204637050628662,
"learning_rate": 3.846153846153846e-05,
"loss": 1.246,
"step": 20
},
{
"epoch": 0.28901734104046245,
"grad_norm": 0.5301410555839539,
"learning_rate": 4.8076923076923084e-05,
"loss": 1.3034,
"step": 25
},
{
"epoch": 0.3468208092485549,
"grad_norm": 0.723696768283844,
"learning_rate": 4.996333534627809e-05,
"loss": 1.1816,
"step": 30
},
{
"epoch": 0.4046242774566474,
"grad_norm": 1.3614885807037354,
"learning_rate": 4.981456948708014e-05,
"loss": 1.2341,
"step": 35
},
{
"epoch": 0.4624277456647399,
"grad_norm": 1.0017260313034058,
"learning_rate": 4.95520920685539e-05,
"loss": 1.2838,
"step": 40
},
{
"epoch": 0.5202312138728323,
"grad_norm": 0.6403581500053406,
"learning_rate": 4.9177105880720173e-05,
"loss": 1.2135,
"step": 45
},
{
"epoch": 0.5780346820809249,
"grad_norm": 0.5783727765083313,
"learning_rate": 4.869132927957007e-05,
"loss": 1.11,
"step": 50
},
{
"epoch": 0.6358381502890174,
"grad_norm": 0.7453054189682007,
"learning_rate": 4.8096988312782174e-05,
"loss": 1.2103,
"step": 55
},
{
"epoch": 0.6936416184971098,
"grad_norm": 1.236171007156372,
"learning_rate": 4.73968065189672e-05,
"loss": 1.2226,
"step": 60
},
{
"epoch": 0.7514450867052023,
"grad_norm": 0.44787439703941345,
"learning_rate": 4.6593992447184586e-05,
"loss": 1.1403,
"step": 65
},
{
"epoch": 0.8092485549132948,
"grad_norm": 0.7945877313613892,
"learning_rate": 4.5692224953922266e-05,
"loss": 1.1933,
"step": 70
},
{
"epoch": 0.8670520231213873,
"grad_norm": 1.6053190231323242,
"learning_rate": 4.469563634491554e-05,
"loss": 1.1941,
"step": 75
},
{
"epoch": 0.9248554913294798,
"grad_norm": 1.0948492288589478,
"learning_rate": 4.360879343905676e-05,
"loss": 1.3349,
"step": 80
},
{
"epoch": 0.9826589595375722,
"grad_norm": 0.726474940776825,
"learning_rate": 4.243667664116956e-05,
"loss": 1.3004,
"step": 85
},
{
"epoch": 1.0346820809248556,
"grad_norm": 1.4559762477874756,
"learning_rate": 4.118465711954569e-05,
"loss": 1.0116,
"step": 90
},
{
"epoch": 1.092485549132948,
"grad_norm": 1.5781135559082031,
"learning_rate": 3.985847219282725e-05,
"loss": 0.8764,
"step": 95
},
{
"epoch": 1.1502890173410405,
"grad_norm": 0.6205704212188721,
"learning_rate": 3.8464199039022605e-05,
"loss": 0.9051,
"step": 100
},
{
"epoch": 1.208092485549133,
"grad_norm": 1.4496972560882568,
"learning_rate": 3.700822684713349e-05,
"loss": 0.9408,
"step": 105
},
{
"epoch": 1.2658959537572254,
"grad_norm": 0.4967881143093109,
"learning_rate": 3.5497227539006614e-05,
"loss": 0.7376,
"step": 110
},
{
"epoch": 1.323699421965318,
"grad_norm": 1.4739594459533691,
"learning_rate": 3.3938125195576e-05,
"loss": 0.9192,
"step": 115
},
{
"epoch": 1.3815028901734103,
"grad_norm": 1.443954348564148,
"learning_rate": 3.233806432759837e-05,
"loss": 0.7502,
"step": 120
},
{
"epoch": 1.439306358381503,
"grad_norm": 0.8857870697975159,
"learning_rate": 3.070437713627965e-05,
"loss": 0.7896,
"step": 125
},
{
"epoch": 1.4971098265895955,
"grad_norm": 0.49113208055496216,
"learning_rate": 2.9044549913819124e-05,
"loss": 0.7826,
"step": 130
},
{
"epoch": 1.5549132947976878,
"grad_norm": 0.5606523752212524,
"learning_rate": 2.7366188737839026e-05,
"loss": 0.7622,
"step": 135
},
{
"epoch": 1.6127167630057804,
"grad_norm": 0.5834754705429077,
"learning_rate": 2.5676984616903367e-05,
"loss": 0.6622,
"step": 140
},
{
"epoch": 1.6705202312138727,
"grad_norm": 0.9665216207504272,
"learning_rate": 2.3984678246844677e-05,
"loss": 0.809,
"step": 145
},
{
"epoch": 1.7283236994219653,
"grad_norm": 0.7581700086593628,
"learning_rate": 2.2297024539401463e-05,
"loss": 0.7095,
"step": 150
},
{
"epoch": 1.7861271676300579,
"grad_norm": 0.8531942367553711,
"learning_rate": 2.0621757085711734e-05,
"loss": 0.8316,
"step": 155
},
{
"epoch": 1.8439306358381504,
"grad_norm": 1.121618390083313,
"learning_rate": 1.8966552717507364e-05,
"loss": 0.7683,
"step": 160
},
{
"epoch": 1.9017341040462428,
"grad_norm": 1.0460470914840698,
"learning_rate": 1.7338996328405526e-05,
"loss": 0.7656,
"step": 165
},
{
"epoch": 1.9595375722543351,
"grad_norm": 1.1561076641082764,
"learning_rate": 1.574654611650214e-05,
"loss": 0.7079,
"step": 170
},
{
"epoch": 2.0115606936416186,
"grad_norm": 0.6570937037467957,
"learning_rate": 1.4196499407541359e-05,
"loss": 0.7448,
"step": 175
},
{
"epoch": 2.069364161849711,
"grad_norm": 0.8971176147460938,
"learning_rate": 1.2695959215274816e-05,
"loss": 0.5049,
"step": 180
},
{
"epoch": 2.1271676300578033,
"grad_norm": 0.7877609133720398,
"learning_rate": 1.125180169224613e-05,
"loss": 0.4581,
"step": 185
},
{
"epoch": 2.184971098265896,
"grad_norm": 1.099473476409912,
"learning_rate": 9.870644620155877e-06,
"loss": 0.4307,
"step": 190
},
{
"epoch": 2.2427745664739884,
"grad_norm": 1.1364312171936035,
"learning_rate": 8.558817084198387e-06,
"loss": 0.4858,
"step": 195
},
{
"epoch": 2.300578034682081,
"grad_norm": 0.7978260517120361,
"learning_rate": 7.3223304703363135e-06,
"loss": 0.4745,
"step": 200
},
{
"epoch": 2.3583815028901736,
"grad_norm": 0.6734775900840759,
"learning_rate": 6.166850918416406e-06,
"loss": 0.5683,
"step": 205
},
{
"epoch": 2.416184971098266,
"grad_norm": 0.9826372265815735,
"learning_rate": 5.097673357358907e-06,
"loss": 0.466,
"step": 210
},
{
"epoch": 2.4739884393063583,
"grad_norm": 1.0204384326934814,
"learning_rate": 4.119697241402998e-06,
"loss": 0.4577,
"step": 215
},
{
"epoch": 2.531791907514451,
"grad_norm": 1.038061261177063,
"learning_rate": 3.2374040985957004e-06,
"loss": 0.3862,
"step": 220
},
{
"epoch": 2.5895953757225434,
"grad_norm": 0.9037131071090698,
"learning_rate": 2.4548369944073004e-06,
"loss": 0.4205,
"step": 225
},
{
"epoch": 2.647398843930636,
"grad_norm": 0.7115334272384644,
"learning_rate": 1.7755820045802145e-06,
"loss": 0.3581,
"step": 230
},
{
"epoch": 2.705202312138728,
"grad_norm": 0.8673137426376343,
"learning_rate": 1.2027517821111112e-06,
"loss": 0.4342,
"step": 235
},
{
"epoch": 2.7630057803468207,
"grad_norm": 0.8103125691413879,
"learning_rate": 7.389712936697129e-07,
"loss": 0.4275,
"step": 240
},
{
"epoch": 2.820809248554913,
"grad_norm": 0.638612687587738,
"learning_rate": 3.8636579081657577e-07,
"loss": 0.4198,
"step": 245
},
{
"epoch": 2.878612716763006,
"grad_norm": 0.8524174690246582,
"learning_rate": 1.4655107114101007e-07,
"loss": 0.5151,
"step": 250
},
{
"epoch": 2.9364161849710984,
"grad_norm": 0.9686955809593201,
"learning_rate": 2.0626073947138668e-08,
"loss": 0.4155,
"step": 255
},
{
"epoch": 2.9710982658959537,
"step": 258,
"total_flos": 2.017433878246195e+16,
"train_loss": 0.8397066662477892,
"train_runtime": 2578.3607,
"train_samples_per_second": 0.804,
"train_steps_per_second": 0.1
}
],
"logging_steps": 5,
"max_steps": 258,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.017433878246195e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}