Rubywong123's picture
Upload folder using huggingface_hub
2a65b40 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9967335510965936,
"eval_steps": 500,
"global_step": 178,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0055996266915538965,
"grad_norm": 0.9287750653110117,
"learning_rate": 5.555555555555555e-07,
"loss": 0.7737,
"step": 1
},
{
"epoch": 0.027998133457769483,
"grad_norm": 0.7051671404282305,
"learning_rate": 2.7777777777777783e-06,
"loss": 0.7341,
"step": 5
},
{
"epoch": 0.05599626691553897,
"grad_norm": 0.2520214877481558,
"learning_rate": 5.555555555555557e-06,
"loss": 0.5258,
"step": 10
},
{
"epoch": 0.08399440037330845,
"grad_norm": 0.14052119101634014,
"learning_rate": 8.333333333333334e-06,
"loss": 0.2894,
"step": 15
},
{
"epoch": 0.11199253383107793,
"grad_norm": 0.056427898284759595,
"learning_rate": 9.996145181203616e-06,
"loss": 0.1822,
"step": 20
},
{
"epoch": 0.1399906672888474,
"grad_norm": 0.03842870313192144,
"learning_rate": 9.952846702217886e-06,
"loss": 0.144,
"step": 25
},
{
"epoch": 0.1679888007466169,
"grad_norm": 0.03200746314332259,
"learning_rate": 9.861849601988384e-06,
"loss": 0.1211,
"step": 30
},
{
"epoch": 0.19598693420438637,
"grad_norm": 0.02565149233539642,
"learning_rate": 9.72403023233439e-06,
"loss": 0.1101,
"step": 35
},
{
"epoch": 0.22398506766215587,
"grad_norm": 0.024613549317984977,
"learning_rate": 9.540715869125407e-06,
"loss": 0.097,
"step": 40
},
{
"epoch": 0.25198320111992534,
"grad_norm": 0.02144179428017716,
"learning_rate": 9.31367192988896e-06,
"loss": 0.0871,
"step": 45
},
{
"epoch": 0.2799813345776948,
"grad_norm": 0.023284769150725856,
"learning_rate": 9.045084971874738e-06,
"loss": 0.0817,
"step": 50
},
{
"epoch": 0.3079794680354643,
"grad_norm": 0.02269598632952473,
"learning_rate": 8.737541634312985e-06,
"loss": 0.0748,
"step": 55
},
{
"epoch": 0.3359776014932338,
"grad_norm": 0.024675985934094376,
"learning_rate": 8.39400372766471e-06,
"loss": 0.0708,
"step": 60
},
{
"epoch": 0.36397573495100327,
"grad_norm": 0.02363318129449854,
"learning_rate": 8.017779709767857e-06,
"loss": 0.0632,
"step": 65
},
{
"epoch": 0.39197386840877274,
"grad_norm": 0.017744170199618194,
"learning_rate": 7.612492823579744e-06,
"loss": 0.0601,
"step": 70
},
{
"epoch": 0.4199720018665422,
"grad_norm": 0.02180319562108693,
"learning_rate": 7.18204620336671e-06,
"loss": 0.0557,
"step": 75
},
{
"epoch": 0.44797013532431174,
"grad_norm": 0.021481125876423392,
"learning_rate": 6.730585285387465e-06,
"loss": 0.0612,
"step": 80
},
{
"epoch": 0.4759682687820812,
"grad_norm": 0.01901447075970861,
"learning_rate": 6.26245788507579e-06,
"loss": 0.0511,
"step": 85
},
{
"epoch": 0.5039664022398507,
"grad_norm": 0.019624242818030622,
"learning_rate": 5.782172325201155e-06,
"loss": 0.0541,
"step": 90
},
{
"epoch": 0.5319645356976201,
"grad_norm": 0.021770565533330784,
"learning_rate": 5.294354018255945e-06,
"loss": 0.052,
"step": 95
},
{
"epoch": 0.5599626691553896,
"grad_norm": 0.019329162918422096,
"learning_rate": 4.803700921204659e-06,
"loss": 0.0494,
"step": 100
},
{
"epoch": 0.5879608026131591,
"grad_norm": 0.020279337189906023,
"learning_rate": 4.314938291590161e-06,
"loss": 0.0472,
"step": 105
},
{
"epoch": 0.6159589360709286,
"grad_norm": 0.020370573885900606,
"learning_rate": 3.832773180720475e-06,
"loss": 0.0496,
"step": 110
},
{
"epoch": 0.6439570695286981,
"grad_norm": 0.01780890820627811,
"learning_rate": 3.3618491021915334e-06,
"loss": 0.0418,
"step": 115
},
{
"epoch": 0.6719552029864676,
"grad_norm": 0.01770442385609227,
"learning_rate": 2.906701312312861e-06,
"loss": 0.0463,
"step": 120
},
{
"epoch": 0.6999533364442371,
"grad_norm": 0.017517610950091075,
"learning_rate": 2.471713133110078e-06,
"loss": 0.0427,
"step": 125
},
{
"epoch": 0.7279514699020065,
"grad_norm": 0.017896600937746723,
"learning_rate": 2.061073738537635e-06,
"loss": 0.0436,
"step": 130
},
{
"epoch": 0.755949603359776,
"grad_norm": 0.01501649892368704,
"learning_rate": 1.6787378104435931e-06,
"loss": 0.0428,
"step": 135
},
{
"epoch": 0.7839477368175455,
"grad_norm": 0.0163607070923351,
"learning_rate": 1.3283874528215735e-06,
"loss": 0.047,
"step": 140
},
{
"epoch": 0.811945870275315,
"grad_norm": 0.019565825006344753,
"learning_rate": 1.013396731136465e-06,
"loss": 0.0441,
"step": 145
},
{
"epoch": 0.8399440037330844,
"grad_norm": 0.017999287355466544,
"learning_rate": 7.367991782295392e-07,
"loss": 0.0454,
"step": 150
},
{
"epoch": 0.8679421371908539,
"grad_norm": 0.020504960252671952,
"learning_rate": 5.012585797388936e-07,
"loss": 0.0485,
"step": 155
},
{
"epoch": 0.8959402706486235,
"grad_norm": 0.016379587248382697,
"learning_rate": 3.0904332038757977e-07,
"loss": 0.0434,
"step": 160
},
{
"epoch": 0.9239384041063929,
"grad_norm": 0.0174476169165481,
"learning_rate": 1.6200453819870122e-07,
"loss": 0.05,
"step": 165
},
{
"epoch": 0.9519365375641624,
"grad_norm": 0.01783292237434629,
"learning_rate": 6.15582970243117e-08,
"loss": 0.0452,
"step": 170
},
{
"epoch": 0.9799346710219319,
"grad_norm": 0.01908510011245694,
"learning_rate": 8.671949076420883e-09,
"loss": 0.0427,
"step": 175
},
{
"epoch": 0.9967335510965936,
"step": 178,
"total_flos": 9.035586631704248e+17,
"train_loss": 0.10327898384479994,
"train_runtime": 2928.2846,
"train_samples_per_second": 2.927,
"train_steps_per_second": 0.061
}
],
"logging_steps": 5,
"max_steps": 178,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 9.035586631704248e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}