{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9998005186515061,
"eval_steps": 10000,
"global_step": 3759,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.026597513132522108,
"grad_norm": 0.14929383993148804,
"learning_rate": 2.6595744680851064e-05,
"loss": 0.7258,
"step": 100
},
{
"epoch": 0.053195026265044215,
"grad_norm": 0.22324731945991516,
"learning_rate": 5.319148936170213e-05,
"loss": 0.477,
"step": 200
},
{
"epoch": 0.07979253939756632,
"grad_norm": 0.1394360512495041,
"learning_rate": 7.978723404255319e-05,
"loss": 0.4484,
"step": 300
},
{
"epoch": 0.10639005253008843,
"grad_norm": 0.1933247148990631,
"learning_rate": 9.99875823256999e-05,
"loss": 0.4301,
"step": 400
},
{
"epoch": 0.13298756566261055,
"grad_norm": 0.1871040314435959,
"learning_rate": 9.966886949974127e-05,
"loss": 0.4265,
"step": 500
},
{
"epoch": 0.15958507879513265,
"grad_norm": 0.12056238204240799,
"learning_rate": 9.892213289286789e-05,
"loss": 0.4169,
"step": 600
},
{
"epoch": 0.18618259192765477,
"grad_norm": 0.23017819225788116,
"learning_rate": 9.775380754233831e-05,
"loss": 0.4149,
"step": 700
},
{
"epoch": 0.21278010506017686,
"grad_norm": 0.22511781752109528,
"learning_rate": 9.617396154591494e-05,
"loss": 0.4127,
"step": 800
},
{
"epoch": 0.23937761819269898,
"grad_norm": 0.15255825221538544,
"learning_rate": 9.41962092995658e-05,
"loss": 0.4059,
"step": 900
},
{
"epoch": 0.2659751313252211,
"grad_norm": 0.1079307496547699,
"learning_rate": 9.183759417477731e-05,
"loss": 0.4018,
"step": 1000
},
{
"epoch": 0.2925726444577432,
"grad_norm": 0.1743098944425583,
"learning_rate": 8.9118441646512e-05,
"loss": 0.3919,
"step": 1100
},
{
"epoch": 0.3191701575902653,
"grad_norm": 0.19822756946086884,
"learning_rate": 8.606218413748768e-05,
"loss": 0.3909,
"step": 1200
},
{
"epoch": 0.3457676707227874,
"grad_norm": 0.1920367330312729,
"learning_rate": 8.26951590881904e-05,
"loss": 0.3855,
"step": 1300
},
{
"epoch": 0.37236518385530953,
"grad_norm": 0.1861521601676941,
"learning_rate": 7.904638199276271e-05,
"loss": 0.3907,
"step": 1400
},
{
"epoch": 0.39896269698783166,
"grad_norm": 0.10632487386465073,
"learning_rate": 7.514729635664032e-05,
"loss": 0.3846,
"step": 1500
},
{
"epoch": 0.4255602101203537,
"grad_norm": 0.2400396317243576,
"learning_rate": 7.103150273068921e-05,
"loss": 0.3904,
"step": 1600
},
{
"epoch": 0.45215772325287584,
"grad_norm": 0.1306409388780594,
"learning_rate": 6.673446915690408e-05,
"loss": 0.3912,
"step": 1700
},
{
"epoch": 0.47875523638539796,
"grad_norm": 0.16852012276649475,
"learning_rate": 6.229322552091536e-05,
"loss": 0.3806,
"step": 1800
},
{
"epoch": 0.5053527495179201,
"grad_norm": 0.23730169236660004,
"learning_rate": 5.774604444523663e-05,
"loss": 0.3812,
"step": 1900
},
{
"epoch": 0.5319502626504422,
"grad_norm": 0.17537447810173035,
"learning_rate": 5.313211147316933e-05,
"loss": 0.3767,
"step": 2000
},
{
"epoch": 0.5585477757829643,
"grad_norm": 0.13570892810821533,
"learning_rate": 4.849118738557042e-05,
"loss": 0.3782,
"step": 2100
},
{
"epoch": 0.5851452889154865,
"grad_norm": 0.21518413722515106,
"learning_rate": 4.386326556048369e-05,
"loss": 0.3706,
"step": 2200
},
{
"epoch": 0.6117428020480085,
"grad_norm": 0.28089213371276855,
"learning_rate": 3.9288227328354234e-05,
"loss": 0.3805,
"step": 2300
},
{
"epoch": 0.6383403151805306,
"grad_norm": 0.19185791909694672,
"learning_rate": 3.4805498292818055e-05,
"loss": 0.3683,
"step": 2400
},
{
"epoch": 0.6649378283130527,
"grad_norm": 0.2617637515068054,
"learning_rate": 3.045370857873868e-05,
"loss": 0.3825,
"step": 2500
},
{
"epoch": 0.6915353414455748,
"grad_norm": 0.11534757167100906,
"learning_rate": 2.6270359935318967e-05,
"loss": 0.3721,
"step": 2600
},
{
"epoch": 0.718132854578097,
"grad_norm": 0.18955928087234497,
"learning_rate": 2.22915025630421e-05,
"loss": 0.3682,
"step": 2700
},
{
"epoch": 0.7447303677106191,
"grad_norm": 0.22698479890823364,
"learning_rate": 1.8551424449401173e-05,
"loss": 0.3675,
"step": 2800
},
{
"epoch": 0.7713278808431412,
"grad_norm": 0.18734973669052124,
"learning_rate": 1.5082355890580507e-05,
"loss": 0.3719,
"step": 2900
},
{
"epoch": 0.7979253939756633,
"grad_norm": 0.22010599076747894,
"learning_rate": 1.1914191745387143e-05,
"loss": 0.363,
"step": 3000
},
{
"epoch": 0.8245229071081854,
"grad_norm": 0.25703734159469604,
"learning_rate": 9.074233814921846e-06,
"loss": 0.3711,
"step": 3100
},
{
"epoch": 0.8511204202407074,
"grad_norm": 0.15355700254440308,
"learning_rate": 6.586955568045134e-06,
"loss": 0.362,
"step": 3200
},
{
"epoch": 0.8777179333732296,
"grad_norm": 0.20059217512607574,
"learning_rate": 4.47379124012689e-06,
"loss": 0.3422,
"step": 3300
},
{
"epoch": 0.9043154465057517,
"grad_norm": 0.16272881627082825,
"learning_rate": 2.7529511225315162e-06,
"loss": 0.3667,
"step": 3400
},
{
"epoch": 0.9309129596382738,
"grad_norm": 0.24089215695858002,
"learning_rate": 1.4392646345894934e-06,
"loss": 0.3883,
"step": 3500
},
{
"epoch": 0.9575104727707959,
"grad_norm": 0.14056609570980072,
"learning_rate": 5.440525303902377e-07,
"loss": 0.3723,
"step": 3600
},
{
"epoch": 0.984107985903318,
"grad_norm": 0.18514706194400787,
"learning_rate": 7.502934165993791e-08,
"loss": 0.366,
"step": 3700
},
{
"epoch": 0.9998005186515061,
"step": 3759,
"total_flos": 1.8198794606166934e+19,
"train_loss": 0.39680684224885016,
"train_runtime": 142849.6117,
"train_samples_per_second": 0.421,
"train_steps_per_second": 0.026
}
],
"logging_steps": 100,
"max_steps": 3759,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 2500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.8198794606166934e+19,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}