DgMind-20B-Adapters / trainer_state.json
digitalai's picture
Upload folder using huggingface_hub
3f1133d verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.00041734485205124997,
"eval_steps": 500,
"global_step": 30,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.3911495068374998e-05,
"grad_norm": 68.36048889160156,
"learning_rate": 0.0,
"loss": 18.5386,
"step": 1
},
{
"epoch": 2.7822990136749997e-05,
"grad_norm": 71.29923248291016,
"learning_rate": 4e-05,
"loss": 17.6841,
"step": 2
},
{
"epoch": 4.1734485205125e-05,
"grad_norm": 75.4001693725586,
"learning_rate": 8e-05,
"loss": 19.0986,
"step": 3
},
{
"epoch": 5.5645980273499994e-05,
"grad_norm": 74.53475189208984,
"learning_rate": 0.00012,
"loss": 17.6713,
"step": 4
},
{
"epoch": 6.955747534187499e-05,
"grad_norm": 62.05671691894531,
"learning_rate": 0.00016,
"loss": 14.1331,
"step": 5
},
{
"epoch": 8.346897041025e-05,
"grad_norm": 64.3744888305664,
"learning_rate": 0.0002,
"loss": 10.9915,
"step": 6
},
{
"epoch": 9.738046547862498e-05,
"grad_norm": 79.88032531738281,
"learning_rate": 0.000192,
"loss": 6.3303,
"step": 7
},
{
"epoch": 0.00011129196054699999,
"grad_norm": 33.379764556884766,
"learning_rate": 0.00018400000000000003,
"loss": 2.7904,
"step": 8
},
{
"epoch": 0.000125203455615375,
"grad_norm": 15.933351516723633,
"learning_rate": 0.00017600000000000002,
"loss": 1.9766,
"step": 9
},
{
"epoch": 0.00013911495068374998,
"grad_norm": 4.526773929595947,
"learning_rate": 0.000168,
"loss": 0.9661,
"step": 10
},
{
"epoch": 0.00015302644575212497,
"grad_norm": 1.9286530017852783,
"learning_rate": 0.00016,
"loss": 0.7805,
"step": 11
},
{
"epoch": 0.0001669379408205,
"grad_norm": 1.0767860412597656,
"learning_rate": 0.000152,
"loss": 0.9457,
"step": 12
},
{
"epoch": 0.00018084943588887498,
"grad_norm": 0.9176913499832153,
"learning_rate": 0.000144,
"loss": 0.6094,
"step": 13
},
{
"epoch": 0.00019476093095724997,
"grad_norm": 0.8865966200828552,
"learning_rate": 0.00013600000000000003,
"loss": 0.5635,
"step": 14
},
{
"epoch": 0.00020867242602562499,
"grad_norm": 0.8140920400619507,
"learning_rate": 0.00012800000000000002,
"loss": 0.6797,
"step": 15
},
{
"epoch": 0.00022258392109399998,
"grad_norm": 1.4326921701431274,
"learning_rate": 0.00012,
"loss": 0.8631,
"step": 16
},
{
"epoch": 0.00023649541616237497,
"grad_norm": 0.8285752534866333,
"learning_rate": 0.00011200000000000001,
"loss": 0.5675,
"step": 17
},
{
"epoch": 0.00025040691123075,
"grad_norm": 1.2299859523773193,
"learning_rate": 0.00010400000000000001,
"loss": 1.1321,
"step": 18
},
{
"epoch": 0.00026431840629912497,
"grad_norm": 0.9245197176933289,
"learning_rate": 9.6e-05,
"loss": 1.0017,
"step": 19
},
{
"epoch": 0.00027822990136749996,
"grad_norm": 1.1311694383621216,
"learning_rate": 8.800000000000001e-05,
"loss": 0.9173,
"step": 20
},
{
"epoch": 0.00029214139643587495,
"grad_norm": 1.061496615409851,
"learning_rate": 8e-05,
"loss": 0.696,
"step": 21
},
{
"epoch": 0.00030605289150424994,
"grad_norm": 0.5580381155014038,
"learning_rate": 7.2e-05,
"loss": 0.3912,
"step": 22
},
{
"epoch": 0.000319964386572625,
"grad_norm": 0.6166224479675293,
"learning_rate": 6.400000000000001e-05,
"loss": 0.6241,
"step": 23
},
{
"epoch": 0.000333875881641,
"grad_norm": 0.590518593788147,
"learning_rate": 5.6000000000000006e-05,
"loss": 0.602,
"step": 24
},
{
"epoch": 0.00034778737670937497,
"grad_norm": 0.6541720032691956,
"learning_rate": 4.8e-05,
"loss": 0.6138,
"step": 25
},
{
"epoch": 0.00036169887177774996,
"grad_norm": 0.5166042447090149,
"learning_rate": 4e-05,
"loss": 0.4645,
"step": 26
},
{
"epoch": 0.00037561036684612495,
"grad_norm": 0.6018291711807251,
"learning_rate": 3.2000000000000005e-05,
"loss": 0.5399,
"step": 27
},
{
"epoch": 0.00038952186191449994,
"grad_norm": 0.8229033350944519,
"learning_rate": 2.4e-05,
"loss": 0.7849,
"step": 28
},
{
"epoch": 0.0004034333569828749,
"grad_norm": 0.8277194499969482,
"learning_rate": 1.6000000000000003e-05,
"loss": 1.1107,
"step": 29
},
{
"epoch": 0.00041734485205124997,
"grad_norm": 0.5916028022766113,
"learning_rate": 8.000000000000001e-06,
"loss": 0.5184,
"step": 30
}
],
"logging_steps": 1,
"max_steps": 30,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7059632839676928.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}